-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathmirror.py
executable file
·85 lines (64 loc) · 2.36 KB
/
mirror.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python
import json, internetarchive, oursql, os, sys
from requests.exceptions import ConnectionError
###
import logging
import sys
# Route every record that reaches the root logger (DEBUG and above) to
# stdout, prefixed with a timestamp. Libraries that log through the standard
# `logging` module and propagate to the root logger are covered as well.
out_hdlr = logging.StreamHandler(sys.stdout)
out_hdlr.setLevel(logging.DEBUG)
out_hdlr.setFormatter(logging.Formatter('%(asctime)s %(message)s'))

log = logging.getLogger()
log.setLevel(logging.DEBUG)
log.addHandler(out_hdlr)
###
# HTML description attached to every mirrored item. It is filled in with the
# %-operator and expects a mapping with the keys `real_filename`, `slug` and
# `upload_date`. Newlines are stripped so the description is a single line.
#
# The document link's href uses https so that it matches both the URL shown
# as the link text and the site link in the first paragraph.
template = """
<p>
<strong>This public document was automatically mirrored from <a href="https://pdf.yt/">PDFy</a>.</strong>
</p>
<ul>
<li><strong>Original filename:</strong> %(real_filename)s</li>
<li><strong>URL:</strong> <a href="https://pdf.yt/d/%(slug)s">https://pdf.yt/d/%(slug)s</a></li>
<li><strong>Upload date:</strong> %(upload_date)s</li>
</ul>
""".replace("\n", "")
# Read the settings file from the current working directory. The script
# uses its "database" section here and its "internetarchive" section later.
with open("config.json", "r") as f:
    conf = json.load(f)

# Open the database connection described by the "database" section.
db_settings = conf["database"]
dbconn = oursql.Connection(
    host=db_settings["hostname"],
    user=db_settings["username"],
    passwd=db_settings["password"],
    db=db_settings["database"],
    autoreconnect=True,
)

# Fetch every public document whose `Mirrored` flag is still 0.
cur = dbconn.cursor()
cur.execute("SELECT `Id`, `SlugId`, `Filename`, `Uploaded`, `OriginalFilename` FROM documents WHERE `Mirrored` = 0 AND `Public` = 1")
items = cur.fetchall()
# Upload each pending document to the Internet Archive and record the outcome
# in the `Mirrored` column: 1 after a successful upload, 2 after a failure
# (a falsy result from upload() or any exception).
#
# The print calls take one pre-formatted string and the handler uses
# `except ... as`, so this block is valid on Python 2.6+ as well as Python 3.
for doc in items:
    id_, slug, storage_filename, upload_date, real_filename = doc

    # Rows without an upload timestamp get a fixed placeholder text.
    if upload_date is None:
        upload_date = "Before April 27, 2014"
    else:
        upload_date = upload_date.strftime("%B %d, %Y %H:%M:%S")

    source_file = "storage/%s" % storage_filename
    item = internetarchive.get_item("pdfy-%s" % slug)

    # NOTE(review): real_filename is interpolated into the HTML description
    # without escaping - confirm whether filenames are sanitized upstream.
    metadata = {
        "mediatype": "texts",
        "subject": ["mirror", "pdf.yt"],
        "collection": "pdfymirrors",
        "title": "%s (PDFy mirror)" % real_filename,
        "description": template % {
            "real_filename": real_filename,
            "slug": slug,
            "upload_date": upload_date
        },
        "date": "2014-01-01"
    }

    err = False
    try:
        uploaded = item.upload(
            [(real_filename, source_file)],
            metadata=metadata,
            access_key=conf["internetarchive"]["accesskey"],
            secret_key=conf["internetarchive"]["secretkey"]
        )
        if uploaded:
            cur = dbconn.cursor()
            cur.execute("UPDATE documents SET `Mirrored` = 1 WHERE `Id` = ?", (id_,))
            print("Uploaded %s (%s)" % (slug, real_filename))
        else:
            # The original referenced an undefined name `title` here, which
            # raised NameError and hid the real failure message.
            print("FAILED upload of %s (%s)!" % (slug, real_filename))
            err = True
    except Exception as e:
        # Best-effort: one bad document must not stop the remaining uploads.
        # The slug and trailing newline keep consecutive errors readable.
        sys.stderr.write("Error while mirroring %s: %s\n" % (slug, str(e)))
        err = True

    if err:
        cur = dbconn.cursor()
        cur.execute("UPDATE documents SET `Mirrored` = 2 WHERE `Id` = ?", (id_,))