-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgenCollectionInterface.py
225 lines (186 loc) · 7.89 KB
/
genCollectionInterface.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
#!/usr/bin/python
# genCollectionInterface.py
# generate a web interface from the result of a search on the internet archive
# dumped as a csv file, organized into categories from the category list. The
# categories are searched for in the subject field of the csv dump
# all un-categorized data is saved in 'otherCategory.html'
#
# 1) extract header, body, and etc from the template
# 2) output header
# 3) start body
# 4) for each entry in the csv file
# parse out the fields, save in list, categorize
# build the appropriate inteface element, save in categories lists
# 5) buildIntefaceElement(meta)
# get the cover using the OLID map and the OL Cover API
# create link to bookreader with URL of book using cover as "button"
# make the title, display under cover
# make a tooltip string using the metadata
# 6) generate the body using the interface elements above, one page for each category
#
#
#
import os
import sys
import csv
import re
def usage():
print 'usage: genweb.py [options] <input csv> <input categories file> [<output.html>]'
print ' options:'
print ' -attached-storage: generate links for attached storage solution'
# creates the interface elements (html, links, tooltip stuff, cover img link, etc) given
# a list of the metadata elements
def buildInterfaceElement(outfile,m):
# build the output for the div containg book cover, title, link
global attStorage # attached storage link switch
# open the div template
divstr = open('divtmpl.html.tmpl','r').read();
# replace the DOM element id with a generated one, use the book id?
divstr = re.sub('\$idstr', m[4],divstr)
# replace the cover img src with the one for this book
# this will use the mapping from iacl to open library id's from the
# original book list
olid =mapIaclIDToOLID(m[4])
if olid != '':
if attStorage == 0:
coverurl = getCoverUrl(olid)
else:
coverurl = getCoverUrl(m[4])
else: coverurl = ''
divstr = re.sub('\$coverstr',coverurl,divstr)
#replace the iacl lookup id with the one for this book in the link
if attStorage == 0:
#switch which of the following divstr assignment is commented to select what type of link
# interface to create
# 1. link to download djvu
# divstr = re.sub('\$iaclid',"http://archive.us.org/stream/" + m[4] + "/" + m[4] + ".djvu",divstr)
# 2. link to the embedded bookreader
divstr = re.sub('\$iaclid', "http://www.us.archive.org/GnuBook/GnuBookEmbed.php?id=" + m[4], divstr)
else:
# 3. link to already downloaded content on attached storage
divstr = re.sub('\$iaclid',"file:///media/LIB/djvu/" + m[4] + ".djvu",divstr)
#replace title
# tear the titles down to max 8 words
# split first at ':', then max 8 words of what is left of ':'
l = (m[1].split(':',1)[0]).split(' ',8)[0:7]
title = ''
for w in l:
title = title + w + ' '
divstr = re.sub('\$title',title,divstr)
outfile.write(divstr)
# build the tooltip list for each output file
# format of Tip:
# new Tip('<dom_id_of_element>', '<b>Full Title:<\/b> Adventures of Huckleberry Finn (Tom Sawyer\'s comrade)...<br \/><br \/><b>Author:<\/b> Twain, Mark, 1835-1910<br \/><br \/><b>Details:<\/b> Spec. Coll. copy 1: Green cloth over boards, blocked in gold and black, in case. Ex libris Olive Percival. Gift of Jo Swerling<br \/><br \/><b>Subjects:<\/b> Adventure and adventurers, Mississippi River -- Juvenile literature,Missouri -- Juvenile literature, Adventures of Huckleberry Finn');
def buildTooltip(ttfile,m):
# build the output string - probably wind up as tool tip
outstr = "new Tip('" + re.sub('\'','\\\'', m[4]) + "', '<b>Full Title:<\/b> " + re.sub('\'','\\\'', m[1])
outstr = outstr + '<br \/><br \/><b>Author:<\/b> ' + re.sub('\'','\\\'', m[0])
outstr = outstr + '<br \/><br \/><b>Details:<\/b> ' + re.sub('\'','\\\'', m[5] )
outstr = outstr + '<br \/><br \/><b>Subjects:<\/b> ' + re.sub('\'','\\\'', m[2] )
# outstr = outstr + 'date: ' + re.sub('\'','\\\'', m[3].split('-',1)[0]) + '<br>\n'
# outstr = outstr + 'file: ' + re.sub('\'','\\\'', m[4]) + '<br>\n'
outstr = outstr + "');\n"
ttfile.write(outstr)
# returns true if book is in the booklist file
def isInBookList(id):
bl = open('iaclBookList.txt','r')
for line in bl.readlines():
if line.find(id) != -1:
return 1
return 0
#returns the Open Library ID using the mappings in the book list file
def mapIaclIDToOLID(id):
bl = open('iaclBookList.txt','r')
for line in bl.readlines():
if line.find(id) != -1:
return(line.split(' ',1)[0].lstrip('/b'))
return('')
# returns the cover url, argument is img file basename
def getCoverUrl(name):
global attStorage;
if attStorage == 0:
return "http://covers.openlibrary.org/b/OLID/" + name + "-M.jpg"
else:
return "file:///media/LIB/covers/" + name + ".jpg"
# MAIN
i = 0
attStorage = 0
infile=''
outfile=''
catfile=''
meta=[] # list of csv meta data
for arg in sys.argv:
if arg[0] == '-':
if arg == "-attached-storage":
attStorage = 1;
else:
print 'unrecognized option: ' + arg + ' IGNORED'
continue;
if i == 1:
infile=arg
if i == 2:
catfile=arg
i = i+1
print i
print attStorage
if i < 3:
usage()
sys.exit(1)
print 'opening input csv for read: ' + infile
reader=csv.reader(open(infile, 'r'))
title = author = description = subject = date = ''
#indexes for the various fields
aidx=1 # author
tidx=7 # title
sidx=6 # subject, will extract category from here
didx=2 # date
fidx=5 # file - identifier field used to derive the book link
descidx=3 # description
for row in reader:
for f in row:
author=row[aidx]
title=row[tidx]
subject=row[sidx]
date=row[didx]
file=row[fidx]
desc=row[descidx]
if isInBookList(file):
meta.append([author, title, subject, date, file, desc])
cats = open(catfile,'r').readlines() # get the categories list
for m in meta:
# filter subject categories
# search subject m[2] for a match in categories
outcat='' # category string
for line in cats:
if re.search(line.rstrip(),m[2],re.I) != None:
print "found a category match for " + line
print "subject is: " + m[2]
print "-------------------------------------------------"
# if there's a match output to that category
outcat = line.rstrip() + '.html'
outcatf = open( outcat, 'a')
ttfname = line.rstrip() + 'Tooltip.js'
ttf = open( ttfname, 'a')
buildInterfaceElement(outcatf,m);
buildTooltip(ttf,m)
if (outcat == ''): # went through cat filter without a match
outcatf = open('other.html', 'a') # dump to the catchall file
ttf = open( 'otherTooltip.js', 'a')
buildInterfaceElement(outcatf,m);
buildTooltip(ttf,m)
# now concat templates head, <cat>.html, <cat_tooltip>.js and foot
cathtml = open('categories.html','w')
cathtml.write(re.sub('\$header','Categories',open('headtmpl.html.tmpl','r').read()))
for line in cats:
if os.path.exists(line.strip() + '.html') != True:
continue
# write the category html file
open(line.strip() + 'Category.html', 'w').write( re.sub('\$header',line.strip(),open('headtmpl.html.tmpl','r').read()) + open(line.strip() + '.html','r').read() + "<script type='text/javascript' language='javascript'>" + open(line.strip() + 'Tooltip.js','r').read() + open('foottmpl.html.tmpl','r').read() )
# write the link to the category in the categories.html
cathtml.write('<a href="' + line.strip() + 'Category.html' + '">' + line.strip() + "</a><br>\n")
if os.path.exists('other.html') == True:
open('otherCategory.html', 'w').write(open('headtmpl.html.tmpl','r').read() + open('other.html','r').read() + "<script type='text/javascript' language='javascript'>" + open('otherTooltip.js','r').read() + open('foottmpl.html.tmpl','r').read() )
cathtml.write('<a href="otherCategory.html">Other Category</a><br>\n')
# complete the categories.html
cathtml.write(open('foottmpl.html.tmpl','r').read())
sys.exit()