+
+
+
+ 1
+
+
+
+
+
+
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+ -
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/usingDatabase-databankauth.txt b/docs/usingDatabase-databankauth.txt
new file mode 100644
index 0000000..e8b7828
--- /dev/null
+++ b/docs/usingDatabase-databankauth.txt
@@ -0,0 +1,134 @@
+Note: If you have forgotten the password for the root user, you can reset the password by running dpkg-reconfigure
+$ dpkg -l mysql-server* | grep ii
+$ dpkg-reconfigure mysql-server-5.1
+$ sudo dpkg-reconfigure mysql-server-5.1
+
+$ mysql -u root -p
+same as sudo password
+
+#Create Database databankauth and user databanksqladmin. Give user databanksqladmin access to databankauth
+mysql> use mysql;
+mysql> CREATE DATABASE databankauth DEFAULT CHARACTER SET utf8 COLLATE utf8_bin;
+mysql> GRANT ALL ON databankauth.* TO databanksqladmin@localhost IDENTIFIED BY 'password';
+mysql> exit
+
+$ mysql -h localhost -u databanksqladmin -p
+password: ######
+
+mysql> use databankauth;
+
+mysql> show tables;
+
+mysql> select * from silo;
++----+------------------------+----------+
+| id | group_name | silo |
++----+------------------------+----------+
+| 1 | sandbox_administrator | sandbox |
+| 2 | sandbox_manager | sandbox |
+| 3 | sandbox_submitter | sandbox |
+| 4 | sandbox2_administrator | sandbox2 |
+| 5 | sandbox2_manager | sandbox2 |
+| 6 | sandbox2_submitter | sandbox2 |
+| 7 | sandbox3_administrator | sandbox3 |
+| 8 | sandbox3_manager | sandbox3 |
+| 9 | sandbox3_submitter | sandbox3 |
++----+------------------------+----------+
+
+mysql> select * from permission;
++----+-----------------+
+| id | permission_name |
++----+-----------------+
+| 1 | administrator |
+| 2 | manager |
+| 3 | submitter |
++----+-----------------+
+3 rows in set (0.00 sec)
+
+mysql> select * from user;
++----+------------------+----------+-------+------------------------+-----------+----------+
+| id | user_name | password | email | name | firstname | lastname |
++----+------------------+----------+-------+------------------------+-----------+----------+
+| 1 | sandbox_user | 2de12713 | NULL | Test User I | NULL | NULL |
+| 2 | sandbox_user2 | aa585d66 | NULL | Test User II | NULL | NULL |
+| 3 | sandbox_user3 | 8ad67e77 | NULL | Test User III | NULL | NULL |
+| 4 | admin | 6f0006ba | NULL | Test Administrator I | NULL | NULL |
+| 5 | admin2 | 0b62a04c | NULL | Test Administrator II | NULL | NULL |
+| 6 | admin3 | 33bd9bbd | NULL | Test Administrator III | NULL | NULL |
+| 7 | sandbox_manager | bc6592d3 | NULL | Test Manager I | NULL | NULL |
+| 8 | sandbox_manager2 | 9c7643eb | NULL | Test Manager II | NULL | NULL |
+| 9 | sandbox_manager3 | 7f000410 | NULL | Test Manager III | NULL | NULL |
++----+------------------+----------+-------+------------------------+-----------+----------+
+9 rows in set (0.00 sec)
+
+mysql> select * from user_group;
++---------+----------+
+| user_id | group_id |
++---------+----------+
+| 1 | 3 |
+| 2 | 6 |
+| 3 | 9 |
+| 4 | 1 |
+| 5 | 4 |
+| 6 | 7 |
+| 7 | 2 |
+| 8 | 5 |
+| 9 | 8 |
++---------+----------+
+9 rows in set (0.00 sec)
+
+mysql> select * from group_permission;
++----------+---------------+
+| group_id | permission_id |
++----------+---------------+
+| 1 | 1 |
+| 2 | 2 |
+| 3 | 3 |
+| 4 | 1 |
+| 5 | 2 |
+| 6 | 3 |
+| 7 | 1 |
+| 8 | 2 |
+| 9 | 3 |
++----------+---------------+
+9 rows in set (0.00 sec)
+
+mysql> SELECT ug.user_id, ug.group_id, gp.permission_id
+FROM user_group ug
+INNER JOIN group_permission gp on ug.group_id = gp.group_id
++---------+----------+---------------+
+| user_id | group_id | permission_id |
++---------+----------+---------------+
+| 1 | 3 | 3 |
+| 2 | 6 | 3 |
+| 3 | 9 | 3 |
+| 4 | 1 | 1 |
+| 5 | 4 | 1 |
+| 6 | 7 | 1 |
+| 7 | 2 | 2 |
+| 8 | 5 | 2 |
+| 9 | 8 | 2 |
++---------+----------+---------------+
+9 rows in set (0.00 sec)
+
+mysql> SELECT u.user_name, g.silo, p.permission_name
+FROM user u
+INNER JOIN user_group ug ON u.id = ug.user_id
+INNER JOIN silo g ON ug.group_id = g.id
+INNER JOIN group_permission gp ON g.id = gp.group_id
+INNER JOIN permission p ON gp.permission_id = p.id;
++------------------+----------+-----------------+
+| user_name | silo | permission_name |
++------------------+----------+-----------------+
+| admin | sandbox | administrator |
+| admin2 | sandbox2 | administrator |
+| admin3 | sandbox3 | administrator |
+| sandbox_manager | sandbox | manager |
+| sandbox_manager2 | sandbox2 | manager |
+| sandbox_manager3 | sandbox3 | manager |
+| sandbox_user | sandbox | submitter |
+| sandbox_user2 | sandbox2 | submitter |
+| sandbox_user3 | sandbox3 | submitter |
++------------------+----------+-----------------+
+9 rows in set (0.00 sec)
+
+
diff --git a/docs/using_databank_api/DatabankDemo.py b/docs/using_databank_api/DatabankDemo.py
new file mode 100644
index 0000000..4519576
--- /dev/null
+++ b/docs/using_databank_api/DatabankDemo.py
@@ -0,0 +1,54 @@
+#Databank API demo
+
+import urllib2
+import base64
+import urllib
+from lib.multipartform import MultiPartForm
+
+#===============================================================================
+#Using urllib2 to create a package in Databank
+url = "http://databank-vm1.oerc.ox.ac.uk/test/datasets"
+req = urllib2.Request(url)
+USER = "admin"
+PASS = "test"
+identifier = "TestSubmission"
+auth = 'Basic ' + base64.urlsafe_b64encode("%s:%s" % (USER, PASS))
+req.add_header('Authorization', auth)
+req.add_header('Accept', 'application/JSON')
+req.add_data(urllib.urlencode({'id': identifier}))
+
+# To verify the method is POST
+req.get_method()
+
+ans = urllib2.urlopen(req)
+
+ans.read()
+ans.msg
+ans.code
+
+#===============================================================================
+#Using urllib2 to post a file in Databank
+#Add a file
+form = MultiPartForm()
+filename = "solrconfig.xml"
+filepath = "data/unicode07.xml"
+form.add_file('file', filename, fileHandle=open(filepath))
+
+# Build the request
+url2 = "http://databank-vm1.oerc.ox.ac.uk/test/datasets/TestSubmission"
+req2 = urllib2.Request(url2)
+auth = 'Basic ' + base64.urlsafe_b64encode("admin:test")
+req2.add_header('Authorization', auth)
+req2.add_header('Accept', 'application/JSON')
+body = str(form)
+req2.add_header('Content-type', form.get_content_type())
+req2.add_header('Content-length', len(body))
+req2.add_data(body)
+
+print
+print 'OUTGOING DATA:'
+print req2.get_data()
+ans2 = urllib2.urlopen(req2)
+print
+print 'SERVER RESPONSE:'
+ans2.read()
diff --git a/docs/using_databank_api/README b/docs/using_databank_api/README
new file mode 100644
index 0000000..d5ef618
--- /dev/null
+++ b/docs/using_databank_api/README
@@ -0,0 +1,32 @@
+Guide to using the RDFDatabank API
+
+The directory contains example python code that services can use for interacting with the Databank API.
+There are two sets of examples
+1. main.py
+ This uses the helper class HTTP_request.py which in turn uses httplib to make http calls.
+ It has code on using GET, POST, PUT and DELETE methods for interacting with the API and works over both http / https.
+
+2. DatabankDemo.py or postingToDatabank.py
+    This is example code to POST items to Databank using urllib2.
+    The form data is constructed slightly differently in DatabankDemo.py and postingToDatabank.py which can be seen in multipartform.py and multipart.py respectively.
+
+RDFDatabank was developed by Ben O'Steen and Anusha Ranganathan at the Bodleian Libraries, University of Oxford
+
+The purpose of DataBank is to provide a robust and efficient system for the safe storage of and open access to research data.
+
+The API documentation for using RDFdatabank is at
+http://databank-vm1.oerc.ox.ac.uk/api or
+https://github.com/databank/RDFDatabank/tree/master/rdfdatabank/public/static/api_files
+
+The source code for databank is available at https://github.com/dataflow/RDFDatabank.
+The test code for databank can be found in https://github.com/dataflow/RDFDatabank/tree/master/rdfdatabank/tests/RDFDatabank
+
+Note:
+ DataBank is not intended to store large-scale data sets such as grid data or other vast data sets.
+
+ All of the metadata relating to the dataset is in the file manifest.rdf. The system generated metadata will contain a listing of the files in the dataset, date of submission and the current version
+
+ If the dataset contains a README file in the top level (i.e. not within a folder), the contents of this file will be displayed at the top of the page for that dataset within databank.ora.ox.ac.uk
+
+    If the dataset contains a file called 'manifest.rdf' and it is valid rdf, the metadata in this file will be merged with the system generated metadata, when the dataset is unpacked within databank.ora.ox.ac.uk
+
diff --git a/rdfdatabank/tests/functional/__init__.py b/docs/using_databank_api/__init__.py
similarity index 100%
rename from rdfdatabank/tests/functional/__init__.py
rename to docs/using_databank_api/__init__.py
diff --git a/docs/using_databank_api/data/testrdf.zip b/docs/using_databank_api/data/testrdf.zip
new file mode 100644
index 0000000..a52cdce
Binary files /dev/null and b/docs/using_databank_api/data/testrdf.zip differ
diff --git a/docs/using_databank_api/data/testrdf4.zip b/docs/using_databank_api/data/testrdf4.zip
new file mode 100644
index 0000000..376bd34
Binary files /dev/null and b/docs/using_databank_api/data/testrdf4.zip differ
diff --git a/docs/using_databank_api/data/unicode07.xml b/docs/using_databank_api/data/unicode07.xml
new file mode 100644
index 0000000..cd7a87c
--- /dev/null
+++ b/docs/using_databank_api/data/unicode07.xml
@@ -0,0 +1,29 @@
+
+
+
+ Some verses in Sanskrit
+ The following is one stanza of canto â…¥ of the KumÄra-saṃbhava (“the birth of KumÄraâ€) by the great Sanskrit poet KÄlidÄsa: <br>
+ <br>
+ पशà¥à¤ªà¤¤à¤¿à¤°à¤ªà¤¿ तानà¥à¤¯à¤¹à¤¾à¤¨à¤¿ कृचà¥à¤›à¥à¤°à¤¾à¤¦à¥ <br>
+ अगमयददà¥à¤°à¤¿à¤¸à¥à¤¤à¤¾à¤¸à¤®à¤¾à¤—मोतà¥à¤•à¤ƒ । <br>
+ कमपरमवशं न विपà¥à¤°à¤•à¥à¤°à¥à¤¯à¥à¤°à¥ <br>
+ विà¤à¥à¤®à¤ªà¤¿ तं यदमी सà¥à¤ªà¥ƒà¤¶à¤¨à¥à¤¤à¤¿ à¤à¤¾à¤µà¤¾à¤ƒ ॥ <br>
+ <br>
+And here is the transcription of it: <br>
+ <br>
+ PaÅ›upatirapi tÄnyahÄni ká¹›cchrÄd <br>
+ agamayadadrisutÄsamÄgamotkaḥ; <br>
+ kamaparamavaśaṃ na viprakuryur <br>
+ vibhumapi taṃ yadamÄ« spṛśanti bhÄvÄḥ? <br>
+ <br>
+A rough translation might be: <br>
+ <br>
+ And Paśupati passed those days with hardship, / eager for union with the daughter of the mountain. / Which other powerless [creature] would they not torment, / such emotions, when they affect even the powerful [Śiva]?
+ http://www.madore.org/~david/misc/unitest/
+
+
diff --git a/docs/using_databank_api/lib/HTTP_request.py b/docs/using_databank_api/lib/HTTP_request.py
new file mode 100644
index 0000000..52d21fd
--- /dev/null
+++ b/docs/using_databank_api/lib/HTTP_request.py
@@ -0,0 +1,190 @@
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+import mimetypes
+import httplib
+import base64
+import urlparse
+import json as simplejson
+
+logger = logging.getLogger('Dataset')
+
+class HTTPRequest():
+ def __init__(self, endpointhost=None):
+ if endpointhost:
+ self._endpointhost = endpointhost
+ self._endpointpath = None
+
+ def get_content_type(self, filename):
+ # Originally copied from http://code.activestate.com/recipes/146306/:
+ return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
+
+ def get_data_type(self, params):
+ files = []
+ fields = []
+ decoded_params = params.items()
+ for i in decoded_params:
+ if len(i) == 2:
+ fields.append(i)
+ elif len(i) == 4:
+ files.append(i)
+ return fields, files
+
+ def encode_multipart_formdata(self, fields, files):
+ # Originally copied from http://code.activestate.com/recipes/146306/:
+ """
+ fields is a sequence of (name, value) elements for regular form fields.
+ files is a sequence of (name, filename, value, filetype) elements for data to be uploaded as files
+ Return (content_type, body) ready for httplib.HTTP instance
+ """
+ BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
+ CRLF = '\r\n'
+ L = []
+ for (key, value) in fields:
+ L.append('--' + BOUNDARY)
+ L.append('Content-Disposition: form-data; name="%s"' % key)
+ L.append('')
+ L.append(value)
+ for (key, filename, value, filetype) in files:
+ L.append('--' + BOUNDARY)
+ L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
+ L.append('Content-Type: %s' % (filetype or get_content_type(filename)))
+ L.append('')
+ L.append(value)
+ L.append('--' + BOUNDARY + '--')
+ L.append('')
+ body = CRLF.join(L)
+ content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
+ return content_type, body
+
+ def setRequestEndPoint(self, endpointhost=None, endpointpath=None):
+ if endpointhost or endpointpath:
+ if endpointhost:
+ self._endpointhost = endpointhost
+ # Reset credentials if setting host
+ self._endpointuser = None
+ self._endpointpass = None
+ logger.debug("setRequestEndPoint: endpointhost %s: " % self._endpointhost)
+ if endpointpath:
+ self._endpointpath = endpointpath
+ logger.debug("setRequestEndPoint: endpointpath %s: " % self._endpointpath)
+ return
+
+ def setRequestUserPass(self, endpointuser=None, endpointpass=None):
+ if endpointuser:
+ self._endpointuser = endpointuser
+ self._endpointpass = endpointpass
+ logger.debug("setRequestEndPoint: endpointuser %s: " % self._endpointuser)
+ logger.debug("setRequestEndPoint: endpointpass %s: " % self._endpointpass)
+ else:
+ self._endpointuser = None
+ self._endpointpass = None
+ return
+
+ def getRequestPath(self, rel):
+ rel = rel or ""
+ return urlparse.urljoin(self._endpointpath,rel)
+
+ def getRequestUri(self, rel):
+ return "http://"+self._endpointhost+self.getRequestPath(rel)
+
+ def encodeFormData(self, params):
+ (fields, files) = self.get_data_type(params)
+ (reqtype, reqdata) = self.encode_multipart_formdata(fields, files)
+ return reqtype, reqdata
+
+ def doRequest(self, command, resource, reqdata=None, reqheaders={}):
+ #print "User:", self._endpointuser
+ #print "Host:", self._endpointhost
+ #print "Resource:", resource
+ if self._endpointuser:
+ auth = base64.encodestring("%s:%s" % (self._endpointuser, self._endpointpass)).strip()
+ reqheaders["Authorization"] = "Basic %s" % auth
+ hc = httplib.HTTPConnection(self._endpointhost)
+ #hc = httplib.HTTPSConnection(self._endpointhost)
+ #resource = self.getRequestPath(resource)
+ response = None
+ responsedata = None
+ repeat = 10
+ while resource and repeat > 0:
+ repeat -= 1
+ hc.request(command, resource, reqdata, reqheaders)
+ response = hc.getresponse()
+ if response.status != 301: break
+ path = response.getheader('Location', None)
+ if path[0:6] == "https:":
+ # close old connection, create new HTTPS connection
+ hc.close()
+ hc = httplib.HTTPSConnection(self._endpointhost) # Assume same host for https:
+ else:
+ response.read() # Seems to be needed to free up connection for new request
+ logger.debug("Status: %i %s" % (response.status, response.reason))
+ responsedata = response.read()
+ #print "Response data", responsedata
+ #print "Response status", response.status
+ #print "Response reason", response.reason
+ hc.close()
+ return (response, responsedata)
+
+ def doHTTP_GET(self, endpointhost=None, endpointpath=None, resource=None, expect_type="*/*"):
+ reqheaders = {
+ "Accept": expect_type
+ }
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, responsedata) = self.doRequest("GET", resource, reqheaders=reqheaders)
+ #ctype = response.getheader('content-type')
+ #if (responsedata and expect_type.lower() == "application/json"): responsedata = simplejson.loads(responsedata)
+ #if (responsedata and "application/json" in ctype): responsedata = simplejson.loads(responsedata)
+ return (response, responsedata)
+
+ def doHTTP_POST(self, data, data_type="application/octet-strem",
+ endpointhost=None, endpointpath=None, resource=None, expect_type="*/*"):
+ reqheaders = {
+ "Content-type": data_type,
+ "Accept": expect_type
+ }
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, responsedata) = self.doRequest("POST", resource, reqdata=data, reqheaders=reqheaders)
+ #ctype = response.getheader('content-type')
+ #if (responsedata and expect_type.lower() == "application/json"): responsedata = simplejson.loads(responsedata)
+ #if (responsedata and "application/json" in ctype): responsedata = simplejson.loads(responsedata)
+ return (response, responsedata)
+
+ def doHTTP_PUT(self, data, data_type="application/octet-strem",
+ endpointhost=None, endpointpath=None, resource=None, expect_type="*/*"):
+ reqheaders = {
+ "Content-type": data_type,
+ "Accept": expect_type
+ }
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, responsedata) = self.doRequest("PUT", resource, reqdata=data, reqheaders=reqheaders)
+ #ctype = response.getheader('content-type')
+ #if (responsedata and "application/json" in ctype): responsedata = simplejson.loads(responsedata)
+ return (response, responsedata)
+
+ def doHTTP_DELETE(self, endpointhost=None, endpointpath=None, resource=None):
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, responsedata) = self.doRequest("DELETE", resource)
+ return (response, responsedata)
+
diff --git a/rdfdatabank/tests/test_models.py b/docs/using_databank_api/lib/__init__.py
similarity index 100%
rename from rdfdatabank/tests/test_models.py
rename to docs/using_databank_api/lib/__init__.py
diff --git a/docs/using_databank_api/lib/multipart.py b/docs/using_databank_api/lib/multipart.py
new file mode 100644
index 0000000..14aa3e7
--- /dev/null
+++ b/docs/using_databank_api/lib/multipart.py
@@ -0,0 +1,78 @@
+# ---------------------------------------------------------------------
+#
+# Copyright (c) 2012 University of Oxford
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# ---------------------------------------------------------------------
+
+import mimetools
+import mimetypes
+
+class MultiPartFormData(object):
+ def __init__(self, fields=None, files=None):
+ self._boundary = mimetools.choose_boundary()
+ self._fields = fields or ()
+ self._files = files or ()
+ for file in self._files:
+ file['mimetype'] = file.get('mimetype') or mimetypes.guess_type(file['filename'])[0] or 'application/octet-stream'
+ self._body = self._body_iterator()
+
+ @property
+ def content_type(self):
+ return 'multipart/form-data; boundary=%s' % self._boundary
+
+ @property
+ def content_length(self):
+ field_padding = '--\r\nContent-Disposition: form-data; name=""\r\n\r\n\r\n'
+ file_padding = '--\r\nContent-Disposition: form-data; name=""; filename=""\r\nContent-Type: \r\n\r\n'
+
+ field_length = sum(sum(map(len, [self._boundary, field_padding, k, v])) for k,v in self._fields)
+ file_length = sum(f['size'] + sum(map(len, [self._boundary, file_padding, f['name'], f['filename'], f['mimetype']])) for f in self._files)
+
+ return field_length + file_length + len('----\r\n') + len(self._boundary)
+
+ def _body_iterator(self):
+ for (key, value) in self._fields:
+ yield '--%s\r\n' % self._boundary
+ yield 'Content-Disposition: form-data; name="%s"\r\n' % key
+ yield '\r\n'
+ if value:
+ yield value
+ yield '\r\n'
+ for file in self._files:
+ yield '--%s\r\n' % self._boundary
+ yield 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (file['name'], file['filename'])
+ yield 'Content-Type: %s\r\n' % file['mimetype']
+ yield '\r\n'
+
+ stream = file['stream']
+ while True:
+ data = stream.read(4096)
+ if not data:
+ break
+ yield data
+ yield '--%s--\r\n' % self._boundary
+
+ def read(self, blocksize):
+ try:
+ return self._body.next()
+ except StopIteration:
+ return ''
diff --git a/docs/using_databank_api/lib/multipartform.py b/docs/using_databank_api/lib/multipartform.py
new file mode 100644
index 0000000..11a301d
--- /dev/null
+++ b/docs/using_databank_api/lib/multipartform.py
@@ -0,0 +1,66 @@
+import itertools
+import mimetools
+import mimetypes
+
+class MultiPartForm(object):
+ """Accumulate the data to be used when posting a form."""
+
+ def __init__(self):
+ self.form_fields = []
+ self.files = []
+ self.boundary = mimetools.choose_boundary()
+ return
+
+ def get_content_type(self):
+ return 'multipart/form-data; boundary=%s' % self.boundary
+
+ def add_field(self, name, value):
+ """Add a simple field to the form data."""
+ self.form_fields.append((name, value))
+ return
+
+ def add_file(self, fieldname, filename, fileHandle, mimetype=None):
+ """Add a file to be uploaded."""
+ body = fileHandle.read()
+ if mimetype is None:
+ mimetype = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
+ self.files.append((fieldname, filename, mimetype, body))
+ return
+
+ def __str__(self):
+ """Return a string representing the form data, including attached files."""
+ # Build a list of lists, each containing "lines" of the
+ # request. Each part is separated by a boundary string.
+ # Once the list is built, return a string where each
+ # line is separated by '\r\n'.
+ parts = []
+ part_boundary = '--' + self.boundary
+
+ # Add the form fields
+ parts.extend(
+ [ part_boundary,
+ 'Content-Disposition: form-data; name="%s"' % name,
+ '',
+ value,
+ ]
+ for name, value in self.form_fields
+ )
+
+ # Add the files to upload
+ parts.extend(
+ [ part_boundary,
+ 'Content-Disposition: file; name="%s"; filename="%s"' % \
+ (field_name, filename),
+ 'Content-Type: %s' % content_type,
+ '',
+ body,
+ ]
+ for field_name, filename, content_type, body in self.files
+ )
+
+ # Flatten the list and add closing boundary marker,
+ # then return CR+LF separated data
+ flattened = list(itertools.chain(*parts))
+ flattened.append('--' + self.boundary + '--')
+ flattened.append('')
+ return '\r\n'.join(flattened)
\ No newline at end of file
diff --git a/docs/using_databank_api/main.py b/docs/using_databank_api/main.py
new file mode 100644
index 0000000..6a904df
--- /dev/null
+++ b/docs/using_databank_api/main.py
@@ -0,0 +1,175 @@
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+# Using the databank API
+
+"""
+Below is a guide on how to do HTTP GET, POST, PUT and DELETE in python.
+To run the python code here, you would also need the file HTTP_request.py.
+
+The full functionality of RDFDatabank is detailed in the API documentation at
+http://databank.ora.ox.ac.uk/api
+https://github.com/dataflow/RDFDatabank/tree/master/rdfdatabank/public/static/api_files
+"""
+
+import json as simplejson
+from lib.HTTP_request import HTTPRequest
+
+#--CONFIG-------------------------------------------------------
+host = 'databank-vm1.oerc.ox.ac.uk'
+user_name = 'admin'
+password = 'test'
+datastore = HTTPRequest(endpointhost=host)
+datastore.setRequestUserPass(endpointuser=user_name, endpointpass=password)
+
+#--HTTP GET-------------------------------------------------------
+#Get a list of silos accessible to the user
+(resp, respdata) = datastore.doHTTP_GET(resource="/silos", expect_type="application/JSON")
+print "Get list of silos"
+print resp.status, resp.reason
+if resp.status >= 200 and resp.status < 300:
+ silos_list = simplejson.loads(respdata)
+ print "number of silos", len(silos_list)
+print "-"*40, "\n\n"
+
+#--HTTP GET-------------------------------------------------------
+#Get a list of all the datasets in the silo 'sandbox'
+(resp, respdata) = datastore.doHTTP_GET(resource="/sandbox", expect_type="application/JSON")
+print "Get list of datasets"
+print resp.status, resp.reason
+if resp.status >= 200 and resp.status < 300:
+ dataset_list = simplejson.loads(respdata)
+ print "number of datasets", len(dataset_list.keys())
+else:
+ print "Error getting list of datasets"
+print "-"*40, "\n\n"
+
+#--HTTP DELETE-------------------------------------------------------
+#Delete the dataset 'TestSubmission' in the silo 'sandbox'
+(resp, respdata) = datastore.doHTTP_DELETE(resource="/sandbox/datasets/TestSubmission")
+print "deleting dataset"
+print resp.status, resp.reason
+print respdata
+print "-"*40, "\n\n"
+
+#--HTTP POST-------------------------------------------------------
+#Create a new dataset 'TestSubmission' in the silo 'sandbox'
+fields = [
+ ("id", "TestSubmission")
+]
+files =[]
+(reqtype, reqdata) = datastore.encode_multipart_formdata(fields, files)
+(resp, respdata) = datastore.doHTTP_POST(reqdata, data_type=reqtype, resource="/sandbox/datasets", expect_type="application/JSON")
+print "Create new dataset"
+print resp.status, resp.reason
+if resp.status >= 200 and resp.status < 300:
+ print respdata
+else:
+ print "Error creating dataset"
+print "-"*40, "\n\n"
+
+#--HTTP POST-------------------------------------------------------
+#Upload file to dataset - POST file to dataset 'TestSubmission' in silo 'sandbox' (path is /sandbox/datasets/TestSubmission)
+file_name="testrdf4.zip"
+file_path="data/testrdf4.zip"
+fields = []
+zipdata = open(file_path).read()
+files = [
+ ("file", file_name, zipdata, "application/zip")
+]
+(reqtype, reqdata) = datastore.encode_multipart_formdata(fields, files)
+(resp, respdata) = datastore.doHTTP_POST(reqdata, data_type=reqtype, resource="/sandbox/datasets/TestSubmission", expect_type="application/JSON")
+print "Post file testrdf4.zip to dataset"
+print resp.status, resp.reason
+if resp.status >= 200 and resp.status < 300:
+ print respdata
+else:
+ print "Error posting file to dataset"
+print "-"*40, "\n\n"
+
+#--HTTP POST-------------------------------------------------------
+#Upload file to dataset and test munging. POST file to dataset 'TestSubmission' in silo 'sandbox' (path is /sandbox/datasets/TestSubmission)
+#file_name="unicode07.xml"
+file_name="manifest.rdf"
+file_path="data/unicode07.xml"
+fields = []
+zipdata = open(file_path).read()
+files = [
+ ("file", file_name, zipdata, "application/rdf+xml")
+]
+(reqtype, reqdata) = datastore.encode_multipart_formdata(fields, files)
+(resp, respdata) = datastore.doHTTP_POST(reqdata, data_type=reqtype, resource="/sandbox/datasets/TestSubmission", expect_type="application/JSON")
+print "Post file unicode07.xml to dataset"
+print resp.status, resp.reason
+if resp.status >= 200 and resp.status < 300:
+ print respdata
+else:
+ print "Error posting file to dataset"
+print "-"*40, "\n\n"
+
+#--HTTP PUT-------------------------------------------------------
+#example metadata constructed in rdf. Add this metadata to the manifest (PUT this in manifest.rdf file)
+metadata_content = """
+
+ Test dataset
+ Carl Sagan
+ abstract
+
+"""
+
+(resp, respdata) = datastore.doHTTP_PUT(metadata_content, resource="/sandbox/datasets/TestSubmission/manifest.rdf", expect_type="text/plain")
+print "Putting manifest data into dataset"
+print resp.status, resp.reason
+if resp.status >= 200 and resp.status < 300:
+ print respdata
+else:
+ print "Error putting manifest data into dataset"
+print "-"*40, "\n\n"
+
+#--HTTP POST-------------------------------------------------------
+#Unpack zip file in dataset
+file_name="testrdf4.zip"
+fields = []
+fields = [
+ ("filename", "testrdf4.zip"),
+ ("id", "TestSubmission")
+]
+zipdata = open(file_path).read()
+files = []
+(reqtype, reqdata) = datastore.encode_multipart_formdata(fields, files)
+(resp, respdata) = datastore.doHTTP_POST(reqdata, data_type=reqtype, resource="/sandbox/items/TestSubmission", expect_type="application/JSON")
+print "Post file testrdf4.zip to dataset for unpacking"
+print resp.status, resp.reason
+if resp.status >= 200 and resp.status < 300:
+ print respdata
+else:
+ print "Error unpacking file to dataset"
+print "-"*40, "\n\n"
+
+#---------------------------------------------------------
+
diff --git a/docs/using_databank_api/postingToDatabank.py b/docs/using_databank_api/postingToDatabank.py
new file mode 100644
index 0000000..095f8b2
--- /dev/null
+++ b/docs/using_databank_api/postingToDatabank.py
@@ -0,0 +1,65 @@
+import urllib2
+import base64
+import urllib
+from lib.multipart import MultiPartFormData
+import os
+
+#===============================================================================
+#Using urllib2 to create a package in Databank
+url = "http://databank-vm1.oerc.ox.ac.uk/test/datasets"
+identifier = "TestSubmission"
+req = urllib2.Request(url)
+USER = "admin"
+PASS = "test"
+auth = 'Basic ' + base64.urlsafe_b64encode("%s:%s" % (USER, PASS))
+req.add_header('Authorization', auth)
+req.add_header('Accept', 'application/JSON')
+req.add_data(urllib.urlencode({'id': identifier}))
+
+# To verify the method is POST
+req.get_method()
+
+ans = urllib2.urlopen(req)
+
+ans.read()
+ans.msg
+ans.code
+
+#===============================================================================
+#Using urllib2 to post a file
+#Add a file
+
+filename = "solrconfig.xml"
+filepath = "data/unicode07.xml"
+f = open(filepath, 'rb')
+stat_info = os.stat(filepath)
+
+file1_info = {
+ 'name':'file',
+ 'filename':filename,
+ 'mimetype':'application/xml',
+ 'stream': f,
+ 'size':int(stat_info.st_size)}
+
+data = MultiPartFormData(files=[file1_info])
+
+# Build the request
+url2 = "http://databank-vm1.oerc.ox.ac.uk/test/datasets/TestSubmission"
+req2 = urllib2.Request(url2)
+auth = 'Basic ' + base64.urlsafe_b64encode("admin:test")
+req2.add_header('Authorization', auth)
+req2.add_header('Accept', 'application/JSON')
+req2.add_header('Content-type', data.content_type)
+req2.add_header('Content-length', data.content_length)
+
+body = ''.join(list(data._body))
+req2.add_data(str(body))
+
+#print
+#print 'OUTGOING DATA:'
+#print req2.get_data()
+ans2 = urllib2.urlopen(req2)
+#print
+print 'SERVER RESPONSE:'
+ans2.read()
+#===============================================================================
diff --git a/docs/using_zipfile_library.py b/docs/using_zipfile_library.py
new file mode 100644
index 0000000..9b31e8c
--- /dev/null
+++ b/docs/using_zipfile_library.py
@@ -0,0 +1,123 @@
+from zipfile import ZipFile, BadZipfile as BZ
+#================================================
def read_zipfile(filepath):
    """Return a dict describing the contents of the zip archive at *filepath*.

    The dict maps each member's filename to a ``(file_size, date_time)``
    tuple as reported by ``ZipFile.infolist()``.

    Raises:
        BZ (zipfile.BadZipfile): if *filepath* is not a valid zip archive.
            The previous code caught BZ only to re-raise the *undefined*
            name ``BadZipfile`` (the class was imported as ``BZ``), turning
            every corrupt archive into a NameError.
    """
    tmpfile = ZipFile(filepath, "r")  # BZ propagates to the caller
    try:
        zipfile_contents = {}
        for info in tmpfile.infolist():
            zipfile_contents[info.filename] = (info.file_size, info.date_time)
    finally:
        # Always release the file handle, even if listing fails part-way.
        tmpfile.close()
    return zipfile_contents
+#================================================
def read_file_in_zipfile(filepath, filename):
    """Return the contents of member *filename* from the zip at *filepath*.

    Returns:
        False if *filename* is not a member of the archive,
        0 if the member exists but is empty (e.g. a directory entry),
        otherwise the member's contents as a byte string.

    Raises:
        BZ (zipfile.BadZipfile): for an invalid archive.  The old code
            re-raised the undefined name ``BadZipfile`` here (NameError),
            and also leaked the open handle on the False/0 return paths.
    """
    tmpfile = ZipFile(filepath, "r")  # BZ propagates to the caller
    try:
        try:
            fileinfo = tmpfile.getinfo(filename)
        except KeyError:
            # Missing member: preserve the original False sentinel.
            return False
        if fileinfo.file_size == 0:
            # Empty member / directory entry: preserve the 0 sentinel.
            return 0
        return tmpfile.read(filename)
    finally:
        # Close on every exit path (original leaked on early returns).
        tmpfile.close()
+#================================================
def get_file_in_zipfile(filepath, filename, targetdir):
    """Extract member *filename* from the zip at *filepath* into *targetdir*.

    Returns:
        False if *filename* is not a member of the archive,
        0 if the member exists but is empty,
        otherwise the filesystem path of the extracted file
        (as returned by ``ZipFile.extract``).

    Raises:
        BZ (zipfile.BadZipfile): for an invalid archive.  The old code
            re-raised the undefined name ``BadZipfile`` here (NameError),
            and leaked the open handle on the False/0 return paths.
    """
    tmpfile = ZipFile(filepath, "r")  # BZ propagates to the caller
    try:
        try:
            fileinfo = tmpfile.getinfo(filename)
        except KeyError:
            # Missing member: preserve the original False sentinel.
            return False
        if fileinfo.file_size == 0:
            # Empty member / directory entry: preserve the 0 sentinel.
            return 0
        return tmpfile.extract(filename, targetdir)
    finally:
        # Close on every exit path (original leaked on early returns).
        tmpfile.close()
+#================================================
# Manual smoke test: sample data inside a pairtree object store.  The
# files referenced below must exist locally for these calls to run; the
# expected results are listed in the docstring that follows this block.
path = 'silos/sandbox/pairtree_root/da/ta/se/t1/obj/__26/'
fp1 = path + 'test3.zip'
fp2 = path + 'read_test.zip'
fp3 = path + 'databank_logo.png'

zc1 = read_zipfile(fp1)
zc2 = read_zipfile(fp2)
# fp3 is a PNG, not a zip -- presumably exercises the BadZipfile path.
zc3 = read_zipfile(fp3)

zc1_files = zc1.keys()
zc2_files = zc2.keys()

ans11 = read_file_in_zipfile(fp1, zc1_files[1]) #expected: 0
ans12 = read_file_in_zipfile(fp1, 'test') #expected: False
ans13 = read_file_in_zipfile(fp1, zc1_files[0]) #expected: file conts

ans21 = read_file_in_zipfile(fp2, zc2_files[0]) #expected: file conts
ans22 = read_file_in_zipfile(fp2, zc2_files[1]) #expected: 0
ans23 = read_file_in_zipfile(fp2, zc2_files[4]) #expected: binary output

ans14 = get_file_in_zipfile(fp1, zc1_files[1], '/tmp') #expected: 0
ans15 = get_file_in_zipfile(fp1, 'test', '/tmp') #expected: False
ans16 = get_file_in_zipfile(fp1, zc1_files[0], '/tmp') #expected: '/tmp/admiral-dataset.txt'

ans24 = get_file_in_zipfile(fp2, zc2_files[0], '/tmp') #expected: '/tmp/read_test/Dir/TestScanFiles32.txt'
ans25 = get_file_in_zipfile(fp2, zc2_files[1], '/tmp') #expected: 0
ans26 = get_file_in_zipfile(fp2, zc2_files[4], '/tmp') #expected: '/tmp/read_test/databank_logo.png'
+#================================================
+#Expected Answers
+"""
+>>> zc1
+{'admiral-dataset.txt': (43, (2010, 11, 29, 16, 30, 52)), 'TestScanFilesSubDir/': (0, (2010, 11, 29, 17, 34, 42)), 'TestScanFilesSubDir/TestScanFiles31.txt': (9, (2010, 10, 4, 15, 39, 54)), 'TestScanFilesSubDir/TestScanFiles32.txt': (9, (2010, 10, 4, 15, 39, 54)), 'TestScanFilesSubDir/manifest.rdf': (511, (2010, 11, 29, 17, 42, 10))}
+
+>>> zc2
+{'read_test/Dir/TestScanFiles32.txt': (9, (2010, 10, 4, 15, 39, 54)), 'read_test/Dir/': (0, (2011, 1, 5, 13, 43, 30)), 'read_test/admiral-dataset.txt': (43, (2010, 11, 29, 16, 30, 52)), 'read_test/Dir/manifest.rdf': (511, (2010, 11, 29, 17, 42, 10)), 'read_test/databank_logo.png': (20220, (2010, 12, 6, 15, 11, 40)), 'read_test/': (0, (2011, 1, 5, 13, 44, 40)), 'read_test/Dir/TestScanFiles31.txt': (9, (2010, 10, 4, 15, 39, 54))}
+
+>>> zc1_files
+['admiral-dataset.txt', 'TestScanFilesSubDir/', 'TestScanFilesSubDir/TestScanFiles31.txt', 'TestScanFilesSubDir/TestScanFiles32.txt', 'TestScanFilesSubDir/manifest.rdf']
+
+>>> zc2_files
+['read_test/Dir/TestScanFiles32.txt', 'read_test/Dir/', 'read_test/admiral-dataset.txt', 'read_test/Dir/manifest.rdf', 'read_test/databank_logo.png', 'read_test/', 'read_test/Dir/TestScanFiles31.txt']
+
+>>> ans11
+0
+
+>>> ans12
+False
+
+>>> ans13
+'This directory contains an ADMIRAL dataset\n'
+
+>>> ans21
+'Test file'
+
+>>> ans22
+0
+
+>>> ans23
+'\x89PNG\.....
+
+>>> ans14
+0
+
+>>> ans15
+False
+
+>>> ans16
+'/tmp/admiral-dataset.txt'
+
+>>> ans24
+'/tmp/read_test/Dir/TestScanFiles32.txt'
+
+>>> ans25
+0
+
+>>> ans26
+'/tmp/read_test/databank_logo.png'
+"""
+#================================================
diff --git a/message_workers/LogConfigParser.py b/message_workers/LogConfigParser.py
new file mode 100755
index 0000000..92b8531
--- /dev/null
+++ b/message_workers/LogConfigParser.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import ConfigParser, os
+
class Config(ConfigParser.ConfigParser):
    """ConfigParser that loads ``loglines.cfg`` (or a caller-supplied file)
    at construction time.

    A missing or non-file path is silently skipped, after which the
    parser is simply empty.
    """

    DEFAULT_CONFIG_FILE = "loglines.cfg"

    def __init__(self, config_file=DEFAULT_CONFIG_FILE):
        ConfigParser.ConfigParser.__init__(self)
        # isfile() already implies existence, so a separate exists()
        # check would be redundant.
        if os.path.isfile(config_file):
            self.read(config_file)
        self.validate()

    def validate(self):
        # Hook for subclasses; the base implementation accepts anything.
        pass
diff --git a/message_workers/broker.py b/message_workers/broker.py
new file mode 100755
index 0000000..6746857
--- /dev/null
+++ b/message_workers/broker.py
@@ -0,0 +1,86 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from redisqueue import RedisQueue
+
+from LogConfigParser import Config
+
+import sys
+
+from time import sleep
+
if __name__ == "__main__":
    # Broker worker: pops messages from the "listento" queue and fans each
    # one out to the configured downstream queues.
    # Usage: broker.py <worker_number> [redis_section_suffix]
    c = Config()
    redis_section = "redis"
    worker_section = "worker_broker"
    worker_number = sys.argv[1]
    if len(sys.argv) == 3:
        if "redis_%s" % sys.argv[2] in c.sections():
            redis_section = "redis_%s" % sys.argv[2]

    rq = RedisQueue(c.get(worker_section, "listento"), "broker_%s" % worker_number,
                    db=c.get(redis_section, "db"),
                    host=c.get(redis_section, "host"),
                    port=c.get(redis_section, "port")
                    )

    # Queues to fan each message out to.  This is needed on both branches
    # of the loop below; the original defined it only inside the
    # fanout_status_queue branch, so the plain-fanout path hit a NameError.
    fanout_queues = [x.strip() for x in c.get(worker_section, "fanout").split(",") if x]

    # Optional status queue: keeps the list of queues still to be served
    # for the current item, giving better resumability at the cost of
    # extra redis operations.  None when the option is absent (the
    # original left the name undefined, crashing at first use).
    topushq = None
    if c.has_option(worker_section, "fanout_status_queue"):
        topushq = RedisQueue(c.get(worker_section, "fanout_status_queue"), "fanout_broker_%s" % worker_number,
                             db=c.get(redis_section, "db"),
                             host=c.get(redis_section, "host"),
                             port=c.get(redis_section, "port")
                             )

    # Idle sleep between polls; default applies when the option is missing
    # (the original only assigned idletime inside the has_option branch).
    idletime = 10
    if c.has_option(worker_section, "idletime"):
        try:
            idletime = float(c.get(worker_section, "idletime"))
        except ValueError:
            idletime = 10

    while(True):
        line = rq.pop()
        if line:
            if topushq:
                # If residual queue names remain from an interrupted
                # fanout, resume with those; otherwise seed the full list.
                if len(topushq) == 0:
                    for q in fanout_queues:
                        topushq.push(q)
                # Distribution: deliver the message to each pending queue.
                while len(topushq) != 0:
                    q = topushq.pop()
                    rq.push(line, to_queue=q)
                    topushq.task_complete()
                rq.task_complete()
            else:
                for q in fanout_queues:
                    rq.push(line, to_queue=q)
                rq.task_complete()
        else:
            # ratelimit to stop it chewing through CPU cycles
            sleep(idletime)
diff --git a/message_workers/loglines.cfg b/message_workers/loglines.cfg
new file mode 100644
index 0000000..647815d
--- /dev/null
+++ b/message_workers/loglines.cfg
@@ -0,0 +1,59 @@
+#Copyright (c) 2012 University of Oxford
+#
+#Permission is hereby granted, free of charge, to any person obtaining
+#a copy of this software and associated documentation files (the
+#"Software"), to deal in the Software without restriction, including
+#without limitation the rights to use, copy, modify, merge, publish,
+#distribute, sublicense, and/or sell copies of the Software, and to
+#permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be
+#included in all copies or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+#EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+#IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+#CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+#TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# Configuring workers:
+[worker_solr]
+listento = solrindex
+errorq = solrindexerror
+command = ./solr_worker.py
+solrurl = http://localhost:8080/solr
+dbroot = /var/lib/databank
+idletime = 1
+stdout_logfile = /var/log/databank/solr_worker.log
+numprocs = 2
+
+[worker_broker]
+listento = silochanges
+command = ./broker.py
+#fanout = auditlog, solrindex
+fanout = solrindex
+fanout_status_queue = broker_temp
+idletime = 1
+stdout_logfile = /var/log/databank/broker.log
+numprocs = 2
+
+[logger_auditlogger]
+listento = auditlog
+command = ./logfromqueue.py
+logfile = logs/audit.log
+stdout_logfile = /var/log/databank/auditlogger.log
+
+# DEFAULT VALUES FOLLOW
+##############################
+[redis]
+host = localhost
+port = 6379
+db = 0
+
+[redis_test]
+host = localhost
+port = 6379
+db = 1
diff --git a/message_workers/redisqueue.py b/message_workers/redisqueue.py
new file mode 100755
index 0000000..b76d3f6
--- /dev/null
+++ b/message_workers/redisqueue.py
@@ -0,0 +1,136 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from redis import Redis
+from redis.exceptions import ConnectionError
+from time import sleep
+
# Defaults used when the caller does not supply connection details.
WORKERPREFIX = "temp"
HOST = "localhost"
PORT = 6379
DB = 0

import logging

# Module-level console logger shared by every RedisQueue instance.
logger = logging.getLogger("redisqueue")
logger.setLevel(logging.INFO)
# create console handler and set level to debug
ch = logging.StreamHandler()
# create formatter
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
# add formatter to ch
ch.setFormatter(formatter)
# add ch to logger
logger.addHandler(ch)
+
+
+"""Simple wrapper around a redis queue that gives methods in line with the other Queue-style classes"""
+
class RedisQueue(object):
    """Wrapper around a redis list giving a simple work-queue interface.

    pop() atomically moves the next item onto a per-worker "in progress"
    list (RPOPLPUSH), so an item survives a worker crash; the worker then
    retires it with task_complete() or parks it on the error queue with
    task_failed().
    """

    def __init__(self, queuename, workername, db=DB, host=HOST, port=PORT, workerprefix=WORKERPREFIX, errorqueue=None):
        """Bind this worker to *queuename* and connect to redis.

        errorqueue defaults to the main queue itself, so failed items are
        simply re-queued unless a dedicated error queue is supplied.
        """
        self.host = host
        # Config files hand the port over as a string; fall back to the
        # default on an unparsable value.
        if isinstance(port, str):
            try:
                self.port = int(port)
            except ValueError:
                self.port = PORT
        else:
            self.port = port
        self.queuename = queuename
        self.workername = workername
        # Name of this worker's private "in progress" list.
        self.workeritem = ":".join([workerprefix, workername])
        self.errorqueue = errorqueue
        if not errorqueue:
            self.errorqueue = queuename
        self.db = db
        self._initclient()

    def _initclient(self):
        # (Re)create the redis client from the stored connection details.
        logger.info("Initialising the redis queue %s for %s" % (self.queuename, self.workername))
        logger.info("Host:%s port:%s DB:%s" % (self.host, self.port, self.db))
        logger.debug("Debug messages detailing worker queue activity")
        self._r = Redis(host=self.host, db=self.db, port=self.port)

    def check_connection(self):
        """Ping redis and rebuild the client if the connection has dropped."""
        #sleep(1)
        try:
            self._r.info()
        except ConnectionError:
            self._initclient()

    def __len__(self):
        # An item sitting on the in-progress list still counts as queued.
        if self.inprogress():
            return self._r.llen(self.queuename) + 1
        else:
            return self._r.llen(self.queuename)

    def __getitem__(self, index):
        #self.check_connection()
        return self._r.lrange(self.queuename, index, index)

    def inprogress(self):
        """Return the item this worker is currently processing, or None."""
        #sleep(1)
        #self.check_connection()
        ip = self._r.lrange(self.workeritem, 0, 0)
        if ip:
            return ip.pop()
        else:
            return None

    def task_complete(self):
        """Drop the in-progress item: it has been processed successfully."""
        #sleep(1)
        #self.check_connection()
        logger.debug("Task completed by worker %s" % self.workername)
        return self._r.rpop(self.workeritem)

    def task_failed(self):
        """Move the in-progress item onto the error queue."""
        #sleep(1)
        #self.check_connection()
        logger.error("Task FAILED by worker %s" % self.workername)
        logger.debug(self.inprogress())
        return self._r.rpoplpush(self.workeritem, self.errorqueue)

    def push(self, item, to_queue=None):
        """Add *item* to this worker's queue, or to *to_queue* if given."""
        #sleep(1)
        #self.check_connection()
        if to_queue:
            logger.debug("{%s} put onto queue %s by worker %s" % (item, to_queue,self.workername))
            return self._r.lpush(to_queue, item)
        else:
            logger.debug("{%s} put onto queue %s by worker %s" % (item, self.queuename,self.workername))
            return self._r.lpush(self.queuename, item)

    def pop(self):
        """Reserve and return the next item, leaving it on the in-progress
        list until task_complete()/task_failed() is called.

        If this worker already holds an unfinished item, that same item is
        returned again rather than pulling a new one.
        """
        #sleep(1)
        #self.check_connection()
        logger.debug("In pop - Queuename: %s, workeritem:%s"%(self.queuename, self.workeritem))
        if self._r.llen(self.workeritem) == 0:
            itemid = self._r.rpoplpush(self.queuename, self.workeritem)
            # NOTE(review): lrem is called with two positional args, which
            # matches the old redis-py signature lrem(name, value, num=0);
            # newer redis-py uses lrem(name, count, value) -- confirm the
            # pinned client version.  Also, when the queue was empty,
            # itemid is None and lrem(errorqueue, None) is still attempted.
            if self.queuename != self.errorqueue:
                self._r.lrem(self.errorqueue, itemid)
            logger.debug("{%s} pulled from queue %s by worker %s" % (self.inprogress(), self.queuename,self.workername))
        else:
            logger.debug("{%s} pulled from temporary worker queue by worker %s" % (self.inprogress(), self.workername))
        return self.inprogress()
diff --git a/message_workers/solrFields.py b/message_workers/solrFields.py
new file mode 100644
index 0000000..45dffe8
--- /dev/null
+++ b/message_workers/solrFields.py
@@ -0,0 +1,127 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
# Mapping from RDF predicate URIs (plus the synthetic keys silo/id/uuid)
# to the Solr field names used when indexing a dataset.  Several
# predicates deliberately share one Solr field -- e.g. both DC terms and
# DC elements 1.1 "title" map to "title", and two embargo vocabularies
# map to embargoStatus/embargoedUntilDate.
solr_fields_mapping = {
    "silo":"silo",
    "id":"id",
    "uuid":"uuid",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#type":"type",
    "http://vocab.ox.ac.uk/dataset/schema#isEmbargoed":"embargoStatus",
    "http://purl.org/spar/pso/Status":"embargoStatus",
    "http://vocab.ox.ac.uk/dataset/schema#embargoedUntil":"embargoedUntilDate",
    "http://purl.org/spar/fabio/hasEmbargoDate":"embargoedUntilDate",
    "http://vocab.ox.ac.uk/dataset/schema#currentVersion":"currentVersion",
    "http://purl.org/ontology/bibo/doi":"doi",
    "http://www.openarchives.org/ore/terms/aggregates":"aggregatedResource",
    "http://purl.org/spar/fabio/publicationDate":"publicationDate",
    "http://purl.org/dc/terms/abstract":"abstract",
    "http://purl.org/dc/terms/accessRights":"accessRights",
    "http://purl.org/dc/terms/accrualMethod":"accrualMethod",
    "http://purl.org/dc/terms/accrualPeriodicity":"accrualPeriodicity",
    "http://purl.org/dc/terms/accrualPolicy":"accrualPolicy",
    "http://purl.org/dc/terms/alternative":"alternative",
    "http://purl.org/dc/terms/audience":"audience",
    "http://purl.org/dc/terms/available":"available",
    "http://purl.org/dc/terms/bibliographicCitation":"bibliographicCitation",
    "http://purl.org/dc/terms/conformsTo":"conformsTo",
    "http://purl.org/dc/terms/contributor":"contributor",
    "http://purl.org/dc/terms/coverage":"coverage",
    "http://purl.org/dc/terms/created":"created",
    "http://purl.org/dc/terms/creator":"creator",
    "http://purl.org/dc/terms/date":"date",
    "http://purl.org/dc/terms/dateAccepted":"dateAccepted",
    "http://purl.org/dc/terms/dateCopyrighted":"dateCopyrighted",
    "http://purl.org/dc/terms/dateSubmitted":"dateSubmitted",
    "http://purl.org/dc/terms/description":"description",
    "http://purl.org/dc/terms/educationLevel":"educationLevel",
    "http://purl.org/dc/terms/extent":"extent",
    "http://purl.org/dc/terms/format":"format",
    "http://purl.org/dc/terms/hasFormat":"hasFormat",
    "http://purl.org/dc/terms/hasPart":"hasPart",
    "http://purl.org/dc/terms/hasVersion":"hasVersion",
    "http://purl.org/dc/terms/identifier":"identifier",
    "http://purl.org/dc/terms/instructionalMethod":"instructionalMethod",
    "http://purl.org/dc/terms/isFormatOf":"isFormatOf",
    "http://purl.org/dc/terms/isPartOf":"isPartOf",
    "http://purl.org/dc/terms/isReferencedBy":"isReferencedBy",
    "http://purl.org/dc/terms/isReplacedBy":"isReplacedBy",
    "http://purl.org/dc/terms/isRequiredBy":"isRequiredBy",
    "http://purl.org/dc/terms/issued":"issued",
    "http://purl.org/dc/terms/isVersionOf":"isVersionOf",
    "http://purl.org/dc/terms/language":"language",
    "http://purl.org/dc/terms/license":"license",
    "http://purl.org/dc/terms/mediator":"mediator",
    "http://purl.org/dc/terms/medium":"medium",
    "http://purl.org/dc/terms/modified":"modified",
    "http://purl.org/dc/terms/provenance":"provenance",
    "http://purl.org/dc/terms/publisher":"publisher",
    "http://purl.org/dc/terms/references":"references",
    "http://purl.org/dc/terms/relation":"relation",
    "http://purl.org/dc/terms/replaces":"replaces",
    "http://purl.org/dc/terms/requires":"requires",
    "http://purl.org/dc/terms/rights":"rights",
    "http://purl.org/dc/terms/rightsHolder":"rightsHolder",
    "http://purl.org/dc/terms/source":"source",
    "http://purl.org/dc/terms/spatial":"spatial",
    "http://purl.org/dc/terms/subject":"subject",
    "http://purl.org/dc/terms/tableOfContents":"tableOfContents",
    "http://purl.org/dc/terms/temporal":"temporal",
    "http://purl.org/dc/terms/title":"title",
    "http://purl.org/dc/terms/type":"type",
    "http://purl.org/dc/terms/valid":"valid",
    "http://purl.org/dc/elements/1.1/contributor":"contributor",
    "http://purl.org/dc/elements/1.1/coverage":"coverage",
    "http://purl.org/dc/elements/1.1/creator":"creator",
    "http://purl.org/dc/elements/1.1/date":"date",
    "http://purl.org/dc/elements/1.1/description":"description",
    "http://purl.org/dc/elements/1.1/format":"format",
    "http://purl.org/dc/elements/1.1/identifier":"identifier",
    "http://purl.org/dc/elements/1.1/language":"language",
    "http://purl.org/dc/elements/1.1/publisher":"publisher",
    "http://purl.org/dc/elements/1.1/relation":"relation",
    "http://purl.org/dc/elements/1.1/rights":"rights",
    "http://purl.org/dc/elements/1.1/source":"source",
    "http://purl.org/dc/elements/1.1/subject":"subject",
    "http://purl.org/dc/elements/1.1/title":"title",
    "http://purl.org/dc/elements/1.1/type":"type"
}

# Facet field names exposed to search; the f_ prefix presumably denotes
# a facet copy of the corresponding Solr field -- confirm against the
# Solr schema.
facets = [
    'f_creator',
    'f_mediator',
    'f_embargoedUntilDate',
    'f_license',
    'f_rights',
    'f_type',
    'f_publisher',
    'f_isPartOf',
    'f_hasVersion',
    'f_publicationDate',
    'f_contributor',
    'f_language',
    'f_rightsHolder',
    'f_source',
    'f_subject'
]
diff --git a/message_workers/solr_worker.py b/message_workers/solr_worker.py
new file mode 100755
index 0000000..60ee535
--- /dev/null
+++ b/message_workers/solr_worker.py
@@ -0,0 +1,183 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from redisqueue import RedisQueue
+from LogConfigParser import Config
+from solrFields import solr_fields_mapping
+
+import sys
+from time import sleep
+from datetime import datetime, timedelta
+from rdflib import URIRef
+import simplejson
+from collections import defaultdict
+from uuid import uuid4
+
+from recordsilo import Granary
+from solr import SolrConnection
+
+import logging
+
# Console logger; the "redisqueue" name matches the queue module so this
# worker and RedisQueue share one logger configuration.
logger = logging.getLogger("redisqueue")
logger.setLevel(logging.INFO)
ch = logging.StreamHandler()
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)
logger.addHandler(ch)
+
class NoSuchSilo(Exception):
    """Raised when a message names a silo that is absent from the granary."""
+
def gather_document(silo_name, item):
    """Build a Solr document (dict of field -> list of values) for *item*.

    RDF predicates on the item's graph are mapped to Solr field names via
    solr_fields_mapping; values of unmapped predicates are collected into
    the catch-all 'text' field.  All values are UTF-8 encoded byte strings.
    """
    graph = item.get_graph()
    document = defaultdict(list)
    document['uuid'].append(item.metadata['uuid'])
    document['id'].append(item.item_id)
    document['silo'].append(silo_name)
    for (_,p,o) in graph.triples((URIRef(item.uri), None, None)):
        if str(p) in solr_fields_mapping:
            field = solr_fields_mapping[str(p)]
            if field == "aggregatedResource":
                # Additionally index the path after '/datasets/' as
                # 'filename'.
                if '/datasets/' in o:
                    fn = unicode(o).split('/datasets/')
                    if len(fn) == 2 and fn[1]:
                        document['filename'].append(unicode(fn[1]).encode("utf-8"))
            if field == "embargoedUntilDate":
                # Strip fractional seconds and append 'Z' to get a
                # Solr-style UTC timestamp string.
                ans = u"%sZ"%unicode(o).split('.')[0]
                document[field].append(unicode(ans).encode("utf-8"))
            else:
                # NOTE(review): this else pairs with the embargo 'if'
                # only, so aggregatedResource values also fall through to
                # here and the full URI is indexed alongside the derived
                # filename -- presumably intentional; confirm.
                document[field].append(unicode(o).encode("utf-8"))
        else:
            document['text'].append(unicode(o).encode("utf-8"))
    document = dict(document)
    return document
+
if __name__ == "__main__":
    # Solr indexing worker: pops JSON messages {silo, id, type} from the
    # "listento" queue and creates/updates/deletes Solr documents.
    # Usage: solr_worker.py <worker_number> [redis_section_suffix]
    c = Config()
    redis_section = "redis"
    worker_section = "worker_solr"
    worker_number = sys.argv[1]
    # Commits are batched: at most one commit per hour while busy.
    hours_before_commit = 1
    if len(sys.argv) == 3:
        if "redis_%s" % sys.argv[2] in c.sections():
            redis_section = "redis_%s" % sys.argv[2]

    rq = RedisQueue(c.get(worker_section, "listento"), "solr_%s" % worker_number,
                    db=c.get(redis_section, "db"),
                    host=c.get(redis_section, "host"),
                    port=c.get(redis_section, "port"),
                    errorqueue=c.get(worker_section, "errorq")
                    )
    # Locate the granary store via the Pylons production config.
    DB_ROOT = c.get(worker_section, "dbroot")
    rdfdb_config = Config("%s/production.ini" % DB_ROOT)
    granary_root = rdfdb_config.get("app:main", "granary.store", 0, {'here':DB_ROOT})

    g = Granary(granary_root)

    solr = SolrConnection(c.get(worker_section, "solrurl"))

    idletime = 2
    commit_time = datetime.now() + timedelta(hours=hours_before_commit)
    toCommit = False
    while(True):
        sleep(idletime)

        # Periodic commit while there is uncommitted work.
        if datetime.now() > commit_time and toCommit:
            solr.commit()
            commit_time = datetime.now() + timedelta(hours=hours_before_commit)
            toCommit = False

        line = rq.pop()

        if not line:
            # Queue drained: flush any pending adds/deletes now.
            if toCommit:
                solr.commit()
                toCommit = False
                commit_time = datetime.now() + timedelta(hours=hours_before_commit)
            continue

        logger.debug("Got message %s" %str(line))

        toCommit = True
        msg = simplejson.loads(line)
        # get silo name
        try:
            silo_name = msg['silo']
        except:
            # NOTE(review): bare except -- also swallows KeyboardInterrupt.
            logger.error("Msg badly formed %s\n"%str(msg))
            rq.task_complete()
            continue
        # Re-initialize granary
        # (a silo may have been created since this worker started;
        # deletions skip this because the silo may be legitimately gone)
        if silo_name not in g.silos and not msg['type'] == "d":
            g = Granary(granary_root)
            g.state.revert()
            g._register_silos()
            if silo_name not in g.silos:
                logger.error("Silo %s does not exist\n"%silo_name)
                rq.task_complete()
                #raise NoSuchSilo
                continue
        if msg['type'] == "c" or msg['type'] == "u" or msg['type'] == "embargo":
            s = g.get_rdf_silo(silo_name)
            # Creation, update or embargo change
            itemid = msg.get('id', None)
            logger.info("Got creation message on id:%s in silo:%s" % (itemid, silo_name))
            if itemid and s.exists(itemid):
                item = s.get_item(itemid)
                solr_doc = gather_document(silo_name, item)
                try:
                    solr.add(_commit=False, **solr_doc)
                except Exception, e :
                    logger.error("Error adding document to solr id:%s in silo:%s\n" % (itemid, silo_name))
                    try:
                        logger.error("%s\n\n" %str(e))
                    except:
                        pass
                    # Park the message on the error queue for later replay.
                    rq.task_failed()
                    continue
            else:
                # No (existing) item id: index the silo itself instead.
                silo_metadata = g.describe_silo(silo_name)
                solr_doc = {'id':silo_name, 'silo':silo_name, 'type':'Silo', 'uuid':uuid4().hex}
                solr_doc['title'] = ''
                if 'title' in silo_metadata:
                    solr_doc['title'] = silo_metadata['title']
                solr_doc['description'] = ''
                if 'description' in silo_metadata:
                    solr_doc['description'] = silo_metadata['description']
                solr.add(_commit=False, **solr_doc)
            rq.task_complete()
        elif msg['type'] == "d":
            # Deletion
            itemid = msg.get('id', None)
            if itemid:
                # Delete a single dataset's document.
                logger.info("Got deletion message on id:%s in silo:%s" % (itemid, silo_name))
                query='silo:"%s" AND id:"%s"'%(silo_name, itemid)
                solr.delete_query(query)
            elif silo_name:
                # No id: delete every document belonging to the silo.
                logger.info("Got deletion message on silo:%s" %silo_name)
                query='silo:"%s"'%silo_name
                solr.delete_query(query)
            #solr.commit()
            rq.task_complete()
diff --git a/message_workers/workers_available/worker_broker.conf b/message_workers/workers_available/worker_broker.conf
new file mode 100644
index 0000000..577ef22
--- /dev/null
+++ b/message_workers/workers_available/worker_broker.conf
@@ -0,0 +1,14 @@
+[program:worker_broker]
+autorestart = true
+numprocs = 2
+startretries = 3
+redirect_stderr = True
+stopwaitsecs = 10
+process_name = worker_broker_%(process_num)s
+priority = 777
+directory = /var/lib/databank/message_workers/
+command = /var/lib/databank/message_workers/broker.py %(process_num)s
+autostart = true
+startsecs = 10
+stdout_logfile = /var/log/databank/worker_broker.log
+
diff --git a/message_workers/workers_available/worker_solr.conf b/message_workers/workers_available/worker_solr.conf
new file mode 100644
index 0000000..3da3f8b
--- /dev/null
+++ b/message_workers/workers_available/worker_solr.conf
@@ -0,0 +1,14 @@
+[program:worker_solr]
+autorestart = true
+numprocs = 2
+startretries = 3
+redirect_stderr = True
+stopwaitsecs = 10
+process_name = worker_solr_%(process_num)s
+priority = 888
+directory = /var/lib/databank/message_workers/
+command = /var/lib/databank/message_workers/solr_worker.py %(process_num)s
+autostart = true
+startsecs = 10
+stdout_logfile = /var/log/databank/worker_solr.log
+
diff --git a/mod_wsgi/dispatch.wsgi b/mod_wsgi/dispatch.wsgi
new file mode 100644
index 0000000..7591098
--- /dev/null
+++ b/mod_wsgi/dispatch.wsgi
@@ -0,0 +1,42 @@
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+# Add the virtual Python environment site-packages directory to the path
+#import site
+#site.addsitedir('/home/simplesite/env/lib/python2.5/site-packages')
+#site.addsitedir('/usr/local/lib/python2.6/dist-packages')
+
# Make the checked-out application at /var/lib/databank importable.
import pkg_resources
pkg_resources.working_set.add_entry('/var/lib/databank')

# Avoid ``[Errno 13] Permission denied: '/var/www/.python-eggs'`` messages
import os
os.environ['PYTHON_EGG_CACHE'] = '/var/cache/databank/egg-cache'

import sys
# mod_wsgi disallows writes to stdout; route any prints to the error log.
sys.stdout = sys.stderr

# Load the Pylons application
from paste.deploy import loadapp
application = loadapp('config:/var/lib/databank/production.ini')
+
diff --git a/mod_wsgi/dispatch_development.wsgi b/mod_wsgi/dispatch_development.wsgi
new file mode 100644
index 0000000..3b8b054
--- /dev/null
+++ b/mod_wsgi/dispatch_development.wsgi
@@ -0,0 +1,39 @@
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+# Add the virtual Python environment site-packages directory to the path
+#import site
+#site.addsitedir('/home/simplesite/env/lib/python2.5/site-packages')
+#site.addsitedir('/usr/local/lib/python2.6/dist-packages')
+
+import pkg_resources
+pkg_resources.working_set.add_entry('/var/lib/databank')
+
+# Avoid ``[Errno 13] Permission denied: '/var/www/.python-eggs'`` messages
+import os
+os.environ['PYTHON_EGG_CACHE'] = '/var/cache/databank/egg-cache'
+
+# Load the Pylons application
+from paste.deploy import loadapp
+application = loadapp('config:/var/lib/databank/development.ini')
+
diff --git a/mod_wsgi/dispatch_ve_26.wsgi b/mod_wsgi/dispatch_ve_26.wsgi
new file mode 100644
index 0000000..9c1085e
--- /dev/null
+++ b/mod_wsgi/dispatch_ve_26.wsgi
@@ -0,0 +1,43 @@
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+# Add the virtual Python environment site-packages directory to the path
+import site
+site.addsitedir('/var/lib/databank/lib/python2.6/site-packages')
+#site.addsitedir('/home/simplesite/env/lib/python2.5/site-packages')
+#site.addsitedir('/usr/local/lib/python2.6/dist-packages')
+
+import pkg_resources
+pkg_resources.working_set.add_entry('/var/lib/databank')
+
+# Avoid ``[Errno 13] Permission denied: '/var/www/.python-eggs'`` messages
+import os
+os.environ['PYTHON_EGG_CACHE'] = '/var/cache/databank/egg-cache'
+
+import sys
+sys.stdout = sys.stderr
+
+# Load the Pylons application
+from paste.deploy import loadapp
+application = loadapp('config:/var/lib/databank/production.ini')
+
diff --git a/mod_wsgi/dispatch_ve_27.wsgi b/mod_wsgi/dispatch_ve_27.wsgi
new file mode 100644
index 0000000..dfbc9d4
--- /dev/null
+++ b/mod_wsgi/dispatch_ve_27.wsgi
@@ -0,0 +1,43 @@
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+# Add the virtual Python environment site-packages directory to the path
+import site
+site.addsitedir('/var/lib/databank/lib/python2.7/site-packages')
+#site.addsitedir('/home/simplesite/env/lib/python2.5/site-packages')
+#site.addsitedir('/usr/local/lib/python2.6/dist-packages')
+
+import pkg_resources
+pkg_resources.working_set.add_entry('/var/lib/databank')
+
+# Avoid ``[Errno 13] Permission denied: '/var/www/.python-eggs'`` messages
+import os
+os.environ['PYTHON_EGG_CACHE'] = '/var/cache/databank/egg-cache'
+
+import sys
+sys.stdout = sys.stderr
+
+# Load the Pylons application
+from paste.deploy import loadapp
+application = loadapp('config:/var/lib/databank/production.ini')
+
diff --git a/passwd-default b/passwd-default
new file mode 100644
index 0000000..c4930a9
--- /dev/null
+++ b/passwd-default
@@ -0,0 +1,9 @@
+admin:uaXjyn4Uw3qXo
+admin2:IFWZaH87O7ZDg
+admin3:b6TA/1MC7CD96
+sandbox_user:0kcQdq23ysbq.
+sandbox_user2:Zg8jCIxXK8/Sc
+sandbox_user3:qhc7aFzy.y5vU
+sandbox_manager:ej4TeOgLu4GQ6
+sandbox_manager2:blUNnoUzOfRNM
+sandbox_manager3:i/VlXSPqIgnwQ
diff --git a/persisted_state.json b/persisted_state.json
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/persisted_state.json
@@ -0,0 +1 @@
+{}
diff --git a/production-jenkins.ini b/production-jenkins.ini
new file mode 100644
index 0000000..c0c6e0a
--- /dev/null
+++ b/production-jenkins.ini
@@ -0,0 +1,138 @@
+# Copyright (c) 2012 University of Oxford
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# rdfdatabank - Pylons development environment configuration
+#
+# The %(here)s variable will be replaced with the parent directory of this file
+#
+[DEFAULT]
+debug = false
+# Uncomment and replace with the address which should receive any error reports
+#email_to = you@yourdomain.com
+smtp_server = localhost
+error_email_from = paste@jenkins
+
+[server:main]
+use = egg:Paste#http
+#Use these settings to run pylons using mod_wsgi and apache
+host = 127.0.0.1
+port = 5000
+#Use these settings to run pylons from the commandline
+#host = 0.0.0.0
+#port = 80
+
+[app:main]
+use = egg:rdfdatabank
+full_stack = true
+static_files = true
+
+sqlalchemy.url = mysql://databanksqladmin:d6sqL4dm;n@localhost:3306/databankauth
+sqlalchemy.pool_recycle = 3600
+
+cache_dir = /var/cache/databank
+beaker.session.key = rdfdatabank
+beaker.session.secret = somesecret
+
+who.config_file = %(here)s/who.ini
+who.log_level = info
+who.log_file = /var/log/databank/who.log
+
+redis.host = localhost
+
+granary.store = %(here)s/silos
+granary.uri_root = http://dataflow-jenkins.bodleian.ox.ac.uk/
+
+#profile.log_filename = /var/log/databank/profile.log
+#profile.path = /__profile__
+
+#auth.file = /var/lib/databank/passwd
+#auth.info = /var/lib/databank/rdfdatabank/config/users.py
+
+doi.config = /var/lib/databank/rdfdatabank/config/doi_config.py
+doi.count = /var/lib/databank/rdfdatabank/config/doi_count
+
+broadcast.to = redis
+broadcast.queue = silochanges
+
+metadata.embargoed = False
+
+solr.host = http://localhost:8080/solr
+naming_rule = [^0-9a-zA-Z_\-\:]
+naming_rule_humanized = Numbers, alphabets and -:
+formats_served = text/html,text/xhtml,text/plain,application/json,application/rdf+xml,text/xml,text/rdf+n3,application/x-turtle,text/rdf+ntriples,text/rdf+nt
+publisher = Bodleian Libraries, University of Oxford
+rights = http://ora.ouls.ox.ac.uk/objects/uuid%3A1d00eebb-8fed-46ad-8e38-45dbdb4b224c
+license = CC0 1.0 Universal (CC0 1.0). See http://creativecommons.org/publicdomain/zero/1.0/legalcode
+#license = Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. See http://creativecommons.org/licenses/by-nc-sa/3.0/
+
+api.version = 0.3
+
+# If you'd like to fine-tune the individual locations of the cache data dirs
+# for the Cache data, or the Session saves, un-comment the desired settings
+# here:
+#beaker.cache.data_dir = %(here)s/data/cache
+#beaker.session.data_dir = %(here)s/data/sessions
+
+# WARNING: *THE LINE BELOW MUST BE UNCOMMENTED ON A PRODUCTION ENVIRONMENT*
+# Debug mode will enable the interactive debugging tool, allowing ANYONE to
+# execute malicious code after an exception is raised.
+#set debug = false
+
+# Logging configuration
+[loggers]
+keys = root, routes, rdfdatabank
+
+[handlers]
+keys = console, logfile
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = INFO
+handlers = logfile
+
+[logger_routes]
+level = INFO
+handlers = logfile
+qualname = routes.middleware
+# "level = DEBUG" logs the route matched and routing variables.
+
+[logger_rdfdatabank]
+level = INFO
+handlers = logfile
+qualname = rdfdatabank
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[handler_logfile]
+class = FileHandler
+level = INFO
+formatter = generic
+args = ('/var/log/databank/databank.log', 'w')
+
+[formatter_generic]
+format = %(asctime)s,%(msecs)03d %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/production.ini b/production.ini
new file mode 100644
index 0000000..a7a6320
--- /dev/null
+++ b/production.ini
@@ -0,0 +1,140 @@
+# Copyright (c) 2012 University of Oxford
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# rdfdatabank - Pylons development environment configuration
+#
+# The %(here)s variable will be replaced with the parent directory of this file
+#
+[DEFAULT]
+debug = false
+# Uncomment and replace with the address which should receive any error reports
+email_to = you@yourdomain.com
+smtp_server = localhost
+error_email_from = paste@databank
+
+[server:main]
+use = egg:Paste#http
+#Use these settings to run pylons using mod_wsgi and apache
+host = 127.0.0.1
+port = 5000
+#Use these settings to run pylons from the commandline
+#host = 0.0.0.0
+#port = 80
+
+[app:main]
+use = egg:rdfdatabank
+full_stack = true
+static_files = true
+
+sqlalchemy.url = mysql://databanksqladmin:d6sqL4dm;n@localhost:3306/databankauth
+sqlalchemy.pool_recycle = 3600
+
+cache_dir = /var/cache/databank
+beaker.session.key = rdfdatabank
+beaker.session.secret = somesecret
+
+who.config_file = /var/lib/databank/who.ini
+who.log_level = info
+who.log_file = /var/log/databank/who.log
+
+redis.host = localhost
+
+granary.store = /silos
+granary.uri_root = http://databank/
+
+#auth.file = /var/lib/databank/passwd
+#auth.info = /var/lib/databank/rdfdatabank/config/users.py
+
+doi.config = /var/lib/databank/rdfdatabank/config/doi_config.py
+doi.count = /var/lib/databank/rdfdatabank/config/doi_count
+
+broadcast.to = redis
+broadcast.queue = silochanges
+
+metadata.embargoed = False
+
+solr.host = http://localhost:8080/solr
+naming_rule = [^0-9a-zA-Z_\-\:]
+naming_rule_humanized = numbers, letters, '-' and ':', must be more than one character long and must not contain any spaces.
+formats_served = text/html,text/xhtml,text/plain,application/json,application/rdf+xml,text/xml,text/rdf+n3,application/x-turtle,text/rdf+ntriples,text/rdf+nt
+publisher = Bodleian Libraries, University of Oxford
+rights = http://ora.ouls.ox.ac.uk/objects/uuid%3A1d00eebb-8fed-46ad-8e38-45dbdb4b224c
+license = CC0 1.0 Universal (CC0 1.0). See http://creativecommons.org/publicdomain/zero/1.0/legalcode
+#license = Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. See http://creativecommons.org/licenses/by-nc-sa/3.0/
+
+api.version = 0.3
+
+# If you'd like to fine-tune the individual locations of the cache data dirs
+# for the Cache data, or the Session saves, un-comment the desired settings
+# here:
+#beaker.cache.data_dir = %(here)s/data/cache
+#beaker.session.data_dir = %(here)s/data/sessions
+#
+# WARNING: *THE LINE BELOW MUST BE UNCOMMENTED ON A PRODUCTION ENVIRONMENT*
+# Debug mode will enable the interactive debugging tool, allowing ANYONE to
+# execute malicious code after an exception is raised.
+#set debug = false
+
+# Logging configuration
+[loggers]
+keys = root, routes, rdfdatabank, sqlalchemy
+
+[handlers]
+keys = console, logfile
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = INFO
+handlers = logfile
+
+[logger_routes]
+level = INFO
+handlers = logfile
+qualname = routes.middleware
+# "level = DEBUG" logs the route matched and routing variables.
+
+[logger_rdfdatabank]
+level = INFO
+handlers = logfile
+qualname = rdfdatabank
+
+[logger_sqlalchemy]
+level = INFO
+handlers = logfile
+qualname = sqlalchemy.engine
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[handler_logfile]
+class = FileHandler
+level = INFO
+formatter = generic
+args = ('/var/log/databank/databank.log', 'w')
+
+[formatter_generic]
+format = %(asctime)s,%(msecs)03d %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/rdfdatabank.egg-info/PKG-INFO b/rdfdatabank.egg-info/PKG-INFO
index b1eff68..41ca43e 100644
--- a/rdfdatabank.egg-info/PKG-INFO
+++ b/rdfdatabank.egg-info/PKG-INFO
@@ -1,10 +1,10 @@
Metadata-Version: 1.0
Name: rdfdatabank
-Version: 0.1dev
-Summary: UNKNOWN
-Home-page: UNKNOWN
-Author: UNKNOWN
-Author-email: UNKNOWN
-License: UNKNOWN
+Version: 0.3
+Summary: RDF-enhanced, pairtree-backed storage API
+Home-page: https://github.com/dataflow/RDFDatabank
+Author: Anusha Ranganathan, Ben O'Steen
+Author-email: anusha.ranganathan@ieee.org, bosteen@gmail.com
+License: MIT License
Description: UNKNOWN
Platform: UNKNOWN
diff --git a/rdfdatabank.egg-info/SOURCES.txt b/rdfdatabank.egg-info/SOURCES.txt
index 41a75aa..48c1f04 100644
--- a/rdfdatabank.egg-info/SOURCES.txt
+++ b/rdfdatabank.egg-info/SOURCES.txt
@@ -16,18 +16,160 @@ rdfdatabank/config/__init__.py
rdfdatabank/config/deployment.ini_tmpl
rdfdatabank/config/environment.py
rdfdatabank/config/middleware.py
+rdfdatabank/config/namespaces.py
rdfdatabank/config/routing.py
+rdfdatabank/config/users-default.py
rdfdatabank/controllers/__init__.py
+rdfdatabank/controllers/about.py
+rdfdatabank/controllers/account.py
+rdfdatabank/controllers/admin.py
+rdfdatabank/controllers/api.py
+rdfdatabank/controllers/datasets.py
+rdfdatabank/controllers/doi.py
rdfdatabank/controllers/error.py
+rdfdatabank/controllers/home.py
+rdfdatabank/controllers/items.py
+rdfdatabank/controllers/keywords.py
+rdfdatabank/controllers/search.py
+rdfdatabank/controllers/searching.py
+rdfdatabank/controllers/silos.py
+rdfdatabank/controllers/states.py
+rdfdatabank/controllers/users.py
+rdfdatabank/lib/HTTP_request.py
rdfdatabank/lib/__init__.py
rdfdatabank/lib/app_globals.py
+rdfdatabank/lib/auth.py
rdfdatabank/lib/base.py
+rdfdatabank/lib/broadcast.py
+rdfdatabank/lib/conneg.py
+rdfdatabank/lib/doi_helper.py
+rdfdatabank/lib/doi_schema.py
+rdfdatabank/lib/file_unpack.py
rdfdatabank/lib/helpers.py
+rdfdatabank/lib/htpasswd.py
+rdfdatabank/lib/ident_md.py
+rdfdatabank/lib/search_term.py
+rdfdatabank/lib/short_pid.py
+rdfdatabank/lib/utils.py
rdfdatabank/model/__init__.py
-rdfdatabank/public/bg.png
-rdfdatabank/public/favicon.ico
-rdfdatabank/public/index.html
-rdfdatabank/public/pylons-logo.gif
+rdfdatabank/public/static/databank_logo.png
+rdfdatabank/public/static/databank_logo_generic.png
+rdfdatabank/public/static/jquery.js
+rdfdatabank/public/static/style.css
+rdfdatabank/public/static/js/html5.js
+rdfdatabank/public/static/json_data/DatasetStateInfo-dataset1-version0.txt
+rdfdatabank/public/static/json_data/DatasetStateInfo-dataset1-version1.txt
+rdfdatabank/public/static/json_data/DatasetStateInfo-dataset2.txt
+rdfdatabank/public/static/json_data/SiloStateInfo.txt
+rdfdatabank/public/static/json_data/adminInformation.txt
+rdfdatabank/public/static/json_data/adminInformationForSilo.txt
+rdfdatabank/public/static/json_data/datasetInformation-version0.txt
+rdfdatabank/public/static/json_data/datasetInformation-version1.txt
+rdfdatabank/public/static/json_data/datasetInformation.txt
+rdfdatabank/public/static/json_data/datasetSubdirInformation-version1.txt
+rdfdatabank/public/static/json_data/datasetSubdirInformation-version3.txt
+rdfdatabank/public/static/json_data/datasetSubdirInformation.txt
+rdfdatabank/public/static/json_data/datasetsInSiloInformation.txt
+rdfdatabank/public/static/json_data/itemInformationForDataset-old.txt
+rdfdatabank/public/static/json_data/itemInformationForDataset.txt
+rdfdatabank/public/static/json_data/itemInformationForZipFileinDataset.txt
+rdfdatabank/public/static/json_data/siloInformation.txt
+rdfdatabank/public/static/json_data/silos.txt
+rdfdatabank/public/static/styles/basic.css
+rdfdatabank/public/static/styles/chimpanzee.css
+rdfdatabank/public/static/styles/ie.css
+rdfdatabank/public/static/styles/marmoset.css
+rdfdatabank/public/static/styles/print.css
+rdfdatabank/public/static/styles/reset.css
+rdfdatabank/public/static/styles/silverback.css
+rdfdatabank/public/static/styles/squirrelMonkey.css
+rdfdatabank/public/static/styles/images/blkdiamond.gif
+rdfdatabank/public/static/styles/images/blksquare.gif
+rdfdatabank/public/static/styles/images/csv.png
+rdfdatabank/public/static/styles/images/delete-icon-24.png
+rdfdatabank/public/static/styles/images/down_arrow.png
+rdfdatabank/public/static/styles/images/down_arrow_black.png
+rdfdatabank/public/static/styles/images/down_arrow_blue.png
+rdfdatabank/public/static/styles/images/file-add-icon-24.png
+rdfdatabank/public/static/styles/images/file-new-icon-24.png
+rdfdatabank/public/static/styles/images/fminus.png
+rdfdatabank/public/static/styles/images/fplus.png
+rdfdatabank/public/static/styles/images/go-up-icon-24.png
+rdfdatabank/public/static/styles/images/help-icon-16.png
+rdfdatabank/public/static/styles/images/help-icon-24.png
+rdfdatabank/public/static/styles/images/info-icon-16.png
+rdfdatabank/public/static/styles/images/json.png
+rdfdatabank/public/static/styles/images/link.png
+rdfdatabank/public/static/styles/images/page-edit-icon-24.png
+rdfdatabank/public/static/styles/images/state-icon-24.png
+rdfdatabank/public/static/styles/images/unzip-icon-24.png
+rdfdatabank/public/static/styles/images/unzip-icon-32.png
+rdfdatabank/public/static/styles/images/up_arrow.png
+rdfdatabank/public/static/styles/images/up_arrow_black.png
+rdfdatabank/public/static/styles/images/up_arrow_blue.png
+rdfdatabank/public/static/styles/images/icons/breadcrumb-arrow.png
+rdfdatabank/templates/about.html
+rdfdatabank/templates/admin_api.html
+rdfdatabank/templates/admin_siloview.html
+rdfdatabank/templates/admin_user.html
+rdfdatabank/templates/admin_users.html
+rdfdatabank/templates/alter_silo.html
+rdfdatabank/templates/api.html
+rdfdatabank/templates/atom_results.html
+rdfdatabank/templates/base.html
+rdfdatabank/templates/create_doi.html
+rdfdatabank/templates/create_new_item.html
+rdfdatabank/templates/create_new_silo.html
+rdfdatabank/templates/datasets_api.html
+rdfdatabank/templates/datasetview.html
+rdfdatabank/templates/datasetview_version.html
+rdfdatabank/templates/delete_item.html
+rdfdatabank/templates/doiview.html
+rdfdatabank/templates/embargo_form.html
+rdfdatabank/templates/file_upload.html
+rdfdatabank/templates/files_unpack.html
+rdfdatabank/templates/files_unpack2.html
+rdfdatabank/templates/footer.html
+rdfdatabank/templates/header.html
+rdfdatabank/templates/home.html
+rdfdatabank/templates/item_file_upload.html
+rdfdatabank/templates/items_api.html
+rdfdatabank/templates/itemview.html
+rdfdatabank/templates/itemview_version.html
+rdfdatabank/templates/keywords.html
+rdfdatabank/templates/list_of_datasets.html
+rdfdatabank/templates/list_of_silos.html
+rdfdatabank/templates/list_of_zipfiles.html
+rdfdatabank/templates/login.html
+rdfdatabank/templates/logout.html
+rdfdatabank/templates/part_list.html
+rdfdatabank/templates/part_list_display.html
+rdfdatabank/templates/raw_search.html
+rdfdatabank/templates/rdf_manifest.html
+rdfdatabank/templates/rdf_manifest_form.html
+rdfdatabank/templates/readme_section.html
+rdfdatabank/templates/register_new_user.html
+rdfdatabank/templates/search.html
+rdfdatabank/templates/search_advanced.html
+rdfdatabank/templates/search_form.html
+rdfdatabank/templates/search_response_display.html
+rdfdatabank/templates/searching.html
+rdfdatabank/templates/silo_admin.html
+rdfdatabank/templates/silo_metadata.html
+rdfdatabank/templates/silos_api.html
+rdfdatabank/templates/siloview.html
+rdfdatabank/templates/states_api.html
+rdfdatabank/templates/update_user.html
+rdfdatabank/templates/zipfilesubitemview.html
+rdfdatabank/templates/zipfileview.html
+rdfdatabank/tests/RDFDatabankConfig-Jenkins.py
+rdfdatabank/tests/RDFDatabankConfig.py
+rdfdatabank/tests/TestAdmin.py
+rdfdatabank/tests/TestSubmission.py
+rdfdatabank/tests/TestSubmission_load.py
+rdfdatabank/tests/TestSubmission_submitter.py
rdfdatabank/tests/__init__.py
-rdfdatabank/tests/test_models.py
-rdfdatabank/tests/functional/__init__.py
\ No newline at end of file
+rdfdatabank/tests/pylons_init.py
+rdfdatabank/tests/testlib/SparqlQueryTestCase.py
+rdfdatabank/tests/testlib/TestUtils.py
+rdfdatabank/tests/testlib/__init__.py
\ No newline at end of file
diff --git a/rdfdatabank.egg-info/requires.txt b/rdfdatabank.egg-info/requires.txt
index a6822b7..c0672f6 100644
--- a/rdfdatabank.egg-info/requires.txt
+++ b/rdfdatabank.egg-info/requires.txt
@@ -1 +1,9 @@
-Pylons>=0.9.7
\ No newline at end of file
+Pylons>=0.9.7
+pairtree>=0.5.6-T
+recordsilo
+redis
+repoze.who>=2.0a4
+repoze.who_friendlyform
+solrpy
+rdflib==2.4.2
+python-dateutil>=1.4.1
diff --git a/rdfdatabank/config/environment.py b/rdfdatabank/config/environment.py
index f456ec5..fb39315 100644
--- a/rdfdatabank/config/environment.py
+++ b/rdfdatabank/config/environment.py
@@ -1,10 +1,38 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
"""Pylons environment configuration"""
+
import os
from mako.lookup import TemplateLookup
from pylons import config
from pylons.error import handle_mako_error
+from sqlalchemy import engine_from_config
+from rdfdatabank.model import init_model
+
import rdfdatabank.lib.app_globals as app_globals
import rdfdatabank.lib.helpers
from rdfdatabank.config.routing import make_map
@@ -20,12 +48,17 @@ def load_environment(global_conf, app_conf):
static_files=os.path.join(root, 'public'),
templates=[os.path.join(root, 'templates')])
+ engine = engine_from_config(app_conf, 'sqlalchemy.')
+ init_model(engine)
+
# Initialize config with the basic options
config.init_app(global_conf, app_conf, package='rdfdatabank', paths=paths)
config['routes.map'] = make_map()
config['pylons.app_globals'] = app_globals.Globals()
config['pylons.h'] = rdfdatabank.lib.helpers
+ config [ 'pylons.response_options' ][ 'charset' ] = 'utf-8'
+ config['pylons.strict_tmpl_context'] = False
# Create the Mako TemplateLookup, with the default auto-escaping
config['pylons.app_globals'].mako_lookup = TemplateLookup(
diff --git a/rdfdatabank/config/middleware.py b/rdfdatabank/config/middleware.py
index e2bb957..c08ce31 100644
--- a/rdfdatabank/config/middleware.py
+++ b/rdfdatabank/config/middleware.py
@@ -1,4 +1,30 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
"""Pylons middleware initialization"""
+
+#from paste import httpexceptions
from beaker.middleware import CacheMiddleware, SessionMiddleware
from paste.cascade import Cascade
from paste.registry import RegistryManager
@@ -42,12 +68,23 @@ def make_app(global_conf, full_stack=True, static_files=True, **app_conf):
# The Pylons WSGI app
app = PylonsApp()
+ #app = httpexceptions.make_middleware(app, global_conf)
+ #if asbool(config['debug']):
+ # from repoze.profile.profiler import AccumulatingProfileMiddleware
+ # app = AccumulatingProfileMiddleware(
+ # app,
+ # log_filename=app_conf['profile.log_filename'],
+ # discard_first_request=True,
+ # flush_at_shutdown=True,
+ # path=app_conf['profile.path']
+ # )
+
# Routing/Session/Cache Middleware
app = RoutesMiddleware(app, config['routes.map'])
app = SessionMiddleware(app, config)
app = CacheMiddleware(app, config)
-
+ #TODO: Check if the new error controller works with sword server
# CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares)
if asbool(full_stack):
# Handle Python exceptions
diff --git a/rdfdatabank/config/namespaces.py b/rdfdatabank/config/namespaces.py
new file mode 100644
index 0000000..cbfd32f
--- /dev/null
+++ b/rdfdatabank/config/namespaces.py
@@ -0,0 +1,23 @@
+from rdflib import Namespace
+
+NAMESPACES = {}
+NAMESPACES['rdf'] = Namespace(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#')
+NAMESPACES['rdfs'] = Namespace(u'http://www.w3.org/2000/01/rdf-schema#')
+NAMESPACES['dc'] = Namespace(u'http://purl.org/dc/elements/1.1/')
+NAMESPACES['dcterms'] = Namespace(u'http://purl.org/dc/terms/')
+NAMESPACES['foaf'] = Namespace(u'http://xmlns.com/foaf/0.1/')
+NAMESPACES['oxds'] = Namespace(u'http://vocab.ox.ac.uk/dataset/schema#')
+NAMESPACES['ore'] = Namespace(u'http://www.openarchives.org/ore/terms/')
+NAMESPACES['bibo'] = Namespace(u'http://purl.org/ontology/bibo/')
+
+PREFIXES = {}
+PREFIXES['http://www.w3.org/1999/02/22-rdf-syntax-ns#'] = 'rdf'
+PREFIXES['http://www.w3.org/2000/01/rdf-schema#'] = 'rdfs'
+PREFIXES['http://purl.org/dc/elements/1.1/'] = 'dc'
+PREFIXES['http://purl.org/dc/terms/'] = 'dcterms'
+PREFIXES['http://xmlns.com/foaf/0.1/'] = 'foaf'
+PREFIXES['http://vocab.ox.ac.uk/dataset/schema#'] = 'oxds'
+PREFIXES['http://www.openarchives.org/ore/terms/'] = 'ore'
+PREFIXES['http://purl.org/ontology/bibo/'] = 'bibo'
+
+
diff --git a/rdfdatabank/config/routing.py b/rdfdatabank/config/routing.py
index 656fd25..bbc4756 100644
--- a/rdfdatabank/config/routing.py
+++ b/rdfdatabank/config/routing.py
@@ -1,3 +1,27 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
"""Routes configuration
The more specific and detailed routes should be defined first so they
@@ -19,22 +43,61 @@ def make_map():
map.connect('/error/{action}/{id}', controller='error')
# CUSTOM ROUTES HERE
- map.redirect("/", "/objects")
+
+ map.redirect('/*(url)/', '/{url}',
+ _redirect_code='301 Moved Permanently')
+
+ #Special controller to redirect datasets from databank.ouls to databank.ora
+ #map.connect('/objects/{id}', controller='redirect', action='index')
+
+ map.connect("/login", controller='account', action='login')
+ map.connect("/logout", controller='account', action='logout')
+ map.connect("/welcome", controller='account', action='welcome')
+
+ map.connect('/', controller='home', action='index')
+ map.connect('/api', controller='api', action='index')
+ map.connect('/api/{api_name}', controller='api', action='apiview')
+
+ map.connect('/keywords', controller='keywords', action='index')
+ map.connect('/about', controller='about', action='index')
+ map.connect('/cookies', controller='cookies', action='index')
+ map.connect('/searching', controller='searching', action='index')
map.connect('/admin', controller='admin', action='index')
- map.connect('/admin/{silo_name}', controller='admin', action='archive')
- map.connect('/packages', controller='packages', action='index')
- map.connect('/packages/{silo}', controller='packages', action='siloview')
- map.connect('/packages/{silo}/upload', controller='packages', action='upload')
- map.connect('/objects', controller='objects', action='index')
- map.connect('/objects/{silo}', controller='objects', action='siloview')
- map.connect('/objects/{silo}/{id}', controller='objects', action='itemview')
- map.connect('/objects/{silo}/{id}/{path:.*}', controller='objects', action='subitemview')
+ map.connect('/users', controller='users', action='index')
+ map.connect('/users/{username}', controller='users', action='userview')
+ map.connect('/{silo}/users', controller='users', action='siloview')
+ map.connect('/{silo}/users/{username}', controller='users', action='silouserview')
+ map.connect('/{silo}/admin', controller='admin', action='siloview')
+
+ map.connect('/silos', controller='silos', action='index')
+ #map.connect('/{silo}', controller='silos', action='siloview')
+
+ map.connect('/{silo}', controller='datasets', action='siloview')
+ map.connect('/{silo}/datasets', controller='datasets', action='siloview')
+ map.connect('/{silo}/datasets/{id}', controller='datasets', action='datasetview')
+ map.connect('/{silo}/datasets/{id}/{path:.*}', controller='datasets', action='itemview')
+
+ map.connect('/{silo}/items', controller='items', action='siloview')
+ map.connect('/{silo}/items/{id}', controller='items', action='datasetview')
+ map.connect('/{silo}/items/{id}/{path:.*?\.zip}', controller='items', action='itemview')
+ map.connect('/{silo}/items/{id}/{path:.*?\.zip}/{subpath:.*}', controller='items', action='subitemview')
+ #map.connect('/{silo}/items/{id}/{path:.*}', controller='items', action='itemview') # Use verb dataset instead
+
+ map.connect('/{silo}/states', controller='states', action='siloview')
+ map.connect('/{silo}/states/{id}', controller='states', action='datasetview')
+ map.connect('/{silo}/doi/{id}', controller='doi', action='datasetview')
+
+ # SWORDv2 Configuration
+ map.connect('/swordv2/service-document', controller="sword", action="service_document") # From which to retrieve the service document
+ map.connect('/swordv2/silo/{path:.*?}', controller="sword", action="collection") # Representing a Collection as listed in the service document
+ map.connect('/swordv2/edit-media/{path:.*?}', controller="sword", action="media_resource") # The URI used in atom:link@rel=edit-media
+ map.connect('/swordv2/edit/{path:.*?}', controller="sword", action="container") # The URI used in atom:link@rel=edit
+ map.connect('/swordv2/statement/{path:.*?}', controller="sword", action="statement") # The URI used in atom:link@rel=sword:statement
+
+ map.connect('/{controller}')
map.connect('/{controller}/{action}')
map.connect('/{controller}/{action}/{id}')
-
- map.redirect('/*(url)/', '/{url}',
- _redirect_code='301 Moved Permanently')
return map
diff --git a/rdfdatabank/config/users-default.py b/rdfdatabank/config/users-default.py
new file mode 100644
index 0000000..2c25dac
--- /dev/null
+++ b/rdfdatabank/config/users-default.py
@@ -0,0 +1,35 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+_USERS = {
+'admin': {'owner': '*', 'first_name': 'Databank', 'last_name': 'Admin', 'role': 'admin', 'description': 'Admin for all silos'},
+'admin2': {'owner': ['sandbox'], 'first_name': 'Databank', 'last_name': 'Admin-2', 'role': 'admin', 'description': 'Admin for silo Sandbox'},
+'admin3': {'owner': ['sandbox2'], 'first_name': 'Databank', 'last_name': 'Admin-3', 'role': 'admin', 'description': 'Admin for silo Sandbox2'},
+'sandbox_user': {'owner': ['sandbox'], 'role': 'user', 'name': 'Sandbox user', 'description': 'User for silo Sandbox'},
+'sandbox_user2': {'owner': ['sandbox'], 'role': 'user', 'name': 'Sandbox user-2', 'description': 'User for silo Sandbox'},
+'sandbox_user3': {'owner': ['sandbox2'], 'role': 'user', 'name': 'Sandbox user-3', 'description': 'User for silo Sandbox2'},
+'sandbox_manager': {'owner': ['sandbox'], 'role': 'manager', 'name': 'Sandbox manager', 'description': 'Manager for silo Sandbox'},
+'sandbox_manager2': {'owner': ['sandbox'], 'role': 'manager', 'name': 'Sandbox manager-2', 'description': 'Manager for silo Sandbox'},
+'sandbox_manager3': {'owner': ['sandbox2'], 'role': 'manager', 'name': 'Sandbox manager-3', 'description': 'Manager for silo Sandbox2'}
+}
diff --git a/rdfdatabank/controllers/about.py b/rdfdatabank/controllers/about.py
new file mode 100644
index 0000000..b40290f
--- /dev/null
+++ b/rdfdatabank/controllers/about.py
@@ -0,0 +1,31 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+
+from rdfdatabank.lib.base import BaseController, render
+
+class AboutController(BaseController):
+ def index(self):
+ return render('/about.html')
diff --git a/rdfdatabank/controllers/account.py b/rdfdatabank/controllers/account.py
new file mode 100644
index 0000000..a36b1d2
--- /dev/null
+++ b/rdfdatabank/controllers/account.py
@@ -0,0 +1,99 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from pylons import url
+from pylons import request, response, session, tmpl_context as c, url, app_globals as ag
+from rdfdatabank.lib.base import BaseController, render
+from pylons.controllers.util import abort, redirect
+from paste.request import get_cookies
+from webob.exc import HTTPUnauthorized
+from urllib import unquote
+
+class AccountController(BaseController):
+ def login(self):
+ #c.ident = None
+ #c.ident = request.environ.get('repoze.who.identity')
+ #script_name = request.environ.get('SCRIPT_NAME') or '/'
+ #referer = request.environ.get('HTTP_REFERER', script_name)
+
+ #if not c.ident:
+ # abort(401, "Not Authorised")
+ c.login_counter = request.environ['repoze.who.logins']
+ if c.login_counter > 0:
+ session['login_flash'] = """Wrong credentials. Have you been registered?"""
+ session.save()
+ c.came_from = request.params.get('came_from') or "/"
+ return render('/login.html')
+
+ def welcome(self):
+ identity = request.environ.get("repoze.who.identity")
+ came_from = request.params.get('came_from') or "/"
+ came_from = unquote(came_from)
+ came_from = unquote(came_from)
+ came_from = unquote(came_from)
+ came_from = str(came_from)
+ if identity:
+ # Login succeeded
+ userid = identity['repoze.who.userid']
+ #user_det = get_mediator_details(userid)
+ #if user_det['name']:
+ # session['user_name'] = user_det['name']
+ #if user_det['uri']:
+ # session['user_uri'] = str(user_det['uri'])
+ session['user_id'] = userid
+ session.save()
+ return redirect(url(came_from))
+ else:
+ # Login failed
+ try:
+ login_counter = request.environ['repoze.who.logins'] + 1
+ except:
+ login_counter = 0
+ destination = "/login?came_from=%s&logins=%s" % (came_from, login_counter)
+ return redirect(url(destination))
+
+ def logout(self):
+ c.userid = None
+ c.message = "We hope to see you soon!"
+ #display_message("We hope to see you soon!", status="success")
+ came_from = request.params.get('came_from') or "/"
+ #came_from = request.params.get('came_from', '') or "/"
+ came_from = unquote(came_from)
+ came_from = unquote(came_from)
+ came_from = unquote(came_from)
+ came_from = str(came_from)
+ if session.has_key('user_name'):
+ del session['user_name']
+ if session.has_key('user_uri'):
+ del session['user_uri']
+ if session.has_key('user_id'):
+ del session['user_id']
+ if session.has_key('user_dept'):
+ del session['user_dept']
+ if session.has_key('user_email'):
+ del session['user_email']
+ session.save()
+ #return render('/logout.html')
+ #return redirect(url(came_from))
+ return redirect(url("/"))
diff --git a/rdfdatabank/controllers/admin.py b/rdfdatabank/controllers/admin.py
index 239df1c..bd72bde 100644
--- a/rdfdatabank/controllers/admin.py
+++ b/rdfdatabank/controllers/admin.py
@@ -1,135 +1,400 @@
-import logging
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
-from pylons import request, response, session, config, tmpl_context as c
-from pylons.controllers.util import abort, redirect_to
-from pylons import app_globals as ag
-from rdfdatabank.lib.base import BaseController, render
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
-import re, os
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
-from rdfdatabank.lib.unpack import store_zipfile, unpack_zip_item, BadZipfile
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+import logging
+import simplejson
+from pylons import request, response, session, config, tmpl_context as c, url
+from pylons.controllers.util import abort, redirect
+from pylons.decorators import rest
+from pylons import app_globals as ag
+from rdfdatabank.lib.base import BaseController, render
from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse
+from rdfdatabank.lib.utils import allowable_id2
+from rdfdatabank.lib.auth_entry import add_silo, delete_silo, add_group_users, delete_group_users
+from rdfdatabank.lib.auth_entry import add_user, update_user, list_usernames, list_user_groups
+import codecs
log = logging.getLogger(__name__)
-accepted_params = ['title', 'description', 'notes', 'owners']
+accepted_params = ['title', 'description', 'notes', 'owners', 'disk_allocation', 'administrators', 'managers', 'submitters']
class AdminController(BaseController):
+ @rest.restrict('GET', 'POST')
def index(self):
if not request.environ.get('repoze.who.identity'):
abort(401, "Not Authorised")
ident = request.environ.get('repoze.who.identity')
c.ident = ident
- c.granary_list = ag.granary.silos
-
- # Admin only
- if ident.get('role') == "admin":
- http_method = request.environ['REQUEST_METHOD']
- if http_method == "GET":
- c.granary = ag.granary
- return render("/silo_admin.html")
- elif http_method == "POST":
- params = request.POST
- if 'silo' in params and not ag.granary.issilo(params['silo']):
- # Create new silo
- silo_name = params['silo']
- g_root = config.get("granary.uri_root", "info:")
- c.silo = ag.granary.get_rdf_silo(silo_name, uri_base="%s%s" % (g_root, silo_name))
- ag.granary._register_silos()
- kw = {}
- for term in accepted_params:
- if term in params:
- kw[term] = params[term]
- ag.granary.describe_silo(silo_name, **kw)
- ag.granary.sync()
- # conneg return
+ c.granary_list = ag.authz(ident, permission=['administrator', 'manager'])
+
+ http_method = request.environ['REQUEST_METHOD']
+
+ if http_method == 'GET':
+ if not 'administrator' in ident['permissions'] and not 'manager' in ident['permissions']:
+ abort(403, "Do not have administrator or manager credentials")
+ else:
+ if not 'administrator' in ident['permissions']:
+ abort(403, "Do not have administrator credentials")
+
+ if http_method == "GET":
+ #c.granary = ag.granary
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
- if not accept_list:
- accept_list= [MT("text", "html")]
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ return render("/admin_silos.html")
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(list(c.granary_list))
+ try:
mimetype = accept_list.pop(0)
- while(mimetype):
- if str(mimetype) in ["text/html", "text/xhtml"]:
- redirect_to(controller="admin", action="index")
- else:
- response.status_int = 201
- return "Created Silo %s" % silo_name
- else:
- abort(403)
+ except IndexError:
+ mimetype = None
+ #Whoops nothing satisfies - return text/html
+ return render("admin_silos.html")
+ elif http_method == "POST":
+ params = request.POST
+ if 'silo' in params:
+ if ag.granary.issilo(params['silo']):
+ abort(403, "The silo %s exists"%params['silo'])
+ if not allowable_id2(params['silo']):
+ response.content_type = "text/plain"
+ response.status_int = 400
+ response.status = "400 Bad request. Silo name not valid"
+ return "Silo name can contain only the following characters - %s and has to be more than 1 character"%ag.naming_rule_humanized
+ #NOTE:
+ #If any userid in params['administrators']/params['managers']/params['submitters'] does not exist, return 403
+            #if administrator list is empty, append current user to administrator list
+            #Owner is the superset of administrators, managers and submitters
+ existing_users = list_usernames()
+ owners = []
+ admins = []
+ managers = []
+ submitters = []
+ #if 'owners' in params and params['owners']:
+ # owners = [x.strip() for x in kw['owners'].split(",") if x]
+ if 'administrators' in params and params['administrators']:
+ admins = [x.strip() for x in params['administrators'].split(",") if x]
+ owners.extend(admins)
+ if 'managers' in params and params['managers']:
+ managers = [x.strip() for x in params['managers'].split(",") if x]
+ owners.extend(managers)
+ if 'submitters' in params and params['submitters']:
+ submitters = [x.strip() for x in params['submitters'].split(",") if x]
+ owners.extend(submitters)
+ if not admins:
+ owners.append(ident['user'].user_name)
+ admins.append(ident['user'].user_name)
+ owners = list(set(owners))
+ for o in owners:
+ if not o in existing_users:
+ abort (403, "User %s does not exist"%o)
+ admins = list(set(admins))
+ managers = list(set(managers))
+ submitters = list(set(submitters))
+
+ # Create new silo
+ silo = params['silo']
+ g_root = config.get("granary.uri_root", "info:")
+ c.silo = ag.granary.get_rdf_silo(silo, uri_base="%s%s/datasets/" % (g_root, silo))
+ ag.granary._register_silos()
+ kw = {}
+ for term in accepted_params:
+ if term in params:
+ kw[term] = params[term]
+ kw['owners'] = ','.join(owners)
+ kw['administrators'] = ','.join(admins)
+ kw['managers'] = ','.join(managers)
+ kw['submitters'] = ','.join(submitters)
+ du = ag.granary.disk_usage_silo(silo)
+ kw['disk_usage'] = du
+ ag.granary.describe_silo(silo, **kw)
+ ag.granary.sync()
+
+ # Add silo to database
+ add_silo(silo)
+
+ try:
+ ag.b.silo_creation(silo, ident=ident['repoze.who.userid'])
+ except:
+ pass
+
+ #Add users belonging to the silo, to the database
+ all_silo_users = []
+
+ for a in admins:
+ all_silo_users.append((a, 'administrator'))
+ for a in managers:
+ all_silo_users.append((a, 'manager'))
+ for a in submitters:
+ all_silo_users.append((a, 'submitter'))
+ add_group_users(params['silo'], all_silo_users)
+
+ ag.granary.state.revert()
+ ag.granary._register_silos()
+
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ redirect(url(controller="datasets", action="siloview", silo=silo))
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = "text/plain"
+ response.status_int = 201
+ response.status = "201 Created"
+ response.headers['Content-Location'] = url(controller="datasets", action="siloview", silo=silo)
+ return "201 Created Silo %s" % silo
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ # Whoops - nothing satisfies - return text/plain
+ response.content_type = "text/plain"
+ response.status_int = 201
+ response.status = "201 Created"
+ response.headers['Content-Location'] = url(controller="datasets", action="siloview", silo=silo)
+ return "201 Created Silo %s" % silo
+ else:
+ response.content_type = "text/plain"
+ response.status_int = 400
+ response.status = "400 Bad Request"
+ return "400 Bad request. No valid parameters found."
- def archive(self, silo_name):
+ @rest.restrict('GET', 'POST', 'DELETE')
+ def siloview(self, silo):
if not request.environ.get('repoze.who.identity'):
abort(401, "Not Authorised")
+ if not ag.granary.issilo(silo):
+ abort(404)
ident = request.environ.get('repoze.who.identity')
c.ident = ident
- c.granary_list = ag.granary.silos
- c.silo_name = silo_name
- # Admin only
- if ident.get('role') == "admin":
- http_method = request.environ['REQUEST_METHOD']
- if http_method == "GET":
- if ag.granary.issilo(silo_name):
- c.kw = ag.granary.describe_silo(silo_name)
+ c.silo = silo
+ silos = ag.authz(ident, permission=['administrator', 'manager'])
+ if not silo in silos:
+ abort(403, "Do not have administrator or manager credentials for silo %s"%silo)
+ user_groups = list_user_groups(ident['user'].user_name)
+ if ('*', 'administrator') in user_groups:
+ #User is super user
+ c.roles = ["admin", "manager", "user"]
+ elif (silo, 'administrator') in user_groups:
+ c.roles = ["admin", "manager", "user"]
+ elif (silo, 'manager') in user_groups:
+ c.roles = ["manager", "user"]
+ else:
+ abort(403, "Do not have administrator or manager credentials for silo %s"%silo)
+ http_method = request.environ['REQUEST_METHOD']
+
+ c.kw = ag.granary.describe_silo(silo)
+ if http_method == "GET":
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
return render("/admin_siloview.html")
- else:
- abort(404)
- elif http_method == "POST":
- params = request.POST
- if ag.granary.issilo(silo_name):
- kw = {}
- for term in accepted_params:
- if term in params:
- kw[term] = params[term]
- ag.granary.describe_silo(silo_name, **kw)
- ag.granary.sync()
- # conneg return
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(dict(c.kw))
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops nothing satisfies - return text/html
+ return render("/admin_siloview.html")
+ elif http_method == "POST":
+ params = request.POST
+ #Get existing owners, admins, managers and submitters
+ owners = []
+ admins = []
+ managers = []
+ submitters = []
+ if 'owners' in c.kw and c.kw['owners']:
+ owners = [x.strip() for x in c.kw['owners'].split(",") if x]
+ if 'administrators' in c.kw and c.kw['administrators']:
+ admins = [x.strip() for x in c.kw['administrators'].split(",") if x]
+ if 'managers' in c.kw and c.kw['managers']:
+ managers = [x.strip() for x in c.kw['managers'].split(",") if x]
+ if 'submitters' in c.kw and c.kw['submitters']:
+ submitters = [x.strip() for x in c.kw['submitters'].split(",") if x]
+
+ #Get new members
+ new_owners = []
+ #Get new admins
+ new_admins = []
+ if 'administrators' in params and params['administrators']:
+ returned_admins = [x.strip() for x in params['administrators'].split(",") if x]
+ new_admins = [x for x in returned_admins if not x in admins]
+ new_owners.extend(new_admins)
+ #Get new managers
+ new_managers = []
+ if 'managers' in params and params['managers']:
+ returned_managers = [x.strip() for x in params['managers'].split(",") if x]
+ new_managers = [x for x in returned_managers if not x in managers]
+ new_owners.extend(new_managers)
+ #Get new submitters
+ new_submitters = []
+ if 'submitters' in params and params['submitters']:
+ returned_submitters = [x.strip() for x in params['submitters'].split(",") if x]
+ new_submitters = [x for x in returned_submitters if not x in submitters]
+ new_owners.extend(new_submitters)
+
+ #Check if the new members exist. If not return 403
+ existing_users = list_usernames()
+ new_owners = list(set(new_owners))
+ for o in new_owners:
+ if not o in existing_users:
+ abort (403, "User %s does not exist"%o)
+
+ if new_admins and not 'admin' in c.roles:
+ abort (403, "Only administrators can assing users to role 'administrator'")
+
+ owners.extend(new_owners)
+ new_admins = list(set(new_admins))
+ admins.extend(new_admins)
+ new_managers = list(set(new_managers))
+ managers.extend(new_managers)
+ new_submitters = list(set(new_submitters))
+ submitters.extend(new_submitters)
+
+ # Update silo info
+ updateMetadata = False
+ for term in accepted_params:
+ if term in params and not term in ['owners', 'administrators', 'managers', 'submitters'] and params[term]:
+ c.kw[term] = params[term]
+ updateMetadata = True
+ if new_owners or new_admins or new_managers or new_submitters or updateMetadata:
+ new_silo_users = []
+ if new_owners:
+ c.kw['owners'] = ','.join(owners)
+ if new_admins:
+ c.kw['administrators'] = ','.join(admins)
+ for a in new_admins:
+ new_silo_users.append((a, 'administrator'))
+ if new_managers:
+ c.kw['managers'] = ','.join(managers)
+ for a in new_managers:
+ new_silo_users.append((a, 'manager'))
+ if new_submitters:
+ c.kw['submitters'] = ','.join(submitters)
+ for a in new_submitters:
+ new_silo_users.append((a, 'submitter'))
+                #Add metadata changes to the silo
+ ag.granary.describe_silo(silo, **c.kw)
+ ag.granary.sync()
+ #Add new silo users into database
+ if new_silo_users:
+ add_group_users(silo, new_silo_users)
+ if updateMetadata:
+ try:
+ ag.b.silo_change(silo, ident=ident['repoze.who.userid'])
+ except:
+ pass
+
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
- if not accept_list:
- accept_list= [MT("text", "html")]
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ c.message = "Metadata updated"
+ c.kw = ag.granary.describe_silo(silo)
+ return render("/admin_siloview.html")
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = "text/plain"
+ response.status_int = 204
+ response.status = "204 Updated"
+ #return "Updated Silo %s" % silo
+ return
+ try:
mimetype = accept_list.pop(0)
- while(mimetype):
- if str(mimetype) in ["text/html", "text/xhtml"]:
- c.message = "Metadata updated"
- c.kw = ag.granary.describe_silo(silo_name)
- return render("/admin_siloview.html")
- else:
- response.status_int = 204
- return "Updated Silo %s" % silo_name
- else:
- # Create new silo
- g_root = config.get("granary.uri_root", "info:")
- c.silo = ag.granary.get_rdf_silo(silo_name, uri_base="%s%s/" % (g_root, silo_name))
- ag.granary._register_silos()
- kw = {}
- for term in accepted_params:
- if term in params:
- kw[term] = params[term]
- ag.granary.describe_silo(silo_name, **kw)
- ag.granary.sync()
- response.status_int = 201
- return "Created Silo %s" % silo_name
- elif http_method == "DELETE":
- if ag.granary.issilo(silo_name):
- # Deletion of an entire Silo...
- # Serious consequences follow this action
- # Walk through all the items, emit a delete msg for each
- # and then remove the silo
- todelete_silo = ag.granary.get_rdf_silo(silo_name)
- for item in todelete_silo.list_items():
- ag.b.deletion(silo_name, item, ident=ident['repoze.who.userid'])
- ag.granary.delete_silo(silo_name)
- ag.b.silo_deletion(silo_name, ident=ident['repoze.who.userid'])
- try:
- del ag.granary.state[silo_name]
- except:
- pass
- ag.granary.sync()
- ag.granary._register_silos()
- response.status_int = 200
- return """{'status':'Silo %s deleted'}""" % silo_name
- else:
- abort(404)
- else:
- abort(403)
-
+ except IndexError:
+ mimetype = None
+ # Whoops - nothing satisfies - return text/plain
+ response.content_type = "text/plain"
+ response.status_int = 204
+ response.status = "204 Updated"
+ return
+ elif http_method == "DELETE":
+ # Deletion of an entire Silo...
+ # Serious consequences follow this action
+ # Walk through all the items, emit a delete msg for each
+ # and then remove the silo
+ todelete_silo = ag.granary.get_rdf_silo(silo)
+ #for item in todelete_silo.list_items():
+ # try:
+ # ag.b.deletion(silo, item, ident=ident['repoze.who.userid'])
+ # except:
+ # pass
+ ag.granary.delete_silo(silo)
+ try:
+ ag.b.silo_deletion(silo, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ try:
+ del ag.granary.state[silo]
+ except:
+ pass
+ ag.granary.sync()
+ ag.granary._register_silos()
+ #Delete silo from database
+ delete_silo(silo)
+ # conneg return
+ accept_list = None
+ response.content_type = "text/plain"
+ response.status_int = 200
+ response.status = "200 OK"
+ return "{'ok':'true'}"
diff --git a/rdfdatabank/controllers/api.py b/rdfdatabank/controllers/api.py
new file mode 100644
index 0000000..ab8a779
--- /dev/null
+++ b/rdfdatabank/controllers/api.py
@@ -0,0 +1,40 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+
+from pylons import request, response, session, tmpl_context as c, url
+from pylons.controllers.util import abort, redirect
+from pylons import app_globals as ag
+from rdfdatabank.lib.base import BaseController, render
+
+class ApiController(BaseController):
+ def index(self):
+ redirect(url(controller="api", action="apiview", api_name="silos"))
+
+ def apiview(self, api_name):
+ if api_name not in ['silos', 'datasets', 'states', 'items']:
+ redirect(url(controller="api", action="apiview", api_name="silos"))
+ c.api_file = "%s_api.html"%api_name
+ return render('/api.html')
diff --git a/rdfdatabank/controllers/cookies.py b/rdfdatabank/controllers/cookies.py
new file mode 100755
index 0000000..33ec448
--- /dev/null
+++ b/rdfdatabank/controllers/cookies.py
@@ -0,0 +1,31 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+
+from rdfdatabank.lib.base import BaseController, render
+
+class CookiesController(BaseController):
+ def index(self):
+ return render('/cookies.html')
diff --git a/rdfdatabank/controllers/datasets.py b/rdfdatabank/controllers/datasets.py
new file mode 100644
index 0000000..7e21598
--- /dev/null
+++ b/rdfdatabank/controllers/datasets.py
@@ -0,0 +1,1092 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+import re, os, shutil, codecs
+import simplejson
+from datetime import datetime, timedelta
+from dateutil.relativedelta import *
+#from dateutil.parser import parse
+import time
+from uuid import uuid4
+from pylons import request, response, session, tmpl_context as c, url, app_globals as ag
+from pylons.controllers.util import abort, redirect
+from pylons.decorators import rest
+from paste.fileapp import FileApp
+from rdfdatabank.lib.base import BaseController, render
+from rdfdatabank.lib.utils import create_new, get_readme_text, serialisable_stat, allowable_id2, natural_sort
+from rdfdatabank.lib.utils import is_embargoed, test_rdf, munge_manifest, get_embargo_values, get_rdf_template, extract_metadata
+from rdfdatabank.lib.file_unpack import get_zipfiles_in_dataset
+from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse
+from rdfdatabank.lib.auth_entry import add_dataset, delete_dataset, get_datasets_count, get_datasets
+
+JAILBREAK = re.compile("[\/]*\.\.[\/]*")
+
+log = logging.getLogger(__name__)
+
+class DatasetsController(BaseController):
+ @rest.restrict('GET', 'POST')
+ def siloview(self, silo):
+ if not ag.granary.issilo(silo):
+ abort(404)
+ c.silo_name = silo
+ ident = request.environ.get('repoze.who.identity')
+ c.ident = ident
+
+ http_method = request.environ['REQUEST_METHOD']
+
+ if http_method == "GET":
+ if silo in ['ww1archives', 'digitalbooks']:
+ abort(501, "The silo %s contains too many data packages to list"%silo)
+ c.editor = False
+ if ag.metadata_embargoed:
+ if not ident:
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ c.editor = True
+ else:
+ if ident:
+ silos = ag.authz(ident)
+ if silo in silos:
+ c.editor = True
+
+ options = request.GET
+ c.start = 0
+ if 'start' in options and options['start']:
+ try:
+ c.start = int(options['start'])
+ except ValueError:
+ c.start = 0
+ c.rows = 100
+ if 'rows' in options and options['rows']:
+ try:
+ c.rows = int(options['rows'])
+ except ValueError:
+ c.rows = 100
+
+ c_silo = ag.granary.get_rdf_silo(silo)
+ # Get title of silo
+ state_info = ag.granary.describe_silo(silo)
+ if 'title' in state_info and state_info['title']:
+ c.title = state_info['title']
+ # Get number of data packages in silo
+ numFound = get_datasets_count(silo)
+ try:
+ c.numFound = int(numFound)
+ except ValueError:
+ c.numFound = 0
+
+ #c.embargos = {'params':{'numFound':numFound, 'start':c.start, 'rows':c.rows}}
+ c.embargos = {}
+ #for item in c_silo.list_items():
+ for item in get_datasets(silo, start=c.start, rows=c.rows):
+ try:
+ c.embargos[item] = is_embargoed(c_silo, item)
+ except:
+ c.embargos[item] = None
+ c.items = c.embargos.keys()
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ #Calculate the pagination for display of data packages
+ c.permissible_offsets = []
+ c.pages_to_show = 5
+ print type(c.start), type(c.pages_to_show), type(c.rows), type(c.numFound)
+ print c.start, c.pages_to_show, c.rows, c.numFound
+ try:
+ remainder = c.numFound % c.rows
+ if remainder > 0:
+ c.lastPage = c.numFound - remainder
+ else:
+ c.lastPage = c.numFound - c.rows
+
+ if c.numFound > c.rows:
+ offset_start = c.start - ( (c.pages_to_show/2) * c.rows )
+ if offset_start < 0:
+ offset_start = 0
+
+ offset_end = offset_start + (c.pages_to_show * c.rows)
+ if offset_end > c.numFound:
+ offset_end = c.numFound
+ if remainder > 0:
+ offset_start = c.lastPage - (c.pages_to_show * c.rows)
+ else:
+ offset_start = c.lastPage - ((c.pages_to_show-1) * c.rows)
+
+ if offset_start < 0:
+ offset_start = 0
+
+ c.permissible_offsets = list( xrange( offset_start, offset_end, c.rows) )
+ except ValueError:
+ pass
+ return render('/siloview.html')
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.embargos)
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops nothing satisfies - return text/html
+ return render('/siloview.html')
+ elif http_method == "POST":
+ if not ident:
+ abort(401, "Not Authorised")
+
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ params = request.POST
+
+ if not params.has_key("id"):
+ response.content_type = "text/plain"
+ response.status_int = 400
+ response.status = "400 Bad Request: Parameter 'id' is not available"
+ return "Parameter 'id' is not available"
+
+ c_silo = ag.granary.get_rdf_silo(silo)
+ if c_silo.exists(params['id']):
+ response.content_type = "text/plain"
+ response.status_int = 409
+ response.status = "409 Conflict: Data package already exists"
+ return "Data package already exists"
+
+ # Supported params:
+ # id, title, embargoed, embargoed_until, embargo_days_from_now
+ id = params['id']
+ if not allowable_id2(id):
+ response.content_type = "text/plain"
+ response.status_int = 400
+ response.status = "400 Bad request. Data package name not valid"
+ return "Data package name can only contain %s"%ag.naming_rule_humanized
+
+ del params['id']
+ item = create_new(c_silo, id, ident['repoze.who.userid'], **params)
+ add_dataset(silo, id)
+ # Broadcast change as message
+ try:
+ ag.b.creation(silo, id, ident=ident['repoze.who.userid'])
+ except:
+ pass
+
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ redirect(url(controller="datasets", action="datasetview", silo=silo, id=id))
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = "text/plain"
+ response.status_int = 201
+ response.status = "201 Created"
+ response.headers["Content-Location"] = url(controller="datasets", action="datasetview", silo=silo, id=id)
+ return "201 Created"
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ # Whoops - nothing satisfies - return text/plain
+ response.content_type = "text/plain"
+ response.status_int = 201
+ response.headers["Content-Location"] = url(controller="datasets", action="datasetview", silo=silo, id=id)
+ response.status = "201 Created"
+ return "201 Created"
+
+ @rest.restrict('GET', 'POST', 'DELETE')
+ def datasetview(self, silo, id):
+ if not ag.granary.issilo(silo):
+ abort(404)
+ # Check to see if embargo is on:
+ c.silo_name = silo
+ c.id = id
+
+ http_method = request.environ['REQUEST_METHOD']
+
+ ident = request.environ.get('repoze.who.identity')
+ c.ident = ident
+ c_silo = ag.granary.get_rdf_silo(silo)
+
+ c.version = None
+ c.editor = False
+
+ if not (http_method == "GET"):
+ #identity management of item
+ if not request.environ.get('repoze.who.identity'):
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+
+ if http_method in ["GET", "DELETE"]:
+ if not c_silo.exists(id):
+ abort(404)
+
+ if http_method == "GET":
+ embargoed = False
+ item = c_silo.get_item(id)
+
+ options = request.GET
+
+ currentversion = str(item.currentversion)
+ c.version = currentversion
+ if 'version' in options:
+ if not options['version'] in item.manifest['versions']:
+ abort(404)
+ c.version = str(options['version'])
+ if c.version and not c.version == currentversion:
+ item.set_version_cursor(c.version)
+
+ creator = None
+ if item.manifest and item.manifest.state and 'metadata' in item.manifest.state and item.manifest.state['metadata'] and \
+ 'createdby' in item.manifest.state['metadata'] and item.manifest.state['metadata']['createdby']:
+ creator = item.manifest.state['metadata']['createdby']
+
+ if ag.metadata_embargoed:
+ if not ident:
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ if ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager:
+ c.editor = True
+ elif item.metadata.get('embargoed') not in ["false", 0, False]:
+ #TODO: This will always provide the embargo information for the latest version.
+                # The embargo status should always reflect the latest version, but should the embargo information displayed be that of the version???
+ embargoed = True
+ if ident:
+ silos = ag.authz(ident)
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ if silo in silos:
+ #if ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]:
+ if ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager:
+ c.editor = True
+ elif ident:
+ silos = ag.authz(ident)
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ if silo in silos:
+ #if ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]:
+ if ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager:
+ c.editor = True
+
+ c.show_files = True
+ #Only the administrator, manager and creator can view embargoed files.
+ if embargoed and not c.editor:
+ c.show_files = False
+
+            #Display but do not edit previous versions of files, since previous versions are read only.
+ if c.version and not c.version == currentversion:
+ c.editor = False
+
+ # View options
+ if "view" in options and c.editor:
+ c.view = options['view']
+ elif c.editor:
+ c.view = 'editor'
+ else:
+ c.view = 'user'
+
+ c.embargos = {}
+ c.embargos[id] = is_embargoed(c_silo, id)
+ c.parts = item.list_parts(detailed=True)
+ c.manifest_pretty = item.rdf_to_string(format="pretty-xml")
+ c.metadata = None
+ c.metadata = extract_metadata(item)
+ c.versions = item.manifest['versions']
+ c.versions = natural_sort(c.versions)
+ #c.manifest = item.rdf_to_string()
+ c.manifest = get_rdf_template(item.uri, id)
+ c.zipfiles = get_zipfiles_in_dataset(item)
+ c.readme_text = None
+ #if item.isfile("README"):
+ if "README" in c.parts.keys():
+ c.readme_text = get_readme_text(item)
+ #if item.manifest:
+ # state = item.manifest.state
+
+ # conneg:
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ return render('/datasetview.html')
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ returndata = {}
+ returndata['embargos'] = c.embargos
+ returndata['view'] = c.view
+ returndata['show_files'] = c.show_files
+ returndata['editor'] = c.editor
+ returndata['parts'] = {}
+ for part in c.parts:
+ returndata['parts'][part] = serialisable_stat(c.parts[part])
+ returndata['readme_text'] = c.readme_text
+ returndata['manifest_pretty'] = c.manifest_pretty
+ returndata['manifest'] = c.manifest
+ returndata['zipfiles'] = c.zipfiles
+ if c.version:
+ returndata['version'] = c.version
+ #items['state'] = state
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(returndata)
+ elif str(mimetype).lower() in ["application/rdf+xml", "text/xml"]:
+ response.status_int = 200
+ response.status = "200 OK"
+ response.content_type = 'application/rdf+xml; charset="UTF-8"'
+ return c.manifest_pretty
+ elif str(mimetype).lower() == "text/rdf+n3":
+ response.content_type = 'text/rdf+n3; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return item.rdf_to_string(format="n3")
+ elif str(mimetype).lower() == "application/x-turtle":
+ response.content_type = 'application/x-turtle; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return item.rdf_to_string(format="turtle")
+ elif str(mimetype).lower() in ["text/rdf+ntriples", "text/rdf+nt"]:
+ response.content_type = 'text/rdf+ntriples; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return item.rdf_to_string(format="nt")
+ # Whoops - nothing satisfies
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+            #Whoops - nothing satisfies - default to text/html
+ return render('/datasetview.html')
+ elif http_method == "POST":
+ code = None
+ #Create new dataset if it does not exist
+ if not c_silo.exists(id):
+ if not allowable_id2(id):
+ response.content_type = "text/plain"
+ response.status_int = 400
+ response.status = "400 Bad request. Data package name not valid"
+ return "Data package name can contain only the following characters - %s and has to be more than 1 character"%ag.naming_rule_humanized
+ params = {}
+ item = create_new(c_silo, id, ident['repoze.who.userid'], **params)
+ add_dataset(silo, id)
+ code = 201
+ response.status = "201 Created"
+ response.status_int = 201
+ response.headers["Content-Location"] = url(controller="datasets", action="datasetview", id=id, silo=silo)
+                response_message = "201 Created empty data package"
+ #Update embargo info
+ params = request.POST
+ if params.has_key('embargoed') and params['embargoed']:
+ item = c_silo.get_item(id)
+ creator = None
+ if item.manifest and item.manifest.state and 'metadata' in item.manifest.state and item.manifest.state['metadata'] and \
+ 'createdby' in item.manifest.state['metadata'] and item.manifest.state['metadata']['createdby']:
+ creator = item.manifest.state['metadata']['createdby']
+ #if not (ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]):
+ if not (ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager):
+ abort(403)
+ if not params['embargoed'].lower() in ['true', 'false', '0', '1']:
+ abort(400, "The value for embargoed has to be either 'True' or 'False'")
+
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ if params.has_key('embargoed_until') and params['embargoed_until']:
+ e, e_d = get_embargo_values(embargoed=params['embargoed'], embargoed_until=params['embargoed_until'])
+ elif params.has_key('embargo_days_from_now') and params['embargo_days_from_now']:
+ e, e_d = get_embargo_values(embargoed=params['embargoed'], embargo_days_from_now=params['embargo_days_from_now'])
+ else:
+ e, e_d = get_embargo_values(embargoed=params['embargoed'])
+ item.metadata['embargoed_until'] = ''
+ item.del_triple(item.uri, u"oxds:isEmbargoed")
+ item.del_triple(item.uri, u"oxds:embargoedUntil")
+ try:
+ ag.r.set("%s:%s:embargoed_until" % (c_silo.state['storage_dir'], id), ' ')
+ except:
+ pass
+
+ if e:
+ item.metadata['embargoed'] = True
+ item.add_triple(item.uri, u"oxds:isEmbargoed", 'True')
+ try:
+ ag.r.set("%s:%s:embargoed" % (c_silo.state['storage_dir'], id), True)
+ except:
+ pass
+ if e_d:
+ item.metadata['embargoed_until'] = e_d
+ item.add_triple(item.uri, u"oxds:embargoedUntil", e_d)
+ try:
+ ag.r.set("%s:%s:embargoed_until" % (c_silo.state['storage_dir'], id), e_d)
+ except:
+ pass
+ else:
+ item.metadata['embargoed'] = False
+ item.add_triple(item.uri, u"oxds:isEmbargoed", 'False')
+ try:
+ ag.r.set("%s:%s:embargoed" % (c_silo.state['storage_dir'], id), False)
+ except:
+ pass
+
+ item.del_triple(item.uri, u"dcterms:modified")
+ item.add_triple(item.uri, u"dcterms:modified", datetime.now())
+ item.del_triple(item.uri, u"oxds:currentVersion")
+ item.add_triple(item.uri, u"oxds:currentVersion", item.currentversion)
+ item.sync()
+
+ if not code:
+ code = 204
+ response.content_type = "text/plain"
+ response.status_int = 204
+ response.status = "204 Updated"
+ response_message = None
+ if params.has_key('file'):
+ # File upload by a not-too-savvy method - Service-orientated fallback:
+ # Assume file upload to 'filename'
+ item = c_silo.get_item(id)
+ creator = None
+ if item.manifest and item.manifest.state and 'metadata' in item.manifest.state and item.manifest.state['metadata'] and \
+ 'createdby' in item.manifest.state['metadata'] and item.manifest.state['metadata']['createdby']:
+ creator = item.manifest.state['metadata']['createdby']
+ #if not (ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]):
+ if not (ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager):
+ abort(403)
+
+ upload = params.get('file')
+ #if not upload:
+ # abort(400, "No file was received")
+ filename = params.get('filename')
+ if not filename:
+ filename = params['file'].filename
+ if filename and JAILBREAK.search(filename) != None:
+ abort(400, "'..' cannot be used in the path or as a filename")
+ target_path = filename
+
+ if item.isfile(target_path):
+ code = 204
+ elif item.isdir(target_path):
+ response.content_type = "text/plain"
+ response.status_int = 403
+ response.status = "403 Forbidden"
+ return "Cannot POST a file on to an existing directory"
+ else:
+ code = 201
+
+ if filename == "manifest.rdf":
+ #Copy the uploaded file to a tmp area
+ #mani_file = os.path.join('/tmp', filename.lstrip(os.sep))
+ mani_file = os.path.join('/tmp', uuid4().hex)
+ mani_file_obj = open(mani_file, 'w')
+ shutil.copyfileobj(upload.file, mani_file_obj)
+ upload.file.close()
+ mani_file_obj.close()
+ #test rdf file
+ if not test_rdf(mani_file):
+ response.status_int = 400
+ return "Bad manifest file"
+ #munge rdf
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ a = item.get_rdf_manifest()
+ b = a.to_string()
+ #munge_manifest(manifest_str, item)
+ munge_manifest(mani_file, item)
+ else:
+ if code == 204:
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf', filename])
+ else:
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ item.put_stream(target_path, upload.file)
+ upload.file.close()
+ item.del_triple(item.uri, u"dcterms:modified")
+ item.add_triple(item.uri, u"dcterms:modified", datetime.now())
+ item.del_triple(item.uri, u"oxds:currentVersion")
+ item.add_triple(item.uri, u"oxds:currentVersion", item.currentversion)
+ item.sync()
+
+ if code == 201:
+ try:
+ ag.b.creation(silo, id, target_path, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ response.status = "201 Created"
+ response.status_int = 201
+ response.headers["Content-Location"] = url(controller="datasets", action="itemview", id=id, silo=silo, path=filename)
+ response_message = "201 Created. Added file %s to item %s" % (filename, id)
+ else:
+ try:
+ ag.b.change(silo, id, target_path, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ response.status = "204 Updated"
+ response.status_int = 204
+ response_message = None
+ elif params.has_key('text'):
+ # Text upload convenience service
+ item = c_silo.get_item(id)
+ filename = params.get('filename')
+ if not filename:
+ abort(400, "Bad Request. Must supply a filename")
+ if JAILBREAK.search(filename) != None:
+ abort(400, "'..' cannot be used in the path or as a filename")
+
+ creator = None
+ if item.manifest and item.manifest.state and 'metadata' in item.manifest.state and item.manifest.state['metadata'] and \
+ 'createdby' in item.manifest.state['metadata'] and item.manifest.state['metadata']['createdby']:
+ creator = item.manifest.state['metadata']['createdby']
+ #if not (ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]):
+ if not (ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager):
+ abort(403)
+
+ target_path = filename
+
+ if item.isfile(target_path):
+ code = 204
+ elif item.isdir(target_path):
+ response.content_type = "text/plain"
+ response.status_int = 403
+                    response.status = "403 Forbidden"
+ return "Cannot POST a file on to an existing directory"
+ else:
+ code = 201
+
+ if filename == "manifest.rdf":
+                # Validate to make sure it's valid RDF
+ # Otherwise this dataset will not be accessible
+ text = params['text']
+ fname = '/tmp/%s'%uuid4().hex
+ f = codecs.open(fname, 'w', 'utf-8')
+ #f = open(fname, 'w')
+ f.write(text)
+ f.close()
+ #if not test_rdf(text):
+ if not test_rdf(fname):
+ abort(400, "Not able to parse RDF/XML")
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ a = item.get_rdf_manifest()
+ b = a.to_string()
+ munge_manifest(fname, item)
+ os.remove(fname)
+ else:
+ if code == 204:
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf', filename])
+ else:
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ item.put_stream(target_path, params['text'].encode("utf-8"))
+ item.del_triple(item.uri, u"dcterms:modified")
+ item.add_triple(item.uri, u"dcterms:modified", datetime.now())
+ item.del_triple(item.uri, u"oxds:currentVersion")
+ item.add_triple(item.uri, u"oxds:currentVersion", item.currentversion)
+ item.sync()
+
+ if code == 201:
+ try:
+ ag.b.creation(silo, id, target_path, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ response.status = "201 Created"
+ response.status_int = 201
+ response.headers["Content-Location"] = url(controller="datasets", action="datasetview", id=id, silo=silo)
+ response_message = "201 Created. Added file %s to item %s" % (filename, id)
+ else:
+ try:
+ ag.b.change(silo, id, target_path, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ response.status = "204 Updated"
+ response.status_int = 204
+ response_message = None
+ if not code:
+ response.content_type = "text/plain"
+ response.status_int = 400
+ response.status = "400 Bad request"
+ return "400 Bad Request. No valid parameters found."
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ redirect(url(controller="datasets", action="datasetview", id=id, silo=silo))
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = "text/plain"
+ return response_message
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops - nothing satisfies - return text / plain
+ response.content_type = "text/plain"
+ return response_message
+ elif http_method == "DELETE":
+ item = c_silo.get_item(id)
+ creator = None
+ if item.manifest and item.manifest.state and 'metadata' in item.manifest.state and item.manifest.state['metadata'] and \
+ 'createdby' in item.manifest.state['metadata'] and item.manifest.state['metadata']['createdby']:
+ creator = item.manifest.state['metadata']['createdby']
+ #if not (ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]):
+ if not (ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager):
+ abort(403)
+
+ try:
+ ag.r.delete("%s:%s:embargoed_until" % (c_silo.state['storage_dir'], id))
+ ag.r.delete("%s:%s:embargoed" % (c_silo.state['storage_dir'], id))
+ except:
+ pass
+
+ # Broadcast deletion
+ try:
+ ag.b.deletion(silo, id, ident=ident['repoze.who.userid'])
+ except:
+ pass
+
+ c_silo.del_item(id)
+ delete_dataset(silo, id)
+
+ response.content_type = "text/plain"
+ response.status_int = 200
+ response.status = "200 OK"
+        return "{'ok':'true'}" # required for the JQuery magic delete to succeed.
+
+ @rest.restrict('GET', 'POST', 'PUT', 'DELETE')
+ def itemview(self, silo, id, path):
+ if not ag.granary.issilo(silo):
+ abort(404)
+
+ c.silo_name = silo
+ c.id = id
+ c.path = path
+
+ c_silo = ag.granary.get_rdf_silo(silo)
+ if not c_silo.exists(id):
+ abort(404)
+
+ ident = request.environ.get('repoze.who.identity')
+ c.ident = ident
+ item = c_silo.get_item(id)
+ creator = None
+ if item.manifest and item.manifest.state and 'metadata' in item.manifest.state and item.manifest.state['metadata'] and \
+ 'createdby' in item.manifest.state['metadata'] and item.manifest.state['metadata']['createdby']:
+ creator = item.manifest.state['metadata']['createdby']
+
+ c.version = None
+ c.editor = False
+
+ http_method = request.environ['REQUEST_METHOD']
+
+ if not (http_method == "GET"):
+ #identity management of item
+ if not request.environ.get('repoze.who.identity'):
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ #if not (ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]):
+ if not (ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager):
+ abort(403, "Forbidden")
+ elif http_method == "GET":
+ embargoed = False
+ options = request.GET
+
+ currentversion = str(item.currentversion)
+ c.version = currentversion
+ if 'version' in options:
+ if not options['version'] in item.manifest['versions']:
+ abort(404)
+ c.version = str(options['version'])
+ if c.version and not c.version == currentversion:
+ item.set_version_cursor(c.version)
+
+ if ag.metadata_embargoed:
+ if not ident:
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ #if ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]:
+ if ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager:
+ c.editor = True
+ elif item.metadata.get('embargoed') not in ["false", 0, False]:
+ if not ident:
+ abort(401)
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403)
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ #if not ident['repoze.who.userid'] == creator and not ident.get('role') in ["admin", "manager"]:
+ if not (ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager):
+ abort(403)
+ embargoed = True
+ c.editor = True
+ elif ident:
+ silos = ag.authz(ident)
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ if silo in silos:
+ #if ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]:
+ if ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager:
+ c.editor = True
+
+ c.show_files = True
+ #Only the administrator, manager and creator can view embargoed files.
+ if embargoed and not c.editor:
+ c.show_files = False
+
+ #Display but do not edit previous versions of files, since preious versions are read only.
+ if c.version and not c.version == currentversion:
+ c.editor = False
+
+ # View options
+ if "view" in options and c.editor:
+ c.view = options['view']
+ elif c.editor:
+ c.view = 'editor'
+ else:
+ c.view = 'user'
+
+ if http_method == "GET":
+ if item.isfile(path):
+ fileserve_app = FileApp(item.to_dirpath(path))
+ return fileserve_app(request.environ, self.start_response)
+ elif item.isdir(path):
+ #c.parts = item.list_parts(detailed=True)
+ c.versions = item.manifest['versions']
+ c.versions = natural_sort(c.versions)
+ c.parts = item.list_parts(path, detailed=True)
+ c.readme_text = None
+ if "README" in c.parts.keys():
+ c.readme_text = get_readme_text(item, "%s/README" % path)
+
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ return render("/itemview.html")
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ returndata = {}
+ returndata['parts'] = {}
+ for part in c.parts:
+ returndata['parts'][part] = serialisable_stat(c.parts[part])
+ returndata['readme_text'] = c.readme_text
+ return simplejson.dumps(returndata)
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops - nothing satisfies - return text/html
+ return render("/itemview.html")
+ else:
+ abort(404)
+ elif http_method == "PUT":
+ # Pylons loads the request body into request.body...
+ # This is not going to work for large files... ah well
+ # POST will handle large files as they are pushed to disc,
+ # but this won't
+ content = request.body
+
+ if JAILBREAK.search(path) != None:
+ abort(400, "'..' cannot be used in the path")
+
+ if item.isfile(path):
+ code = 204
+ elif item.isdir(path):
+ response.content_type = "text/plain"
+ response.status_int = 403
+ response.status = "403 Forbidden"
+ return "Cannot PUT a file on to an existing directory"
+ else:
+ code = 201
+
+ #Check if path is manifest.rdf - If, yes Munge
+ if "manifest.rdf" in path:
+ fname = '/tmp/%s'%uuid4().hex
+ f = open(fname, 'w')
+ f.write(content)
+ f.close()
+ #test content is valid rdf
+ #if not test_rdf(content):
+ "Manifest file created:", fname
+ if not test_rdf(fname):
+ response.status_int = 400
+ return "Bad manifest file"
+ #munge rdf
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ a = item.get_rdf_manifest()
+ b = a.to_string()
+ #munge_manifest(content, item)
+ munge_manifest(fname, item)
+ os.remove(fname)
+ else:
+ if code == 204:
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf', path])
+ else:
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ item.put_stream(path, content)
+ item.del_triple(item.uri, u"dcterms:modified")
+ item.add_triple(item.uri, u"dcterms:modified", datetime.now())
+ item.del_triple(item.uri, u"oxds:currentVersion")
+ item.add_triple(item.uri, u"oxds:currentVersion", item.currentversion)
+ item.sync()
+
+ if code == 201:
+ try:
+ ag.b.creation(silo, id, path, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ response.status = "201 Created"
+ response.status_int = 201
+ response.headers["Content-Location"] = url(controller="datasets", action="itemview", id=id, silo=silo, path=path)
+ response_message = "201 Created"
+ else:
+ try:
+ ag.b.change(silo, id, path, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ response.status = "204 Updated"
+ response.status_int = 204
+ response_message = None
+
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ redirect(url(controller="datasets", action="itemview", id=id, silo=silo, path=path))
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = "text/plain"
+ return response_message
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops - nothing satisfies - return text / plain
+ response.content_type = "text/plain"
+ return response_message
+ elif http_method == "POST":
+ # POST... differences from PUT:
+ # path = filepath that this acts on, should be dir, or non-existant
+ # if path is a file, this will revert to PUT's functionality and
+ # overwrite the file, if there is a multipart file uploaded
+ # Expected params: filename, file (uploaded file)
+ params = request.POST
+ if not params.has_key('file'):
+ abort(400, "No file was received")
+ filename = params.get('filename')
+ upload = params.get('file')
+ if not filename:
+ filename = params['file'].filename
+ if filename and JAILBREAK.search(filename) != None:
+ abort(400, "'..' cannot be used in the path or as a filename")
+ target_path = path
+ if item.isdir(path) and filename:
+ target_path = os.path.join(path, filename)
+
+ if item.isfile(target_path):
+ code = 204
+ elif item.isdir(target_path):
+ response.content_type = "text/plain"
+ response.status_int = 403
+ response.status = "403 Forbidden"
+ return "Cannot POST a file on to an existing directory"
+ else:
+ code = 201
+
+ if filename == "manifest.rdf":
+ #Copy the uploaded file to a tmp area
+ #mani_file = os.path.join('/tmp', filename.lstrip(os.sep))
+ mani_file = os.path.join('/tmp', uuid4().hex)
+ mani_file_obj = open(mani_file, 'w')
+ shutil.copyfileobj(upload.file, mani_file_obj)
+ upload.file.close()
+ mani_file_obj.close()
+ #test rdf file
+ if not test_rdf(mani_file):
+ response.status_int = 400
+ return "Bad manifest file"
+ #munge rdf
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ a = item.get_rdf_manifest()
+ b = a.to_string()
+ #munge_manifest(manifest_str, item)
+ munge_manifest(mani_file, item)
+ os.remove(mani_file)
+ else:
+ if code == 204:
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf', filename])
+ else:
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ item.put_stream(target_path, upload.file)
+ upload.file.close()
+ item.del_triple(item.uri, u"dcterms:modified")
+ item.add_triple(item.uri, u"dcterms:modified", datetime.now())
+ item.del_triple(item.uri, u"oxds:currentVersion")
+ item.add_triple(item.uri, u"oxds:currentVersion", item.currentversion)
+ item.sync()
+
+ if code == 201:
+ try:
+ ag.b.creation(silo, id, target_path, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ response.status = "201 Created"
+ response.status_int = 201
+ response.headers["Content-Location"] = url(controller="datasets", action="itemview", id=id, silo=silo, path=path)
+ response_message = "201 Created"
+ else:
+ try:
+ ag.b.change(silo, id, target_path, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ response.status = "204 Updated"
+ response.status_int = 204
+ response_message = None
+
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ redirect(url(controller="datasets", action="itemview", id=id, silo=silo, path=path))
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = "text/plain"
+ return response_message
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops - nothing satisfies - return text / plain
+ response.content_type = "text/plain"
+ return response_message
+ elif http_method == "DELETE":
+ if item.isfile(path):
+ if 'manifest.rdf' in path:
+ response.content_type = "text/plain"
+ response.status_int = 403
+ response.status = "403 Forbidden"
+ return "Forbidden - Cannot delete the manifest"
+ if '3=' in path or '4=' in path:
+ response.content_type = "text/plain"
+ response.status_int = 403
+ response.status = "403 Forbidden"
+ return "Forbidden - These files are generated by the system and cannot be deleted"
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ item.del_stream(path)
+ item.del_triple(item.uri, u"dcterms:modified")
+ item.add_triple(item.uri, u"dcterms:modified", datetime.now())
+ item.del_triple(item.uri, u"oxds:currentVersion")
+ item.add_triple(item.uri, u"oxds:currentVersion", item.currentversion)
+ item.sync()
+ try:
+ #ag.b.deletion(silo, id, path, ident=ident['repoze.who.userid'])
+ ag.b.change(silo, id, path, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ response.content_type = "text/plain"
+ response.status_int = 200
+ response.status = "200 OK"
+ return "{'ok':'true'}" # required for the JQuery magic delete to succeed.
+ elif item.isdir(path):
+ item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ item.del_triple(item.uri, u"oxds:currentVersion")
+ item.add_triple(item.uri, u"oxds:currentVersion", item.currentversion)
+ item.del_dir(path)
+ item.del_triple(item.uri, u"dcterms:modified")
+ item.add_triple(item.uri, u"dcterms:modified", datetime.now())
+ item.sync()
+ try:
+ #ag.b.deletion(silo, id, path, ident=ident['repoze.who.userid'])
+ ag.b.change(silo, id, path, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ response.content_type = "text/plain"
+ response.status_int = 200
+ response.status = "200 OK"
+ return "{'ok':'true'}" # required for the JQuery magic delete to succeed.
+ else:
+ abort(404)
+
diff --git a/rdfdatabank/controllers/doi.py b/rdfdatabank/controllers/doi.py
new file mode 100644
index 0000000..9ea87cf
--- /dev/null
+++ b/rdfdatabank/controllers/doi.py
@@ -0,0 +1,331 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from pylons import request, response, session, tmpl_context as c, url, app_globals as ag
+from pylons.controllers.util import abort
+from pylons.decorators import rest
+from datetime import datetime
+from rdflib import Literal, URIRef
+
+from rdfdatabank.lib.base import BaseController, render
+from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse
+from rdfdatabank.lib.HTTP_request import HTTPRequest
+from rdfdatabank.lib import short_pid
+from rdfdatabank.lib.auth_entry import list_silos
+from rdfdatabank.lib.doi_helper import get_doi_metadata, doi_count
+
+from rdfdatabank.config.doi_config import OxDataciteDoi
+
+class DoiController(BaseController):
+ """Class to generate and register DOIs along with the metadata of the data-package (POST),
+ update the metadata registered with the DOI (PUT), delete the DOI (DELETE) and
+ to get the information (GET) registered with Datacite - the organization responsible for minting DOIs
+
+ if the metadata for the data package is also under embargo, then a DOI cannot be registered for such data-packages
+ """
+ @rest.restrict('GET', 'POST', 'PUT', 'DELETE')
+ def datasetview(self, silo, id):
+ c.silo_name = silo
+ c.id = id
+
+ http_method = request.environ['REQUEST_METHOD']
+
+ granary_list = list_silos()
+ if not silo in granary_list:
+ abort(404)
+
+ c_silo = ag.granary.get_rdf_silo(silo)
+ if not c_silo.exists(id):
+ abort(404)
+
+ if ag.metadata_embargoed:
+ abort(403, "DOIs cannot be issued to data packages whose metadata is also under embargo")
+
+ ident = request.environ.get('repoze.who.identity')
+ c.ident = ident
+
+ item = c_silo.get_item(id)
+
+ creator = None
+ if item.manifest and item.manifest.state and 'metadata' in item.manifest.state and item.manifest.state['metadata'] and \
+ 'createdby' in item.manifest.state['metadata'] and item.manifest.state['metadata']['createdby']:
+ creator = item.manifest.state['metadata']['createdby']
+
+ c.version = item.currentversion
+ c.version_doi = None
+ c.editor = False
+
+ #Get version number
+ vnum = request.params.get('version', '') or ""
+ if vnum:
+ vnum = str(vnum)
+ if not vnum in item.manifest['versions']:
+ abort(404, "Version %s of data package %s not found"%(vnum, c.silo_name))
+ c.version = vnum
+
+ if not (http_method == "GET"):
+ #identity management of item
+ if not request.environ.get('repoze.who.identity'):
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ #if not (ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]):
+ if not (ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager):
+ abort(403, "Forbidden")
+ elif http_method == "GET":
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ #if ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]:
+ if ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager:
+ c.editor = True
+
+ version_uri = "%s/version%s"%(item.uri.rstrip('/'), c.version)
+ c.version_doi = item.list_rdf_objects(URIRef(version_uri), u"http://purl.org/ontology/bibo/doi")
+ if not c.version_doi or not c.version_doi[0]:
+ c.version_doi = None
+ else:
+ c.version_doi = c.version_doi[0]
+
+ doi_conf = OxDataciteDoi()
+ doi_api = HTTPRequest(endpointhost=doi_conf.endpoint_host, secure=True)
+ doi_api.setRequestUserPass(endpointuser=doi_conf.account, endpointpass=doi_conf.password)
+
+ # conneg:
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+
+ c.message = None
+ c.resp_status = None
+ c.resp_reason = None
+ c.metadata = None
+
+ if http_method == "GET":
+ #Get a list of all dois registered for this data package
+ c.dois = {}
+ for v in item.manifest['versions']:
+ doi_ans = None
+ doi_ans = item.list_rdf_objects(URIRef("%s/version%s"%(item.uri.rstrip('/'), v)), u"http://purl.org/ontology/bibo/doi")
+ if doi_ans and doi_ans[0]:
+ c.dois[v] = doi_ans[0]
+
+ c.heading = "Doi metadata information from Datacite"
+ if not c.version_doi:
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ #Doing this to avoid displaying the error page!!!
+ response.status_int = 200
+ response.status = "200 OK"
+ c.metadata = None
+ return render('/doiview.html')
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ c.message = 'DOI not registered for version %s of data package %s'%(c.version, c.silo_name)
+ return render('/doiview.html')
+
+ resource = "%s?doi=%s"%(doi_conf.endpoint_path_metadata, c.version_doi)
+ (resp, respdata) = doi_api.doHTTP_GET(resource=resource, expect_type='application/xml')
+ c.resp_reason = resp.reason
+ c.resp_status = resp.status
+ if resp.status < 200 or resp.status >= 300:
+ response.status_int = 400
+ response.status = "400 Bad Request"
+ response_msg = ''
+ c.metadata = ''
+ if resp.status == 403:
+ #TODO: Confirm 403 is not due to authorization
+ msg = "403 Forbidden - login error with Datacite or data package belongs to another party at Datacite."
+ elif resp.status == 404:
+ msg = "404 Not Found - DOI does not exist in DataCite's database"
+ elif resp.status == 410:
+ msg = "410 Gone - the requested data package was marked inactive (using DELETE method) at Datacite"
+ elif resp.status == 500:
+ msg = "500 Internal Server Error - Error retrieving the metadata from Datacite."
+ else:
+ msg = "Error retrieving the metadata from Datacite. %s"%str(resp.status)
+ c.message = msg
+ else:
+ response.status_int = 200
+ response.status = "200 OK"
+ c.metadata = respdata
+ response_msg = respdata
+ # conneg:
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ #Setting headers to 200 to avoid displaying the error page!!!
+ response.status_int = 200
+ response.status = "200 OK"
+ return render('/doiview.html')
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'text/plain; charset="UTF-8"'
+ return str(respdata.decode('utf-8'))
+ elif str(mimetype).lower() in ["application/rdf+xml", "text/xml"]:
+ response.status_int = 200
+ response.status = "200 OK"
+ response.content_type = 'text/xml; charset="UTF-8"'
+ return response_msg
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops - nothing satisfies - default to text/html
+ #Setting headers to 200 to avoid displaying the error page!!!
+ response.status_int = 200
+ response.status = "200 OK"
+ return render('/doiview.html')
+
+ if http_method == "POST":
+ item.set_version_cursor(c.version)
+ #1a. If doi does not exist for this version, generate doi
+ register_doi = False
+ if not c.version_doi:
+ cnt = doi_count()
+ if not cnt:
+ abort(400, "Error generating DOI")
+ register_doi = True
+ tiny_pid = short_pid.encode_url(cnt)
+ c.version_doi = "%s/bodleian%s.%s"%(doi_conf.prefix, tiny_pid, c.version)
+ #1b. Construct XML metadata
+ xml_metadata = get_doi_metadata(c.version_doi, item)
+ c.metadata = xml_metadata
+ #FOR TEST PURPOSES ONLY
+ #xml_metadata = False
+ if not xml_metadata and not register_doi:
+ #2a. If the doi already exists and there is no xml metadata to update, return bad request
+ c.message = "Could not update metadata"
+ response.status_int = 400
+ response.status = "Bad request"
+ c.metadata = ''
+ elif not xml_metadata and register_doi:
+ #2b. If the doi is not registered, but there is no xml metadata to update, register just the doi with datacite
+ c.heading = "Registering new DOI with Datacite"
+ resource = "%s"%doi_conf.endpoint_path_doi
+ body = "%s\n%s"%(c.version_doi, version_uri)
+ #body_unicode = unicode(body, "utf-8")
+ body_unicode = unicode(body)
+ (resp, respdata) = doi_api.doHTTP_POST(body_unicode, resource=resource, data_type='text/plain;charset=UTF-8')
+ c.resp_reason = resp.reason
+ c.resp_status = resp.status
+ if resp.status < 200 or resp.status >= 300:
+ response.status_int = 400
+ response.status = "400 Bad Request"
+ response_msg = "DOI not registered"
+ c.metadata = ''
+ if resp.status == 400:
+ msg = "400 Bad Request - Request body must be exactly two lines: DOI and URL"
+ elif resp.status == 403:
+ #TODO: Confirm 403 is not due to authorization
+ msg = "403 Forbidden - From Datacite: login problem, quota exceeded, wrong domain, wrong prefix"
+ elif resp.status == 500:
+ msg = "500 Internal Server Error - Error registering the DOI."
+ else:
+ msg = "Error retrieving the metadata from Datacite. %s"%str(resp.status)
+ c.message = msg
+ else:
+ #3. Add the DOI to the rdf metadata
+ item.add_namespace('bibo', "http://purl.org/ontology/bibo/")
+ item.add_triple(URIRef(version_uri), u"bibo:doi", Literal(c.version_doi))
+ item.del_triple(item.uri, u"dcterms:modified")
+ item.add_triple(item.uri, u"dcterms:modified", datetime.now())
+ item.sync()
+ response.status_int = 200
+ response.status = "200 OK"
+ response_msg = "DOI Registered. %s"%respdata
+ c.metadata = ''
+ c.message = "201 Created - DOI registered. %s"%respdata
+ else:
+ #register the DOI and metadata with Datacite
+ c.heading = "Registering new DOI along with its metadata with Datacite"
+ #body_unicode = unicode(xml_metadata, "utf-8")
+ #body_unicode = unicode(xml_metadata)
+ body_unicode = xml_metadata
+ resource = "%s?doi=%s&url=%s"%(doi_conf.endpoint_path_metadata, c.version_doi, version_uri)
+ (resp, respdata) = doi_api.doHTTP_POST(body_unicode, resource=resource, data_type='application/xml;charset=UTF-8')
+ c.resp_reason = resp.reason
+ c.resp_status = resp.status
+ if resp.status < 200 or resp.status >= 300:
+ response.status_int = 400
+ response.status = "400 Bad Request"
+ response_msg = "DOI and metadata not registered"
+ c.metadata = body_unicode
+ if resp.status == 400:
+ msg = "400 Bad Request - Invalid XML metadata"
+ elif resp.status == 403:
+ #TODO: Confirm 403 is not due to authorization
+ msg = "403 Forbidden - From Datacite: login problem, quota exceeded, wrong domain, wrong prefix"
+ elif resp.status == 500:
+ msg = "500 Internal Server Error - Error registering the DOI."
+ else:
+ msg = "Error retrieving the metadata from Datacite. %s"%str(resp.status)
+ c.message = msg
+ else:
+ #3. Add the DOI to the rdf metadata
+ item.add_namespace('bibo', "http://purl.org/ontology/bibo/")
+ item.add_triple(URIRef(version_uri), u"bibo:doi", Literal(c.version_doi))
+ item.del_triple(item.uri, u"dcterms:modified")
+ item.add_triple(item.uri, u"dcterms:modified", datetime.now())
+ item.sync()
+ response.status_int = 200
+ response.status = "200 OK"
+ response_msg = body_unicode
+ c.metadata = body_unicode
+ c.message = "201 Created - DOI registered. %s"%respdata
+ # conneg:
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ #Setting headers to 200 to avoid displaying the error page!!!
+ response.status_int = 200
+ response.status = "200 OK"
+ return render('/doiview.html')
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'text/plain; charset="UTF-8"'
+ return str(respdata.decode('utf-8'))
+ elif str(mimetype).lower() in ["application/rdf+xml", "text/xml"]:
+ response.status_int = 200
+ response.status = "200 OK"
+ response.content_type = 'text/xml; charset="UTF-8"'
+ return response_msg
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops - nothing satisfies - default to text/html
+ #Setting headers to 200 to avoid displaying the error page!!!
+ response.status_int = 200
+ response.status = "200 OK"
+ return render('/doiview.html')
+
diff --git a/rdfdatabank/controllers/error.py b/rdfdatabank/controllers/error.py
index f7bc3bc..78f5daf 100644
--- a/rdfdatabank/controllers/error.py
+++ b/rdfdatabank/controllers/error.py
@@ -1,12 +1,37 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
import cgi
from paste.urlparser import PkgResourcesParser
-from pylons import request
+from pylons import request, response, tmpl_context as c
from pylons.controllers.util import forward
from pylons.middleware import error_document_template
from webhelpers.html.builder import literal
-from rdfdatabank.lib.base import BaseController
+from rdfdatabank.lib.base import BaseController, render
+from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse
class ErrorController(BaseController):
@@ -22,13 +47,64 @@ class ErrorController(BaseController):
def document(self):
"""Render the error document"""
+ icode = 404
+ code= "404"
+ status = "Not Found"
resp = request.environ.get('pylons.original_response')
- content = literal(resp.body) or cgi.escape(request.GET.get('message', ''))
- page = error_document_template % \
- dict(prefix=request.environ.get('SCRIPT_NAME', ''),
- code=cgi.escape(request.GET.get('code', str(resp.status_int))),
- message=content)
- return page
+ if resp and resp.body:
+ content = literal(resp.body)
+ else:
+ content = request.GET.get('message', '')
+ if content:
+ content = cgi.escape(content)
+ if resp and resp.status_int:
+ icode = resp.status_int
+ code = str(resp.status_int)
+ elif request.GET.get('code', ''):
+ code = request.GET.get('code')
+ if code:
+ code = cgi.escape(code)
+ else:
+ code = 404
+ if resp and resp.status:
+ status = resp.status
+ c.message = request.GET.get('message', '')
+ if c.message:
+ c.message = cgi.escape(c.message)
+ else:
+ c.message = content
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "plain")]
+ if not accept_list:
+ accept_list= [MT("text", "plain")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ #page = error_document_template % \
+ #dict(prefix=request.environ.get('SCRIPT_NAME', ''),
+ # code=code,
+ # message=content)
+ #return page
+ c.status = status.replace(c.code, '').strip()
+ return render('/error.html')
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'text/plain; charset="UTF-8"'
+ response.status_int = icode
+ response.status = status
+ return content
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops nothing satisfies - return text/plain
+ response.content_type = 'text/plain; charset="UTF-8"'
+ response.status_int = resp.status_int
+ response.status = resp.status
+ return content
def img(self, id):
"""Serve Pylons' stock images"""
diff --git a/rdfdatabank/controllers/home.py b/rdfdatabank/controllers/home.py
new file mode 100644
index 0000000..be6e28c
--- /dev/null
+++ b/rdfdatabank/controllers/home.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+
+from rdfdatabank.lib.base import BaseController, render
+
+class HomeController(BaseController):
+ def index(self):
+ return render('/home.html')
diff --git a/rdfdatabank/controllers/items.py b/rdfdatabank/controllers/items.py
new file mode 100644
index 0000000..08a9449
--- /dev/null
+++ b/rdfdatabank/controllers/items.py
@@ -0,0 +1,506 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+import re, os, time
+from datetime import datetime, timedelta
+import simplejson
+from pylons import request, response, session, tmpl_context as c, url, app_globals as ag
+from pylons.controllers.util import abort, redirect
+from pylons.decorators import rest
+
+from rdfdatabank.lib.base import BaseController, render
+from rdfdatabank.lib.utils import create_new, allowable_id2
+from rdfdatabank.lib.file_unpack import check_file_mimetype, BadZipfile, get_zipfiles_in_dataset, unpack_zip_item, read_zipfile
+from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse
+
+log = logging.getLogger(__name__)
+JAILBREAK = re.compile("[\/]*\.\.[\/]*")
+
+class ItemsController(BaseController):
+ def siloview(self, silo):
+ abort(403, "Forbidden")
+
+ @rest.restrict('GET', 'POST')
+ def datasetview(self, silo, id):
+ """Get a list of zipfiles in dataset 'id' within the silo 'silo' and unpack a dataset."""
+
+ if not ag.granary.issilo(silo):
+ abort(404)
+
+ rdfsilo = ag.granary.get_rdf_silo(silo)
+ if not rdfsilo.exists(id):
+ abort (404)
+
+ #tmpl_context variables needed: c.silo_name, c.zipfiles, c.ident, c.id, c.path
+ c.silo_name = silo
+ c.id = id
+ ident = request.environ.get('repoze.who.identity')
+ c.ident = ident
+ dataset = rdfsilo.get_item(id)
+
+ creator = None
+ if dataset.manifest and dataset.manifest.state and 'metadata' in dataset.manifest.state and dataset.manifest.state['metadata'] and \
+ 'createdby' in dataset.manifest.state['metadata'] and dataset.manifest.state['metadata']['createdby']:
+ creator = dataset.manifest.state['metadata']['createdby']
+
+ http_method = request.environ['REQUEST_METHOD']
+
+ if http_method == "GET":
+ c.editor = False
+ if ag.metadata_embargoed:
+ if not ident:
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ #if ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]:
+ if ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager:
+ c.editor = True
+ elif ident:
+ silos = ag.authz(ident)
+ if silo in silos:
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ #if ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]:
+ if ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager:
+ c.editor = True
+ else:
+ #identity management of item
+ if not ident:
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ #if not (ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]):
+ if not (ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager):
+ abort(403, "Forbidden")
+
+ if http_method == "GET":
+ c.zipfiles = get_zipfiles_in_dataset(dataset)
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ return render("/list_of_zipfiles.html")
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ #return simplejson.dumps(dict(c.zipfiles))
+ return simplejson.dumps(list(c.zipfiles.keys()))
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops nothing satisfies - return text/html
+ return render("/list_of_zipfiles.html")
+ elif http_method == "POST":
+ params = request.POST
+ if not (params.has_key("filename") and params['filename']):
+ abort(400, "You must supply a filename to unpack")
+
+ item_real_filepath = dataset.to_dirpath()
+ target_filepath = "%s/%s"%(item_real_filepath, params['filename'])
+ if not os.path.isfile(target_filepath):
+ abort(404, "File to unpack not found")
+ if not check_file_mimetype(target_filepath, 'application/zip'):
+ abort(415, "File is not of type application/zip")
+
+ if params.has_key("id") and params['id']:
+ target_dataset_name = params['id']
+ else:
+ #(head, fn) = os.path.split(params['filename'])
+ #(fn, ext) = os.path.splitext(fn)
+ #target_dataset_name = "%s-%s"%(id,fn)
+ target_dataset_name = id
+
+ #step 1: Create / initialize target dataset
+ if not rdfsilo.exists(target_dataset_name):
+ if not allowable_id2(target_dataset_name):
+ response.content_type = "text/plain"
+ response.status_int = 400
+ response.status = "400 Bad request. Data package name not valid"
+ return "Data package name can contain only the following characters - %s and has to be more than 1 character"%ag.naming_rule_humanized
+ target_dataset = create_new(rdfsilo, target_dataset_name, ident['repoze.who.userid'])
+ response.status_int = 201
+ response.status = "201 Created"
+ response.headers["Content-Location"] = url(controller="datasets", action="datasetview", silo=silo, id=target_dataset_name)
+ response_message = "201 Created"
+ else:
+ target_dataset = rdfsilo.get_item(target_dataset_name)
+ response.status = "204 Updated"
+ response.status_int = 204
+ response_message = None
+
+ #step 2: Unpack zip item
+ try:
+ unpack_zip_item(target_dataset, dataset, params['filename'], rdfsilo, ident['repoze.who.userid'])
+ except BadZipfile:
+ abort(400, "BadZipfile: Couldn't unpack zipfile")
+
+ target_dataset.sync()
+ target_dataset.sync()
+ target_dataset.sync()
+
+ if response.status_int == 201:
+ try:
+ ag.b.creation(silo, id, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ else:
+ try:
+ ag.b.change(silo, id, ident=ident['repoze.who.userid'])
+ except:
+ pass
+
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ redirect(url(controller="datasets", action="datasetview", silo=silo, id=target_dataset_name))
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = "text/plain"
+ return response_message
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ # Whoops - nothing satisfies - return text/plain
+ response.content_type = "text/plain"
+ return response_message
+
+ @rest.restrict('GET', 'POST', 'PUT')
+ def itemview(self, silo, id, path):
+ """API call to
+ GET - read the contents of a zip-file (without having to unpack) and
+ POST- unpack a zip file into a new / existing dataset
+ PUT - Add the zipfile and unpack it onto the existing dataset"""
+ #tmpl_context variables needed: c.silo_name, c.zipfile_contents c.ident, c.id, c.path
+ if not path:
+ abort(400, "You must supply a filename to unpack")
+
+ if not ag.granary.issilo(silo):
+ abort(404)
+
+ rdfsilo = ag.granary.get_rdf_silo(silo)
+ if not rdfsilo.exists(id):
+ abort (404)
+
+ c.silo_name = silo
+ c.id = id
+ c.path = path
+
+ ident = request.environ.get('repoze.who.identity')
+ c.ident = ident
+ dataset = rdfsilo.get_item(id)
+
+ creator = None
+ if dataset.manifest and dataset.manifest.state and 'metadata' in dataset.manifest.state and dataset.manifest.state['metadata'] and \
+ 'createdby' in dataset.manifest.state['metadata'] and dataset.manifest.state['metadata']['createdby']:
+ creator = dataset.manifest.state['metadata']['createdby']
+
+ http_method = request.environ['REQUEST_METHOD']
+
+ if http_method == "GET":
+ if dataset.metadata.get('embargoed') not in ["false", 0, False]:
+ if not ident:
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ else:
+ if not ident:
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ #if not (ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]):
+ if not (ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager):
+ abort(403, "Forbidden")
+
+ item_real_filepath = dataset.to_dirpath()
+ target_filepath = "%s/%s"%(item_real_filepath, path)
+ #c.parts = dataset.list_parts(detailed=False)
+
+ if http_method in ["GET", "POST"]:
+ if not dataset.isfile(path):
+ abort(404, "File not found")
+ if not os.path.isfile(target_filepath):
+ abort(404, "File not found")
+ if not check_file_mimetype(target_filepath, 'application/zip'):
+ abort(415, "File is not of type application/zip")
+
+ if http_method == "GET":
+ try:
+ c.zipfile_contents = read_zipfile(target_filepath)
+ except BadZipfile:
+ abort(400, "Could not read zipfile")
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ return render("/zipfileview.html")
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.zipfile_contents)
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ # Whoops - nothing satisfies - return text/html
+ return render("/zipfileview.html")
+ elif http_method == "POST":
+ params = request.POST
+ #if not (params.has_key("filename") and params['filename']):
+ # abort(400, "You must supply a filename to unpack")
+
+ if params.has_key("id") and params['id']:
+ target_dataset_name = params['id']
+ else:
+ #(head, fn) = os.path.split(path)
+ #(fn, ext) = os.path.splitext(fn)
+ #target_dataset_name = "%s-%s"%(id,fn)
+ target_dataset_name = id
+
+ #step 1: Create / initialize target dataset
+ if not rdfsilo.exists(target_dataset_name):
+ if not allowable_id2(target_dataset_name):
+ response.content_type = "text/plain"
+ response.status_int = 400
+ response.status = "400 Bad request. Data package name not valid"
+ return "Data package name can contain only the following characters - %s and has to be more than 1 character"%ag.naming_rule_humanized
+ target_dataset = create_new(rdfsilo, target_dataset_name, ident['repoze.who.userid'])
+ response.status_int = 201
+ response.status = "201 Created"
+ response.headers["Content-Location"] = url(controller="datasets", action="datasetview", silo=silo, id=target_dataset_name)
+ response_message = "201 Created"
+ else:
+ target_dataset = rdfsilo.get_item(target_dataset_name)
+ response.status = "204 Updated"
+ response.status_int = 204
+ response_message = None
+
+ #step 2: Unpack zip item
+ try:
+                unpack_zip_item(target_dataset_name, dataset, path, rdfsilo, ident['repoze.who.userid'])  # NOTE(review): sibling calls at datasetview POST and itemview PUT pass target_dataset (the object), not its name — confirm this isn't a bug
+ except BadZipfile:
+ abort(400, "Couldn't unpack zipfile")
+
+ target_dataset.sync()
+ target_dataset.sync()
+ target_dataset.sync()
+
+ if response.status_int == 201:
+ try:
+ ag.b.creation(silo, id, ident=ident['repoze.who.userid'])
+ except:
+ pass
+ else:
+ try:
+ ag.b.change(silo, id, ident=ident['repoze.who.userid'])
+ except:
+ pass
+
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ redirect(url(controller="datasets", action="datasetview", silo=silo, id=target_dataset_name))
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = "text/plain"
+ return response_message
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ # Whoops - nothing satisfies - return text/plain
+ response.content_type = "text/plain"
+ return response_message
+ elif http_method == "PUT":
+ # Pylons loads the request body into request.body...
+ # This is not going to work for large files... ah well
+ # POST will handle large files as they are pushed to disc,
+ # but this won't
+ content = request.body
+
+ if JAILBREAK.search(path) != None:
+ abort(400, "'..' cannot be used in the path")
+
+ #Step 1: Put zipfile in dataset
+ if dataset.isdir(path):
+ response.content_type = "text/plain"
+ response.status_int = 403
+ response.status = "403 Forbidden"
+ return "Cannot PUT a file on to an existing directory"
+
+ if dataset.isfile(path):
+ code = 204
+ else:
+ code = 201
+
+ if code == 204:
+ dataset.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf', path])
+ else:
+ dataset.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+ dataset.put_stream(path, content)
+ dataset.del_triple(dataset.uri, u"dcterms:modified")
+ dataset.add_triple(dataset.uri, u"dcterms:modified", datetime.now())
+ dataset.del_triple(dataset.uri, u"oxds:currentVersion")
+ dataset.add_triple(dataset.uri, u"oxds:currentVersion", dataset.currentversion)
+ dataset.sync()
+
+ target_dataset = rdfsilo.get_item(id)
+ #step 2: Unpack zip item
+ if not check_file_mimetype(target_filepath, 'application/zip'):
+ abort(415, "File is not of type application/zip")
+ try:
+ unpack_zip_item(target_dataset, dataset, path, rdfsilo, ident['repoze.who.userid'])
+ except BadZipfile:
+ abort(400, "Couldn't unpack zipfile")
+
+ target_dataset.sync()
+ target_dataset.sync()
+ target_dataset.sync()
+
+ response.status = "204 Updated"
+ response.status_int = 204
+ response_message = None
+ try:
+ ag.b.change(silo, id, path, ident=ident['repoze.who.userid'])
+ except:
+ pass
+
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ redirect(url(controller="datasets", action="datasetview", silo=silo, id=id))
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = "text/plain"
+ return response_message
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ # Whoops - nothing satisfies - return text/plain
+ response.content_type = "text/plain"
+ return response_message
+
+ @rest.restrict('GET')
+ def subitemview(self, silo, id, path, subpath):
+        #Function to retrieve a file from the zipfile
+        #TODO
+        # I check to see the path is valid and it is a zip file.
+        # I do not deal with subpath. If it is a file - serve it. If it is a dir, show the contents of it.
+
+ #tmpl_context variables needed: c.silo_name, c.zipfile_contents c.ident, c.id, c.path
+ if not ag.granary.issilo(silo):
+ abort(404)
+
+ if not (path or subpath):
+ abort(400, "You must supply a filename to unpack")
+
+ rdfsilo = ag.granary.get_rdf_silo(silo)
+ if not rdfsilo.exists(id):
+ abort (404)
+
+ c.silo_name = silo
+ c.id = id
+ c.path = path
+ c.subpath = subpath
+
+ ident = request.environ.get('repoze.who.identity')
+ c.ident = ident
+ dataset = rdfsilo.get_item(id)
+
+ if dataset.metadata.get('embargoed') not in ["false", 0, False]:
+ if not ident:
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+
+ item_real_filepath = dataset.to_dirpath()
+ target_filepath = "%s/%s"%(item_real_filepath, path)
+ #c.parts = dataset.list_parts(detailed=False)
+ if not dataset.isfile(path):
+ abort(404, "File not found")
+ if not os.path.isfile(target_filepath):
+ abort(404, "File not found")
+ if not check_file_mimetype(target_filepath, 'application/zip'):
+ abort(415, "File is not of type application/zip")
+
+ #TODO : if subpath is a file - serve it. If subpath is a dir, show the contents of the dir
+
+ return render("/zipfilesubitemview.html")
+
diff --git a/rdfdatabank/controllers/keywords.py b/rdfdatabank/controllers/keywords.py
new file mode 100644
index 0000000..b3f25af
--- /dev/null
+++ b/rdfdatabank/controllers/keywords.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+
+from rdfdatabank.lib.base import BaseController, render
+
+class KeywordsController(BaseController):
+ def index(self):
+ return render('/keywords.html')
diff --git a/rdfdatabank/controllers/objects.py b/rdfdatabank/controllers/objects.py
deleted file mode 100644
index b2868f8..0000000
--- a/rdfdatabank/controllers/objects.py
+++ /dev/null
@@ -1,568 +0,0 @@
-import logging
-
-from pylons import request, response, session, tmpl_context as c
-from pylons.controllers.util import abort, redirect_to
-from pylons import app_globals as ag
-from rdfdatabank.lib.base import BaseController, render
-from rdfdatabank.lib.utils import create_new, is_embargoed, get_readme_text, test_rdf
-
-from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse
-
-from datetime import datetime, timedelta
-from paste.fileapp import FileApp
-
-import re, os
-
-JAILBREAK = re.compile("[\/]*\.\.[\/]*")
-
-import simplejson
-
-log = logging.getLogger(__name__)
-
-class ObjectsController(BaseController):
- def index(self):
- if not request.environ.get('repoze.who.identity'):
- abort(401, "Not Authorised")
- ident = request.environ.get('repoze.who.identity')
- granary_list = ag.granary.silos
- c.silos = ag.authz(granary_list, ident)
- c.ident = ident
- return render('/list_of_archives.html')
-
- def siloview(self, silo):
- if not request.environ.get('repoze.who.identity'):
- abort(401, "Not Authorised")
- ident = request.environ.get('repoze.who.identity')
- c.ident = ident
- granary_list = ag.granary.silos
- c.silos = ag.authz(granary_list, ident)
- if silo not in c.silos:
- abort(403, "Forbidden")
-
- c.silo_name = silo
- c.silo = ag.granary.get_rdf_silo(silo)
-
- http_method = request.environ['REQUEST_METHOD']
- if http_method == "GET":
- c.embargos = {}
- for item in c.silo.list_items():
- c.embargos[item] = is_embargoed(c.silo, item)
- c.items = c.silo.list_items()
- # conneg return
- accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
- if not accept_list:
- accept_list= [MT("text", "html")]
- mimetype = accept_list.pop(0)
- while(mimetype):
- if str(mimetype) in ["text/html", "text/xhtml"]:
- return render('/siloview.html')
- elif str(mimetype) in ["text/plain", "application/json"]:
- response.content_type = "text/plain"
- items = {}
- for item_id in c.items:
- items[item_id] = {}
- items[item_id]['embargo_info'] = c.embargos[item_id]
- return simplejson.dumps(items)
- try:
- mimetype = accept_list.pop(0)
- except IndexError:
- mimetype = None
-
- return render('/siloview.html')
- elif http_method == "POST":
- params = request.POST
- if params.has_key("id"):
- if c.silo.exists(params['id']):
- response.content_type = "text/plain"
- response.status_int = 409
- response.status = "409 Conflict: Object Already Exists"
- return "Object Already Exists"
- else:
- # Supported params:
- # id, title, embargoed, embargoed_until, embargo_days_from_now
- id = params['id']
- del params['id']
- item = create_new(c.silo, id, ident['repoze.who.userid'], **params)
-
- # Broadcast change as message
- ag.b.creation(silo, id, ident=ident['repoze.who.userid'])
-
- # conneg return
- accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
- if not accept_list:
- accept_list= [MT("text", "html")]
- mimetype = accept_list.pop(0)
- while(mimetype):
- if str(mimetype) in ["text/html", "text/xhtml"]:
- # probably a browser - redirect to newly created object
- redirect_to(controller="objects", action="itemview", silo=silo, id=id)
- elif str(mimetype) in ["text/plain"]:
- response.content_type = "text/plain"
- response.status_int = 201
- response.status = "201 Created"
- #response.headers.add("Content-Location", item.uri)
- return "Created"
- try:
- mimetype = accept_list.pop(0)
- except IndexError:
- mimetype = None
- # Whoops - nothing satisfies
- response.content_type = "text/plain"
- response.status_int = 201
- #response.headers.add("Content-Location", item.uri)
- response.status = "201 Created"
- return "Created"
-
- def itemview(self, silo, id):
- # Check to see if embargo is on:
- c.silo_name = silo
- c.id = id
- c.silo = ag.granary.get_rdf_silo(silo)
-
- c.embargoed = False
- if c.silo.exists(id):
- c.item = c.silo.get_item(id)
-
- if c.item.metadata.get('embargoed') not in ["false", 0, False]:
- c.embargoed = True
- c.embargos = {}
- c.embargos[id] = is_embargoed(c.silo, id)
- http_method = request.environ['REQUEST_METHOD']
-
- c.editor = False
-
- if not (http_method == "GET" and not c.embargoed):
- #identity management if item
- if not request.environ.get('repoze.who.identity'):
- abort(401, "Not Authorised")
- ident = request.environ.get('repoze.who.identity')
- c.ident = ident
- granary_list = ag.granary.silos
- if ident:
- c.silos = ag.authz(granary_list, ident)
- if silo not in c.silos:
- abort(403, "Forbidden")
- else:
- abort(403, "Forbidden")
-
- c.editor = silo in c.silos
-
- # Method determination
- if http_method == "GET":
- if c.silo.exists(id):
- # conneg:
- c.item = c.silo.get_item(id)
-
- c.parts = c.item.list_parts(detailed=True)
-
- if "README" in c.parts.keys():
- c.readme_text = get_readme_text(c.item)
-
- # View options
- options = request.GET
- if "view" in options:
- c.view = options['view']
- elif c.editor:
- c.view = 'editor'
- else:
- c.view = 'user'
-
- accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
- if not accept_list:
- accept_list= [MT("text", "html")]
- mimetype = accept_list.pop(0)
- while(mimetype):
- if str(mimetype) in ["text/html", "text/xhtml"]:
- return render('/itemview.html')
- elif str(mimetype) in ["text/plain", "application/json"]:
- response.content_type = 'application/json; charset="UTF-8"'
- def serialisable_stat(stat):
- stat_values = {}
- for f in ['st_atime', 'st_blksize', 'st_blocks', 'st_ctime', 'st_dev', 'st_gid', 'st_ino', 'st_mode', 'st_mtime', 'st_nlink', 'st_rdev', 'st_size', 'st_uid']:
- try:
- stat_values[f] = stat.__getattribute__(f)
- except AttributeError:
- pass
- return stat_values
- items = {}
- items['parts'] = {}
- for part in c.parts:
- items['parts'][part] = serialisable_stat(c.parts[part])
- if c.readme_text:
- items['readme_text'] = c.readme_text
- if c.item.manifest:
- items['state'] = c.item.manifest.state
- return simplejson.dumps(items)
- elif str(mimetype) in ["application/rdf+xml", "text/xml"]:
- response.content_type = 'application/rdf+xml; charset="UTF-8"'
- return c.item.rdf_to_string(format="pretty-xml")
- elif str(mimetype) == "text/rdf+n3":
- response.content_type = 'text/rdf+n3; charset="UTF-8"'
- return c.item.rdf_to_string(format="n3")
- elif str(mimetype) == "application/x-turtle":
- response.content_type = 'application/x-turtle; charset="UTF-8"'
- return c.item.rdf_to_string(format="turtle")
- elif str(mimetype) in ["text/rdf+ntriples", "text/rdf+nt"]:
- response.content_type = 'text/rdf+ntriples; charset="UTF-8"'
- return c.item.rdf_to_string(format="nt")
- # Whoops - nothing satisfies
- try:
- mimetype = accept_list.pop(0)
- except IndexError:
- mimetype = None
- abort(406)
- else:
- abort(404)
- elif http_method == "POST" and c.editor:
- params = request.POST
- if not c.silo.exists(id):
- if 'id' in params.keys():
- del params['id']
- item = create_new(c.silo, id, ident['repoze.who.userid'], **params)
-
- # Broadcast change as message
- ag.b.creation(silo, id, ident=ident['repoze.who.userid'])
-
- # conneg return
- accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
- if not accept_list:
- accept_list= [MT("text", "html")]
- mimetype = accept_list.pop(0)
- while(mimetype):
- if str(mimetype) in ["text/html", "text/xhtml"]:
- # probably a browser - redirect to newly created object
- redirect_to(controller="objects", action="itemview", silo=silo, id=id)
- elif str(mimetype) in ["text/plain"]:
- response.content_type = "text/plain"
- response.status_int = 201
- response.status = "201 Created"
- #response.headers.add("Content-Location", item.uri)
- return "Created"
- try:
- mimetype = accept_list.pop(0)
- except IndexError:
- mimetype = None
- # Whoops - nothing satisfies
- response.content_type = "text/plain"
- response.status_int = 201
- #response.headers.add("Content-Location", item.uri)
- response.status = "201 Created"
- return "Created"
- elif params.has_key('embargo_change'):
- item = c.silo.get_item(id)
- if params.has_key('embargoed'):
- item.metadata['embargoed'] = True
- else:
- #if is_embargoed(c.silo, id)[0] == True:
- item.metadata['embargoed'] = False
- if params.has_key('embargoed_until'):
- item.metadata['embargoed_until'] = params['embargoed_until']
- item.sync()
- e, e_d = is_embargoed(c.silo, id, refresh=True)
-
- # Broadcast change as message
- ag.b.embargo_change(silo, id, item.metadata['embargoed'], item.metadata['embargoed_until'], ident=ident['repoze.who.userid'])
-
- response.content_type = "text/plain"
- response.status_int = 200
- return simplejson.dumps({'embargoed':e, 'embargoed_until':e_d})
- elif params.has_key('file'):
- # File upload by a not-too-savvy method - Service-orientated fallback:
- # Assume file upload to 'filename'
- params = request.POST
- item = c.silo.get_item(id)
- filename = params.get('filename')
- if not filename:
- filename = params['file'].filename
- upload = params.get('file')
- if JAILBREAK.search(filename) != None:
- abort(400, "'..' cannot be used in the path or as a filename")
- target_path = filename
-
- if item.isfile(target_path):
- code = 200
- elif item.isdir(target_path):
- response.status_int = 403
- return "Cannot POST a file on to an existing directory"
- else:
- code = 201
- item.put_stream(target_path, upload.file)
- upload.file.close()
-
- if code == 201:
- ag.b.creation(silo, id, target_path, ident=ident['repoze.who.userid'])
- else:
- ag.b.change(silo, id, target_path, ident=ident['repoze.who.userid'])
- response.status_int = code
- # conneg return
- accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
- if not accept_list:
- accept_list= [MT("text", "html")]
- mimetype = accept_list.pop(0)
- while(mimetype):
- if str(mimetype) in ["text/html", "text/xhtml"]:
- redirect_to(controller="objects", action="itemview", id=id, silo=silo)
- elif str(mimetype) in ["text/plain"]:
- response.status_int = code
- return "Added file %s to item %s" % (filename, id)
- try:
- mimetype = accept_list.pop(0)
- except IndexError:
- mimetype = None
-
- response.status_int = code
- return "Added file %s to item %s" % (filename, id)
- elif params.has_key('text'):
- # Text upload convenience service
- params = request.POST
- item = c.silo.get_item(id)
- filename = params.get('filename')
- if not filename:
- abort(406, "Must supply a filename")
-
- if JAILBREAK.search(filename) != None:
- abort(400, "'..' cannot be used in the path or as a filename")
- target_path = filename
-
- if item.isfile(target_path):
- code = 204
- elif item.isdir(target_path):
- response.status_int = 403
- return "Cannot POST a file on to an existing directory"
- else:
- code = 201
-
- if filename == "manifest.rdf":
- # valid to make sure it's valid RDF
- # Otherwise this object will not be accessible
- text = params['text']
- if not test_rdf(text):
- abort(406, "Not able to parse RDF/XML")
-
- item.put_stream(target_path, params['text'].encode("utf-8"))
-
- if code == 201:
- ag.b.creation(silo, id, target_path, ident=ident['repoze.who.userid'])
- else:
- ag.b.change(silo, id, target_path, ident=ident['repoze.who.userid'])
- response.status_int = code
- # conneg return
- accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
- if not accept_list:
- accept_list= [MT("text", "html")]
- mimetype = accept_list.pop(0)
- while(mimetype):
- if str(mimetype) in ["text/html", "text/xhtml"]:
- redirect_to(controller="objects", action="itemview", id=id, silo=silo)
- elif str(mimetype) in ["text/plain"]:
- response.status_int = 200
- return "Added file %s to item %s" % (filename, id)
- try:
- mimetype = accept_list.pop(0)
- except IndexError:
- mimetype = None
-
- response.status_int = 200
- return "Added file %s to item %s" % (filename, id)
- else:
- ## TODO apply changeset handling
- ## 1 - store posted CS docs in 'version' "___cs"
- ## 2 - apply changeset to RDF manifest
- ## 3 - update state to reflect latest CS applied
- response.status_int = 204
- return
-
- elif http_method == "DELETE" and c.editor:
- if c.silo.exists(id):
- c.silo.del_item(id)
-
- # Broadcast deletion
- ag.b.deletion(silo, id, ident=ident['repoze.who.userid'])
-
- response.status_int = 200
- return "{'ok':'true'}" # required for the JQuery magic delete to succede.
- else:
- abort(404)
-
- def subitemview(self, silo, id, path):
- # Check to see if embargo is on:
- c.silo_name = silo
- c.id = id
- c.silo = ag.granary.get_rdf_silo(silo)
-
- embargoed = False
- if c.silo.exists(id):
- c.item = c.silo.get_item(id)
-
- if c.item.metadata.get('embargoed') not in ["false", 0, False]:
- embargoed = True
-
- http_method = request.environ['REQUEST_METHOD']
-
- c.editor = False
-
- if not (http_method == "GET" and not embargoed):
- #identity management if item
- if not request.environ.get('repoze.who.identity'):
- abort(401, "Not Authorised")
- ident = request.environ.get('repoze.who.identity')
- c.ident = ident
- granary_list = ag.granary.silos
- if ident:
- c.silos = ag.authz(granary_list, ident)
- if silo not in c.silos:
- abort(403, "Forbidden")
- else:
- abort(403, "Forbidden")
-
- c.editor = silo in c.silos
-
- c.path = path
-
- http_method = request.environ['REQUEST_METHOD']
-
- if http_method == "GET":
- if c.silo.exists(id):
- c.item = c.silo.get_item(id)
- if c.item.isfile(path):
- fileserve_app = FileApp(c.item.to_dirpath(path))
- return fileserve_app(request.environ, self.start_response)
- elif c.item.isdir(path):
- c.parts = c.item.list_parts(path, detailed=True)
-
- if "README" in c.parts.keys():
- c.readme_text = get_readme_text(c.item, "%s/README" % path)
- accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
- if not accept_list:
- accept_list= [MT("text", "html")]
- mimetype = accept_list.pop(0)
- while(mimetype):
- if str(mimetype) in ["text/html", "text/xhtml"]:
- return render("/subitemview.html")
- elif str(mimetype) in ["text/plain", "application/json"]:
- def serialisable_stat(stat):
- stat_values = {}
- for f in ['st_atime', 'st_blksize', 'st_blocks', 'st_ctime', 'st_dev', 'st_gid', 'st_ino', 'st_mode', 'st_mtime', 'st_nlink', 'st_rdev', 'st_size', 'st_uid']:
- try:
- stat_values[f] = stat.__getattribute__(f)
- except AttributeError:
- pass
- return stat_values
- response.content_type = "text/plain"
- items = {}
- items['parts'] = {}
- for part in c.parts:
- items['parts'][part] = serialisable_stat(c.parts[part])
- if c.readme_text:
- items['readme_text'] = c.readme_text
- return simplejson.dumps(items)
- try:
- mimetype = accept_list.pop(0)
- except IndexError:
- mimetype = None
- return render("/subitemview.html")
- else:
- abort(404)
- elif http_method == "PUT" and c.editor:
- if c.silo.exists(id):
- # Pylons loads the request body into request.body...
- # This is not going to work for large files... ah well
- # POST will handle large files as they are pushed to disc,
- # but this won't
- content = request.body
- item = c.silo.get_item(id)
-
- if JAILBREAK.search(path) != None:
- abort(400, "'..' cannot be used in the path")
-
- if item.isfile(path):
- code = 204
- elif item.isdir(path):
- response.status_int = 403
- return "Cannot PUT a file on to an existing directory"
- else:
- code = 201
-
- item.put_stream(path, content)
-
- if code == 201:
- ag.b.creation(silo, id, path, ident=ident['repoze.who.userid'])
- else:
- ag.b.change(silo, id, path, ident=ident['repoze.who.userid'])
-
- response.status_int = code
- return
- else:
- # item in which to store file doesn't exist yet...
- # DECISION: Auto-instantiate object and then put file there?
- # or error out with perhaps a 404?
- # Going with error out...
- response.status_int = 404
- return "Object %s doesn't exist" % id
- elif http_method == "POST" and c.editor:
- if c.silo.exists(id):
- # POST... differences from PUT:
- # path = filepath that this acts on, should be dir, or non-existant
- # if path is a file, this will revert to PUT's functionality and
- # overwrite the file, if there is a multipart file uploaded
- # Expected params: filename, file (uploaded file)
- params = request.POST
- item = c.silo.get_item(id)
- filename = params.get('filename')
- upload = params.get('file')
- if JAILBREAK.search(filename) != None:
- abort(400, "'..' cannot be used in the path or as a filename")
- target_path = path
- if item.isdir(path) and filename:
- target_path = os.path.join(path, filename)
-
- if item.isfile(target_path):
- code = 204
- elif item.isdir(target_path):
- response.status_int = 403
- return "Cannot POST a file on to an existing directory"
- else:
- code = 201
- item.put_stream(target_path, upload.file)
- upload.file.close()
-
- if code == 201:
- ag.b.creation(silo, id, target_path, ident=ident['repoze.who.userid'])
- else:
- ag.b.change(silo, id, target_path, ident=ident['repoze.who.userid'])
- response.status_int = code
- return
- else:
- # item doesn't exist yet...
- # DECISION: Auto-instantiate object and then put file there?
- # or error out with perhaps a 404?
- # Going with error out...
- response.status_int = 404
- return "Object %s doesn't exist" % id
- elif http_method == "DELETE" and c.editor:
- if c.silo.exists(id):
- item = c.silo.get_item(id)
- if item.isfile(path):
- item.del_stream(path)
-
- ag.b.deletion(silo, id, path, ident=ident['repoze.who.userid'])
- response.status_int = 200
- return "{'ok':'true'}" # required for the JQuery magic delete to succede.
- elif item.isdir(path):
- parts = item.list_parts(path)
- for part in parts:
- if item.isdir(os.path.join(path, part)):
- # TODO implement proper recursive delete, with RDF aggregation
- # updating
- abort(400, "Directory is not empty of directories")
- for part in parts:
- item.del_stream(os.path.join(path, part))
- ag.b.deletion(silo, id, os.path.join(path, part), ident=ident['repoze.who.userid'])
- item.del_stream(path)
- ag.b.deletion(silo, id, path, ident=ident['repoze.who.userid'])
- response.status_int = 200
- return "{'ok':'true'}" # required for the JQuery magic delete to succede.
- else:
- abort(404)
- else:
- abort(404)
diff --git a/rdfdatabank/controllers/packages.py b/rdfdatabank/controllers/packages.py
deleted file mode 100644
index b92d352..0000000
--- a/rdfdatabank/controllers/packages.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import logging
-
-from pylons import request, response, session, tmpl_context as c
-from pylons.controllers.util import abort, redirect_to
-
-from pylons import app_globals as ag
-from rdfdatabank.lib.base import BaseController, render
-
-import re, os
-
-from rdfdatabank.lib.unpack import store_zipfile, unpack_zip_item, BadZipfile
-
-from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse
-
-log = logging.getLogger(__name__)
-
-class PackagesController(BaseController):
- def index(self):
- if not request.environ.get('repoze.who.identity'):
- abort(401, "Not Authorised")
- ident = request.environ.get('repoze.who.identity')
- c.ident = ident
- granary_list = ag.granary.silos
- c.silos = ag.authz(granary_list, ident)
-
- return render('/list_of_zipfile_archives.html')
-
- def success(self, message):
- c.message = message
- return render("/success_message.html")
-
- def siloview(self, silo):
- if not request.environ.get('repoze.who.identity'):
- abort(401, "Not Authorised")
- ident = request.environ.get('repoze.who.identity')
- c.ident = ident
- granary_list = ag.granary.silos
- c.silos = ag.authz(granary_list, ident)
- if silo not in c.silos:
- abort(403, "Forbidden")
-
- c.silo_name = silo
- c.silo = ag.granary.get_rdf_silo(silo)
-
- http_method = request.environ['REQUEST_METHOD']
- if http_method == "GET":
- return render("/package_form_upload.html")
- elif http_method == "POST":
- params = request.POST
- if params.has_key("id") and params.has_key("file") and params['id'] and params['file'].filename:
- target_uri = "%s%s" % (c.silo.state['uri_base'], params['id'])
- info = {}
- info['package_filename'] = params['file'].filename
- zip_item = store_zipfile(c.silo, target_uri, params['file'], ident['repoze.who.userid'])
-
- # Broadcast zipfile creation
- ag.b.creation(silo, params['id'], ident=ident['repoze.who.userid'], package_type="zipfile")
-
- info['zip_id'] = zip_item.item_id
- info['zip_uri'] = zip_item.uri
- info['zip_target'] = target_uri
- info['zip_file_stat'] = zip_item.stat(info['package_filename'])
- info['zip_file_size'] = info['zip_file_stat'].st_size
- try:
- unpack_zip_item(zip_item, c.silo, ident['repoze.who.userid'])
-
- except BadZipfile:
- # Bad zip file
- info['unpacking_status'] = "FAIL - Couldn't unzip package"
- abort(500, "Couldn't unpack zipfile")
- # Broadcast derivative creation
- ag.b.creation(silo, params['id'], ident=ident['repoze.who.userid'])
-
- # 302 Redirect to new resource? 201 with Content-Location?
- # For now, content-location
- #response.headers.add("Content-Location", target_uri)
- # conneg return
- accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
- if not accept_list:
- accept_list= [MT("text", "html")]
- mimetype = accept_list.pop(0)
- while(mimetype):
- if str(mimetype) in ["text/html", "text/xhtml"]:
- c.info = info
- return render('/successful_package_upload.html')
- elif str(mimetype) == "application/json":
- response.status_int = 201
- response.content_type = 'application/json; charset="UTF-8"'
- return simplejson.dumps(info)
- elif str(mimetype) in ["application/rdf+xml", "text/xml"]:
- response.status_int = 201
- response.content_type = 'application/rdf+xml; charset="UTF-8"'
- return zip_item.rdf_to_string(format="pretty-xml")
- try:
- mimetype = accept_list.pop(0)
- except IndexError:
- mimetype = None
- # Whoops - nothing satisfies
- abort(406)
- else:
- abort(400, "You must supply a valid id")
- abort(404)
diff --git a/rdfdatabank/controllers/redirect.py b/rdfdatabank/controllers/redirect.py
new file mode 100644
index 0000000..444e142
--- /dev/null
+++ b/rdfdatabank/controllers/redirect.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+import logging
+from pylons import url
+
+from pylons.controllers.util import redirect
+from rdfdatabank.lib.base import BaseController
+
+class RedirectController(BaseController):
+ def index(self, id):
+ if id.lower().endswith(('.html', '.rdf', '.json')):
+ id = id.rsplit('.', 1)[0]
+ lid = id.lower()
+ if lid == 'dataset%3a1' or lid == 'dataset:1':
+ redirect(url(controller="datasets", action="datasetview", silo="general", id='Tick1AudioCorpus'))
+ elif lid == 'dataset%3A2.html' or lid == 'dataset:2':
+ redirect(url(controller="datasets", action="datasetview", silo="general", id='RobertDarnton'))
+ if lid == 'dataset%3A3' or lid == 'dataset:3':
+ redirect(url(controller="datasets", action="datasetview", silo="general", id='MostynBrown'))
+ else:
+ redirect(url(controller="datasets", action="datasetview", silo="general", id=id))
+
diff --git a/rdfdatabank/controllers/search.py b/rdfdatabank/controllers/search.py
index 09aa0c4..b481bd2 100644
--- a/rdfdatabank/controllers/search.py
+++ b/rdfdatabank/controllers/search.py
@@ -1,28 +1,522 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
import logging
+from urllib import urlencode, unquote, quote
+import json
from pylons import request, response, session, tmpl_context as c
-from pylons.controllers.util import abort, redirect_to
+from pylons.controllers.util import abort
from pylons import app_globals as ag
from rdfdatabank.lib.base import BaseController, render
+from rdfdatabank.lib.search_term import term_list
+from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse
log = logging.getLogger(__name__)
class SearchController(BaseController):
+ def __before__(self):
+ c.all_fields = term_list().get_all_search_fields()
+ c.field_names = term_list().get_search_field_dictionary()
+ c.facetable_fields = term_list().get_all_facet_fields()
+ c.types = term_list().get_type_field_dictionary()
+ c.search_fields = ['silo', 'id', 'title', 'uuid', 'embargoStatus', 'embargoedUntilDate', 'currentVersion', 'doi', 'publicationDate', 'abstract', 'description', 'creator', 'isVersionOf', 'isPartOf', 'subject', 'type']
+ c.sort_options = {'score desc':'Relevance', 'publicationDate desc':'Date (Latest to oldest)','publicationDate asc':'Date (Oldest to Latest)','silo asc':'Silo A to Z','silo desc':'Silo Z to A'}
+
def raw(self):
- http_method = request.environ['REQUEST_METHOD']
+ ident = request.environ.get('repoze.who.identity')
+ c.ident = ident
+
+ silos = None
+ if ag.metadata_embargoed:
+ if not ident:
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+
+ if silos and not isinstance(silos, basestring) and type(silos).__name__ == 'list':
+ silos = ' '.join(silos)
+
+ http_method = request.environ['REQUEST_METHOD']
if http_method == "GET":
params = request.GET
elif http_method == "POST":
params = request.POST
- if "q" in params and "wt" in params:
+
+ if not "q" in params:
+ abort(400, "Parameter 'q' is not available")
+
+ #If ag.metadata_embargoed, search only within your silos
+ if params['q'] == '*':
+ if silos:
+ params['q'] = """silo:(%s)"""%silos
+ else:
+ params['q'] = "*:*"
+ elif silos and not 'silo:' in params['q']:
+ params['q'] = """%s AND silo:(%s)"""%(params['q'], silos)
+
+ accept_list = None
+ if 'wt' in params and params['wt'] == "json":
+ accept_list = [MT("application", "json")]
+ elif 'wt' in params and params['wt'] == "xml":
+ accept_list = [MT("text", "xml")]
+ else:
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ params['wt'] = 'json'
+ accept_list= [MT("text", "html")]
+ break
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ params['wt'] = 'json'
+ accept_list= [MT("application", "json")]
+ break
+ elif str(mimetype).lower() in ["application/rdf+xml", "text/xml"]:
+ params['wt'] = 'xml'
+ accept_list = [MT("text", "xml")]
+ break
+ # Whoops - nothing satisfies
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+
+ if not 'wt' in params or not params['wt'] in ['json', 'xml']:
+ params['wt'] = 'json'
+ accept_list= [MT("text", "html")]
+ if not 'fl' in params or not params['fl']:
+ #Also include the following fields - date modified, publication year / publication date, embargo status, embargo date, version
+ params['fl'] = "id,silo,mediator,creator,title,score"
+ if not 'start' in params or not params['start']:
+ params['start'] = '0'
+ if not 'rows' in params or not params['rows']:
+ params['rows'] = '100'
+ try:
result = ag.solr.raw_query(**params)
- if params['wt'] == "json":
+ except:
+ result = {}
+
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ c.result = result
+ return render('/raw_search.html')
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return result
+ elif str(mimetype).lower() in ["application/rdf+xml", "text/xml"]:
+ response.content_type = 'text/xml; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return result
+ # Whoops - nothing satisfies
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+    #Whoops - nothing satisfies - default to text/html
+ c.result = result
+ return render('/raw_search.html')
+
+ def detailed(self, query=None, additional_fields=[]):
+
+ if query:
+ c.q = query
+ else:
+ c.q = request.params.get('q', None)
+ try:
+ c.q = unquote(c.q)
+ except:
+ pass
+
+ c.typ = 'all'
+ if request.params.get("type", None):
+ c.typ = request.params.get("type")
+
+ if not c.q or c.q == '*' or c.q == "":
+ c.q = "*:*"
+
+ # Search controls
+ truncate = request.params.get('truncate', None)
+ start = request.params.get('start', None)
+ rows = request.params.get('rows', None)
+ sort = request.params.get('sort', None)
+ fields = request.params.get('fl', None)
+ res_format = request.params.get('format', None)
+ if not res_format:
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ res_format = 'html'
+ break
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ res_format = 'json'
+ break
+ elif str(mimetype).lower() in ["text/xml"]:
+ res_format = 'xml'
+ break
+ elif str(mimetype).lower() in ["text/csv"]:
+ res_format = 'csv'
+ break
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ # Whoops - nothing satisfies - return text/plain
+ if not res_format:
+ res_format = 'html'
+
+ c.sort = 'score desc'
+ # Lock down the sort parameter.
+ if sort and sort in c.sort_options:
+ c.sort = sort
+ c.sort_text = c.sort_options[c.sort]
+
+ c.chosen_fields = []
+ c.chosen_fields.extend(c.search_fields)
+
+ if fields:
+ fields = fields.split(',')
+ if fields and type(fields).__name__ == 'list':
+ fields = [x.strip() for x in fields]
+ for fld in fields:
+ if fld in c.all_fields and not fld in c.chosen_fields:
+ c.chosen_fields.append(fld)
+
+ for fld in additional_fields:
+ if not fld in c.chosen_fields:
+ c.chosen_fields.append(fld)
+
+ c.fields_to_facet = []
+ c.fields_to_facet.extend(c.facetable_fields)
+
+ c.facet_limit = 10
+
+ c.chosen_facets = {}
+
+ query_filter = ""
+
+ #Setup to capture all the url parameters needed to regenerate this search
+ c.search = {}
+ filter_url = ""
+
+ for field in c.all_fields:
+ if request.params.get("filter"+field, None):
+ multi = request.params.getall("filter"+field)
+ c.chosen_facets[field] = []
+ #c.search["filter"+field] = ""
+ for m in multi:
+ try:
+ m = unquote(m)
+ except:
+ pass
+ m = m.strip()
+ m = m.strip('"')
+ c.chosen_facets[field].append(m)
+ query_filter += ' AND %s:"%s"'%(field, m)
+ try:
+ filter_url += '&filter%s=%s'%(field, quote('"%s"'%m))
+ except:
+ filter_url += '&filter%s=%s'%(field, '"%s"'%m)
+ #if field in c.fields_to_facet:
+ # del c.fields_to_facet[field]
+
+ for field in c.chosen_facets:
+ if field not in c.chosen_fields:
+ c.chosen_fields.append(field)
+
+ c.truncate = 450
+ c.start = 0
+ c.rows = 25
+
+ # Parse/Validate search controls
+ if truncate:
+ try:
+ c.truncate = int(truncate)
+ except ValueError:
+ pass
+ if c.truncate < 10:
+ c.truncate = 10
+ if c.truncate > 1000:
+ c.truncate = 1000
+
+ if start:
+ try:
+ c.start = int(start)
+ except ValueError:
+ pass
+ if c.start < 0:
+ c.start = 0
+
+ if rows:
+ try:
+ c.rows = int(rows)
+ except ValueError:
+ pass
+ if c.rows < 5:
+ c.rows = 5
+ elif c.rows > 5000:
+ c.rows=5000
+
+ #c.search['rows'] = c.rows
+ c.search['truncate'] = c.truncate
+ c.search['type'] = c.typ
+ #c.search['start'] = c.start
+ #c.search['sort'] = c.sort
+ #if c.q:
+ # c.search['q'] = c.q.encode('utf-8')
+ solr_params = {}
+
+ if c.q:
+ if c.typ and 'silo' in c.typ:
+ solr_params['q'] = c.q.encode('utf-8')+query_filter+" AND type:silo"
+ elif c.typ and 'dataset' in c.typ:
+ solr_params['q'] = c.q.encode('utf-8')+query_filter+" AND type:dataset"
+ elif c.typ and 'item' in c.typ and c.q != "*:*":
+ #solr_params['q'] = """aggregatedResource:"%s" %s"""%(c.q.encode('utf-8'),query_filter)
+ solr_params['q'] = """filename:"%s" %s"""%(c.q.encode('utf-8'),query_filter)
+ else:
+ solr_params['q'] = c.q.encode('utf-8')+query_filter
+
+ if res_format in ['json', 'xml', 'python', 'php']:
+ solr_params['wt'] = res_format
+ else:
+ solr_params['wt'] = 'json'
+
+ solr_params['fl'] = ','.join(c.chosen_fields)
+ solr_params['rows'] = c.rows
+ solr_params['start'] = c.start
+
+ if c.sort:
+ solr_params['sort'] = c.sort
+
+ if c.fields_to_facet:
+ solr_params['facet'] = 'true'
+ solr_params['facet.limit'] = c.facet_limit
+ solr_params['facet.mincount'] = 1
+ solr_params['facet.field'] = []
+ for facet in c.fields_to_facet:
+ solr_params['facet.field'].append(facet)
+
+ solr_response = None
+ try:
+ solr_response = ag.solr.raw_query(**solr_params)
+ except:
+ pass
+
+ c.add_facet = u"%ssearch/detailed?q=%s&" % (ag.root, c.q.encode('utf-8'))
+ c.add_facet = c.add_facet + urlencode(c.search) + filter_url
+
+ if not solr_response:
+ # conneg return
+ response.status_int = 200
+ response.status = "200 OK"
+ if res_format == "html":
+ c.numFound = 0
+ c.message = 'Sorry, either that search "%s" resulted in no matches, or the search service is not functional.' % c.q
+ return render('/search.html')
+ elif res_format == 'xml':
+ response.headers['Content-Type'] = 'application/xml'
+ response.charset = 'utf8'
+ c.atom = {}
+ return render('/atom_results.html')
+ elif res_format == 'json':
+ response.headers['Content-Type'] = 'application/json'
+ response.charset = 'utf8'
+ return {}
+ else:
+ response.headers['Content-Type'] = 'application/text'
+ response.charset = 'utf8'
+ return solr_response
+
+ response.status_int = 200
+ response.status = "200 OK"
+ if res_format == 'xml':
+ response.headers['Content-Type'] = 'application/xml'
+ response.charset = 'utf8'
+ c.atom = solr_response
+ return render('/atom_results.html')
+ elif res_format == 'json':
response.headers['Content-Type'] = 'application/json'
- elif params['wt'] == "xml":
- response.headers['Content-Type'] = 'text/xml'
- return result
- else:
- return render("/raw_search.html")
+ response.charset = 'utf8'
+ return solr_response
+ elif res_format in ['csv', 'python', 'php']:
+ response.headers['Content-Type'] = 'application/text'
+ response.charset = 'utf8'
+ return solr_response
+
+ search = json.loads(solr_response)
+
+ numFound = search['response'].get('numFound',None)
+
+ c.numFound = 0
+ c.permissible_offsets = []
+
+ c.pages_to_show = 5
+
+ try:
+ c.numFound = int(numFound)
+ remainder = c.numFound % c.rows
+ if remainder > 0:
+ c.lastPage = c.numFound - remainder
+ else:
+ c.lastPage = c.numFound - c.rows
+
+ if c.numFound > c.rows:
+ offset_start = c.start - ( (c.pages_to_show/2) * c.rows )
+ if offset_start < 0:
+ offset_start = 0
+
+ offset_end = offset_start + (c.pages_to_show * c.rows)
+ if offset_end > c.numFound:
+ offset_end = c.numFound
+ if remainder > 0:
+ offset_start = c.lastPage - (c.pages_to_show * c.rows)
+ else:
+ offset_start = c.lastPage - ((c.pages_to_show-1) * c.rows)
+
+ if offset_start < 0:
+ offset_start = 0
+
+ c.permissible_offsets = list( xrange( offset_start, offset_end, c.rows) )
+ except ValueError:
+ pass
+
+ c.docs = search['response'].get('docs',None)
+
+ if c.fields_to_facet:
+ c.returned_facets = {}
+ for facet in search['facet_counts']['facet_fields']:
+ facet_list = search['facet_counts']['facet_fields'][facet]
+ keys = facet_list[::2]
+ values = facet_list[1::2]
+ c.returned_facets[facet] = []
+ for index in range(len(keys)):
+ c.returned_facets[facet].append((keys[index],values[index]))
+
+ return render('/search.html')
+
+
+ def advanced(self):
+
+ c.q = "*:*"
+ c.typ = 'all'
+
+ # Search controls
+ format = 'html'
+ c.sort = 'score desc'
+ c.sort_text = c.sort_options[c.sort]
+
+ c.chosen_fields = []
+ c.chosen_fields.extend(c.search_fields)
+
+ c.fields_to_facet = []
+ c.fields_to_facet.extend(c.facetable_fields)
+
+ c.facet_limit = 10
+
+ c.chosen_facets = {}
+
+ query_filter = ""
+
+ #Setup to capture all the url parameters needed to regenerate this search
+ c.search = {}
+ filter_url = ""
+
+ c.truncate = 450
+ c.start = 0
+ c.rows = 25
+ c.search['truncate'] = c.truncate
+ c.search['type'] = c.typ
+
+ solr_params = {}
+
+ if c.q:
+ solr_params['q'] = c.q.encode('utf-8')+query_filter
+ solr_params['wt'] = 'json'
+ solr_params['fl'] = ','.join(c.chosen_fields)
+ solr_params['rows'] = c.rows
+ solr_params['start'] = c.start
+ if c.sort:
+ solr_params['sort'] = c.sort
+ if c.fields_to_facet:
+ solr_params['facet'] = 'true'
+ solr_params['facet.limit'] = c.facet_limit
+ solr_params['facet.mincount'] = 1
+ solr_params['facet.field'] = []
+ for facet in c.fields_to_facet:
+ solr_params['facet.field'].append(facet)
+ try:
+ solr_response = ag.solr.raw_query(**solr_params)
+ except:
+ solr_response = None
+
+ c.add_facet = u"%ssearch/detailed?q=%s&" % (ag.root, c.q.encode('utf-8'))
+ c.add_facet = c.add_facet + urlencode(c.search) + filter_url
+
+ if not solr_response:
+ # FAIL - do something here:
+ c.message = 'Sorry, either that search "%s" resulted in no matches, or the search service is not functional.' % c.q
+ h.redirect_to(controller='/search', action='index')
+
+ search = json.loads(solr_response)
+
+ numFound = search['response'].get('numFound',None)
+ try:
+ c.numFound = int(numFound)
+ except:
+ c.numFound = 0
+ c.docs = search['response'].get('docs',None)
+
+ if c.fields_to_facet:
+ c.returned_facets = {}
+ for facet in search['facet_counts']['facet_fields']:
+ facet_list = search['facet_counts']['facet_fields'][facet]
+ keys = facet_list[::2]
+ values = facet_list[1::2]
+ c.returned_facets[facet] = []
+ for index in range(len(keys)):
+ c.returned_facets[facet].append((keys[index],values[index]))
+
+ return render('/search_advanced.html')
diff --git a/rdfdatabank/controllers/searching.py b/rdfdatabank/controllers/searching.py
new file mode 100644
index 0000000..d215996
--- /dev/null
+++ b/rdfdatabank/controllers/searching.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+
+from rdfdatabank.lib.base import BaseController, render
+
+class SearchingController(BaseController):
+ def index(self):
+ return render('/searching.html')
diff --git a/rdfdatabank/controllers/silos.py b/rdfdatabank/controllers/silos.py
new file mode 100644
index 0000000..0c7c277
--- /dev/null
+++ b/rdfdatabank/controllers/silos.py
@@ -0,0 +1,155 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+from datetime import datetime, timedelta
+import re
+import simplejson
+
+from pylons import request, response, session, tmpl_context as c, app_globals as ag, url
+from pylons.controllers.util import abort
+from pylons.decorators import rest
+from paste.fileapp import FileApp
+
+from rdfdatabank.lib.base import BaseController, render
+from rdfdatabank.lib.utils import is_embargoed, getSiloModifiedDate
+from rdfdatabank.lib.auth_entry import list_silos, get_datasets_count
+from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse
+
+JAILBREAK = re.compile("[\/]*\.\.[\/]*")
+
+log = logging.getLogger(__name__)
+
+class SilosController(BaseController):
+ @rest.restrict('GET')
+ def index(self):
+ ident = request.environ.get('repoze.who.identity')
+ c.ident = ident
+ #granary_list = ag.granary.silos
+ #c.silos = granary_list
+ c.silos = list_silos()
+ if ag.metadata_embargoed:
+ if not ident:
+ abort(401, "Not Authorised")
+ c.silos = ag.authz(ident)
+
+ c.silo_infos = {}
+ for silo in c.silos:
+ c.silo_infos[silo] = []
+ state_info = ag.granary.describe_silo(silo)
+ if 'title' in state_info and state_info['title']:
+ c.silo_infos[silo].append(state_info['title'])
+ else:
+ c.silo_infos[silo].append(silo)
+ c.silo_infos[silo].append(get_datasets_count(silo))
+ c.silo_infos[silo].append(getSiloModifiedDate(silo))
+
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ return render('/list_of_silos.html')
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.silos)
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops nothing satisfies - return text/html
+ return render('/list_of_silos.html')
+
+ @rest.restrict('GET')
+ def siloview(self, silo):
+ if not ag.granary.issilo(silo):
+ abort(404)
+
+ ident = request.environ.get('repoze.who.identity')
+ c.ident = ident
+ c.silo_name = silo
+ c.editor = False
+ if ag.metadata_embargoed:
+ if not ident:
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ c.editor = True
+ elif ident:
+ silos = ag.authz(ident)
+ if silo in silos:
+ c.editor = True
+
+ if silo in ['ww1archives', 'digitalbooks']:
+ abort(501, "The silo %s contains too many data packages to list"%silo)
+
+ rdfsilo = ag.granary.get_rdf_silo(silo)
+ state_info = ag.granary.describe_silo(silo)
+ if 'title' in state_info and state_info['title']:
+ c.title = state_info['title']
+ c.embargos = {}
+ c.items = []
+ for item in rdfsilo.list_items():
+ c.embargos[item] = None
+ try:
+ c.embargos[item] = is_embargoed(rdfsilo, item)
+ except:
+ pass
+ c.items.append(item)
+ #c.embargos[item] = ()
+
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ return render('/siloview.html')
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.embargos)
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ #Whoops nothing satisfies - return text/html
+ return render('/siloview.html')
diff --git a/rdfdatabank/controllers/states.py b/rdfdatabank/controllers/states.py
new file mode 100644
index 0000000..29a653b
--- /dev/null
+++ b/rdfdatabank/controllers/states.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+import simplejson
+
+from pylons import request, response, app_globals as ag
+from pylons.controllers.util import abort
+from pylons.decorators import rest
+
+from rdfdatabank.lib.base import BaseController
+from rdfdatabank.lib.utils import is_embargoed, serialisable_stat
+from rdfdatabank.lib.auth_entry import get_datasets_count, get_datasets
+
+log = logging.getLogger(__name__)
+
+class StatesController(BaseController):
+ @rest.restrict('GET')
+ def siloview(self, silo):
+ """
+ Returns the state information of a silo.
+ Only authorized users with role 'admin' or 'manager' can view this information
+
+ The state information for a silo contains the following:
+ Name of the silo (machine name, used in uris) - ans["silo"]
+ Base URI for the silo - ans["uri_base"]
+ Users who can access the silo (silo owners) - ans["owners"]
+ Silo description - ans["description"]
+ Title of the silo (human readable) - ans["title"]
+ Disk allocation for the silo (in kB) - ans["disk_allocation"]
+ List of datasets in the silo (ans["datasets"])
+ with embargo information for each of the datasets
+ (ans["datasets"]["dataset_name"]["embargo_info"])
+ """
+
+ # Only authorized users can view state information.
+        # Should this be restricted to admins and managers only, or should users too be able to see this information?
+ # Going with restricting this information to admins and managers
+ if not ag.granary.issilo(silo):
+ abort(404)
+
+ ident = request.environ.get('repoze.who.identity')
+ if not ident:
+ abort(401, "Not Authorised")
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+ #if not ident.get('role') in ["admin", "manager"]:
+ if not (silo in silos_admin or silo in silos_manager):
+ abort(403, "Forbidden. You should be an administrator or manager to view this information")
+
+ options = request.GET
+ start = 0
+ if 'start' in options and options['start']:
+ try:
+ start = int(options['start'])
+ except:
+ start = 0
+ rows = 1000
+ if 'rows' in options and options['rows']:
+ try:
+ rows = int(options['rows'])
+ except:
+ rows = 1000
+
+ rdfsilo = ag.granary.get_rdf_silo(silo)
+ state_info = ag.granary.describe_silo(silo)
+ state_info['silo'] = silo
+ state_info['uri_base'] = ''
+ if rdfsilo.state and rdfsilo.state['uri_base']:
+ state_info['uri_base'] = rdfsilo.state['uri_base']
+ state_info['number of data packages'] = get_datasets_count(silo)
+ state_info['params'] = {'start':start, 'rows':rows}
+ items = {}
+ #for item in rdfsilo.list_items():
+ for item in get_datasets(silo, start=start, rows=rows):
+ items[item] = {}
+ try:
+ items[item]['embargo_info'] = is_embargoed(rdfsilo, item)
+ except:
+ pass
+ state_info['datasets'] = items
+
+ # conneg return
+ # Always return application/json
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(state_info)
+
+ @rest.restrict('GET')
+ def datasetview(self, silo, id):
+ if not ag.granary.issilo(silo):
+ abort(404)
+
+ ident = request.environ.get('repoze.who.identity')
+
+ if not ident:
+ abort(401, "Not Authorised")
+
+ silos = ag.authz(ident)
+ if silo not in silos:
+ abort(403, "Forbidden")
+ silos_admin = ag.authz(ident, permission='administrator')
+ silos_manager = ag.authz(ident, permission='manager')
+
+ rdfsilo = ag.granary.get_rdf_silo(silo)
+ if not rdfsilo.exists(id):
+ abort(404)
+
+ item = rdfsilo.get_item(id)
+
+ creator = None
+ if item.manifest and item.manifest.state and 'metadata' in item.manifest.state and item.manifest.state['metadata'] and \
+ 'createdby' in item.manifest.state['metadata'] and item.manifest.state['metadata']['createdby']:
+ creator = item.manifest.state['metadata']['createdby']
+ #if not (ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]):
+ if not (ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager):
+ abort(403, "Forbidden. You should be the creator or manager or administrator to view this information")
+
+ options = request.GET
+ if 'version' in options and options['version']:
+ if not options['version'] in item.manifest['versions']:
+ abort(404)
+ currentversion = str(item.currentversion)
+ vnum = str(options['version'])
+ if vnum and not vnum == currentversion:
+ item.set_version_cursor(vnum)
+
+ parts = item.list_parts(detailed=True)
+
+ dataset = {}
+ dataset['parts'] = {}
+ for part in parts:
+ dataset['parts'][part] = serialisable_stat(parts[part])
+ if item.manifest:
+ dataset['state'] = item.manifest.state
+
+ # Always return application/json
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(dataset)
+
diff --git a/rdfdatabank/controllers/sword.py b/rdfdatabank/controllers/sword.py
new file mode 100644
index 0000000..c908777
--- /dev/null
+++ b/rdfdatabank/controllers/sword.py
@@ -0,0 +1,2 @@
+from sss.pylons_sword_controller import SwordController
+__controller__ = "SwordController"
diff --git a/rdfdatabank/controllers/users.py b/rdfdatabank/controllers/users.py
new file mode 100644
index 0000000..1cc7b7d
--- /dev/null
+++ b/rdfdatabank/controllers/users.py
@@ -0,0 +1,538 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+import simplejson
+import codecs
+from pylons import request, response, session, config, tmpl_context as c, url
+from pylons.controllers.util import abort, redirect
+from pylons.decorators import rest
+from pylons import app_globals as ag
+from rdfdatabank.lib.base import BaseController, render
+from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse
+from rdfdatabank.lib.utils import allowable_id2
+from rdfdatabank.lib.auth_entry import add_user, update_user, delete_user, add_group_users, delete_group_users
+from rdfdatabank.lib.auth_entry import list_users, list_usernames, list_user_groups, list_group_users, list_user, list_group_usernames
+
+#from rdfdatabank.config import users
+
+log = logging.getLogger(__name__)
+
+accepted_params = ['title', 'description', 'notes', 'owners', 'disk_allocation']
+
+class UsersController(BaseController):
+ @rest.restrict('GET', 'POST')
+ def index(self):
+ if not request.environ.get('repoze.who.identity'):
+ abort(401, "Not Authorised")
+ ident = request.environ.get('repoze.who.identity')
+ if not ('administrator' in ident['permissions'] or 'manager' in ident['permissions']):
+ abort(403, "Do not have administrator or manager credentials")
+
+ c.ident = ident
+ #silos = ag.authz(ident, permission=['administrator', 'manager'])
+ c.users = list_users()
+ if 'administrator' in ident['permissions']:
+ c.roles = ["admin", "manager", "user"]
+ else:
+ c.roles = ["manager", "user"]
+
+ http_method = request.environ['REQUEST_METHOD']
+
+ if http_method == "GET":
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ return render("/users.html")
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.users)
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+            #Whoops nothing satisfies - return application/json
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.users)
+ elif http_method == "POST":
+ params = request.POST
+ if not ('username' in params and params['username'] and 'password' in params and params['password']):
+ abort(400, "username and password not supplied")
+ if not allowable_id2(params['username']):
+ response.content_type = "text/plain"
+ response.status_int = 400
+ response.status = "400 Bad request. Username not valid"
+ return "username can contain only the following characters - %s and has to be more than 1 character"%ag.naming_rule_humanized
+
+ existing_users = list_usernames()
+ if params['username'] in existing_users:
+ abort(403, "User exists")
+ if (('firstname' in params and params['firstname'] and 'lastname' in params and params['lastname']) \
+ or 'name' in params and params['name']):
+ add_user(params)
+ else:
+                abort(400, "The following parameters have to be supplied: username, password and name (or firstname and lastname)")
+ response.status_int = 201
+ response.status = "201 Created"
+ response.headers['Content-Location'] = url(controller="users", action="userview", username=params['username'])
+ response_message = "201 Created"
+
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ redirect(url(controller="users", action="userview", username=params['username']))
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = "text/plain"
+ return response_message
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ # Whoops - nothing satisfies - return text/plain
+ response.content_type = "text/plain"
+ return response_message
+
+ @rest.restrict('GET', 'POST', 'DELETE')
+ def userview(self, username):
+ if not request.environ.get('repoze.who.identity'):
+ abort(401, "Not Authorised")
+
+ ident = request.environ.get('repoze.who.identity')
+
+ http_method = request.environ['REQUEST_METHOD']
+
+        if http_method in ('GET', 'DELETE'):
+ #Admins, managers and user can see user data / delete the user
+ if not ('administrator' in ident['permissions'] or \
+ 'manager' in ident['permissions'] or ident['user'].user_name == username):
+ abort(403, "Do not have administrator or manager credentials to view profiles of other users")
+ elif http_method == 'POST':
+            #Only user can update their data
+ if not ident['user'].user_name == username:
+ abort(403, "Login as %s to edit profile"%username)
+
+ existing_users = list_usernames()
+ if not username in existing_users:
+ abort(404, "User not found")
+
+ c.ident = ident
+ c.username = username
+
+ if http_method == "GET":
+ c.user = list_user(username)
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ return render("/admin_user.html")
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.user)
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+            #Whoops nothing satisfies - return application/json
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.user)
+ elif http_method == "POST":
+ params = request.POST
+ if not('password' in params or 'name' in params or \
+ 'email' in params or 'firstname' in params or 'lastname' in params):
+ abort(400, "No valid parameters found")
+ params['username'] = username
+ update_user(params)
+ response.status_int = 204
+ response.status = "204 Updated"
+ response_message = None
+ # conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ redirect(url(controller="users", action="userview", username=username))
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = "text/plain"
+ return response_message
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+ # Whoops - nothing satisfies - return text/plain
+ response.content_type = "text/plain"
+ return response_message
+ elif http_method == "DELETE":
+ user_groups = list_user_groups(username)
+ if user_groups:
+ abort(403, "User is member of silos. Remove user from all silos before deleting them")
+ #Delete user from database
+ delete_user(username)
+        #Get all the silos user belongs to, remove them from each silo and sync silo metadata
+ # conneg return
+ accept_list = None
+ response.content_type = "text/plain"
+ response.status_int = 200
+ response.status = "200 OK"
+ return "{'ok':'true'}"
+
+ @rest.restrict('GET')
+ def siloview(self, silo):
+ if not request.environ.get('repoze.who.identity'):
+ abort(401, "Not Authorised")
+ if not ag.granary.issilo(silo):
+ abort(404)
+ ident = request.environ.get('repoze.who.identity')
+ c.ident = ident
+ silos = ag.authz(ident, permission=['administrator', 'manager'])
+ if not silo in silos:
+ abort(403, "Do not have administrator or manager credentials for silo %s"%silo)
+ user_groups = list_user_groups(ident['user'].user_name)
+ if ('*', 'administrator') in user_groups:
+ c.roles = ["admin", "manager", "user"]
+ elif (silo, 'administrator') in user_groups:
+ c.roles = ["admin", "manager", "user"]
+ elif (silo, 'manager') in user_groups:
+ c.roles = ["manager", "user"]
+ else:
+ abort(403, "Do not have administrator or manager credentials for silo %s"%silo)
+ c.silo = silo
+
+ http_method = request.environ['REQUEST_METHOD']
+
+ if http_method == "GET":
+ c.users = list_group_users(silo)
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ return render("/silo_users.html")
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.users)
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+            #Whoops nothing satisfies - return application/json
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.users)
+
+ @rest.restrict('GET', 'POST', 'DELETE')
+ def silouserview(self, silo, username):
+ if not request.environ.get('repoze.who.identity'):
+ abort(401, "Not Authorised")
+
+ if not ag.granary.issilo(silo):
+ abort(404)
+
+ ident = request.environ.get('repoze.who.identity')
+
+ http_method = request.environ['REQUEST_METHOD']
+ if http_method == 'GET':
+ silos = ag.authz(ident)
+ if not silo in silos:
+ abort(403, "User is not a member of the silo %s"%silo)
+ if not ('administrator' in ident['permissions'] or \
+ 'manager' in ident['permissions'] or ident['user'].user_name == username):
+ abort(403, "Do not have administrator or manager credentials to view profiles of other users")
+ else:
+ silos = ag.authz(ident, permission=['administrator', 'manager'])
+ if not silo in silos:
+ abort(403, "Do not have administrator or manager credentials for silo %s"%silo)
+ if not ('administrator' in ident['permissions'] or 'manager' in ident['permissions']):
+ abort(403, "Do not have administrator or manager credentials")
+
+ existing_users = list_usernames()
+ if not username in existing_users:
+ abort(404, "User not found")
+
+ c.ident = ident
+ c.silo = silo
+ c.username = username
+
+ if http_method == "GET":
+ a, m, s = list_group_usernames(silo)
+ if not (username in a or username in m or username in s):
+ abort(404, "User not found in silo")
+ c.user = list_user(username)
+ #if 'groups' in c.user and c.user['groups']:
+ # for i in c.user['groups']:
+ # if i[0] != silo:
+ # c.user['groups'].remove(i)
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ return render("/silo_user.html")
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.user)
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+            #Whoops nothing satisfies - return application/json
+ response.content_type = 'application/json; charset="UTF-8"'
+ response.status_int = 200
+ response.status = "200 OK"
+ return simplejson.dumps(c.user)
+ elif http_method == "POST":
+ params = request.POST
+ if not ('role' in params and params['role'] and params['role'] in ['administrator', 'manager', 'submitter']):
+ abort(400, "Parameters 'role' not found or is invalid")
+ kw = ag.granary.describe_silo(silo)
+ #Get existing owners, admins, managers and users
+ owners = []
+ admins = []
+ managers = []
+ submitters = []
+ if 'owners' in kw and kw['owners']:
+ owners = [x.strip() for x in kw['owners'].split(",") if x]
+ if 'administrators' in kw and kw['administrators']:
+ admins = [x.strip() for x in kw['administrators'].split(",") if x]
+ if 'managers' in kw and kw['managers']:
+ managers = [x.strip() for x in kw['managers'].split(",") if x]
+ if 'submitters' in kw and kw['submitters']:
+ submitters = [x.strip() for x in kw['submitters'].split(",") if x]
+ to_remove = []
+ to_add = []
+ if params['role'] == 'administrator':
+ if not 'administrator' in ident['permissions']:
+ abort(403, "Need to be administrator to add user to role admin")
+ if not username in admins:
+ to_add.append((username, 'administrator'))
+ admins.append(username)
+ if not username in owners:
+ owners.append(username)
+ if username in managers:
+ managers.remove(username)
+ to_remove.append((username, 'manager'))
+ if username in submitters:
+ submitters.remove(username)
+ to_remove.append((username, 'submitter'))
+ elif params['role'] == 'manager':
+ if not username in managers:
+ to_add.append((username, 'manager'))
+ managers.append(username)
+ if not username in owners:
+ owners.append(username)
+ if username in admins:
+ if not 'administrator' in ident['permissions']:
+ abort(403, "Need to be admin to modify user of role admin")
+ if len(admins) == 1:
+ abort(403, "Add another administrator to silo before updating user role")
+ admins.remove(username)
+ to_remove.append((username, 'administrator'))
+ if username in submitters:
+ submitters.remove(username)
+ to_remove.append((username, 'submitter'))
+ elif params['role'] == 'submitter':
+ if not username in submitters:
+ to_add.append((username, 'submitter'))
+ submitters.append(username)
+ if not username in owners:
+ owners.append(username)
+ if username in admins:
+ if not 'administrator' in ident['permissions']:
+ abort(403, "Need to be admin to modify user of role admin")
+ if len(admins) == 1:
+ abort(403, "Add another administrator to silo before updating user role")
+ admins.remove(username)
+ to_remove.append((username, 'administrator'))
+ if username in managers:
+ if len(managers) == 1 and len(admins) == 0:
+ abort(403, "Add another administrator or manager to silo before updating user role")
+ managers.remove(username)
+ to_remove.append((username, 'manager'))
+
+ owners = list(set(owners))
+ admins = list(set(admins))
+ managers = list(set(managers))
+ submitters = list(set(submitters))
+
+ # Update silo info
+ if to_remove or to_add:
+ kw['owners'] = ','.join(owners)
+ kw['administrators'] = ','.join(admins)
+ kw['managers'] = ','.join(managers)
+ kw['submitters'] = ','.join(submitters)
+ ag.granary.describe_silo(silo, **kw)
+ ag.granary.sync()
+
+ #Add new silo users into database
+ if to_add:
+ add_group_users(silo, to_add)
+ response.status_int = 201
+ response.status = "201 Created"
+ response.headers['Content-Location'] = url(controller="users", action="silouserview", silo=silo, username=username)
+ response_message = "201 Created"
+
+ if to_remove:
+ delete_group_users(silo, to_remove)
+ response.status_int = 204
+ response.status = "204 Updated"
+ response_message = None
+ else:
+ response.status_int = 400
+ response.status = "400 Bad Request"
+ response_message = "No updates to user role"
+
+ #Conneg return
+ accept_list = None
+ if 'HTTP_ACCEPT' in request.environ:
+ try:
+ accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
+ except:
+ accept_list= [MT("text", "html")]
+ if not accept_list:
+ accept_list= [MT("text", "html")]
+ mimetype = accept_list.pop(0)
+ while(mimetype):
+ if str(mimetype).lower() in ["text/html", "text/xhtml"]:
+ redirect(url(controller="users", action="silouserview", silo=silo, username=username))
+ elif str(mimetype).lower() in ["text/plain", "application/json"]:
+ response.content_type = 'application/json; charset="UTF-8"'
+ return response_message
+ try:
+ mimetype = accept_list.pop(0)
+ except IndexError:
+ mimetype = None
+            #Whoops nothing satisfies - return application/json
+ response.content_type = 'application/json; charset="UTF-8"'
+ return response_message
+ elif http_method == "DELETE":
+ kw = ag.granary.describe_silo(silo)
+ #Get existing owners, admins, managers and users
+ owners = []
+ admins = []
+ managers = []
+ submitters = []
+ if 'owners' in kw and kw['owners']:
+ owners = [x.strip() for x in kw['owners'].split(",") if x]
+ if 'administrators' in kw and kw['administrators']:
+ admins = [x.strip() for x in kw['administrators'].split(",") if x]
+ if 'managers' in kw and kw['managers']:
+ managers = [x.strip() for x in kw['managers'].split(",") if x]
+ if 'submitters' in kw and kw['submitters']:
+ submitters = [x.strip() for x in kw['submitters'].split(",") if x]
+
+ #Gather user roles to delete
+ to_remove = []
+ if username in admins:
+ if not 'administrator' in ident['permissions']:
+ abort(403, "Need to be admin to modify user of role admin")
+ if len(admins) == 1:
+ abort(403, "Add another administrator to silo before deleting user")
+ to_remove.append((username, 'administrator'))
+ admins.remove(username)
+ if username in managers:
+ if len(managers) == 1 and len(admins) == 0:
+ abort(403, "Add another administrator or manager to silo before deleting user")
+ managers.remove(username)
+ to_remove.append((username, 'manager'))
+ if username in submitters:
+ submitters.remove(username)
+ to_remove.append((username, 'submitter'))
+ if username in owners:
+ owners.remove(username)
+
+ owners = list(set(owners))
+ admins = list(set(admins))
+ managers = list(set(managers))
+ submitters = list(set(submitters))
+
+ if to_remove:
+ # Update silo info
+ kw['owners'] = ','.join(owners)
+ kw['administrators'] = ','.join(admins)
+ kw['managers'] = ','.join(managers)
+ kw['submitters'] = ','.join(submitters)
+ ag.granary.describe_silo(silo, **kw)
+ ag.granary.sync()
+ delete_group_users(silo, to_remove)
+ else:
+ abort(400, "No user to delete")
+ accept_list = None
+ response.content_type = "text/plain"
+ response.status_int = 200
+ response.status = "200 OK"
+ return "{'ok':'true'}"
+
diff --git a/rdfdatabank/lib/HTTP_request.py b/rdfdatabank/lib/HTTP_request.py
new file mode 100644
index 0000000..ad2a63c
--- /dev/null
+++ b/rdfdatabank/lib/HTTP_request.py
@@ -0,0 +1,188 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import logging
+import mimetypes
+import httplib
+import base64
+import urlparse
+import json as simplejson
+
+logger = logging.getLogger('Dataset')
+
+class HTTPRequest():
+ def __init__(self, endpointhost=None, secure=False):
+ if endpointhost:
+ self._endpointhost = endpointhost
+ self._endpointpath = None
+ self.secure = secure
+
+ def get_content_type(self, filename):
+ # Originally copied from http://code.activestate.com/recipes/146306/:
+ return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
+
+ def get_data_type(self, params):
+ files = []
+ fields = []
+ decoded_params = params.items()
+ for i in decoded_params:
+ if len(i) == 2:
+ fields.append(i)
+ elif len(i) == 4:
+ files.append(i)
+ return fields, files
+
+ def encode_multipart_formdata(self, fields, files):
+ # Originally copied from http://code.activestate.com/recipes/146306/:
+ """
+ fields is a sequence of (name, value) elements for regular form fields.
+ files is a sequence of (name, filename, value, filetype) elements for data to be uploaded as files
+ Return (content_type, body) ready for httplib.HTTP instance
+ """
+ BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
+ CRLF = '\r\n'
+ L = []
+ for (key, value) in fields:
+ L.append('--' + BOUNDARY)
+ L.append('Content-Disposition: form-data; name="%s"' % key)
+ L.append('')
+ L.append(value)
+ for (key, filename, value, filetype) in files:
+ L.append('--' + BOUNDARY)
+ L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
+            L.append('Content-Type: %s' % (filetype or self.get_content_type(filename)))
+ L.append('')
+ L.append(value)
+ L.append('--' + BOUNDARY + '--')
+ L.append('')
+ body = CRLF.join(L)
+ content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
+ return content_type, body
+
+ def setRequestEndPoint(self, endpointhost=None, endpointpath=None):
+ if endpointhost or endpointpath:
+ if endpointhost:
+ self._endpointhost = endpointhost
+ # Reset credentials if setting host
+ self._endpointuser = None
+ self._endpointpass = None
+ logger.debug("setRequestEndPoint: endpointhost %s: " % self._endpointhost)
+ if endpointpath:
+ self._endpointpath = endpointpath
+ logger.debug("setRequestEndPoint: endpointpath %s: " % self._endpointpath)
+ return
+
+ def setRequestUserPass(self, endpointuser=None, endpointpass=None):
+ if endpointuser:
+ self._endpointuser = endpointuser
+ self._endpointpass = endpointpass
+ logger.debug("setRequestEndPoint: endpointuser %s: " % self._endpointuser)
+ logger.debug("setRequestEndPoint: endpointpass %s: " % self._endpointpass)
+ else:
+ self._endpointuser = None
+ self._endpointpass = None
+ return
+
+ def getRequestPath(self, rel):
+ rel = rel or ""
+ return urlparse.urljoin(self._endpointpath,rel)
+
+ def getRequestUri(self, rel):
+ return "http://"+self._endpointhost+self.getRequestPath(rel)
+
+ def encodeFormData(self, params):
+ (fields, files) = self.get_data_type(params)
+ (reqtype, reqdata) = self.encode_multipart_formdata(fields, files)
+ return reqtype, reqdata
+
+ def doRequest(self, command, resource, reqdata=None, reqheaders={}):
+ if self._endpointuser:
+ auth = base64.encodestring("%s:%s" % (self._endpointuser, self._endpointpass)).strip()
+ reqheaders["Authorization"] = "Basic %s" % auth
+ if self.secure:
+ hc = httplib.HTTPSConnection(self._endpointhost)
+ else:
+ hc = httplib.HTTPConnection(self._endpointhost)
+ path = self.getRequestPath(resource)
+ response = None
+ responsedata = None
+ repeat = 10
+ while path and repeat > 0:
+ repeat -= 1
+ hc.request(command, path, reqdata, reqheaders)
+ response = hc.getresponse()
+ if response.status != 301: break
+ path = response.getheader('Location', None)
+ if path[0:6] == "https:":
+ # close old connection, create new HTTPS connection
+ hc.close()
+ hc = httplib.HTTPSConnection(self._endpointhost) # Assume same host for https:
+ else:
+ response.read() # Seems to be needed to free up connection for new request
+ logger.debug("Status: %i %s" % (response.status, response.reason))
+ responsedata = response.read()
+ hc.close()
+ return (response, responsedata)
+
+ def doHTTP_GET(self, endpointhost=None, endpointpath=None, resource=None, expect_type="*/*"):
+ reqheaders = {
+ "Accept": expect_type
+ }
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, responsedata) = self.doRequest("GET", resource, reqheaders=reqheaders)
+ #ctype = response.getheader('content-type')
+ #if (responsedata and expect_type.lower() == "application/json"): responsedata = simplejson.loads(responsedata)
+ #if (responsedata and "application/json" in ctype): responsedata = simplejson.loads(responsedata)
+ return (response, responsedata)
+
+    def doHTTP_POST(self, data, data_type="application/octet-stream",
+ endpointhost=None, endpointpath=None, resource=None, expect_type="*/*"):
+ reqheaders = {
+ "Content-type": data_type,
+ "Accept": expect_type
+ }
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, responsedata) = self.doRequest("POST", resource, reqdata=data, reqheaders=reqheaders)
+ #ctype = response.getheader('content-type')
+ #if (responsedata and expect_type.lower() == "application/json"): responsedata = simplejson.loads(responsedata)
+ #if (responsedata and "application/json" in ctype): responsedata = simplejson.loads(responsedata)
+ return (response, responsedata)
+
+    def doHTTP_PUT(self, data, data_type="application/octet-stream",
+ endpointhost=None, endpointpath=None, resource=None, expect_type="*/*"):
+ reqheaders = {
+ "Content-type": data_type,
+ "Accept": expect_type
+ }
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, responsedata) = self.doRequest("PUT", resource, reqdata=data, reqheaders=reqheaders)
+ #ctype = response.getheader('content-type')
+ #if (responsedata and "application/json" in ctype): responsedata = simplejson.loads(responsedata)
+ return (response, responsedata)
+
+ def doHTTP_DELETE(self, endpointhost=None, endpointpath=None, resource=None):
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, responsedata) = self.doRequest("DELETE", resource)
+ return (response, responsedata)
+
diff --git a/rdfdatabank/lib/app_globals.py b/rdfdatabank/lib/app_globals.py
index 4a916a0..591f143 100644
--- a/rdfdatabank/lib/app_globals.py
+++ b/rdfdatabank/lib/app_globals.py
@@ -1,3 +1,27 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
"""The application's Globals object"""
from pylons import config
@@ -6,9 +30,13 @@
from redis import Redis
from rdfdatabank.lib.utils import authz
-
+from rdfdatabank.lib.data_sync import sync_members
+from rdfdatabank.lib.htpasswd import HtpasswdFile
from rdfdatabank.lib.broadcast import BroadcastToRedis
+#from rdfdatabank.config.users import _USERS
+from rdfdatabank.config.namespaces import NAMESPACES, PREFIXES
+
class Globals(object):
"""Globals acts as a container for objects available throughout the
@@ -24,19 +52,91 @@ def __init__(self):
"""
self.authz = authz
-
+ #self.users = _USERS
+ self.NAMESPACES = NAMESPACES
+ self.PREFIXES = PREFIXES
+
+ if config.has_key("granary.uri_root"):
+ self.root = config['granary.uri_root']
+
if config.has_key("granary.store"):
self.granary = Granary(config['granary.store'])
if config.has_key("redis.host"):
self.redishost = config['redis.host']
- self.r = Redis(self.redishost)
+ try:
+ self.r = Redis(self.redishost)
+ except:
+ self.r = None
+ if self.r and config.has_key("broadcast.to") and config['broadcast.to'] == "redis" and config.has_key("broadcast.queue"):
+ self.b = BroadcastToRedis(config['redis.host'], config['broadcast.queue'])
+ else:
+ self.r = None
+ self.redishost = None
+ self.b = None
if config.has_key("solr.host"):
from solr import SolrConnection
self.solrhost = config['solr.host']
- self.solr = SolrConnection(self.solrhost)
+ try:
+ self.solr = SolrConnection(self.solrhost)
+ except:
+ self.solr = None
+ else:
+ self.solrhost = None
+ self.solr = None
- if config.has_key("broadcast.to"):
- if config['broadcast.to'] == "redis":
- self.b = BroadcastToRedis(config['redis.host'], config['broadcast.queue'])
+ if config.has_key("naming_rule"):
+ self.naming_rule = config['naming_rule']
+
+ if config.has_key("naming_rule_humanized"):
+ self.naming_rule_humanized = config['naming_rule_humanized']
+ elif config.has_key("naming_rule"):
+ self.naming_rule_humanized = config['naming_rule']
+
+ if config.has_key("metadata.embargoed"):
+ self.metadata_embargoed = config['metadata.embargoed']
+ if isinstance(self.metadata_embargoed, basestring):
+ if self.metadata_embargoed.lower().strip() == 'true':
+ self.metadata_embargoed = True
+ else:
+ self.metadata_embargoed = False
+ elif not type(self.metadata_embargoed).__name__ == 'bool':
+ self.metadata_embargoed = False
+ else:
+ self.metadata_embargoed = False
+
+ if config.has_key("auth.file"):
+ pwdfile = config['auth.file']
+ self.passwdfile = HtpasswdFile(pwdfile)
+ self.passwdfile.load()
+
+ if config.has_key("auth.info"):
+ self.userfile = config['auth.info']
+
+ if config.has_key("doi.count"):
+ self.doi_count_file = config['doi.count']
+
+ if config.has_key("formats_served"):
+ self.formats_served = config['formats_served']
+ else:
+ self.formats_served = ["text/html", "text/xhtml", "text/plain", "application/json", "application/rdf+xml", "text/xml"]
+
+ if config.has_key("publisher"):
+ self.publisher = config['publisher']
+ else:
+ self.publisher = "Bodleian Libraries, University of Oxford"
+
+ if config.has_key("rights"):
+ self.rights = config['rights']
+
+ if config.has_key("license"):
+ self.license = config['license']
+
+ if config.has_key("api.version"):
+ self.api_version = config['api.version']
+
+ try:
+ sync_members(self.granary)
+ except:
+ pass
diff --git a/rdfdatabank/lib/auth.py b/rdfdatabank/lib/auth.py
new file mode 100644
index 0000000..733f5be
--- /dev/null
+++ b/rdfdatabank/lib/auth.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+"""Intended to work like a quick-started SQLAlchemy plugin"""
+
+from repoze.what.middleware import AuthorizationMetadata
+from repoze.what.plugins.pylonshq import booleanize_predicates
+from repoze.what.plugins.sql import configure_sql_adapters
+from repoze.who.plugins.sa import SQLAlchemyAuthenticatorPlugin
+from repoze.who.plugins.sa import SQLAlchemyUserMDPlugin
+
+from rdfdatabank.model import meta, User, Group, Permission
+
# --- repoze.who / repoze.what wiring -------------------------------------
# Module-level plugin instances consumed by the auth middleware setup.
# All adapters are backed by the SQLAlchemy model (User, Group, Permission)
# through the scoped session in meta.Session.

# Authenticator plugin: checks submitted credentials against the User table.
authenticator = SQLAlchemyAuthenticatorPlugin(User, meta.Session)
#authenticator.translations['user_name'] = 'username'

# metadata provider plugins
#
# From the documentation in repoze.what.plugins.sql.adapters package
#
# For developers to be able to use the names they want in their model, both the
# groups and permissions source adapters use a "translation table" for the
# field and table names involved:
#  * Group source adapter:
#    * "section_name" (default: "group_name"): The name of the table field that
#      contains the primary key in the groups table.
#    * "sections" (default: "groups"): The groups to which a given user belongs.
#    * "item_name" (default: "user_name"): The name of the table field that
#      contains the primary key in the users table.
#    * "items" (default: "users"): The users that belong to a given group.
#  * Permission source adapter:
#    * "section_name" (default: "permission_name"): The name of the table field
#      that contains the primary key in the permissions table.
#    * "sections" (default: "permissions"): The permissions granted to a given
#      group.
#    * "item_name" (default: "group_name"): The name of the table field that
#      contains the primary key in the groups table.
#    * "items" (default: "groups"): The groups that are granted a given
#      permission.

#adapters = configure_sql_adapters(User, Group, Permission, meta.Session,
#                                  group_translations={'section_name': 'name',
#                                                      'item_name': 'username'},
#                                  permission_translations={'section_name': 'name',
#                                                           'item_name': 'username'})
# Default translations are used: the model's column names match the
# repoze.what defaults documented above, so no translation dicts are needed.
adapters = configure_sql_adapters(User, Group, Permission, meta.Session)

# Metadata provider: attaches the User object for the logged-in identity.
user = SQLAlchemyUserMDPlugin(User, meta.Session)
#user.translations['user_name'] = 'username'

# Authorization metadata: exposes group and permission sources to repoze.what.
group = AuthorizationMetadata(
    {'sqlauth': adapters['group']},
    {'sqlauth': adapters['permission']}
)

# THIS IS CRITICALLY IMPORTANT!  Without this your site will
# consider every repoze.what predicate True!
booleanize_predicates()
+
diff --git a/rdfdatabank/lib/auth_entry.py b/rdfdatabank/lib/auth_entry.py
new file mode 100644
index 0000000..4dc13b8
--- /dev/null
+++ b/rdfdatabank/lib/auth_entry.py
@@ -0,0 +1,437 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from rdfdatabank.model import meta, User, Group, Permission, Datasets
+from sqlalchemy.exc import IntegrityError
+#import traceback
+#import logging
+#log = logging.getLogger(__name__)
+
def add_silo(silo_name):
    """Create the three permission groups for a new silo.

    For silo_name '*' the databank-wide groups
    (databank_administrator/_manager/_submitter) are created; otherwise
    '<silo>_administrator', '<silo>_manager' and '<silo>_submitter'.
    Each group is linked to the matching Permission row.

    Returns True on success, False (after rollback) on IntegrityError.

    The original body repeated the same create-group stanza three times;
    collapsed into one loop over the role names — same groups, same order.
    """
    try:
        p_q = meta.Session.query(Permission)
        # '*' denotes the databank-wide (all-silos) group family.
        prefix = 'databank' if silo_name == '*' else silo_name
        for role in ('administrator', 'manager', 'submitter'):
            grp = Group()
            grp.group_name = u'%s_%s' % (prefix, role)
            grp.silo = u'%s' % silo_name
            meta.Session.add(grp)
            perm = p_q.filter(Permission.permission_name == u'%s' % role).one()
            perm.groups.append(grp)
        meta.Session.commit()
    except IntegrityError:
        # Group(s) already exist (or related constraint failed): undo and report.
        meta.Session.rollback()
        return False
    return True
+
def delete_silo(silo_name):
    """Delete the three permission groups belonging to a silo.

    silo_name '*' refers to the databank-wide groups. Returns True on
    success, False (after rollback) on IntegrityError.
    """
    prefix = 'databank' if silo_name == '*' else silo_name
    try:
        g_q = meta.Session.query(Group)
        # Look all three up first (NoResultFound propagates before any delete),
        # then delete and commit as one unit.
        doomed = [g_q.filter(Group.group_name == u'%s_%s' % (prefix, role)).one()
                  for role in ('administrator', 'manager', 'submitter')]
        for grp in doomed:
            meta.Session.delete(grp)
        meta.Session.commit()
        return True
    except IntegrityError:
        meta.Session.rollback()
        return False
+
def add_user(user_details):
    """Create a new User from a details dict.

    user_details -- dict with mandatory 'username' and 'password' keys;
                    'name', 'firstname', 'lastname', 'email' are optional
                    and only set when present and non-empty.
    Returns True on success, False (after rollback) on IntegrityError.
    """
    new_user = User()
    new_user.user_name = user_details['username']
    new_user._set_password(u'%s' % user_details['password'])

    # Copy the optional attributes that were actually supplied.
    for attr in ('name', 'firstname', 'lastname', 'email'):
        if user_details.get(attr):
            setattr(new_user, attr, u'%s' % user_details[attr])

    try:
        meta.Session.add(new_user)
        meta.Session.commit()
        return True
    except IntegrityError:
        meta.Session.rollback()
        return False
+
def update_user(user_details):
    """Update an existing User from a details dict.

    user_details -- dict keyed by 'username' (mandatory); any of
                    'password', 'name', 'firstname', 'lastname', 'email'
                    present and non-empty are applied.
    Returns False when 'username' is missing/empty or on IntegrityError
    (after rollback); True otherwise.
    """
    if not user_details.get('username'):
        return False
    try:
        existing = meta.Session.query(User).filter(
            User.user_name == u'%s' % user_details['username']).one()

        # Password goes through the model's hashing setter.
        if user_details.get('password'):
            existing._set_password(u'%s' % user_details['password'])

        for attr in ('name', 'firstname', 'lastname', 'email'):
            if user_details.get(attr):
                setattr(existing, attr, u'%s' % user_details[attr])

        meta.Session.commit()
        return True
    except IntegrityError:
        meta.Session.rollback()
        return False
+
def delete_user(username):
    """Delete the User row matching `username`.

    Returns True on success, False (after rollback) on IntegrityError.
    NoResultFound from .one() propagates if the user does not exist.
    """
    try:
        doomed = meta.Session.query(User).filter(
            User.user_name == u'%s' % username).one()
        meta.Session.delete(doomed)
        meta.Session.commit()
        return True
    except IntegrityError:
        meta.Session.rollback()
        return False
+
def add_user_groups(username, groups):
    """Add `username` to each group named by `groups`.

    groups -- list of (silo_name, permission_name) tuples; silo_name '*'
              maps to the databank-wide group family.
    Returns True on success, False (after rollback) on IntegrityError.
    """
    try:
        member = meta.Session.query(User).filter(
            User.user_name == u'%s' % username).one()
        g_q = meta.Session.query(Group)
        for silo_name, permission_name in groups:
            prefix = 'databank' if silo_name == '*' else silo_name
            grp = g_q.filter(
                Group.group_name == u'%s_%s' % (prefix, permission_name)).one()
            member.groups.append(grp)
        meta.Session.commit()
        return True
    except IntegrityError:
        meta.Session.rollback()
        return False
+
def delete_user_groups(username, groups):
    """Remove `username` from each group named by `groups`.

    groups -- list of (silo_name, permission_name) tuples; silo_name '*'
              maps to the databank-wide group family.
    Returns True on success, False (after rollback) on IntegrityError.

    The association rows are deleted with a parameterized statement instead
    of the original string-interpolated SQL (safer, and lets the driver
    handle quoting).
    """
    try:
        member = meta.Session.query(User).filter(
            User.user_name == u'%s' % username).one()
        g_q = meta.Session.query(Group)
        for silo_name, permission_name in groups:
            prefix = 'databank' if silo_name == '*' else silo_name
            grp = g_q.filter(
                Group.group_name == u'%s_%s' % (prefix, permission_name)).one()
            meta.Session.execute(
                "DELETE FROM user_group WHERE user_id=:uid and group_id=:gid",
                {'uid': member.id, 'gid': grp.id})
        meta.Session.commit()
    except IntegrityError:
        meta.Session.rollback()
        return False
    return True
+
def add_group_users(silo_name, user_groups):
    """Add several users to the groups of one silo.

    user_groups -- list of (user_name, permission_name) tuples.
    silo_name '*' maps to the databank-wide group family.
    Returns True on success, False (after rollback) on IntegrityError.
    """
    try:
        u_q = meta.Session.query(User)
        g_q = meta.Session.query(Group)
        prefix = 'databank' if silo_name == '*' else silo_name
        for username, permission_name in user_groups:
            member = u_q.filter(User.user_name == u'%s' % username).one()
            if not member:
                continue
            grp = g_q.filter(
                Group.group_name == u'%s_%s' % (prefix, permission_name)).one()
            member.groups.append(grp)
        meta.Session.commit()
        return True
    except IntegrityError:
        meta.Session.rollback()
        return False
+
def delete_group_users(silo_name, user_groups):
    """Remove several users from the groups of one silo.

    user_groups -- list of (user_name, permission_name) tuples.
    silo_name '*' maps to the databank-wide group family.
    Returns True on success, False (after rollback) on IntegrityError.

    The association rows are deleted with a parameterized statement instead
    of the original string-interpolated SQL (safer, and lets the driver
    handle quoting).
    """
    try:
        u_q = meta.Session.query(User)
        g_q = meta.Session.query(Group)
        prefix = 'databank' if silo_name == '*' else silo_name
        for username, permission_name in user_groups:
            member = u_q.filter(User.user_name == u'%s' % username).one()
            grp = g_q.filter(
                Group.group_name == u'%s_%s' % (prefix, permission_name)).one()
            meta.Session.execute(
                "DELETE FROM user_group WHERE user_id=:uid and group_id=:gid",
                {'uid': member.id, 'gid': grp.id})
        meta.Session.commit()
    except IntegrityError:
        meta.Session.rollback()
        return False
    return True
+
def list_users():
    """Return a list of dicts describing every user.

    Each dict carries user_name, name, firstname, lastname, email and
    'groups': a list of (silo, permission_name) tuples, one entry per
    permission of each group the user belongs to.
    """
    results = []
    for u in meta.Session.query(User):
        results.append({
            'user_name': u.user_name,
            'name': u.name,
            'firstname': u.firstname,
            'lastname': u.lastname,
            'email': u.email,
            'groups': [(g.silo, p.permission_name)
                       for g in u.groups
                       for p in g.permissions],
        })
    return results
+
def list_groups():
    """Return the Group query object (iterable over every group row)."""
    return meta.Session.query(Group)
+
def list_silos(star=False):
    """Return the distinct silo names found in the groups table.

    star -- when True the wildcard silo '*' is included; otherwise it is
            filtered out.
    """
    silos = []
    for grp in meta.Session.query(Group):
        if grp.silo in silos:
            continue
        if grp.silo == "*":
            if star:
                silos.append(grp.silo)
        else:
            silos.append(grp.silo)
    return silos
+
def list_permissions():
    """Return the Permission query object (iterable over every permission)."""
    return meta.Session.query(Permission)
+
def list_usernames():
    """Return the user_name of every user as a list."""
    return [u.user_name for u in meta.Session.query(User)]
+
def list_user_permissions(username, siloname):
    """Return the permission names `username` holds on `siloname`.

    Raises NoResultFound (via .one()) if the user does not exist.
    """
    member = meta.Session.query(User).filter(
        User.user_name == u'%s' % username).one()
    return [p.permission_name
            for g in member.groups if g.silo == siloname
            for p in g.permissions]
+
def list_user_groups(username):
    """Return (silo, permission_name) tuples for every group membership of
    `username`. Raises NoResultFound if the user does not exist."""
    member = meta.Session.query(User).filter(
        User.user_name == u'%s' % username).one()
    return [(g.silo, p.permission_name)
            for g in member.groups
            for p in g.permissions]
+
def list_group_users(siloname):
    """Return one dict per (user, permission) pair in the groups of
    `siloname`: user_name, permission, name, firstname, lastname.

    TODO(review): confirm only permissions associated with the user's own
    group are listed (carried over from the original implementation note).
    """
    members = []
    for u in meta.Session.query(User):
        for g in u.groups:
            if g.silo != siloname:
                continue
            for p in g.permissions:
                members.append({
                    'user_name': u.user_name,
                    'permission': p.permission_name,
                    'name': u.name,
                    'firstname': u.firstname,
                    'lastname': u.lastname})
    return members
+
def list_group_usernames(siloname):
    """Return (admins, managers, submitters): three duplicate-free lists of
    user names holding the respective permission on `siloname`."""
    admins, managers, submitters = [], [], []
    # Route each permission name to its output list.
    buckets = {'administrator': admins,
               'manager': managers,
               'submitter': submitters}
    for u in meta.Session.query(User):
        for g in u.groups:
            if g.silo != siloname:
                continue
            for p in g.permissions:
                bucket = buckets.get(p.permission_name)
                if bucket is not None and u.user_name not in bucket:
                    bucket.append(u.user_name)
    return (admins, managers, submitters)
+
def list_new_users():
    """Return users that belong to no group yet, as dicts with user_name,
    permission (always None), name, firstname and lastname."""
    return [{'user_name': u.user_name,
             'permission': None,
             'name': u.name,
             'firstname': u.firstname,
             'lastname': u.lastname}
            for u in meta.Session.query(User) if not u.groups]
+
def list_user(username):
    """Return a dict describing one user: id, user_name, name, firstname,
    lastname, email and 'groups' as (silo, permission_name) tuples.

    Raises NoResultFound if the user does not exist.
    """
    u = meta.Session.query(User).filter(
        User.user_name == u'%s' % username).one()
    return {
        'id': u.id,
        'user_name': u.user_name,
        'name': u.name,
        'firstname': u.firstname,
        'lastname': u.lastname,
        'email': u.email,
        'groups': [(g.silo, p.permission_name)
                   for g in u.groups
                   for p in g.permissions],
    }
+
def add_dataset(silo_name, id):
    """Record dataset `id` as belonging to `silo_name`.

    Returns True on success, False (after rollback) on IntegrityError
    (e.g. the record already exists).
    """
    record = Datasets()
    record.silo = u'%s' % silo_name
    record.id = u'%s' % id
    try:
        meta.Session.add(record)
        meta.Session.commit()
        return True
    except IntegrityError:
        meta.Session.rollback()
        return False
+
def delete_dataset(silo_name, id):
    """Remove the record of dataset `id` in `silo_name`.

    Returns True on success, False (after rollback) on IntegrityError.
    NoResultFound from .one() propagates when no such record exists.
    """
    try:
        record = meta.Session.query(Datasets).filter(
            Datasets.silo == u'%s' % silo_name).filter(
            Datasets.id == u'%s' % id).one()
        meta.Session.delete(record)
        meta.Session.commit()
        return True
    except IntegrityError:
        meta.Session.rollback()
        return False
+
def get_datasets_count(silo_name):
    """Return the number of datasets recorded for `silo_name`."""
    return meta.Session.query(Datasets).filter(
        Datasets.silo == u'%s' % silo_name).count()
+
def get_datasets(silo_name, start=0, rows=100):
    """Return up to `rows` dataset ids in `silo_name`, starting at offset
    `start`.

    start, rows -- accepted as ints or int-convertible strings; anything
                   non-numeric falls back to the defaults 0 and 100.

    The original bare `except:` clauses are narrowed to (TypeError,
    ValueError) — the only errors int() raises for bad input — so that
    unrelated exceptions are no longer silently swallowed.
    """
    try:
        start = int(start)
    except (TypeError, ValueError):
        start = 0
    try:
        rows = int(rows)
    except (TypeError, ValueError):
        rows = 100
    records = meta.Session.query(Datasets).filter(
        Datasets.silo == u'%s' % silo_name).limit(rows).offset(start).all()
    return [record.id for record in records]
+
diff --git a/rdfdatabank/lib/base.py b/rdfdatabank/lib/base.py
index 56a090b..0ed3113 100644
--- a/rdfdatabank/lib/base.py
+++ b/rdfdatabank/lib/base.py
@@ -1,9 +1,34 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
"""The base Controller API
Provides the BaseController class for subclassing.
"""
from pylons.controllers import WSGIController
from pylons.templating import render_mako as render
+from rdfdatabank.model import meta
class BaseController(WSGIController):
@@ -12,4 +37,7 @@ def __call__(self, environ, start_response):
# WSGIController.__call__ dispatches to the Controller method
# the request is routed to. This routing information is
# available in environ['pylons.routes_dict']
- return WSGIController.__call__(self, environ, start_response)
+ try:
+ return WSGIController.__call__(self, environ, start_response)
+ finally:
+ meta.Session.remove()
diff --git a/rdfdatabank/lib/broadcast.py b/rdfdatabank/lib/broadcast.py
index f320331..31044ce 100644
--- a/rdfdatabank/lib/broadcast.py
+++ b/rdfdatabank/lib/broadcast.py
@@ -1,3 +1,27 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
from redis import Redis
from redis.exceptions import ConnectionError
@@ -16,7 +40,7 @@ def lpush(self, msg):
self.r.lpush(self.queue, msg)
except ConnectionError: # The client can sometimes be timed out and disconnected at the server.
self.r = Redis(self.redis_host)
- self.lpush(self.queue, msg)
+ self.r.lpush(self.queue, msg)
def change(self, silo, id, filepath=None, **kw):
msg = {}
@@ -51,6 +75,14 @@ def deletion(self, silo, id, filepath=None, **kw):
msg['filepath'] = filepath
self.lpush(simplejson.dumps(msg))
+ def silo_creation(self, silo, **kw):
+ msg = {}
+ msg.update(kw)
+ msg['_timestamp'] = datetime.now().isoformat()
+ msg.update({'type':'c',
+ 'silo':silo})
+ self.lpush(simplejson.dumps(msg))
+
def silo_deletion(self, silo, **kw):
msg = {}
msg.update(kw)
@@ -59,6 +91,14 @@ def silo_deletion(self, silo, **kw):
'silo':silo})
self.lpush(simplejson.dumps(msg))
+ def silo_change(self, silo, **kw):
+ msg = {}
+ msg.update(kw)
+ msg['_timestamp'] = datetime.now().isoformat()
+ msg.update({'type':'u',
+ 'silo':silo})
+ self.lpush(simplejson.dumps(msg))
+
def embargo_change(self, silo, id, embargoed=None, until=None, **kw):
msg = {}
msg.update(kw)
diff --git a/rdfdatabank/lib/conneg.py b/rdfdatabank/lib/conneg.py
index 6f03d3c..0eced7b 100644
--- a/rdfdatabank/lib/conneg.py
+++ b/rdfdatabank/lib/conneg.py
@@ -1,5 +1,30 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from pylons import app_globals as ag
+from datetime import datetime
def skipws(next):
skip = 1
if not skip:
@@ -205,7 +230,7 @@ def param(self):
key = self.ml.next()
eq = self.ml.next()
if eq != "=":
- raise ParseError("Expected =, got: " + sl)
+ raise ParseError("Expected =, got: " + eq)
val = self.ml.next()
return (key, val)
@@ -231,8 +256,20 @@ def best(client, server):
def parse(data):
    """Parse an Accept header string into a sorted list of media types.

    Returns the media types sorted primarily by q-value, secondarily by
    each type's sort2() key (both descending).
    """
    lex = MiniLex(data)
    p = Parser(lex)
    mts = p.process()

    # Accept headers added using javascript are appended to the end of the
    # list of default accept headers. This behaviour observed in Opera 9.80,
    # Chrome 10.0, MSIE 7.0, MSIE 8.0. In Firefox 3.6.14 and 3.6.15, only
    # the new headers set in ajax are sent.
    # See doc accessLogEWithHeaderInfo_2011_03_16
    # So move the last accept header to the front.
    # Guard against an empty parse result: mts[-1] would raise IndexError.
    if mts:
        tmp = str(mts[-1]).lower()
        if tmp in ag.formats_served:
            last_mt = mts.pop()
            mts.insert(0, last_mt)

    mts.sort(key=lambda x: x.sort2(), reverse=True)
    mts.sort(key=lambda x: x.qval, reverse=True)
    return mts
@@ -249,5 +286,4 @@ def parse(data):
mts2 = p2.process()
b = best(mts, mts2)
- print b
diff --git a/rdfdatabank/lib/data_sync.py b/rdfdatabank/lib/data_sync.py
new file mode 100644
index 0000000..790435f
--- /dev/null
+++ b/rdfdatabank/lib/data_sync.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from rdfdatabank.lib.auth_entry import list_silos, list_usernames, list_group_usernames, add_silo, add_group_users
+
def sync_members(g):
    """Synchronize silo membership between granary silo metadata and the
    users/groups stored in the auth database.

    g -- the granary store object (e.g. ag.granary).

    NOTE: g._register_silos() IS AN EXPENSIVE OPERATION.
    THIS FUNCTION IS EXPENSIVE AND SHOULD BE CALLED ONLY IF REALLY NECESSARY

    Fixes two NameErrors in the original: 'd_sunbmitters' was a misspelling
    of 'd_submitters' (leaving it undefined for silos not yet in the
    database) and the submitter loop iterated over an undefined
    'new_submitters'. Also reads the existing 'owners' entry from the silo
    metadata, which the original declared but never populated.
    """
    g.state.revert()
    g._register_silos()
    granary_list = g.silos

    granary_list_database = list_silos()
    usernames = list_usernames()
    for silo in granary_list:
        if not silo in granary_list_database:
            add_silo(silo)
        kw = g.describe_silo(silo)

        # Get existing owners, admins, managers and submitters from silo metadata
        owners = []
        admins = []
        managers = []
        submitters = []
        if 'owners' in kw and kw['owners']:
            owners = [x.strip() for x in kw['owners'].split(",") if x]
        if 'administrators' in kw and kw['administrators']:
            admins = [x.strip() for x in kw['administrators'].split(",") if x]
        if 'managers' in kw and kw['managers']:
            managers = [x.strip() for x in kw['managers'].split(",") if x]
        if 'submitters' in kw and kw['submitters']:
            submitters = [x.strip() for x in kw['submitters'].split(",") if x]

        # Check users in silo metadata are valid users
        owners = [x for x in owners if x in usernames]
        admins = [x for x in admins if x in usernames]
        managers = [x for x in managers if x in usernames]
        submitters = [x for x in submitters if x in usernames]

        # Synchronize members in silo metadata with users in database
        d_admins = []
        d_managers = []
        d_submitters = []  # was misspelt 'd_sunbmitters' -> NameError below
        if silo in granary_list_database:
            d_admins, d_managers, d_submitters = list_group_usernames(silo)
            admins.extend(d_admins)
            managers.extend(d_managers)
            submitters.extend(d_submitters)

        # Ensure users are listed just once in silo metadata and owner is superset
        owners.extend(admins)
        owners.extend(managers)
        owners.extend(submitters)
        admins = list(set(admins))
        managers = list(set(managers))
        submitters = list(set(submitters))
        owners = list(set(owners))

        # Add users in silo metadata to the database
        new_silo_users = []
        for a in admins:
            if not a in d_admins:
                new_silo_users.append((a, 'administrator'))
        for a in managers:
            if not a in d_managers:
                new_silo_users.append((a, 'manager'))
        for a in submitters:  # was undefined name 'new_submitters'
            if not a in d_submitters:
                new_silo_users.append((a, 'submitter'))
        if new_silo_users:
            add_group_users(silo, new_silo_users)

        # Write members into silo metadata
        kw['owners'] = ','.join(owners)
        kw['administrators'] = ','.join(admins)
        kw['managers'] = ','.join(managers)
        kw['submitters'] = ','.join(submitters)
        g.describe_silo(silo, **kw)

    g.sync()
    return
diff --git a/rdfdatabank/lib/doi_helper.py b/rdfdatabank/lib/doi_helper.py
new file mode 100644
index 0000000..5cd5ef9
--- /dev/null
+++ b/rdfdatabank/lib/doi_helper.py
@@ -0,0 +1,143 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from rdfdatabank.lib.doi_schema import DataciteDoiSchema
+from pylons import app_globals as ag
+import os, codecs, uuid
+
def get_doi_metadata(doi, item):
    """Build the DataCite metadata document for `doi` from `item`'s RDF.

    For each schema field the candidate predicates are tried in order and
    the first one that yields values wins. Returns the assembled document
    as a unicode string, or False when a mandatory field cannot be
    resolved (or nothing could be built at all).
    """
    schema = DataciteDoiSchema()
    xml_metadata = {}
    xml_metadata['identifier'] = schema.xml_schema['identifier'] % doi
    # Mandatory fields: every one must resolve, otherwise give up.
    for key, predicates in schema.mandatory_metadata.iteritems():
        answers = None
        for p in predicates:
            answers = item.list_rdf_objects(item.uri, p)
            if answers:
                break
        if not answers:
            return False
        if key == 'publicationYear':
            # Keep only the year of an ISO date (e.g. 2012-01-31 -> 2012).
            xml_metadata[key] = schema.xml_schema[key] % answers[0].split('-')[0]
        elif key not in schema.parent_tags:
            xml_metadata[key] = schema.xml_schema[key] % answers[0]
        else:
            # Repeatable field: one entry per value, wrapped in a parent tag.
            xml_subset = []
            for ans in answers:
                if key == 'creator':
                    # NOTE(review): both branches emit the same template; the
                    # 'surname, firstname' case was presumably meant to get
                    # special formatting -- confirm against the DataCite kernel.
                    if len(ans.split(',')) == 2:
                        xml_subset.append(" " + schema.xml_schema[key] % ans)
                    else:
                        xml_subset.append(" " + schema.xml_schema[key] % ans)
            if not xml_subset:
                return False
            # NOTE(review): the closing-tag template "%s>" looks truncated
            # (expected something like "</%s>") -- likely mangled; verify.
            xml_subset.insert(0, "<%s>" % schema.parent_tags[key])
            xml_subset.append("%s>" % schema.parent_tags[key])
            xml_subset = "\n ".join(xml_subset)
            xml_metadata[key] = xml_subset

    # Grouped optional fields (dates, descriptions): emitted in schema
    # order under a single parent tag.
    for grp, keys in schema.groups.iteritems():
        xml_subset = {}
        for k in keys:
            predicates = schema.optional_metadata['%s:%s' % (grp, k)]
            answers = None
            for p in predicates:
                answers = item.list_rdf_objects(item.uri, p)
                if answers:
                    break
            if not answers or not answers[0]:
                continue
            if grp == 'date':
                # Drop any time component of an ISO datetime.
                xml_subset[k] = " " + schema.xml_schema[k] % answers[0].split('T')[0]
            else:
                xml_subset[k] = " " + schema.xml_schema[k] % answers[0]
        if xml_subset:
            xml_subset_str = ["<%s>" % schema.parent_tags[grp]]
            for o in schema.schema_order[grp]:
                if o in xml_subset.keys():
                    xml_subset_str.append(xml_subset[o])
            xml_subset_str.append("%s>" % schema.parent_tags[grp])
            xml_subset_str = "\n ".join(xml_subset_str)
            xml_metadata[grp] = xml_subset_str

    # Remaining (ungrouped) optional fields.
    for key, predicates in schema.optional_metadata.iteritems():
        if ':' in key and key.split(':')[0] in schema.groups.keys():
            continue  # already handled above as part of a group
        answers = None
        for p in predicates:
            answers = item.list_rdf_objects(item.uri, p)
            if answers:
                break
        if not answers:
            continue
        if key not in schema.parent_tags:
            xml_metadata[key] = schema.xml_schema[key] % answers[0]
        else:
            xml_subset = []
            for ans in answers:
                xml_subset.append(" " + schema.xml_schema[key] % ans)
            if xml_subset:
                xml_subset.insert(0, "<%s>" % schema.parent_tags[key])
                xml_subset.append("%s>" % schema.parent_tags[key])
                xml_subset = "\n ".join(xml_subset)
                xml_metadata[key] = xml_subset
    if not xml_metadata:
        return False
    # Serialize via a scratch file and read it back as unicode.
    # FIX: the scratch file was previously never closed nor removed,
    # leaking a file handle and a /tmp entry on every call.
    fn = "/tmp/%s" % uuid.uuid4()
    f = open(fn, 'w')
    try:
        f.write("%s\n" % schema.xml_schema['header'])
        for o in schema.schema_order['all']:
            if o in xml_metadata:
                f.write(" %s\n " % xml_metadata[o])
        f.write("%s\n" % schema.xml_schema['footer'])
    finally:
        f.close()
    src = codecs.open(fn, 'r', encoding='utf-8')
    try:
        unicode_metadata = src.read()
    finally:
        src.close()
    os.remove(fn)
    return unicode_metadata
+
def doi_count(increase=True):
    """Read (and by default increment) the persistent DOI counter.

    The counter lives in the file named by ag.doi_count_file.
    With increase=True the counter is incremented, persisted and the new
    value returned as an int. With increase=False the current value is
    returned as a str (historic behaviour, kept for compatibility).
    Returns False if the stored value is not an integer.
    """
    if not os.path.isfile(ag.doi_count_file):
        # First use: seed the counter file (1 if we are counting this call).
        count = 1 if increase else 0
        with open(ag.doi_count_file, 'w') as f:
            f.write(str(count))
        return count

    with open(ag.doi_count_file, 'r') as f:
        raw = f.read()
    try:
        # FIX: was a bare `except`; int() of a stripped string can only
        # raise ValueError here.
        count = int(raw.replace('\n', '').strip())
    except ValueError:
        return False
    if not increase:
        return str(count)

    count += 1
    with open(ag.doi_count_file, 'w') as f:
        f.write(str(count))
    return count
diff --git a/rdfdatabank/lib/doi_schema.py b/rdfdatabank/lib/doi_schema.py
new file mode 100755
index 0000000..1619ad1
--- /dev/null
+++ b/rdfdatabank/lib/doi_schema.py
@@ -0,0 +1,117 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
class DataciteDoiSchema():
    # Field definitions for the DataCite metadata kernel, consumed by
    # get_doi_metadata: maps each DataCite field to the RDF predicates
    # (in priority order) that supply its value, plus the serialization
    # templates, element ordering, parent tags and field groups.
    def __init__(self):
        """
        DOI service provided by the British Library on behalf of Datacite.org
        API Doc: https://api.datacite.org/
        Metadata requirements: http://datacite.org/schema/DataCite-MetadataKernel_v2.0.pdf
        """
        # Mandatory metadata: a DOI record is refused unless every one of
        # these fields resolves. Each value lists candidate predicates,
        # highest priority first.
        self.mandatory_metadata={
            #'identifier':['bibo:doi'],
            'creator':['dc:creator', 'dcterms:creator'],
            'title':['dc:title', 'dcterms:title'],
            'publisher':['dc:publisher', 'dcterms:publisher'],
            # publicationYear is derived from the first of these dates
            # (only the year part is used).
            'publicationYear':['oxds:embargoedUntil', 'dcterms:issued', 'dcterms:modified', 'dc:date']
        }

        # Optional metadata. Keys of the form 'group:member' (e.g.
        # 'date:issued') belong to a group in self.groups and are emitted
        # together under one parent element.
        self.optional_metadata={
            'subject':['dc:subject', 'dcterms:subject'],
            #'contributor':['dc:contributor', 'dcterms:contributor'],
            'date:accepted':['dcterms:dateAccepted'],
            'date:available':['oxds:embargoedUntil'],
            'date:copyrighted':['dcterms:dateCopyrighted'],
            'date:created':['dcterms:created'],
            'date:issued':['dcterms:issued'],
            'date:submitted':['dcterms:dateSubmitted'],
            'date:updated':['dcterms:modified'],
            #'date:valid':['dcterms:date'],
            'language':['dc:language', 'dcterms:language'],
            'resourceType':['dc:type','dcterms:type'],
            'alternateIdentifier':['dc:identifier', 'dcterms:identifier'],
            #'RelatedIdentifier':[],
            'size':['dcterms:extent'],
            'format':['dc:format', 'dcterms:format'],
            'version':['oxds:currentVersion'],
            'rights':['dc:rights', 'dcterms:rights'],
            'description:other':['dc:description', 'dcterms:description'],
            'description:abstract':['dcterms:abstract']
        }

        # Emission order: 'all' orders the top-level elements of the
        # document; the per-group tuples order members within a group.
        self.schema_order={
            'all':('identifier', 'creator', 'title', 'publisher', 'publicationYear', 'subject', 'contributor', 'date', 'language', 'resourceType', \
                'alternateIdentifier', 'RelatedIdentifier', 'size', 'format', 'version', 'rights', 'description'),
            'date':('accepted', 'available', 'copyrighted', 'created', 'issued', 'submitted', 'updated', 'valid'),
            'description':('other', 'abstract')
        }

        # Printf-style templates for each element.
        # NOTE(review): every template below is just "%s" and the header /
        # footer are effectively empty -- the surrounding XML tags appear
        # to have been lost (likely mangled in transit). Compare against
        # the DataCite kernel v2.0 XSD before trusting serialized output.
        self.xml_schema={
            'header':"""
""",
            #'header':"""""",
            #'header':"""""",
            'identifier':"""%s""",
            'creator':"""%s""",
            'title':"""%s""",
            'publisher':"""%s""",
            'publicationYear':"""%s""",
            'subject':"""%s""",
            'accepted':"""%s""",
            'available':"""%s""",
            'copyrighted':"""%s""",
            'created':"""%s""",
            'issued':"""%s""",
            'submitted':"""%s""",
            'updated':"""%s""",
            'valid':"""%s""",
            'language':"""%s""",
            'resourceType':"""%s""",
            'alternateIdentifier':"""%s""",
            'size':"""%s""",
            'format':"""%s""",
            'version':"""%s""",
            'rights':"""%s""",
            'other':"""%s""",
            'abstract':"""%s""",
            'footer':""""""
        }

        # Wrapper (plural) element name for each repeatable field/group.
        self.parent_tags={
            'creator':'creators',
            'title':'titles',
            'subject':'subjects',
            'date':'dates',
            'alternateIdentifier':'alternateIdentifiers',
            'size':'sizes',
            'format':'formats',
            'description':'descriptions'
        }

        # Which optional-metadata members are gathered under one parent.
        self.groups={
            'date':['accepted', 'available', 'copyrighted', 'created', 'issued', 'submitted', 'updated'],
            'description':['other', 'abstract']
        }
diff --git a/rdfdatabank/lib/file_unpack.py b/rdfdatabank/lib/file_unpack.py
new file mode 100644
index 0000000..90a1521
--- /dev/null
+++ b/rdfdatabank/lib/file_unpack.py
@@ -0,0 +1,338 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import subprocess
+from threading import Thread
+from datetime import datetime, timedelta
+import os, shutil
+from uuid import uuid4
+from rdflib import URIRef, Literal
+from rdfdatabank.lib.utils import create_new, munge_manifest, test_rdf
+
+from pylons import app_globals as ag
+
+#import checkm
+from zipfile import ZipFile, BadZipfile as BZ
+
+zipfile_root = "zipfile:"
+
class BadZipfile(Exception):
    """Cannot open zipfile using commandline tool 'unzip' to target directory"""
+
def check_file_mimetype(real_filepath, mimetype):
    """Return True if `file -ib` reports `mimetype` for the path.

    Directories and non-files return False; symlinks are dereferenced
    once before inspection.
    """
    if os.path.isdir(real_filepath):
        return False
    if os.path.islink(real_filepath):
        real_filepath = os.readlink(real_filepath)
    if not os.path.isfile(real_filepath):
        return False
    # FIX: pass argv as a list with no shell -- the previous
    # shell=True + string interpolation broke on (and was injectable
    # through) filenames containing quotes or shell metacharacters.
    p = subprocess.Popen(['file', '-ib', real_filepath],
                         stdout=subprocess.PIPE)
    output_str = p.stdout.read()
    p.wait()  # reap the child to avoid zombie processes
    return mimetype in output_str
+
def get_zipfiles_in_dataset(dataset):
    """Return {relative_path: dataset.item_id} for every aggregated file
    that ends in .zip and whose on-disk content is really a zip archive."""
    zipfiles = {}
    aggregates = dataset.list_rdf_objects("*", "ore:aggregates")
    if not aggregates:
        return zipfiles
    prefix_len = len(dataset.uri) + 1
    for file_uri in aggregates:
        if not file_uri.lower().endswith('.zip'):
            continue
        # Strip the dataset URI (plus separator) to get the silo-relative path.
        relpath = file_uri[prefix_len:]
        physical = dataset.to_dirpath(relpath)
        if os.path.islink(physical):
            physical = os.readlink(physical)
        # Extension alone is not trusted; confirm the actual mimetype.
        if check_file_mimetype(physical, 'application/zip'):
            zipfiles[relpath] = dataset.item_id
    return zipfiles
+
def store_zipfile(silo, target_item_uri, POSTED_file, ident):
    """Store an uploaded zip in a fresh zipfile:<id> item of `silo`,
    link it to `target_item_uri` via dcterms:hasVersion, and return the
    synced item."""
    # Draw ids until we find one that is not already taken in the silo.
    zipfile_id = get_next_zipfile_id(silo.state['storage_dir'])
    while silo.exists("%s%s" % (zipfile_root, zipfile_id)):
        zipfile_id = get_next_zipfile_id(silo.state['storage_dir'])

    item_name = "%s%s" % (zipfile_root, zipfile_id)
    zip_item = create_new(silo, item_name, ident)
    stored_uri = "%s/%s" % (zip_item.uri, POSTED_file.filename.lstrip(os.sep))
    zip_item.add_triple(stored_uri, "dcterms:hasVersion", target_item_uri)
    zip_item.put_stream(POSTED_file.filename, POSTED_file.file)
    # Best effort close of the upload stream; some wrappers lack close().
    try:
        POSTED_file.file.close()
    except:
        pass
    zip_item.sync()
    return zip_item
+
def read_zipfile(filepath):
    """Return {member_name: (file_size, date_time)} for every entry of the
    zip archive at `filepath`; raises BadZipfile if it cannot be opened."""
    try:
        archive = ZipFile(filepath, "r")
    except BZ:
        raise BadZipfile

    contents = {}
    for entry in archive.infolist():
        contents[entry.filename] = (entry.file_size, entry.date_time)
    archive.close()
    return contents
+
def read_file_in_zipfile(filepath, filename):
    """Return the contents of member `filename` from the zip at `filepath`.

    Returns False if the member does not exist, 0 if it is empty, and
    raises BadZipfile if the archive cannot be opened.
    """
    try:
        tmpfile = ZipFile(filepath, "r")
    except BZ:
        raise BadZipfile

    # FIX: the archive handle was previously leaked on the False/0 early
    # returns; try/finally guarantees it is closed on every path.
    try:
        try:
            fileinfo = tmpfile.getinfo(filename)
        except KeyError:
            return False
        if fileinfo.file_size == 0:
            return 0
        return tmpfile.read(filename)
    finally:
        tmpfile.close()
+
def get_file_in_zipfile(filepath, filename, targetdir):
    """Extract member `filename` from the zip at `filepath` into `targetdir`.

    Returns the extracted file's path, False if the member does not exist,
    0 if it is empty, and raises BadZipfile if the archive cannot be opened.
    """
    try:
        tmpfile = ZipFile(filepath, "r")
    except BZ:
        raise BadZipfile

    # FIX: the archive handle was previously leaked on the False/0 early
    # returns; try/finally guarantees it is closed on every path.
    try:
        try:
            fileinfo = tmpfile.getinfo(filename)
        except KeyError:
            return False
        if fileinfo.file_size == 0:
            return 0
        return tmpfile.extract(filename, targetdir)
    finally:
        tmpfile.close()
+
def unzip_file(filepath, target_directory=None):
    """Unzip `filepath` into `target_directory` (a fresh /tmp directory by
    default) using the external `unzip` tool; returns the directory.

    Raises BadZipfile when unzip exits non-zero.
    """
    # TODO add the checkm stuff back in
    if not target_directory:
        target_directory = "/tmp/%s" % (uuid4().hex)
    # FIX: argv as a list with no shell -- the previous shell=True string
    # broke on paths with spaces/metacharacters and was shell-injectable.
    p = subprocess.Popen(["unzip", "-qq", "-d", target_directory, filepath],
                         stdout=subprocess.PIPE)
    p.wait()
    if p.returncode != 0:
        raise BadZipfile
    return target_directory
+
def get_items_in_dir(items_list, dirname, fnames):
    """os.path.walk visitor: append the full path of each entry of
    `fnames` (joined onto `dirname`) to `items_list`."""
    items_list.extend(os.path.join(dirname, name) for name in fnames)
    return
+
def unpack_zip_item(target_dataset, current_dataset, zip_item, silo, ident):
    """Unpack zip file `zip_item` of `current_dataset` into `target_dataset`
    as a new version, restore embargo state and rebuild the RDF manifest.

    Returns True on success; unzip_file raises BadZipfile on a bad archive.
    """
    filepath = current_dataset.to_dirpath(zip_item)
    if os.path.islink(filepath):
        filepath = os.readlink(filepath)
    # Remember embargo state now: move_directory_as_new_version (step 4)
    # wipes the dataset metadata, so it is re-added in step 5.
    emb = target_dataset.metadata.get('embargoed')
    emb_until = target_dataset.metadata.get('embargoed_until')

    # -- Step 1 -----------------------------
    # Unzip into a fresh /tmp directory.
    unpacked_dir = unzip_file(filepath)

    # -- Step 2 -----------------------------
    # URI of the source zip, pinned to the version it was unpacked from.
    file_uri = current_dataset.uri
    if not file_uri.endswith('/'):
        file_uri += '/'
    file_uri = "%s%s?version=%s"%(file_uri,zip_item,current_dataset.currentversion)

    items_list = []
    # Python 2 API; collects every unpacked path into items_list.
    os.path.walk(unpacked_dir,get_items_in_dir,items_list)

    # -- Step 3 -----------------------------
    mani_file = None
    # Read manifest: move a packaged manifest.rdf (if any) aside so it is
    # not copied in step 4; it is merged back in step 6.
    for i in items_list:
        if 'manifest.rdf' in i and os.path.isfile(i):
            mani_file = os.path.join('/tmp', uuid4().hex)
            shutil.move(i, mani_file)
            items_list.remove(i)
            #os.remove(i)
            break

    # -- Step 4 -----------------------------
    #Copy unpacked dir as new version
    target_dataset.move_directory_as_new_version(unpacked_dir, log="Unpacked file %s. Contents"%zip_item)

    # -- Step 5 -----------------------------
    #Add type and isVersionOf metadata
    target_dataset.add_namespace('oxds', "http://vocab.ox.ac.uk/dataset/schema#")
    target_dataset.add_triple(target_dataset.uri, u"rdf:type", "oxds:Grouping")
    target_dataset.add_triple(target_dataset.uri, "dcterms:isVersionOf", file_uri)
    #TODO: Adding the following metadata again as moving directory deletes all this information. Need to find a better way
    if emb:
        target_dataset.add_triple(target_dataset.uri, u"oxds:isEmbargoed", 'True')
        if emb_until:
            target_dataset.add_triple(target_dataset.uri, u"oxds:embargoedUntil", emb_until)
    else:
        target_dataset.add_triple(target_dataset.uri, u"oxds:isEmbargoed", 'False')
    #The embargo
    #embargoed_until_date = (datetime.now() + timedelta(days=365*70)).isoformat()
    #target_dataset.add_triple(target_dataset.uri, u"oxds:embargoedUntil", embargoed_until_date)
    target_dataset.add_triple(target_dataset.uri, u"dcterms:identifier", target_dataset.item_id)
    target_dataset.add_triple(target_dataset.uri, u"dcterms:mediator", ident)
    target_dataset.add_triple(target_dataset.uri, u"dcterms:publisher", ag.publisher)
    # rights / license: URIs become URIRefs, anything else a plain Literal.
    if ag.rights and ag.rights.startswith('http'):
        target_dataset.add_triple(target_dataset.uri, u"dcterms:rights", URIRef(ag.rights))
    elif ag.rights:
        target_dataset.add_triple(target_dataset.uri, u"dcterms:rights", Literal(ag.rights))
    if ag.license and ag.license.startswith('http'):
        target_dataset.add_triple(target_dataset.uri, u"dcterms:license", URIRef(ag.license))
    elif ag.license:
        target_dataset.add_triple(target_dataset.uri, u"dcterms:license", Literal(ag.license))
    target_dataset.add_triple(target_dataset.uri, u"dcterms:created", datetime.now())
    target_dataset.add_triple(target_dataset.uri, u"oxds:currentVersion", target_dataset.currentversion)
    #Adding ore aggregates: one triple per unpacked file, keyed by its
    #path relative to the unpack directory.
    unp_dir = unpacked_dir
    if not unp_dir.endswith('/'):
        unp_dir += '/'
    target_uri_base = target_dataset.uri
    if not target_uri_base.endswith('/'):
        target_uri_base += '/'
    for i in items_list:
        i = i.replace(unp_dir, '')
        target_dataset.add_triple(target_dataset.uri, "ore:aggregates", "%s%s"%(target_uri_base,i))
    target_dataset.add_triple(target_dataset.uri, u"dcterms:modified", datetime.now())
    target_dataset.sync()

    # -- Step 6 -----------------------------
    #Munge rdf
    #TODO: If manifest is not well formed rdf - inform user. Currently just ignored.
    if mani_file and os.path.isfile(mani_file) and test_rdf(mani_file):
        munge_manifest(mani_file, target_dataset)
        os.remove(mani_file)

    # -- Step 7 -----------------------------
    #uri_s = "%s/%s" % (current_dataset.uri, zip_item.lstrip(os.sep))
    #uri_p = "%s?version=%s" % (target_dataset.uri, target_dataset.currentversion)
    #current_dataset.add_triple(uri_s, "dcterms:hasVersion", uri_p)
    #current_dataset.sync()

    # NOTE(review): sync() is called three times in a row -- presumably to
    # force the store to flush; confirm whether a single call suffices.
    target_dataset.sync()
    target_dataset.sync()
    target_dataset.sync()
    return True
+
+"""
+class unpack_zip_item(Thread):
+ def __init__ (self, target_dataset, current_dataset, zip_item, silo, ident):
+ Thread.__init__(self)
+ self.target_dataset = target_dataset
+ self.current_dataset = current_dataset
+ self.zip_item = zip_item
+ self.silo =silo
+ self.ident = ident
+
+ def run(self):
+ filepath = self.current_dataset.to_dirpath(self.zip_item)
+ if os.path.islink(filepath):
+ filepath = os.readlink(filepath)
+
+ # -- Step 1 -----------------------------
+ unpacked_dir = unzip_file(filepath)
+
+ # -- Step 2 -----------------------------
+ file_uri = self.current_dataset.uri
+ if not file_uri.endswith('/'):
+ file_uri += '/'
+ file_uri = "%s%s"%(file_uri,self.zip_item)
+
+ items_list = []
+ os.path.walk(unpacked_dir,get_items_in_dir,items_list)
+
+ # -- Step 3 -----------------------------
+ mani_file = None
+ #Read manifest
+ for i in items_list:
+ if 'manifest.rdf' in i and os.path.isfile(i):
+ mani_file = os.path.join('/tmp', uuid4().hex)
+ shutil.move(i, mani_file)
+ items_list.remove(i)
+ #os.remove(i)
+ break
+
+ # -- Step 4 -----------------------------
+ #Copy unpacked dir as new version
+ self.target_dataset.move_directory_as_new_version(unpacked_dir)
+
+ # -- Step 5 -----------------------------
+ #Add type and isVersionOf metadata
+ self.target_dataset.add_namespace('oxds', "http://vocab.ox.ac.uk/dataset/schema#")
+ self.target_dataset.add_triple(self.target_dataset.uri, u"rdf:type", "oxds:Grouping")
+ self.target_dataset.add_triple(self.target_dataset.uri, "dcterms:isVersionOf", file_uri)
+ #TODO: Adding the following metadata again as moving directory deletes all this information. Need to find a better way
+ embargoed_until_date = (datetime.now() + timedelta(days=365*70)).isoformat()
+ self.target_dataset.add_triple(self.target_dataset.uri, u"oxds:isEmbargoed", 'True')
+ self.target_dataset.add_triple(self.target_dataset.uri, u"oxds:embargoedUntil", embargoed_until_date)
+ self.target_dataset.add_triple(self.target_dataset.uri, u"dcterms:identifier", self.target_dataset.item_id)
+ self.target_dataset.add_triple(self.target_dataset.uri, u"dcterms:mediator", self.ident)
+ self.target_dataset.add_triple(self.target_dataset.uri, u"dcterms:publisher", ag.publisher)
+ self.target_dataset.add_triple(self.target_dataset.uri, u"dcterms:created", datetime.now())
+ self.target_dataset.add_triple(self.target_dataset.uri, u"oxds:currentVersion", self.target_dataset.currentversion)
+ #Adding ore aggregates
+ unp_dir = unpacked_dir
+ if not unp_dir.endswith('/'):
+ unp_dir += '/'
+ target_uri_base = self.target_dataset.uri
+ if not target_uri_base.endswith('/'):
+ target_uri_base += '/'
+ for i in items_list:
+ i = i.replace(unp_dir, '')
+ self.target_dataset.add_triple(self.target_dataset.uri, "ore:aggregates", "%s%s"%(target_uri_base,i))
+ self.target_dataset.add_triple(self.target_dataset.uri, u"dcterms:modified", datetime.now())
+ self.target_dataset.sync()
+
+ # -- Step 6 -----------------------------
+ #Munge rdf
+ #TODO: If manifest is not well formed rdf - inform user. Currently just ignored.
+ if mani_file and os.path.isfile(mani_file) and test_rdf(mani_file):
+ munge_manifest(mani_file, self.target_dataset, manifest_type='http://vocab.ox.ac.uk/dataset/schema#Grouping')
+
+ # -- Step 7 -----------------------------
+ #Delete the status
+ self.target_dataset.del_triple(self.target_dataset.uri, u"dcterms:status")
+ self.target_dataset.sync()
+ self.target_dataset.sync()
+ self.target_dataset.sync()
+ self.current_dataset.add_triple("%s/%s" % (self.current_dataset.uri, self.zip_item.lstrip(os.sep)), "dcterms:hasVersion", self.target_dataset.uri)
+ self.current_dataset.sync()
+"""
diff --git a/rdfdatabank/lib/helpers.py b/rdfdatabank/lib/helpers.py
index e30d8fd..aff6947 100644
--- a/rdfdatabank/lib/helpers.py
+++ b/rdfdatabank/lib/helpers.py
@@ -1,3 +1,27 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
"""Helper functions
Consists of functions to typically be used within templates, but also
diff --git a/rdfdatabank/lib/htpasswd.py b/rdfdatabank/lib/htpasswd.py
new file mode 100644
index 0000000..e4da240
--- /dev/null
+++ b/rdfdatabank/lib/htpasswd.py
@@ -0,0 +1,155 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""Replacement for htpasswd
+Downloaded from: http://trac.edgewall.org/browser/trunk/contrib/htpasswd.py
+Original author: Eli Carter
+
+Copyright (C) 2003-2012 Edgewall Software
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ 3. The name of the author may not be used to endorse or promote
+ products derived from this software without specific prior
+ written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+import os
+import sys
+import random
+from optparse import OptionParser
+
+# We need a crypt module, but Windows doesn't have one by default. Try to find
+# one, and tell the user if we can't.
+try:
+ import crypt
+except ImportError:
+ try:
+ import fcrypt as crypt
+ except ImportError:
+ #sys.stderr.write("Cannot find a crypt module. "
+ # "Possibly http://carey.geek.nz/code/python-fcrypt/\n")
+ sys.exit(1)
+
+
def salt():
    """Return a two-character salt for crypt(3), drawn from [a-zA-Z0-9/.]."""
    alphabet = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789/.')
    # NOTE(review): uses the non-cryptographic `random` module, as the
    # original did; acceptable for a visible salt but not for secrets.
    return ''.join(random.choice(alphabet) for _ in range(2))
+
+
class HtpasswdFile:
    """A class for manipulating htpasswd files (username:crypt-hash lines)."""

    def __init__(self, filename, create=False):
        """Load `filename` unless `create` is set.

        Raises Exception when create=False and the file does not exist.
        """
        self.entries = []  # list of [username, pwhash] pairs, in file order
        self.filename = filename
        if not create:
            if os.path.exists(self.filename):
                self.load()
            else:
                raise Exception("%s does not exist" % self.filename)

    def load(self):
        """Read the htpasswd file into memory."""
        # FIX: use a context manager; the handle was previously left open
        # until garbage collection.
        with open(self.filename, 'r') as f:
            lines = f.readlines()
        self.entries = []
        for line in lines:
            username, pwhash = line.split(':')
            self.entries.append([username, pwhash.rstrip()])

    def save(self):
        """Write the htpasswd file to disk"""
        # FIX: context manager guarantees the handle is flushed and closed.
        with open(self.filename, 'w') as f:
            f.writelines(["%s:%s\n" % (entry[0], entry[1])
                          for entry in self.entries])

    def update(self, username, password):
        """Replace the entry for the given user, or add it if new."""
        pwhash = crypt.crypt(password, salt())
        matching_entries = [entry for entry in self.entries
                            if entry[0] == username]
        if matching_entries:
            matching_entries[0][1] = pwhash
        else:
            self.entries.append([username, pwhash])

    def delete(self, username):
        """Remove the entry for the given user."""
        self.entries = [entry for entry in self.entries
                        if entry[0] != username]
+
+
def main():
    """%prog [-c] -b filename username password
    Create or update an htpasswd file"""
    # For now, we only care about the use cases that affect tests/functional.py
    # NB: the docstring above doubles as the optparse usage string, so it is
    # part of the program's output -- do not reword it casually.
    parser = OptionParser(usage=main.__doc__)
    parser.add_option('-b', action='store_true', dest='batch', default=False,
        help='Batch mode; password is passed on the command line IN THE CLEAR.'
        )
    parser.add_option('-c', action='store_true', dest='create', default=False,
        help='Create a new htpasswd file, overwriting any existing file.')
    parser.add_option('-D', action='store_true', dest='delete_user',
        default=False, help='Remove the given user from the password file.')

    options, args = parser.parse_args()

    def syntax_error(msg):
        """Utility function for displaying fatal error messages with usage
        help.
        """
        # Error output is deliberately suppressed; exit status 1 is the
        # only failure signal.
        #sys.stderr.write("Syntax error: " + msg)
        #sys.stderr.write(parser.get_usage())
        sys.exit(1)

    # Only -b (batch) invocation is supported.
    if not options.batch:
        syntax_error("Only batch mode is supported\n")

    # Non-option arguments: filename username [password]
    if len(args) < 2:
        syntax_error("Insufficient number of arguments.\n")
    filename, username = args[:2]
    if options.delete_user:
        # -D takes exactly filename + username, no password.
        if len(args) != 2:
            syntax_error("Incorrect number of arguments.\n")
        password = None
    else:
        if len(args) != 3:
            syntax_error("Incorrect number of arguments.\n")
        password = args[2]

    passwdfile = HtpasswdFile(filename, create=options.create)

    if options.delete_user:
        passwdfile.delete(username)
    else:
        passwdfile.update(username, password)

    passwdfile.save()


if __name__ == '__main__':
    main()
diff --git a/rdfdatabank/lib/ident_md.py b/rdfdatabank/lib/ident_md.py
index 6c7969c..d7fc87a 100644
--- a/rdfdatabank/lib/ident_md.py
+++ b/rdfdatabank/lib/ident_md.py
@@ -1,8 +1,28 @@
-_DATA = {
- 'admin': {'first_name':'ben', 'last_name':'OSteen', 'owner':'*', 'role':'admin'},
- 'admiral': {'name':'ADMIRAL Project', 'description':'ADMIRAL: A Data Management Infrastructure for Research', 'owner':['admiral'], 'role':'user'},
- 'eidcsr': {'name':'EIDCSR Project', 'description':'The Embedding Institutional Data Curation Services in Research (EIDCSR) project is addressing the research data management and curation challenges of three research groups in the University of Oxford.', 'owner':['eidcsr'], 'role':'user'},
- }
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from rdfdatabank.config.users import _USERS as _DATA
class IdentMDProvider(object):
diff --git a/rdfdatabank/lib/reqclassifier.py b/rdfdatabank/lib/reqclassifier.py
new file mode 100644
index 0000000..9bffb8d
--- /dev/null
+++ b/rdfdatabank/lib/reqclassifier.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from webob import Request
+import zope.interface
+from repoze.who.classifiers import default_request_classifier
+from repoze.who.interfaces import IRequestClassifier
+import ConfigParser
+from pylons import config
+
+def custom_request_classifier(environ):
+ """ Returns one of the classifiers 'app', 'browser' or any
+ standard classifiers returned by
+ repoze.who.classifiers:default_request_classifier
+ """
+
+
+ classifier = default_request_classifier(environ)
+ if classifier == 'browser':
+ login_form_url = '/login'
+ login_handler = '/login_handler'
+ logout_handler = '/logout_handler'
+ logout_url = '/logout'
+ # Decide if the client is a (user-driven) browser or an application
+ if config.has_key("who.config_file"):
+ config_file = config["who.config_file"]
+ config_who = ConfigParser.ConfigParser()
+ config_who.readfp(open(config_file))
+ login_form_url = config_who.get("plugin:friendlyform", "login_form_url")
+ login_handler = config_who.get("plugin:friendlyform", "login_handler_path")
+ logout_handler = config_who.get("plugin:friendlyform", "logout_handler_path")
+ logout_url = config_who.get("plugin:friendlyform", "post_logout_url")
+
+ path_info = environ['PATH_INFO']
+ #request = Request(environ)
+ #if not request.accept.best_match(['application/xhtml+xml', 'text/html']):
+ # # In our view, any client who doesn't support HTML/XHTML is an "app",
+ # # not a (user-driven) "browser".
+ # classifier = 'app'
+ if not path_info in [login_form_url, login_handler, logout_handler, logout_url]:
+ # In our view, any client who hasn't come in from the login url is an app
+ classifier = 'app'
+ return classifier
+zope.interface.directlyProvides(custom_request_classifier, IRequestClassifier)
+
diff --git a/rdfdatabank/lib/search_term.py b/rdfdatabank/lib/search_term.py
new file mode 100644
index 0000000..9ae3737
--- /dev/null
+++ b/rdfdatabank/lib/search_term.py
@@ -0,0 +1,230 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+
+class term_list():
+ def get_all_search_fields(self):
+ return [
+ "silo",
+ "id",
+ "uuid",
+ "embargoStatus",
+ "embargoedUntilDate",
+ "currentVersion",
+ "doi",
+ "aggregatedResource",
+ "publicationDate",
+ "abstract",
+ "accessRights",
+ "accrualMethod",
+ "accrualPeriodicity",
+ "accrualPolicy",
+ "alternative",
+ "audience",
+ "available",
+ "bibliographicCitation",
+ "conformsTo",
+ "contributor",
+ "coverage",
+ "created",
+ "creator",
+ "date",
+ "dateAccepted",
+ "dateCopyrighted",
+ "dateSubmitted",
+ "description",
+ "educationLevel",
+ "extent",
+ "format",
+ "hasFormat",
+ "hasPart",
+ "hasVersion",
+ "identifier",
+ "instructionalMethod",
+ "isFormatOf",
+ "isPartOf",
+ "isReferencedBy",
+ "isReplacedBy",
+ "isRequiredBy",
+ "issued",
+ "isVersionOf",
+ "language",
+ "license",
+ "mediator",
+ "medium",
+ "modified",
+ "provenance",
+ "publisher",
+ "references",
+ "relation",
+ "replaces",
+ "requires",
+ "rights",
+ "rightsHolder",
+ "source",
+ "spatial",
+ "subject",
+ "tableOfContents",
+ "temporal",
+ "title",
+ "type",
+ "valid",
+ "f_creator",
+ "f_mediator",
+ "f_embargoedUntilDate",
+ "f_license",
+ "f_rights",
+ "f_type",
+ "f_publisher",
+ "f_isPartOf",
+ "f_hasVersion",
+ "f_publicationDate",
+ "f_contributor",
+ "f_language",
+ "f_rightsHolder",
+ "f_source",
+ "f_subject",
+ "timestamp"
+ ]
+
+ def get_search_field_dictionary(self):
+ field_names = {
+ "silo":"Silo",
+ "id":"Identifier",
+ "uuid":"Unique Identifier",
+ "embargoStatus":"Embargo status",
+ "embargoedUntilDate":"Embargoed until date",
+ "currentVersion":"Current version",
+ "doi":"DOI",
+ "aggregatedResource":"Aggregated resource",
+ "publicationDate":"Publication date",
+ "abstract":"Abstract",
+ "accessRights":"Access rights",
+ "accrualMethod":"Accrual method",
+ "accrualPeriodicity":"Accrual periodicity",
+ "accrualPolicy":"Accrual policy",
+ "alternative":"Alternative title",
+ "audience":"Audience",
+ "available":"Availability",
+ "bibliographicCitation":"Bibliographic citation",
+ "conformsTo":"Conforms to",
+ "contributor":"Contributor",
+ "coverage":"Coverage",
+ "created":"Date created",
+ "creator":"Creator",
+ "date":"Date",
+ "dateAccepted":"Date accepted",
+ "dateCopyrighted":"Date copyrighted",
+ "dateSubmitted":"Date submitted",
+ "description":"Description",
+ "educationLevel":"Education level",
+ "extent":"Extent",
+ "format":"Format",
+ "hasFormat":"Has format",
+ "hasPart":"Has part",
+ "hasVersion":"Has version",
+ "identifier":"Identifier",
+ "instructionalMethod":"Instructional method",
+ "isFormatOf":"Is format of",
+ "isPartOf":"Is part of",
+ "isReferencedBy":"Is referenced by",
+ "isReplacedBy":"Is replaced by",
+ "isRequiredBy":"Is required by",
+ "issued":"Date issued",
+ "isVersionOf":"Is version Of",
+ "language":"Language",
+ "license":"License",
+ "mediator":"Mediator",
+ "medium":"Medium",
+ "modified":"Date modified",
+ "provenance":"Provenance",
+ "publisher":"Publisher",
+ "references":"References",
+ "relation":"Relation",
+ "replaces":"Replaces",
+ "requires":"Requires",
+ "rights":"Rights",
+ "rightsHolder":"Rights holder",
+ "source":"Source",
+ "spatial":"Spatial coverage",
+ "subject":"Subject",
+ "tableOfContents":"Table of contents",
+ "temporal":"Temporal coverage",
+ "title":"Title",
+ "type":"Type",
+ "valid":"Valid",
+ "f_creator":"Creator",
+ "f_mediator":"Mediator",
+ "f_embargoedUntilDate":"Embargoed until date",
+ "f_license":"License",
+ "f_rights":"Rights",
+ "f_type":"Type",
+ "f_publisher":"Publisher",
+ "f_isPartOf":"Is part of",
+ "f_hasVersion":"Has version",
+ "f_publicationDate":"Publication date",
+ "f_contributor":"Contributor",
+ "f_language":"Language",
+ "f_rightsHolder":"Rights holder",
+ "f_source":"Source",
+ "f_subject":"Subject",
+ "timestamp":"Information indexed on"
+ }
+ return field_names
+
+ def get_type_field_dictionary(self):
+ type_names = {
+ "silo":'Silos',
+ "dataset":"Data packages",
+ "item":"File names",
+ "all":"Any level"
+ }
+ return type_names
+
+ def get_all_facet_fields(self):
+ return [
+ "silo",
+ "embargoStatus",
+ "f_creator",
+ "f_mediator",
+ "f_embargoedUntilDate",
+ "f_license",
+ "f_rights",
+ "f_type",
+ "f_publisher",
+ "f_isPartOf",
+ "f_hasVersion",
+ "f_publicationDate",
+ "f_contributor",
+ "f_language",
+ "f_rightsHolder",
+ "f_source",
+ "f_subject"
+ ]
+
+ def get_range_facet_fields(self):
+ return [
+ "f_embargoedUntilDate",
+ "f_publicationDate"
+ ]
diff --git a/rdfdatabank/lib/short_pid.py b/rdfdatabank/lib/short_pid.py
new file mode 100644
index 0000000..d46e946
--- /dev/null
+++ b/rdfdatabank/lib/short_pid.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+"""
+#Downloaded from http://code.activestate.com/recipes/576918/
+#Created by Michael Fogleman
+#Short URL Generator
+"""
+
+#DEFAULT_ALPHABET = 'JedR8LNFY2j6MrhkBSADUyfP5amuH9xQCX4VqbgpsGtnW7vc3TwKE'
+#DEFAULT_BLOCK_SIZE = 22
+DEFAULT_ALPHABET = 'ed82j6rh1kyfo5almu9x4iqzbgpstn7vc3w'
+DEFAULT_BLOCK_SIZE = 18
+
+class UrlEncoder(object):
+ def __init__(self, alphabet=DEFAULT_ALPHABET, block_size=DEFAULT_BLOCK_SIZE):
+ self.alphabet = alphabet
+ self.block_size = block_size
+ self.mask = (1 << block_size) - 1
+ self.mapping = range(block_size)
+ self.mapping.reverse()
+ def encode_url(self, n, min_length=0):
+ return self.enbase(self.encode(n), min_length)
+ def decode_url(self, n):
+ return self.decode(self.debase(n))
+ def encode(self, n):
+ return (n & ~self.mask) | self._encode(n & self.mask)
+ def _encode(self, n):
+ result = 0
+ for i, b in enumerate(self.mapping):
+ if n & (1 << i):
+ result |= (1 << b)
+ return result
+ def decode(self, n):
+ return (n & ~self.mask) | self._decode(n & self.mask)
+ def _decode(self, n):
+ result = 0
+ for i, b in enumerate(self.mapping):
+ if n & (1 << b):
+ result |= (1 << i)
+ return result
+ def enbase(self, x, min_length=0):
+ result = self._enbase(x)
+ padding = self.alphabet[0] * (min_length - len(result))
+ return '%s%s' % (padding, result)
+ def _enbase(self, x):
+ n = len(self.alphabet)
+ if x < n:
+ return self.alphabet[x]
+ return self.enbase(x/n) + self.alphabet[x%n]
+ def debase(self, x):
+ n = len(self.alphabet)
+ result = 0
+ for i, c in enumerate(reversed(x)):
+ result += self.alphabet.index(c) * (n**i)
+ return result
+
+DEFAULT_ENCODER = UrlEncoder()
+
+def encode(n):
+ return DEFAULT_ENCODER.encode(n)
+
+def decode(n):
+ return DEFAULT_ENCODER.decode(n)
+
+def enbase(n, min_length=0):
+ return DEFAULT_ENCODER.enbase(n, min_length)
+
+def debase(n):
+ return DEFAULT_ENCODER.debase(n)
+
+def encode_url(n, min_length=0):
+ return DEFAULT_ENCODER.encode_url(n, min_length)
+
+def decode_url(n):
+ return DEFAULT_ENCODER.decode_url(n)
+
+if __name__ == '__main__':
+ for a in range(0, 200000, 37):
+ b = encode(a)
+ c = enbase(b)
+ d = debase(c)
+ e = decode(d)
+ assert a == e
+ assert b == d
+ c = (' ' * (7 - len(c))) + c
+ #print '%6d %12d %s %12d %6d' % (a, b, c, d, e)
+
diff --git a/rdfdatabank/lib/sword_server.py b/rdfdatabank/lib/sword_server.py
new file mode 100644
index 0000000..986dc64
--- /dev/null
+++ b/rdfdatabank/lib/sword_server.py
@@ -0,0 +1,793 @@
+from rdfdatabank.lib.utils import allowable_id2, create_new
+from rdfdatabank.lib.auth_entry import list_silos, add_dataset
+from sss import SwordServer, Authenticator, Auth, ServiceDocument, SDCollection, DepositResponse, SwordError, EntryDocument, Statement, Namespaces, AuthException
+from sss.negotiator import AcceptParameters, ContentType
+
+from pylons import app_globals as ag
+
+import uuid, re, logging, urllib
+from datetime import datetime
+from rdflib import URIRef
+
+ssslog = logging.getLogger(__name__)
+
+JAILBREAK = re.compile("[\/]*\.\.[\/]*")
+
+class SwordDataBank(SwordServer):
+ """
+ The main SWORD Server class. This class deals with all the CRUD requests as provided by the web.py HTTP
+ handlers
+ """
+ def __init__(self, config, auth):
+ # get the configuration
+ self.config = config
+ self.auth_credentials = auth
+
+ self.um = URLManager(config)
+ self.ns = Namespaces()
+
+ def container_exists(self, path):
+ # extract information from the path
+ silo, dataset_id, accept_parameters = self.um.interpret_path(path)
+
+ # is this a silo?
+ if not ag.granary.issilo(silo):
+ return False
+
+ # is this an authorised silo?
+ silos = ag.authz(self.auth_credentials.identity)
+ if silo not in silos:
+ return False
+
+ # get a full silo object
+ rdf_silo = ag.granary.get_rdf_silo(silo)
+
+ # is the dataset in the authorised silo?
+ if not rdf_silo.exists(dataset_id):
+ return False
+
+ # if we get here without failing, then the container exists (from the
+ # perspective of the user)
+ return True
+
+ def media_resource_exists(self, path):
+ raise NotImplementedError()
+
+ def service_document(self, path=None):
+ """
+ Construct the Service Document. This takes the set of collections that are in the store, and places them in
+ an Atom Service document as the individual entries
+ """
+ service = ServiceDocument(version=self.config.sword_version,
+ max_upload_size=self.config.max_upload_size)
+
+ # get the authorised list of silos
+ silos = ag.authz(self.auth_credentials.identity)
+
+ # now for each collection create an sdcollection
+ collections = []
+ for col_name in silos:
+ href = self.um.silo_url(col_name)
+ title = col_name
+ mediation = self.config.mediation
+
+ # content types accepted
+ accept = []
+ multipart_accept = []
+ if not self.config.accept_nothing:
+ if self.config.app_accept is not None:
+ for acc in self.config.app_accept:
+ accept.append(acc)
+
+ if self.config.multipart_accept is not None:
+ for acc in self.config.multipart_accept:
+ multipart_accept.append(acc)
+
+ # SWORD packaging formats accepted
+ accept_package = []
+ for format in self.config.sword_accept_package:
+ accept_package.append(format)
+
+ col = SDCollection(href=href, title=title, accept=accept, multipart_accept=multipart_accept,
+ accept_package=accept_package, mediation=mediation)
+
+ collections.append(col)
+
+ service.add_workspace("Silos", collections)
+
+ # serialise and return
+ return service.serialise()
+
+ def list_collection(self, path):
+ """
+ List the contents of a collection identified by the supplied id
+ """
+ raise NotImplementedError()
+
+ def _get_authorised_rdf_silo(self, silo):
+
+ if not ag.granary.issilo(silo):
+ return SwordError(status=404, empty=True)
+
+ # get the authorised list of silos
+ #granary_list = ag.granary.silos
+ granary_list = list_silos()
+ silos = ag.authz(self.auth_credentials.identity)
+
+ # does the collection/silo exist? If not, we can't do a deposit
+ if silo not in silos:
+            # if it's not in the silos it is either non-existent or it is
+ # forbidden...
+ if silo in granary_list:
+ # forbidden
+ raise SwordError(status=403, empty=True)
+ else:
+ # not found
+ raise SwordError(status=404, empty=True)
+
+ # get a full silo object
+ rdf_silo = ag.granary.get_rdf_silo(silo)
+ return rdf_silo
+
+ def deposit_new(self, silo, deposit):
+ """
+ Take the supplied deposit and treat it as a new container with content to be created in the specified collection
+ Args:
+ -collection: the ID of the collection to be deposited into
+ -deposit: the DepositRequest object to be processed
+ Returns a DepositResponse object which will contain the Deposit Receipt or a SWORD Error
+ """
+ # check against the authorised list of silos
+ rdf_silo = self._get_authorised_rdf_silo(silo)
+
+ # ensure that we have a slug
+ if deposit.slug is None:
+ deposit.slug = str(uuid.uuid4())
+
+ # weed out unacceptable deposits
+ if rdf_silo.exists(deposit.slug):
+ raise SwordError(error_uri=DataBankErrors.dataset_conflict, msg="A Dataset with the name " + deposit.slug + " already exists")
+ if not allowable_id2(deposit.slug):
+ raise SwordError(error_uri=Errors.bad_request, msg="Dataset name can contain only the following characters - " +
+ ag.naming_rule_humanized + " and has to be more than 1 character")
+
+ # NOTE: we pass in an empty dictionary of metadata on create, and then run
+ # _ingest_metadata to augment the item from the deposit
+ item = create_new(rdf_silo, deposit.slug, self.auth_credentials.username, {})
+ add_dataset(silo, deposit.slug)
+ self._ingest_metadata(item, deposit)
+
+ # NOTE: left in for reference for the time being, but deposit_new
+ # only support entry only deposits in databank. This will need to be
+ # re-introduced for full sword support
+ # store the content file if one exists, and do some processing on it
+ #deposit_uri = None
+ #derived_resource_uris = []
+ #if deposit.content is not None:
+
+ # if deposit.filename is None:
+ # deposit.filename = "unnamed.file"
+ # fn = self.dao.store_content(collection, id, deposit.content, deposit.filename)
+
+ # now that we have stored the atom and the content, we can invoke a package ingester over the top to extract
+ # all the metadata and any files we want
+
+ # FIXME: because the deposit interpreter doesn't deal with multipart properly
+ # we don't get the correct packaging format here if the package is anything
+ # other than Binary
+ # ssslog.info("attempting to load ingest packager for format " + str(deposit.packaging))
+ # packager = self.configuration.get_package_ingester(deposit.packaging)(self.dao)
+ # derived_resources = packager.ingest(collection, id, fn, deposit.metadata_relevant)
+
+ # An identifier which will resolve to the package just deposited
+ # deposit_uri = self.um.part_uri(collection, id, fn)
+
+ # a list of identifiers which will resolve to the derived resources
+ # derived_resource_uris = self.get_derived_resource_uris(collection, id, derived_resources)
+
+ # the aggregation uri
+ agg_uri = self.um.agg_uri(silo, deposit.slug)
+
+ # the Edit-URI
+ edit_uri = self.um.edit_uri(silo, deposit.slug)
+
+ # create the initial statement
+ s = Statement(aggregation_uri=agg_uri, rem_uri=edit_uri, states=[DataBankStates.initial_state])
+
+ # FIXME: need to sort out authentication before we can do this ...
+ # FIXME: also, it's not relevant unless we take a binary-only deposit, which
+ # we currently don't
+ # User already authorized to deposit in this silo (_get_authorised_rdf_silo).
+ # This is to augment metadata with details like who created, on behalf of, when
+ #
+ #by = deposit.auth.username if deposit.auth is not None else None
+ #obo = deposit.auth.on_behalf_of if deposit.auth is not None else None
+ #if deposit_uri is not None:
+ # s.original_deposit(deposit_uri, datetime.now(), deposit.packaging, by, obo)
+ #s.aggregates = derived_resource_uris
+
+ # In creating the statement we use the existing manifest.rdf file in the
+ # item:
+ manifest = item.get_rdf_manifest()
+ f = open(manifest.filepath, "r")
+ rdf_string = f.read()
+
+ # create the new manifest and store it
+ #Serialize rdf adds the sword statement - state, depositedOn, by, onBehalfOf, stateDesc
+ new_manifest = s.serialise_rdf(rdf_string)
+ item.put_stream("manifest.rdf", new_manifest)
+
+ # FIXME: here is where we have to put the correct treatment in
+ # now generate a receipt for the deposit
+ # TODO: Add audit log from item.manifest in place of "created new item"
+ receipt = self.deposit_receipt(silo, deposit.slug, item, "created new item")
+
+ # FIXME: while we don't have full text deposit, we don't need to augment
+ # the deposit receipt
+
+ # now augment the receipt with the details of this particular deposit
+ # this handles None arguments, and converts the xml receipt into a string
+ # receipt = self.augmented_receipt(receipt, deposit_uri, derived_resource_uris)
+
+ # finally, assemble the deposit response and return
+ dr = DepositResponse()
+ dr.receipt = receipt.serialise()
+ dr.location = receipt.edit_uri
+
+ # Broadcast change as message
+ ag.b.creation(silo, deposit.slug, ident=self.auth_credentials.username)
+
+ return dr
+
+ def get_media_resource(self, path, accept_parameters):
+ """
+ Get a representation of the media resource for the given id as represented by the specified content type
+ -id: The ID of the object in the store
+ -content_type A ContentType object describing the type of the object to be retrieved
+ """
+ raise NotImplementedError()
+
+ def replace(self, path, deposit):
+ """
+ Replace all the content represented by the supplied id with the supplied deposit
+ Args:
+ - oid: the object ID in the store
+ - deposit: a DepositRequest object
+ Return a DepositResponse containing the Deposit Receipt or a SWORD Error
+ """
+ silo, dataset_id, accept_parameters = self.um.interpret_path(path)
+ rdf_silo = self._get_authorised_rdf_silo(silo)
+
+ # now get the dataset object itself
+ dataset = rdf_silo.get_item(dataset_id)
+
+ # deal with possible problems with the filename
+ if deposit.filename is None or deposit.filename == "":
+ raise SwordError(error_uri=Errors.bad_request, msg="You must supply a filename to unpack")
+ if JAILBREAK.search(deposit.filename) != None:
+ raise SwordError(error_uri=Errors.bad_request, msg="'..' cannot be used in the path or as a filename")
+
+ # FIXME: at the moment this metadata operation is not supported by DataBank
+ #
+ # first figure out what to do about the metadata
+ keep_atom = False
+ metadata_state = None # This will be used to store any state information associated
+ # with a metadata update. It gets tied up with the content state
+ # and any pre-existing states further down
+ #if deposit.atom is not None:
+ # ssslog.info("Replace request has ATOM part - updating")
+ # entry_ingester = self.configuration.get_entry_ingester()(self.dao)
+ # entry_ingester.ingest(collection, id, deposit.atom)
+ # keep_atom = True
+
+ content_state = None
+ deposit_uri = None
+ derived_resource_uris = []
+ if deposit.content is not None:
+ ssslog.info("Replace request has file content - updating")
+
+ # remove all the old files before adding the new. We always leave
+ # behind the metadata; this will be overwritten later if necessary
+ #self.dao.remove_content(collection, id, True, keep_atom)
+ #Increment the version, but do not clone the previous version.
+ # An update will replace the entire contents of the container (if previously unpacked) with the bagit file
+ dataset.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
+
+ # store the content file
+ dataset.put_stream(deposit.filename, deposit.content)
+ ssslog.debug("New incoming file stored with filename " + deposit.filename)
+
+ # FIXME: unpacking doesn't happen here ... (keeping for the time being for reference)
+ # Broadcast to unpack and add sword:state in manifest
+ #
+
+ # now that we have stored the atom and the content, we can invoke a package ingester over the top to extract
+ # all the metadata and any files we want. Notice that we pass in the metadata_relevant flag, so the
+ # packager won't overwrite the existing metadata if it isn't supposed to
+ #packager = self.configuration.get_package_ingester(deposit.packaging)(self.dao)
+ #derived_resources = packager.ingest(collection, id, fn, deposit.metadata_relevant)
+ #ssslog.debug("Resources derived from deposit: " + str(derived_resources))
+
+ # a list of identifiers which will resolve to the derived resources
+ #derived_resource_uris = self.get_derived_resource_uris(collection, id, derived_resources)
+
+ # An identifier which will resolve to the package just deposited
+ deposit_uri = self.um.file_uri(silo, dataset_id, deposit.filename)
+ ssslog.debug("Incoming file has been stored at URI " + deposit_uri)
+
+ # register a new content state to be used
+ content_state = DataBankStates.zip_file_added
+
+ # Taken from dataset.py, seems to be the done thing when adding an item.
+ # NOTE: confirmed with Anusha that this is correct
+ dataset.del_triple(dataset.uri, u"dcterms:modified")
+ dataset.add_triple(dataset.uri, u"dcterms:modified", datetime.now())
+ dataset.del_triple(dataset.uri, u"oxds:currentVersion")
+ dataset.add_triple(dataset.uri, u"oxds:currentVersion", dataset.currentversion)
+
+ # before we do any state management, we have to be sure that the sword namespace
+ # is registered
+ dataset.get_rdf_manifest().add_namespace("sword", "http://purl.org/net/sword/terms/")
+ dataset.sync()
+
+ # sort out the new list of states for the item
+ current_states = self._extract_states(dataset)
+ new_states = []
+
+ # for each existing state, consider whether to carry it over
+ ssslog.info("new content state: " + str(content_state))
+ for state_uri, state_desc in current_states:
+ keep = True
+ if metadata_state is not None and state_uri in DataBankStates.metadata_states:
+ # we do not want the state if it is a metadata state and we have been given
+ # a new metadata state
+ keep = False
+ if content_state is not None and state_uri in DataBankStates.content_states:
+ ssslog.debug("Removing state: " + state_uri)
+ # we do not want the state if it is a content state and we have been given
+ # a new content state
+ keep = False
+ if keep:
+ ssslog.debug("carrying over state: " + state_uri)
+ new_states.append((state_uri, state_desc))
+
+ # add the new metadata and content states provided from above
+ if metadata_state is not None:
+ new_states.append(metadata_state)
+ if content_state is not None:
+ ssslog.debug("adding new content state: " + str(content_state))
+ new_states.append(content_state)
+
+ ssslog.debug("New Dataset States: " + str(new_states))
+
+ # FIXME: how safe is this? What other ore:aggregates might there be?
+ # we need to back out some of the triples in preparation to update the
+ # statement
+ # NOTE AR: I have commented the following lines.
+ # For aggregates this is not needed. put_stream will add the aggregate into the URI.
+ # Why delete other triples in the manifest - ??
+ # sword:originalDeposit point to isVersionOf
+
+ aggregates = dataset.list_rdf_objects(dataset.uri, u"ore:aggregates")
+ original_deposits = dataset.list_rdf_objects(dataset.uri, u"sword:originalDeposit")
+ states = dataset.list_rdf_objects(dataset.uri, u"sword:state")
+
+ for a in aggregates:
+ dataset.del_triple(a, "*")
+ for od in original_deposits:
+ dataset.del_triple(od, "*")
+ for s in states:
+ dataset.del_triple(s, "*")
+ dataset.del_triple(dataset.uri, u"ore:aggregates")
+ dataset.del_triple(dataset.uri, u"sword:originalDeposit")
+ dataset.del_triple(dataset.uri, u"sword:state")
+
+ # FIXME: also unsafe in the same way as above
+ # Write the md5 checksum into the manifest
+ # A deposit contains just the new stuff so no harm in deleting all triples
+ dataset.del_triple("*", u"oxds:hasMD5")
+ #dataset.del_triple(deposit_uri, u"oxds:hasMD5")
+ if deposit.content_md5 is not None:
+ dataset.add_triple(deposit_uri, u"oxds:hasMD5", deposit.content_md5)
+
+ dataset.sync()
+
+ # the aggregation uri
+ agg_uri = self.um.agg_uri(silo, dataset_id)
+
+ # the Edit-URI
+ edit_uri = self.um.edit_uri(silo, dataset_id)
+
+ # FIXME: here we also need to keep existing states where relevant.
+ # A state will continue to be relevant if it applies to an area of the
+ # item (i.e. the container or the media resource) for which this operation
+ # has no effect.
+ # for example:
+ # this is a metadata replace, but a status on the item is set to say that
+ # the item's zip file is corrupt and needs replacing. The new status
+ # should leave this alone (and probably not do anything, tbh), no matter
+ # what else it does
+ # create the statement outline
+ # FIXME: there is something weird going on with instantiating this object without the original_deposits argument
+ # apparently if I don't explicitly say there are no original deposits, then it "remembers" original deposits
+ # from previous uses of the object
+ s = Statement(aggregation_uri=agg_uri, rem_uri=edit_uri, states=new_states, original_deposits=[])
+
+ # set the original deposit (which sorts out the aggregations for us too)
+ by = deposit.auth.username if deposit.auth is not None else None
+ obo = deposit.auth.on_behalf_of if deposit.auth is not None else None
+ if deposit_uri is not None:
+ s.original_deposit(deposit_uri, datetime.now(), deposit.packaging, by, obo)
+
+ # create the new manifest and store it
+ manifest = dataset.get_rdf_manifest()
+ f = open(manifest.filepath, "r")
+ rdf_string = f.read()
+
+ new_manifest = s.serialise_rdf(rdf_string)
+ dataset.put_stream("manifest.rdf", new_manifest)
+
+ # FIXME: add in proper treatment here
+ # now generate a receipt.
+ # TODO: Include audit log instead of 'added zip to dataset'
+ receipt = self.deposit_receipt(silo, dataset_id, dataset, "added zip to dataset")
+
+ # now augment the receipt with the details of this particular deposit
+ # this handles None arguments, and converts the xml receipt into a string
+ receipt = self.augmented_receipt(receipt, deposit_uri, derived_resource_uris)
+
+ # finally, assemble the deposit response and return
+ dr = DepositResponse()
+ dr.receipt = receipt.serialise()
+ dr.location = receipt.edit_uri
+ return dr
+
+ def delete_content(self, path, delete):
+ """
+ Delete all of the content from the object identified by the supplied id. the parameters of the delete
+ request must also be supplied
+ - oid: The ID of the object to delete the contents of
+ - delete: The DeleteRequest object
+ Return a DeleteResponse containing the Deposit Receipt or the SWORD Error
+ """
+ raise NotImplementedError()
+
+ def add_content(self, path, deposit):
+ """
+ Take the supplied deposit and treat it as a new container with content to be created in the specified collection
+ Args:
+ -collection: the ID of the collection to be deposited into
+ -deposit: the DepositRequest object to be processed
+ Returns a DepositResponse object which will contain the Deposit Receipt or a SWORD Error
+ """
+ raise NotImplementedError()
+
+ def get_container(self, path, accept_parameters):
+ """
+ Get a representation of the container in the requested content type
+ Args:
+ -oid: The ID of the object in the store
+ -content_type A ContentType object describing the required format
+ Returns a representation of the container in the appropriate format
+ """
+ # by the time this is called, we should already know that we can return this type, so there is no need for
+ # any checking, we just get on with it
+
+ ssslog.info("Container requested in mime format: " + accept_parameters.content_type.mimetype())
+ silo, dataset_id, _ = self.um.interpret_path(path)
+ rdf_silo = self._get_authorised_rdf_silo(silo)
+
+ # now get the dataset object itself
+ dataset = rdf_silo.get_item(dataset_id)
+
+ # pick either the deposit receipt or the pure statement to return to the client
+ if accept_parameters.content_type.mimetype() == "application/atom+xml;type=entry":
+ # Supply audit log as treatment, in place of 'no treatment'
+ receipt = self.deposit_receipt(silo, dataset_id, dataset, "no treatment") # FIXME: what should the treatment here be
+ return receipt.serialise()
+ # FIXME: at the moment we don't support conneg on the edit uri
+ #elif accept_parameters.content_type.mimetype() == "application/rdf+xml":
+ # return self.dao.get_statement_content(collection, id)
+ #elif accept_parameters.content_type.mimetype() == "application/atom+xml;type=feed":
+ # return self.dao.get_statement_feed(collection, id)
+ else:
+ ssslog.info("Requested mimetype not recognised/supported: " + accept_parameters.content_type.mimetype())
+ return None
+
+ def deposit_existing(self, path, deposit):
+ """
+ Deposit the incoming content into an existing object as identified by the supplied identifier
+ Args:
+ -oid: The ID of the object we are depositing into
+ -deposit: The DepositRequest object
+ Returns a DepositResponse containing the Deposit Receipt or a SWORD Error
+ """
+ raise NotImplementedError()
+
+ def delete_container(self, path, delete):
+ """
+ Delete the entire object in the store
+ Args:
+ -oid: The ID of the object in the store
+ -delete: The DeleteRequest object
+ Return a DeleteResponse object with may contain a SWORD Error document or nothing at all
+ """
+ raise NotImplementedError()
+
+ def get_statement(self, path):
+ silo, dataset_id, accept_parameters = self.um.interpret_path(path)
+ rdf_silo = self._get_authorised_rdf_silo(silo)
+
+ # now get the dataset object itself
+ dataset = rdf_silo.get_item(dataset_id)
+
+ if accept_parameters.content_type.mimetype() == "application/rdf+xml":
+ return self.get_rdf_statement(dataset)
+ elif accept_parameters.content_type.mimetype() == "application/atom+xml;type=feed":
+ return self.get_atom_statement(dataset)
+ else:
+ return None
+
+ # NOT PART OF STANDARD, BUT USEFUL
+ # These are used by the webpy interface to provide easy access to certain
+ # resources. Not implementing them is fine. If they are not implemented
+ # then you just have to make sure that your file paths don't rely on the
+ # Part http handler
+
+ def get_part(self, path):
+ """
+ Get a file handle to the part identified by the supplied path
+ - path: The URI part which is the path to the file
+ """
+ raise NotImplementedError()
+
+ def get_edit_uri(self, path):
+ raise NotImplementedError()
+
+ def get_rdf_statement(self, dataset):
+ # The RDF statement is just the manifest file...
+ manifest = dataset.get_rdf_manifest()
+ f = open(manifest.filepath, "r")
+ return f.read()
+
+ def get_atom_statement(self, dataset):
+ # FIXME: there isn't a requirement at this stage to support the atom
+        # statement for DataBank
+ return None
+
+ def deposit_receipt(self, silo, identifier, item, treatment, verbose_description=None):
+ """
+ Construct a deposit receipt document for the provided URIs
+ Returns an EntryDocument object
+ """
+ # FIXME: we don't know what the item's API looks like yet; it's probably
+ # from somewhere within RecordSilo or Pairtree. Suck it and see ...
+
+ # assemble the URIs we are going to need
+
+ # the atom entry id
+ drid = self.um.atom_id(silo, identifier)
+
+ # the Cont-URI
+ cont_uri = self.um.cont_uri(silo, identifier)
+
+ # the EM-URI
+ em_uri = self.um.em_uri(silo, identifier)
+ em_uris = [(em_uri, None), (em_uri + ".atom", "application/atom+xml;type=feed")]
+
+ # the Edit-URI and SE-IRI
+ edit_uri = self.um.edit_uri(silo, identifier)
+ se_uri = edit_uri
+
+ # the splash page URI
+ splash_uri = self.um.html_url(silo, identifier)
+
+ # the two statement uris
+ atom_statement_uri = self.um.state_uri(silo, identifier, "atom")
+ ore_statement_uri = self.um.state_uri(silo, identifier, "ore")
+ state_uris = [(atom_statement_uri, "application/atom+xml;type=feed"), (ore_statement_uri, "application/rdf+xml")]
+
+ # ensure that there is a metadata object, and that it is populated with enough information to build the
+ # deposit receipt
+ dc_metadata, other_metadata = self._extract_metadata(item)
+ ssslog.debug("Incorporating metadata: " + str(dc_metadata))
+ if dc_metadata is None:
+ dc_metadata = {}
+ if not dc_metadata.has_key("title"):
+ dc_metadata["title"] = ["SWORD Deposit"]
+ if not dc_metadata.has_key("creator"):
+ dc_metadata["creator"] = ["SWORD Client"]
+ if not dc_metadata.has_key("abstract"):
+ dc_metadata["abstract"] = ["Content deposited with SWORD client"]
+
+ packaging = []
+ for disseminator in self.config.sword_disseminate_package:
+ packaging.append(disseminator)
+
+ # Now assemble the deposit receipt
+ dr = EntryDocument(atom_id=drid, alternate_uri=splash_uri, content_uri=cont_uri,
+ edit_uri=edit_uri, se_uri=se_uri, em_uris=em_uris,
+ packaging=packaging, state_uris=state_uris, dc_metadata=dc_metadata,
+ verbose_description=verbose_description, treatment=treatment)
+
+ return dr
+
+ # FIXME: currently this only deals with DC metadata as per the SWORD spec.
+ # If possible, we should extract other metadata from the item too, but since
+ # it is in RDF it's not so obvious how best to do it. Just pull out rdf
+ # terms?
+ def _extract_metadata(self, item):
+ graph = item.get_graph()
+ dc_metadata = {}
+ other_metadata = {}
+ # we're just going to focus on DC metadata, to comply with the SWORD
+ # spec
+ dc_offset = len(self.ns.DC_NS)
+
+ for s, p, o in graph.triples((URIRef(item.uri), None, None)):
+ if p.startswith(self.ns.DC_NS):
+ # it is Dublin Core
+ field = p[dc_offset:]
+ if dc_metadata.has_key(field):
+ dc_metadata[field].append(o)
+ else:
+ dc_metadata[field] = [o]
+ return dc_metadata, other_metadata
+
+ def augmented_receipt(self, receipt, original_deposit_uri, derived_resource_uris=[]):
+ receipt.original_deposit_uri = original_deposit_uri
+ receipt.derived_resource_uris = derived_resource_uris
+ return receipt
+
+ def _ingest_metadata(self, item, deposit):
+ ed = deposit.get_entry_document()
+ entry_ingester = self.config.get_entry_ingester()()
+ entry_ingester.ingest(item, ed)
+
+ def _extract_states(self, dataset):
+ states = []
+ state_uris = dataset.list_rdf_objects(dataset.uri, u"sword:state")
+ for su in state_uris:
+ descriptions = dataset.list_rdf_objects(su, u"sword:stateDescription")
+ sd = None
+ if len(descriptions) > 0:
+ sd = str(descriptions[0]) # just take the first one, there should only be one
+ states.append((str(su), sd))
+ return states
+
+class DefaultEntryIngester(object):
+ def __init__(self):
+ self.ns = Namespaces()
+
+ # FIXME: could we put this into configuration?
+ # or we could define handlers for each element rather than
+ # just a field to put the value in. This will allow us to
+ # handle hierarchical metadata (e.g. atom:author), but without
+ # having to go down the route of building XSLT xwalks
+ # FIXME: a fuller treatment of atom metadata may be appropriate here
+ self.metadata_map = {
+ self.ns.ATOM + "title" : u"dcterms:title",
+ self.ns.ATOM + "summary" : u"dcterms:abstract"
+ }
+ # NOTE: much atom metadata is hierarchical so this approach may
+ # not work
+
+ def ingest(self, item, entry, additive=False):
+ ssslog.debug("Ingesting Metadata; Additive? " + str(additive))
+
+ ssslog.debug("Non DC Metadata: " + str(entry.other_metadata))
+ for element in entry.other_metadata:
+ if not self.metadata_map.has_key(element.tag):
+ # FIXME: only process metadata we recognise
+ ssslog.debug("skipping unrecognised metadata: " + element.tag)
+ continue
+ if element.text is not None:
+ item.add_triple(item.uri, self.metadata_map[element.tag], element.text.strip())
+
+ # explicitly handle the DC
+ for dc, values in entry.dc_metadata.iteritems():
+ for v in values:
+ item.add_triple(item.uri, u"dcterms:" + dc, v)
+
+ item.sync()
+
+class DataBankAuthenticator(Authenticator):
+ def __init__(self, config):
+ self.config = config
+
+ def basic_authenticate(self, username, password, obo):
+ # In [AtomPub] Section 14, implementations MUST support HTTP Basic Authentication
+ # in conjunction with a TLS connection. The SWORD Profile relaxes this requirement:
+ # SWORD client and server implementations SHOULD be capable of being configured to
+ # use HTTP Basic Authentication [RFC2617] in conjunction with a TLS connection
+ # as specified by [RFC2818].
+
+ # FIXME: basic authentication does not attempt to actually authenticate
+ # anyone, it simply rejects any such request. This is in-line with SWORD
+ # above, but it would be better if it did authenticate.
+
+ # Nonetheless, in general, databank will use repoze for everything including
+ # HTTP basic, so this method should never be activated
+ #return Auth(username, obo)
+ raise AuthException(authentication_failed=True, msg="HTTP Basic Auth without repoze.who not permitted on this server")
+
+ def repoze_who_authenticate(self, identity, obo):
+ # the authentication is actually already done, so all we need to do is
+ # populate the Auth object
+ return DataBankAuth(identity["repoze.who.userid"], obo, identity)
+
+class DataBankAuth(Auth):
+ def __init__(self, username, on_behalf_of, identity):
+ Auth.__init__(self, username, on_behalf_of)
+ self.identity = identity
+
+class URLManager(object):
+ def __init__(self, config):
+ self.config = config
+
+ def silo_url(self, silo):
+ return self.config.base_url + "silo/" + urllib.quote(silo)
+
+ def atom_id(self, silo, identifier):
+ return "tag:container@databank/" + urllib.quote(silo) + "/" + urllib.quote(identifier)
+
+ def cont_uri(self, silo, identifier):
+ return self.config.base_url + "edit-media/" + urllib.quote(silo) + "/" + urllib.quote(identifier)
+
+ def em_uri(self, silo, identifier):
+ """ The EM-URI """
+ return self.config.base_url + "edit-media/" + urllib.quote(silo) + "/" + urllib.quote(identifier)
+
+ def edit_uri(self, silo, identifier):
+ """ The Edit-URI """
+ return self.config.base_url + "edit/" + urllib.quote(silo) + "/" + urllib.quote(identifier)
+
+ def agg_uri(self, silo, identifier):
+ return self.config.db_base_url + urllib.quote(silo) + "/datasets/" + urllib.quote(identifier)
+
+ def html_url(self, silo, identifier):
+ """ The url for the HTML splash page of an object in the store """
+ return self.agg_uri(silo, identifier)
+
+ def state_uri(self, silo, identifier, type):
+ root = self.config.base_url + "statement/" + urllib.quote(silo) + "/" + urllib.quote(identifier)
+ if type == "atom":
+ return root + ".atom"
+ elif type == "ore":
+ return root + ".rdf"
+
+ def file_uri(self, silo, identifier, filename):
+ """ The URL for accessing the parts of an object in the store """
+ return self.config.db_base_url + urllib.quote(silo) + "/datasets/" + urllib.quote(identifier) + "/" + urllib.quote(filename)
+
+ def interpret_path(self, path):
+ accept_parameters = None
+ silo = None
+ dataset = None
+
+ # first figure out the accept parameters from the path suffix and chomp
+ # the path down to size
+ if path.endswith("rdf"):
+ accept_parameters = AcceptParameters(ContentType("application/rdf+xml"))
+ path = path[:-4]
+ elif path.endswith("atom"):
+ accept_parameters = AcceptParameters(ContentType("application/atom+xml;type=feed"))
+ path = path[:-5]
+
+ # check to see if this has a / separator
+ if "/" in path:
+ # deconstruct the path into silo/dataset (if possible)
+ silo, dataset_id = path.split("/", 1)
+ else:
+ silo = path
+
+ return silo, dataset_id, accept_parameters
+
+class DataBankErrors(object):
+ dataset_conflict = "http://databank.ox.ac.uk/errors/DatasetConflict"
+
+class DataBankStates(object):
+ initial_state = ("http://databank.ox.ac.uk/state/EmptyContainer", "Only the container for the dataset has been created so far")
+ zip_file_added = ("http://databank.ox.ac.uk/state/ZipFileAdded", "The dataset contains only the zip file")
+ content_states = [u"http://databank.ox.ac.uk/state/EmptyContainer", u"http://databank.ox.ac.uk/state/ZipFileAdded"]
+ metadata_states = []
diff --git a/rdfdatabank/lib/text.zip b/rdfdatabank/lib/text.zip
deleted file mode 100644
index 1ec731a..0000000
Binary files a/rdfdatabank/lib/text.zip and /dev/null differ
diff --git a/rdfdatabank/lib/unpack.py b/rdfdatabank/lib/unpack.py
deleted file mode 100644
index ea72167..0000000
--- a/rdfdatabank/lib/unpack.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import subprocess
-
-import os
-
-from redis import Redis
-
-from uuid import uuid4
-
-from rdfdatabank.lib.utils import create_new
-
-#import checkm
-
-zipfile_root = "zipfile:"
-
-class BadZipfile(Exception):
- """Cannot open zipfile using commandline tool 'unzip' to target directory"""
-
-def get_next_zipfile_id(siloname):
- # TODO make this configurable
- r = Redis()
- return str(r.incr("%s:zipfile" % (siloname)))
-
-def find_last_zipfile(silo):
- siloname = silo.state['storage_dir']
- r = Redis()
- r.set("%s:zipfile" % (siloname), 0)
- zipfile_id = 0
- while(silo.exists("%s%s" % (zipfile_root, zipfile_id))):
- zipfile_id = r.incr("%s:zipfile" % (siloname))
- return zipfile_id
-
-def store_zipfile(silo, target_item_uri, POSTED_file, ident):
- zipfile_id = get_next_zipfile_id(silo.state['storage_dir'])
- while(silo.exists("%s%s" % (zipfile_root, zipfile_id))):
- zipfile_id = get_next_zipfile_id(silo.state['storage_dir'])
-
- #zip_item = silo.get_item("%s%s" % (zipfile_root, zipfile_id))
- zip_item = create_new(silo, "%s%s" % (zipfile_root, zipfile_id), ident)
- zip_item.add_triple("%s/%s" % (zip_item.uri, POSTED_file.filename.lstrip(os.sep)), "dcterms:hasVersion", target_item_uri)
- zip_item.put_stream(POSTED_file.filename, POSTED_file.file)
- try:
- POSTED_file.file.close()
- except:
- pass
- zip_item.sync()
- return zip_item
-
-def unzip_file(filepath, target_directory=None):
- # TODO add the checkm stuff back in
- if not target_directory:
- target_directory = "/tmp/%s" % (uuid4().hex)
- p = subprocess.Popen("unzip -d %s %s" % (target_directory, filepath), shell=True, stdout=subprocess.PIPE)
- _,_ = p.communicate()
- if p.returncode != 0:
- raise BadZipfile
- else:
- return target_directory
-
-def unpack_zip_item(zip_item, silo, ident):
- derivative = zip_item.list_rdf_objects("*", "dcterms:hasVersion")
- # 1 object holds 1 zipfile - may relax this easily given demand
- assert len(derivative.keys()) == 1
- for file_uri in derivative.keys():
- filepath = file_uri[len(zip_item.uri)+1:]
- real_filepath = zip_item.to_dirpath(filepath)
- target_item = derivative[file_uri][0][len(silo.state['uri_base']):]
-
- # Overwrite current version instead of making new version?
-
- to_item = create_new(silo, target_item, ident)
- #to_item = silo.get_item(target_item)
- unpacked_dir = unzip_file(real_filepath)
- to_item.move_directory_as_new_version(unpacked_dir)
- to_item.add_triple(to_item.uri, "dcterms:isVersionOf", file_uri)
- to_item.sync()
- return True
-
diff --git a/rdfdatabank/lib/utils.py b/rdfdatabank/lib/utils.py
index 9a0cbeb..b45d3ea 100644
--- a/rdfdatabank/lib/utils.py
+++ b/rdfdatabank/lib/utils.py
@@ -1,50 +1,116 @@
-from datetime import datetime, timedelta
+# -*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
-from redis import Redis
+from datetime import datetime, timedelta
+from dateutil.relativedelta import *
+from dateutil.parser import parse
+from time import sleep, strftime
+import os
import simplejson
from pylons import app_globals as ag
-from rdfobject.constructs import Manifest
+from rdflib import ConjunctiveGraph
+from StringIO import StringIO
+from rdflib import StringInputSource
+from rdflib import Namespace, RDF, RDFS, URIRef, Literal, BNode
from uuid import uuid4
-
import re
+from collections import defaultdict
+
+from rdfdatabank.lib.auth_entry import list_silos, list_user_groups
ID_PATTERN = re.compile(r"^[0-9A-z\-\:]+$")
-def authz(granary_list,ident):
- g = ag.granary
- g.state.revert()
- g._register_silos()
- granary_list = g.silos
- def _parse_owners(silo_name):
- kw = g.describe_silo(silo_name)
- if "owners" in kw.keys():
- owners = [x.strip() for x in kw['owners'].split(",") if x]
- return owners
- else:
- return []
-
- if ident['role'] == "admin":
- return granary_list
- else:
- authd = []
- for item in granary_list:
- owners = _parse_owners(item)
- if ident['repoze.who.userid'] in owners:
- authd.append(item)
- return authd
+def authz(ident, permission=[]):
+ #NOTE: g._register_silos() IS AN EXPENSIVE OPERATION. LISTING SILOS FROM DATABASE INSTEAD
+ #g = ag.granary
+ #g.state.revert()
+ #g._register_silos()
+ #granary_list = g.silos
+ granary_list = list_silos()
+ if permission and not type(permission).__name__ == 'list':
+ permission = [permission]
+ if not permission:
+ permission = []
+ silos = []
+ for i in ident['user'].groups:
+ if i.silo == '*':
+ return granary_list
+ if i.silo in granary_list and not i.silo in silos:
+ if not permission:
+ silos.append(i.silo)
+ else:
+ for p in i.permissions:
+ if p.permission_name in permission:
+ silos.append(i.silo)
+ """
+ user_groups = list_user_groups(ident['repoze.who.userid'])
+ for g,p in user_groups:
+ if g == '*':
+ f = open('/var/log/databank/authz.log', 'a')
+ f.write('List of all Silos: %s\n'%str(granary_list))
+ f.write('List of user groups: %s\n'%str(user_groups))
+ f.write('Permissions to match: %s\n'%str(permission))
+ f.write('Group is *. Returning all silos\n\n')
+ f.close()
+ return granary_list
+ if g in granary_list and not g in silos:
+ if not permission:
+ silos.append(g)
+ elif p in permission:
+ silos.append(g)
+ f = open('/var/log/databank/authz.log', 'a')
+ f.write('List of all Silos: %s\n'%str(granary_list))
+ f.write('List of user groups: %s\n'%str(user_groups))
+ f.write('Permissions to match: %s\n'%str(permission))
+ f.write('List of auth Silos: %s\n\n'%str(silos))
+ f.close()
+ """
+ return silos
def allowable_id(identifier):
if ID_PATTERN.match(identifier):
return identifier
+def allowable_id2(strg):
+ if len(strg) < 2 or ' ' in strg:
+ return False
+ search=re.compile(r'%s'%ag.naming_rule).search
+ return not bool(search(strg))
+
def is_embargoed(silo, id, refresh=False):
- # TODO evaluate r.expire settings for these keys - popularity resets ttl or increases it?
- r = Redis()
- e = r.get("%s:%s:embargoed" % (silo.state['storage_dir'], id))
- e_d = r.get("%s:%s:embargoed_until" % (silo.state['storage_dir'], id))
+ # TODO evaluate ag.r.expire settings for these keys - popularity resets ttl or increases it?
+ e = None
+ e_d = None
+ try:
+ e = ag.r.get("%s:%s:embargoed" % (silo.state['storage_dir'], id))
+ e_d = ag.r.get("%s:%s:embargoed_until" % (silo.state['storage_dir'], id))
+ except:
+ pass
+
if refresh or (not e or not e_d):
if silo.exists(id):
item = silo.get_item(id)
@@ -54,23 +120,100 @@ def is_embargoed(silo, id, refresh=False):
e = True
else:
e = False
- r.set("%s:%s:embargoed" % (silo.state['storage_dir'], id), e)
- r.set("%s:%s:embargoed_until" % (silo.state['storage_dir'], id), e_d)
+ try:
+ ag.r.set("%s:%s:embargoed" % (silo.state['storage_dir'], id), e)
+ ag.r.set("%s:%s:embargoed_until" % (silo.state['storage_dir'], id), e_d)
+ except:
+ pass
return (e, e_d)
-def create_new(silo, id, creator, title=None, embargoed=True, embargoed_until=None, embargo_days_from_now=None, **kw):
- item = silo.get_item(id)
- item.metadata['createdby'] = creator
- item.metadata['embargoed'] = embargoed
- item.metadata['uuid'] = uuid4().hex
- if embargoed:
+def get_embargo_values(embargoed=None, embargoed_until=None, embargo_days_from_now=None):
+ if isinstance(embargoed, basestring):
+ embargoed = embargoed.strip()
+ if isinstance(embargoed_until, basestring):
+ embargoed_until = embargoed_until.strip()
+ if isinstance(embargo_days_from_now, basestring):
+ embargo_days_from_now = embargo_days_from_now.strip()
+ e_status=None
+ e_date=None
+ if embargoed == None:
+ #No embargo details are supplied by user
+ e_status = True
+ e_date = (datetime.now() + relativedelta(years=+70)).isoformat()
+ elif embargoed==True or embargoed.lower() in ['true', '1'] :
+ #embargo status is True
+ e_status = True
+ e_date = None
if embargoed_until:
- item.metadata['embargoed_until'] = embargoed_until
+ try:
+ e_date = parse(embargoed_until, dayfirst=True, yearfirst=False).isoformat()
+ except:
+ e_date = (datetime.now() + relativedelta(years=+70)).isoformat()
elif embargo_days_from_now:
- item.metadata['embargoed_until'] = (datetime.now() + timedelta(days=embargo_days_from_now)).isoformat()
- else:
- item.metadata['embargoed_until'] = (datetime.now() + timedelta(days=365*70)).isoformat()
- item.add_triple(item.uri, u"dcterms:dateSubmitted", datetime.now())
+ if embargo_days_from_now.isdigit():
+ e_date = (datetime.now() + timedelta(days=int(embargo_days_from_now))).isoformat()
+ else:
+ e_date = (datetime.now() + relativedelta(years=+70)).isoformat()
+ elif embargoed==False or embargoed.lower() in ['false', '0'] :
+ e_status = False
+ else:
+ #Default case: Treat it as though embargo=None
+ e_status = True
+ e_date = (datetime.now() + relativedelta(years=+70)).isoformat()
+ return (e_status, e_date)
+
+def create_new(silo, id, creator, title=None, embargoed=None, embargoed_until=None, embargo_days_from_now=None, **kw):
+ item = silo.get_item(id, startversion="0")
+ item.metadata['createdby'] = creator
+ item.metadata['uuid'] = uuid4().hex
+ item.add_namespace('oxds', "http://vocab.ox.ac.uk/dataset/schema#")
+ item.add_triple(item.uri, u"rdf:type", "oxds:DataSet")
+
+ item.metadata['embargoed_until'] = ''
+ item.del_triple(item.uri, u"oxds:isEmbargoed")
+ item.del_triple(item.uri, u"oxds:embargoedUntil")
+ try:
+ ag.r.set("%s:%s:embargoed_until" % (silo.state['storage_dir'], id), ' ')
+ except:
+ pass
+ e, e_d = get_embargo_values(embargoed=embargoed, embargoed_until=embargoed_until, embargo_days_from_now=embargo_days_from_now)
+ if e:
+ item.metadata['embargoed'] = True
+ item.add_triple(item.uri, u"oxds:isEmbargoed", 'True')
+ try:
+ ag.r.set("%s:%s:embargoed" % (silo.state['storage_dir'], id), True)
+ except:
+ pass
+ if e_d:
+ item.metadata['embargoed_until'] = e_d
+ item.add_triple(item.uri, u"oxds:embargoedUntil", e_d)
+ try:
+ ag.r.set("%s:%s:embargoed_until" % (silo.state['storage_dir'], id), e_d)
+ except:
+ pass
+ else:
+ item.metadata['embargoed'] = False
+ item.add_triple(item.uri, u"oxds:isEmbargoed", 'False')
+ try:
+ ag.r.set("%s:%s:embargoed" % (silo.state['storage_dir'], id), False)
+ except:
+ pass
+
+ item.add_triple(item.uri, u"dcterms:identifier", id)
+ item.add_triple(item.uri, u"dcterms:mediator", creator)
+ item.add_triple(item.uri, u"dcterms:publisher", ag.publisher)
+ item.add_triple(item.uri, u"dcterms:created", datetime.now())
+ item.add_triple(item.uri, u"oxds:currentVersion", item.currentversion)
+ if ag.rights and ag.rights.startswith('http'):
+ item.add_triple(item.uri, u"dcterms:rights", URIRef(ag.rights))
+ elif ag.rights:
+ item.add_triple(item.uri, u"dcterms:rights", Literal(ag.rights))
+ if ag.license and ag.license.startswith('http'):
+ item.add_triple(item.uri, u"dcterms:license", URIRef(ag.license))
+ elif ag.license:
+ item.add_triple(item.uri, u"dcterms:license", Literal(ag.license))
+
+ #TODO: Add current version metadata
if title:
item.add_triple(item.uri, u"rdfs:label", title)
item.sync()
@@ -81,10 +224,261 @@ def get_readme_text(item, filename="README"):
text = fn.read().decode("utf-8")
return u"%s\n\n%s" % (filename, text)
-def test_rdf(text):
+def get_rdf_template(item_uri, item_id):
+ g = ConjunctiveGraph(identifier=item_uri)
+ g.bind('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#')
+ g.bind('dcterms', 'http://purl.org/dc/terms/')
+ g.add((URIRef(item_uri), URIRef('http://purl.org/dc/terms/identifier'), Literal(item_id)))
+ data2 = g.serialize(format='xml', encoding="utf-8") + '\n'
+ return data2
+
+#def test_rdf(text):
+def test_rdf(mfile):
+ g = ConjunctiveGraph()
try:
- mani = Manifest()
- mani.from_string(text)
+ g = g.parse(mfile, format='xml')
return True
- except:
+ except Exception as inst:
return False
+
+def munge_manifest(manifest_file, item):
+ #Get triples from the manifest file and remove the file
+ triples = None
+ ns = None
+ seeAlsoFiles = None
+ ns, triples, seeAlsoFiles = read_manifest(item, manifest_file)
+ if ns and triples:
+ for k, v in ns.iteritems():
+ item.add_namespace(k, v)
+ for (s, p, o) in triples:
+ if str(p) == 'http://purl.org/dc/terms/title':
+ try:
+ item.del_triple(URIRef(s), u"dcterms:title")
+ except:
+ pass
+ if str(p) == 'http://purl.org/dc/terms/license':
+ try:
+ item.del_triple(URIRef(s), u"dcterms:license")
+ except:
+ pass
+ if str(p) == 'http://purl.org/dc/terms/rights':
+ try:
+ item.del_triple(URIRef(s), u"dcterms:rights")
+ except:
+ pass
+ for (s, p, o) in triples:
+ item.add_triple(s, p, o)
+ manifest_file_name = os.path.basename(manifest_file)
+ item.manifest['versionlog'][item.currentversion].append('Updated file manifest.rdf')
+ item.sync()
+ if seeAlsoFiles:
+ for fileuri in seeAlsoFiles:
+ fullfilepath = None
+ filepath = fileuri.replace(item.uri, '').strip().lstrip('/')
+ fullfilepath = item.to_dirpath(filepath=filepath)
+ if fullfilepath and item.isfile(fullfilepath):
+ ans = test_rdf(fullfilepath)
+ #with item.get_stream(filepath) as fn:
+ # text = fn.read()
+ #if test_rdf(text):
+ # munge_manifest(text, item)
+ if ans:
+ munge_manifest(fullfilepath, item)
+ return True
+
+def read_manifest(item, manifest_file):
+ triples = []
+ namespaces = {}
+ seeAlsoFiles = []
+ oxdsClasses = ['http://vocab.ox.ac.uk/dataset/schema#Grouping', 'http://vocab.ox.ac.uk/dataset/schema#DataSet']
+
+ aggregates = item.list_rdf_objects(item.uri, "ore:aggregates")
+
+ g = ConjunctiveGraph()
+ gparsed = g.parse(manifest_file, format='xml')
+ namespaces = dict(g.namespaces())
+ #Get the subjects
+ subjects = {}
+ for s in gparsed.subjects():
+ if s in subjects:
+ continue
+ if type(s).__name__ == 'URIRef':
+ if str(s).startswith('file://'):
+ ss = str(s).replace('file://', '')
+ if manifest_file in ss:
+ subjects[s] = URIRef(item.uri)
+ else:
+ manifest_file_path, manifest_file_name = os.path.split(manifest_file)
+ ss = ss.replace(manifest_file_path, '').strip('/')
+ for file_uri in aggregates:
+ if ss in str(file_uri):
+ subjects[s] = URIRef(file_uri)
+ break
+ if not s in subjects:
+ subjects[s] = URIRef(item.uri)
+ else:
+ subjects[s] = URIRef(s)
+ elif type(s).__name__ == 'BNode':
+ replace_subject = True
+ for o in gparsed.objects():
+ if o == s:
+ replace_subject = False
+ if replace_subject:
+ subjects[s] = URIRef(item.uri)
+ else:
+ subjects[s] = s
+ #Get the dataset type
+ #set the subject uri to item uri if it is of type as defined in oxdsClasses
+ datasetType = False
+ for s,p,o in gparsed.triples((None, RDF.type, None)):
+ if str(o) in oxdsClasses:
+ if type(s).__name__ == 'URIRef' and len(s) > 0 and str(s) != str(item.uri) and str(subjects[s]) != str(item.uri):
+ namespaces['owl'] = URIRef("http://www.w3.org/2002/07/owl#")
+ triples.append((item.uri, 'owl:sameAs', s))
+ triples.append((item.uri, RDF.type, o))
+ elif type(s).__name__ == 'BNode' or len(s) == 0 or str(s) == str(item.uri) or str(subjects[s]) == str(item.uri):
+ gparsed.remove((s, p, o))
+ subjects[s] = item.uri
+
+ #Get the uri for the see also files
+ for s,p,o in gparsed.triples((None, URIRef('http://www.w3.org/2000/01/rdf-schema#seeAlso'), None)):
+ if type(o).__name__ == 'URIRef' and len(o) > 0:
+ obj = str(o)
+ if obj.startswith('file://'):
+ obj_path, obj_name = os.path.split(obj)
+ obj = obj.replace(obj_path, '').strip('/')
+ for file_uri in aggregates:
+ if obj in str(file_uri):
+ seeAlsoFiles.append(file_uri)
+ gparsed.remove((s, p, o))
+
+ #Add remaining triples
+ for s,p,o in gparsed.triples((None, None, None)):
+ triples.append((subjects[s], p, o))
+ return namespaces, triples, seeAlsoFiles
+
+def manifest_type(manifest_file):
+ mani_types = []
+ g = ConjunctiveGraph()
+ gparsed = g.parse(manifest_file, format='xml')
+ for s,p,o in gparsed.triples((None, RDF.type, None)):
+ mani_types.append(str(o))
+ if "http://vocab.ox.ac.uk/dataset/schema#DataSet" in mani_types:
+ return "http://vocab.ox.ac.uk/dataset/schema#DataSet"
+ elif "http://vocab.ox.ac.uk/dataset/schema#Grouping" in mani_types:
+ return "http://vocab.ox.ac.uk/dataset/schema#Grouping"
+ return None
+
+def serialisable_stat(stat):
+ stat_values = {}
+ for f in ['st_atime', 'st_blksize', 'st_blocks', 'st_ctime', 'st_dev', 'st_gid', 'st_ino', 'st_mode', 'st_mtime', 'st_nlink', 'st_rdev', 'st_size', 'st_uid']:
+ try:
+ stat_values[f] = stat.__getattribute__(f)
+ except AttributeError:
+ pass
+ return stat_values
+
+def natural_sort(l):
+ convert = lambda text: int(text) if text.isdigit() else text.lower()
+ alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
+ return sorted(l, key = alphanum_key)
+
+def extract_metadata(item):
+ g = item.get_graph()
+ m = defaultdict(list)
+ #for s,p,o in g.triples((URIRef(item.uri), ag.NAMESPACES[dc]['identifier'], None)):
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dc']['title']):
+ m['title'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dc']['identifier']):
+ m['identifier'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dc']['description']):
+ m['description'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dc']['creator']):
+ m['creator'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dc']['subject']):
+ m['subject'].append(o)
+
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['abstract']):
+ m['abstract'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['created']):
+ try:
+ dt = formatDate(str(o))
+ except:
+ dt = o
+ m['created'].append(dt)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['description']):
+ m['description'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['hasVersion']):
+ m['hasVersion'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['identifier']):
+ m['identifier'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['isVersionOf']):
+ m['isVersionOf'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['license']):
+ m['license'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['mediator']):
+ m['mediator'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['modified']):
+ try:
+ dt = formatDate(str(o))
+ except:
+ dt = o
+ m['modified'].append(dt)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['publisher']):
+ m['publisher'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['rights']):
+ m['rights'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['subject']):
+ m['subject'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['dcterms']['title']):
+ m['title'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['oxds']['isEmbargoed']):
+ m['isEmbargoed'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['oxds']['embargoedUntil']):
+ try:
+ dt = formatDate(str(o))
+ except:
+ dt = o
+ m['embargoedUntil'].append(dt)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['oxds']['currentVersion']):
+ m['currentVersion'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['bibo']['doi']):
+ m['doi'].append(o)
+ for o in g.objects(URIRef(item.uri), ag.NAMESPACES['ore']['aggregates']):
+ m['aggregates'].append(o)
+ return dict(m)
+
+def formatDate(dt):
+ dt_human = dt
+ try:
+ dt_obj = parse(dt, dayfirst=True, yearfirst=False)
+ dt_human = dt_obj.strftime("%B %d %Y, %I:%M %p")
+ except:
+ return dt
+ return dt_human
+
+def getSiloModifiedDate(silo_name):
+ solr_params = {}
+ solr_params['q'] = "silo:%s"%silo_name
+ solr_params['wt'] = 'json'
+ solr_params['start'] = 0
+ solr_params['rows'] = 1
+ solr_params['sort'] = "modified desc"
+ solr_params['fl'] = 'modified'
+ solr_response = None
+ try:
+ solr_response = ag.solr.raw_query(**solr_params)
+ except:
+ pass
+ if not solr_response:
+ return ''
+ result = simplejson.loads(solr_response)
+ docs = result['response'].get('docs',None)
+ numFound = result['response'].get('numFound',None)
+ if docs and len(docs) > 0 and docs[0] and 'modified' in docs[0] and len(docs[0]['modified']) > 0:
+ dt = docs[0]['modified'][0]
+ else:
+ return ''
+ dt = formatDate(dt)
+ return dt
+
diff --git a/rdfdatabank/model/__init__.py b/rdfdatabank/model/__init__.py
index e69de29..ba02feb 100644
--- a/rdfdatabank/model/__init__.py
+++ b/rdfdatabank/model/__init__.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+
+import sqlalchemy as sa
+from sqlalchemy import orm
+from rdfdatabank.model import meta
+from rdfdatabank.model.auth import User, Group, Permission, Datasets
+
+def init_model(engine):
+ """Call me before using any of the tables or classes in the model"""
+ ## Reflected tables must be defined and mapped here
+ #global reflected_table
+ #reflected_table = sa.Table("Reflected", meta.metadata, autoload=True,
+ # autoload_with=engine)
+ #orm.mapper(Reflected, reflected_table)
+
+ # We are using SQLAlchemy 0.5 so transactional=True is replaced by
+ # autocommit=False
+ sm = orm.sessionmaker(autoflush=True, autocommit=False, bind=engine)
+
+ meta.engine = engine
+ meta.Session = orm.scoped_session(sm)
diff --git a/rdfdatabank/model/auth.py b/rdfdatabank/model/auth.py
new file mode 100644
index 0000000..b2c21dd
--- /dev/null
+++ b/rdfdatabank/model/auth.py
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+"""
+SQLAlchemy-powered model definitions for repoze.what SQL plugin.
+Sets up Users, Groups and Permissions
+"""
+
+from sqlalchemy import Table, ForeignKey, Column
+from sqlalchemy.types import Unicode, Integer
+from sqlalchemy.orm import relation
+from rdfdatabank.model.meta import metadata, Base
+import os
+from hashlib import sha1
+
+__all__ = ['User', 'Group', 'Permission', 'Datasets']
+
+# This is the association table for the many-to-many relationship between
+# groups and permissions. This is required by repoze.what.
+group_permission_table = Table('group_permission', metadata,
+ Column('group_id', Integer, ForeignKey('silo.id',
+ onupdate="CASCADE", ondelete="CASCADE"), primary_key=True),
+ Column('permission_id', Integer, ForeignKey('permission.id',
+ onupdate="CASCADE", ondelete="CASCADE"), primary_key=True)
+)
+
+# This is the association table for the many-to-many relationship between
+# groups and members - this is, the memberships. It's required by repoze.what.
+user_group_table = Table('user_group', metadata,
+ Column('user_id', Integer, ForeignKey('user.id',
+ onupdate="CASCADE", ondelete="CASCADE"), primary_key=True),
+ Column('group_id', Integer, ForeignKey('silo.id',
+ onupdate="CASCADE", ondelete="CASCADE"), primary_key=True)
+)
+
+class Group(Base):
+ """
+ Group definition for :mod:`repoze.what`.
+
+ Only the ``group_name`` column is required by :mod:`repoze.what`.
+
+ """
+
+ __tablename__ = 'silo'
+
+ # columns
+
+ id = Column(Integer, autoincrement=True, primary_key=True)
+ group_name = Column(Unicode(255), unique=True, nullable=False)
+ silo = Column(Unicode(255))
+
+ # relations
+
+ users = relation('User', secondary=user_group_table, backref='groups')
+
+ # special methods
+
+ #def __repr__(self):
+ # return '' % self.group_name
+
+ #def __unicode__(self):
+ # return self.group_name
+
+class Permission(Base):
+ """
+ Permission definition for :mod:`repoze.what`.
+
+ Only the ``permission_name`` column is required by :mod:`repoze.what`.
+
+ """
+
+ __tablename__ = 'permission'
+
+ # columns
+
+ id = Column(Integer, autoincrement=True, primary_key=True)
+ permission_name = Column(Unicode(255), unique=True, nullable=False)
+
+ # relations
+
+ groups = relation(Group, secondary=group_permission_table, backref='permissions')
+
+ # special methods
+
+ #def __repr__(self):
+ # return '' % self.permission_name
+
+ #def __unicode__(self):
+ # return self.permission_name
+
+class User(Base):
+
+ """
+ User definition.
+
+ This is the user definition used by :mod:`repoze.who`, which requires at
+ least the ``user_name`` column.
+
+ """
+ __tablename__ = 'user'
+
+ #column
+
+ id = Column(Integer, autoincrement=True, primary_key=True)
+ user_name = Column(Unicode(255), unique=True, nullable=False)
+ password = Column(Unicode(80), nullable=False)
+ email = Column(Unicode(255))
+ name = Column(Unicode(255))
+ firstname = Column(Unicode(255))
+ lastname = Column(Unicode(255))
+
+ def _set_password(self, password):
+ """Hash password on the fly."""
+ hashed_password = password
+
+ if isinstance(password, unicode):
+ password_8bit = password.encode('UTF-8')
+ else:
+ password_8bit = password
+
+ salt = sha1()
+ salt.update(os.urandom(60))
+ hash = sha1()
+ hash.update(password_8bit + salt.hexdigest())
+ hashed_password = salt.hexdigest() + hash.hexdigest()
+
+ # Make sure the hased password is an UTF-8 object at the end of the
+ # process because SQLAlchemy _wants_ a unicode object for Unicode
+ # fields
+ if not isinstance(hashed_password, unicode):
+ hashed_password = hashed_password.decode('UTF-8')
+
+ self.password = hashed_password
+
+ def _get_password(self):
+ """Return the password hashed"""
+ return self.password
+
+ def validate_password(self, password):
+ """
+ Check the password against existing credentials.
+
+ :param password: the password that was provided by the user to
+ try and authenticate. This is the clear text version that we will
+ need to match against the hashed one in the database.
+ :type password: unicode object.
+ :return: Whether the password is valid.
+ :rtype: bool
+
+ """
+ hashed_pass = sha1()
+ hashed_pass.update(password + self.password[:40])
+ return self.password[40:] == hashed_pass.hexdigest()
+
+ def permissions(self):
+ """Return a set with all permissions granted to the user."""
+ perms = set()
+ for g in self.groups:
+ perms = perms | set(g.permissions)
+ return perms
+
+ #def __repr__(self):
+ # return '' % (self.name, self.email)
+
+ #def __unicode__(self):
+ # return self.name
+
+class Datasets(Base):
+ """
+ Table to store index of datasets
+ """
+
+ __tablename__ = 'datasets'
+
+ # columns
+ silo = Column(Unicode(50), primary_key=True)
+ id = Column(Unicode(75), primary_key=True)
+
diff --git a/rdfdatabank/model/meta.py b/rdfdatabank/model/meta.py
new file mode 100644
index 0000000..2f2a256
--- /dev/null
+++ b/rdfdatabank/model/meta.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+
+"""Creates SQLAlchemy Metadata and Session object"""
+from sqlalchemy.orm import scoped_session, sessionmaker
+from sqlalchemy import MetaData
+from sqlalchemy.ext.declarative import declarative_base
+
+__all__ = ['engine', 'Session', 'metadata']
+
+# SQLAlchemy database engine. Updated by model.init_model()
+engine = None
+
+# SQLAlchemy session manager. Updated by model.init_model()
+Session = scoped_session(sessionmaker())
+
+#metadata = MetaData()
+Base = declarative_base()
+metadata = Base.metadata
+
diff --git a/rdfdatabank/public/api.html b/rdfdatabank/public/api.html
deleted file mode 100644
index d61a349..0000000
--- a/rdfdatabank/public/api.html
+++ /dev/null
@@ -1,263 +0,0 @@
-
-
-
-
-
-
-
-
-
-Converted document
-
-
-
-
-/objects
-
-
-GETAccept: *, authd - Returns an HTML
-page listing the ’silos’ your account can see
-
-
-POST,PUT,DELETENOOP
-
-
-/objects/{silo name}
-
-
-GETAccept:text/html - Returns an HTML
-page listing the ids, along with a form for item creation
-
-
-GETAccept: text/plain, application/json -
- Returns a JSON-encoded hash/dict, keys map to exising item ids, and
-include embargo information
-
-
-POSTCreate new object
-
-
-
-
-
-
-Parameters-> id = {id to create},
-embargoed = {true|false}, embargoed_until = {ISO8601 date},
-title={(Optional)}
-
-
-Returns:Accept:text/html - 302 to splash
- page for newly created object, 201 otherwise on success. Code 409 if
-object already exists.
-
-
-
-
-PUT,DELETENOOP
-
-
-/objects/{silo name}/{id}
-
-
-GETAccept: text/html - returns HTML
-splash page for item id
-
-
-GETAccept: application/json - returns
-item’s JSON-encoded state
-
-Returns:Accept: text/html - 302 redirect
- to top-level item view on success
-
-
-Returns:Accept: * - 200/201 on
-update/creation of file with filename 403 if filename == existing
- subdirectory name. (For the sake of web UI)
-
-
-
-
-Text-uploadConvenience function for HTML
- to update/create text files
-
-
-
-
-
-
-Parameters-> text = {UTF text to
-store}, filename = {desired filename}
-
-
-Returns:As File upload, but with the
-following changes: if ’filename’ isn’t set, 406 error will result. If
-filename == ’manifest.rdf’, and the text isn’t valid RDF/XML, again, 406
- will result.
-
-
-
-
-
-
-DELETEDeletes the item id - 200
-on success, 404 if id doesn’t exist
-
-
-PUTNOOP
-
-
-/objects/{silo
-name}/{id}/{subpath}
-
-
-GETAccept: text/html - HTML page listing
- the files within a given subpath
-
-
-GETAccept: text/plain, application/json -
- JSON-encoded listing of files {’parts’: { ’filename’: {python os.stat
-results}, ...} with ’readme_text’ being a potential top-level key-value
-pair
-
-
-PUTresults in a 403 - cannot put content
- onto an existing directory
-
Cookies are small text files that can be written and read by websites and stored by the browser on your computer, tablet or smartphone. They do a number of things including allowing access to some types of content and functionality to users, and collecting anonymous user information so that site owners can monitor the performance of their sites. They are a kind of "memory" for a website that can help it respond appropriately to users and user behaviour.
+
+
+
Cookies on this website
+
This website stores cookies for use in website analytics and other cookies that are strictly necessary to provide certain features or content that you have requested, based on the authentication information provided by you.
+
The analytics cookies anonymously track individual visitor behaviour on the website so that we can see how the site is being used. We only use this information for monitoring and improving our website and content for the benefit of our users (you).
API call to obtain a list of datasets in a silo and create a new dataset
+
Controller: datasets action: siloview
+
+
GET: Obtain a list of datasets in a silo
+
Returns
+
+
401 if not a valid user
+
403 if not authorized
+
Accept:text/html
+
Returns the ids of each dataset in the silo, along with a form for changing the embargo information and deleting the dataset. A form for dataset creation is also available.
+
Accept: text/plain, application/json
+
200 OK
+
Returns a JSON-encoded list of dataset ids in that silo along with the embargo information for each dataset
Returns text/HTML listing the ids of each dataset, along with a form for changing the embargo information and deleting the dataset. A form for dataset creation is also available.
+
+
+
+
POST: Create new dataset
+
Parameters
+
+
+
id
{id to create}
+
embargoed
{true|false} (optional). If the parameter is not supplied, a default value of true will be used.
+
embargoed_until
{ISO8601 date} (optional). If embargoed = true and no date has been supplied, a default time delta of 70 years will be used
+
title
{(Optional)}
+
+
+
Returns
+
+
+ 409 if dataset already exists
+ 401 If not a valid user
+ 403 if not authorized
+ 403 if the name of the dataset does not conform to the naming rule (name can contain only the following characters: 0-9a-zA-Z-_)
+
API call to obtain information about the dataset, create a dataset, modify the datasets embargo information, post files to the dataset and delete the dataset. All of this is performed on the latest version of the dataset.
+
Controller: datasets action: datasetview
+
+
GET: Obtain information about the dataset
+
Returns
+
+
404 if dataset is not available
+
401 if not authenticated and dataset is under embargo
+
403 if not authorized and dataset is under embargo
+
Accept: text/html
+
returns HTML splash page for dataset id. The page contains the following:
+
+
Readme text
+
List of files / folders at the top level
+
Dataset's embargo state
+
Manifest data
+
If authenticated and authorized user:
+
+
View options: Choice to view page as anonymous user or as editor
+
Form to modify manifest
+
Form to upload file
+
List of dataset unpack endpoints, to unpack file to new dataset
+
+
+
+
+
Accept: application/json
+
200 OK
+
Returns a JSON-encoded hash/dict. The keys map to the following :
+
+
Readme text - data_returned["readme_text"]
+
Dataset's embargo information data_returned["embargos"]
+
If user is authenticated and authorized (editor = true) / anonymous (editor = false) - data_returned["editor"]
+
List of zipfiles - data_returned["zipfiles"]
+
If user is authenticated and authorized, the view chosen by the user - data_returned["view"]
+
Manifest data in two forms - data_returned["manifest"] and data_returned["manifest_pretty"]
+
Hash dict of files / folders at the top level - data_returned["parts"]["file_or_directory_name"]
+ If it is a file, the file stat information is also available. See http://docs.python.org/library/os.html#os.stat for more information.
+ If it is a directory, the dictionary value is empty as there is no stat information
+
+
+
The file datasetInformation.txt contains an example of the data returned (data_returned)
+
Accept: application/rdf+xml, text/xml
+
returns dataset's RDF manifest as RDF/XML
+
Accept: text/rdf+n3
+
returns dataset's RDF manifest as N3
+
Accept: application/x-turtle
+
Returns dataset's RDF manifest as Turtle
+
Accept: text/rdf+ntriples, text/rdf+nt
+
returns dataset's RDF manifest as ntriples
+
Accept: */*, default
+
returns text/HTML of splash page for dataset id, as described above
+
+
+
+
POST: Create new dataset. Dataset id doesn't exist. (As POST /{silo}/datasets id=...)
+
Parameters
+
+
+
embargoed
{true|false} (optional). If the parameter is not supplied, a default value of true will be used.
+
embargoed_until
{ISO8601 date} (optional). If embargoed = true and no date has been supplied, a default time delta of 70 years will be used
+
title
{(Optional)}
+
+
+
Returns
+
+
+ 401 If not a valid user
+ 403 if not authorized
+ 403 if the name of the dataset does not conform to the naming rule (name can contain only the following characters: 0-9a-zA-Z-_)
+
+
Accept: text/html
+
302 to splash page for newly created dataset
+
Accept: text/plain, application/json
+
201 created
+
Accept: */*, default
+
Returns text/plain, 201 created
+
+
+
+
POST : change embargo information in dataset. On success, version is incremented.
+
+
TODO:
+
Redo this bit of code, removing the parameter embargo_change and just using embargoed and embargoed_until
+
+
Parameters
+
+
+
embargo_change
true (used as a convenience parameter for html forms)
+
embargoed
{true|false}
+
embargoed_until
{ISO8601 date} (optional). If embargoed = true and no date has been supplied, a default time delta of 70 years will be used
+
+
+
Returns
+
+
401 if not a valid user
+
403 if not authorized
+
Accept: text/html
+
302 to splash page of dataset
+
Accept: text/plain, application/json
+
204 updated
+
Accept: */*, default
+
204 updated
+
+
+
+
+
POST: Upload file to root directory. On success, version is incremented.
+
Parameters
+
+
+
file
Multipart-encoded (HTML) file upload
+
filename
{Optional filename for upload}
+
+
+
Returns
+
+
401 if not a valid user
+
403 if not authorized
+
400 if filename contains ..
+
403 if filename is an existing directory in dataset. Cannot post a file to update a directory with a file.
+
400 if filename is manifest.rdf and cannot read / parse rdf in manifest
+
Accept: text/html
+
302 to splash page of dataset
+
Accept: text/plain
+
204 on update, if filename exists.
+
201 on creation, if filename does not exist
+
Accept: */*, default
+
Returns text/plain
+
204 on update, if filename exists.
+
201 on creation, if filename does not exist
+
+
+
+
POST: Text upload. Convenience function for HTML to update/create text files. On success, version is incremented.
+
Parameters
+
+
+
text
{UTF text to store}
+
filename
{desired filename}
+
+
+
Returns
+
+
401 if not a valid user
+
403 if not authorized
+
400 if filename contains ..
+
406 if the parameter filename is missing
+
403 if filename is an existing directory in dataset. Cannot post a file to update a directory with a file.
+
406 if the filename is manifest.rdf and cannot parse rdf
+
Accept: text/html
+
302 to splash page of dataset
+
Accept: text/plain
+
204 on update, if filename exists.
+
201 on creation, if filename does not exist
+
Accept: */*, default
+
Returns text/plain
+
204 on update, if filename exists.
+
201 on creation, if filename does not exist
+
+
+
+
DELETE Deletes the dataset id
+
+
Be Aware!
+
Delete currently deletes the dataset. This action cannot be undone and your data is lost following this action.
+
+
+
TODO:
+
Delete currently deletes the dataset. This SHOULD NOT happen. On delete, create a new version with just a stub in the manifest saying deleted. Also, this dataset should not be returned in the list of datasets and dataset create should not return a 409.
API call to obtain information about a particular version of the dataset
+
Controller: datasets action: datasetview_vnum
+
+
GET: Obtain information about a particular version of the dataset
+
Returns
+
+
404 if dataset is not available
+
404 if version number # of dataset is not available
+
401 if not authenticated and dataset is under embargo
+
403 if not authorized and dataset is under embargo
+
Accept: text/html
+
returns HTML splash page for dataset id. The page contains the following:
+
+
Readme text
+
List of files / folders at the top level
+
Dataset's embargo state
+
Manifest data
+
+
+
Accept: application/json
+
200 OK
+
Returns a JSON-encoded hash/dict. The keys map to the following :
+
+
Readme text - data_returned["readme_text"]
+
Dataset's embargo information data_returned["embargos"]
+
If user is authenticated and authorized (editor = true) / anonymous (editor = false) - data_returned["editor"]
+
If user is authenticated and authorized, the view chosen by the user - data_returned["view"] (Note : this will always return empty as there is no edit possible for previous versions of the dataset)
+
Manifest data - data_returned["manifest_pretty"]
+
Hash dict of files / folders at the top level - data_returned["parts"]["file_or_directory_name"]
+ If it is a file, the file stat information is also available. See http://docs.python.org/library/os.html#os.stat for more information.
+ If it is a directory, the dictionary value is empty as there is no stat information
API call to view files / contents of a folder and add, update or delete a file. All of these actions are performed on the latest version of the dataset.
+
+
Controller: datasets action: itemview
+
+
GET: view the contents of subpath (subpath could point to a file or a folder)
+
Returns
+
+
404 if dataset is not available
+
401 if not authenticated and dataset is under embargo
+
403 if not authorized and dataset is under embargo
+
404 if subpath is not available
+
subpath is a file
+
+
Accept: */*, default
+
The file is served
+
+
subpath is a directory
+
+
Accept: text/html
+
HTML page listing the files within the given subpath and readme_text
+
Accept: text/plain, application/json
+
200 OK
+
Returns a JSON-encoded hash/dict. The keys map to the following :
+
+
Readme text - data_returned["readme_text"]
+
Hash dict of files / folders at subpath (just that level) - data_returned["parts"]["file_or_directory_name"]
+ If it is a file, the file stat information is also available.
+ See http://docs.python.org/library/os.html#os.stat for more information.
+ If it is a directory, the dictionary value is empty as there is no stat information
HTML page listing the files within the given subpath and readme_text
+
+
+
+
+
PUT: used to add content to a file.
+ If the subpath points to manifest.rdf, the content is munged with existing metadata.
+ If the subpath does not exist, a file is created in the same name as the filename in path including all the intermediate directories in the subpath (using os.makedirs) and the contents are added to that file.
+ On success, version is incremented.
+
+
Returns
+
+
401 if not authenticated
+
403 if not authorized
+
403 if subpath points to a directory - cannot put content onto an existing directory
+
400 if subpath contains ..
+
Accept: text/html
+
302 to the subpath, listing the files within the given subpath and readme_text
+
Accept: text/plain, application/json
+
204 Updated, if subpath did exist
+
201 created, if subpath did not exist
+
Accept: */*, default
+
Return text/plain. 201 on creation and 204 on update.
+
+
+
+
POST Upload a file within the subpath
+ If the subpath + filename parameter points to manifest.rdf, the content is munged with existing metadata.
+ If the subpath + filename parameter does not exist, the file is added as filename into subpath. The intermediate directories in the subpath (using os.makedirs) are created if they do not exist.
+ On success, version is incremented.
+
+
Parameters
+
+
+
file
Multipart-encoded (HTML) file upload
+
filename
{Optional filename for upload}.
+ When provided, it is only used if subpath points to an existing directory.
+
+
+
Returns
+
+
401 if not authenticated
+
403 if not authorized
+
400 if subpath contains ..
+
403 if subpath + filename points to an existing directory
+
Accept: text/html
+
302 to the subpath, listing the file filename within the given subpath and readme_text
+
Accept: text/plain, application/json
+
204 Updated, if subpath did exist
+
201 created, if subpath did not exist
+
Accept: */*, default
+
Returns text/plain. 201 on creation and 204 on update.
+
+
+
+
DELETE: deletes files or directories within the dataset. It also performs recursive deletes (directories with sub-directories within them).
+ On success, version is incremented.
+
+
Returns
+
+
401 if not authenticated
+
403 if not authorized
+
403 if subpath is manifest.rdf
+
Accept: */*, default
+
Returns text/plain. 200 OK on successful completion.
API call to obtain a file or view the contents of a folder, for a particular version of the dataset
+
Controller: datasets action: itemview
+
+
GET: Obtain files or view the contents of a directory for a particular version of the dataset.
+
Returns
+
+
404 if dataset, subpath or version number (#) is not available
+
401 if not authenticated and dataset is under embargo
+
403 if not authorized and dataset is under embargo
+
subpath is a file
+
+
Accept: */*, default
+
The file is served
+
+
subpath is a directory
+
+
Accept: text/html
+
HTML page listing the files within the given subpath and readme_text (pertaining to that version)
+
+
TODO:
+
The html display is not showing the files. Fix bug!
+
+
Accept: text/plain, application/json
+
200 OK
+
Returns a JSON-encoded hash/dict. The keys map to the following :
+
+
Readme text - data_returned["readme_text"]
+
Hash dict of files / folders at subpath (just that level) - data_returned["parts"]["file_or_directory_name"]
+ If it is a file, the file stat information is also available.
+ See http://docs.python.org/library/os.html#os.stat for more information.
+ If it is a directory, the dictionary value is empty as there is no stat information
+% if c.embargos and c.current in c.embargos and c.embargos[c.current]:
+<%
+from rdfdatabank.lib.utils import formatDate
+c.emb = c.embargos[c.current]
+dt_human = c.emb[1]
+if dt_human and dt_human.strip():
+ dt_human = formatDate(dt_human)
+%>
+ % if c.emb[0] == True or c.emb[0] == 1 or (isinstance(c.emb[0], basestring) and c.emb[0].strip().lower() in ['true', '1']):
+ % if (isinstance(c.emb[1], basestring) and c.emb[1].strip()):
+ Data package is embargoed until ${dt_human}. Only the metadata is openly accessible.
+ % else:
+ Data package is embargoed indefinitely. Only the metadata is openly accessible.
+ % endif
+ % else:
+ Data package is openly accessible
+ % endif
+% else:
+ Data package is openly accessible
+% endif
+% endif
+
You should be able to browse and search for all of the bibliographic information in this instance of Databank and where allowed, view the data packages.
+
A demo instance of databank is available at http://databank-vm1.oerc.ox.ac.uk/ where you will be able to create data packages, upload files and manage users.
+
+
+
+
Using the Databank API
+
The API provides a restful web interface to all of Databank's features. Documentation on the API
API call to obtain a list of unpack end-points, read the contents of a zip-file (without unpacking) and unpack a zip file into a new / existing dataset
+
Controller: items action: datasetview
+
+
GET
+
Returns
+
+
401 if not a valid user
+
403 if not authorized
+
404 if dataset id does not exist
+
Accept: text/html
+
Returns an HTML page with list of zipfiles in {id} with form to unpack the zip file
+
Accept: text/plain, application/json
+
200 OK
+
Returns a JSON-encoded list of zipfiles in dataset
API call to read the contents of a zip-file (without having to unpack) and unpack a zip file into a new / existing dataset
+
Controller: items action: itemview
+
+
GET
+
Returns
+
+
401 if not a valid user
+
403 if not authorized
+
404 if path does not exist, or if path does not point to a file
+
415 if file is not of type application/zip
+
400 if there was an error reading the zipfile (BadZipFile)
+
Accept: text/html
+
Returns a HTML page listing the contents of the zipfile
+
Accept: text/plain, application/json
+
200 OK
+
Returns a JSON-encoded hash dict listing the contents of the zipfile (files / directories in archive), along with the size of the uncompressed member and the date of last modification of the member as a tuple of 6 values (Year, Month, Day of month, Hours, Minutes, Seconds).
View as Editor , or User
-% endif
-% if c.readme_text:
-<%include file="/readme_section.html"/>
-% endif
-<%include file="/part_list.html"/>
-% if c.item and c.view == "editor":
-
-
Detailed view (As Editor)
-
-
Item's JSON state (Accept: application/json):
-
${repr(c.item)}
-
Item's Embargo state
-
-<%
-c.current = c.id
-%>
-
<%include file="/embargo_form.html"/>
-
Embargo state: True - only those logged in and with edit rights can see item. False - Anyone can GET the item and it's files.
-
Embargo date: Aim is for ISO8601 dates to provide embargo trigger events. Currently unused, unvalidated and unparsed.
+% if c.embargos and item in c.embargos and c.embargos[item]:
+<%
+from rdfdatabank.lib.utils import formatDate
+c.emb = c.embargos[item]
+dt_human = c.emb[1]
+if dt_human and dt_human.strip():
+ dt_human = formatDate(dt_human)
+%>
+ % if c.emb[0] == True or c.emb[0] == 1 or (isinstance(c.emb[0], basestring) and c.emb[0].strip().lower() in ['true', '1']):
+ % if (isinstance(c.emb[1], basestring) and c.emb[1].strip()):
+ Data package is embargoed until ${dt_human}
+ % else:
+ Data package is embargoed indefinitely
+ % endif
+ % else:
+ Data package is openly accessible
+ % endif
+% else:
+ Data package is openly accessible
+% endif
+
+
+% endfor
+
+
+## Pagination and sort options
+%if c.numFound and c.numFound > 0:
+
+ ${pagination()}
+
+% endif
+
+## Number of records, current record start and end and items per page
+%if c.numFound and c.numFound > 0:
+
+
+${ c.numFound} records found.
+
+% if c.numFound > 0:
+
Showing results ${c.start+1} to
+% if (c.start+c.rows) > c.numFound:
+${c.numFound}
+% else:
+${c.start+c.rows}
+% endif
+
+% if c.returned_facets:
+% for facet in c.returned_facets:
+<% count = count + 1 %>
+% if c.returned_facets[facet] and len(c.returned_facets[facet]) > 1:
+
+% endif
+##=============================================================================
+## Number of records, current span of records and items per page
+
+
+
+${ c.numFound} records found.
+
+
+% if c.numFound > 0:
+
Showing results ${c.start+1} to
+% if (c.start+c.rows) > c.numFound:
+${c.numFound}
+% else:
+${c.start+c.rows}
+% endif
+
+% endif
+##=============================================================================
+##Search results
+% if c.docs:
+
+
+<% cnt = 0 %>
+% for doc_index in xrange(len(c.docs)):
+<%
+cnt += 1
+isSilo = False
+if 'type' in c.docs[doc_index] and c.docs[doc_index]['type']:
+ if not isinstance(c.docs[doc_index]['type'], list):
+ c.docs[doc_index]['type'] = [c.docs[doc_index]['type']]
+ for typ in c.docs[doc_index]['type']:
+ if typ.lower() == 'silo':
+ isSilo = True
+%>
+
+% for field in c.chosen_fields:
+% if field in c.docs[doc_index] and field not in ['silo', 'id', 'title']:
+<%
+lbl = field
+if field in c.field_names:
+ lbl = c.field_names[field]
+%>
+
+% endif
+##=============================================================================
+## Number of records, current record start and end and items per page
+%if c.numFound and c.numFound > 0:
+
+
+
+${ c.numFound} records found.
+
+
+% if c.numFound > 0:
+
Showing results ${c.start+1} to
+% if (c.start+c.rows) > c.numFound:
+${c.numFound}
+% else:
+${c.start+c.rows}
+% endif
+
+% endif
+## end sortoptions()
+%def>
+##=============================================================================
+<%def name="numresultspp(idname)">
+% if c.docs and c.add_facet:
+
\ No newline at end of file
diff --git a/rdfdatabank/templates/silo_admin.html b/rdfdatabank/templates/silo_admin.html
index 186e1fb..0b25a9f 100644
--- a/rdfdatabank/templates/silo_admin.html
+++ b/rdfdatabank/templates/silo_admin.html
@@ -1,14 +1,17 @@
-# -*- coding: utf-8 -*-
-<%inherit file="/base.html" />
-<%def name="head_tags()">
- List of Data Archives
-%def>
-% if c.granary_list:
-
+% if c.kw:
+ % if 'title' in c.kw and c.kw.get('title', None):
+
Title:${c.kw.get('title')}
+ % endif
+ % if 'description' in c.kw and c.kw.get('description', None):
+
Description:${c.kw.get('description')}
+ % endif
+ % if 'notes' in c.kw and c.kw.get('notes', None):
+
Notes:${c.kw.get('notes')}
+ % endif
+ % if 'disk_allocation' in c.kw and c.kw.get('disk_allocation', None):
+
Disk Allocation (in Kb):${c.kw.get('disk_allocation')}
+ % endif
+ % if ('administrators' in c.kw and c.kw['administrators']) or ('managers' in c.kw and c.kw['managers']) or ('submitters' in c.kw and c.kw['submitters']):
+
+ % for role in ['administrators', 'managers', 'submitters']:
+ % if role in c.kw and c.kw[role]:
+ <%
+ admins = []
+ admins = [x.strip() for x in c.kw[role].split(",") if x]
+ %>
+
Returns 401 if not a valid user and 403 if not authorized
+
Accept:text/html (with auth)
+
Returns text/HTML
+listing the ids of each dataset, along with a form for changing the embargo information and deleting the dataset.
+A form for dataset creation is also available.
+
Accept: text/plain, application/json (with auth)
+
Returns a JSON-encoded list of dataset ids in that silo, along with the embargo information for each dataset
+
The file siloInformation.txt contains an example of the data returned (data_returned)
+
Accept:*/*, default (with auth)
+
Returns text/HTML
+listing the ids of each dataset, along with a form for changing the embargo information and deleting the dataset.
+A form for dataset creation is also available.
+
+
+
POST, PUT, DELETE NOOP
+
+
+
diff --git a/rdfdatabank/templates/siloview.html b/rdfdatabank/templates/siloview.html
index 438ba5a..c7388d8 100644
--- a/rdfdatabank/templates/siloview.html
+++ b/rdfdatabank/templates/siloview.html
@@ -1,32 +1,24 @@
-# -*- coding: utf-8 -*-
-<%inherit file="/base.html" />
-<%def name="head_tags()">
- List of Data Archives
-%def>
-% if c.silo_name:
-
-% for key in ['title', 'description','owner']:
-% if c.silo.state.has_key(key):
-
API call to obtain information regarding the state of a silo, the state of the latest version of a dataset or the state of a particular version of a dataset.
Returns the state information of a silo. To view an example of the state information returned for the silo Sandbox (ans["silo"]) see SiloStateInfo.txt
+
The state information for a silo contains the following:
+
+Name of the silo (machine name, used in uris) - ans["silo"]
+Base URI for the silo - ans["uri_base"]
+Users who can access the silo (silo owners) - ans["owners"]
+Silo description - ans["description"]
+Title of the silo (human readable) - ans["title"]
+Disk allocation for the silo (in kB) - ans["disk_allocation"]
+List of datasets in the silo with embargo information for each of the datasets - ans["datasets"], ans["datasets"]["dataset_name"]["embargo_info"]
+
+
+
Note:
+
Disk allocation information is not used at the moment. In the future, this will be used to calculate if current levels of disk usage is within the allocation limit and warn or prevent users from submitting data if beyond this limit. Also, the unit will change from kB to MB.
+
+
Controller: states action: siloview
+
+
GET
+
Returns 401 if not a valid user and 403 if not authorized
+
Accept: */*, default (with auth)
Returns a JSON-encoded hash/dict, keys map
+with the silo name, base uri and embargo information for each of the datasets in the silo {silo name} as text/plain
The state information for a dataset contains the following:
+
Information about the dataset
+
+list of files in each version - ans["state"]["files"]
+list of sub-directories in each version - ans["state"]["subdir"]
+List of available versions - ans["state"]["versions]
+manifest file format - ans["state"]["rdffileformat"]
+manifest file name - ans["state"]["rdffilename"]
+Metadata for the dataset - ans["state"]["metadata"].
+
+ createdby - ans["state"]["metadata"]["createdby"]
+ uuid - ans["state"]["metadata"]["uuid"]
+ embargo date - ans["state"]["metadata"]["embargoed_until"]
+ State of embargo (true | false) - ans["state"]["metadata"]["embargoed"]
+
+Id of the dataset - ans["state"]["item_id"]
+Current version of the dataset - ans["state"]["currentversion"]
+Metadata files for each version - ans["state"]["metadata_files"] - This feature is not used at the moment
+Dates when each version was created - ans["state"]["version_dates"]
+Date last modified - ans["state"]["date"]
+
+
Information about each file in the latest version of the dataset
+
+ans["parts"]["file_name"] contains file information for each of the files listed in ans["state"]["files"]["#"] (# is the current version number), including the Namaste files that are generated and the databank metadata file manifest.rdf
+The Nameaste file generated in Databank are
+
+ 3=when - Date last modified (example: 3=2011-02-09T14+15+05,064235)
+ 4=where - name of dataset (example: 4=dataset1)
+
+The file information returns the equivalent of a stat() system call on the given path. See http://docs.python.org/library/os.html#os.stat for more information.
+
+
+
TODO:
+
Currently the state information returns information regarding all versions for some. Modify to return information pertaining only to the relevant version.
+
The name of metadata files are not included in the state information. Need to add files mentioned in 'seeAlso' to ans["state"]["metadata_files"]
+
+
Controller: states action: datasetview
+
+
GET
+
Returns 401 if not a valid user and 403 if not authorized
+
Returns 404 if dataset id does not exist
+
Accept: */*, default (with auth)
Returns a JSON-encoded hash/dict, keys map with the detailed state information of the latest version of the dataset id as text/plain
Returns the state of a particular version of a dataset. To view an example of the state information returned for verison 0 of the dataset dataset1 see DatasetStateInfo-dataset1-version0.txt. For details pertaining to the information returned, see above
+
+
TODO:
+
Currently the state information returns information regarding all versions for some. Modify to return information pertaining only to the relevant version.
+
The name of metadata files are not included in the state information. Need to add files mentioned in 'seeAlso' to ans["state"]["metadata_files"]
+
+
Controller: states action: datasetview_vnum
+
+
GET
+
Returns 401 if not a valid user and 403 if not authorized
+
Returns 404 if dataset id does not exist
+
Returns 404 if dataset version number # does not exist
+
Accept: */*, default (with auth)
Returns a JSON-encoded hash/dict, keys map with the detailed state information of the version # of the dataset id as text/plain
+Auf Deutsch ist die Vereinbarung »umgekehrte zweifache Anführungszeichen für die Zitate zu benutzen, sogar ›einfache Anführungszeichen‹ für die verschachtelte Zitate«; diese Anführungszeichen „dürfen auch solche ‚englische‘ Anführungszeichen sein.“
+
+The en-dash is used between numbers such as in: 1685–1750 (J. S. Bach). It is longer than the hyphen (as in “en-dash”, or, more properly, “en–dash”) but shorter than the em-dash, which is used — like this — as a sort of parenthesis. Neither should be confused with the horizontal bar which is used to introduce quotation in some cases.
+― Like this?
+― Right.
+And here is a transcription of it:
+
+ bismi ăl-la'hi ăr-raḥma'ni ăr-raḥiymi
+
+ ăl-ḥamdu li-lla'hi rabbi ăl-`a'lamiyna
+
+ ăr-raḥma'ni ăr-raḥiymi
+
+ ma'liki yawmi ăd-diyni
+
+ 'iyya'ka na`budu wa-'iyya'ka nasta`iynu
+
+ Ähdina' ăṣ-á¹£ira'á¹a ăl-mustaqiyma
+
+ á¹£ira'á¹a ăllaá¸iyna 'an`amta `alayhim Ä¡ayri ăl-maÄ¡á¸uwbi `alayhim wala' ăá¸-á¸a'lliyna
+
+A rough translation might be:
+
+ In the name of God, the beneficient, the merciful.
+
+ Praise be to God, lord of the worlds.
+
+ The beneficient, the merciful.
+
+ Master of the day of judgment.
+
+ Thee do we worship, and Thine aid we seek.
+
+ Lead us on the right path.
+
+ The path of those on whom Thou hast bestowed favors. Not of those who have earned Thy wrath, nor of those who go astray.
\ No newline at end of file
diff --git a/rdfdatabank/tests/testdata/unicodedata/unicode10.xml b/rdfdatabank/tests/testdata/unicodedata/unicode10.xml
new file mode 100644
index 0000000..b5bc9d1
--- /dev/null
+++ b/rdfdatabank/tests/testdata/unicodedata/unicode10.xml
@@ -0,0 +1,58 @@
+
+
+
+ Some Arabic
+ The following lines are the first chapter of the Qur'an (note that the text runs right to left, and should probably be aligned on the right margin): <br>
+
+ بÙسْم٠ٱللّٰه٠ٱلرَّØْمـَبن٠ٱلرَّØÙيمÙ
+ <br> <br>
+ ٱلْØَمْد٠لÙلّٰه٠رَبّ٠ٱلْعَالَمÙينَ
+ <br> <br>
+ ٱلرَّØْمـَبن٠ٱلرَّØÙيمÙ
+ <br> <br>
+ مَـالÙك٠يَوْم٠ٱلدّÙينÙ
+ <br> <br>
+ Ø¥Ùيَّاكَ نَعْبÙد٠وَإÙيَّاكَ نَسْتَعÙينÙ
+ <br> <br>
+ ٱهْدÙنَــــا ٱلصّÙرَاطَ ٱلمÙسْتَقÙيمَ
+ <br> <br>
+ صÙرَاطَ ٱلَّذÙينَ أَنعَمْتَ عَلَيهÙمْ غَير٠ٱلمَغضÙوب٠عَلَيهÙمْ وَلاَ ٱلضَّالّÙينَ
+ <br> <br>
+And here is a transcription of it: <br>
+ <br>
+ bismi ăl-la'hi ăr-raḥma'ni ăr-raḥiymi <br>
+ <br>
+ ăl-ḥamdu li-lla'hi rabbi ăl-`a'lamiyna <br>
+ <br>
+ ăr-raḥma'ni ăr-raḥiymi <br>
+ <br>
+ ma'liki yawmi ăd-diyni <br>
+ <br>
+ 'iyya'ka na`budu wa-'iyya'ka nasta`iynu <br>
+ <br>
+ Ähdina' ăṣ-á¹£ira'á¹a ăl-mustaqiyma <br>
+ <br>
+ á¹£ira'á¹a ăllaá¸iyna 'an`amta `alayhim Ä¡ayri ăl-maÄ¡á¸uwbi `alayhim wala' ăá¸-á¸a'lliyna <br>
+ <br>
+A rough translation might be: <br>
+ <br>
+ In the name of God, the beneficient, the merciful. <br>
+ <br>
+ Praise be to God, lord of the worlds. <br>
+ <br>
+ The beneficient, the merciful. <br>
+ <br>
+ Master of the day of judgment. <br>
+ <br>
+ Thee do we worship, and Thine aid we seek. <br>
+ <br>
+ Lead us on the right path. <br>
+ <br>
+ The path of those on whom Thou hast bestowed favors. Not of those who have earned Thy wrath, nor of those who go astray.
+ http://www.madore.org/~david/misc/unitest/
+
+
diff --git a/rdfdatabank/tests/testdata/ww1-2862-manifest.xml b/rdfdatabank/tests/testdata/ww1-2862-manifest.xml
new file mode 100644
index 0000000..6f9e364
--- /dev/null
+++ b/rdfdatabank/tests/testdata/ww1-2862-manifest.xml
@@ -0,0 +1,49 @@
+
+
+
+
+
+ Thomas, Edward
+ A Cat
+ Poem
+
+ Copyright Edward Thomas, 1979, reproduced under licence from Faber and Faber Ltd.
+ <br> She had a name among the children;
+<br> But no one loved though someone owned
+<br> Her, locked her out of doors at bedtime
+<br> And had her kittens duly drowned.
+<br> In Spring, nevertheless, this cat
+<br> Ate blackbirds, thrushes, nightingales,
+<br> And birds of bright voice and plume and flight,
+<br> As well as scraps from neighbours' pails.
+<br> I loathed and hated her for this;
+<br> One speckle on a thrush's breast
+<br> Was worth a million such; and yet
+<br> She lived long, till God gave her rest.
+<br><br>
+ Edward Thomas Collected Poems
+ 1979-01-01/1979-12-31
+ Thomas, George
+
+
+ ProQuest
+ http://lion.chadwyck.co.uk/
+
+
+
+
+ Faber and Faber
+ London
+
+
+
+
diff --git a/rdfdatabank/tests/testdata/ww1-860b-manifest.xml b/rdfdatabank/tests/testdata/ww1-860b-manifest.xml
new file mode 100644
index 0000000..7f79989
--- /dev/null
+++ b/rdfdatabank/tests/testdata/ww1-860b-manifest.xml
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+ Thomas, Edward
+ Two Houses
+ This manuscript is untitled but entitled 'Two Houses' in Edward Thomas Collected Poems
+ London
+ Notebook
+ Paper
+ Poem
+ 13
+ MS Don d.28 f.13
+ Everett Sharp
+ ETBODDOND28-13.jpg
+ Copyright of The Bodleian Library, Oxford University / The Edward Thomas Literary Estate
+ 51.501
+ 1915-07-22
+
+
+
+
+ Bodleian Library, University of Oxford
+ Western Manuscripts Collections
+ Broad Street
+ Oxford
+ Oxfordshire
+ OX13BG
+ United Kingdom
+ http://www.bodley.ox.ac.uk/
+
+
+
+
diff --git a/rdfdatabank/tests/testdata/ziptestdir.sh b/rdfdatabank/tests/testdata/ziptestdir.sh
new file mode 100644
index 0000000..e968397
--- /dev/null
+++ b/rdfdatabank/tests/testdata/ziptestdir.sh
@@ -0,0 +1,14 @@
+#! /bin/bash
+#
+# Create ZIP files of test directories
+#
+
+rm testdir.zip
+rm testdir2.zip
+rm testrdf.zip
+zip -r testdir.zip testdir
+zip -r testdir2.zip testdir2
+cd testrdf
+zip -r ../testrdf.zip *
+cd ..
+
diff --git a/rdfdatabank/tests/testlib/SparqlQueryTestCase.py b/rdfdatabank/tests/testlib/SparqlQueryTestCase.py
new file mode 100644
index 0000000..4bd158a
--- /dev/null
+++ b/rdfdatabank/tests/testlib/SparqlQueryTestCase.py
@@ -0,0 +1,405 @@
+#!/usr/bin/python
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+"""
+HTTP and SPARQL query test case support functions
+"""
+
+import os, os.path
+import sys
+import unittest
+import logging
+import httplib
+import base64
+import mimetypes
+import urllib
+import urlparse
+try:
+ # Running Python 2.5 with simplejson?
+ import simplejson as simplejson
+except ImportError:
+ import json as simplejson
+
+if __name__ == "__main__":
+ # For testing:
+ # add main library directory to python path if running stand-alone
+ sys.path.append("..")
+
+#from testlib import TestUtils
+import TestUtils
+
+logger = logging.getLogger('SparqlQueryTestCase')
+
+# Originally copied from http://code.activestate.com/recipes/146306/:
+def get_content_type(filename):
+ return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
+
+# Originally copied from http://code.activestate.com/recipes/146306/:
+def encode_multipart_formdata(fields, files):
+ """
+ fields is a sequence of (name, value) elements for regular form fields.
+ files is a sequence of (name, filename, value, filetype) elements for data to be uploaded as files
+ Return (content_type, body) ready for httplib.HTTP instance
+ """
+ BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
+ CRLF = '\r\n'
+ L = []
+ for (key, value) in fields:
+ L.append('--' + BOUNDARY)
+ L.append('Content-Disposition: form-data; name="%s"' % key)
+ L.append('')
+ L.append(value)
+ for (key, filename, value, filetype) in files:
+ L.append('--' + BOUNDARY)
+ L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
+ L.append('Content-Type: %s' % (filetype or get_content_type(filename)))
+ L.append('')
+ L.append(value)
+ L.append('--' + BOUNDARY + '--')
+ L.append('')
+ body = CRLF.join(L)
+ content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
+ return content_type, body
+
+def bindingType(b):
+ """
+ Function returns the type of a variable binding. Commonly 'uri' or 'literal'.
+ """
+ type = b['type']
+ if type == "typed-literal" and b['datatype'] == "http://www.w3.org/2001/XMLSchema#string":
+ type = 'literal'
+ return type
+
+def findVarBindings(data, var):
+ """
+ Returns a list of (type,value) pairs to which the supplied variable is bound in the results
+ """
+ return [ (bindingType(b[var]),b[var]['value'])
+ for b in data['results']['bindings'] if var in b ]
+
+def findBindingSets(data):
+ """
+ Returns a list of lists of (var:(type,value)) dictionaries from the supplied results
+ """
+ return [ dict([ (var,{'type':bindingType(bindset[var]), 'value':bindset[var]['value']} ) for var in bindset ])
+ for bindset in data['results']['bindings'] ]
+
+class SparqlQueryTestCase(unittest.TestCase):
+ """
+ Test simple query patterns against data in SPARQL endpoint
+
+ Although this module appears as a test suite, its main intended use is as a class
+ that can be subclassed in place of unittest.TestCase, providing additional methods
+    for testing HTTP access and SPARQL queries.
+ """
+ def setUp(self):
+ # Default SPARQL endpoint details
+ self._endpointhost = "localhost"
+ self._endpointpath = "/sparqlquerytest" # Really just a placeholder
+ self._endpointuser = None
+ self._endpointpass = None
+ self._manifesturiroot = None
+ return
+
+ def tearDown(self):
+ return
+
+ def setRequestEndPoint(self, endpointhost=None, endpointpath=None):
+ if endpointhost or endpointpath:
+ if endpointhost:
+ self._endpointhost = endpointhost
+ # Reset credentials if setting host
+ self._endpointuser = None
+ self._endpointpass = None
+ logger.debug("setRequestEndPoint: endpointhost %s: " % self._endpointhost)
+ if endpointpath:
+ self._endpointpath = endpointpath
+ logger.debug("setRequestEndPoint: endpointpath %s: " % self._endpointpath)
+ return
+
+ def setRequestUserPass(self, endpointuser=None, endpointpass=None):
+ if endpointuser:
+ self._endpointuser = endpointuser
+ self._endpointpass = endpointpass
+ logger.debug("setRequestEndPoint: endpointuser %s: " % self._endpointuser)
+ logger.debug("setRequestEndPoint: endpointpass %s: " % self._endpointpass)
+ else:
+ self._endpointuser = None
+ self._endpointpass = None
+ return
+
+ def setRequestUriRoot(self, manifesturiroot=None):
+ if manifesturiroot:
+ self._manifesturiroot = manifesturiroot
+ logger.debug("setRequestUriRoot: %s: " % self._manifesturiroot)
+ else:
+ self._manifesturiroot = None
+ return
+
+ def getRequestPath(self, rel):
+ rel = rel or ""
+ if self._endpointpath:
+ return urlparse.urljoin(self._endpointpath,rel)
+ else:
+ return ""
+
+ def getRequestUri(self, rel):
+ return "http://"+self._endpointhost+self.getRequestPath(rel)
+
+ def getManifestUri(self, rel):
+ return self._manifesturiroot+self.getRequestPath(rel)
+
+ def doRequest(self, command, resource, reqdata=None, reqheaders={}, expect_status=200, expect_reason="OK"):
+ logger.debug(command+" "+self.getRequestUri(resource))
+ #if self._endpointuser:
+ # auth = base64.encodestring("%s:%s" % (self._endpointuser, self._endpointpass)).strip()
+ # reqheaders["Authorization"] = "Basic %s" % auth
+ auth = base64.encodestring("%s:%s" % (self._endpointuser, self._endpointpass)).strip()
+ reqheaders["Authorization"] = "Basic %s" % auth
+ hc = httplib.HTTPConnection(self._endpointhost)
+ #hc = httplib.HTTPSConnection(self._endpointhost)
+ path = self.getRequestPath(resource)
+ response = None
+ responsedata = None
+ repeat = 10
+ while path and repeat > 0:
+ repeat -= 1
+ hc.request(command, path, reqdata, reqheaders)
+ response = hc.getresponse()
+ if response.status != 301: break
+ path = response.getheader('Location', None)
+ if path[0:6] == "https:":
+ # close old connection, create new HTTPS connection
+ hc.close()
+ hc = httplib.HTTPSConnection(self._endpointhost) # Assume same host for https:
+ else:
+ response.read() # Seems to be needed to free up connection for new request
+ logger.debug("Status: %i %s" % (response.status, response.reason))
+
+ if expect_status != "*": self.assertEqual(response.status, expect_status)
+ if expect_status == 201: self.assertTrue(response.getheader('Content-Location', None))
+ if expect_reason != "*": self.assertEqual(response.reason, expect_reason)
+ responsedata = response.read()
+ hc.close()
+ return (response, responsedata)
+
+ def doHTTP_GET(self,
+ endpointhost=None, endpointpath=None, resource=None,
+ expect_status=200, expect_reason="OK",
+ expect_type="text/plain"):
+ reqheaders = {
+ "Accept": expect_type
+ }
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, responsedata) = self.doRequest("GET", resource,
+ reqheaders=reqheaders,
+ expect_status=expect_status, expect_reason=expect_reason)
+ if (expect_type.lower() == "application/json"): responsedata = simplejson.loads(responsedata)
+ return (response, responsedata)
+
+ def doQueryGET(self, query,
+ endpointhost=None, endpointpath=None,
+ expect_status=200, expect_reason="OK",
+ JSON=False):
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ encodequery = urllib.urlencode({"query": query})
+        (response, responsedata) = self.doHTTP_GET(endpointpath=self.getRequestPath("?"+encodequery),
+            expect_status=expect_status, expect_reason=expect_reason,
+            expect_type=("application/JSON" if JSON else None))
+        return responsedata
+
+    def doHTTP_POST(self, data, data_type="application/octet-stream",
+ endpointhost=None, endpointpath=None, resource=None,
+ expect_status=200, expect_reason="OK",
+ expect_type="text/plain"):
+ reqheaders = {
+ "Content-type": data_type,
+ "Accept": expect_type
+ }
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, responsedata) = self.doRequest("POST", resource,
+ reqdata=data, reqheaders=reqheaders,
+ expect_status=expect_status, expect_reason=expect_reason)
+ if (expect_type.lower() == "application/json"): responsedata = simplejson.loads(responsedata)
+ return (response, responsedata)
+
+ def doQueryPOST(self, query,
+ endpointhost=None, endpointpath=None,
+ expect_status=200, expect_reason="OK",
+ JSON=False):
+ reqheaders = {
+ "Content-type": "application/x-www-form-urlencoded",
+ "Accept": "application/JSON"
+ }
+ encodequery = urllib.urlencode({"query": query})
+        return self.doHTTP_POST(
+            encodequery, data_type="application/x-www-form-urlencoded",
+            endpointhost=endpointhost, endpointpath=endpointpath,
+            expect_status=expect_status, expect_reason=expect_reason,
+            expect_type=("application/JSON" if JSON else None))
+
+    def doHTTP_PUT(self, data, data_type="application/octet-stream",
+ endpointhost=None, endpointpath=None, resource=None,
+ expect_status=200, expect_reason="OK",
+ expect_type="text/plain"):
+ reqheaders = {
+ "Content-type": data_type,
+ "Accept": expect_type
+ }
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, responsedata) = self.doRequest("PUT", resource,
+ reqdata=data, reqheaders=reqheaders,
+ expect_status=expect_status, expect_reason=expect_reason)
+ return (response, responsedata)
+
+ def doHTTP_DELETE(self,
+ endpointhost=None, endpointpath=None, resource=None,
+ expect_status=200, expect_reason="OK"):
+ self.setRequestEndPoint(endpointhost, endpointpath)
+ (response, _) = self.doRequest("DELETE", resource,
+ expect_status=expect_status, expect_reason=expect_reason)
+ return response
+
+ def assertVarBinding(self, data, var, type, value):
+ """
+ Asserts that the results for 'var' containing a binding
+ """
+ self.assertTrue( (type, value) in findVarBindings(data, var),
+ """Expected to find %s bound as %s:"%s" in query results"""%(var, type, value))
+
+ def assertBinding(self, data, var, type=None, value=None):
+ self.assertTrue(var in data['head']['vars'], "Expected variable %s binding in query results"%(var))
+ bindings = findBindingSets(data)
+ found = False
+ for b in bindings:
+ if var in b:
+ match = True
+ match &= (type == None) or (b[var]['type'] == type)
+ match &= (value == None) or (b[var]['value'] == value)
+ if match:
+ found = True
+ break
+ self.assertTrue(found, "Expected to find %s bound with type %s to value %s"%(var, type, value))
+
+ def assertBindingCount(self, data, count):
+ bindings = len(data['results']['bindings'])
+ self.assertEqual(bindings, count, "Expected %i result bindings, found %i"%(count, bindings))
+
+ def assertBindingSet(self, data, expectbindingset):
+ """
+ Asserts that a given set of variable bindings occurs in at least one of the
+ result variable bindings from a query.
+ """
+ found = False
+ for resultbinding in findBindingSets(data):
+ # For each query result...
+ match = True
+ for [var, expect] in expectbindingset:
+ # For each expected variable binding
+ self.assertTrue(var in data['head']['vars'],
+ "Expected variable %s binding in query results"%(var))
+ # If variable is not bound in result, continue to next result
+ if not var in resultbinding:
+ match = False
+ continue
+ # Match details for single variable in binding set
+ for facet in expect:
+ match &= facet in resultbinding[var] and resultbinding[var][facet] == expect[facet]
+ # Exit if all variables matched in single binding
+ if match: return
+ # No matching binding found
+ self.assertTrue(False, "Expected to find binding set %s"%(expectbindingset))
+
+ def assertBindingSetPos(self, data, pos, expectbindingset):
+ """
+ Asserts that a given set of variable bindings occurs in at least one of the
+ result variable bindings from a query.
+ """
+ resultbinding = findBindingSets(data)[pos]
+ for [var, expect] in expectbindingset:
+ # For each expected variable binding
+ self.assertTrue(var in data['head']['vars'], "Expected variable %s binding in query results"%(var))
+ # If variable is not bound in result, continue to next result
+ self.assertTrue(var in resultbinding, "Expected variable %s binding in query results"%(var))
+ # Match details for single variable in binding set
+ for facet in expect:
+ self.assertTrue(
+ (facet in resultbinding[var] and resultbinding[var][facet] == expect[facet]),
+ "Result %i expected binding set %s"%(pos,expectbindingset))
+
+ # Actual tests follow
+
+ def testNull(self):
+ # Just checking that this module compiles and loads OK
+ assert True, 'Null test failed'
+
+ # Sentinel/placeholder tests
+
+ def testUnits(self):
+ assert (True)
+
+ def testComponents(self):
+ assert (True)
+
+ def testIntegration(self):
+ assert (True)
+
+ def testPending(self):
+ #assert (False), "Pending tests follow"
+ assert (True)
+
+# Assemble test suite
+
+def getTestSuite(select="unit"):
+ """
+ Get test suite
+
+ select is one of the following:
+ "unit" return suite of unit tests only
+ "component" return suite of unit and component tests
+ "all" return suite of unit, component and integration tests
+ "pending" return suite of pending tests
+ name a single named test to be run
+ """
+ testdict = {
+ "unit":
+ [ "testUnits"
+ , "testNull"
+ ],
+ "component":
+ [ "testComponents"
+ ],
+ "integration":
+ [ "testIntegration"
+ ],
+ "pending":
+ [ "testPending"
+ ]
+ }
+ return TestUtils.getTestSuite(SparqlQueryTestCase, testdict, select=select)
+
+if __name__ == "__main__":
+ TestUtils.runTests("SparqlQueryTestCase.log", getTestSuite, sys.argv)
+
+# End.
diff --git a/rdfdatabank/tests/testlib/TestUtils.py b/rdfdatabank/tests/testlib/TestUtils.py
new file mode 100644
index 0000000..79949ea
--- /dev/null
+++ b/rdfdatabank/tests/testlib/TestUtils.py
@@ -0,0 +1,125 @@
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+# $Id: TestUtils.py 1047 2009-01-15 14:48:58Z graham $
+#
+# Support functions for running different test suites
+#
+# Test suites are selected using a command line argument:
+#
+# Test classes are:
+# "unit" These are stand-alone tests that all complete within a few
+# seconds and do not depend on resources external to the
+# package being tested, (other than other libraries used).
+# "component" These are tests that take loonger to run, or depend on
+# external resources, (files, etc.) but do not depend on
+# external services.
+# "integration" These are tests that exercise interactions with seperate
+# services.
+# "pending" These are tests that have been designed and created, but
+# for which the corresponding implementation has not been
+# completed.
+# "all" return suite of unit, component and integration tests
+# name a single named test to be run.
+#
+
+import logging
+import unittest
+
+def getTestSuite(testclass,testdict,select="unit"):
+ """
+ Assemble test suite from supplied class, dictionary and selector
+
+ testclass is the test class whose methods are test cases
+ testdict is a dictionary of test cases in named test suite,
+ keyed by "unit", "component", etc., or by a named test.
+ select is the test suite selector:
+ "unit" return suite of unit tests only
+ "component" return suite of component tests
+ "integrate" return suite of integration tests
+ "pending" return suite of pending tests
+ "all" return suite of unit and component tests
+ name a single named test to be run
+ """
+ suite = unittest.TestSuite()
+ # Named test only
+ if select[0:3] not in ["uni","com","all","int","pen"]:
+ if not hasattr(testclass, select):
+ print "%s: no test named '%s'"%(testclass.__name__, select)
+ return None
+ suite.addTest(testclass(select))
+ return suite
+ # Select test classes to include
+ if select[0:3] == "uni":
+ testclasses = ["unit"]
+ elif select[0:3] == "com":
+ testclasses = ["component"]
+ elif select[0:3] == "int":
+ testclasses = ["integration"]
+ elif select[0:3] == "pen":
+ testclasses = ["pending"]
+ elif select[0:3] == "all":
+ testclasses = ["unit", "component"]
+ else:
+ testclasses = ["unit"]
+ for c in testclasses:
+ for t in testdict.get(c,[]):
+ if not hasattr(testclass, t):
+ print "%s: in '%s' tests, no test named '%s'"%(testclass.__name__, c, t)
+ return None
+ suite.addTest(testclass(t))
+ return suite
+
+def runTests(logname, getSuite, args):
+ """
+ Run unit tests based on supplied command line argument values
+
+ logname name for logging output file, if used
+ getSuite function to retrieve test suite, given selector value
+ args command line arguments (or equivalent values)
+ """
+ sel = "unit"
+ vrb = 1
+ if len(args) > 1:
+ sel = args[1]
+ if sel[0:3] in ["uni","com","all","int","pen"]:
+ logging.basicConfig(level=logging.WARNING)
+ if sel[0:3] in ["com","all"]: vrb = 2
+ else:
+ # Run single test with elevated logging to file via new handler
+ logging.basicConfig(level=logging.DEBUG)
+ # Enable debug logging to a file
+ fileloghandler = logging.FileHandler(logname,"w")
+ fileloghandler.setLevel(logging.DEBUG)
+ # Use this formatter for shorter log records
+ ###filelogformatter = logging.Formatter('%(levelname)s %(message)s', "%H:%M:%S")
+ # Use this formnatter to display timing information:
+ filelogformatter = logging.Formatter('%(asctime)s.%(msecs)03d %(levelname)s %(message)s', "%H:%M:%S")
+ fileloghandler.setFormatter(filelogformatter)
+ logging.getLogger('').addHandler(fileloghandler)
+ vrb = 2
+ runner = unittest.TextTestRunner(verbosity=vrb)
+ tests = getSuite(select=sel)
+ if tests: runner.run(tests)
+ return
+
+# End.
diff --git a/rdfdatabank/tests/testlib/__init__.py b/rdfdatabank/tests/testlib/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/rdfdatabank/tests/userRolesForTesting.txt b/rdfdatabank/tests/userRolesForTesting.txt
new file mode 100644
index 0000000..eadb8e9
--- /dev/null
+++ b/rdfdatabank/tests/userRolesForTesting.txt
@@ -0,0 +1,10 @@
+username password role silo
+sandbox_user sandbox Submitter (user) sandbox
+sandbox_user2 sandbox2 Submitter (user) sandbox
+sandbox_user3 sandbox3 Submitter (user) sandbox2
+sandbox_manger managertest Manager (manager) sandbox
+sandbox_manger2 managertest2 Manager (manager) sandbox
+sandbox_manger3 managertest3 Manager (manager) sandbox2
+admin test Administrator (admin) sandbox
+admin2 test2 Administrator (admin) sandbox
+admin3 test3 Administrator (admin) sandbox2
\ No newline at end of file
diff --git a/rdfdatabank/tools/broadcastDatasets.py b/rdfdatabank/tools/broadcastDatasets.py
new file mode 100644
index 0000000..18d32cd
--- /dev/null
+++ b/rdfdatabank/tools/broadcastDatasets.py
@@ -0,0 +1,58 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import os
+from rdfdatabank.lib.broadcast import BroadcastToRedis
+from pylons import config
+
+def get_objs_in_dir(items_list, dirname, fnames):
+ for fname in fnames:
+ a = os.path.join(dirname,fname)
+ if fname == 'obj':
+ item = a.split('pairtree_root')[1].strip('/').split('obj')[0].replace('/', '')
+ silo = a.split('pairtree_root')[0].strip('/').split('/')[-1]
+ if not (silo, item) in items_list:
+ items_list.append((silo, item))
+ return
+
+def broadcast_links(src_dir):
+ links_list = []
+ os.path.walk(src_dir,get_objs_in_dir,links_list)
+ b = BroadcastToRedis(config['redis.host'], config['broadcast.queue'])
+
+ for silo, item in links_list:
+ b.creation(silo, item)
+ return
+
+src_dirs = [
+'/silos',
+]
+
+for src_dir in src_dirs:
+ print "starting", src_dir
+ links_list = []
+ os.path.walk(src_dir,get_objs_in_dir,links_list)
+ b = BroadcastToRedis(config['redis.host'], config['broadcast.queue'])
+ for silo, item in links_list:
+ b.creation(silo, item)
diff --git a/rdfdatabank/tools/digitalbooksMetadataWrite.py b/rdfdatabank/tools/digitalbooksMetadataWrite.py
new file mode 100644
index 0000000..365ed8d
--- /dev/null
+++ b/rdfdatabank/tools/digitalbooksMetadataWrite.py
@@ -0,0 +1,50 @@
+from solr import SolrConnection
+import json
+import codecs
+
+solrhost = "http://localhost:8080/solr"
+s = SolrConnection(solrhost)
+
+fieldnames = ['silo', 'id', 'uuid', 'aggregatedResource', 'created', 'creator', 'currentVersion', 'date', 'dateAccepted', 'dateCopyrighted', 'dateSubmitted', 'description', 'embargoStatus', 'embargoedUntilDate', 'mediator', 'isPartOf', 'isVersionOf', 'license', 'modified', 'publisher', 'rights', 'subject', 'timestamp', 'title', 'type']
+
+solr_params = {}
+solr_params['q'] = "silo:digitalbooks"
+solr_params['wt'] = 'json'
+solr_params['fl'] = ','.join(fieldnames)
+solr_params['rows'] = 500000
+solr_params['start'] = 0
+
+solr_response = s.raw_query(**solr_params)
+
+numFound = 0
+docs = None
+fname = "digitalbooks.csv"
+delimiter = '$'
+
+if solr_response:
+ ans = json.loads(solr_response)
+ numFound = ans['response'].get('numFound',None)
+ try:
+ numFound = int(numFound)
+ except:
+ numFound = 0
+ docs = ans['response'].get('docs',None)
+ if numFound > 0 and docs:
+ out_f = codecs.open(fname, 'a', 'utf-8')
+ for row in docs:
+ row_val = []
+ for name in fieldnames:
+ if name in row and row[name] and isinstance(row[name], basestring):
+ row_val.append(row[name])
+ elif name in row and row[name] and isinstance(row[name], list):
+ row_val.append(";".join(row[name]))
+ else:
+ row_val.append("")
+ if row_val:
+ out_f.write("%s\n" %delimiter.join(row_val))
+ out_f.close()
+ else:
+ print 'The search resulted in no documents'
+else:
+ print 'The search resulted in no matches'
+
diff --git a/rdfdatabank/tools/indexingItemsinDatabank.py b/rdfdatabank/tools/indexingItemsinDatabank.py
new file mode 100644
index 0000000..45d13c1
--- /dev/null
+++ b/rdfdatabank/tools/indexingItemsinDatabank.py
@@ -0,0 +1,112 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+#To test keys in redis
+from redis import Redis
+r = Redis()
+k = r.keys('*:embargoed')
+k2 = r.keys('*:embargoed_until')
+ka = r.keys('*')
+len(ka)
+for i in ka:
+ if not 'embargoed' in i:
+ print i
+
+r.llen('silochanges')
+for i in range(r.llen('silochanges')):
+ r.lindex('silochanges', i)
+
+#======================================================================
+
+# To add items to SOLR once in redis (for testing). Stop supervisor workers
+from redis import Redis
+from recordsilo import Granary
+from solr import SolrConnection
+from solr_worker import gather_document
+import simplejson
+
+r = Redis()
+r.llen('silochanges')
+for i in range(r.llen('silochanges')):
+ r.lindex('silochanges', i)
+
+g = Granary("/opt/RDFDatabank/silos")
+solr = SolrConnection("http://localhost:8080/solr")
+
+line = r.rpop("silochanges")
+msg = simplejson.loads(line)
+silo_name = msg['silo']
+s = g.get_rdf_silo(silo_name)
+itemid = msg.get('id')
+if itemid and s.exists(itemid):
+ item = s.get_item(itemid)
+ solr_doc = gather_document(silo_name, item)
+ solr.add(_commit=True, **solr_doc)
+
+#r.rpush("silochanges", line)
+
+#======================================================================
+
+# To add items to redis
+from rdfdatabank.lib.broadcast import BroadcastToRedis
+b = BroadcastToRedis("localhost", 'silochanges')
+
+b.creation("demo", "Apocalypse-auctm315", ident="admin")
+b.creation("demo", "Apocalypse-douce249", ident="admin")
+b.creation("demo", "BibliaPauperum-archgc14", ident="admin")
+b.creation("demo", "CanticumCanticorum-auctm312", ident="admin")
+b.creation("demo", "MCSimulation-WW4jet", ident="admin")
+b.creation("demo", "MCSimulation-WW4jet-CR", ident="admin")
+b.creation("demo", "MonteCarloSimulations", ident="admin")
+b.creation("demo", "blockbooks", ident="admin")
+b.creation("test", "TestSubmission_2", ident="sandbox_user")
+b.creation("dataflow", "GabrielTest", ident="admin")
+b.creation("dataflow", "anusha-test", ident="admin")
+b.creation("dataflow", "anusha-test-testrdf3", ident="admin")
+b.creation("dataflow", "anusha:test", ident="admin")
+b.creation("dataflow", "joe-test-2011-09-16-1", ident="admin")
+b.creation("dataflow", "joetest", ident="admin")
+b.creation("dataflow", "monica-test", ident="admin")
+b.creation("dataflow", "test123", ident="admin")
+b.creation("dataflow", "testdir123", ident="admin")
+b.creation("dataflow", "testdir2", ident="admin")
+b.creation("dataflow", "unpackingTest", ident="admin")
+
+#======================================================================
+"""
+To install the correct versions of Redis-server and python-redis,
+download the latest packages from oneiric
+
+cd ~
+aptitude show redis-server
+sudo apt-get remove --purge redis-server
+wget http://ubuntu.intergenia.de/ubuntu//pool/universe/r/redis/redis-server_2.2.11-3_amd64.deb
+sudo dpkg -i redis-server_2.2.11-3_amd64.deb
+
+cd ~
+sudo rm -r /usr/local/lib/python2.6/dist-packages/redis-1.34.1-py2.6.egg
+sudo apt-get remove --purge python-redis
+wget http://de.archive.ubuntu.com/ubuntu/pool/universe/p/python-redis/python-redis_2.4.5-1_all.deb
+sudo dpkg -i python-redis_2.4.5-1_all.deb
+"""
diff --git a/rdfdatabank/tools/populateState.py b/rdfdatabank/tools/populateState.py
new file mode 100755
index 0000000..e552d50
--- /dev/null
+++ b/rdfdatabank/tools/populateState.py
@@ -0,0 +1,63 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import os
+import simplejson
+from pylons import config
+
+def get_objs_in_dir(items_in_silo, dirname, fnames):
+ for fname in fnames:
+ a = os.path.join(dirname,fname)
+ if fname == 'obj':
+ item = a.split('pairtree_root')[1].strip('/').split('obj')[0].replace('/', '')
+ silo = a.split('pairtree_root')[0].strip('/').split('/')[-1]
+ if not silo in items_in_silo:
+ items_in_silo[silo] = set()
+ items_in_silo[silo].add(item)
+ return
+
+def update_silo_persisted_state(root_dir, src_dir):
+ silo_items = {}
+ os.path.walk(src_dir,get_objs_in_dir,silo_items)
+ for silo, items in silo_items.iteritems():
+ filepath = "%s/%s/persisted_state.json"%(root_dir, silo)
+ if not os.path.isfile(filepath):
+ print "File %s does not exist"%filepath
+ return
+ with open(filepath, "r") as serialised_file:
+ state = simplejson.load(serialised_file)
+ state['items'] = list(items)
+ state['item_count'] = "%d"%len(state['items'])
+ with open(filepath, "w") as serialised_file:
+ simplejson.dump(state, serialised_file)
+ return
+
+if __name__ == '__main__':
+ src_dirs = [
+ '/silos/loadtest'
+ ]
+ root_dir = '/silos'
+ for src_dir in src_dirs:
+ print "starting", src_dir
+ update_silo_persisted_state(root_dir, src_dir)
diff --git a/rdfdatabank/tools/populateTable.py b/rdfdatabank/tools/populateTable.py
new file mode 100644
index 0000000..602702e
--- /dev/null
+++ b/rdfdatabank/tools/populateTable.py
@@ -0,0 +1,32 @@
+import sqlalchemy as sa
+import ConfigParser, os
+from rdfdatabank import model
+from rdfdatabank.lib.auth_entry import add_dataset
+
+class populateTable:
+
+ def __init__(self, configFile="/var/lib/databank/production.ini"):
+ Config = ConfigParser.ConfigParser()
+ Config.read("/var/lib/databank/production.ini")
+ db_conn = Config.get("app:main", "sqlalchemy.url")
+ self.root_dir = Config.get("app:main", "granary.store")
+ engine = sa.create_engine(db_conn)
+ model.init_model(engine)
+
+ def add_objs_in_dir(self, items_in_silo, dirname, fnames):
+ for fname in fnames:
+ a = os.path.join(dirname,fname)
+ if fname == 'obj':
+ item = a.split('pairtree_root')[1].strip('/').split('obj')[0].replace('/', '')
+ silo = a.split('pairtree_root')[0].strip('/').split('/')[-1]
+ add_dataset(silo, item)
+ return
+
+ def populate(self):
+ silo_items = {}
+ os.path.walk(self.root_dir,self.add_objs_in_dir,silo_items)
+
+
+if __name__ == '__main__':
+ p = populateTable()
+ p.populate()
diff --git a/rdfdatabank/tools/renamingSymLinks.py b/rdfdatabank/tools/renamingSymLinks.py
new file mode 100644
index 0000000..5028d4f
--- /dev/null
+++ b/rdfdatabank/tools/renamingSymLinks.py
@@ -0,0 +1,80 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+"""
+This program is used to rename symlinks when the location of silos has changed
+1. Move silo from your old location to the new location
+2. Run this script using
+
+$python renamingSymLinks.py OLDPATH NEWPATH
+
+ OR by calling the function rewrite_links
+
+OLDPATH = "/opt/RDFDatabank/silos"
+NEWPATH = "/silos"
+
+src_dirs = [
+ '/silos/dataflow/pairtree_root'
+ ,'/silos/demo/pairtree_root'
+ ,'/silos/test/pairtree_root'
+]
+
+for src_dir in src_dirs:
+ print "starting", src_dir
+ rewrite_links(src_dir, OLDPATH, NEWPATH)
+"""
+
+import os
+import sys
+
+def get_links_in_dir(items_list, dirname, fnames):
+ for fname in fnames:
+ a = os.path.join(dirname,fname)
+ #if fname == 'obj':
+ # print a
+ if os.path.islink(a):
+ items_list.append(os.path.join(dirname,fname))
+ return
+
+def rewrite_links(src_dir, OLDPATH, NEWPATH):
+ links_list = []
+ os.path.walk(src_dir,get_links_in_dir,links_list)
+
+ for i in range(len(links_list)):
+ linkname = links_list[i]
+ #print "linkname:", linkname
+ realpath = os.readlink(linkname)
+ if realpath.startswith(OLDPATH):
+ newpath = realpath.replace(OLDPATH, NEWPATH)
+ if os.path.islink(linkname) and os.path.isfile(newpath):
+ os.remove(linkname)
+ os.symlink(newpath, linkname)
+ #print "oldpath:", realpath, "\n newpath:", newpath
+
+if __name__ == "__main__":
+ OLDPATH = sys.argv[1]
+ NEWPATH = sys.argv[2]
+ src_dir = NEWPATH
+ rewrite_links(src_dir, OLDPATH, NEWPATH)
+
diff --git a/rdfdatabank/websetup.py b/rdfdatabank/websetup.py
index e6c53b4..fc95140 100644
--- a/rdfdatabank/websetup.py
+++ b/rdfdatabank/websetup.py
@@ -1,10 +1,190 @@
+#-*- coding: utf-8 -*-
+"""
+Copyright (c) 2012 University of Oxford
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
"""Setup the rdfdatabank application"""
import logging
from rdfdatabank.config.environment import load_environment
+from rdfdatabank.model import meta, User, Group, Permission
+from sqlalchemy.exc import IntegrityError
log = logging.getLogger(__name__)
def setup_app(command, conf, vars):
"""Place any commands to setup rdfdatabank here"""
load_environment(conf.global_conf, conf.local_conf)
+ log.info("Creating tables")
+ meta.metadata.create_all(bind=meta.engine)
+ log.info("Successfully setup")
+
+ try:
+ g0a = Group()
+ g0a.group_name = u'databank_administrator'
+ g0a.silo = u'*'
+ meta.Session.add(g0a)
+ """
+ g1a = Group()
+ g1a.group_name = u'sandbox_administrator'
+ g1a.silo = u'sandbox'
+ meta.Session.add(g1a)
+
+ g1b = Group()
+ g1b.group_name = u'sandbox_manager'
+ g1b.silo = u'sandbox'
+ meta.Session.add(g1b)
+
+ g1c = Group()
+ g1c.group_name = u'sandbox_submitter'
+ g1c.silo = u'sandbox'
+ meta.Session.add(g1c)
+
+ g2a = Group()
+ g2a.group_name = u'sandbox2_administrator'
+ g2a.silo = u'sandbox2'
+ meta.Session.add(g2a)
+
+ g2b = Group()
+ g2b.group_name = u'sandbox2_manager'
+ g2b.silo = u'sandbox2'
+ meta.Session.add(g2b)
+
+ g2c = Group()
+ g2c.group_name = u'sandbox2_submitter'
+ g2c.silo = u'sandbox2'
+ meta.Session.add(g2c)
+
+ g3a = Group()
+ g3a.group_name = u'sandbox3_administrator'
+ g3a.silo = u'sandbox3'
+ meta.Session.add(g3a)
+
+ g3b = Group()
+ g3b.group_name = u'sandbox3_manager'
+ g3b.silo = u'sandbox3'
+ meta.Session.add(g3b)
+
+ g3c = Group()
+ g3c.group_name = u'sandbox3_submitter'
+ g3c.silo = u'sandbox3'
+ meta.Session.add(g3c)
+ """
+ p1 = Permission()
+ p1.permission_name = u'administrator'
+ p1.groups.append(g0a)
+ #p1.groups.append(g1a)
+ #p1.groups.append(g2a)
+ #p1.groups.append(g3a)
+ meta.Session.add(p1)
+
+ p2 = Permission()
+ p2.permission_name = u'manager'
+ #p2.groups.append(g1b)
+ #p2.groups.append(g2b)
+ #p2.groups.append(g3b)
+ meta.Session.add(p2)
+
+ p3 = Permission()
+ p3.permission_name = u'submitter'
+ #p3.groups.append(g1c)
+ #p3.groups.append(g2c)
+ #p3.groups.append(g3c)
+ meta.Session.add(p3)
+ """
+ u0 = User()
+ u0.user_name = u'admin'
+ u0.name = u'Databank Administrator'
+ u0._set_password(u'test')
+ u0.groups.append(g0a)
+ meta.Session.add(u0)
+
+ u1 = User()
+ u1.user_name = u'sandbox_user'
+ u1.name = u'Test User I'
+ u1._set_password(u'sandbox')
+ u1.groups.append(g1c)
+ meta.Session.add(u1)
+
+ u2 = User()
+ u2.user_name = u'sandbox_user2'
+ u2.name = u'Test User II'
+ u2._set_password(u'sandbox2')
+ u2.groups.append(g2c)
+ meta.Session.add(u2)
+
+ u3 = User()
+ u3.user_name = u'sandbox_user3'
+ u3.name = u'Test User III'
+ u3._set_password(u'sandbox3')
+ u3.groups.append(g3c)
+ meta.Session.add(u3)
+
+ u4 = User()
+ u4.user_name = u'admin1'
+ u4.name = u'Test Administrator I'
+ u4._set_password(u'test')
+ u4.groups.append(g1a)
+ meta.Session.add(u4)
+
+ u5 = User()
+ u5.user_name = u'admin2'
+ u5.name = u'Test Administrator II'
+ u5._set_password(u'test2')
+ u5.groups.append(g2a)
+ meta.Session.add(u5)
+
+ u6 = User()
+ u6.user_name = u'admin3'
+ u6.name = u'Test Administrator III'
+ u6._set_password(u'test3')
+ u6.groups.append(g3a)
+ meta.Session.add(u6)
+
+ u7 = User()
+ u7.user_name = u'sandbox_manager'
+ u7.name = u'Test Manager I'
+ u7._set_password(u'managertest')
+ u7.groups.append(g1b)
+ meta.Session.add(u7)
+
+ u8 = User()
+ u8.user_name = u'sandbox_manager2'
+ u8.name = u'Test Manager II'
+ u8._set_password(u'managertest2')
+ u8.groups.append(g2b)
+ meta.Session.add(u8)
+
+ u9 = User()
+ u9.user_name = u'sandbox_manager3'
+ u9.name = u'Test Manager III'
+ u9._set_password(u'managertest3')
+ u9.groups.append(g3b)
+ meta.Session.add(u9)
+ """
+ meta.Session.flush()
+ meta.Session.commit()
+ except IntegrityError:
+ log.error('there was a problem adding your auth data, it may have already been added. Continuing with bootstrapping...')
+ #import traceback
+ #print traceback.format_exc()
+ meta.Session.rollback()
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..1359fc3
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,13 @@
+nose
+pairtree==0.5.6-T
+pylons==0.9.7
+repoze.who==2.0a4
+repoze.who-friendlyform
+repoze.profile
+rdflib==2.4.2
+rdfobject
+solrpy
+uuid
+redis==1.34.1
+python-dateutil==1.5
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a1eae4d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+nose
+pairtree==0.5.6-T
+pylons==0.9.7
+repoze.who==2.0a4
+repoze.who-friendlyform
+rdflib==2.4.2
+rdfobject
+solrpy
+uuid
+redis==1.34.1
+python-dateutil==1.5
diff --git a/setup.py b/setup.py
index afdc0a6..6ee5f5b 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
setup(
name='rdfdatabank',
- version='0.1',
+ version='0.2',
description='',
author='',
author_email='',
diff --git a/setup_db.py b/setup_db.py
new file mode 100644
index 0000000..c798c01
--- /dev/null
+++ b/setup_db.py
@@ -0,0 +1,61 @@
+import sqlalchemy as sa
+from rdfdatabank.model import init_model
+from rdfdatabank.lib.auth_entry import add_user, add_user_groups, add_silo
+import ConfigParser
+import sys, os
+
+class setupDB():
+
+ def __init__(self, config_file='/var/lib/databank/production.ini'):
+ if not os.path.exists(config_file):
+ print "Config file not found"
+ sys.exit()
+ c = ConfigParser.ConfigParser()
+ c.read(config_file)
+ if not 'app:main' in c.sections():
+ print "Section app:main not found in config file"
+ sys.exit()
+ engine = sa.create_engine(c.get('app:main', 'sqlalchemy.url'))
+ init_model(engine)
+ return
+
+ def addUser(self, user_details):
+ if not ('username' in user_details and user_details['username'] and \
+ 'password' in user_details and user_details['password'] and \
+ ('name' in user_details and user_details['name'] or \
+ ('firstname' in user_details and user_details['firstname'] and \
+ 'lastname' in user_details and user_details['lastname']))):
+ return False
+ add_user(user_details)
+ return True
+
+ def addSilo(self, silo):
+ add_silo(silo)
+ return
+
+ def addUserGroup(self, username, silo, permission):
+ groups = []
+ groups.append((silo, permission))
+ add_user_groups(username, groups)
+ return
+
+if __name__ == "__main__":
+ #Initialize sqlalchemy
+ s = setupDB()
+
+ #add user
+ username = sys.argv[1]
+ password = sys.argv[2]
+ email = sys.argv[3]
+
+ user_details = {
+ 'username':u'%s'%username,
+ 'password':u"%s"%password,
+ 'name':u'Databank Administrator',
+ 'email':u"%s"%email
+ }
+ s.addUser(user_details)
+
+ #Add user membership
+ s.addUserGroup(username, '*', 'administrator')
+
diff --git a/sss.conf.json b/sss.conf.json
new file mode 100644
index 0000000..a32889d
--- /dev/null
+++ b/sss.conf.json
@@ -0,0 +1,165 @@
+{
+ ############################################################################
+ # SWORD SERVER CONFIGURATION
+ ############################################################################
+ # This configuration file specifies the parameters for SSS
+ #
+ # Each configuration option can be accessed as an attribute of the
+ # Configuration python object. e.g.
+ #
+ # Configuration().base_url
+ #
+ # You may add any other configuration options directly to this JSON file
+ # and they will be picked up in the same way by the Configuration object.
+ #
+ # Some core configuration options have special methods for access built into
+ # the Configuration object (check the docs for details)
+ #
+ # This file is JSON formatted with one extension: comments are allowed.
+ # Comments are must be on a line of their own, and prefixed with #. The #
+ # must be the first non-whitespace character on the line. The configuration
+ # interpreter will strip all such lines before parsing the JSON, but will
+ # leave blank lines in the resulting JSON so that errors may be detected
+ # accurately by line number.
+ #
+ # To validate an this file, run:
+ #
+ # python config.py /path/to/sss.conf.json
+ #
+ ############################################################################
+
+ # The base url of the webservice where SSS is deployed
+ "base_url" : "http://localhost:5000/swordv2/",
+
+ # the DataBank base url for the UI and other such reference points
+ "db_base_url" : "http://192.168.23.133/",
+
+ # explicitly set the sword version, so if you're testing validation of
+ # service documents you can "break" it.
+ "sword_version" : "2.0",
+
+ # require authentication (although DataBank will enforce this anyway)
+ "authenticate" : true,
+
+ # DataBank does not require support of On-Behalf-Of as users will be
+ # authenticated using OAuth. So we turn mediation off in the service document
+ "mediation" : false,
+
+ # What media ranges should the app:accept element in the Service Document support
+ "app_accept" : [ "*/*" ],
+ "multipart_accept" : [ "*/*" ],
+
+ # What packaging formats should the sword:acceptPackaging element in the Service Document support
+ # In DataBank we explicitly support the DataBankBagIt format, as well as the
+ # required Binary format (Note that it does not support SimpleZip)
+ "sword_accept_package" : [
+ "http://purl.org/net/sword/package/Binary",
+ "http://dataflow.ox.ac.uk/package/DataBankBagIt"
+ ],
+
+ # For DataBank this is currently disabled; it can be re-enabled at any
+ # point if upload size limits become necessary.
+ #
+ # maximum upload size to be allowed, in bytes (this default is 16Mb)
+ # (omitting this config option means there is no max_upload_size limit)
+ #"max_upload_size" : 16777216,
+
+ # FIXME: will we ever really support SimpleZip
+ # list of package formats that SSS can provide when retrieving the Media Resource
+ # Here we support the required SimpleZip format and the standard DataBankBagIt
+ "sword_disseminate_package" : [
+ "http://purl.org/net/sword/package/SimpleZip",
+ "http://dataflow.ox.ac.uk/package/DataBankBagIt"
+ ],
+
+
+ # FIXME: what are the dissemination packagers for DataBank?
+ # FIXME: this is a bad config option - way too complex, can we simplify? (e.g. each disseminator is a
+ # dictionary whose keys match parameters, and for which there is an "implementation" key pointing
+ # to the disseminator
+ # Supported package format disseminators; for the content type (dictionary key), the associated
+ # class will be used to package the content for dissemination
+ "package_disseminators" : {
+ # "(& (type=\"application/zip\") (packaging=\"http://purl.org/net/sword/package/SimpleZip\") )" : "sss.ingesters_disseminators.DefaultDisseminator",
+ # "(& (type=\"application/zip\") )" : "sss.ingesters_disseminators.DefaultDisseminator",
+ # "(& (type=\"application/atom+xml;type=feed\") )" : "sss.ingesters_disseminators.FeedDisseminator"
+ },
+
+
+ # FIXME: this is probably not going to be used, as the unpacking will be done asynchronously
+ # in DataBank
+ #
+ # Supported package format ingesters; for the Packaging header (dictionary key), the associated class will
+ # be used to unpackage deposited content
+ "package_ingesters" : {
+ "http://purl.org/net/sword/package/Binary" : "sss.ingesters_disseminators.BinaryIngester",
+ "http://dataflow.ox.ac.uk/package/DataBankBagIt" : "sss.ingesters_disseminators.SimpleZipIngester"
+ },
+
+ # FIXME: what is the Entry Ingester for DataBank?
+ # FIXME: at the moment the Entry Ingester in DataBank is hard coded, but we should break it out here
+ #
+ # Ingester to use for atom entries
+ "entry_ingester" : "rdfdatabank.lib.sword_server.DefaultEntryIngester",
+
+ # we can turn off updates and deletes in order to examine the behaviour of Method Not Allowed errors
+ # We leave these in and configured to true for DataBank, just in case we ever need to turn
+ # off any of the features
+ "allow_update" : true,
+ "allow_delete" : true,
+
+ # we can turn off deposit receipts, which is allowed by the specification
+ # DataBank is well behaved, and ALWAYS returns a deposit receipt
+ "return_deposit_receipt" : true,
+
+ # FIXME: this relates to the package_disseminators configuration above, which
+ # is not yet stabilised or used in DataBank. This configuration is therefore
+ # speculative
+ #
+ # The acceptable formats that the server can return the media resource in
+ # on request.
+ # This is used in Content Negotiation during GET on the EM-URI
+ "media_resource_formats" : [
+ {"content_type" : "application/zip", "packaging": "http://dataflow.ox.ac.uk/package/DataBankBagIt"},
+ {"content_type" : "application/zip"},
+ {"content_type" : "application/atom+xml;type=feed"},
+ {"content_type" : "text/html"}
+ ],
+
+ # FIXME: this relates to the package_disseminators configuration above, which
+ # is not yet stabilised or used in DataBank. This configuration is therefore
+ # speculative
+ #
+ # If no Accept parameters are given to the server on GET to the EM-URI the
+ # following defaults will be used to determine the response type
+ "media_resource_default" : {
+ "content_type" : "application/zip", "packaging": "http://dataflow.ox.ac.uk/package/DataBankBagIt"
+ },
+
+ # FIXME: this is a standard required set for SWORD, although at present
+ # DataBank only supports type=entry for real. The rest coming in the full
+ # sword2 implementation
+ #
+ # The acceptable formats that the server can return the entry document in
+ # on request
+ # This is used in Content Negotiation during GET on the Edit-URI
+ "container_formats" : [
+ {"content_type" : "application/atom+xml;type=entry" },
+ {"content_type" : "application/atom+xml;type=feed" },
+ {"content_type" : "application/rdf+xml" }
+ ],
+
+ # If no Accept parameters are given to the server on GET to the Edit-URI the
+ # following defaults will be used to determine the response type
+ "container_format_default" : {
+ "content_type" : "application/atom+xml;type=entry"
+ },
+
+ # Dynamically load the implementation classes for the 3 main interfaces
+ # Here DataBank provides the implementation classes for the server and
+ # authenticator, and no WebUI is provided, as this is done by the main
+ # application
+ "sword_server" : "rdfdatabank.lib.sword_server.SwordDataBank",
+ "authenticator" : "rdfdatabank.lib.sword_server.DataBankAuthenticator"
+ #"webui" : "sss.repository.WebInterface"
+}
diff --git a/test.ini b/test.ini
index ae777ba..8a97006 100644
--- a/test.ini
+++ b/test.ini
@@ -1,3 +1,23 @@
+# Copyright (c) 2012 University of Oxford
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# rdfdatabank - Pylons testing environment configuration
#
diff --git a/who.ini b/who.ini
index 88f2cb4..e74b8a6 100644
--- a/who.ini
+++ b/who.ini
@@ -1,11 +1,52 @@
+# Copyright (c) 2012 University of Oxford
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+[plugin:redirector]
+# identification and challenge
+#use = repoze.who.plugins.redirector:make_plugin
+#login_url = /login
+
+[plugin:friendlyform]
+# Redirecting form which does login via a "post" from a regular /login form
+use = repoze.who.plugins.friendlyform:FriendlyFormPlugin
+login_form_url= /login
+login_handler_path = /login_handler
+post_login_url = /welcome
+logout_handler_path = /logout_handler
+post_logout_url = /logout
+rememberer_name = auth_tkt
+login_counter_name = logins
+
[plugin:auth_tkt]
# identification and authentication
use = repoze.who.plugins.auth_tkt:make_plugin
secret = sup3rs33kr1t
cookie_name = databank
-secure = True
+secure = False
include_ip = False
+[plugin:sqlauth]
+# An SQLAlchemy authorization plugin
+use = rdfdatabank.lib.auth:authenticator
+
[plugin:basicauth]
# identification and challenge
use = repoze.who.plugins.basicauth:make_plugin
@@ -18,27 +59,31 @@ filename = %(here)s/passwd
check_fn = repoze.who.plugins.htpasswd:crypt_check
[general]
-request_classifier = repoze.who.classifiers:default_request_classifier
+#request_classifier = repoze.who.classifiers:default_request_classifier
+request_classifier = rdfdatabank.lib.reqclassifier:custom_request_classifier
challenge_decider = repoze.who.classifiers:default_challenge_decider
remote_user_key = REMOTE_USER
[identifiers]
-# plugin_name;classifier_name:.. or just plugin_name (good for any)
plugins =
- auth_tkt
- basicauth
+ friendlyform;browser
+ auth_tkt
+ basicauth
[authenticators]
-# plugin_name;classifier_name.. or just plugin_name (good for any)
plugins =
- auth_tkt
- htpasswd
+ auth_tkt
+ sqlauth
+# htpasswd
[challengers]
# plugin_name;classifier_name:.. or just plugin_name (good for any)
plugins =
- basicauth
+ friendlyform;browser
+ basicauth
[mdproviders]
plugins =
- rdfdatabank.lib.ident_md:IdentMDProvider
+ rdfdatabank.lib.auth:user
+ rdfdatabank.lib.auth:group
+# rdfdatabank.lib.ident_md:IdentMDProvider