Skip to content

Commit

Permalink
Databank now supports 3 embargo states - Dark, Embargoed and Open.
Browse files Browse the repository at this point in the history
Added a profiler to the pylons middleware
  • Loading branch information
Anusha Ranganathan committed Sep 20, 2011
1 parent 1b3b927 commit af77eb7
Show file tree
Hide file tree
Showing 18 changed files with 1,777 additions and 689 deletions.
2 changes: 1 addition & 1 deletion development-jenkins.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ debug = false
# Uncomment and replace with the address which should receive any error reports
#email_to = [email protected]
smtp_server = localhost
error_email_from = paste@localhost
error_email_from = paste@jenkins

[server:main]
use = egg:Paste#http
Expand Down
24 changes: 13 additions & 11 deletions development.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# The %(here)s variable will be replaced with the parent directory of this file
#
[DEFAULT]
debug = false
debug = true
# Uncomment and replace with the address which should receive any error reports
#email_to = [email protected]
smtp_server = localhost
Expand All @@ -13,11 +13,11 @@ error_email_from = paste@localhost
[server:main]
use = egg:Paste#http
#Use these settings to run pylons using mod_wsgi and apache
host = 127.0.0.1
port = 5000
#host = 127.0.0.1
#port = 5000
#Use these settings to run pylons from the commandline
#host = 0.0.0.0
#port = 80
host = 0.0.0.0
port = 80

[app:main]
use = egg:rdfdatabank
Expand All @@ -30,16 +30,18 @@ beaker.session.secret = somesecret

who.config_file = %(here)s/who.ini
who.log_level = info
who.log_file = /var/log/databank/who.log
#who.log_file = stdout
#who.log_file = /var/log/databank/who.log
who.log_file = stdout
#who.log_file = %(here)s/logs/who.log

redis.host = localhost

granary.store = %(here)s/silos
#granary.uri_root = http://databank.bodleian.ox.ac.uk/datasets/
granary.uri_root = http://192.168.23.133/

profile.log_filename = %(here)s/logs/profile.log
profile.path = /__profile__

auth.file = %(here)s/passwd
auth.info = %(here)s/rdfdatabank/config/users.py

Expand Down Expand Up @@ -80,17 +82,17 @@ keys = generic

[logger_root]
level = INFO
handlers = logfile
handlers = console

[logger_routes]
level = INFO
handlers = logfile
handlers = console
qualname = routes.middleware
# "level = DEBUG" logs the route matched and routing variables.

[logger_rdfdatabank]
level = DEBUG
handlers = logfile
handlers = console
qualname = rdfdatabank

[handler_console]
Expand Down
4 changes: 1 addition & 3 deletions production.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ debug = false
# Uncomment and replace with the address which should receive any error reports
email_to = [email protected]
smtp_server = localhost
error_email_from = paste@localhost
error_email_from = paste@databank

[server:main]
use = egg:Paste#http
Expand All @@ -35,10 +35,8 @@ who.log_file = /var/log/databank/who.log

redis.host = localhost

#granary.store = %(here)s/silos
granary.store = /silos
granary.uri_root = http://databank.ora.ox.ac.uk/
#granary.uri_root = http://163.1.127.173/

auth.file = %(here)s/passwd
auth.info = %(here)s/rdfdatabank/config/users.py
Expand Down
10 changes: 9 additions & 1 deletion rdfdatabank/config/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,21 @@ def make_app(global_conf, full_stack=True, static_files=True, **app_conf):
app = PylonsApp()

#app = httpexceptions.make_middleware(app, global_conf)
if asbool(config['debug']):
from repoze.profile.profiler import AccumulatingProfileMiddleware
app = AccumulatingProfileMiddleware(
app,
log_filename=app_conf['profile.log_filename'],
discard_first_request=True,
flush_at_shutdown=True,
path=app_conf['profile.path']
)

# Routing/Session/Cache Middleware
app = RoutesMiddleware(app, config['routes.map'])
app = SessionMiddleware(app, config)
app = CacheMiddleware(app, config)


# CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares)
if asbool(full_stack):
# Handle Python exceptions
Expand Down
54 changes: 37 additions & 17 deletions rdfdatabank/controllers/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
import re, os, shutil, codecs
import simplejson
from datetime import datetime, timedelta
from dateutil.relativedelta import *
from dateutil.parser import parse
import time
from uuid import uuid4
from pylons import request, response, session, tmpl_context as c, url, app_globals as ag
from pylons.controllers.util import abort, redirect
from pylons.decorators import rest
from paste.fileapp import FileApp
from rdfdatabank.lib.base import BaseController, render
from rdfdatabank.lib.utils import create_new, is_embargoed, get_readme_text, test_rdf, munge_manifest, serialisable_stat, allowable_id2
from rdfdatabank.lib.utils import create_new, is_embargoed, get_readme_text, test_rdf, munge_manifest, serialisable_stat, allowable_id2, get_rdf_template
from rdfdatabank.lib.file_unpack import get_zipfiles_in_dataset
from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse

Expand Down Expand Up @@ -218,14 +220,16 @@ def datasetview(self, silo, id):
if ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]:
c.editor = True

if c.version and not c.version == currentversion:
c.editor = False


c.show_files = True
#Only the administrator, manager and creator can view embargoed files.
if embargoed and not c.editor:
c.show_files = False

#Display but do not edit previous versions of files, since previous versions are read only.
if c.version and not c.version == currentversion:
c.editor = False

# View options
if "view" in options and c.editor:
c.view = options['view']
Expand All @@ -240,7 +244,8 @@ def datasetview(self, silo, id):
c.embargos[id] = is_embargoed(c_silo, id)
c.parts = item.list_parts(detailed=True)
c.manifest_pretty = item.rdf_to_string(format="pretty-xml")
c.manifest = item.rdf_to_string()
#c.manifest = item.rdf_to_string()
c.manifest = get_rdf_template(item.uri, id)
c.zipfiles = get_zipfiles_in_dataset(item)
c.readme_text = None
#if item.isfile("README"):
Expand Down Expand Up @@ -366,20 +371,28 @@ def datasetview(self, silo, id):
abort(403)
item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
#if params.has_key('embargoed'):
if (params.has_key('embargo_change') and params.has_key('embargoed')) or \
(params.has_key('embargoed') and params['embargoed'].lower() == 'true'):
if (params.has_key('embargo_change') and params.has_key('embargoed') and \
params['embargoed'].lower() in ['true', '1'] and params['embargo_change'].lower() in ['true', '1']) or \
(params.has_key('embargoed') and params['embargoed'].lower() in ['true', '1']):
embargoed_until_date = None
if params.has_key('embargoed_until') and params['embargoed_until']:
embargoed_until_date = params['embargoed_until']
elif params.has_key('embargo_days_from_now') and params['embargo_days_from_now']:
embargoed_until_date = (datetime.now() + timedelta(days=params['embargo_days_from_now'])).isoformat()
else:
embargoed_until_date = (datetime.now() + timedelta(days=365*70)).isoformat()
try:
embargoed_until_date = parse(params['embargoed_until']).isoformat()
except:
embargoed_until_date = (datetime.now() + relativedelta(years=+70)).isoformat()
elif params.has_key('embargo_days_from_now') and params['embargo_days_from_now'].isdigit():
embargoed_until_date = (datetime.now() + timedelta(days=int(params['embargo_days_from_now']))).isoformat()
#It is embargoed indefinitely by default
#else:
# embargoed_until_date = (datetime.now() + timedelta(days=365*70)).isoformat()
item.metadata['embargoed'] = True
item.metadata['embargoed_until'] = embargoed_until_date
item.metadata['embargoed_until'] = ''
item.del_triple(item.uri, u"oxds:isEmbargoed")
item.del_triple(item.uri, u"oxds:embargoedUntil")
item.add_triple(item.uri, u"oxds:isEmbargoed", 'True')
item.add_triple(item.uri, u"oxds:embargoedUntil", embargoed_until_date)
if embargoed_until_date:
item.metadata['embargoed_until'] = embargoed_until_date
item.add_triple(item.uri, u"oxds:embargoedUntil", embargoed_until_date)
else:
#if is_embargoed(c_silo, id)[0] == True:
item.metadata['embargoed'] = False
Expand Down Expand Up @@ -822,13 +835,15 @@ def itemview(self, silo, id, path):
if ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]:
c.editor = True

if c.version and not c.version == currentversion:
c.editor = False

c.show_files = True
#Only the administrator, manager and creator can view embargoed files.
if embargoed and not c.editor:
c.show_files = False

#Display but do not edit previous versions of files, since previous versions are read only.
if c.version and not c.version == currentversion:
c.editor = False

# View options
if "view" in options and c.editor:
c.view = options['view']
Expand Down Expand Up @@ -1063,6 +1078,11 @@ def itemview(self, silo, id, path):
response.status_int = 403
response.status = "403 Forbidden"
return "Forbidden - Cannot delete the manifest"
if '3=' in path or '4=' in path:
response.content_type = "text/plain"
response.status_int = 403
response.status = "403 Forbidden"
return "Forbidden - These files are generated by the system and connot be deleted"
item.increment_version_delta(clone_previous_version=True, copy_filenames=['manifest.rdf'])
item.del_stream(path)
item.del_triple(item.uri, u"dcterms:modified")
Expand Down
1 change: 1 addition & 0 deletions rdfdatabank/controllers/users.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import logging
import simplejson
import codecs
from pylons import request, response, session, config, tmpl_context as c, url
from pylons.controllers.util import abort, redirect
from pylons.decorators import rest
Expand Down
31 changes: 24 additions & 7 deletions rdfdatabank/lib/file_unpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@ class BadZipfile(Exception):
"""Cannot open zipfile using commandline tool 'unzip' to target directory"""

def check_file_mimetype(real_filepath, mimetype):
if os.path.isdir(real_filepath):
return False
if os.path.islink(real_filepath):
real_filepath = os.readlink(real_filepath)
if not os.path.isfile(real_filepath):
return False
p = subprocess.Popen("file -ib '%s'" %(real_filepath), shell=True, stdout=subprocess.PIPE)
output_file = p.stdout
output_str = output_file.read()
Expand All @@ -29,11 +33,15 @@ def check_file_mimetype(real_filepath, mimetype):
else:
return False

def get_zipfiles_in_dataset_old(dataset):
def get_zipfiles_in_dataset(dataset):
derivative = dataset.list_rdf_objects("*", "ore:aggregates")
zipfiles = {}
if derivative and derivative.values() and derivative.values()[0]:
for file_uri in derivative.values()[0]:
#if derivative and derivative.values() and derivative.values()[0]:
if derivative:
#for file_uri in derivative.values()[0]:
for file_uri in derivative:
if not file_uri.lower().endswith('.zip'):
continue
filepath = file_uri[len(dataset.uri)+1:]
real_filepath = dataset.to_dirpath(filepath)
if os.path.islink(real_filepath):
Expand All @@ -43,7 +51,7 @@ def get_zipfiles_in_dataset_old(dataset):
zipfiles[filepath]="%s-%s"%(dataset.item_id, fn)
return zipfiles

def get_zipfiles_in_dataset(dataset):
def get_zipfiles_in_dataset_new(dataset):
p = subprocess.Popen("""file -iL `find %s -name '*.zip'` | grep "application/zip" | awk -F":" '{print $1}'""" %dataset.to_dirpath(), shell=True, stdout=subprocess.PIPE)
stdout_value = p.communicate()[0]
zipfiles = {}
Expand Down Expand Up @@ -148,6 +156,9 @@ def unpack_zip_item(target_dataset, current_dataset, zip_item, silo, ident):
if os.path.islink(filepath):
filepath = os.readlink(filepath)

emb = target_dataset.metadata.get('embargoed')
emb_until = target_dataset.metadata.get('embargoed_until')

# -- Step 1 -----------------------------
unpacked_dir = unzip_file(filepath)

Expand Down Expand Up @@ -181,9 +192,15 @@ def unpack_zip_item(target_dataset, current_dataset, zip_item, silo, ident):
target_dataset.add_triple(target_dataset.uri, u"rdf:type", "oxds:Grouping")
target_dataset.add_triple(target_dataset.uri, "dcterms:isVersionOf", file_uri)
#TODO: Adding the following metadata again as moving directory deletes all this information. Need to find a better way
embargoed_until_date = (datetime.now() + timedelta(days=365*70)).isoformat()
target_dataset.add_triple(target_dataset.uri, u"oxds:isEmbargoed", 'True')
target_dataset.add_triple(target_dataset.uri, u"oxds:embargoedUntil", embargoed_until_date)
if emb:
target_dataset.add_triple(target_dataset.uri, u"oxds:isEmbargoed", 'True')
if emb_until:
target_dataset.add_triple(target_dataset.uri, u"oxds:embargoedUntil", emb_until)
else:
target_dataset.add_triple(target_dataset.uri, u"oxds:isEmbargoed", 'False')
#The embargo
#embargoed_until_date = (datetime.now() + timedelta(days=365*70)).isoformat()
#target_dataset.add_triple(target_dataset.uri, u"oxds:embargoedUntil", embargoed_until_date)
target_dataset.add_triple(target_dataset.uri, u"dcterms:identifier", target_dataset.item_id)
target_dataset.add_triple(target_dataset.uri, u"dcterms:mediator", ident)
target_dataset.add_triple(target_dataset.uri, u"dcterms:publisher", ag.publisher)
Expand Down
39 changes: 29 additions & 10 deletions rdfdatabank/lib/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
from dateutil.relativedelta import *
from dateutil.parser import parse
from time import sleep
from redis import Redis
from redis.exceptions import ConnectionError
Expand All @@ -14,6 +16,7 @@
#from rdflib.parser import StringInputSource
from rdflib import Namespace, RDF, RDFS, URIRef, Literal, BNode


from uuid import uuid4
import re

Expand Down Expand Up @@ -146,23 +149,31 @@ def is_embargoed_no_redis(silo, id, refresh=False):
def create_new(silo, id, creator, title=None, embargoed=True, embargoed_until=None, embargo_days_from_now=None, **kw):
item = silo.get_item(id, startversion="0")
item.metadata['createdby'] = creator
item.metadata['embargoed'] = embargoed
item.metadata['embargoed_until'] = ''
item.metadata['uuid'] = uuid4().hex
item.add_namespace('oxds', "http://vocab.ox.ac.uk/dataset/schema#")
item.add_triple(item.uri, u"rdf:type", "oxds:DataSet")

if embargoed:
if embargoed_until:
embargoed_until_date = embargoed_until
elif embargo_days_from_now:
embargoed_until_date = (datetime.now() + timedelta(days=embargo_days_from_now)).isoformat()
else:
embargoed_until_date = (datetime.now() + timedelta(days=365*70)).isoformat()
item.metadata['embargoed_until'] = embargoed_until_date
if embargoed==True or embargoed.lower() in ['true', '1'] :
item.metadata['embargoed'] = True
item.add_triple(item.uri, u"oxds:isEmbargoed", 'True')
item.add_triple(item.uri, u"oxds:embargoedUntil", embargoed_until_date)
embargoed_until_date = None
if embargoed_until:
try:
embargoed_until_date = parse(embargoed_until).isoformat()
except:
embargoed_until_date = (datetime.now() + relativedelta(years=+70)).isoformat()
elif embargo_days_from_now and embargo_days_from_now.isdigit():
embargoed_until_date = (datetime.now() + timedelta(days=int(embargo_days_from_now))).isoformat()
#TODO: Do we want the default embargo_until to be 70 years or indefinite. Going with indefinite
#else:
# embargoed_until_date = (datetime.now() + relativedelta(years=+70)).isoformat()
if embargoed_until_date:
item.metadata['embargoed_until'] = embargoed_until_date
item.add_triple(item.uri, u"oxds:embargoedUntil", embargoed_until_date)
else:
item.add_triple(item.uri, u"oxds:isEmbargoed", 'False')
item.metadata['embargoed'] = False
item.add_triple(item.uri, u"dcterms:identifier", id)
item.add_triple(item.uri, u"dcterms:mediator", creator)
item.add_triple(item.uri, u"dcterms:publisher", ag.publisher)
Expand All @@ -188,6 +199,14 @@ def get_readme_text(item, filename="README"):
text = fn.read().decode("utf-8")
return u"%s\n\n%s" % (filename, text)

def get_rdf_template(item_uri, item_id):
    """Return a minimal RDF/XML manifest template for a dataset.

    Builds a graph rooted at *item_uri* that carries a single
    ``dcterms:identifier`` triple holding *item_id*, binds the ``rdf``
    and ``dcterms`` prefixes for readable output, and returns the graph
    serialized as RDF/XML with a trailing newline appended.
    """
    graph = ConjunctiveGraph(identifier=item_uri)
    # Bind the prefixes used in the serialized manifest.
    for prefix, namespace in (
        ('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
        ('dcterms', 'http://purl.org/dc/terms/'),
    ):
        graph.bind(prefix, namespace)
    graph.add((
        URIRef(item_uri),
        URIRef('http://purl.org/dc/terms/identifier'),
        Literal(item_id),
    ))
    return graph.serialize(format='xml', encoding="utf-8") + '\n'

#def test_rdf(text):
def test_rdf(mfile):
g = ConjunctiveGraph()
Expand Down
2 changes: 1 addition & 1 deletion rdfdatabank/templates/datasetview.html
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ <h3>Information for version ${c.version} of the dataset</h3>
<p><small>Embargo date: Aim is for ISO8601 dates to provide embargo trigger events. Currently unused, unvalidated and unparsed.</small></p>
</dd>
<!-- Change RDF Manifest -->
<dt><b>Change RDF Manifest:</b></dt>
<dt><b>Add metadata to the RDF Manifest:</b></dt>
<dd><%include file="/rdf_manifest_form.html"/></dd>
</dl>
<!-- Upload File -->
Expand Down
Loading

0 comments on commit af77eb7

Please sign in to comment.