diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..80dfe70 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include rdfdatabank/config/deployment.ini_tmpl +recursive-include rdfdatabank/public * +recursive-include rdfdatabank/templates * diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..ada0e6d --- /dev/null +++ b/README.txt @@ -0,0 +1,19 @@ +This file is for you to describe the rdfdatabank application. Typically +you would include information such as the information below: + +Installation and Setup +====================== + +Install ``rdfdatabank`` using easy_install:: + + easy_install rdfdatabank + +Make a config file as follows:: + + paster make-config rdfdatabank config.ini + +Tweak the config file as appropriate and then setup the application:: + + paster setup-app config.ini + +Then you are ready to go. diff --git a/development.ini b/development.ini new file mode 100644 index 0000000..08f5116 --- /dev/null +++ b/development.ini @@ -0,0 +1,81 @@ +# +# rdfdatabank - Pylons development environment configuration +# +# The %(here)s variable will be replaced with the parent directory of this file +# +[DEFAULT] +debug = true +# Uncomment and replace with the address which should receive any error reports +#email_to = you@yourdomain.com +smtp_server = localhost +error_email_from = paste@localhost + +[server:main] +use = egg:Paste#http +host = 127.0.0.1 +port = 5000 + +[app:main] +use = egg:rdfdatabank +full_stack = true +static_files = true + +cache_dir = %(here)s/data +beaker.session.key = rdfdatabank +beaker.session.secret = somesecret + +who.config_file = %(here)s/who.ini +who.log_level = info +who.log_file = stdout + +redis.host = localhost + +granary.store = silos + + +# If you'd like to fine-tune the individual locations of the cache data dirs +# for the Cache data, or the Session saves, un-comment the desired settings +# here: +#beaker.cache.data_dir = %(here)s/data/cache +#beaker.session.data_dir = %(here)s/data/sessions + +# WARNING: *THE LINE BELOW MUST BE UNCOMMENTED ON A PRODUCTION ENVIRONMENT* +# Debug mode will enable the interactive debugging tool, allowing ANYONE to +# execute malicious code after an exception is raised. +#set debug = false + + +# Logging configuration +[loggers] +keys = root, routes, rdfdatabank + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = INFO +handlers = console + +[logger_routes] +level = INFO +handlers = +qualname = routes.middleware +# "level = DEBUG" logs the route matched and routing variables. + +[logger_rdfdatabank] +level = DEBUG +handlers = +qualname = rdfdatabank + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(asctime)s,%(msecs)03d %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/ez_setup.py b/ez_setup.py new file mode 100644 index 0000000..d24e845 --- /dev/null +++ b/ez_setup.py @@ -0,0 +1,276 @@ +#!python +"""Bootstrap setuptools installation + +If you want to use setuptools in your package's setup.py, just include this +file in the same directory with it, and add this to the top of your setup.py:: + + from ez_setup import use_setuptools + use_setuptools() + +If you want to require a specific version of setuptools, set a download +mirror, or use an alternate download directory, you can do so by supplying +the appropriate options to ``use_setuptools()``. + +This file can also be run as a script to install or upgrade setuptools. 
+""" +import sys +DEFAULT_VERSION = "0.6c9" +DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3] + +md5_data = { + 'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca', + 'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb', + 'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b', + 'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a', + 'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618', + 'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac', + 'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5', + 'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4', + 'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c', + 'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b', + 'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27', + 'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277', + 'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa', + 'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e', + 'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e', + 'setuptools-0.6c4-py2.3.egg': 'b0b9131acab32022bfac7f44c5d7971f', + 'setuptools-0.6c4-py2.4.egg': '2a1f9656d4fbf3c97bf946c0a124e6e2', + 'setuptools-0.6c4-py2.5.egg': '8f5a052e32cdb9c72bcf4b5526f28afc', + 'setuptools-0.6c5-py2.3.egg': 'ee9fd80965da04f2f3e6b3576e9d8167', + 'setuptools-0.6c5-py2.4.egg': 'afe2adf1c01701ee841761f5bcd8aa64', + 'setuptools-0.6c5-py2.5.egg': 'a8d3f61494ccaa8714dfed37bccd3d5d', + 'setuptools-0.6c6-py2.3.egg': '35686b78116a668847237b69d549ec20', + 'setuptools-0.6c6-py2.4.egg': '3c56af57be3225019260a644430065ab', + 'setuptools-0.6c6-py2.5.egg': 'b2f8a7520709a5b34f80946de5f02f53', + 'setuptools-0.6c7-py2.3.egg': '209fdf9adc3a615e5115b725658e13e2', + 'setuptools-0.6c7-py2.4.egg': '5a8f954807d46a0fb67cf1f26c55a82e', + 'setuptools-0.6c7-py2.5.egg': '45d2ad28f9750e7434111fde831e8372', + 'setuptools-0.6c8-py2.3.egg': '50759d29b349db8cfd807ba8303f1902', + 'setuptools-0.6c8-py2.4.egg': 'cba38d74f7d483c06e9daa6070cce6de', + 'setuptools-0.6c8-py2.5.egg': '1721747ee329dc150590a58b3e1ac95b', + 'setuptools-0.6c9-py2.3.egg': 'a83c4020414807b496e4cfbe08507c03', + 'setuptools-0.6c9-py2.4.egg': '260a2be2e5388d66bdaee06abec6342a', + 'setuptools-0.6c9-py2.5.egg': 'fe67c3e5a17b12c0e7c541b7ea43a8e6', + 'setuptools-0.6c9-py2.6.egg': 'ca37b1ff16fa2ede6e19383e7b59245a', +} + +import sys, os +try: from hashlib import md5 +except ImportError: from md5 import md5 + +def _validate_md5(egg_name, data): + if egg_name in md5_data: + digest = md5(data).hexdigest() + if digest != md5_data[egg_name]: + print >>sys.stderr, ( + "md5 validation of %s failed! (Possible download problem?)" + % egg_name + ) + sys.exit(2) + return data + +def use_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + download_delay=15 +): + """Automatically find/download setuptools and make it available on sys.path + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end with + a '/'). `to_dir` is the directory where setuptools will be downloaded, if + it is not already available. If `download_delay` is specified, it should + be the number of seconds that will be paused before initiating a download, + should one be required. 
If an older version of setuptools is installed, + this routine will print a message to ``sys.stderr`` and raise SystemExit in + an attempt to abort the calling script. + """ + was_imported = 'pkg_resources' in sys.modules or 'setuptools' in sys.modules + def do_download(): + egg = download_setuptools(version, download_base, to_dir, download_delay) + sys.path.insert(0, egg) + import setuptools; setuptools.bootstrap_install_from = egg + try: + import pkg_resources + except ImportError: + return do_download() + try: + pkg_resources.require("setuptools>="+version); return + except pkg_resources.VersionConflict, e: + if was_imported: + print >>sys.stderr, ( + "The required version of setuptools (>=%s) is not available, and\n" + "can't be installed while this script is running. Please install\n" + " a more recent version first, using 'easy_install -U setuptools'." + "\n\n(Currently using %r)" + ) % (version, e.args[0]) + sys.exit(2) + else: + del pkg_resources, sys.modules['pkg_resources'] # reload ok + return do_download() + except pkg_resources.DistributionNotFound: + return do_download() + +def download_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + delay = 15 +): + """Download setuptools from a specified location and return its filename + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end + with a '/'). `to_dir` is the directory where the egg will be downloaded. + `delay` is the number of seconds to pause before an actual download attempt. + """ + import urllib2, shutil + egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3]) + url = download_base + egg_name + saveto = os.path.join(to_dir, egg_name) + src = dst = None + if not os.path.exists(saveto): # Avoid repeated downloads + try: + from distutils import log + if delay: + log.warn(""" +--------------------------------------------------------------------------- +This script requires setuptools version %s to run (even to display +help). I will attempt to download it for you (from +%s), but +you may need to enable firewall access for this script first. +I will start the download in %d seconds. + +(Note: if this machine does not have network access, please obtain the file + + %s + +and place it in this directory before rerunning this script.) +---------------------------------------------------------------------------""", + version, download_base, delay, url + ); from time import sleep; sleep(delay) + log.warn("Downloading %s", url) + src = urllib2.urlopen(url) + # Read/write all in one block, so we don't create a corrupt file + # if the download is interrupted. + data = _validate_md5(egg_name, src.read()) + dst = open(saveto,"wb"); dst.write(data) + finally: + if src: src.close() + if dst: dst.close() + return os.path.realpath(saveto) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +def main(argv, version=DEFAULT_VERSION): + """Install or upgrade setuptools and EasyInstall""" + try: + import setuptools + except ImportError: + egg = None + try: + egg = download_setuptools(version, delay=0) + sys.path.insert(0,egg) + from setuptools.command.easy_install import main + return main(list(argv)+[egg]) # we're done here + finally: + if egg and os.path.exists(egg): + os.unlink(egg) + else: + if setuptools.__version__ == '0.0.1': + print >>sys.stderr, ( + "You have an obsolete version of setuptools installed. 
Please\n" + "remove it from your system entirely before rerunning this script." + ) + sys.exit(2) + + req = "setuptools>="+version + import pkg_resources + try: + pkg_resources.require(req) + except pkg_resources.VersionConflict: + try: + from setuptools.command.easy_install import main + except ImportError: + from easy_install import main + main(list(argv)+[download_setuptools(delay=0)]) + sys.exit(0) # try to force an exit + else: + if argv: + from setuptools.command.easy_install import main + main(argv) + else: + print "Setuptools version",version,"or greater has been installed." + print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)' + +def update_md5(filenames): + """Update our built-in md5 registry""" + + import re + + for name in filenames: + base = os.path.basename(name) + f = open(name,'rb') + md5_data[base] = md5(f.read()).hexdigest() + f.close() + + data = [" %r: %r,\n" % it for it in md5_data.items()] + data.sort() + repl = "".join(data) + + import inspect + srcfile = inspect.getsourcefile(sys.modules[__name__]) + f = open(srcfile, 'rb'); src = f.read(); f.close() + + match = re.search("\nmd5_data = {\n([^}]+)}", src) + if not match: + print >>sys.stderr, "Internal error!" + sys.exit(2) + + src = src[:match.start(1)] + repl + src[match.end(1):] + f = open(srcfile,'w') + f.write(src) + f.close() + + +if __name__=='__main__': + if len(sys.argv)>2 and sys.argv[1]=='--md5update': + update_md5(sys.argv[2:]) + else: + main(sys.argv[1:]) + + + + + + diff --git a/rdfdatabank/__init__.py b/rdfdatabank/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rdfdatabank/config/__init__.py b/rdfdatabank/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rdfdatabank/config/deployment.ini_tmpl b/rdfdatabank/config/deployment.ini_tmpl new file mode 100644 index 0000000..8d7e072 --- /dev/null +++ b/rdfdatabank/config/deployment.ini_tmpl @@ -0,0 +1,60 @@ +# +# rdfdatabank - Pylons configuration +# +# The %(here)s variable will be replaced with the parent directory of this file +# +[DEFAULT] +debug = true +email_to = you@yourdomain.com +smtp_server = localhost +error_email_from = paste@localhost + +[server:main] +use = egg:Paste#http +host = 0.0.0.0 +port = 5000 + +[app:main] +use = egg:rdfdatabank +full_stack = true +static_files = true + +cache_dir = %(here)s/data +beaker.session.key = rdfdatabank +beaker.session.secret = ${app_instance_secret} +app_instance_uuid = ${app_instance_uuid} + +# If you'd like to fine-tune the individual locations of the cache data dirs +# for the Cache data, or the Session saves, un-comment the desired settings +# here: +#beaker.cache.data_dir = %(here)s/data/cache +#beaker.session.data_dir = %(here)s/data/sessions + +# WARNING: *THE LINE BELOW MUST BE UNCOMMENTED ON A PRODUCTION ENVIRONMENT* +# Debug mode will enable the interactive debugging tool, allowing ANYONE to +# execute malicious code after an exception is raised. 
+set debug = false + + +# Logging configuration +[loggers] +keys = root + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = INFO +handlers = console + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s diff --git a/rdfdatabank/config/environment.py b/rdfdatabank/config/environment.py new file mode 100644 index 0000000..f456ec5 --- /dev/null +++ b/rdfdatabank/config/environment.py @@ -0,0 +1,39 @@ +"""Pylons environment configuration""" +import os + +from mako.lookup import TemplateLookup +from pylons import config +from pylons.error import handle_mako_error + +import rdfdatabank.lib.app_globals as app_globals +import rdfdatabank.lib.helpers +from rdfdatabank.config.routing import make_map + +def load_environment(global_conf, app_conf): + """Configure the Pylons environment via the ``pylons.config`` + object + """ + # Pylons paths + root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + paths = dict(root=root, + controllers=os.path.join(root, 'controllers'), + static_files=os.path.join(root, 'public'), + templates=[os.path.join(root, 'templates')]) + + # Initialize config with the basic options + config.init_app(global_conf, app_conf, package='rdfdatabank', paths=paths) + + config['routes.map'] = make_map() + config['pylons.app_globals'] = app_globals.Globals() + config['pylons.h'] = rdfdatabank.lib.helpers + + # Create the Mako TemplateLookup, with the default auto-escaping + config['pylons.app_globals'].mako_lookup = TemplateLookup( + directories=paths['templates'], + error_handler=handle_mako_error, + module_directory=os.path.join(app_conf['cache_dir'], 'templates'), + input_encoding='utf-8', default_filters=['escape'], + imports=['from webhelpers.html import escape']) + + # CONFIGURATION OPTIONS HERE (note: all config options will override + # any Pylons config options) diff --git a/rdfdatabank/config/middleware.py b/rdfdatabank/config/middleware.py new file mode 100644 index 0000000..0504f2a --- /dev/null +++ b/rdfdatabank/config/middleware.py @@ -0,0 +1,72 @@ +"""Pylons middleware initialization""" +from beaker.middleware import CacheMiddleware, SessionMiddleware +from paste.cascade import Cascade +from paste.registry import RegistryManager +from paste.urlparser import StaticURLParser +from paste.deploy.converters import asbool +from pylons import config +from pylons.middleware import ErrorHandler, StatusCodeRedirect +from pylons.wsgiapp import PylonsApp +from routes.middleware import RoutesMiddleware + +from rdfdatabank.config.environment import load_environment + +from repoze.who.config import make_middleware_with_config as make_who_with_config + +def make_app(global_conf, full_stack=True, static_files=True, **app_conf): + """Create a Pylons WSGI application and return it + + ``global_conf`` + The inherited configuration for this application. Normally from + the [DEFAULT] section of the Paste ini file. + + ``full_stack`` + Whether this application provides a full WSGI stack (by default, + meaning it handles its own exceptions and errors). Disable + full_stack when this application is "managed" by another WSGI + middleware. + + ``static_files`` + Whether this application serves its own static files; disable + when another web server is responsible for serving them. + + ``app_conf`` + The application's local configuration. 
Normally specified in + the [app:] section of the Paste ini file (where + defaults to main). + + """ + # Configure the Pylons environment + load_environment(global_conf, app_conf) + + # The Pylons WSGI app + app = PylonsApp() + + # Routing/Session/Cache Middleware + app = RoutesMiddleware(app, config['routes.map']) + app = SessionMiddleware(app, config) + app = CacheMiddleware(app, config) + + # CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares) + if asbool(full_stack): + # Handle Python exceptions + app = ErrorHandler(app, global_conf, **config['pylons.errorware']) + + # Display error documents for 401, 403, 404 status codes (and + # 500 when debug is disabled) + if asbool(config['debug']): + app = StatusCodeRedirect(app) + else: + app = StatusCodeRedirect(app, [400, 401, 403, 404, 500]) + + app = make_who_with_config(app, global_conf, app_conf['who.config_file'], app_conf['who.log_file'], app_conf['who.log_level']) + + # Establish the Registry for this application + app = RegistryManager(app) + + if asbool(static_files): + # Serve static files + static_app = StaticURLParser(config['pylons.paths']['static_files']) + app = Cascade([static_app, app]) + + return app diff --git a/rdfdatabank/config/routing.py b/rdfdatabank/config/routing.py new file mode 100644 index 0000000..d056144 --- /dev/null +++ b/rdfdatabank/config/routing.py @@ -0,0 +1,38 @@ +"""Routes configuration + +The more specific and detailed routes should be defined first so they +may take precedent over the more generic routes. For more information +refer to the routes manual at http://routes.groovie.org/docs/ +""" +from pylons import config +from routes import Mapper + +def make_map(): + """Create, configure and return the routes Mapper""" + map = Mapper(directory=config['pylons.paths']['controllers'], + always_scan=config['debug']) + map.minimization = False + + # The ErrorController route (handles 404/500 error pages); it should + # likely stay at the top, ensuring it can always be resolved + map.connect('/error/{action}', controller='error') + map.connect('/error/{action}/{id}', controller='error') + + # CUSTOM ROUTES HERE + map.redirect("/", "/objects") + + map.connect('/packages', controller='packages', action='index') + map.connect('/packages/{silo}', controller='packages', action='siloview') + map.connect('/packages/{silo}/upload', controller='packages', action='upload') + map.connect('/objects', controller='objects', action='index') + map.connect('/objects/{silo}', controller='objects', action='siloview') + map.connect('/objects/{silo}/{id}', controller='objects', action='itemview') + map.connect('/objects/{silo}/{id}/{path:.*}', controller='objects', action='subitemview') + + map.connect('/{controller}/{action}') + map.connect('/{controller}/{action}/{id}') + + map.redirect('/*(url)/', '/{url}', + _redirect_code='301 Moved Permanently') + + return map diff --git a/rdfdatabank/controllers/__init__.py b/rdfdatabank/controllers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rdfdatabank/controllers/error.py b/rdfdatabank/controllers/error.py new file mode 100644 index 0000000..f7bc3bc --- /dev/null +++ b/rdfdatabank/controllers/error.py @@ -0,0 +1,46 @@ +import cgi + +from paste.urlparser import PkgResourcesParser +from pylons import request +from pylons.controllers.util import forward +from pylons.middleware import error_document_template +from webhelpers.html.builder import literal + +from rdfdatabank.lib.base import BaseController + +class ErrorController(BaseController): + + 
"""Generates error documents as and when they are required. + + The ErrorDocuments middleware forwards to ErrorController when error + related status codes are returned from the application. + + This behaviour can be altered by changing the parameters to the + ErrorDocuments middleware in your config/middleware.py file. + + """ + + def document(self): + """Render the error document""" + resp = request.environ.get('pylons.original_response') + content = literal(resp.body) or cgi.escape(request.GET.get('message', '')) + page = error_document_template % \ + dict(prefix=request.environ.get('SCRIPT_NAME', ''), + code=cgi.escape(request.GET.get('code', str(resp.status_int))), + message=content) + return page + + def img(self, id): + """Serve Pylons' stock images""" + return self._serve_file('/'.join(['media/img', id])) + + def style(self, id): + """Serve Pylons' stock stylesheets""" + return self._serve_file('/'.join(['media/style', id])) + + def _serve_file(self, path): + """Call Paste's FileApp (a WSGI application) to serve the file + at the specified path + """ + request.environ['PATH_INFO'] = '/%s' % path + return forward(PkgResourcesParser('pylons', 'pylons')) diff --git a/rdfdatabank/controllers/objects.py b/rdfdatabank/controllers/objects.py new file mode 100644 index 0000000..845c531 --- /dev/null +++ b/rdfdatabank/controllers/objects.py @@ -0,0 +1,368 @@ +import logging + +from pylons import request, response, session, tmpl_context as c +from pylons.controllers.util import abort, redirect_to +from pylons import app_globals +from rdfdatabank.lib.base import BaseController, render +from rdfdatabank.lib.utils import create_new, is_embargoed + +from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse + +from datetime import datetime, timedelta +from paste.fileapp import FileApp + +import re, os + +JAILBREAK = re.compile("[\/]*\.\.[\/]*") + +import simplejson + +log = logging.getLogger(__name__) + +class ObjectsController(BaseController): + def index(self): + if not request.environ.get('repoze.who.identity'): + abort(401, "Not Authorised") + ident = request.environ.get('repoze.who.identity') + granary_list = app_globals.granary.silos + c.silos = app_globals.authz(granary_list, ident) + + return render('/list_of_archives.html') + + def siloview(self, silo): + if not request.environ.get('repoze.who.identity'): + abort(401, "Not Authorised") + ident = request.environ.get('repoze.who.identity') + granary_list = app_globals.granary.silos + c.silos = app_globals.authz(granary_list, ident) + if silo not in c.silos: + abort(403, "Forbidden") + + c.silo_name = silo + c.silo = app_globals.granary.get_rdf_silo(silo) + + http_method = request.environ['REQUEST_METHOD'] + if http_method == "GET": + c.embargos = {} + for item in c.silo.list_items(): + c.embargos[item] = is_embargoed(c.silo, item) + c.items = c.silo.list_items() + return render('/siloview.html') + elif http_method == "POST": + params = request.POST + if params.has_key("id"): + if c.silo.exists(params['id']): + response.content_type = "text/plain" + response.status_int = 409 + response.status = "409 Conflict: Object Already Exists" + return "Object Already Exists" + else: + # Supported params: + # id, title, embargoed, embargoed_until, embargo_days_from_now + id = params['id'] + del params['id'] + item = create_new(c.silo, id, ident['repoze.who.userid'], **params) + # TODO b_creation(silo, id) + # conneg return + accept_list = conneg_parse(request.environ['HTTP_ACCEPT']) + if not accept_list: + accept_list= [MT("text", 
"html")] + mimetype = accept_list.pop(0) + while(mimetype): + if str(mimetype) in ["text/html", "text/xhtml"]: + # probably a browser - redirect to newly created object + redirect_to(controller="objects", action="itemview", silo=silo, id=id) + elif str(mimetype) in ["text/plain"]: + response.content_type = "text/plain" + response.status_int = 201 + response.status = "201 Created" + response.headers.add("Content-Location", item.uri) + return "Created" + # Whoops - nothing satisfies + response.content_type = "text/plain" + response.status_int = 201 + response.headers.add("Content-Location", item.uri) + response.status = "201 Created" + return "Created" + + def itemview(self, silo, id): + + # Check to see if embargo is on: + c.silo_name = silo + c.id = id + c.silo = app_globals.granary.get_rdf_silo(silo) + + c.embargoed = False + if c.silo.exists(id): + c.item = c.silo.get_item(id) + + if c.item.metadata.get('embargoed') not in ["false", 0, False]: + c.embargoed = True + c.embargos = {} + c.embargos[id] = is_embargoed(c.silo, id) + http_method = request.environ['REQUEST_METHOD'] + + editor = False + + if not (http_method == "GET" and not c.embargoed): + #identity management if item + if not request.environ.get('repoze.who.identity'): + abort(401, "Not Authorised") + ident = request.environ.get('repoze.who.identity') + granary_list = app_globals.granary.silos + if ident: + c.silos = app_globals.authz(granary_list, ident) + if silo not in c.silos: + abort(403, "Forbidden") + else: + abort(403, "Forbidden") + + editor = silo in c.silos + + # Method determination + if http_method == "GET": + if c.silo.exists(id): + # conneg: + c.item = c.silo.get_item(id) + + c.parts = c.item.list_parts(detailed=True) + + accept_list = conneg_parse(request.environ['HTTP_ACCEPT']) + if not accept_list: + accept_list= [MT("text", "html")] + mimetype = accept_list.pop(0) + while(mimetype): + if str(mimetype) in ["text/html", "text/xhtml"]: + return render('/itemview.html') + elif str(mimetype) == "application/json": + response.content_type = 'application/json; charset="UTF-8"' + return simplejson.dumps(c.item.manifest) + elif str(mimetype) in ["application/rdf+xml", "text/xml"]: + response.content_type = 'application/rdf+xml; charset="UTF-8"' + return c.item.rdf_to_string(format="pretty-xml") + elif str(mimetype) == "text/rdf+n3": + response.content_type = 'text/rdf+n3; charset="UTF-8"' + return c.item.rdf_to_string(format="n3") + elif str(mimetype) == "application/x-turtle": + response.content_type = 'application/x-turtle; charset="UTF-8"' + return c.item.rdf_to_string(format="turtle") + elif str(mimetype) in ["text/rdf+ntriples", "text/rdf+nt"]: + response.content_type = 'text/rdf+ntriples; charset="UTF-8"' + return c.item.rdf_to_string(format="nt") + # Whoops - nothing satisfies + abort(406) + else: + abort(404) + elif http_method == "POST" and editor: + params = request.POST + if not c.silo.exists(id): + if 'id' in params.keys(): + del params['id'] + item = create_new(c.silo, id, ident['repoze.who.userid'], **params) + + # TODO b_creation(silo, id) + # conneg return + accept_list = conneg_parse(request.environ['HTTP_ACCEPT']) + if not accept_list: + accept_list= [MT("text", "html")] + mimetype = accept_list.pop(0) + while(mimetype): + if str(mimetype) in ["text/html", "text/xhtml"]: + # probably a browser - redirect to newly created object + redirect_to(controller="objects", action="itemview", silo=silo, id=id) + elif str(mimetype) in ["text/plain"]: + response.content_type = "text/plain" + response.status_int = 
201 + response.status = "201 Created" + response.headers.add("Content-Location", item.uri) + return "Created" + # Whoops - nothing satisfies + response.content_type = "text/plain" + response.status_int = 201 + response.headers.add("Content-Location", item.uri) + response.status = "201 Created" + return "Created" + elif params.has_key('embargo_change'): + item = c.silo.get_item(id) + if params.has_key('embargoed'): + item.metadata['embargoed'] = True + else: + #if is_embargoed(c.silo, id)[0] == True: + item.metadata['embargoed'] = False + if params.has_key('embargoed_until'): + item.metadata['embargoed_until'] = params['embargoed_until'] + item.sync() + e, e_d = is_embargoed(c.silo, id, refresh=True) + # TODO b_change(silo, id) + response.content_type = "text/plain" + response.status_int = 200 + return simplejson.dumps({'embargoed':e, 'embargoed_until':e_d}) + else: + ## TODO apply changeset handling + ## 1 - store posted CS docs in 'version' "___cs" + ## 2 - apply changeset to RDF manifest + ## 3 - update state to reflect latest CS applied + response.status_int = 204 + return + + elif http_method == "DELETE" and editor: + if c.silo.exists(id): + c.silo.del_item(id) + # TODO b_deletion(silo, id) + response.status_int = 200 + return "{'ok':'true'}" # required for the JQuery magic delete to succede. + else: + abort(404) + + def subitemview(self, silo, id, path): + # Check to see if embargo is on: + c.silo_name = silo + c.id = id + c.silo = app_globals.granary.get_rdf_silo(silo) + + embargoed = False + if c.silo.exists(id): + c.item = c.silo.get_item(id) + + if c.item.metadata.get('embargoed') not in ["false", 0, False]: + embargoed = True + + http_method = request.environ['REQUEST_METHOD'] + + editor = False + + if not (http_method == "GET" and not embargoed): + #identity management if item + if not request.environ.get('repoze.who.identity'): + abort(401, "Not Authorised") + ident = request.environ.get('repoze.who.identity') + granary_list = app_globals.granary.silos + if ident: + c.silos = app_globals.authz(granary_list, ident) + if silo not in c.silos: + abort(403, "Forbidden") + else: + abort(403, "Forbidden") + + editor = silo in c.silos + + c.path = path + + http_method = request.environ['REQUEST_METHOD'] + + if http_method == "GET": + if c.silo.exists(id): + c.item = c.silo.get_item(id) + if c.item.isfile(path): + fileserve_app = FileApp(c.item.to_dirpath(path)) + return fileserve_app(request.environ, self.start_response) + elif c.item.isdir(path): + c.parts = c.item.list_parts(path, detailed=True) + return render("/subitemview.html") + else: + return render("/nofilehere.html") + elif http_method == "PUT" and editor: + if c.silo.exists(id): + # Pylons loads the request body into request.body... + # This is not going to work for large files... ah well + # POST will handle large files as they are pushed to disc, + # but this won't + content = request.body + item = c.silo.get_item(id) + + if JAILBREAK.search(path) != None: + abort(400, "'..' cannot be used in the path") + + if item.isfile(path): + code = 204 + elif item.isdir(path): + response.status_int = 403 + return "Cannot PUT a file on to an existing directory" + else: + code = 201 + + item.put_stream(path, content) + + #if code == 201: + # b_creation(silo, id, path) + #else: + # b_change(silo, id, path) + #if code == 201: + # b_creation(silo, id, path) + #else: + # b_change(silo, id, path) + response.status_int = code + return + else: + # item doesn't exist yet... + # DECISION: Auto-instantiate object and then put file there? 
+ # or error out with perhaps a 404? + # Going with error out... + response.status_int = 404 + return "Object %s doesn't exist" % id + elif http_method == "POST" and editor: + if c.silo.exists(id): + # POST... differences from PUT: + # path = filepath that this acts on, should be dir, or non-existant + # if path is a file, this will revert to PUT's functionality and + # overwrite the file, if there is a multipart file uploaded + # Expected params: filename, file (uploaded file) + params = request.POST + item = c.silo.get_item(id) + filename = params.get('filename') + upload = params.get('file') + if JAILBREAK.search(filename) != None: + abort(400, "'..' cannot be used in the path or as a filename") + target_path = path + if item.isdir(path) and filename: + target_path = os.path.join(path, filename) + + if item.isfile(target_path): + code = 204 + elif item.isdir(target_path): + response.status_int = 403 + return "Cannot POST a file on to an existing directory" + else: + code = 201 + item.put_stream(target_path, upload.file) + + #if code == 201: + # b_creation(silo, id, target_path) + #else: + # b_change(silo, id, target_path) + response.status_int = code + return + else: + # item doesn't exist yet... + # DECISION: Auto-instantiate object and then put file there? + # or error out with perhaps a 404? + # Going with error out... + response.status_int = 404 + return "Object %s doesn't exist" % id + elif http_method == "DELETE" and editor: + if c.silo.exists(id): + item = c.silo.get_item(id) + if item.isfile(path): + item.del_stream(path) + + # TODO b_deletion(silo, id, path) + response.status_int = 200 + return "{'ok':'true'}" # required for the JQuery magic delete to succede. + elif item.isdir(path): + parts = item.list_parts(path) + for part in parts: + if item.isdir(os.path.join(path, part)): + # TODO implement proper recursive delete, with RDF aggregation + # updating + abort(400, "Directory is not empty of directories") + for part in parts: + item.del_stream(os.path.join(path, part)) + # TODO b_deletion(silo, id, os.path.join(path, part)) + item.del_stream(path) + # TODO b_deletion(silo, id, path) + response.status_int = 200 + return "{'ok':'true'}" # required for the JQuery magic delete to succede. 
+ else: + abort(404) + else: + abort(404) diff --git a/rdfdatabank/controllers/packages.py b/rdfdatabank/controllers/packages.py new file mode 100644 index 0000000..26a66b2 --- /dev/null +++ b/rdfdatabank/controllers/packages.py @@ -0,0 +1,87 @@ +import logging + +from pylons import request, response, session, tmpl_context as c +from pylons.controllers.util import abort, redirect_to + +from pylons import app_globals +from rdfdatabank.lib.base import BaseController, render + +import re, os + +from rdfdatabank.lib.unpack import store_zipfile, unpack_zip_item, BadZipfile + +from rdfdatabank.lib.conneg import MimeType as MT, parse as conneg_parse + +log = logging.getLogger(__name__) + +class PackagesController(BaseController): + def index(self): + if not request.environ.get('repoze.who.identity'): + abort(401, "Not Authorised") + ident = request.environ.get('repoze.who.identity') + granary_list = app_globals.granary.silos + c.silos = app_globals.authz(granary_list, ident) + + return render('/list_of_zipfile_archives.html') + + def success(self, message): + c.message = message + return render("/success_message.html") + + def siloview(self, silo): + if not request.environ.get('repoze.who.identity'): + abort(401, "Not Authorised") + ident = request.environ.get('repoze.who.identity') + granary_list = app_globals.granary.silos + c.silos = app_globals.authz(granary_list, ident) + if silo not in c.silos: + abort(403, "Forbidden") + + c.silo_name = silo + c.silo = app_globals.granary.get_rdf_silo(silo) + + http_method = request.environ['REQUEST_METHOD'] + if http_method == "GET": + return render("/package_form_upload.html") + elif http_method == "POST": + params = request.POST + if params.has_key("id") and params.has_key("file") and params['id'] and params['file'].filename: + target_uri = "%s%s" % (c.silo.state['uri_base'], params['id']) + info = {} + info['package_filename'] = params['file'].filename + zip_item = store_zipfile(c.silo, target_uri, params['file'], ident['repoze.who.userid']) + info['zip_id'] = zip_item.item_id + info['zip_uri'] = zip_item.uri + info['zip_target'] = target_uri + info['zip_file_stat'] = zip_item.stat(info['package_filename']) + info['zip_file_size'] = info['zip_file_stat'].st_size + try: + unpack_zip_item(zip_item, c.silo, ident['repoze.who.userid']) + # 302 Redirect to new resource? 201 with Content-Location? 
+ # For now, content-location + response.headers.add("Content-Location", target_uri) + # conneg return + accept_list = conneg_parse(request.environ['HTTP_ACCEPT']) + if not accept_list: + accept_list= [MT("text", "html")] + mimetype = accept_list.pop(0) + while(mimetype): + if str(mimetype) in ["text/html", "text/xhtml"]: + c.info = info + return render('/successful_package_upload.html') + elif str(mimetype) == "application/json": + response.status_int = 201 + response.content_type = 'application/json; charset="UTF-8"' + return simplejson.dumps(info) + elif str(mimetype) in ["application/rdf+xml", "text/xml"]: + response.status_int = 201 + response.content_type = 'application/rdf+xml; charset="UTF-8"' + return zip_item.rdf_to_string(format="pretty-xml") + # Whoops - nothing satisfies + abort(406) + except BadZipfile: + # Bad zip file + info['unpacking_status'] = "FAIL - Couldn't unzip package" + else: + abort(400, "You must supply a valid id") + abort(404) diff --git a/rdfdatabank/lib/__init__.py b/rdfdatabank/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rdfdatabank/lib/app_globals.py b/rdfdatabank/lib/app_globals.py new file mode 100644 index 0000000..072c2b3 --- /dev/null +++ b/rdfdatabank/lib/app_globals.py @@ -0,0 +1,31 @@ +"""The application's Globals object""" + +from pylons import config + +from recordsilo import Granary +from redis import Redis + +from rdfdatabank.lib.utils import authz + +class Globals(object): + + """Globals acts as a container for objects available throughout the + life of the application + + """ + + def __init__(self): + """One instance of Globals is created during application + initialization and is available during requests via the + 'app_globals' variable + + """ + + self.authz = authz + + if config.has_key("granary.store"): + self.granary = Granary(config['granary.store']) + + if config.has_key("redis.host"): + self.redishost = config['redis.host'] + self.r = Redis(self.redishost) diff --git a/rdfdatabank/lib/base.py b/rdfdatabank/lib/base.py new file mode 100644 index 0000000..56a090b --- /dev/null +++ b/rdfdatabank/lib/base.py @@ -0,0 +1,15 @@ +"""The base Controller API + +Provides the BaseController class for subclassing. +""" +from pylons.controllers import WSGIController +from pylons.templating import render_mako as render + +class BaseController(WSGIController): + + def __call__(self, environ, start_response): + """Invoke the Controller""" + # WSGIController.__call__ dispatches to the Controller method + # the request is routed to. 
This routing information is + # available in environ['pylons.routes_dict'] + return WSGIController.__call__(self, environ, start_response) diff --git a/rdfdatabank/lib/broadcast.py b/rdfdatabank/lib/broadcast.py new file mode 100644 index 0000000..ab46471 --- /dev/null +++ b/rdfdatabank/lib/broadcast.py @@ -0,0 +1,46 @@ +from redis import Redis +from redis.exceptions import ConnectionError + +class BroadcastToRedis(object): + def __init__(self, redis_host, queue): + self.redis_host = redis_host + self.queue = queue + self.r = Redis(redis_host) + + def lpush(self, msg): + try: + self.r.lpush(self.queue, msg) + except ConnectionError: + self.r = Redis(self.redis_host) + self.lpush(self.queue, msg) + + def b_change(self, silo, id, filepath=None, **kw): + msg = {} + msg.update(kw) + msg.update({'type':'u', + 'silo':silo, + 'id':id}) + if filepath: + msg['filepath'] = filepath + self.lpush(simplejson.dumps(msg)) + + def b_creation(self, silo, id, filepath=None, *kw): + msg = {} + msg.update(kw) + msg.update({'type':'c', + 'silo':silo, + 'id':id}) + if filepath: + msg['filepath'] = filepath + self.lpush(simplejson.dumps(msg)) + + def b_deletion(self, silo, id, filepath=None, **kw): + msg = {} + msg.update(kw) + msg.update({'type':'d', + 'silo':silo, + 'id':id}) + if filepath: + msg['filepath'] = filepath + self.lpush(simplejson.dumps(msg)) + diff --git a/rdfdatabank/lib/conneg.py b/rdfdatabank/lib/conneg.py new file mode 100644 index 0000000..6f03d3c --- /dev/null +++ b/rdfdatabank/lib/conneg.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python + +def skipws(next): + skip = 1 + if not skip: + return next + else: + def foo(*args): + tok = next(*args) + if tok.isspace(): + tok = next(*args) + return tok + return foo + +class ParseError(Exception): + pass + +class MiniLex(object): + + def __init__(self, data, + whitespace= " \t", + sep="[](){}<>\\/@:;,?=", + quotes="\"", + eof="\n\r"): + self.data = data + self.whitespace=whitespace + self.separators=sep + self.quotes=quotes + self.eof=eof + + self.state = 0 + self.token = [] + self.quoted = '' + self.pos = 0 + + def __iter__(self): + return self + + @skipws + def next(self): + while True: + if self.pos == len(self.data): + if self.token: + tok= ''.join(self.token) + self.token = [] + return tok + else: + raise StopIteration + char = self.data[self.pos] + tok = '' + if self.quoted and not char in self.quotes: + self.token.append(char) + self.pos +=1 + elif char in self.quotes: + if char == self.quoted: + # we're in quoted text + if self.data[self.pos-1] == "\\": + self.token.append(char) + self.pos += 1 + else: + self.token.append(char) + tok = ''.join(self.token) + self.token = [] + self.pos += 1 + self.quoted=0 + self.state=0 + return tok + elif self.quoted: + # other quotes + self.token.append(char) + self.pos += 1 + else: + # begin quoted text + if self.token: + tok = ''.join(self.token) + self.quoted=char + self.token = [char] + self.pos += 1 + self.state = 2 + if tok: + return tok + elif char in self.whitespace: + if self.state == 1: + self.token.append(char) + else: + if self.token: + tok = ''.join(self.token) + self.state = 1 + self.token = [char] + self.pos += 1 + if tok: + return tok + elif char in self.separators: + # can't join seps (currently) + if self.token: + tok = ''.join(self.token) + else: + tok = char + self.pos += 1 + self.token = [] + self.state = 0 + return tok + elif char in self.eof: + if self.token: + return ''.join(self.token) + else: + raise StopIteration + else: + if self.state == 3: + self.token.append(char) + else: + if 
self.token: + tok = ''.join(self.token) + self.token = [char] + self.state=3 + self.pos += 1 + if tok: + return tok + +class MimeType(object): + def __init__(self, m1="", m2=""): + self.mimetype1 = m1 + self.mimetype2 = m2 + self.params = {} + self.qval = 1.0 + + def __str__(self): + #l = [('q', self.qval)] + #l.extend(self.params.items()) + # Actually, most likely Don't want to serialize the qval + l = self.params.items() + if l: + return self.mimetype1 + "/" + self.mimetype2 + ";" + ";".join(["%s=%s" % x for x in l]) + else: + return self.mimetype1 + "/" + self.mimetype2 + + def __repr__(self): + return "" % self + + def sort2(self): + if self.mimetype1 == "*": + return 0 + elif self.mimetype2 == "*": + return 1 + elif self.params: + return 2 + len(self.params) + else: + return 2 + + def matches(self, other): + if other.mimetype1 == self.mimetype1 or other.mimetype1 == '*' or self.mimetype1 == '*': + if other.mimetype2 == self.mimetype2 or other.mimetype2 == '*' or self.mimetype2 == '*': + if other.params == self.params: + return True + return False + + +class Parser(object): + + def __init__(self, ml): + self.ml = ml + + def process(self): + mts = [] + mt = self.top() + while mt: + if mt.mimetype1 == "*" and mt.mimetype2 == "*" and mt.qval == 1.0: + # downgrade anything to the lowest, otherwise behaviour is + # non deterministic. See apache conneg rules. + mt.qval = 0.001 + mts.append(mt) + mt = self.top() + return mts + + def top(self): + mt = MimeType() + try: + tok = self.ml.next() # text + except StopIteration: + return None + mt.mimetype1 = tok + sl = self.ml.next() # / + if sl != "/": + raise ParseError("Expected /, got: " + sl) + tok2 = self.ml.next() # html + mt.mimetype2 = tok2 + + while True: + try: + tok = self.ml.next() + except StopIteration: + return mt + if tok == ',': + return mt + elif tok == ';': + (key, val) = self.param() + if key == "q": + mt.qval = float(val) + else: + mt.params[key] = val + else: + raise ParseError("Expected , or ; got: %r" % tok) + + def param(self): + key = self.ml.next() + eq = self.ml.next() + if eq != "=": + raise ParseError("Expected =, got: " + sl) + val = self.ml.next() + return (key, val) + + +def best(client, server): + # step through client request against server possibilities + # and find highest according to qvals in client + # both client and server are lists of mt objects + # client should be sorted by qval already + # assume that server is unsorted + + # AFAICT, if the request has any params, they MUST be honored + # so if params, and no exact match, discard + # And hence */*;params means that params must be matched. + + for mtc in client: + # this is most wanted, can we provide? 
+ for mts in server: + if mts.matches(mtc): + return mtc + return None + + +def parse(data): + lex = MiniLex(data) + p = Parser(lex) + mts = p.process() + mts.sort(key=lambda x: x.sort2(), reverse=True) + mts.sort(key=lambda x: x.qval, reverse=True) + return mts + +if __name__ == '__main__': + ml = MiniLex("text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.2") + p = Parser(ml) + mts = p.process() + mts.sort(key=lambda x: x.sort2(), reverse=True) + mts.sort(key=lambda x: x.qval, reverse=True) + + ml2 = MiniLex("text/xhtml+xml, text/xml, application/atom+xml, text/html;level=2") + p2 = Parser(ml2) + mts2 = p2.process() + + b = best(mts, mts2) + print b + diff --git a/rdfdatabank/lib/helpers.py b/rdfdatabank/lib/helpers.py new file mode 100644 index 0000000..e30d8fd --- /dev/null +++ b/rdfdatabank/lib/helpers.py @@ -0,0 +1,30 @@ +"""Helper functions + +Consists of functions to typically be used within templates, but also +available to Controllers. This module is available to templates as 'h'. +""" +# Import helpers as desired, or define your own, ie: +#from webhelpers.html.tags import checkbox, password + +from webhelpers.html import escape, HTML, literal, url_escape +from webhelpers.html.tags import * +from webhelpers.date import * +from webhelpers.text import * +from webhelpers.html.converters import * +from webhelpers.html.tools import * +from webhelpers.util import * +from routes import url_for + +from rdfdatabank.lib.conneg import parse as conneg_parse + +def bytes_to_english(no_of_bytes): + # 1024 per 'level' + suffixes = ['bytes', 'kb', 'Mb', 'Gb', 'Tb', 'Pb', 'Eb', 'Yb'] + f_no = float(no_of_bytes) + level = 0 + while(f_no > 1024.0): + f_no = f_no / 1024.0 + level = level + 1 + if level == 0: + return "%s %s" % (no_of_bytes, suffixes[level]) + return "%5.1f %s" % (f_no, suffixes[level]) diff --git a/rdfdatabank/lib/ident_md.py b/rdfdatabank/lib/ident_md.py new file mode 100644 index 0000000..ed9702b --- /dev/null +++ b/rdfdatabank/lib/ident_md.py @@ -0,0 +1,13 @@ +_DATA = { + 'admin': {'first_name':'ben', 'last_name':'OSteen', 'owner':'*'}, + 'admiral': {'name':'ADMIRAL Project', 'description':'ADMIRAL: A Data Management Infrastructure for Research', 'owner':['admiral']}, + 'eidcsr': {'name':'EIDCSR Project', 'description':'The Embedding Institutional Data Curation Services in Research (EIDCSR) project is addressing the research data management and curation challenges of three research groups in the University of Oxford.', 'owner':['eidcsr']}, + } + +class IdentMDProvider(object): + + def add_metadata(self, environ, identity): + userid = identity.get('repoze.who.userid') + info = _DATA.get(userid) + if info is not None: + identity.update(info) diff --git a/rdfdatabank/lib/text.zip b/rdfdatabank/lib/text.zip new file mode 100644 index 0000000..1ec731a Binary files /dev/null and b/rdfdatabank/lib/text.zip differ diff --git a/rdfdatabank/lib/unpack.py b/rdfdatabank/lib/unpack.py new file mode 100644 index 0000000..27a454d --- /dev/null +++ b/rdfdatabank/lib/unpack.py @@ -0,0 +1,77 @@ +import subprocess + +import os + +from redis import Redis + +from uuid import uuid4 + +from rdfdatabank.lib.utils import create_new + +#import checkm + +zipfile_root = "zipfile:" + +class BadZipfile(Exception): + """Cannot open zipfile using commandline tool 'unzip' to target directory""" + +def get_next_zipfile_id(siloname): + # TODO make this configurable + r = Redis() + return str(r.incr("%s:zipfile" % (siloname))) + +def find_last_zipfile(silo): + siloname = 
silo.state['storage_dir'] + r = Redis() + r.set("%s:zipfile" % (siloname), 0) + zipfile_id = 0 + while(silo.exists("%s%s" % (zipfile_root, zipfile_id))): + zipfile_id = r.incr("%s:zipfile" % (siloname)) + return zipfile_id + +def store_zipfile(silo, target_item_uri, POSTED_file, ident): + zipfile_id = get_next_zipfile_id(silo.state['storage_dir']) + while(silo.exists("%s%s" % (zipfile_root, zipfile_id))): + zipfile_id = get_next_zipfile_id(silo.state['storage_dir']) + + #zip_item = silo.get_item("%s%s" % (zipfile_root, zipfile_id)) + zip_item = create_new(silo, "%s%s" % (zipfile_root, zipfile_id), ident) + zip_item.add_triple("%s/%s" % (zip_item.uri, POSTED_file.filename.lstrip(os.sep)), "dcterms:hasVersion", target_item_uri) + zip_item.put_stream(POSTED_file.filename, POSTED_file.file) + try: + POSTED_file.file.close() + except: + pass + zip_item.sync() + return zip_item + +def unzip_file(filepath, target_directory=None): + # TODO add the checkm stuff back in + if not target_directory: + target_directory = "/tmp/%s" % (uuid4().hex) + p = subprocess.Popen("unzip -d %s %s" % (target_directory, filepath), shell=True, stdout=subprocess.PIPE) + p.wait() + if p.returncode != 0: + raise BadZipfile + else: + return target_directory + +def unpack_zip_item(zip_item, silo, ident): + derivative = zip_item.list_rdf_objects("*", "dcterms:hasVersion") + # 1 object holds 1 zipfile - may relax this easily given demand + assert len(derivative.keys()) == 1 + for file_uri in derivative.keys(): + filepath = file_uri[len(zip_item.uri)+1:] + real_filepath = zip_item.to_dirpath(filepath) + target_item = derivative[file_uri][0][len(silo.state['uri_base']):] + + # Overwrite current version instead of making new version? + + to_item = create_new(silo, target_item, ident) + #to_item = silo.get_item(target_item) + unpacked_dir = unzip_file(real_filepath) + to_item.move_directory_as_new_version(unpacked_dir) + to_item.add_triple(to_item.uri, "dcterms:isVersionOf", file_uri) + to_item.sync() + return True + diff --git a/rdfdatabank/lib/utils.py b/rdfdatabank/lib/utils.py new file mode 100644 index 0000000..fe1cf66 --- /dev/null +++ b/rdfdatabank/lib/utils.py @@ -0,0 +1,51 @@ +from datetime import datetime, timedelta + +from redis import Redis +import simplejson + +def authz(granary_list, ident): + if ident['repoze.who.userid'] == "admin": + return granary_list + else: + authd = [] + if ident.has_key('owner'): + for item in ident['owner']: + if item in granary_list: + authd.append(item) + return authd + +def is_embargoed(silo, id, refresh=False): + # TODO evaluate r.expire settings for these keys - popularity resets ttl or increases it? 
+ r = Redis() + e = r.get("%s:%s:embargoed" % (silo.state['storage_dir'], id)) + e_d = r.get("%s:%s:embargoed_until" % (silo.state['storage_dir'], id)) + if refresh or (not e or not e_d): + if silo.exists(id): + item = silo.get_item(id) + e = item.metadata.get("embargoed") + e_d = item.metadata.get("embargoed_until") + if e not in ['false', 0, False]: + e = True + else: + e = False + r.set("%s:%s:embargoed" % (silo.state['storage_dir'], id), e) + r.set("%s:%s:embargoed_until" % (silo.state['storage_dir'], id), e_d) + return (e, e_d) + +def create_new(silo, id, creator, title=None, embargoed=True, embargoed_until=None, embargo_days_from_now=None, **kw): + item = silo.get_item(id) + item.metadata['createdby'] = creator + item.metadata['embargoed'] = embargoed + if embargoed: + if embargoed_until: + item.metadata['embargoed_until'] = embargoed_until + elif embargo_days_from_now: + item.metadata['embargoed_until'] = (datetime.now() + timedelta(days=embargo_days_from_now)).isoformat() + else: + item.metadata['embargoed_until'] = (datetime.now() + timedelta(days=365*70)).isoformat() + item.add_triple(item.uri, u"dcterms:dateSubmitted", datetime.now()) + if title: + item.add_triple(item.uri, u"rdfs:label", title) + item.sync() + return item + diff --git a/rdfdatabank/model/__init__.py b/rdfdatabank/model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rdfdatabank/templates/base.html b/rdfdatabank/templates/base.html new file mode 100644 index 0000000..748442b --- /dev/null +++ b/rdfdatabank/templates/base.html @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + + + + + + + ${self.head_tags()} + + +% if c.silo_name: +

Archives: ${c.silo_name} - ${c.silo_name} package upload

+% else: +

Archives

+% endif +% if c.id: +% if c.path: +

Subdirectory ${c.id}/${c.path}

+

Root directory: ${c.id}

+% else: +

Root directory of ${c.id}

+% endif +% endif + ${self.body()} + + + diff --git a/rdfdatabank/templates/create_new_item.html b/rdfdatabank/templates/create_new_item.html new file mode 100644 index 0000000..3c1a694 --- /dev/null +++ b/rdfdatabank/templates/create_new_item.html @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- +
+ + +
diff --git a/rdfdatabank/templates/delete_item.html b/rdfdatabank/templates/delete_item.html new file mode 100644 index 0000000..3c1a694 --- /dev/null +++ b/rdfdatabank/templates/delete_item.html @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- +
+ + +
diff --git a/rdfdatabank/templates/embargo_form.html b/rdfdatabank/templates/embargo_form.html new file mode 100644 index 0000000..84536a4 --- /dev/null +++ b/rdfdatabank/templates/embargo_form.html @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +
+ +% if c.embargos: +% if c.embargos[c.current][0] not in ['False', 'false', 0, False]: + +% else: + +% endif + +% else: + + +% endif + +
diff --git a/rdfdatabank/templates/itemview.html b/rdfdatabank/templates/itemview.html new file mode 100644 index 0000000..8318f36 --- /dev/null +++ b/rdfdatabank/templates/itemview.html @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +<%inherit file="/base.html" /> +<%def name="head_tags()"> + View of item ${c.item.uri} + +% if c.item: +
+
Item's JSON state (Accept: application/json):
+
${repr(c.item)}
+
Item's Embargo state
+
+<% +c.current = c.id +%> +

<%include file="/embargo_form.html"/>

+

Embargo state: True - only those logged in and with edit rights can see the item. False - anyone can GET the item and its files.

+

Embargo date: the aim is for ISO 8601 dates to provide embargo trigger events. Currently unused, unvalidated and unparsed.

+
Item's RDF Manifest (Accept: most RDF mimetypes):
+
${c.item.rdf_to_string(format="pretty-xml")}
+
Item's Files:
+
<%include file="/part_list.html"/>
+
+% endif diff --git a/rdfdatabank/templates/list_of_archives.html b/rdfdatabank/templates/list_of_archives.html new file mode 100644 index 0000000..d152d2f --- /dev/null +++ b/rdfdatabank/templates/list_of_archives.html @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +<%inherit file="/base.html" /> +<%def name="head_tags()"> + List of Data Archives that accept zipped packages + +% if c.silos: +

List of Archives: +
    +% for silo in c.silos: +
  • ${silo} (Package upload: ${silo})
  • +% endfor +
+% endif diff --git a/rdfdatabank/templates/list_of_zipfile_archives.html b/rdfdatabank/templates/list_of_zipfile_archives.html new file mode 100644 index 0000000..c1dc151 --- /dev/null +++ b/rdfdatabank/templates/list_of_zipfile_archives.html @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +<%inherit file="/base.html" /> +<%def name="head_tags()"> + List of Data Archives that accept zipped packages + +% if c.silos: +

List of package upload endpoints: +
    +% for silo in c.silos: +
  • ${silo}

    +<% +c.silo_name = silo +%> +

    <%include file="/upload_package.html"/>

    +
  • +% endfor +
+% endif diff --git a/rdfdatabank/templates/package_form_upload.html b/rdfdatabank/templates/package_form_upload.html new file mode 100644 index 0000000..f1de8f4 --- /dev/null +++ b/rdfdatabank/templates/package_form_upload.html @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +<%inherit file="/base.html" /> +<%def name="head_tags()"> + Package upload for "${c.silo_name}" + +

Package upload

+

<%include file="/upload_package.html"/>

diff --git a/rdfdatabank/templates/part_list.html b/rdfdatabank/templates/part_list.html new file mode 100644 index 0000000..d01dc5a --- /dev/null +++ b/rdfdatabank/templates/part_list.html @@ -0,0 +1,17 @@ +% if c.parts: +
    +<% +if c.path: + subpath = "/%s" % c.path +else: + subpath = "" +%> +% for part in c.parts: +% if c.parts[part]: +
  • ${part} ${h.bytes_to_english(c.parts[part].st_size)} - (remove)
  • +% else: +
  • ${part}/ - (remove)
  • +% endif +% endfor +
diff --git a/rdfdatabank/templates/siloview.html b/rdfdatabank/templates/siloview.html
new file mode 100644
index 0000000..438ba5a
--- /dev/null
+++ b/rdfdatabank/templates/siloview.html
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+<%inherit file="/base.html" />
+<%def name="head_tags()">
+  List of Data Archives
+</%def>
+
+% if c.silo_name:
+
+% for key in ['title', 'description', 'owner']:
+% if c.silo.state.has_key(key):
+${key.capitalize()} - ${c.silo.state[key]}
+% endif
+% endfor
+
+Items:
+
+% for item in sorted(c.items):
+${item}
+
+Delete item
+
+<%
+c.current = item
+%>
+
+<%include file="/embargo_form.html"/>
+
+% endfor
+
+% endif
+
+Create new empty object:
+
+<%include file="create_new_item.html"/>
+
diff --git a/rdfdatabank/templates/subitemview.html b/rdfdatabank/templates/subitemview.html
new file mode 100644
index 0000000..26efc64
--- /dev/null
+++ b/rdfdatabank/templates/subitemview.html
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+<%inherit file="/base.html" />
+<%def name="head_tags()">
+  View of item ${c.item.uri}
+</%def>
+
+% if c.item:
+<%include file="/part_list.html"/>
+% endif
diff --git a/rdfdatabank/templates/successful_package_upload.html b/rdfdatabank/templates/successful_package_upload.html
new file mode 100644
index 0000000..08aa094
--- /dev/null
+++ b/rdfdatabank/templates/successful_package_upload.html
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+<%inherit file="/base.html" />
+<%def name="head_tags()">
+  Package uploaded to "${c.silo_name}"
+</%def>
+
+SUCCESS!
+
+Package uploaded successfully
+
+Zipfile stored in object: ${c.info['zip_id']}
+
+Uploaded zipfile size: ${h.bytes_to_english(c.info['zip_file_size'])}
+
+Resultant unpacked object: ${c.info['zip_target']}
+
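Both part_list.html and successful_package_upload.html format sizes with h.bytes_to_english, a template helper that is not included in this diff. A hypothetical sketch of such a helper, assuming it only converts a raw byte count into a human-readable string::

    def bytes_to_english(size):
        """Render a byte count in human-readable units, e.g. 2048 -> '2.0 kB'."""
        size = float(size)
        for unit in ['bytes', 'kB', 'MB', 'GB']:
            if size < 1024.0 or unit == 'GB':
                return '%.1f %s' % (size, unit)
            size = size / 1024.0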
diff --git a/rdfdatabank/templates/upload_package.html b/rdfdatabank/templates/upload_package.html
new file mode 100644
index 0000000..7d63c45
--- /dev/null
+++ b/rdfdatabank/templates/upload_package.html
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+
+
+
+
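The markup for the upload form itself is not reproduced above. As a hypothetical illustration of the request it would issue, here is a client-side multipart POST of a zip package; the endpoint path and the 'file' field name are assumptions rather than values taken from this diff::

    import httplib

    def upload_package(host, port, silo_name, zip_path):
        """POST a zip file to a silo's package upload endpoint (assumed path and field name)."""
        boundary = '----databank-package-boundary'
        payload = open(zip_path, 'rb').read()
        body = ('--%s\r\n'
                'Content-Disposition: form-data; name="file"; filename="%s"\r\n'
                'Content-Type: application/zip\r\n'
                '\r\n' % (boundary, zip_path)) + payload + '\r\n--%s--\r\n' % boundary
        headers = {'Content-Type': 'multipart/form-data; boundary=%s' % boundary}
        conn = httplib.HTTPConnection(host, port)
        conn.request('POST', '/packages/%s' % silo_name, body, headers)
        return conn.getresponse()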
diff --git a/rdfdatabank/tests/__init__.py b/rdfdatabank/tests/__init__.py
new file mode 100644
index 0000000..d51f6f2
--- /dev/null
+++ b/rdfdatabank/tests/__init__.py
@@ -0,0 +1,36 @@
+"""Pylons application test package
+
+This package assumes the Pylons environment is already loaded, such as
+when this script is imported from the `nosetests --with-pylons=test.ini`
+command.
+
+This module initializes the application via ``websetup`` (`paster
+setup-app`) and provides the base testing objects.
+"""
+from unittest import TestCase
+
+from paste.deploy import loadapp
+from paste.script.appinstall import SetupCommand
+from pylons import config, url
+from routes.util import URLGenerator
+from webtest import TestApp
+
+import pylons.test
+
+__all__ = ['environ', 'url', 'TestController']
+
+# Invoke websetup with the current config file
+SetupCommand('setup-app').run([config['__file__']])
+
+environ = {}
+
+class TestController(TestCase):
+
+    def __init__(self, *args, **kwargs):
+        if pylons.test.pylonsapp:
+            wsgiapp = pylons.test.pylonsapp
+        else:
+            wsgiapp = loadapp('config:%s' % config['__file__'])
+        self.app = TestApp(wsgiapp)
+        url._push_object(URLGenerator(config['routes.map'], environ))
+        TestCase.__init__(self, *args, **kwargs)
diff --git a/rdfdatabank/tests/functional/__init__.py b/rdfdatabank/tests/functional/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/rdfdatabank/tests/functional/test_objects.py b/rdfdatabank/tests/functional/test_objects.py
new file mode 100644
index 0000000..50f357c
--- /dev/null
+++ b/rdfdatabank/tests/functional/test_objects.py
@@ -0,0 +1,7 @@
+from rdfdatabank.tests import *
+
+class TestObjectsController(TestController):
+
+    def test_index(self):
+        response = self.app.get(url(controller='objects', action='index'))
+        # Test response...
diff --git a/rdfdatabank/tests/functional/test_packages.py b/rdfdatabank/tests/functional/test_packages.py
new file mode 100644
index 0000000..432ad20
--- /dev/null
+++ b/rdfdatabank/tests/functional/test_packages.py
@@ -0,0 +1,7 @@
+from rdfdatabank.tests import *
+
+class TestPackagesController(TestController):
+
+    def test_index(self):
+        response = self.app.get(url(controller='packages', action='index'))
+        # Test response...
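The generated functional tests only issue a GET and leave the assertions as a comment. A slightly fuller sketch of how TestController and url() can be used, with the expected status and content type stated as assumptions about the index view rather than facts from this diff::

    from rdfdatabank.tests import *

    class TestObjectsIndex(TestController):

        def test_index_ok(self):
            response = self.app.get(url(controller='objects', action='index'))
            # webtest exposes the parsed response; 200 + HTML are assumptions here
            assert response.status_int == 200
            assert 'html' in response.content_type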
diff --git a/rdfdatabank/tests/test_models.py b/rdfdatabank/tests/test_models.py
new file mode 100644
index 0000000..e69de29
diff --git a/rdfdatabank/websetup.py b/rdfdatabank/websetup.py
new file mode 100644
index 0000000..e6c53b4
--- /dev/null
+++ b/rdfdatabank/websetup.py
@@ -0,0 +1,10 @@
+"""Setup the rdfdatabank application"""
+import logging
+
+from rdfdatabank.config.environment import load_environment
+
+log = logging.getLogger(__name__)
+
+def setup_app(command, conf, vars):
+    """Place any commands to setup rdfdatabank here"""
+    load_environment(conf.global_conf, conf.local_conf)
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..dc8ee38
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,31 @@
+[egg_info]
+tag_build = dev
+tag_svn_revision = true
+
+[easy_install]
+find_links = http://www.pylonshq.com/download/
+
+[nosetests]
+with-pylons = test.ini
+
+# Babel configuration
+[compile_catalog]
+domain = rdfdatabank
+directory = rdfdatabank/i18n
+statistics = true
+
+[extract_messages]
+add_comments = TRANSLATORS:
+output_file = rdfdatabank/i18n/rdfdatabank.pot
+width = 80
+
+[init_catalog]
+domain = rdfdatabank
+input_file = rdfdatabank/i18n/rdfdatabank.pot
+output_dir = rdfdatabank/i18n
+
+[update_catalog]
+domain = rdfdatabank
+input_file = rdfdatabank/i18n/rdfdatabank.pot
+output_dir = rdfdatabank/i18n
+previous = true
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..426eba8
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,36 @@
+try:
+    from setuptools import setup, find_packages
+except ImportError:
+    from ez_setup import use_setuptools
+    use_setuptools()
+    from setuptools import setup, find_packages
+
+setup(
+    name='rdfdatabank',
+    version='0.1',
+    description='',
+    author='',
+    author_email='',
+    url='',
+    install_requires=[
+        "Pylons>=0.9.7",
+    ],
+    setup_requires=["PasteScript>=1.6.3"],
+    packages=find_packages(exclude=['ez_setup']),
+    include_package_data=True,
+    test_suite='nose.collector',
+    package_data={'rdfdatabank': ['i18n/*/LC_MESSAGES/*.mo']},
+    #message_extractors={'rdfdatabank': [
+    #    ('**.py', 'python', None),
+    #    ('templates/**.mako', 'mako', {'input_encoding': 'utf-8'}),
+    #    ('public/**', 'ignore', None)]},
+    zip_safe=False,
+    paster_plugins=['PasteScript', 'Pylons'],
+    entry_points="""
+    [paste.app_factory]
+    main = rdfdatabank.config.middleware:make_app
+
+    [paste.app_install]
+    main = pylons.util:PylonsInstaller
+    """,
+)
diff --git a/test.ini b/test.ini
new file mode 100644
index 0000000..ae777ba
--- /dev/null
+++ b/test.ini
@@ -0,0 +1,21 @@
+#
+# rdfdatabank - Pylons testing environment configuration
+#
+# The %(here)s variable will be replaced with the parent directory of this file
+#
+[DEFAULT]
+debug = true
+# Uncomment and replace with the address which should receive any error reports
+#email_to = you@yourdomain.com
+smtp_server = localhost
+error_email_from = paste@localhost
+
+[server:main]
+use = egg:Paste#http
+host = 127.0.0.1
+port = 5000
+
+[app:main]
+use = config:development.ini
+
+# Add additional test specific configuration options as necessary.
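test.ini layers test settings over development.ini, and setup.cfg points the nose plugin at it (with-pylons = test.ini). A minimal sketch of loading the same WSGI application programmatically, assuming the package has been installed in development mode (python setup.py develop)::

    from paste.deploy import loadapp
    from webtest import TestApp

    wsgiapp = loadapp('config:test.ini', relative_to='.')  # follows use = config:development.ini
    app = TestApp(wsgiapp)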
diff --git a/who.ini b/who.ini
new file mode 100644
index 0000000..88f2cb4
--- /dev/null
+++ b/who.ini
@@ -0,0 +1,44 @@
+[plugin:auth_tkt]
+# identification and authentication
+use = repoze.who.plugins.auth_tkt:make_plugin
+secret = sup3rs33kr1t
+cookie_name = databank
+secure = True
+include_ip = False
+
+[plugin:basicauth]
+# identification and challenge
+use = repoze.who.plugins.basicauth:make_plugin
+realm = 'databank'
+
+[plugin:htpasswd]
+# authentication
+use = repoze.who.plugins.htpasswd:make_plugin
+filename = %(here)s/passwd
+check_fn = repoze.who.plugins.htpasswd:crypt_check
+
+[general]
+request_classifier = repoze.who.classifiers:default_request_classifier
+challenge_decider = repoze.who.classifiers:default_challenge_decider
+remote_user_key = REMOTE_USER
+
+[identifiers]
+# plugin_name;classifier_name:.. or just plugin_name (good for any)
+plugins =
+    auth_tkt
+    basicauth
+
+[authenticators]
+# plugin_name;classifier_name.. or just plugin_name (good for any)
+plugins =
+    auth_tkt
+    htpasswd
+
+[challengers]
+# plugin_name;classifier_name:.. or just plugin_name (good for any)
+plugins =
+    basicauth
+
+[mdproviders]
+plugins =
+    rdfdatabank.lib.ident_md:IdentMDProvider
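who.ini points the htpasswd plugin at %(here)s/passwd and verifies passwords with crypt_check, i.e. crypt(3) hashes. A small sketch for generating a line for that file; the username and password shown are placeholders::

    import crypt
    import random
    import string

    def passwd_line(username, password):
        """Return a 'user:crypted' line suitable for the passwd file read by who.ini."""
        salt = ''.join(random.choice(string.ascii_letters) for _ in range(2))
        return '%s:%s' % (username, crypt.crypt(password, salt))

    print passwd_line('admin', 'change-me')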