diff --git a/debian/README b/debian/README index 96a2f8c..aa4f8e6 100644 --- a/debian/README +++ b/debian/README @@ -3,4 +3,4 @@ The Debian Package databank Comments regarding the Package - -- Anusha Ranganathan Mon, 09 Apr 2012 17:08:50 +0000 + -- Anusha Ranganathan Thu, 03 May 2012 09:08:50 +0000 diff --git a/debian/changelog b/debian/changelog index e8d7e6d..ed1d921 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,22 @@ +databank (0.3rc3) unstable; urgency=low + + * Mod-wsgi and metadata update fixes to Sword server + * Minor user interface changes + + -- Anusha Ranganathan Thu, 03 May 2012 09:08:50 +0000 + +databank (0.3rc2) unstable; urgency=low + + * Extended API to post a file without first having to create a data package + * New Databank authentication and authorization module. + - Designed to work with repoze.what and sqlalchemy. + - User and membership information stored in a mysql database + * Databank API extended to administer silos, users, and user membership + - Databank ui has new adminstration pages in line with api functionality + * Sword server packaged with databank + + -- Anusha Ranganathan Fri, 27 Apr 2012 09:08:50 +0000 + databank (0.3rc1) unstable; urgency=low * Basic support for sword deposit diff --git a/debian/config b/debian/config index f97b176..16c2db9 100644 --- a/debian/config +++ b/debian/config @@ -35,3 +35,9 @@ db_go #db_input critical databank/passwdnote || true #db_go +# Set default database name, database user +if [ -f /usr/share/dbconfig-common/dpkg/config ]; then + . 
/usr/share/dbconfig-common/dpkg/config.mysql + dbc_go databank $@ +fi + diff --git a/debian/control b/debian/control index 35ca1c6..12f5a44 100644 --- a/debian/control +++ b/debian/control @@ -12,26 +12,33 @@ XS-Python-Version: >= 2.6 Package: databank Architecture: all Depends: python, - python-pylons, - python-repoze.who, - python-repoze.who-friendlyform, + python-dev, + python-setuptools, + python-virtualenv, python-rdflib, python-dateutil, - python-libxml2, - python-libxslt1, python-pairtree, python-recordsilo, python-solrpy, python-redis, + mysql-server, + libmysql++-dev, + git-core, unzip, + libxml2, + libxml2-dev, + libxslt1.1, + libxslt-dev, apache2, apache2-utils, libapache2-mod-wsgi, redis-server, supervisor, openjdk-6-jre, - debconf -Recommends: solr-tomcat + debconf, + dbconfig-common +Recommends: solr-tomcat, + mysql-client Description: RDF-enhanced, pairtree-backed storage API Databank is a simple, RDF-enhanced storage API which is backed by pairtree, for the safe storage of and access to data. 
diff --git a/debian/copyright b/debian/copyright index 896670a..0aed2ee 100644 --- a/debian/copyright +++ b/debian/copyright @@ -1,6 +1,6 @@ This work was packaged for Debian by: - Anusha Ranganathan on Mon, 09 Apr 2012 17:08:50 +0000 + Anusha Ranganathan on Thu, 03 May 2012 09:08:50 +0000 It was downloaded from: diff --git a/debian/databank.install b/debian/databank.install index 0187a8e..f6abaf3 100644 --- a/debian/databank.install +++ b/debian/databank.install @@ -1,15 +1,18 @@ docs/apache_config/databank_wsgi /etc/default/databank/ +docs/apache_config/databank_ve26_wsgi /etc/default/databank/ +docs/apache_config/databank_ve27_wsgi /etc/default/databank/ docs/solr_config/conf/schema.xml /etc/default/databank/ production.ini /etc/default/databank/ development.ini /etc/default/databank/ +sss.conf.json /etc/default/databank/ MANIFEST.in /var/lib/databank/ mod_wsgi /var/lib/databank/ message_workers/ /var/lib/databank/ -passwd-default /var/lib/databank/ rdfdatabank/ /var/lib/databank/ rdfdatabank.egg-info/ /var/lib/databank test.ini /var/lib/databank/ who.ini /var/lib/databank/ -sss.conf.json /var/lib/databank/ +sss/ /var/lib/databank/ add_user.py /var/lib/databank/ -persisted_state.json /var/lib/databank/ \ No newline at end of file +setup_db.py /var/lib/databank/ +persisted_state.json /var/lib/databank/ diff --git a/debian/dirs b/debian/dirs new file mode 100644 index 0000000..8d0a38b --- /dev/null +++ b/debian/dirs @@ -0,0 +1,4 @@ +/usr/share/databank +usr/share/dbconfig-common/data/databank/install/mysql +usr/share/dbconfig-common/data/databank/upgrade/mysql +/etc/default/databank diff --git a/debian/postinst b/debian/postinst index 9e89a35..cc5cb3a 100644 --- a/debian/postinst +++ b/debian/postinst @@ -7,6 +7,11 @@ set -e # Source debconf library. . /usr/share/debconf/confmodule +. 
/usr/share/dbconfig-common/dpkg/postinst.mysql + +paramfile=/etc/default/databank/db.sh +dbc_generate_include=sh:$paramfile +dbc_go databank $@ # Ask questions db_get databank/email @@ -28,6 +33,7 @@ local dbpasswd="$RET" #Modify the ini files CONFIG_FILE1=/etc/default/databank/production.ini CONFIG_FILE2=/etc/default/databank/development.ini +CONFIG_FILE3=/etc/default/databank/sss.conf.json #replace email TARGET_KEY=email_to echo "Setting email Id: " $dbemail @@ -47,24 +53,33 @@ sed -i "s,\($TARGET_KEY *= *\).*,\1$dbstore," $CONFIG_FILE2 #replace granary uri root echo "Setting Base URI: " $dburi TARGET_KEY=granary.uri_root -[ $TARGET_KEY != "*/" ] && TARGET_KEY="$TARGET_KEY""/" +#[ $dburi != "*/" ] && dburi="$dburi""/" +dburi=${dburi%/} +dburi="$dburi""/" sed -i "s,\($TARGET_KEY *= *\).*,\1$dburi," $CONFIG_FILE1 sed -i "s,\($TARGET_KEY *= *\).*,\1$dburi," $CONFIG_FILE2 - -#Add the user password -#htpasswd -b -c /var/lib/databank/passwd admin $dbpasswd -#cp /var/lib/databank/passwd-default /var/lib/databank/passwd -cd /var/lib/databank -if ! [ -f /var/lib/databank/passwd ] -then -touch /var/lib/databank/passwd -fi -python add_user.py admin $dbpasswd +TARGET_KEY1=' "base_url"' +TARGET_KEY2=' "db_base_url"' +val1='"'$dburi'swordv2/",' +val2='"'$dburi'",' +sed -i "s|\($TARGET_KEY1 *: *\).*|\1$val1|" $CONFIG_FILE3 +sed -i "s|\($TARGET_KEY2 *: *\).*|\1$val2|" $CONFIG_FILE3 +#replace the database connection string +TARGET_KEY=sqlalchemy.url +echo "Setting Database connection string: " +chmod 744 $paramfile +. 
$paramfile +dburl=mysql://$dbuser:$dbpass@localhost:3306/$dbname +sed -i "s^\($TARGET_KEY *= *\).*^\1$dburl^" $CONFIG_FILE1 +sed -i "s^\($TARGET_KEY *= *\).*^\1$dburl^" $CONFIG_FILE2 #Link config files ln -sf /etc/default/databank/production.ini /var/lib/databank/production.ini ln -sf /etc/default/databank/development.ini /var/lib/databank/development.ini +ln -sf /etc/default/databank/sss.conf.json /var/lib/databank/sss.conf.json ln -sf /etc/default/databank/databank_wsgi /etc/apache2/sites-available/databank_wsgi +ln -sf /etc/default/databank/databank_ve26_wsgi /etc/apache2/sites-available/databank_ve26_wsgi +ln -sf /etc/default/databank/databank_ve27_wsgi /etc/apache2/sites-available/databank_ve27_wsgi #Create dirs and change ownership and permissions # ownership and permissions for /var/lib/databank @@ -83,9 +98,56 @@ chgrp -R www-data /etc/default/databank/ chmod -R 770 /var/cache/databank/ # ownership and permissions granary.store mkdir -p $dbstore +cp /var/lib/databank/persisted_state.json $dbstore chgrp -R www-data $dbstore chmod -R 770 $dbstore +#Setup the python virtual environment +virtualenv --no-site-packages /var/lib/databank/ +cd /var/lib/databank/ +. 
bin/activate +# Get the version of python being used +if [ -d /var/lib/databank/lib/python2.6/ ] +then + py_rs='https://github.com/anusharanganathan/RecordSilo/raw/master/dist/RecordSilo-0.4.15-py2.6.egg' + py_who='/var/lib/databank/lib/python2.6/site-packages/repoze.who-1.0.19-py2.6.egg/' + py_site=databank_ve26_wsgi + py_webob='/var/lib/databank/lib/python2.6/site-packages/WebOb-1.2b3-py2.7.egg' +else + py_rs='https://github.com/anusharanganathan/RecordSilo/raw/master/dist/RecordSilo-0.4.15-py2.7.egg' + py_who='/var/lib/databank/lib/python2.7/site-packages/repoze.who-1.0.19-py2.7.egg/' + py_site=databank_ve27_wsgi + py_webob='/var/lib/databank/lib/python2.7/site-packages/WebOb-1.2b3-py2.7.egg' +fi +easy_install python-dateutil==1.5 +easy_install pairtree==0.7.1-T +easy_install $py_rs +easy_install solrpy==0.9.5 +easy_install rdflib==2.4.2 +easy_install redis==2.4.11 +easy_install MySQL-python +easy_install pylons==1.0 +easy_install lxml==2.3.4 +easy_install web.py +easy_install sqlalchemy==0.7.6 +easy_install webob==1.0.8 +easy_install repoze.what-pylons +easy_install repoze.what-quickstart +if [ -d $py_who ] +then + rm -r $py_who +fi +if [ -d $py_webob ] +then + rm -r $py_webob +fi +easy_install repoze.who==2.0a4 + +#Setup the database and add the user password +cd /var/lib/databank +paster setup-app production.ini +python add_user.py admin $dbpasswd $dbemail + #Configure SOLR and message workers in supervisor db_get databank/confsolr local dbconf="$RET" @@ -132,7 +194,7 @@ fi #Enable site in apache a2dissite default a2dissite default-ssl -a2ensite databank_wsgi +a2ensite $py_site #Start Apache invoke-rc.d apache2 reload diff --git a/debian/postrm b/debian/postrm index 7bb6e76..d1d6280 100644 --- a/debian/postrm +++ b/debian/postrm @@ -1,8 +1,22 @@ #!/bin/bash set -e -# Source debconf library. -. /usr/share/debconf/confmodule +if [ -f /usr/share/debconf/confmodule ]; then + . 
/usr/share/debconf/confmodule +fi +if [ -f /usr/share/dbconfig-common/dpkg/postrm.mysql ]; then + . /usr/share/dbconfig-common/dpkg/postrm.mysql + dbc_go databank $@ +fi + +if [ "$1" = "purge" ]; then + rm -f /etc/default/databank/db.sh + if which ucf >/dev/null 2>&1; then + ucf --purge /etc/default/databank/db.sh + ucfr --purge databank /etc/default/databank/db.sh + fi +fi + # Remove my changes to the db. db_purge diff --git a/debian/prerm b/debian/prerm new file mode 100644 index 0000000..7a28f6d --- /dev/null +++ b/debian/prerm @@ -0,0 +1,10 @@ +#!/bin/sh + +set -e +#set -x + +. /usr/share/debconf/confmodule +. /usr/share/dbconfig-common/dpkg/prerm.mysql +dbc_go databank $@ + + diff --git a/debian/templates b/debian/templates index 017c4c8..f4592eb 100644 --- a/debian/templates +++ b/debian/templates @@ -22,7 +22,7 @@ Description: Base URI: Example: http://example.com/ Template: databank/password -Type: string +Type: password Description: Administrator password: The password for user 'admin' The user 'admin' is the main administartor for Databank and diff --git a/development.ini b/development.ini index adde6ac..21d643d 100644 --- a/development.ini +++ b/development.ini @@ -52,7 +52,7 @@ beaker.session.key = rdfdatabank beaker.session.secret = somesecret who.config_file = /var/lib/databank/who.ini -who.log_level = info +who.log_level = debug who.log_file = stdout #who.log_file = /var/log/databank/who.log diff --git a/docs/Databank_VM_Installation.txt b/docs/Databank_VM_Installation.txt index a4fc900..ef67d1b 100644 --- a/docs/Databank_VM_Installation.txt +++ b/docs/Databank_VM_Installation.txt @@ -1,4 +1,9 @@ -Databank VM Setup +Databank VM Setup - 0.3rc2 +This document details installing Databank from source + +For installing Databank from a debian package, visit http://apt-repo.bodleian.ox.ac.uk/databank/ +and follow the instruction under 'Using the repository' and 'Installing Databank' + 
------------------------------------------------------------------------------------------------------ I. Virtual machine details ------------------------------------------------------------------------------------------------------ @@ -9,7 +14,7 @@ Network: NAT 1 processor hostname: databank -Partition disk � guided � use entire disk and set up LVM +Partition disk - guided - use entire disk and set up LVM Full name: Databank Admin username: demoSystemUser password: xxxxxxx @@ -19,42 +24,228 @@ No automatic updates No predefined software Install Grub boot loader to master boot record +Installing VMWare tools + Select Install Vmware tools from the VMWare console + sudo mkdir /mnt/cdrom + sudo mount /dev/cdrom /mnt/cdrom + cd tmp + cd /tmp + ls -l + tar zxpf /mnt/cdrom/VMwareTools-7.7.6-203138.tar.gz vmware-tools-distrib/ + ls -l + sudo umount /dev/cdrom + sudo apt-get install linux-headers-virtual + sudo apt-get install psmisc + cd vmware-tools-distrib/ + sudo ./vmware-install.pl +Accept all of the default options + ------------------------------------------------------------------------------------------------------ -II. Packages to be Installed +II. A. 
Packages to be Installed ------------------------------------------------------------------------------------------------------ sudo apt-get install build-essential sudo apt-get update sudo apt-get install openssh-server - sudo apt-get install openjdk-6-jre + sudo apt-get install python-dev + sudo apt-get install python-setuptools + sudo apt-get install python-virtualenv + sudo apt-get install curl sudo apt-get install links2 sudo apt-get install unzip + sudo apt-get install libxml2-dev + sudo apt-get install libxslt-dev + sudo apt-get install libxml2 + sudo apt-get install libxslt1.1 - sudo apt-get install apache2 - sudo apt-get install apache2-utils - sudo apt-get install libapache2-mod-wsgi + sudo apt-get install redis-server + +------------------------------------------------------------------------------------------------------ +III. Create mysql user and database for Databank +------------------------------------------------------------------------------------------------------ + + # If you don't have mysql installed, run the following command + sudo apt-get install mysql-server libmysql++-dev + + # Create mysql user and database for Databank + # Create Database databankauth and user databanksqladmin. Give user databanksqladmin access to databankauth + # Set the password for user databanksqladmin - replace 'password' in the command below + mysql -u root -p +mysql> use mysql; +mysql> CREATE DATABASE databankauth DEFAULT CHARACTER SET utf8 COLLATE utf8_bin; +mysql> GRANT ALL ON databankauth.* TO databanksqladmin@localhost IDENTIFIED BY 'password'; +mysql> exit + + # Test the user and database are created fine. + # You should be able to login as user databanksqladmin and use the database databankauth. 
+ # The database will be populated with the required tables when the databank application is setup + mysql -h localhost -u databanksqladmin -p +mysql> use databankauth; +mysql> show tables; +mysql> exit + +------------------------------------------------------------------------------------------------------ +IV. Install Databank, Sword server and python dependencies +------------------------------------------------------------------------------------------------------ + Databank's root folder is now /var/lib/databank + + # Create all of the folders needed for Databank and set the permission and owner + sudo mkdir /var/lib/databank + sudo mkdir /var/log/databank + sudo mkdir /var/cache/databank + sudo mkdir /etc/default/databank + sudo mkdir /silos + sudo chown -R databankadmin:www-data /var/lib/databank/ + sudo chown -R databankadmin:www-data /var/log/databank/ + sudo chown -R databankadmin:www-data /var/cache/databank/ + sudo chown -R databankadmin:www-data /etc/default/databank/ + sudo chown -R databankadmin:www-data /silos/ + sudo chmod -R 775 /var/lib/databank/ + sudo chmod -R 775 /var/log/databank/ + sudo chmod -R 775 /var/cache/databank/ + sudo chmod -R 775 /etc/default/databank/ + sudo chmod -R 775 /silos/ + + # Pull databank source code from Github into /var/lib/databank + sudo apt-get install git-core git-doc + git clone git://github.com/dataflow/RDFDatabank /var/lib/databank + + # Move all of the config files into /etc/default/databank so you don't overwrite them by mistake when updating the source code + cp production.ini /etc/default/databank/ + cp development.ini /etc/default/databank/ + cp -r docs/apache_config/*_wsgi /etc/default/databank/ + cp docs/solr_config/conf/schema.xml /etc/default/databank/ + + # Setup a virtual environment for python and install all the python packages + virtualenv --no-site-packages /var/lib/databank/ + cd /var/lib/databank/ + source bin/activate + easy_install python-dateutil==1.5 + easy_install pairtree==0.7.1-T + 
easy_install https://github.com/anusharanganathan/RecordSilo/raw/master/dist/RecordSilo-0.4.15-py2.7.egg + easy_install solrpy==0.9.5 + easy_install rdflib==2.4.2 + easy_install redis==2.4.11 + easy_install MySQL-python + easy_install pylons==1.0 + easy_install lxml==2.3.4 + easy_install web.py + easy_install sqlalchemy==0.7.6 + easy_install repoze.what-pylons + easy_install repoze.what-quickstart - sudo apt-get install python-setuptools - sudo apt-get install python-libxml2 - sudo apt-get install python-libxslt1 - sudo apt-get install python-pylons - sudo apt-get install python-rdflib - sudo apt-get install python-dateutil + # Repoze.what installs repoze.who version 1.0.19 while Databank uses repoze.who 2.0a4. So delete repoze.who 1.0.19 + rm -r lib/python2.7/site-packages/repoze.who-1.0.19-py2.7.egg/ + + # Pylons installs the latest version of WebOb, which expects all requests in utf-8 while earlier WebOb until 1.0.8 didn't insist on utf-8. + # So remove the latest version of WebOb, which currently is 1.2b3 + rm -r lib/python2.7/site-packages/WebOb-1.2b3-py2.7.egg/ - sudo easy_install pairtree==0.5.6-T - sudo easy_install repoze.who==2.0a4 - sudo easy_install repoze.who_friendlyform - sudo easy_install solrpy + # Install the particular version of repoze.who and WebOb needed for Databank + easy_install repoze.who==2.0a4 + easy_install webob==1.0.8 + + # Pull the sword server from source forge and copy the folder sss within sword server into databank + cd ~ + wget http://sword-app.svn.sourceforge.net/viewvc/sword-app/sss/branches/sss-2/?view=tar + mv index.html\?view\=tar sword-server-2.tar.gz + tar xzvf sword-server-2.tar.gz + cp -r ./sss-2/sss/ /var/lib/databank/ + cd /var/lib/databank Installing profilers in python and pylons to obtain run time performance and other stats Note: This package is OPTIONAL and is only needed in development machines. 
See the note below about running Pylons in debug mode (section B) - sudo apt-get install python-profiler - sudo easy_install repoze.profile + easy_install profiler + easy_install repoze.profile - Installing solr - tomcat +------------------------------------------------------------------------------------------------------ +V. Customizing Databank to your environment +------------------------------------------------------------------------------------------------------ +All of Databank's configuration settings are placed in the file production.ini or development.ini + * development.ini is configured to work in debug mode with all of the logs written to the console. + * production.ini is configured to not work in debug mode with all of the logs written to log files + +The following settings need to be configured +1. Administrator email and smtp server for emails + The databank will email errors to the administrator + Edit the field 'email_to' for the email address + Edit the field 'smtp_server' for the smtp server to be used. The default value is 'localhost'. + +2. The location where all of Databank's data is to be stored + Edit the field 'granary.store' + The default value is '/silos' + +3^. The url where Databank will be available. + Examples for this are: + The server name like http://example.com/databank/ or + the ip address of the machine, if it has no cname http://192.168.23.131/ or + just using localhost (development / evaluation) http://localhost/ or + Edit the field 'granary.uri_root' + The default value is 'http://databank/' + +4. The mysql database connection string for databank + The format of the connection string is mysql://username:password@localhost:3306/database_name + Replace username, password and database_name with the correct values. + The default username is databanksqladmin + The default database name is databankauth + Edit the field 'sqlalchemy.url' + The default value is 'mysql://databanksqladmin:d6sqL4dm;n@localhost:3306/databankauth' + +5. 
The SOLR end point + Should point to the databank solr instance + Edit the field 'solr.host' + The default value is http://localhost:8080/solr + +6. Default metadata values + The value of publisher and the default value of rights and license can be modified + These are treated as text strings and are currently used in the manifest.rdf + + ^ This setting will also need to be modified at /var/lib/databank/rdfdatabank/tests/RDFDatabankConfig.py + Change 'granary_uri_root'. + See section XVI for the significance of the base URI + +------------------------------------------------------------------------------------------------------ +VI. Customizing Databank Sword to your environment +------------------------------------------------------------------------------------------------------ +The sword configuration settings are placed in the file sss.conf.json + +The url where Databank will be available needs to be set +Without this, a sword client cannot talk to Databank through the sword interface + +Edit the field 'base_url' +The default value is http://localhost:5000/swordv2/ +Replace http://localhost:5000/ with the correct base url +Examples for this are: + The server name like http://example.com/databank/ or + the ip address of the machine, if it has no cname http://192.168.23.131/ or + just using localhost (development / evaluation) http://localhost/ or + + +Edit the field 'db_base_url' +The default value is http://192.168.23.133/ +Replace with the correct base url + +------------------------------------------------------------------------------------------------------ +VII. Initialize databank and Create the main admin user to access Databank +------------------------------------------------------------------------------------------------------ + paster setup-app production.ini + python add_user.py admin password dataflow-devel@googlegroups.com + + The second command is used to create the administrator user for databank. 
+ * The administrator has a default username as 'admin'. + * This user is the root administrator for Databank and has access to all the silos in Databank. + * Please choose a strong password for the user and replace the string 'password' with the password. + +------------------------------------------------------------------------------------------------------ +VIII. Installing SOLR with Tomcat and cutomizing SOLR for Databank + * If you already have an existing SOLR installation and would like to use that, see section XVIII +------------------------------------------------------------------------------------------------------ + # Install solr with tomcat + sudo apt-get install openjdk-6-jre sudo apt-get install solr-tomcat + This will install Solr from Ubuntu's repositories as well as install and configure Tomcat. Tomcat is installed with CATALINA_HOME in /usr/share/tomcat6 and CATALINA_BASE in /var/lib/tomcat6, following the rules from /usr/share/doc/tomcat6-common/RUNNING.txt.gz. @@ -64,167 +255,58 @@ II. Packages to be Installed These directories contain the solr home director, data directory and configuration data respectively. You can visit the url http://localhost:8080 and http://localhost:8080/solr to make sure Tomcat and SOLR are working fine - - Installing VMWare tools - Select Install Vmware tools from the VMWare console - sudo mkdir /mnt/cdrom - sudo mount /dev/cdrom /mnt/cdrom - cd tmp - cd /tmp - ls -l - tar zxpf /mnt/cdrom/VMwareTools-7.7.6-203138.tar.gz vmware-tools-distrib/ - ls -l - sudo umount /dev/cdrom - sudo apt-get install linux-headers-virtual - sudo apt-get install psmisc - cd vmware-tools-distrib/ - sudo ./vmware-install.pl -Accept all of the default options -Installing Redis (http://redis.io/) - sudo apt-get install redis-server - The data directory is at /var/lib/redis and the configuration is at /etc/redis - This installed version 2:1.2.0-1 - This is actually version 1.2.0-1 of redis whihc is very old. 
If on Lucid, download the debian package from Oneiric - sudo apt-get remove --purge redis-server - cd ~ - wget http://ubuntu.intergenia.de/ubuntu//pool/universe/r/redis/redis-server_2.2.11-3_amd64.deb - sudo dpkg -i redis-server_2.2.11-3_amd64.deb - -Installing the python redis client - Get the latest version of the python-redis client from oneiric - wget http://de.archive.ubuntu.com/ubuntu/pool/universe/p/python-redis/python-redis_2.4.5-1_all.deb - sudo dpkg -i python-redis_2.4.5-1_all.deb - I initially did sudo apt-get install python-redis. - This installed verison 0.6.1, which is the latest version available for Lucid amd64 architecture is 0.6.1, - so I removed the package using apt-get remove --purge python-redis - I then downloaded the latest debian package from Oneiric amd64 using wget and installed it using dpkg - I did have a look at the backports but from what I saw, the necessary package isn't in there - -Installing supervisor - to manage the message workers - sudo apt-get install supervisor + # Stop tomcat before customizing solr + /etc/init.d/tomcat6 stop -Installing git - sudo apt-get install git-core git-doc - If you want to clone the repository (to be able to commit changes), - setup git following the instructions in http://help.github.com/linux-set-up-git/ + # Backup the current solr schema + sudo cp /etc/solr/conf/schema.xml /etc/solr/conf/schema.xml.bak -Pulling Recordsilo from github - Recordsilo is available at https://github.com/anusharanganathan/RecordSilo - Recordsilo and RDFDatabank and kept in sync so at any period of time the latest egg file in - Recordsilo/dist (https://github.com/anusharanganathan/RecordSilo/tree/master/dist) is comaptible with the code in the master branch of RDFDatabank - - Clone/Pull Recordsilo and install the latest vesion. Currently the latest version is 0.4.14. 
- - sudo mkdir ~/git-src - cd git-src - git clone git://github.com/anusharanganathan/RecordSilo.git - cd RecordSilo/dist - sudo easy_install RecordSilo-0.4.14-py2.x.egg (2.x - depending on whether you are running python 2.6 / python 2.7) - -Pulling Recordsilo and Databank from github - RDFDatabank is available at https://github.com/anusharanganathan/RDFDatabank - - Clone/Pull RDFDatabank into its root directory - Previously Databank was installed in /opt/RDFDatabank/ - Since creating a debian package, Databank is installed in /var/lib/databank/ - Installing it in either place is fine. I shall herewith refer to the location of the Databank root directory with $DB_ROOT - where $DB_ROOT = /var/lib/databank/ (or /opt/RDFDatabank/) - - sudo mkdir -p $DB_ROOT - sudo chown www-data:demoSystemUser $DB_ROOT - sudo chmod 775 $DB_ROOT - git clone git://github.com/dataflow/RDFDatabank.git $DB_ROOT + # Copy (sym link) the Databank SOLR Schema into Solr + sudo ln -sf /etc/default/databank/schema.xml /etc/solr/conf/schema.xml ------------------------------------------------------------------------------------------------------- -III. Customizing Databank to your environment ------------------------------------------------------------------------------------------------------- -Configure your Pylons environment - edit the production.ini file -In the file production.ini, edit the folllowing: - email_to = your_email_address - #Paster uses this to send you error messages - - who.log_file = /var/log/databank/who.log - #You can change this if you want your log files stored in another location. - #If running Databank using Apache, user "www-data" will need to be able to read and write to this directory - - granary.store = /silos - #This is root directory where all of the data in Databank is going to be held. - #You can change this to a location of your choice. 
- #If you are leaving at /silos, Please create a directory under root called silos and www-data needs to be able to read and write to it - cd / - sudo mkdir silos - sudo chown www-data:demoSystemUser /silos - sudo chmod 775 /silos - - granary.uri_root = http://databank.ora.ox.ac.uk/ - #Change the domain name to yours - - solr.host = http://localhost:8080/solr - #If you have installed Solr with tomcat (as detailed above), then change the url to http://localhost:8080/solr. - #If you have SOLR running with Jetty, you would need to change the port to 8983 - - Change the default values of publisher, rights and license. these are treated as text strings and are currently used in the manifest.rdf. - The license and rights information can be overwritten + # Start tomcat and test solr is working fine by visting http://localhost:8080/solr + /etc/init.d/tomcat6 start ------------------------------------------------------------------------------------------------------ -IV. Create the main admin user to access Databank +IX. Setting up Supervisor to manage the message workers +------------------------------------------------------------------------------------------------------ +Items are indexed in SOLR from Databank, through redis using message queues +The workers that run on these message queues are managed using supervisor -TODO: This section uses htpasswd and a python file to manage user groups and roles. - This would best be replaced by a DB lookup or similar. ------------------------------------------------------------------------------------------------------- -1. Create a file 'passwd' in the root directory of RDFDatabank using 'htpasswd' and add a user to the passwd file. - OR copy the file passwd-default to passwd +# If you do not already have supervisor, install it + sudo apt-get install supervisor - The passwd-default file has users whose username, password and roles are as given in the file rdfdatabank/tests/userRolesForTesting.txt. 
- These user accounts are just for demonstration and would have to be changed for a production instance. +# Configuring Supervisor for Databank - To create a new pssswd file with the username 'admin'. - Note: To start off you just need to create this one username and password. This user will be the main databank administrator. - cd RDFDatabank/ - htpasswd -c passwd admin + # Stop supervisor + sudo /etc/init.d/supervisor stop - [enter admin password] + # Copy (sym link) the supervisor configuration files for the message workers + sudo ln -sf /var/lib/databank/message_workers/workers_available/worker_broker.conf /etc/supervisor/conf.d/worker_broker.conf + sudo ln -sf /var/lib/databank/message_workers/workers_available/worker_solr.conf /etc/supervisor/conf.d/worker_solr.conf -2. Create and add data to the file rdfdatabank/config/users.py. - OR copy the file rdfdatabank/config/users-default.py to rdfdatabank/config/users-.py. - - Currently, you need to create a file called 'users.py' in rdfdatabank/config/ and populate the file with the data for each of the users you had added into the passwd file. - You just need to define the user for the main admin account user ('admin' - added earlier into htpasswd). You can create further users using the Databank web interface. 
+ sudo /etc/init.d/supervisor start - Alternatively, you can copy the file rdfdatabank/config/users-default.py to rdfdatabank/config/users.py - -If creating the users.py file: - The data to add should look like this -#-*- coding: utf-8 -*- -_USERS = { - 'username': { - 'owner': '*', - 'first_name': 'Your first name', - 'last_name': 'Your last name', - 'role': 'admin' - } -} - - Replace 'username' with the username you used when adding a user using htpasswd - Replace 'Your first name' with your first name - Replace 'Your last name' with your last name - - The key 'owner' should contain the names of all the silos the user has access to separated by a comma, - or '*' indicating the user has access to all silos - - Examples for owner: - 'owner': '*', - 'owner': 'my_first_silo,sandbox,my_second_silo', +# The controller for supervisor can be invoked with the command 'supervisorctl' + sudo supervisorctl + + This will list all of the jobs managed by supervisor and their current status. + You can start / stop / restart jobs from within the controller. + For more info on supervisor, read http://supervisord.org/index.html ------------------------------------------------------------------------------------------------------ -V. Integrate Databank with Datacite, for minting DOIs (this section is optional) +X. Integrate Databank with Datacite, for minting DOIs (this section is optional) ------------------------------------------------------------------------------------------------------ If you want to integrate Databank with Datacite for minting DOIs for each of the data-packages, then you would need to do the following: Create a file called doi_config.py which has all of the authentication information given to you by Datacite. Copy the lines below and edit the values for each of the fields in "#Details pertaining to account with datacite" and "#Datacite api endpoint" if it is different +By default, this file is placed in /var/lib/databank/rdfdatabank/config/doi_config.py. 
+If you want to place the file in a different location, make sure Databank knows where to find the file. +The field 'doi.config' in section [app:main] in production.ini and development.ini has this setting. + #-*- coding: utf-8 -*- from pylons import config @@ -242,7 +324,7 @@ class OxDataciteDoi(): self.email = "email of contact person in your organisation" self.password = "password as given by DataCite" self.domain = "ox.ac.uk" - self.prefix = "the prefix as gcen by DataCite" + self.prefix = "the prefix as given by DataCite" self.quota = 500 if config.has_key("doi.count"): @@ -254,212 +336,202 @@ class OxDataciteDoi(): self.endpoint_path_metadata = "/metadata" ------------------------------------------------------------------------------------------------------ -VI. Integrate Pylons with Apache +XI. Integrate Databank with Apache ------------------------------------------------------------------------------------------------------ -1. Edit the file dispatch.wsgi - Note: you only need to do this step if the root directory running the databank instance is not /var/lib/databank) - In the mod_wsgi/dispatch.wsgi file, edit the location of your databank instance. - Replace the string '/var/lib/databank' with the complete path to the directory where you have installed Databank ($DB_ROOT). +1. Install Apache and the required libraries + sudo apt-get install apache2 apache2-utils libapache2-mod-wsgi -2. Add a new site in apache sites-available called databank-wsgi +2. Stop Apache before making any modification + sudo /etc/init.d/apache2 stop - There is a file provided to you in the docs dir called databank_wsgi ($DB_ROOT/docs/apache_config/databank_wsgi). - If the root directory running the databank instance is not /var/lib/databank, edit the location of your databank instance, - replacing the string '/var/lib/databank' with the complete path to the directory where you have installed Databank ($DB_ROOT). 
- - The apache log files are configured to be stored in /var/log/apache2/. Agaian you can change this location. - - Copy the file docs/apache_config/databank_wsgi to /etc/apache/sites-available. - $sudo cp $DB_ROOT/docs/apache_config/databank_wsgi /etc/apache2/sites-available/ - -3. Enable mod_wsgi and the site databank_wsgi. If the site default is enabled, disable that site. - a2dissite 000-default - sudo a2enmod wsgi - sudo a2ensite databank_wsgi - ------------------------------------------------------------------------------------------------------- -VII. Setup SOLR ------------------------------------------------------------------------------------------------------- -Copy the solr schema file from docs into your solr conf - sudo /etc/init.d/tomcat6 stop - sudo cp $DB_ROOT/docs/solr_config/conf/schema.xml /etc/solr/conf/ - sudo /etc/init.d/tomcat6 start +3. Add a new site in apache sites-available called 'databank_ve27_wsgi' + sudo ln -sf /etc/default/databank/databank_ve27_wsgi /etc/apache2/sites-available/databank_ve27_wsgi ------------------------------------------------------------------------------------------------------- -VIII. Setup Supervisor ------------------------------------------------------------------------------------------------------- -Items are indexed in SOLR from Databank, through redis using message queues -The workers that run on these message queues are managed using supervisor +4. 
Disable the default sites + # Check what default sites you have + sudo ls -l /etc/apache2/sites-available + sudo a2dissite default + sudo a2dissite default-ssl + sudo a2dissite 000-default - The related files are available in $DB_ROOT/message_workers - - Modify the file loglines.cfg - if the solr end point is not http://localhost:8080/solr or databank root is not /var/lib/databank - [worker_solr] - solrurl = http://localhost:8080/solr - dbroot = /var/lib/databank - - Modify the file workers_available/worker_broker.conf - if databank root is not /var/lib/databank - [program:worker_broker] - directory = /var/lib/databank/message_workers/ - command = /var/lib/databank/message_workers/broker.py %(process_num)s - stdout_logfile = /var/log/databank/broker.log - - Modify the file workers_available/worker_solr.conf - if databank root is not /var/lib/databank - [program:worker_solr] - directory = /var/lib/databank/message_workers/ - command = /var/lib/databank/message_workers/solr_worker.py %(process_num)s - stdout_logfile = /var/log/databank/solr_worker.log - - Stop supervisor, copy the following files to /etc/supervisor/conf.d/ and start supervisor - $ sudo /etc/init.d/supervisor stop - $ sudo cp workers_available/worker_broker.conf /etc/supervisor/conf.d/ - $ sudo cp workers_available/worker_solr.conf /etc/supervisor/conf.d - $ sudo /etc/init.d/supervisor start - - The controller for supervisor can be invoked with the command 'supervisorctl' - sudo supervisorctl - - This will list all of the jobs manged by supervisor and their current status. - You can start / stop / restart jobs from withon the controller. - For more info on supervisor, read http://supervisord.org/index.html +5. Enable the site 'databank_ve27_wsgi' + sudo a2ensite databank_ve27_wsgi + +6. Reload apache and start it + sudo /etc/init.d/apache2 reload + sudo /etc/init.d/apache2 start ------------------------------------------------------------------------------------------------------ -IX. 
Making sure all of the needed folders are available and apache has access to all the needed parts +XII. Making sure all of the needed folders are available and apache has access to all the needed parts ------------------------------------------------------------------------------------------------------ Apache runs as user www-data. Make sure the user www-data is able to read write to the following locations - $DB_ROOT - /silos (create if not available) - /var/log/databank (create if not available) + /var/lib/databank + /silos + /var/log/databank + /var/cache/databank Change permission, so www-data has access to RDFDatabank - sudo chown -R www-data:demoSystemUser $DB_ROOT - sudo chmod -R 775 $DB_ROOT - -Create a folder called /silos and change it's owner and permission. This is where the silos are going to be stored - sudo mkdir /silos - sudo chown -R www-data:demoSystemUser /silos - sudo chmod -R 775 /silos - -Create a folder called databank in /var/log/databank and change it's owner and permission - sudo mkdir /var/log/databank - sudo chown -R www-data:demoSystemUser /var/log/databank - sudo chmod -R 775 /var/log/databank + sudo chgrp -R www-data path_to_dir + sudo chmod -R 775 $path_to_dir ------------------------------------------------------------------------------------------------------ -X. Test your Pylons installation +XIII. Test your Pylons installation ------------------------------------------------------------------------------------------------------ -Restart apache - sudo /etc/init.d/apache2 restart - Visit the page http://localhost/ -If you see an error message look at the logs at /var/log/apache2/databank-error.log - -If you have problems with authorization, you can change the who.log_level to debug in the production.ini file -and restart apache so you can check the logs to see what's happening. 
The who.log_file is available at /var/log/databank/who.log +If you see an error message look at the logs at /var/log/apache2/databank-error.log and /var/log/databank/ ------------------------------------------------------------------------------------------------------ -XI. Run the test code and make sure all the tests pass +XIV. Run the test code and make sure all the tests pass ------------------------------------------------------------------------------------------------------ -The test code is located at $DB_ROOT/rdfdatabank/tests - -There are two test files - TestSubmission.py and TestSubmission_submitter.py - TestSubmission.py test all of the different functionality - TestSubmission_submitter.py tests the different user roles and permissions +The test code is located at /var/lib/databank/rdfdatabank/tests -Both of these tests use the configuration file RDFDatabankConfig.py, which you may need to modify +The test use the configuration file RDFDatabankConfig.py, which you may need to modify granary_uri_root="http://databank" This needs to be the same value as granary.uri_root in the production.ini file (or development.ini file if usign that instead) endpointhost="localhost" This should point to the url where the databank instance is running. If it is running on http://localhost/, it should be localhost. If it is running on http://example.org it should be example.org. if it is running on a non-standard port like port 5000 at http://localhost:5000, this would be localhost:5000 - endpointpath="/test/" and endpointpath2="/test2/" + endpointpath="/sandbox/" and endpointpath2="/sandbox2/" The silos that are going to be used for testing. Currently only the silo defined in endpointpath is used. The silos will be created by the test if they don't exist. 
The rest of the file lists the credentials of the different users used for testing To run the tests Make sure databank is running (see section IX) - cd $DB_ROOT/rdfdatabank/tests - python TestSubmission.py (to run the tests in TestSubmission.py) - python TestSubmission_submitter.py (to run the tests in TestSubmission_submitter.py) + cd /var/lib/databank + . bin/activate + cd rdfdatabank/tests + python TestSubmission.py ----------------------------------------------------------------------------------------------------- -B. Running Pylons from the command line in debug mode and dumping logs to stdout +XV. Running Pylons from the command line in debug mode and dumping logs to stdout ----------------------------------------------------------------------------------------------------- If you would like to run Pylons in debug mode from the command line and dump all of the log messages to stdout, stop apache and start paster from the command line. -Open the file development.ini and make the changes as in section III (except may be who.log_file). -Note: development.ini is similar to and essentially a copy of production.ini, except in the configuration of logs and debug levels. - - To run pylons in debug mode, set 'debug' to 'true' in the '[DEFAULT]' section - [DEFAULT] - debug = true - If you run pylons in debug mode, the error stack will be displayed on the browser with the state variables and their values. - This is useful for debugging. It is not advisable to have this set to true in production machines. - Also, the pylons profiler (repoze.profile) is configured to run in debug mode. Make sure the packages python-profiler, repoze.profile are installed. - The lines profile.log_filename has the log file for the profiler and the line profile.path has the end point. 
Point your browser to http://localhost/__profile__ to see the stats and control them - profile.log_filename = /var/log/databank/profile.log - profile.path = /__profile__ - - - To set the who.log_file to stdout, the line would read as below - who.log_file = stdout - To set the who.log_level to debug, so you can get more information if you want to debug authorization / authentication - who.log_level = debug - - if you would like to run paster on port 80, make sure host and port are set as follows - host = 0.0.0.0 - port = 80 - If you don't mind running on port 5000 (or any other port that does not require super user privilidges), the host and port settings will be as below - host = 127.0.0.1 - port = 5000 - - The configuration of the logs can be modified so it's all printed to stdout and the log level is debug. - For example the following configuration will write all logs to stdout - [loggers] - keys = root, routes, rdfdatabank - - [handlers] - keys = console - - [formatters] - keys = generic - - [logger_root] - level = DEBUG - handlers = console - - [logger_routes] - level = DEBUG - handlers = console - qualname = routes.middleware - - [logger_rdfdatabank] - level = DEBUG - handlers = console - qualname = rdfdatabank - - [handler_console] - class = StreamHandler - args = (sys.stderr,) - level = NOTSET - formatter = generic - - [formatter_generic] - format = %(asctime)s,%(msecs)03d %(levelname)-5.5s [%(name)s] %(message)s - datefmt = %H:%M:%S +The configuration file development.ini has been setup to do just that. Make sure the user running paster has access to all the directories. 
-If running Pylons on port 80, - you are now most likely not running as user 'www-data' and so would have to revisit section VIII and + +Running Pylons on port 80 (host=0.0.0.0 and port=80 in development.ini) + you are now likely to be running databank as the super user and not user 'www-data' and so would have to revisit section XII and change permissions giving the super user running paster access to the different directories. -If running on any other port, - You could just run pylons as the demoSystemUser - + The commands to run pylons from the command line sudo /etc/init.d/apache2 stop - sudo paster serve development.ini - + sudo ./bin/paster serve development.ini + +To stop paster,press ctrl+c + +To run paster on another port, modify the fields host and port in development.ini. +For example, to run on port 5000, the settings would be +host = 127.0.0.1 +port = 5000 + ----------------------------------------------------------------------------------------------------- +XVI. The Base URI setting (granary.uri_root) for Databank and it's significance +----------------------------------------------------------------------------------------------------- +One of the configuration options available in Databank is the 'granary.uri_root' which is the base uri for Databank. +This value is used in the following: + * Each of the silos created in Databank will be intialized with this base URI + * In each of the data packages, the metadata (held in the manifest.rdf) will use this base URI in creating the URI for the data package + * The links to each data item in the package will be created using this base uri (aggregate map for each data package) + + If this base uri doesn't resolve, the links for each of the items in the data package will not resolve + +This base uri is regarded to be permanent. 
Modifying the base uri at some point in the future will create all new silos and the data packages within the new silos with the new base uri, but the existing silos and data packages will continue to have the old uri. + +----------------------------------------------------------------------------------------------------- +XVII. Recap of the services running in Databank +----------------------------------------------------------------------------------------------------- +Apache2 + Runs the databank web server (powered by Pylons) + at http://localhost or http://ip_address from your host machine + + Apache should start automatically on startup of the VM. + + The apache log files are at + /var/log/apache2/ + + The command to stop, start and restart apache are + sudo /etc/init.d/apache2 [ stop | start | restart ] + + +Tomcat + Tomcat runs the SOLR webservice. Tomcat should start automatically on startup of the VM. + Tomcat should be available at http://localhost:8080 and + SOLR should be available at http://localhost:8080/solr + + Tomcat is installed with + CATALINA_HOME in /usr/share/tomcat6, + CATALINA_BASE in /var/lib/tomcat6 and + configuration files in /etc/tomcat6/ + + SOLR itself lives in three spots, + /usr/share/solr - contains the SOLR home director, + /var/lib/solr/ - contains the data directory and + /etc/solr contains the configuration data + + The command to stop, start and restart tomcat are + sudo /etc/init.d/tomcat6 [ stop | start | restart ] + + + Redis + Runs a basic messaging queue used by the API for indexing items into SOLR + and storing information that need to accessed quickly (like embargo information) + + Redis should start automatically on startup of the VM. + + The data directory is at /var/lib/redis and the configuration is at /etc/redis + + The command to stop, start and restart redis are + sudo /etc/init.d/redis-server [ stop | start | restart ] + + +Supervisor + Supervisor maintains the message workers run by Databank. 
+ Run the supervisor controller to manage processes maintained by supervisor + sudo supervisorctl + +------------------------------------------------------------------------------------------------------ +XVIII. Integrating SOLR for Databank with an existing SOLR installation +------------------------------------------------------------------------------------------------------ +If you already have a SOLR instance running and would like to add databank to it + - either by creating a new core (https://wiki.apache.org/solr/CoreAdmin) + - or by creating a new instance + http://wiki.apache.org/solr/SolrTomcat#Multiple_Solr_Webapps + http://wiki.apache.org/solr/SolrJetty#Running_multiple_instances +you can do so. + + +Once you have created a new core or new instance, and verified it is working, + stop SOLR, + replace the example schema file for that core / instance with Databank's schema file. + It is available at /etc/default/databank/schema.xml + Start SOLR + + +Stop Databank web server (stop apache) and the solr worker (using supervisorctl) + + +You need to configure the solr end point in the config file production.ini or development.ini +(as mentioned in section V). + In the case of multiple cores, the solr end point would be something like http://localhost:8080/solr/core_databank + if you have called the databank core 'core_databank' + + In the case of multiple SOLR instances, the solr end point would be something like http://localhost:8080/solr_databank + if you have called the databank instance 'solr_databank' + + Edit the field 'solr.host'. + Replace the default value with your solr endpoint + + +You need to configure the solr end point in the config file loglines.cfg +located at /var/lib/databank/message_workers/ and used by the solr worker for indexing items into SOLR + Edit the field 'solrurl' in the section [worker_solr]. 
+ Replace the default value with your solr endpoint + +Start the solr worker (using supervisorctl) and the Databank web server (start apache) + +----------------------------------------------------------------------------------------------------- \ No newline at end of file diff --git a/docs/DebianInstallation.txt b/docs/DebianInstallation.txt new file mode 100644 index 0000000..9cba592 --- /dev/null +++ b/docs/DebianInstallation.txt @@ -0,0 +1,112 @@ +------------------------------------------------------------------------------- +Installing Databank from the Debian package +------------------------------------------------------------------------------- +The debian package for Databank is available at +http://apt-repo.bodleian.ox.ac.uk/databank/ + +To be able to install Databank using apt or aptitude, + +1. Add the repository to the file sources.list. +Add the following line to your /etc/apt/sources.list file: + +deb http://apt-repo.bodleian.ox.ac.uk/databank/ unstable main + +2. Import the following signing key: +wget http://apt-repo.bodleian.ox.ac.uk/datastage/dataflow.gpg +sudo apt-key add dataflow.gpg + +3. Update the package index +sudo apt-get update + +4. Install Databank +sudo apt-get install databank + +------------------------------------------------------------------------------- +Questions asked during Databank's installation +------------------------------------------------------------------------------- +1. Administrator Email Id - +This will be used to send error messages from Databank to the administrator. + +2. SMTP Server used to send email messages. The default value is localhost + +3. Data storage location - the location where the data will be stored by Databank. + The default value is /silos + +4. 
The base uri (domain name) of Databank + Examples for this are: + Server name like http://example.com/databank/ or + Ip address of the machine, if it has no cname http://192.168.23.131/ or + Using localhost (development / evaluation) http://localhost/ + + The default value is 'http://databank/' + + This value is used in the following: + * Each of the silos created in Databank will be initialized with the base URI + * In each of the data packages, the metadata (held in the manifest.rdf) will + use this base URI in creating the URI for the data package + * The links to each data item in the package will be created using this base + uri (aggregate map for each data package) + + If this base uri doesn't resolve, the links for each of the items in the + data package will not resolve + + The base uri is regarded to be permanent. Modifying the base uri at some + point in the future will create all new silos and the data packages within + the new silos with the new base uri, but the existing silos and data packages + will continue to have the old uri. + +5. The password for the administrator user of databank used by the web interface + for authentication and authorization. + * The administrator has a default username as 'admin'. + * This user is the root administrator for Databank and has access to all the silos in Databank. + * Please choose a strong password for the user + +6. Choosing a password for the MySQL user + Databank will install the MySQL database if it isn't already installed. + A database with the name 'databank' will be created during installation. + The database user 'databank' will also be created. + You will be asked for a password for the user 'databank' and the credentials + of the admin user of MySQL itself, so that the database and user can be created. + +7. Confirmation if Databank can be configured for SOLR + The search facility in Databank is powered by SOLR. 
+ + If you choose to configure SOLR now, the existing schema + at /etc/solr/conf/schema.xml will be replaced with Databank's schema + + If you choose to configure SOLR at a later time, + the instructions to do so are at /usr/share/doc/databank/ConfiguringSOLR.txt + +------------------------------------------------------------------------------- +After installing Databank +------------------------------------------------------------------------------- +Visit http://localhost from the local browser to get started. + +------------------------------------------------------------------------------- +Customizing or debugging Databank +------------------------------------------------------------------------------- +Please read the document available at +/usr/share/doc/databank/Databank_VM_Installation.txt +or online at +https://github.com/dataflow/RDFDatabank/blob/master/docs/Databank_VM_Installation.txt + +If you are not interested in installing Databank from source, skip to Section V. +Section V and later sections describe the various settings for Databank. + +------------------------------------------------------------------------------- +Databank Documentation +------------------------------------------------------------------------------- +The databank source code is at https://github.com/dataflow/RDFDatabank + +Databank is provided to you as a part of the dataflow project. +You can find more information about Databank and Dataflow at +http://www.dataflow.ox.ac.uk/ + +The API documentation is available at https://databank-vm1.oerc.ox.ac.uk/api/ +or in your local instance of databank at http://localhost/api + +There are some notes on using the API at the following link, https://github.com/dataflow/RDFDatabank/tree/master/docs/using_databank_api + +The Databank wiki at https://github.com/dataflow/RDFDatabank/wiki has notes on the +current DataBank feature set, architecture, policies and the development roadmap +for the near future. 
diff --git a/docs/Dependencies.txt b/docs/Dependencies.txt index a2bebf7..41eddfa 100644 --- a/docs/Dependencies.txt +++ b/docs/Dependencies.txt @@ -1,51 +1,85 @@ Databank is a web-based application for storing, curating and publishing data-packages, and is written using python and the pylons web framework. Its default deployment includes a message queue providing databank notifications to listener services. This message queue is handled using `Redis `_ and `Supervisor `_ to maintain the listener services. The search interface in Databank is powered by `Apache SOLR `_. -Dependencies for databank version 0.3rc1 +Dependencies for databank -+--------------------------------+-----------+ -| Package name | Version | -+================================+===========+ -| python | >=2.6 | -+--------------------------------+-----------+ -| python-pylons | >=0.9.7 | -+--------------------------------+-----------+ -| python-repoze.who | =2.0a4 | -+--------------------------------+-----------+ -| python-repoze.who-friendlyform | =1.0.8 | -+--------------------------------+-----------+ -| python-rdflib | =2.4.2 | -+--------------------------------+-----------+ -| python-dateutil | >=1.4.1-4 | -+--------------------------------+-----------+ -| python-libxml2 | >=2.7.8 | -+--------------------------------+-----------+ -| python-libxslt1 | >=1.1.26-7| -+--------------------------------+-----------+ -| python-pairtree | >=0.7.1 | -+--------------------------------+-----------+ -| python-recordsilo | >=0.4.15 | -+--------------------------------+-----------+ -| python-solrpy | >=0.9.5 | -+--------------------------------+-----------+ -| python-redis | >=2.4.5-1 | -+--------------------------------+-----------+ -| unzip | >=6.0 | -+--------------------------------+-----------+ -| apache2 | >=2.2.20 | -+--------------------------------+-----------+ -| apache2-utils | >=2.2.20 | -+--------------------------------+-----------+ -| libapache2-mod-wsgi | >=3.3 | 
-+--------------------------------+-----------+ -| redis-server | >=2.2.11 | -+--------------------------------+-----------+ -| supervisor | >=3.0 | -+--------------------------------+-----------+ -| openjdk-6-jre | >=6b23 | -+--------------------------------+-----------+ -| solr-tomcat | >=1.4.1 | -+--------------------------------+-----------+ -| Simple Sword Server * | 2.0 | -+--------------------------------+-----------+ ++---------------------------------+-----------+ +| Package name | Version | ++=================================+===========+ +| python | >=2.6 | ++---------------------------------+-----------+ +| python-dev | | ++---------------------------------+-----------+ +| libxml2 | | ++---------------------------------+-----------+ +| libxslt1.1 | | ++---------------------------------+-----------+ +| libxml-dev | | ++---------------------------------+-----------+ +| libxslt-dev | | ++---------------------------------+-----------+ +| python-virtualenv | | ++---------------------------------+-----------+ +| python-pylons^ | >=0.9.7 | ++---------------------------------+-----------+ +| python-repoze.who^ | =2.0a4 | ++---------------------------------+-----------+ +| python-repoze.who-friendlyform^ | =1.0.8 | ++---------------------------------+-----------+ +| python-rdflib^ | =2.4.2 | ++---------------------------------+-----------+ +| python-dateutil^ | >=1.4.1-4 | ++---------------------------------+-----------+ +| python-lxml^ | >=2.3.4 | ++---------------------------------+-----------+ +| python-pairtree^ | >=0.7.1 | ++---------------------------------+-----------+ +| python-recordsilo^ | >=0.4.15 | ++---------------------------------+-----------+ +| python-solrpy^ | >=0.9.5 | ++---------------------------------+-----------+ +| python-redis^ | >=2.4.5-1 | ++---------------------------------+-----------+ +| unzip | >=6.0 | ++---------------------------------+-----------+ +| apache2 | >=2.2.20 | ++---------------------------------+-----------+ +| 
apache2-utils | >=2.2.20 | ++---------------------------------+-----------+ +| libapache2-mod-wsgi | >=3.3 | ++---------------------------------+-----------+ +| redis-server | >=2.2.11 | ++---------------------------------+-----------+ +| supervisor | >=3.0 | ++---------------------------------+-----------+ +| openjdk-6-jre | >=6b23 | ++---------------------------------+-----------+ +| solr-tomcat | >=1.4.1 | ++---------------------------------+-----------+ -\* Availabale from http://sword-app.svn.sourceforge.net/viewvc/sword-app/sss/branches/sss-2/ + +In addition to the above, these are the depoendencies for databank veersion 0.3rc2 + ++---------------------------------+-----------+ +| Package name | Version | ++=================================+===========+ +| python-repoze.what^ | >=1.0.3 | ++---------------------------------+-----------+ +| python-repoze.who.plugins.sa^ | >=1.0.1 | ++---------------------------------+-----------+ +| python-repoze.who.plugins.sql^ | >=1.0rc2 | ++---------------------------------+-----------+ +| python-sqlalchemy^ | =0.6.8.1 | ++---------------------------------+-----------+ +| mysql-server | =5.1 | ++---------------------------------+-----------+ +| libmysql++-dev | =3.1.0 | ++---------------------------------+-----------+ +| python-mysqldb (mysql-python)^ | =1.2.3 | ++---------------------------------+-----------+ + +\^ these python packages are installed within a virtual environment no site packages used + + Databank 0.3rc2 has the Simple Sword Server also packaged with it. 
+ * Available from http://sword-app.svn.sourceforge.net/viewvc/sword-app/sss/branches/sss-2/ + * This will be packaged separately for the next release of databank + * The simple sword server depends on web.py and lxml version 2.3.4 diff --git a/docs/LookingatRedis.txt b/docs/LookingatRedis.txt new file mode 100644 index 0000000..869e8f3 --- /dev/null +++ b/docs/LookingatRedis.txt @@ -0,0 +1,77 @@ +Indexing items in Databank for search and retrieval using SOLR, Redis and Supervisor + + * Records are indexed into SOLR from Databank asynchronously, using Redis and Supervisor to manage this process + + * The items to be indexed are written into a queue (silochanges) in redis from Databank. + * The library broadcast.py adds the silo name and data package id along with a timestamp and the type of action (create, update or delete) into redis + + + + * Supervisor manages the processes used to work on the items queued in redis + + * The configuration file for each of the processes maintained by supervisor is available at workers_available. + + * These are symlinked to /etc/supervisor/conf.d/--- + + + + * Managing the queue in redis - the queue workflow (redisqueue.py) + + * For each of the workers, when an item is popped out of a queue it is written into another temporary queue (for example temp:broker, temp:logger, temp:solr) recording the item currently being processed + + * When the worker marks the task as completed the item is deleted from the temporary queue + + * When the worker marks the task as failed, it is moved from the temporary queue into another queue (or back to the same queue if another queue isn't configured). For example: it is moved into the queue solrindexerror in worker_solr. + + + + * There are three workers working off the redis queue - worker_broker, worker_auditlogger and worker_solr + + * loglines.cfg has the configuration options which are parsed by LogConfigParser.py + + * worker_broker works off the items queued in the silochanges queue. 
It copies each item to two other queues - auditlog and solrindex + + * worker_solr works off the items queued in the solrindex queue. It is used to index documents / delete documents from solr + * For each item, it gets the manifest from for the item the granary store, walks through the triples, genrates a solr document and adds the document to solr. It commits the index every hour or when no futher items are queued for indexing. If there is an error, the item is pushed to the queue solrindexerror and the error is logged in the file /var/log/databank/solr_error.log. + + + +The following is the python code to query redis + +{{{ +from redis import Redis +r = Redis() +all_keys = r.keys() + +#Have a look at all the keys associated with supervisor workers +r.llen('silochanges') + +r.llen('solrindex') +r.lindex('solrindex', 0) + +r.llen('solrindexerror') + +r.llen('temp:broker_0') +r.llen('temp:broker_1') + +r.llen('temp:solr_0') +r.llen('temp:solr_1') + +#Have a look at all the embargoed and embargoed_until keys +em_keys = r.keys('*:*:embargoed') +if type(em_keys).__name__ != 'list': + em_keys = em_keys.split(' ') +len(em_keys) + +emu_keys = r.keys('*:*:embargoed_until') +if type(emu_keys).__name__ != 'list': + emu_keys = emu_keys.split(' ') +len(emu_keys) + +#To delete the embargoed keys +for i in em_keys: + r.delete(i) + +for i in emu_keys: + r.delete(i) +}}} diff --git a/docs/apache_config/databank_ve26_wsgi b/docs/apache_config/databank_ve26_wsgi new file mode 100644 index 0000000..d8b2645 --- /dev/null +++ b/docs/apache_config/databank_ve26_wsgi @@ -0,0 +1,21 @@ + + #ServerName databank.com + + # Logfiles + ErrorLog /var/log/apache2/databank-error.log + CustomLog /var/log/apache2/databank-access.log combined + LogLevel warn + + # Use only 1 Python sub-interpreter. Multiple sub-interpreters + # play badly with C extensions. 
+ WSGIApplicationGroup %{GLOBAL} + WSGIPassAuthorization On + # Setup mod_wsgi + WSGIScriptAlias / /var/lib/databank/mod_wsgi/dispatch_ve_26.wsgi + + + Order deny,allow + Allow from all + + + diff --git a/docs/apache_config/databank_ve27_wsgi b/docs/apache_config/databank_ve27_wsgi new file mode 100644 index 0000000..e8c0b1c --- /dev/null +++ b/docs/apache_config/databank_ve27_wsgi @@ -0,0 +1,21 @@ + + #ServerName databank.com + + # Logfiles + ErrorLog /var/log/apache2/databank-error.log + CustomLog /var/log/apache2/databank-access.log combined + LogLevel warn + + # Use only 1 Python sub-interpreter. Multiple sub-interpreters + # play badly with C extensions. + WSGIApplicationGroup %{GLOBAL} + WSGIPassAuthorization On + # Setup mod_wsgi + WSGIScriptAlias / /var/lib/databank/mod_wsgi/dispatch_ve_27.wsgi + + + Order deny,allow + Allow from all + + + diff --git a/docs/apache_config/databank_wsgi b/docs/apache_config/databank_wsgi index 071d498..e21f5a5 100644 --- a/docs/apache_config/databank_wsgi +++ b/docs/apache_config/databank_wsgi @@ -4,7 +4,8 @@ # Logfiles ErrorLog /var/log/apache2/databank-error.log CustomLog /var/log/apache2/databank-access.log combined - + LogLevel warn + # Use only 1 Python sub-interpreter. Multiple sub-interpreters # play badly with C extensions. WSGIApplicationGroup %{GLOBAL} diff --git a/docs/authWithSqlAlchemy.txt b/docs/authWithSqlAlchemy.txt index 67877bb..0e1b85e 100644 --- a/docs/authWithSqlAlchemy.txt +++ b/docs/authWithSqlAlchemy.txt @@ -1,19 +1,32 @@ Using Pylons with SQLAlchemy - +=================================================== sudo easy_install sqlalchemy SQLAlchemy-0.7.6 + sudo apt-get install python-sqlalchemy + sudo apt-get install mysql-server + sudo apt-get install libmysql++-dev + sudo easy_install MySQL-python MySQL-python 1.2.3 -sudo easy_install repoze.what-pylons - repoze.what 1.0.9 + sudo easy_install repoze.what-quickstart + This has repoze.what, repoze.who, plugins.sa, plugins.sql and friendly-form.
+ Since I already install some of these packages, I will need to package repoze.what, plugins.sa and plugins.sql + + The other approach would be to modify the version number of repoze.who in quickstart and just go with one package + I renamed the quickstart as quickstart 2.0 and modified the install requires. This seems like an easier approach -#Repoze.who version 1.0.19 gets installed. Remove thisand re-install repoze,who 2.0a4 -#Note: I need to check which of the above packages does this and not install it. Also is repoze.what-quickstart really needed? -sudo rm -r /usr/local/lib/python2.6/dist-packages/repoze.who-1.0.19-py2.6.egg/ -sudo easy_install repoze.who==2.0a4 + sudo easy_install repoze.what-pylons + repoze.what 1.0.9 + sudo easy_install repoze.what.plugins.sql + sudo easy_install repoze.who.plugins.sa + + #Repoze.who version 1.0.19 gets installed. Remove this and re-install repoze.who 2.0a4 + #Note: I need to check which of the above packages does this and not install it. Also is repoze.what-quickstart really needed? + sudo rm -r /usr/local/lib/python2.6/dist-packages/repoze.who-1.0.19-py2.6.egg/ + sudo easy_install repoze.who==2.0a4 # Create a database and a user for that database. See usingDatabase-databankauth.txt for instruction on doing so.
# Modify the mysql connection string in development.ini to match the database and user created diff --git a/docs/using_databank_api/data/testrdf.zip b/docs/using_databank_api/data/testrdf.zip new file mode 100644 index 0000000..a52cdce Binary files /dev/null and b/docs/using_databank_api/data/testrdf.zip differ diff --git a/docs/using_databank_api/data/testrdf4.zip b/docs/using_databank_api/data/testrdf4.zip new file mode 100644 index 0000000..376bd34 Binary files /dev/null and b/docs/using_databank_api/data/testrdf4.zip differ diff --git a/docs/using_databank_api/data/unicode07.xml b/docs/using_databank_api/data/unicode07.xml new file mode 100644 index 0000000..cd7a87c --- /dev/null +++ b/docs/using_databank_api/data/unicode07.xml @@ -0,0 +1,29 @@ + + + + Some verses in Sanskrit + The following is one stanza of canto Ⅵ of the Kumāra-saṃbhava (“the birth of Kumāra”) by the great Sanskrit poet Kālidāsa: <br> + <br> + पशुपतिरपि तान्यहानि कृच्छ्राद् <br> + अगमयदद्रिसुतासमागमोत्कः । <br> + कमपरमवशं न विप्रकुर्युर् <br> + विभुमपि तं यदमी स्पृशन्ति भावाः ॥ <br> + <br> +And here is the transcription of it: <br> + <br> + Paśupatirapi tānyahāni kṛcchrād <br> + agamayadadrisutāsamāgamotkaḥ; <br> + kamaparamavaśaṃ na viprakuryur <br> + vibhumapi taṃ yadamī spṛśanti bhāvāḥ? <br> + <br> +A rough translation might be: <br> + <br> + And Paśupati passed those days with hardship, / eager for union with the daughter of the mountain. / Which other powerless [creature] would they not torment, / such emotions, when they affect even the powerful [Śiva]? 
+ http://www.madore.org/~david/misc/unitest/ + + diff --git a/mod_wsgi/dispatch.wsgi b/mod_wsgi/dispatch.wsgi index 67ce816..7591098 100644 --- a/mod_wsgi/dispatch.wsgi +++ b/mod_wsgi/dispatch.wsgi @@ -33,6 +33,9 @@ pkg_resources.working_set.add_entry('/var/lib/databank') import os os.environ['PYTHON_EGG_CACHE'] = '/var/cache/databank/egg-cache' +import sys +sys.stdout = sys.stderr + # Load the Pylons application from paste.deploy import loadapp application = loadapp('config:/var/lib/databank/production.ini') diff --git a/mod_wsgi/dispatch_ve_26.wsgi b/mod_wsgi/dispatch_ve_26.wsgi new file mode 100644 index 0000000..9c1085e --- /dev/null +++ b/mod_wsgi/dispatch_ve_26.wsgi @@ -0,0 +1,43 @@ +""" +Copyright (c) 2012 University of Oxford + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+""" + +# Add the virtual Python environment site-packages directory to the path +import site +site.addsitedir('/var/lib/databank/lib/python2.6/site-packages') +#site.addsitedir('/home/simplesite/env/lib/python2.5/site-packages') +#site.addsitedir('/usr/local/lib/python2.6/dist-packages') + +import pkg_resources +pkg_resources.working_set.add_entry('/var/lib/databank') + +# Avoid ``[Errno 13] Permission denied: '/var/www/.python-eggs'`` messages +import os +os.environ['PYTHON_EGG_CACHE'] = '/var/cache/databank/egg-cache' + +import sys +sys.stdout = sys.stderr + +# Load the Pylons application +from paste.deploy import loadapp +application = loadapp('config:/var/lib/databank/production.ini') + diff --git a/mod_wsgi/dispatch_ve_27.wsgi b/mod_wsgi/dispatch_ve_27.wsgi new file mode 100644 index 0000000..dfbc9d4 --- /dev/null +++ b/mod_wsgi/dispatch_ve_27.wsgi @@ -0,0 +1,43 @@ +""" +Copyright (c) 2012 University of Oxford + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, --INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+""" + +# Add the virtual Python environment site-packages directory to the path +import site +site.addsitedir('/var/lib/databank/lib/python2.7/site-packages') +#site.addsitedir('/home/simplesite/env/lib/python2.5/site-packages') +#site.addsitedir('/usr/local/lib/python2.6/dist-packages') + +import pkg_resources +pkg_resources.working_set.add_entry('/var/lib/databank') + +# Avoid ``[Errno 13] Permission denied: '/var/www/.python-eggs'`` messages +import os +os.environ['PYTHON_EGG_CACHE'] = '/var/cache/databank/egg-cache' + +import sys +sys.stdout = sys.stderr + +# Load the Pylons application +from paste.deploy import loadapp +application = loadapp('config:/var/lib/databank/production.ini') + diff --git a/setup_db.py b/setup_db.py new file mode 100644 index 0000000..c798c01 --- /dev/null +++ b/setup_db.py @@ -0,0 +1,61 @@ +import sqlalchemy as sa +from rdfdatabank.model import init_model +from rdfdatabank.lib.auth_entry import add_user, add_user_groups, add_silo +import ConfigParser +import sys, os + +class setupDB(): + + def __init__(self, config_file='/var/lib/databank/production.ini'): + if not os.path.exists(config_file): + print "Config file not found" + sys.exit() + c = ConfigParser.ConfigParser() + c.read(config_file) + if not 'app:main' in c.sections(): + print "Section app:main not found in config file" + sys.exit() + engine = sa.create_engine(c.get('app:main', 'sqlalchemy.url')) + init_model(engine) + return + + def addUser(self, user_details): + if not ('username' in user_details and user_details['username'] and \ + 'password' in user_details and user_details['password'] and \ + ('name' in user_details and user_details['name'] or \ + ('firstname' in user_details and user_details['firstname'] and \ + 'lastname' in user_details and user_details['lastname']))): + return False + add_user(user_details) + return True + + def addSilo(self, silo): + add_silo(silo) + return + + def addUserGroup(self, username, silo, permission): + groups = [] + 
groups.append((silo, permission)) + add_user_groups(username, groups) + return + +if __name__ == "__main__": + #Initialize sqlalchemy + s = setupDB() + + #add user + username = sys.argv[1] + password = sys.argv[2] + email = sys.argv[3] + + user_details = { + 'username':u'%s'%username, + 'password':u"%s"%password, + 'name':u'Databank Administrator', + 'email':u"%s"%email + } + s.addUser(user_details) + + #Add user membership + s.addUserGroup(username, '*', 'administrator') +