diff --git a/ckanext/geonetwork/harvesters/geonetwork.py b/ckanext/geonetwork/harvesters/geonetwork.py index ad49892..3d2d050 100644 --- a/ckanext/geonetwork/harvesters/geonetwork.py +++ b/ckanext/geonetwork/harvesters/geonetwork.py @@ -13,12 +13,12 @@ from ckanext.spatial.lib.csw_client import CswService from ckanext.spatial.harvesters.csw import CSWHarvester -from ckanext.spatial.model import ISODocument -from ckanext.spatial.model import ISOElement +from ckanext.spatial.harvested_metadata import ISODocument +from ckanext.spatial.harvested_metadata import ISOElement from ckan.logic import ValidationError, NotFound, get_action -from pylons import config +from ckan.common import config from datetime import datetime log = logging.getLogger(__name__) @@ -80,7 +80,7 @@ def get_package_dict(self, iso_values, harvest_object): existing_keys = [entry.get('key') for entry in package_dict['extras']] - for key, value in default_extras.iteritems(): + for key, value in default_extras.items(): log.debug('Processing extra %s', key) if not key in existing_keys or override_extras: # Look for replacement strings @@ -163,8 +163,10 @@ def handle_groups(self, harvest_object, group_mapping, gn_localized_url, values) version = self.source_config.get('version') client = GeoNetworkClient(gn_localized_url, version) cats = client.retrieveMetadataCategories(harvest_object.guid) + log.info(':::::::::::::-TOPIC-CATEGORY-::::::::::::: %r ', cats) for cat in cats: + log.info('group_mapping %r', group_mapping.items()) groupname = group_mapping[cat] printname = groupname if not None else "NONE" @@ -172,6 +174,7 @@ def handle_groups(self, harvest_object, group_mapping, gn_localized_url, values) if groupname: try: + log.info('groupname1 %r', groupname) data_dict = {'id': groupname} get_action('group_show')(context, data_dict) #log.info('Group %s found %s' % (groupname, group)) @@ -180,10 +183,11 @@ def handle_groups(self, harvest_object, group_mapping, gn_localized_url, values) #else: #validated_groups.append(group['id']) validated_groups.append({'name': groupname}) - except NotFound, e: + except NotFound as e: log.warning('Group %s from category %s is not available' % (groupname, cat)) - except Exception, e: - log.warning('Error handling groups for metadata %s' % harvest_object.guid) + except Exception as e: + # log.warning('Error handling groups for metadata %s' % harvest_object.guid) + log.warning('Error handling groups for metadata %r', e) return validated_groups diff --git a/ckanext/geonetwork/harvesters/utils.py b/ckanext/geonetwork/harvesters/utils.py index 31d1f7d..899550f 100644 --- a/ckanext/geonetwork/harvesters/utils.py +++ b/ckanext/geonetwork/harvesters/utils.py @@ -2,9 +2,9 @@ import logging #import re import urllib -import urllib2 +import urllib.request import zipfile -from StringIO import StringIO +import io from lxml import etree GEONETWORK_V26 = "2.6" @@ -27,26 +27,29 @@ def __init__(self, base, version): def retrieveInfo(self, uuid): if self.version == GEONETWORK_V26: - url = "%s/srv/en/mef.export" % self.base + # url = "%s/srv/en/mef.export" % self.base + url = "%smef.export?uuid=%s" % (self.base, uuid) + + logger.info('URL %r ', url) + #headers = { - #"Content-Type": "application/x-www-form-urlencoded", - #"Accept": "text/plain" - #} - query = urllib.urlencode({ - "uuid": uuid - }) + # "Content-Type": "application/x-www-form-urlencoded", + # "Accept": "text/plain" + + #query = urllib.parse.urlencode({ + # "uuid": uuid + #}).encode('utf-8') + + request = urllib.request.Request(url, method='GET') - logger.info('Loading MEF for %s', uuid) - request = urllib2.Request(url, query) - opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(), urllib2.HTTPRedirectHandler()) + opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(), urllib.request.HTTPRedirectHandler()) response = opener.open(request) # will get a ZIP file content = response.read() - - #logger.info('----> %s', content) + #print 'RESPONSE ', content - zdata = StringIO(content) + zdata = io.BytesIO(content) zfile = zipfile.ZipFile(zdata) xml = None @@ -54,19 +57,20 @@ def retrieveInfo(self, uuid): for name in zfile.namelist(): #logger.info(' MEF entry: %s', name) #print ' MEF entry: ', name - if name == 'info.xml': + if name == 'metadata.xml': uncompressed = zfile.read(name) xml = etree.fromstring(uncompressed) - + return xml def retrieveMetadataCategories(self, uuid): xml = self.retrieveInfo(uuid) - cats = [] - for cat in xml.findall('categories/category'): - cats.append(cat.get('name')) + for cat in xml.iter('{http://www.isotc211.org/2005/gmd}MD_TopicCategoryCode'): + cat = cat.text + logger.info('cat %r', cat) + cats.append(cat) return cats