Skip to content

Commit

Permalink
Update configuration processors to handle organization and extras field
Browse files Browse the repository at this point in the history
  • Loading branch information
jguo144 committed Sep 25, 2024
1 parent f55d42d commit 2f4db41
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 24 deletions.
18 changes: 13 additions & 5 deletions ckanext/custom_harvest/configuration_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,11 @@ def check_config(config_obj):

@staticmethod
def modify_package_dict(package_dict, config, source_dict):
exclude_keys = ['guid', 'harvest_object_id', 'harvest_source_id', 'harvest_source_title', 'spatial']
exclude_keys = [
'dcat_issued', 'dcat_modified', 'guid',
'harvest_object_id', 'harvest_source_id', 'harvest_source_title',
'source_metadata_created', 'source_metadata_modified', 'spatial'
]

if not 'extras' in package_dict:
package_dict['extras'] = []
Expand Down Expand Up @@ -248,11 +252,15 @@ def modify_package_dict(package_dict, config, source_dict):

value = None

if source_field.startswith('extras_'):
if source_field.startswith('extras.'):
# This is an extra field
source_extra = get_extra(source_field[7:], source_dict)
if source_extra:
value = source_extra.get('value')
elif source_field.startswith('organization.'):
org_key = source_field.split('.')[1]
if source_dict.get('organization', {}).get(org_key):
value = source_dict.get('organization', {}).get(org_key)
else:
value = source_dict.get(source_field)

Expand Down Expand Up @@ -331,7 +339,7 @@ def modify_package_dict(package_dict, config, source_dict):
value_dict = {}
for subfield in list(composite_map.get(field_name)):
mapped_field = composite_map.get(field_name).get(subfield)
if mapped_field.startswith('extras_'):
if mapped_field.startswith('extras.'):
source_extra = get_extra(mapped_field[7:], source_dict)
if source_extra and source_extra.get('value') not in ['none', 'null']:
value_dict[subfield] = source_extra.get('value')
Expand Down Expand Up @@ -362,7 +370,7 @@ def modify_package_dict(package_dict, config, source_dict):

# Get contact point name
contact_point_name = contact_point_mapping.get('default_name')
if source_name.startswith('extras_'):
if source_name.startswith('extras.'):
source_extra = get_extra(source_name[7:], source_dict)
if source_extra:
contact_point_name = source_extra.get('value')
Expand All @@ -388,7 +396,7 @@ def modify_package_dict(package_dict, config, source_dict):

# Get contact point email
contact_point_email = contact_point_mapping.get('default_email')
if source_email.startswith('extras_'):
if source_email.startswith('extras.'):
source_extra = get_extra(source_email[7:], source_dict)
if source_extra:
contact_point_email = source_extra.get('value')
Expand Down
8 changes: 6 additions & 2 deletions ckanext/custom_harvest/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ def package_search_to_ckan(source_dict):
package_dict['tags'].append({'name': tag.get('name')})

package_dict['extras'] = []

for key in ['metadata_created', 'metadata_modified']:
package_dict['extras'].append({'key': 'source_{0}'.format(key), 'value': source_dict.get(key)})

package_dict['extras'].append({'key': 'guid', 'value': source_dict.get('name')})

for extra in source_dict.get('extras', []):
Expand Down Expand Up @@ -59,14 +63,14 @@ def package_search_to_ckan(source_dict):
continue

resource = {
'name': resource.get('title', source_dict.get('title')),
'name': resource.get('name'),
'description': resource.get('description', ''),
'url': resource.get('url'),
'format': format,
}

if 'fluent' in config.get('ckan.plugins'):
resource['name_translated'] = {'en': resource.get('title', source_dict.get('title'))}
resource['name_translated'] = {'en': resource.get('name')}
resource['description_translated'] = {'en': resource.get('description', '') or ''}

if resource.get('size'):
Expand Down
2 changes: 1 addition & 1 deletion ckanext/custom_harvest/harvesters/package_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def import_stage(self, harvest_object):
new_package_dict = p.toolkit.get_action('package_show')(context, {'id': package_id})
upload_resources_to_datastore(context, new_package_dict, source_dict)
if status == 'change':
# Submit to xloader if dcat_modified date is different since resource urls may not change
# Submit to xloader if modified date is different since resource urls may not change
upload_resources_to_datastore(context, package_dict, source_dict)
except Exception as e:
dataset = json.loads(harvest_object.content)
Expand Down
20 changes: 10 additions & 10 deletions ckanext/custom_harvest/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ class CustomHarvestPlugin(plugins.SingletonPlugin):

# IPackageController
def before_index(self, pkg_dict):
dcat_modified = utils.parse_date_iso_format(pkg_dict.get('extras_modified_date'))
if dcat_modified:
if not dcat_modified.endswith('Z'):
dcat_modified += 'Z'
pkg_dict['metadata_modified'] = dcat_modified
source_modified = utils.parse_date_iso_format(pkg_dict.get('extras_source_metadata_modified'))
if source_modified:
if not source_modified.endswith('Z'):
source_modified += 'Z'
pkg_dict['metadata_modified'] = source_modified

dcat_issued = utils.parse_date_iso_format(pkg_dict.get('extras_issued_date'))
if dcat_issued:
if not dcat_issued.endswith('Z'):
dcat_issued += 'Z'
pkg_dict['metadata_created'] = dcat_issued
source_created = utils.parse_date_iso_format(pkg_dict.get('extras_source_metadata_created'))
if source_created:
if not source_created.endswith('Z'):
source_created += 'Z'
pkg_dict['metadata_created'] = source_created

return pkg_dict
77 changes: 71 additions & 6 deletions ckanext/custom_harvest/tests/test_configuration_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,71 @@ def test_modify_package_mapping_values_with_modified_time(self):

assert package["modified_time"] == "11:16:25.000000Z"

def test_modify_package_mapping_values_from_extras(self):
# Checks that "map_fields" entries whose source uses the "extras.<key>"
# form are resolved against the source dataset's "extras" list and copied
# onto the configured target field of the package.
package = {
"title": "Test Dataset",
"name": "test-dataset"
}
# Two mappings, each reading a different extra and writing to a
# top-level package field named by "target".
config = {
"map_fields": [
{
"source": "extras.access_constraints",
"target": "limitations"
},
{
"source": "extras.progress",
"target": "progress"
}
]
}
# The source dataset carries its extras as a list of {"key", "value"}
# dicts; the keys match the part after "extras." in the mappings above.
source_dict = {
"title": "Test Dataset",
"name": "test-dataset",
"extras": [
{
"key": "access_constraints",
"value": "Cite as: NOAA National Centers of Environmental Information"
},
{
"key": "progress",
"value": "completed"
}
]
}

# The return value is not used: the assertions below read from `package`,
# so the processor is expected to write the mapped values into it in place.
self.processor.modify_package_dict(package, config, source_dict)

assert package["limitations"] == "Cite as: NOAA National Centers of Environmental Information"
assert package["progress"] == "completed"

def test_modify_package_mapping_values_from_org_title(self):
# Checks that a "map_fields" entry whose source uses the
# "organization.<key>" form is resolved against the source dataset's
# "organization" dict and copied onto the configured target field.
package = {
"title": "Test Dataset 1",
"name": "test-dataset-1"
}
# Single mapping: organization title -> package "publisher".
config = {
"map_fields": [
{
"source": "organization.title",
"target": "publisher"
}
]
}
# The organization is a nested dict on the source dataset; only its
# "title" is referenced by the mapping, the other keys are unused here.
source_dict = {
"title": "Test Dataset 1",
"name": "test-dataset-1",
"organization": {
"title": "National Oceanic and Atmospheric Administration, Department of Commerce",
"name": "noaa-gov",
"type": "organization",
"image_url": "https://fortress.wa.gov/dfw/score/score/images/noaa_logo.png"
}
}

# The return value is not used: the assertion below reads from `package`,
# so the processor is expected to write the mapped value into it in place.
self.processor.modify_package_dict(package, config, source_dict)

assert package["publisher"] == "National Oceanic and Atmospheric Administration, Department of Commerce"


class TestCompositeMapping:

Expand Down Expand Up @@ -758,10 +823,10 @@ def test_modify_package_contact_fields_from_extras(self):
config = {
"contact_point": {
"default_name": "nonameprovided",
"source_name": "extras_contact_name",
"source_name": "extras.contact_name",
"target_name": "contact_name",
"default_email": "[email protected]",
"source_email": "extras_contact_email",
"source_email": "extras.contact_email",
"target_email": "contact_email"
}
}
Expand All @@ -787,10 +852,10 @@ def test_modify_package_contact_fields_from_extras_json(self):
config = {
"contact_point": {
"default_name": "nonameprovided",
"source_name": "extras_responsible_party",
"source_name": "extras.responsible_party",
"target_name": "contact_name",
"default_email": "[email protected]",
"source_email": "extras_contact_email",
"source_email": "extras.contact_email",
"target_email": "contact_email"
}
}
Expand All @@ -816,10 +881,10 @@ def test_modify_package_contact_fields_default_values(self):
config = {
"contact_point": {
"default_name": "nonameprovided",
"source_name": "extras_responsible_party",
"source_name": "extras.responsible_party",
"target_name": "contact_name",
"default_email": "[email protected]",
"source_email": "extras_contact_email",
"source_email": "extras.contact_email",
"target_email": "contact_email"
}
}
Expand Down

0 comments on commit 2f4db41

Please sign in to comment.