Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HYC-1951 - Refine Dimensions Query #1116

Merged
merged 6 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion app/services/tasks/dimensions_ingest_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ module Tasks
class DimensionsIngestService
include Tasks::IngestHelper
attr_reader :admin_set, :depositor
UNC_GRID_ID = 'grid.410711.2'

def initialize(config)
@config = config
Expand Down Expand Up @@ -104,11 +105,22 @@ def author_to_hash(author, index)
}
# Add the author's affiliation (a UNC affiliation if present, otherwise the first listed) as the other affiliation
if author['affiliations'].present?
hash['other_affiliation'] = author['affiliations'][0]['raw_affiliation']
hash['other_affiliation'] = retrieve_author_affiliation(author['affiliations'])
end
hash
end

# Picks the raw affiliation value to record for an author.
#
# An affiliation whose 'id' equals UNC_GRID_ID takes priority wherever it
# appears in the list; if there are several, the first one wins. When none
# matches, the first listed affiliation is used instead.
#
# @param affiliations [Array<Hash>] affiliation hashes; only the 'id' and
#   'raw_affiliation' keys are read here
# @return [Object, nil] the value stored under 'raw_affiliation' of the chosen
#   affiliation, or nil when the list is empty
def retrieve_author_affiliation(affiliations)
  # find stops at the first UNC match instead of collecting every match
  chosen = affiliations.find { |affiliation| affiliation['id'] == UNC_GRID_ID } || affiliations.first
  # chosen is nil only for an empty list; return nil rather than raising
  chosen['raw_affiliation'] if chosen
end


def format_publication_identifiers(publication)
[
publication['id'].present? ? "Dimensions ID: #{publication['id']}" : nil,
Expand Down
3 changes: 2 additions & 1 deletion app/services/tasks/dimensions_query_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,11 @@ def solr_query_builder(pub)
def generate_query_string(start_date, end_date, page_size, cursor)
search_clauses = ['where type = "article"', "date >= \"#{start_date}\"", "date < \"#{end_date}\""].join(' and ')
return_fields = ['basics', 'extras', 'abstract', 'issn', 'publisher', 'journal_title_raw', 'linkout', 'concepts'].join(' + ')
unc_affiliation_variants = ['"UNC-CH"', '"University of North Carolina at Chapel Hill"', '"UNC-Chapel Hill"', '"University of North Carolina-Chapel Hill"', '"University of North Carolina, Chapel Hill"'].join(' OR ')
<<~QUERY
search publications #{search_clauses} in raw_affiliations
for """
"University of North Carolina, Chapel Hill" OR "UNC"
#{unc_affiliation_variants}
"""
return publications[#{return_fields}]
limit #{page_size}
Expand Down
4 changes: 3 additions & 1 deletion spec/fixtures/files/dimensions_ingest_test_fixture.json
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@
"current_organization_id": "grid.10698.36",
"first_name": "Susan L",
"last_name": "Hogan",
"orcid": null,
"orcid": [
"0000-0000-0000-0000"
],
"raw_affiliation": [
"UNC Kidney Center, Division of Nephrology and Hypertension, University of North Carolina, Chapel Hill."
],
Expand Down
55 changes: 55 additions & 0 deletions spec/services/tasks/dimensions_ingest_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -305,4 +305,59 @@
expect(article.keyword).to eq([])
end
end

# Covers how author_to_hash fills 'other_affiliation' when an author lists
# more than one affiliation.
describe '#author_to_hash' do
  let(:unc_grid_id) { 'grid.410711.2' }
  # An affiliation hash whose 'id' differs from unc_grid_id.
  let(:non_unc_affiliation) do
    {
      'city' => 'Test City',
      'city_id' => 5318313,
      'country' => 'United States',
      'country_code' => 'US',
      'id' => 'grid.134563.6',
      'name' => 'Test University',
      'raw_affiliation' => 'Test Raw Affiliation',
      'state' => 'Test-State',
      'state_code' => 'US-AZ'
    }
  end

  context 'when an author has multiple affiliations' do
    it 'uses their first affiliation to populate the author hash if no UNC affiliation exists' do
      # NOTE(review): this compares the author's own 'id' with a GRID id, whereas the other
      # example matches unc_grid_id against affiliation ids. Presumably the intent is "an author
      # with no UNC affiliation" - confirm against the fixture before changing the predicate.
      non_unc_affiliated_author = test_publications.first['authors'].find { |author| author['id'] != unc_grid_id }
      # Ensure the author has multiple non-unc affiliations
      non_unc_affiliated_author['affiliations'].append(non_unc_affiliation) unless non_unc_affiliated_author['affiliations'].size > 1
      author_hash = service.author_to_hash(non_unc_affiliated_author, 0)
      # Check that the author hash contains the expected metadata from the first affiliation
      expect(author_hash).to eq(
        {
          'name' => 'Thorpe, Carolyn T',
          'other_affiliation' => 'Eshelman School of Pharmacy, University of North Carolina, Chapel Hill.',
          'orcid' => 'https://orcid.org/0000-0002-7662-7497',
          'index' => '1'
        }
      )
    end

    it 'prioritizes retrieval of the UNC affiliation even if it is not the first one' do
      first_publication_authors = test_publications.first['authors']
      # Retrieve the first UNC-affiliated author and their first UNC-affiliation
      unc_affiliated_author = first_publication_authors.find do |author|
        author['affiliations'].any? { |affiliation| affiliation['id'] == unc_grid_id }
      end
      first_unc_affiliation = unc_affiliated_author['affiliations'].find { |affiliation| affiliation['id'] == unc_grid_id }
      # Ensure the author's first affiliation is not the UNC affiliation
      unc_affiliated_author['affiliations'].unshift(non_unc_affiliation)
      author_hash = service.author_to_hash(unc_affiliated_author, 0)
      expect(author_hash).to eq(
        {
          'name' => 'Hogan, Susan L',
          'other_affiliation' => 'UNC Kidney Center, Division of Nephrology and Hypertension, University of North Carolina, Chapel Hill.',
          'orcid' => 'https://orcid.org/0000-0000-0000-0000',
          'index' => '1'
        }
      )
    end
  end
end
end
Loading