Skip to content

Commit

Permalink
Merge pull request #321 from sul-dlss/href-that-are-not
Browse files Browse the repository at this point in the history
Normalize the DigitalObject href value to something usable
  • Loading branch information
cbeer authored Apr 13, 2023
2 parents 6643f39 + ca21486 commit 0ae34f2
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 6 deletions.
16 changes: 14 additions & 2 deletions app/models/digital_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

##
# Override Arclight::DigitalObject to add Purl URL
# Demo data href only contains the ID
# Demo data href sometimes only contains the ID
class DigitalObject
attr_reader :label, :href

Expand All @@ -13,6 +13,18 @@ def initialize(label:, href:)

# Build a DigitalObject from a JSON string.
# The parsed JSON is read as a Hash; its 'label' and 'href' keys supply the
# keyword arguments passed to .new.
def self.from_json(json)
object_data = JSON.parse(json)
# NOTE(review): the two calls below look like the removed/added pair of a diff
# rendered without +/- markers. The first prefixes the href unconditionally with
# the Purl base URL; the second routes it through normalize_href. As written,
# only the second call's result is returned — confirm which line is in the file.
new(label: object_data['label'], href: "https://purl.stanford.edu/#{object_data['href']}")
new(label: object_data['label'], href: normalize_href(object_data['href']))
end

# Make a usable Purl URL from whatever happens to be in
# the DigitalObject href in the sample data.
# Ideally, this value would be consistent, but it is not currently.
#
# Rules, applied in order:
# 1. nil is returned as nil (there is nothing to normalize).
# 2. A URL whose host is purl.stanford.edu is returned with an https scheme.
# 3. A value that is exactly a bare druid (e.g. "aa111bb2222") is expanded
#    to a complete https Purl URL.
# 4. Anything else is returned unchanged.
#
# @param href [String, nil] the raw href value
# @return [String, nil] the normalized href
def self.normalize_href(href)
  return href if href.nil?

  # Some complete Purl URLs do not use https, convert them.
  # The match is anchored with \A and the dots are escaped so that only URLs
  # that really start with the Purl host qualify (not URLs that merely embed
  # one, nor lookalike hosts); only the leading scheme is rewritten.
  return href.sub(%r{\Ahttp://}, 'https://') if href.match?(%r{\Ahttps?://purl\.stanford\.edu(?:[/?#]|\z)})

  # Some hrefs contain only a druid, convert them to a complete Purl URL.
  # \A and \z anchor the whole string; ^ and $ would match per line and let
  # multi-line values through.
  return "https://purl.stanford.edu/#{href}" if href.match?(/\A[a-z]{2}\d{3}[a-z]{2}\d{4}\z/)

  href
end
end
30 changes: 26 additions & 4 deletions spec/models/digital_object_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@

RSpec.describe DigitalObject do
subject(:instance) do
described_class.new(label: 'An object label', href: 'an-object-id')
described_class.new(label: 'An object label', href: 'aa111bb2222')
end

describe 'label' do
let(:empty_label) do
described_class.new(label: '', href: 'an-object-id')
described_class.new(label: '', href: 'aa111bb2222')
end

it 'uses href if label is blank' do
expect(empty_label.href).to eq 'an-object-id'
expect(empty_label.href).to eq 'aa111bb2222'
end
end

Expand All @@ -22,7 +22,29 @@
deserialized = described_class.from_json(instance.to_json)
expect(deserialized).to be_a described_class
expect(deserialized.label).to eq 'An object label'
expect(deserialized.href).to eq 'https://purl.stanford.edu/an-object-id'
expect(deserialized.href).to eq 'https://purl.stanford.edu/aa111bb2222'
end
end

describe "#{described_class}.normalize_href" do
  # Shorthand for invoking the class method under test.
  def normalized(value)
    described_class.normalize_href(value)
  end

  # Values that are neither a druid nor a Purl URL pass through untouched.
  it 'returns the href unchanged if it contains something other than a druid' do
    result = normalized('some-other-id')
    expect(result).to eq 'some-other-id'
  end

  it 'returns the href unchanged if it contains a URL that is not a Purl' do
    result = normalized('http://www.somewebsite/some-other-id')
    expect(result).to eq 'http://www.somewebsite/some-other-id'
  end

  # Purl URLs are kept, with the scheme forced to https.
  it 'returns the href unchanged if it contains a complete Purl URL' do
    result = normalized('https://purl.stanford.edu/aa111bb2222')
    expect(result).to eq 'https://purl.stanford.edu/aa111bb2222'
  end

  it 'returns the Purl URL but converts http to https' do
    result = normalized('http://purl.stanford.edu/aa111bb2222')
    expect(result).to eq 'https://purl.stanford.edu/aa111bb2222'
  end

  # A bare druid is expanded into a full Purl URL.
  it 'returns a complete Purl URL if the href only contains a druid' do
    result = normalized('aa111bb2222')
    expect(result).to eq 'https://purl.stanford.edu/aa111bb2222'
  end
end
end

0 comments on commit 0ae34f2

Please sign in to comment.