From af5f301802a61a7fd219ac12b8c33eff69e6e1d4 Mon Sep 17 00:00:00 2001 From: Cory Lown Date: Fri, 31 Mar 2023 17:28:16 -0400 Subject: [PATCH 1/2] Normalize the DigitalObject href value to something usable --- app/models/digital_object.rb | 13 +++++++++++-- spec/models/digital_object_spec.rb | 14 ++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/app/models/digital_object.rb b/app/models/digital_object.rb index 065e529a..49a4d49b 100644 --- a/app/models/digital_object.rb +++ b/app/models/digital_object.rb @@ -2,7 +2,7 @@ ## # Override Arclight::DigitalObject to add Purl URL -# Demo data href only contains the ID +# Demo data href sometimes only contains the ID class DigitalObject attr_reader :label, :href @@ -13,6 +13,15 @@ def initialize(label:, href:) def self.from_json(json) object_data = JSON.parse(json) - new(label: object_data['label'], href: "https://purl.stanford.edu/#{object_data['href']}") + new(label: object_data['label'], href: normalize_href(object_data['href'])) + end + + # Make a usable Purl URL from whatever happens to be in + # the DigitalObject href in the sample data. + # Ideally, this value would be consistent, but it is not currently. + def self.normalize_href(href) + return href.gsub('http://', 'https://') if href.match?(%r{https?://}) + + "https://purl.stanford.edu/#{href}" end end diff --git a/spec/models/digital_object_spec.rb b/spec/models/digital_object_spec.rb index e1601a8c..a1f36dad 100644 --- a/spec/models/digital_object_spec.rb +++ b/spec/models/digital_object_spec.rb @@ -25,4 +25,18 @@ expect(deserialized.href).to eq 'https://purl.stanford.edu/an-object-id' end end + + describe "#{described_class}.normalize_href" do + it 'returns the href unchanged if it contains a complete URL' do + expect(described_class.normalize_href('https://purl.stanford.edu/an-object-id')).to eq 'https://purl.stanford.edu/an-object-id' + end + + it 'returns the href but converts http to https' do + expect(described_class.normalize_href('http://purl.stanford.edu/an-object-id')).to eq 'https://purl.stanford.edu/an-object-id' + end + + it 'returns a complete Purl URL if the href only contains an ID' do + expect(described_class.normalize_href('an-object-id')).to eq 'https://purl.stanford.edu/an-object-id' + end + end end From ca21486bd60e898bd7335151f61e1eeb7f10ab98 Mon Sep 17 00:00:00 2001 From: Cory Lown Date: Thu, 13 Apr 2023 10:52:22 -0400 Subject: [PATCH 2/2] Add a Purl URL only to druid-looking things; return other IDs or URLs unchanged --- app/models/digital_object.rb | 7 +++++-- spec/models/digital_object_spec.rb | 28 ++++++++++++++++++---------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/app/models/digital_object.rb b/app/models/digital_object.rb index 49a4d49b..39bc69d1 100644 --- a/app/models/digital_object.rb +++ b/app/models/digital_object.rb @@ -20,8 +20,11 @@ def self.from_json(json) # the DigitalObject href in the sample data. # Ideally, this value would be consistent, but it is not currently. def self.normalize_href(href) - return href.gsub('http://', 'https://') if href.match?(%r{https?://}) + # Some complete Purl URLs do not use https, convert them + return href.gsub('http://', 'https://') if href.match?(%r{https?://purl.stanford.edu}) + # Some hrefs contain only a druid, convert them to a complete Purl URL + return "https://purl.stanford.edu/#{href}" if href.match?(/^([a-z]{2})(\d{3})([a-z]{2})(\d{4})$/) - "https://purl.stanford.edu/#{href}" + href end end diff --git a/spec/models/digital_object_spec.rb b/spec/models/digital_object_spec.rb index a1f36dad..b2d9b48e 100644 --- a/spec/models/digital_object_spec.rb +++ b/spec/models/digital_object_spec.rb @@ -4,16 +4,16 @@ RSpec.describe DigitalObject do subject(:instance) do - described_class.new(label: 'An object label', href: 'an-object-id') + described_class.new(label: 'An object label', href: 'aa111bb2222') end describe 'label' do let(:empty_label) do - described_class.new(label: '', href: 'an-object-id') + described_class.new(label: '', href: 'aa111bb2222') end it 'uses href if label is blank' do - expect(empty_label.href).to eq 'an-object-id' + expect(empty_label.href).to eq 'aa111bb2222' end end @@ -22,21 +22,29 @@ deserialized = described_class.from_json(instance.to_json) expect(deserialized).to be_a described_class expect(deserialized.label).to eq 'An object label' - expect(deserialized.href).to eq 'https://purl.stanford.edu/an-object-id' + expect(deserialized.href).to eq 'https://purl.stanford.edu/aa111bb2222' end end describe "#{described_class}.normalize_href" do - it 'returns the href unchanged if it contains a complete URL' do - expect(described_class.normalize_href('https://purl.stanford.edu/an-object-id')).to eq 'https://purl.stanford.edu/an-object-id' + it 'returns the href unchanged if it contains something other than a druid' do + expect(described_class.normalize_href('some-other-id')).to eq 'some-other-id' end - it 'returns the href but converts http to https' do - expect(described_class.normalize_href('http://purl.stanford.edu/an-object-id')).to eq 'https://purl.stanford.edu/an-object-id' + it 'returns the href unchanged if it contains a URL that is not a Purl' do + expect(described_class.normalize_href('http://www.somewebsite/some-other-id')).to eq 'http://www.somewebsite/some-other-id' end - it 'returns a complete Purl URL if the href only contains an ID' do - expect(described_class.normalize_href('an-object-id')).to eq 'https://purl.stanford.edu/an-object-id' + it 'returns the href unchanged if it contains a complete Purl URL' do + expect(described_class.normalize_href('https://purl.stanford.edu/aa111bb2222')).to eq 'https://purl.stanford.edu/aa111bb2222' + end + + it 'returns the Purl URL but converts http to https' do + expect(described_class.normalize_href('http://purl.stanford.edu/aa111bb2222')).to eq 'https://purl.stanford.edu/aa111bb2222' + end + + it 'returns a complete Purl URL if the href only contains a druid' do + expect(described_class.normalize_href('aa111bb2222')).to eq 'https://purl.stanford.edu/aa111bb2222' end end end