diff --git a/app/overrides/lib/hydra/derivatives/processors/document_override.rb b/app/overrides/lib/hydra/derivatives/processors/document_override.rb index b4101071e..872ed9e04 100644 --- a/app/overrides/lib/hydra/derivatives/processors/document_override.rb +++ b/app/overrides/lib/hydra/derivatives/processors/document_override.rb @@ -6,9 +6,9 @@ class SofficeTimeoutError < StandardError; end Hydra::Derivatives::Processors::Document.class_eval do # [hyc-override] Use Redlock to manage soffice process lock - LOCK_KEY = "soffice:document_conversion" + LOCK_KEY = 'soffice:document_conversion' LOCK_TIMEOUT = 6 * 60 * 1000 - JOB_TIMEOUT_SECONDS = 30 + JOB_TIMEOUT_SECONDS = 300 LOCK_MANAGER = Redlock::Client.new([Redis.current]) # [hyc-override] Trigger kill if soffice process takes too long, and throw a non-retry error if that happens @@ -33,9 +33,6 @@ def self.encode(path, format, outdir, timeout = JOB_TIMEOUT_SECONDS) end end - # TODO: soffice can only run one command at a time. Right now we manage this by only running one - # background job at a time; however, if we want to up concurrency we'll need to deal with this - # Converts the document to the format specified in the directives hash. # TODO: file_suffix and options are passed from ShellBasedProcessor.process but are not needed. # A refactor could simplify this. diff --git a/config/sidekiq.yml b/config/sidekiq.yml index 7c0ab61d8..8d337d7fd 100755 --- a/config/sidekiq.yml +++ b/config/sidekiq.yml @@ -1,7 +1,7 @@ --- # In production this is overridden by a template in the vagrant-rails project :concurrency: 6 -:max_retries: 5 +:max_retries: 3 :queues: - default - derivatives diff --git a/spec/lib/hydra/derivatives/processors/document_spec.rb b/spec/lib/hydra/derivatives/processors/document_spec.rb index 7c5a75b12..dbfab9c66 100644 --- a/spec/lib/hydra/derivatives/processors/document_spec.rb +++ b/spec/lib/hydra/derivatives/processors/document_spec.rb @@ -29,4 +29,57 @@ end end end + + describe '.encode' do + let(:path) { '/path/to/document.pptx' } + let(:format) { 'pdf' } + let(:outdir) { '/output/dir' } + let(:timeout) { 1 } + + before do + # Mock the Hydra::Derivatives.libreoffice_path + allow(Hydra::Derivatives).to receive(:libreoffice_path).and_return('/fake/path/to/soffice') + end + + context 'when the process completes successfully' do + it 'runs the command and completes without timeout' do + # Mock Process.spawn and Process.wait to simulate successful execution + pid = 991234 + allow(Process).to receive(:spawn).and_return(pid) + allow(Process).to receive(:wait).with(pid) + + expect do + described_class.encode(path, format, outdir, timeout) + end.not_to raise_error + + # Verify that the process was spawned + expect(Process).to have_received(:spawn) + expect(Process).to have_received(:wait).with(pid) + end + end + + context 'when the process times out' do + it 'kills the process after a timeout' do + pid = 991234 + + # Mock Process.spawn to return a fake PID + allow(Process).to receive(:spawn).and_return(pid) + # Simulate timeout by making Process.wait sleep beyond the timeout + allow(Process).to receive(:wait).with(pid) { sleep 5 } + + # Mock Process.kill to simulate killing the process + allow(Process).to receive(:kill) + allow(Hydra::Derivatives::Processors::Document).to receive(:system).with("ps -p #{pid}").and_return(true) + + expect do + described_class.encode(path, format, outdir, timeout) + end.to raise_error(SofficeTimeoutError, "soffice process timed out after #{timeout} seconds") + + # Verify that the process was spawned + expect(Process).to have_received(:spawn) + expect(Process).to have_received(:kill).with('TERM', pid) # Attempted graceful termination + expect(Process).to have_received(:kill).with('KILL', pid) # Force kill if necessary + end + end + end end