-
Notifications
You must be signed in to change notification settings - Fork 690
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for PDF/A-1b #1029
base: master
Are you sure you want to change the base?
Add support for PDF/A-1b #1029
Changes from 5 commits
4ea556f
eb9b667
8296035
bd70bcd
d575d6e
54cb87d
38ae30c
4dc6db8
d929572
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -455,7 +455,6 @@ def self.format(string) | |
|
||
it 'is idempotent' do | ||
pdf = described_class.new | ||
|
||
contents = pdf.render | ||
contents2 = pdf.render | ||
expect(contents2).to eq(contents) | ||
|
@@ -508,18 +507,18 @@ def self.format(string) | |
end | ||
|
||
describe 'content stream characteristics' do | ||
it 'has 1 single content stream for a single page PDF' do | ||
it 'has 2 content streams for a single page PDF' do | ||
pdf = described_class.new | ||
pdf.text 'James' | ||
output = StringIO.new(pdf.render) | ||
hash = PDF::Reader::ObjectHash.new(output) | ||
|
||
streams = hash.values.select { |obj| obj.is_a?(PDF::Reader::Stream) } | ||
|
||
expect(streams.size).to eq(1) | ||
expect(streams.size).to eq(2) | ||
end | ||
|
||
it 'has 1 single content stream for a single page PDF, even if go_to_page '\ | ||
it 'has 2 content streams for a single page PDF, even if go_to_page '\ | ||
'is used' do | ||
pdf = described_class.new | ||
pdf.text 'James' | ||
|
@@ -530,7 +529,7 @@ def self.format(string) | |
|
||
streams = hash.values.select { |obj| obj.is_a?(PDF::Reader::Stream) } | ||
|
||
expect(streams.size).to eq(1) | ||
expect(streams.size).to eq(2) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This shouldn't change either. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
end | ||
end | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
require 'spec_helper' | ||
require 'prawn/vera_pdf' | ||
|
||
include Prawn::VeraPdf | ||
|
||
if vera_pdf_available? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's nice to let developers know what's wrong. But please make sure CI has all tools installed to actually run the specs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
require_relative 'pdfa_1b_spec_impl' | ||
else | ||
puts 'NOTICE: Specs for PDF/A-1b are not run, because veraPDF ' \ | ||
'binary was not found in path.' | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
require 'spec_helper' | ||
require_relative 'vera_pdf' | ||
|
||
# Conformance specs: each example renders a document created with
# `enable_pdfa_1b: true` and passes the bytes to `valid_pdfa_1b?`
# (mixed in from Prawn::VeraPdf).
describe Prawn::Document do
  include Prawn::VeraPdf

  let(:pdf) { described_class.new(enable_pdfa_1b: true) }

  describe 'PDF/A 1b conformance' do
    it 'empty document' do
      rendered = pdf.render

      expect(valid_pdfa_1b?(rendered)).to be true
    end

    it 'document with some text' do
      # Register the TrueType font shipped in the data directory and use it
      # for the text box.
      dejavu = { normal: "#{Prawn::DATADIR}/fonts/DejaVuSans.ttf" }
      pdf.font_families.update('DejaVuSans' => dejavu)
      pdf.font('DejaVuSans') { pdf.text_box 'Some text', at: [100, 100] }

      rendered = pdf.render

      expect(valid_pdfa_1b?(rendered)).to be true
    end

    it 'document with some image' do
      pdf.image "#{Prawn::DATADIR}/images/pigs.jpg"

      rendered = pdf.render

      expect(valid_pdfa_1b?(rendered)).to be true
    end
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -95,7 +95,7 @@ | |
next unless obj =~ %r{/Type /Page$} | ||
# The page object must contain the annotation reference | ||
# to render a clickable link | ||
expect(obj).to match(%r{^/Annots \[\d \d .\]$}) | ||
expect(obj).to match(%r{^/Annots \[\d+ \d .\]$}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this change needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With the additional object for the XMP metadata stream the object number for the annotation object switched from single digit to double digit (from 9 to 10). This regex only tested for single-digit object numbers. If we make the XMP metadata stream optional, this change can be reverted. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reverted. |
||
end | ||
end | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
require 'rexml/document' | ||
require 'open3' | ||
|
||
module Prawn
  # Spec helpers that validate rendered PDF data against PDF/A-1b by piping
  # it through the external `verapdf` command-line tool and parsing the XML
  # report it prints.
  module VeraPdf
    VERA_PDF_EXECUTABLE = 'verapdf'.freeze
    VERA_PDF_COMMAND =
      "#{VERA_PDF_EXECUTABLE} --flavour 1b --format xml".freeze

    # Looks up an executable on the search path.
    #
    # Every directory in ENV['PATH'] is combined with every extension in
    # ENV['PATHEXT'] (split on ';'; a single empty extension is used when
    # the variable is unset).
    #
    # @param cmd [String] bare command name, e.g. "verapdf"
    # @return [String, nil] path of the first executable regular file found,
    #   or nil when there is none (including when PATH is unset)
    def which(cmd)
      exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
      # An unset PATH is treated as empty instead of raising NoMethodError;
      # empty entries are skipped so File.join never probes "/cmd".
      dirs = ENV.fetch('PATH', '').split(File::PATH_SEPARATOR)
      dirs.reject(&:empty?).each do |dir|
        exts.each do |ext|
          candidate = File.join(dir, "#{cmd}#{ext}")
          return candidate if File.file?(candidate) &&
                              File.executable?(candidate)
        end
      end
      nil
    end

    # @return [Boolean] true when the veraPDF executable is found on the
    #   search path
    def vera_pdf_available?
      !which(VERA_PDF_EXECUTABLE).nil?
    end

    # Runs veraPDF on the given PDF bytes (fed through stdin) and reports
    # whether the document validates as PDF/A-1b.
    #
    # @param pdf_data [String] rendered PDF document
    # @return [Boolean] value of the report's isCompliant flag
    # @raise [RuntimeError] when the veraPDF process exits unsuccessfully
    def valid_pdfa_1b?(pdf_data)
      stdout, stderr, status =
        Open3.capture3(VERA_PDF_COMMAND, stdin_data: pdf_data)
      unless status.success?
        raise "VeraPDF could not be run. #{stderr}"
      end

      # The first four output lines are discarded before parsing.
      # NOTE(review): presumably a preamble veraPDF prints ahead of the
      # <processorResult> element -- confirm against the installed version.
      # `drop` yields an empty list (not nil) when the output is shorter.
      reported_as_compliant?(stdout.lines.drop(4).join)
    end

    # Parses a veraPDF XML report, prints every failed assertion to stdout
    # and returns the overall verdict.
    #
    # @param xml_data [String] XML report rooted at <processorResult>
    # @return [Boolean] true only when a <validationResult> element exists
    #   and its isCompliant attribute equals "true"
    def reported_as_compliant?(xml_data)
      xml_doc = REXML::Document.new(xml_data)
      assertions = '/processorResult/validationResult' \
                   '/ns2:assertions/ns2:assertion'
      xml_doc.elements.each(assertions) do |assertion|
        print_violation(assertion)
      end

      result = xml_doc.elements['/processorResult/validationResult']
      !result.nil? && result.attributes['isCompliant'] == 'true'
    end

    private

    # Prints one failed assertion together with a link to the matching rule
    # description in the veraPDF validation-profiles wiki.
    def print_violation(assertion)
      rule = assertion.elements['ns2:ruleId']
      clause = rule ? rule.attributes['clause'].to_s : ''
      test = rule ? rule.attributes['testNumber'].to_s : ''
      url = 'https://github.com/veraPDF/veraPDF-validation-profiles/wiki/PDFA-Part-1-rules'
      url_anchor = "rule-#{clause.delete('.')}-#{test}"

      puts
      puts 'PDF/A-1b VIOLATION'
      puts "  Message: #{child_text(assertion, 'ns2:message')}"
      puts "  Context: #{child_text(assertion, 'ns2:location/ns2:context')}"
      puts "  Details: #{url}##{url_anchor}"
      puts
    end

    # Text of the first element matching +xpath+ below +element+, or nil
    # when no such element exists.
    def child_text(element, xpath)
      node = element.elements[xpath]
      node ? node.text : nil
    end
  end
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.