Skip to content

Commit

Permalink
Fix row_count always being 1 when \r used as lineTerminator
Browse files Browse the repository at this point in the history
  • Loading branch information
kspurgin committed Dec 12, 2024
1 parent 6eeba10 commit 0949e0c
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 6 deletions.
61 changes: 55 additions & 6 deletions lib/csvlint/validate.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def initialize(source, dialect = {}, schema = nil, options = {})
@formats = []
@schema = schema
@dialect = dialect
@orig_sep = $/
@csv_header = true
@headers = {}
@lambda = options[:lambda]
Expand Down Expand Up @@ -107,10 +108,17 @@ def validate

def validate_stream
@current_line = 1
# StringIO.each_line splits on the value of $/ (input record separator),
# which defaults to \n. This is why CSVs with \r EOL character don't get
# lines counted properly?
set_sep(@source)

@source.each_line do |line|
break if line_limit_reached?
parse_line(line)
end

reset_sep
validate_line(@leading, @current_line) unless @leading == ""
end

Expand All @@ -131,10 +139,13 @@ def validate_url
request.on_body do |chunk|
chunk.force_encoding(Encoding::UTF_8) if chunk.encoding == Encoding::ASCII_8BIT
io = StringIO.new(chunk)
set_sep(io)

io.each_line do |line|
break if line_limit_reached?
parse_line(line)
end
reset_sep
end
request.run
# Validate the last line too
Expand All @@ -144,7 +155,7 @@ def validate_url
def parse_line(line)
line = @leading + line
# Check if the last line is a line break - in which case it's a full line
if line[-1, 1].include?("\n")
if ["\n", "\r"].include?(line[-1, 1])
# If the number of quotes is odd, the linebreak is inside some quotes
if line.count(@dialect["quoteChar"]).odd?
@leading = line
Expand Down Expand Up @@ -304,7 +315,7 @@ def header?
end

def report_line_breaks(line_no = nil)
return unless @input[-1, 1].include?("\n") # Return straight away if there's no newline character - i.e. we're on the last line
return unless ["\r", "\n"].include?(@input[-1, 1]) # Return straight away if there's no newline character - i.e. we're on the last line
line_break = get_line_break(@input)
@line_breaks << line_break
unless line_breaks_reported?
Expand Down Expand Up @@ -518,6 +529,43 @@ def locate_schema

private

def set_sep(source)
$/ = determine_sep(source)
end

def determine_sep(source)
return explicitly_set_sep if explicitly_set_sep

src_str = case source
when File
File.read(source.path)
when IO
source.read
when StringIO
source.string
when Tempfile
source.read
else
raise "Unhandled source class: #{source.class}"
end
src_str.include?("\n") ? "\n" : "\r"
end

def explicitly_set_sep
return unless @dialect
return unless @dialect.key?("lineTerminator")

sep = @dialect["lineTerminator"]
return unless sep.is_a?(String)
return if sep.empty?

sep
end

def reset_sep
$/ = @orig_sep
end

def parse_extension(source)
case source
when File
Expand Down Expand Up @@ -589,11 +637,12 @@ def line_limit_reached?
end

def get_line_break(line)
eol = line.chars.last(2)
if eol.first == "\r"
"\r\n"
eol = line.chars.last(2).join
case eol
when "\r\n"
eol
else
"\n"
eol[-1]
end
end

Expand Down
58 changes: 58 additions & 0 deletions spec/validator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,64 @@
end
end

describe "#row_count" do
let(:validator) { Csvlint::Validator.new(StringIO.new(csv), opts) }
let(:opts) { {} }
let(:result) { validator.row_count }

context 'with \r\n line terminators and final EOL' do
let(:csv) do
%("Foo","Bar","Baz"\r\n"1","2","3"\r\n"3","2","1"\r\n)
end

it "should count the total number of rows read" do
expect(validator.row_count).to eq(3)
end
end

context 'with \r\n line terminators and without final EOL' do
let(:csv) do
%("Foo","Bar","Baz"\r\n"1","2","3"\r\n"3","2","1")
end

it "should count the total number of rows read" do
expect(validator.row_count).to eq(3)
end
end

context 'with \n line terminators and final EOL' do
let(:csv) do
%("Foo","Bar","Baz"\n"1","2","3"\n"3","2","1"\n)
end

it "should count the total number of rows read" do
expect(validator.row_count).to eq(3)
end
end

context 'with \r line terminators and final EOL' do
let(:csv) do
%("Foo","Bar","Baz"\r"1","2","3"\r"3","2","1"\r)
end

it "should count the total number of rows read" do
expect(validator.row_count).to eq(3)
end
end

context 'with \r line terminators and final EOL and lineTerminator ' \
"specified" do
let(:csv) do
%("Foo","Bar","Baz"\r"1","2","3"\r"3","2","1"\r)
end
let(:opts) { {"lineTerminator" => "\r"} }

it "should count the total number of rows read" do
expect(validator.row_count).to eq(3)
end
end
end

# Commented out because there is currently no way to mock redirects with Typhoeus and WebMock - see https://github.com/bblimke/webmock/issues/237
# it "should follow redirects to SSL" do
# stub_request(:get, "http://example.com/redirect").to_return(:status => 301, :headers=>{"Location" => "https://example.com/example.csv"})
Expand Down

0 comments on commit 0949e0c

Please sign in to comment.