1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
#!/usr/bin/env ruby -w
# encoding: UTF-8
# tc_csv_parsing.rb
#
# Created by James Edward Gray II on 2005-10-31.
# Copyright 2005 James Edward Gray II. You can redistribute or modify this code
# under the terms of Ruby's license.
require "timeout"
require_relative "base"
#
# The following tests are my interpretation of the
# {CSV RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I only deviate from that
# document in one place (intentionally) and that is to make the default row
# separator <tt>$/</tt>.
#
class TestCSV::Parsing < TestCSV
# Mix DifferentOFS into the class itself (the module is defined outside this
# file; presumably it comes from "base" -- TODO confirm what it changes).
extend DifferentOFS
# 1,024 copies of a ten-byte row (10,240 bytes in total). The fail-fast tests
# in this class append it to malformed input.
BIG_DATA = "123456789\n" * 1024
# Parses one record that mixes plain, space-padded, empty and quoted fields
# (embedded commas and doubled quotes) and checks every resulting field.
def test_mastering_regex_example
  record = %q(Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K)
  fields = [
    "Ten Thousand", "10000", " 2710 ", nil, "10,000",
    "It's \"10 Grand\", baby", "10K"
  ]

  assert_equal(fields, CSV.parse_line(record))
end
# Old Ruby 1.8 CSV library tests.
#
# Each table row pairs a raw CSV line with the fields CSV.parse_line must
# return for it. Two tables are checked one after the other; every row of the
# second table also appears in the first.
def test_std_lib_csv
  first_table = [
    ["\t", ["\t"]],
    ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
    ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
    ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
    ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
    ["\"\"", [""]],
    ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
    ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
    ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
    ["foo,\"\",baz", ["foo", "", "baz"]],
    ["\",\"", [","]],
    ["foo", ["foo"]],
    [",,", [nil, nil, nil]],
    [",", [nil, nil]],
    ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
    ["foo,,baz", ["foo", nil, "baz"]],
    ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
    ["\",\",\",\"", [",", ","]],
    ["foo,bar,", ["foo", "bar", nil]],
    [",foo,bar", [nil, "foo", "bar"]],
    ["foo,bar", ["foo", "bar"]],
    [";", [";"]],
    ["\t,\t", ["\t", "\t"]],
    ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
    ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
    ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
    [";,;", [";", ";"]]
  ]
  first_table.each do |line, fields|
    assert_equal(fields, CSV.parse_line(line))
  end

  second_table = [
    ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
    ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
    ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
    ["\"\"", [""]],
    ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
    ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
    ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
    ["foo,\"\",baz", ["foo", "", "baz"]],
    ["foo", ["foo"]],
    [",,", [nil, nil, nil]],
    [",", [nil, nil]],
    ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
    ["foo,,baz", ["foo", nil, "baz"]],
    ["foo,bar", ["foo", "bar"]],
    ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
    ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]]
  ]
  second_table.each do |line, fields|
    assert_equal(fields, CSV.parse_line(line))
  end
end
# From: http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
#
# Quote-heavy edge cases: doubled quotes at field boundaries, quoted line
# breaks, empty quoted fields and trailing empty fields. Each row is
# [raw line, fields CSV.parse_line must return].
def test_aras_edge_cases
  edge_cases = [
    [%Q{a,b}, ["a", "b"]],
    [%Q{a,"""b"""}, ["a", "\"b\""]],
    [%Q{a,"""b"}, ["a", "\"b"]],
    [%Q{a,"b"""}, ["a", "b\""]],
    [%Q{a,"\nb"""}, ["a", "\nb\""]],
    [%Q{a,"""\nb"}, ["a", "\"\nb"]],
    [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]],
    [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
    [%Q{a,,,}, ["a", nil, nil, nil]],
    [%Q{,}, [nil, nil]],
    [%Q{"",""}, ["", ""]],
    [%Q{""""}, ["\""]],
    [%Q{"""",""}, ["\"",""]],
    [%Q{,""}, [nil,""]],
    [%Q{,"\r"}, [nil,"\r"]],
    [%Q{"\r\n,"}, ["\r\n,"]],
    [%Q{"\r\n,",}, ["\r\n,", nil]]
  ]

  edge_cases.each do |line, fields|
    parsed = CSV.parse_line(line)
    assert_equal(fields, parsed)
  end
end
# Edge cases around empty input: parsing an empty string and parsing data
# whose first line is blank.
def test_james_edge_cases
  # A read at eof? should return nil.
  # assert_nil is used because assert_equal(nil, ...) is deprecated in Minitest.
  assert_nil(CSV.parse_line(""))

  #
  # With Ruby 1.8 CSV it's impossible to tell an empty line from a line
  # containing a single +nil+ field. The old CSV library returns
  # <tt>[nil]</tt> in these cases, but an empty Array makes more sense to
  # me.
  #
  assert_equal([], CSV.parse_line("\n1,2,3\n"))
end
# Quoted fields containing LF and CRLF line breaks, including consecutive
# breaks and a record followed by a trailing newline. Each row is
# [raw line, fields CSV.parse_line must return].
def test_rob_edge_cases
  multiline_cases = [
    [%Q{"a\nb"}, ["a\nb"]],
    [%Q{"\n\n\n"}, ["\n\n\n"]],
    [%Q{a,"b\n\nc"}, ['a', "b\n\nc"]],
    [%Q{,"\r\n"}, [nil,"\r\n"]],
    [%Q{,"\r\n."}, [nil,"\r\n."]],
    [%Q{"a\na","one newline"}, ["a\na", 'one newline']],
    [%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']],
    [%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']],
    [%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']],
    [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]]
  ]

  multiline_cases.each do |line, fields|
    assert_equal(fields, CSV.parse_line(line))
  end
end
# One quoted field full of commas must parse cleanly, and a quoted field
# followed by stray characters and quotes must raise CSV::MalformedCSVError.
def test_non_regex_edge_cases
  # An early version of the non-regex parser fails this test
  quoted_commas = "foo,\"foo,bar,baz,foo\",\"foo\""
  assert_equal(["foo", "foo,bar,baz,foo", "foo"], CSV.parse_line(quoted_commas))

  stray_quotes = "1,\"23\"4\"5\", 6"
  assert_raise(CSV::MalformedCSVError) { CSV.parse_line(stray_quotes) }
end
# Malformed input must raise CSV::MalformedCSVError, and for multi-row data
# the error message must name the offending line.
#
# Both fixtures below are six physical lines long: a quoted field spanning two
# lines, then a blank row, then the bad row labelled "line,4". The blank row
# is required; without it the size assertions (6) cannot hold.
def test_malformed_csv
  # A bare \r inside an unquoted field is illegal when rows end in \n.
  assert_raise(CSV::MalformedCSVError) do
    CSV.parse_line("1,2\r,3", row_sep: "\n")
  end

  # The gsub strips the heredoc's leading indentation.
  bad_data = <<-END_DATA.gsub(/^ +/, "")
  line,1,abc
  line,2,"def\nghi"

  line,4,some\rjunk
  line,5,jkl
  END_DATA
  lines = bad_data.lines.to_a
  assert_equal(6, lines.size)
  assert_match(/\Aline,4/, lines.find { |l| l =~ /some\rjunk/ })

  csv = CSV.new(bad_data)
  begin
    # Every row before the bad one must parse; reaching the end of the data
    # without an error makes shift return nil and fails assert_not_nil.
    loop do
      assert_not_nil(csv.shift)
      assert_send([csv.lineno, :<, 4])
    end
  rescue CSV::MalformedCSVError => error
    assert_equal( "Unquoted fields do not allow \\r or \\n (line 4).",
                  error.message )
  end

  # An unterminated quoted field is malformed as well.
  assert_raise(CSV::MalformedCSVError) { CSV.parse_line('1,2,"3...') }

  bad_data = <<-END_DATA.gsub(/^ +/, "")
  line,1,abc
  line,2,"def\nghi"

  line,4,8'10"
  line,5,jkl
  END_DATA
  lines = bad_data.lines.to_a
  assert_equal(6, lines.size)
  assert_match(/\Aline,4/, lines.find { |l| l =~ /8'10"/ })

  csv = CSV.new(bad_data)
  begin
    loop do
      assert_not_nil(csv.shift)
      assert_send([csv.lineno, :<, 4])
    end
  rescue CSV::MalformedCSVError => error
    assert_equal("Illegal quoting in line 4.", error.message)
  end
end
# A quote appearing inside an unquoted field, followed by BIG_DATA, must make
# the parse error out (checked through assert_parse_errors_out).
def test_the_parse_fails_fast_when_it_can_for_unquoted_fields
  malformed = 'valid,fields,bad start"' + BIG_DATA
  assert_parse_errors_out(malformed)
end
# Text trailing directly after a closed quoted field, followed by BIG_DATA,
# must make the parse error out (checked through assert_parse_errors_out).
def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes
  malformed = 'valid,fields,"bad start"unescaped' + BIG_DATA
  assert_parse_errors_out(malformed)
end
# A quoted field wrapping all of BIG_DATA, parsed with field_size_limit: 2048,
# must make the parse error out (checked through assert_parse_errors_out).
def test_field_size_limit_controls_lookahead
  oversized_field = 'valid,fields,"' + BIG_DATA + '"'
  assert_parse_errors_out(oversized_field, field_size_limit: 2048)
end
private
# Asserts that CSV.parse raises CSV::MalformedCSVError for the given input
# within 0.2 seconds.
#
# args::    positional arguments passed straight through to CSV.parse
#           (normally just the data String).
# options:: keyword options for CSV.parse, e.g. <tt>field_size_limit: 2048</tt>.
#           They are forwarded as keywords; under Ruby 3 a Hash captured by
#           <tt>*args</tt> would be passed positionally, which a
#           <tt>parse(str, **options)</tt> signature rejects with ArgumentError.
#
# The assertion fails if the parse returns normally (the explicit +fail+) or
# if Timeout aborts it first, since neither raises the expected error class.
def assert_parse_errors_out(*args, **options)
  assert_raise(CSV::MalformedCSVError) do
    Timeout.timeout(0.2) do
      CSV.parse(*args, **options)
      fail("Parse didn't error out")
    end
  end
end
end
|