mirror of
https://github.com/ruby/ruby.git
synced 2025-09-20 19:14:00 +02:00
Merge csv-3.2.4
This commit is contained in:
parent
c69fffe67d
commit
cd0c2a67c4
9 changed files with 244 additions and 51 deletions
24
lib/csv.rb
24
lib/csv.rb
|
@ -95,14 +95,11 @@ require "stringio"
|
|||
|
||||
require_relative "csv/fields_converter"
|
||||
require_relative "csv/input_record_separator"
|
||||
require_relative "csv/match_p"
|
||||
require_relative "csv/parser"
|
||||
require_relative "csv/row"
|
||||
require_relative "csv/table"
|
||||
require_relative "csv/writer"
|
||||
|
||||
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
||||
|
||||
# == \CSV
|
||||
#
|
||||
# === In a Hurry?
|
||||
|
@ -866,8 +863,9 @@ class CSV
|
|||
# <b><tt>index</tt></b>:: The zero-based index of the field in its row.
|
||||
# <b><tt>line</tt></b>:: The line of the data source this row is from.
|
||||
# <b><tt>header</tt></b>:: The header for the column, when available.
|
||||
# <b><tt>quoted?</tt></b>:: True or false, whether the original value is quoted or not.
|
||||
#
|
||||
FieldInfo = Struct.new(:index, :line, :header)
|
||||
FieldInfo = Struct.new(:index, :line, :header, :quoted?)
|
||||
|
||||
# A Regexp used to find and convert some common Date formats.
|
||||
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
|
||||
|
@ -875,10 +873,9 @@ class CSV
|
|||
# A Regexp used to find and convert some common DateTime formats.
|
||||
DateTimeMatcher =
|
||||
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
|
||||
\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} |
|
||||
# ISO-8601
|
||||
# ISO-8601 and RFC-3339 (space instead of T) recognized by DateTime.parse
|
||||
\d{4}-\d{2}-\d{2}
|
||||
(?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
||||
(?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
||||
)\z /x
|
||||
|
||||
# The encoding used by all converters.
|
||||
|
@ -1893,8 +1890,19 @@ class CSV
|
|||
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
||||
|
||||
if data.is_a?(String)
|
||||
if encoding
|
||||
if encoding.is_a?(String)
|
||||
data_external_encoding, data_internal_encoding = encoding.split(":", 2)
|
||||
if data_internal_encoding
|
||||
data = data.encode(data_internal_encoding, data_external_encoding)
|
||||
else
|
||||
data = data.dup.force_encoding(data_external_encoding)
|
||||
end
|
||||
else
|
||||
data = data.dup.force_encoding(encoding)
|
||||
end
|
||||
end
|
||||
@io = StringIO.new(data)
|
||||
@io.set_encoding(encoding || data.encoding)
|
||||
else
|
||||
@io = data
|
||||
end
|
||||
|
|
|
@ -44,7 +44,7 @@ class CSV
|
|||
@converters.empty?
|
||||
end
|
||||
|
||||
def convert(fields, headers, lineno)
|
||||
def convert(fields, headers, lineno, quoted_fields)
|
||||
return fields unless need_convert?
|
||||
|
||||
fields.collect.with_index do |field, index|
|
||||
|
@ -63,7 +63,8 @@ class CSV
|
|||
else
|
||||
header = nil
|
||||
end
|
||||
field = converter[field, FieldInfo.new(index, lineno, header)]
|
||||
quoted = quoted_fields[index]
|
||||
field = converter[field, FieldInfo.new(index, lineno, header, quoted)]
|
||||
end
|
||||
break unless field.is_a?(String) # short-circuit pipeline for speed
|
||||
end
|
||||
|
|
|
@ -2,15 +2,10 @@
|
|||
|
||||
require "strscan"
|
||||
|
||||
require_relative "delete_suffix"
|
||||
require_relative "input_record_separator"
|
||||
require_relative "match_p"
|
||||
require_relative "row"
|
||||
require_relative "table"
|
||||
|
||||
using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
|
||||
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
||||
|
||||
class CSV
|
||||
# Note: Don't use this class directly. This is an internal class.
|
||||
class Parser
|
||||
|
@ -763,9 +758,10 @@ class CSV
|
|||
case headers
|
||||
when Array
|
||||
@raw_headers = headers
|
||||
quoted_fields = [false] * @raw_headers.size
|
||||
@use_headers = true
|
||||
when String
|
||||
@raw_headers = parse_headers(headers)
|
||||
@raw_headers, quoted_fields = parse_headers(headers)
|
||||
@use_headers = true
|
||||
when nil, false
|
||||
@raw_headers = nil
|
||||
|
@ -775,21 +771,28 @@ class CSV
|
|||
@use_headers = true
|
||||
end
|
||||
if @raw_headers
|
||||
@headers = adjust_headers(@raw_headers)
|
||||
@headers = adjust_headers(@raw_headers, quoted_fields)
|
||||
else
|
||||
@headers = nil
|
||||
end
|
||||
end
|
||||
|
||||
def parse_headers(row)
|
||||
CSV.parse_line(row,
|
||||
quoted_fields = []
|
||||
converter = lambda do |field, info|
|
||||
quoted_fields << info.quoted?
|
||||
field
|
||||
end
|
||||
headers = CSV.parse_line(row,
|
||||
col_sep: @column_separator,
|
||||
row_sep: @row_separator,
|
||||
quote_char: @quote_character)
|
||||
quote_char: @quote_character,
|
||||
converters: [converter])
|
||||
[headers, quoted_fields]
|
||||
end
|
||||
|
||||
def adjust_headers(headers)
|
||||
adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno)
|
||||
def adjust_headers(headers, quoted_fields)
|
||||
adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno, quoted_fields)
|
||||
adjusted_headers.each {|h| h.freeze if h.is_a? String}
|
||||
adjusted_headers
|
||||
end
|
||||
|
@ -933,9 +936,11 @@ class CSV
|
|||
if line.empty?
|
||||
next if @skip_blanks
|
||||
row = []
|
||||
quoted_fields = []
|
||||
else
|
||||
line = strip_value(line)
|
||||
row = line.split(@split_column_separator, -1)
|
||||
quoted_fields = [false] * row.size
|
||||
if @max_field_size
|
||||
row.each do |column|
|
||||
validate_field_size(column)
|
||||
|
@ -949,7 +954,7 @@ class CSV
|
|||
end
|
||||
end
|
||||
@last_line = original_line
|
||||
emit_row(row, &block)
|
||||
emit_row(row, quoted_fields, &block)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -971,25 +976,30 @@ class CSV
|
|||
next
|
||||
end
|
||||
row = []
|
||||
quoted_fields = []
|
||||
elsif line.include?(@cr) or line.include?(@lf)
|
||||
@scanner.keep_back
|
||||
@need_robust_parsing = true
|
||||
return parse_quotable_robust(&block)
|
||||
else
|
||||
row = line.split(@split_column_separator, -1)
|
||||
quoted_fields = []
|
||||
n_columns = row.size
|
||||
i = 0
|
||||
while i < n_columns
|
||||
column = row[i]
|
||||
if column.empty?
|
||||
quoted_fields << false
|
||||
row[i] = nil
|
||||
else
|
||||
n_quotes = column.count(@quote_character)
|
||||
if n_quotes.zero?
|
||||
quoted_fields << false
|
||||
# no quote
|
||||
elsif n_quotes == 2 and
|
||||
column.start_with?(@quote_character) and
|
||||
column.end_with?(@quote_character)
|
||||
quoted_fields << true
|
||||
row[i] = column[1..-2]
|
||||
else
|
||||
@scanner.keep_back
|
||||
|
@ -1004,13 +1014,14 @@ class CSV
|
|||
@scanner.keep_drop
|
||||
@scanner.keep_start
|
||||
@last_line = original_line
|
||||
emit_row(row, &block)
|
||||
emit_row(row, quoted_fields, &block)
|
||||
end
|
||||
@scanner.keep_drop
|
||||
end
|
||||
|
||||
def parse_quotable_robust(&block)
|
||||
row = []
|
||||
quoted_fields = []
|
||||
skip_needless_lines
|
||||
start_row
|
||||
while true
|
||||
|
@ -1024,20 +1035,24 @@ class CSV
|
|||
end
|
||||
if parse_column_end
|
||||
row << value
|
||||
quoted_fields << @quoted_column_value
|
||||
elsif parse_row_end
|
||||
if row.empty? and value.nil?
|
||||
emit_row([], &block) unless @skip_blanks
|
||||
emit_row([], [], &block) unless @skip_blanks
|
||||
else
|
||||
row << value
|
||||
emit_row(row, &block)
|
||||
quoted_fields << @quoted_column_value
|
||||
emit_row(row, quoted_fields, &block)
|
||||
row = []
|
||||
quoted_fields = []
|
||||
end
|
||||
skip_needless_lines
|
||||
start_row
|
||||
elsif @scanner.eos?
|
||||
break if row.empty? and value.nil?
|
||||
row << value
|
||||
emit_row(row, &block)
|
||||
quoted_fields << @quoted_column_value
|
||||
emit_row(row, quoted_fields, &block)
|
||||
break
|
||||
else
|
||||
if @quoted_column_value
|
||||
|
@ -1141,7 +1156,7 @@ class CSV
|
|||
if (n_quotes % 2).zero?
|
||||
quotes[0, (n_quotes - 2) / 2]
|
||||
else
|
||||
value = quotes[0, (n_quotes - 1) / 2]
|
||||
value = quotes[0, n_quotes / 2]
|
||||
while true
|
||||
quoted_value = @scanner.scan_all(@quoted_value)
|
||||
value << quoted_value if quoted_value
|
||||
|
@ -1165,11 +1180,9 @@ class CSV
|
|||
n_quotes = quotes.size
|
||||
if n_quotes == 1
|
||||
break
|
||||
elsif (n_quotes % 2) == 1
|
||||
value << quotes[0, (n_quotes - 1) / 2]
|
||||
break
|
||||
else
|
||||
value << quotes[0, n_quotes / 2]
|
||||
break if (n_quotes % 2) == 1
|
||||
end
|
||||
end
|
||||
value
|
||||
|
@ -1205,18 +1218,15 @@ class CSV
|
|||
|
||||
def strip_value(value)
|
||||
return value unless @strip
|
||||
return nil if value.nil?
|
||||
return value if value.nil?
|
||||
|
||||
case @strip
|
||||
when String
|
||||
size = value.size
|
||||
while value.start_with?(@strip)
|
||||
size -= 1
|
||||
value = value[1, size]
|
||||
while value.delete_prefix!(@strip)
|
||||
# do nothing
|
||||
end
|
||||
while value.end_with?(@strip)
|
||||
size -= 1
|
||||
value = value[0, size]
|
||||
while value.delete_suffix!(@strip)
|
||||
# do nothing
|
||||
end
|
||||
else
|
||||
value.strip!
|
||||
|
@ -1239,22 +1249,22 @@ class CSV
|
|||
@scanner.keep_start
|
||||
end
|
||||
|
||||
def emit_row(row, &block)
|
||||
def emit_row(row, quoted_fields, &block)
|
||||
@lineno += 1
|
||||
|
||||
raw_row = row
|
||||
if @use_headers
|
||||
if @headers.nil?
|
||||
@headers = adjust_headers(row)
|
||||
@headers = adjust_headers(row, quoted_fields)
|
||||
return unless @return_headers
|
||||
row = Row.new(@headers, row, true)
|
||||
else
|
||||
row = Row.new(@headers,
|
||||
@fields_converter.convert(raw_row, @headers, @lineno))
|
||||
@fields_converter.convert(raw_row, @headers, @lineno, quoted_fields))
|
||||
end
|
||||
else
|
||||
# convert fields, if needed...
|
||||
row = @fields_converter.convert(raw_row, nil, @lineno)
|
||||
row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields)
|
||||
end
|
||||
|
||||
# inject unconverted fields and accessor, if requested...
|
||||
|
|
|
@ -703,7 +703,7 @@ class CSV
|
|||
# by +index_or_header+ and +specifiers+.
|
||||
#
|
||||
# The nested objects may be instances of various classes.
|
||||
# See {Dig Methods}[https://docs.ruby-lang.org/en/master/doc/dig_methods_rdoc.html].
|
||||
# See {Dig Methods}[https://docs.ruby-lang.org/en/master/dig_methods_rdoc.html].
|
||||
#
|
||||
# Examples:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
|
|
|
@ -2,5 +2,5 @@
|
|||
|
||||
class CSV
|
||||
# The version of the installed library.
|
||||
VERSION = "3.2.3"
|
||||
VERSION = "3.2.4"
|
||||
end
|
||||
|
|
|
@ -1,11 +1,8 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require_relative "input_record_separator"
|
||||
require_relative "match_p"
|
||||
require_relative "row"
|
||||
|
||||
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
||||
|
||||
class CSV
|
||||
# Note: Don't use this class directly. This is an internal class.
|
||||
class Writer
|
||||
|
@ -42,7 +39,10 @@ class CSV
|
|||
@headers ||= row if @use_headers
|
||||
@lineno += 1
|
||||
|
||||
row = @fields_converter.convert(row, nil, lineno) if @fields_converter
|
||||
if @fields_converter
|
||||
quoted_fields = [false] * row.size
|
||||
row = @fields_converter.convert(row, nil, lineno, quoted_fields)
|
||||
end
|
||||
|
||||
i = -1
|
||||
converted_row = row.collect do |field|
|
||||
|
@ -97,7 +97,7 @@ class CSV
|
|||
return unless @headers
|
||||
|
||||
converter = @options[:header_fields_converter]
|
||||
@headers = converter.convert(@headers, nil, 0)
|
||||
@headers = converter.convert(@headers, nil, 0, [])
|
||||
@headers.each do |header|
|
||||
header.freeze if header.is_a?(String)
|
||||
end
|
||||
|
|
|
@ -107,4 +107,63 @@ class TestCSVParseConvert < Test::Unit::TestCase
|
|||
assert_equal([nil, "empty", "a"],
|
||||
CSV.parse_line(',"",a', empty_value: "empty"))
|
||||
end
|
||||
|
||||
sub_test_case("#quoted?") do
|
||||
def setup
|
||||
@preserving_converter = lambda do |field, info|
|
||||
f = field.encode(CSV::ConverterEncoding)
|
||||
return f if info.quoted?
|
||||
begin
|
||||
Integer(f, 10)
|
||||
rescue
|
||||
f
|
||||
end
|
||||
end
|
||||
|
||||
@quoted_header_converter = lambda do |field, info|
|
||||
f = field.encode(CSV::ConverterEncoding)
|
||||
return f if info.quoted?
|
||||
f.to_sym
|
||||
end
|
||||
end
|
||||
|
||||
def test_parse_line
|
||||
row = CSV.parse_line('1,"2",3', converters: @preserving_converter)
|
||||
assert_equal([1, "2", 3], row)
|
||||
end
|
||||
|
||||
def test_parse
|
||||
expected = [["quoted", "unquoted"], ["109", 1], ["10A", 2]]
|
||||
rows = CSV.parse(<<~CSV, converters: @preserving_converter)
|
||||
"quoted",unquoted
|
||||
"109",1
|
||||
"10A",2
|
||||
CSV
|
||||
assert_equal(expected, rows)
|
||||
end
|
||||
|
||||
def test_alternating_quote
|
||||
row = CSV.parse_line('"1",2,"3"', converters: @preserving_converter)
|
||||
assert_equal(['1', 2, '3'], row)
|
||||
end
|
||||
|
||||
def test_parse_headers
|
||||
expected = [["quoted", :unquoted], ["109", "1"], ["10A", "2"]]
|
||||
table = CSV.parse(<<~CSV, headers: true, header_converters: @quoted_header_converter)
|
||||
"quoted",unquoted
|
||||
"109",1
|
||||
"10A",2
|
||||
CSV
|
||||
assert_equal(expected, table.to_a)
|
||||
end
|
||||
|
||||
def test_parse_with_string_headers
|
||||
expected = [["quoted", :unquoted], %w[109 1], %w[10A 2]]
|
||||
table = CSV.parse(<<~CSV, headers: '"quoted",unquoted', header_converters: @quoted_header_converter)
|
||||
"109",1
|
||||
"10A",2
|
||||
CSV
|
||||
assert_equal(expected, table.to_a)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -103,4 +103,88 @@ class TestCSVDataConverters < Test::Unit::TestCase
|
|||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][iso8601_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_minute
|
||||
rfc3339_string = "2018-01-14 22:25"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25)
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_second
|
||||
rfc3339_string = "2018-01-14 22:25:19"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25, 19)
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_under_second
|
||||
rfc3339_string = "2018-01-14 22:25:19.1"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1)
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_under_second_offset
|
||||
rfc3339_string = "2018-01-14 22:25:19.1+09:00"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9")
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_offset
|
||||
rfc3339_string = "2018-01-14 22:25:19+09:00"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9")
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_utc
|
||||
rfc3339_string = "2018-01-14 22:25:19Z"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25, 19)
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_tab_minute
|
||||
rfc3339_string = "2018-01-14\t22:25"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25)
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_tab_second
|
||||
rfc3339_string = "2018-01-14\t22:25:19"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25, 19)
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_tab_under_second
|
||||
rfc3339_string = "2018-01-14\t22:25:19.1"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1)
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_tab_under_second_offset
|
||||
rfc3339_string = "2018-01-14\t22:25:19.1+09:00"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9")
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_tab_offset
|
||||
rfc3339_string = "2018-01-14\t22:25:19+09:00"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9")
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
|
||||
def test_builtin_date_time_converter_rfc3339_tab_utc
|
||||
rfc3339_string = "2018-01-14\t22:25:19Z"
|
||||
datetime = DateTime.new(2018, 1, 14, 22, 25, 19)
|
||||
assert_equal(datetime,
|
||||
CSV::Converters[:date_time][rfc3339_string])
|
||||
end
|
||||
end
|
||||
|
|
|
@ -288,6 +288,37 @@ class TestCSVEncodings < Test::Unit::TestCase
|
|||
error.message)
|
||||
end
|
||||
|
||||
def test_string_input_transcode
|
||||
# U+3042 HIRAGANA LETTER A
|
||||
# U+3044 HIRAGANA LETTER I
|
||||
# U+3046 HIRAGANA LETTER U
|
||||
value = "\u3042\u3044\u3046"
|
||||
csv = CSV.new(value, encoding: "UTF-8:EUC-JP")
|
||||
assert_equal([[value.encode("EUC-JP")]],
|
||||
csv.read)
|
||||
end
|
||||
|
||||
def test_string_input_set_encoding_string
|
||||
# U+3042 HIRAGANA LETTER A
|
||||
# U+3044 HIRAGANA LETTER I
|
||||
# U+3046 HIRAGANA LETTER U
|
||||
value = "\u3042\u3044\u3046".encode("EUC-JP")
|
||||
csv = CSV.new(value.dup.force_encoding("UTF-8"), encoding: "EUC-JP")
|
||||
assert_equal([[value.encode("EUC-JP")]],
|
||||
csv.read)
|
||||
end
|
||||
|
||||
def test_string_input_set_encoding_encoding
|
||||
# U+3042 HIRAGANA LETTER A
|
||||
# U+3044 HIRAGANA LETTER I
|
||||
# U+3046 HIRAGANA LETTER U
|
||||
value = "\u3042\u3044\u3046".encode("EUC-JP")
|
||||
csv = CSV.new(value.dup.force_encoding("UTF-8"),
|
||||
encoding: Encoding.find("EUC-JP"))
|
||||
assert_equal([[value.encode("EUC-JP")]],
|
||||
csv.read)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def assert_parses(fields, encoding, **options)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue