Merge csv-3.2.3

This commit is contained in:
Hiroshi SHIBATA 2022-08-25 10:49:13 +09:00 committed by nagachika
parent a9bf13a4df
commit c69fffe67d
13 changed files with 1431 additions and 364 deletions

View file

@ -48,7 +48,7 @@
# #
# === Interface # === Interface
# #
# * CSV now uses Hash-style parameters to set options. # * CSV now uses keyword parameters to set options.
# * CSV no longer has generate_row() or parse_row(). # * CSV no longer has generate_row() or parse_row().
# * The old CSV's Reader and Writer classes have been dropped. # * The old CSV's Reader and Writer classes have been dropped.
# * CSV::open() is now more like Ruby's open(). # * CSV::open() is now more like Ruby's open().
@ -104,7 +104,18 @@ require_relative "csv/writer"
using CSV::MatchP if CSV.const_defined?(:MatchP) using CSV::MatchP if CSV.const_defined?(:MatchP)
# == \CSV # == \CSV
# \CSV (comma-separated variables) data is a text representation of a table: #
# === In a Hurry?
#
# If you are familiar with \CSV data and have a particular task in mind,
# you may want to go directly to the:
# - {Recipes for CSV}[doc/csv/recipes/recipes_rdoc.html].
#
# Otherwise, read on here, about the API: classes, methods, and constants.
#
# === \CSV Data
#
# \CSV (comma-separated values) data is a text representation of a table:
# - A _row_ _separator_ delimits table rows. # - A _row_ _separator_ delimits table rows.
# A common row separator is the newline character <tt>"\n"</tt>. # A common row separator is the newline character <tt>"\n"</tt>.
# - A _column_ _separator_ delimits fields in a row. # - A _column_ _separator_ delimits fields in a row.
@ -346,7 +357,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
# - +row_sep+: Specifies the row separator; used to delimit rows. # - +row_sep+: Specifies the row separator; used to delimit rows.
# - +col_sep+: Specifies the column separator; used to delimit fields. # - +col_sep+: Specifies the column separator; used to delimit fields.
# - +quote_char+: Specifies the quote character; used to quote fields. # - +quote_char+: Specifies the quote character; used to quote fields.
# - +field_size_limit+: Specifies the maximum field size allowed. # - +field_size_limit+: Specifies the maximum field size + 1 allowed.
# Deprecated since 3.2.3. Use +max_field_size+ instead.
# - +max_field_size+: Specifies the maximum field size allowed.
# - +converters+: Specifies the field converters to be used. # - +converters+: Specifies the field converters to be used.
# - +unconverted_fields+: Specifies whether unconverted fields are to be available. # - +unconverted_fields+: Specifies whether unconverted fields are to be available.
# - +headers+: Specifies whether data contains headers, # - +headers+: Specifies whether data contains headers,
@ -703,7 +716,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
# Header converters operate only on headers (and not on other rows). # Header converters operate only on headers (and not on other rows).
# #
# There are three ways to use header \converters; # There are three ways to use header \converters;
# these examples use built-in header converter +:dowhcase+, # these examples use built-in header converter +:downcase+,
# which downcases each parsed header. # which downcases each parsed header.
# #
# - Option +header_converters+ with a singleton parsing method: # - Option +header_converters+ with a singleton parsing method:
@ -915,7 +928,8 @@ class CSV
symbol: lambda { |h| symbol: lambda { |h|
h.encode(ConverterEncoding).downcase.gsub(/[^\s\w]+/, "").strip. h.encode(ConverterEncoding).downcase.gsub(/[^\s\w]+/, "").strip.
gsub(/\s+/, "_").to_sym gsub(/\s+/, "_").to_sym
} },
symbol_raw: lambda { |h| h.encode(ConverterEncoding).to_sym }
} }
# Default values for method options. # Default values for method options.
@ -926,6 +940,7 @@ class CSV
quote_char: '"', quote_char: '"',
# For parsing. # For parsing.
field_size_limit: nil, field_size_limit: nil,
max_field_size: nil,
converters: nil, converters: nil,
unconverted_fields: nil, unconverted_fields: nil,
headers: false, headers: false,
@ -1007,60 +1022,185 @@ class CSV
end end
# :call-seq: # :call-seq:
# filter(**options) {|row| ... } # filter(in_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
# filter(in_string, **options) {|row| ... } # filter(in_string_or_io, out_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
# filter(in_io, **options) {|row| ... } # filter(**options) {|row| ... } -> array_of_arrays or csv_table
# filter(in_string, out_string, **options) {|row| ... }
# filter(in_string, out_io, **options) {|row| ... }
# filter(in_io, out_string, **options) {|row| ... }
# filter(in_io, out_io, **options) {|row| ... }
# #
# Reads \CSV input and writes \CSV output. # - Parses \CSV from a source (\String, \IO stream, or ARGF).
# - Calls the given block with each parsed row:
# - Without headers, each row is an \Array.
# - With headers, each row is a CSV::Row.
# - Generates \CSV to an output (\String, \IO stream, or STDOUT).
# - Returns the parsed source:
# - Without headers, an \Array of \Arrays.
# - With headers, a CSV::Table.
# #
# For each input row: # When +in_string_or_io+ is given, but not +out_string_or_io+,
# - Forms the data into: # parses from the given +in_string_or_io+
# - A CSV::Row object, if headers are in use. # and generates to STDOUT.
# - An \Array of Arrays, otherwise.
# - Calls the block with that object.
# - Appends the block's return value to the output.
# #
# Arguments: # \String input without headers:
# * \CSV source:
# * Argument +in_string+, if given, should be a \String object;
# it will be put into a new StringIO object positioned at the beginning.
# * Argument +in_io+, if given, should be an IO object that is
# open for reading; on return, the IO object will be closed.
# * If neither +in_string+ nor +in_io+ is given,
# the input stream defaults to {ARGF}[https://ruby-doc.org/core/ARGF.html].
# * \CSV output:
# * Argument +out_string+, if given, should be a \String object;
# it will be put into a new StringIO object positioned at the beginning.
# * Argument +out_io+, if given, should be an IO object that is
# ppen for writing; on return, the IO object will be closed.
# * If neither +out_string+ nor +out_io+ is given,
# the output stream defaults to <tt>$stdout</tt>.
# * Argument +options+ should be keyword arguments.
# - Each argument name that is prefixed with +in_+ or +input_+
# is stripped of its prefix and is treated as an option
# for parsing the input.
# Option +input_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>.
# - Each argument name that is prefixed with +out_+ or +output_+
# is stripped of its prefix and is treated as an option
# for generating the output.
# Option +output_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>.
# - Each argument not prefixed as above is treated as an option
# both for parsing the input and for generating the output.
# - See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
# and {Options for Generating}[#class-CSV-label-Options+for+Generating].
# #
# Example: # in_string = "foo,0\nbar,1\nbaz,2"
# in_string = "foo,0\nbar,1\nbaz,2\n" # CSV.filter(in_string) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
#
# Output (to STDOUT):
#
# FOO,0
# BAR,-1
# BAZ,-2
#
# \String input with headers:
#
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
# CSV.filter(in_string, headers: true) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end # => #<CSV::Table mode:col_or_row row_count:4>
#
# Output (to STDOUT):
#
# Name,Value
# FOO,0
# BAR,-1
# BAZ,-2
#
# \IO stream input without headers:
#
# File.write('t.csv', "foo,0\nbar,1\nbaz,2")
# File.open('t.csv') do |in_io|
# CSV.filter(in_io) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
#
# Output (to STDOUT):
#
# FOO,0
# BAR,-1
# BAZ,-2
#
# \IO stream input with headers:
#
# File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
# File.open('t.csv') do |in_io|
# CSV.filter(in_io, headers: true) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end
# end # => #<CSV::Table mode:col_or_row row_count:4>
#
# Output (to STDOUT):
#
# Name,Value
# FOO,0
# BAR,-1
# BAZ,-2
#
# When both +in_string_or_io+ and +out_string_or_io+ are given,
# parses from +in_string_or_io+ and generates to +out_string_or_io+.
#
# \String output without headers:
#
# in_string = "foo,0\nbar,1\nbaz,2"
# out_string = '' # out_string = ''
# CSV.filter(in_string, out_string) do |row| # CSV.filter(in_string, out_string) do |row|
# row[0] = row[0].upcase # row[0].upcase!
# row[1] *= 4 # row[1] = - row[1].to_i
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
# out_string # => "FOO,0\nBAR,-1\nBAZ,-2\n"
#
# \String output with headers:
#
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
# out_string = ''
# CSV.filter(in_string, out_string, headers: true) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end # => #<CSV::Table mode:col_or_row row_count:4>
# out_string # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
#
# \IO stream output without headers:
#
# in_string = "foo,0\nbar,1\nbaz,2"
# File.open('t.csv', 'w') do |out_io|
# CSV.filter(in_string, out_io) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end # end
# out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
# File.read('t.csv') # => "FOO,0\nBAR,-1\nBAZ,-2\n"
#
# \IO stream output with headers:
#
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
# File.open('t.csv', 'w') do |out_io|
# CSV.filter(in_string, out_io, headers: true) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end
# end # => #<CSV::Table mode:col_or_row row_count:4>
# File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
#
# When neither +in_string_or_io+ nor +out_string_or_io+ is given,
# parses from {ARGF}[https://docs.ruby-lang.org/en/master/ARGF.html]
# and generates to STDOUT.
#
# Without headers:
#
# # Put Ruby code into a file.
# ruby = <<-EOT
# require 'csv'
# CSV.filter do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end
# EOT
# File.write('t.rb', ruby)
# # Put some CSV into a file.
# File.write('t.csv', "foo,0\nbar,1\nbaz,2")
# # Run the Ruby code with CSV filename as argument.
# system(Gem.ruby, "t.rb", "t.csv")
#
# Output (to STDOUT):
#
# FOO,0
# BAR,-1
# BAZ,-2
#
# With headers:
#
# # Put Ruby code into a file.
# ruby = <<-EOT
# require 'csv'
# CSV.filter(headers: true) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end
# EOT
# File.write('t.rb', ruby)
# # Put some CSV into a file.
# File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
# # Run the Ruby code with CSV filename as argument.
# system(Gem.ruby, "t.rb", "t.csv")
#
# Output (to STDOUT):
#
# Name,Value
# FOO,0
# BAR,-1
# BAZ,-2
#
# Arguments:
#
# * Argument +in_string_or_io+ must be a \String or an \IO stream.
# * Argument +out_string_or_io+ must be a \String or an \IO stream.
# * Arguments <tt>**options</tt> must be keyword options.
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
def filter(input=nil, output=nil, **options) def filter(input=nil, output=nil, **options)
# parse options for input, output, or both # parse options for input, output, or both
in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value} in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value}
@ -1107,19 +1247,78 @@ class CSV
# #
# :call-seq: # :call-seq:
# foreach(path, mode='r', **options) {|row| ... ) # foreach(path_or_io, mode='r', **options) {|row| ... )
# foreach(io, mode='r', **options {|row| ... ) # foreach(path_or_io, mode='r', **options) -> new_enumerator
# foreach(path, mode='r', headers: ..., **options) {|row| ... )
# foreach(io, mode='r', headers: ..., **options {|row| ... )
# foreach(path, mode='r', **options) -> new_enumerator
# foreach(io, mode='r', **options -> new_enumerator
# #
# Calls the block with each row read from source +path+ or +io+. # Calls the block with each row read from source +path_or_io+.
# #
# * Argument +path+, if given, must be the path to a file. # \Path input without headers:
# :include: ../doc/csv/arguments/io.rdoc #
# string = "foo,0\nbar,1\nbaz,2\n"
# in_path = 't.csv'
# File.write(in_path, string)
# CSV.foreach(in_path) {|row| p row }
#
# Output:
#
# ["foo", "0"]
# ["bar", "1"]
# ["baz", "2"]
#
# \Path input with headers:
#
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# in_path = 't.csv'
# File.write(in_path, string)
# CSV.foreach(in_path, headers: true) {|row| p row }
#
# Output:
#
# <CSV::Row "Name":"foo" "Value":"0">
# <CSV::Row "Name":"bar" "Value":"1">
# <CSV::Row "Name":"baz" "Value":"2">
#
# \IO stream input without headers:
#
# string = "foo,0\nbar,1\nbaz,2\n"
# path = 't.csv'
# File.write(path, string)
# File.open('t.csv') do |in_io|
# CSV.foreach(in_io) {|row| p row }
# end
#
# Output:
#
# ["foo", "0"]
# ["bar", "1"]
# ["baz", "2"]
#
# \IO stream input with headers:
#
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# path = 't.csv'
# File.write(path, string)
# File.open('t.csv') do |in_io|
# CSV.foreach(in_io, headers: true) {|row| p row }
# end
#
# Output:
#
# <CSV::Row "Name":"foo" "Value":"0">
# <CSV::Row "Name":"bar" "Value":"1">
# <CSV::Row "Name":"baz" "Value":"2">
#
# With no block given, returns an \Enumerator:
#
# string = "foo,0\nbar,1\nbaz,2\n"
# path = 't.csv'
# File.write(path, string)
# CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")>
#
# Arguments:
# * Argument +path_or_io+ must be a file path or an \IO stream.
# * Argument +mode+, if given, must be a \File mode # * Argument +mode+, if given, must be a \File mode
# See {Open Mode}[IO.html#method-c-new-label-Open+Mode]. # See {Open Mode}[https://ruby-doc.org/core/IO.html#method-c-new-label-Open+Mode].
# * Arguments <tt>**options</tt> must be keyword options. # * Arguments <tt>**options</tt> must be keyword options.
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]. # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
# * This method optionally accepts an additional <tt>:encoding</tt> option # * This method optionally accepts an additional <tt>:encoding</tt> option
@ -1132,86 +1331,6 @@ class CSV
# encoding: 'UTF-32BE:UTF-8' # encoding: 'UTF-32BE:UTF-8'
# would read +UTF-32BE+ data from the file # would read +UTF-32BE+ data from the file
# but transcode it to +UTF-8+ before parsing. # but transcode it to +UTF-8+ before parsing.
#
# ====== Without Option +headers+
#
# Without option +headers+, returns each row as an \Array object.
#
# These examples assume prior execution of:
# string = "foo,0\nbar,1\nbaz,2\n"
# path = 't.csv'
# File.write(path, string)
#
# Read rows from a file at +path+:
# CSV.foreach(path) {|row| p row }
# Output:
# ["foo", "0"]
# ["bar", "1"]
# ["baz", "2"]
#
# Read rows from an \IO object:
# File.open(path) do |file|
# CSV.foreach(file) {|row| p row }
# end
#
# Output:
# ["foo", "0"]
# ["bar", "1"]
# ["baz", "2"]
#
# Returns a new \Enumerator if no block given:
# CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")>
# CSV.foreach(File.open(path)) # => #<Enumerator: CSV:foreach(#<File:t.csv>, "r")>
#
# Issues a warning if an encoding is unsupported:
# CSV.foreach(File.open(path), encoding: 'foo:bar') {|row| }
# Output:
# warning: Unsupported encoding foo ignored
# warning: Unsupported encoding bar ignored
#
# ====== With Option +headers+
#
# With {option +headers+}[#class-CSV-label-Option+headers],
# returns each row as a CSV::Row object.
#
# These examples assume prior execution of:
# string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
# path = 't.csv'
# File.write(path, string)
#
# Read rows from a file at +path+:
# CSV.foreach(path, headers: true) {|row| p row }
#
# Output:
# #<CSV::Row "Name":"foo" "Count":"0">
# #<CSV::Row "Name":"bar" "Count":"1">
# #<CSV::Row "Name":"baz" "Count":"2">
#
# Read rows from an \IO object:
# File.open(path) do |file|
# CSV.foreach(file, headers: true) {|row| p row }
# end
#
# Output:
# #<CSV::Row "Name":"foo" "Count":"0">
# #<CSV::Row "Name":"bar" "Count":"1">
# #<CSV::Row "Name":"baz" "Count":"2">
#
# ---
#
# Raises an exception if +path+ is a \String, but not the path to a readable file:
# # Raises Errno::ENOENT (No such file or directory @ rb_sysopen - nosuch.csv):
# CSV.foreach('nosuch.csv') {|row| }
#
# Raises an exception if +io+ is an \IO object, but not open for reading:
# io = File.open(path, 'w') {|row| }
# # Raises TypeError (no implicit conversion of nil into String):
# CSV.foreach(io) {|row| }
#
# Raises an exception if +mode+ is invalid:
# # Raises ArgumentError (invalid access mode nosuch):
# CSV.foreach(path, 'nosuch') {|row| }
#
def foreach(path, mode="r", **options, &block) def foreach(path, mode="r", **options, &block)
return to_enum(__method__, path, mode, **options) unless block_given? return to_enum(__method__, path, mode, **options) unless block_given?
open(path, mode, **options) do |csv| open(path, mode, **options) do |csv|
@ -1357,7 +1476,7 @@ class CSV
# open(io, mode = "rb", **options ) { |csv| ... } -> object # open(io, mode = "rb", **options ) { |csv| ... } -> object
# #
# possible options elements: # possible options elements:
# hash form: # keyword form:
# :invalid => nil # raise error on invalid byte sequence (default) # :invalid => nil # raise error on invalid byte sequence (default)
# :invalid => :replace # replace invalid byte sequence # :invalid => :replace # replace invalid byte sequence
# :undef => :replace # replace undefined conversion # :undef => :replace # replace undefined conversion
@ -1424,10 +1543,14 @@ class CSV
def open(filename, mode="r", **options) def open(filename, mode="r", **options)
# wrap a File opened with the remaining +args+ with no newline # wrap a File opened with the remaining +args+ with no newline
# decorator # decorator
file_opts = {universal_newline: false}.merge(options) file_opts = options.dup
unless file_opts.key?(:newline)
file_opts[:universal_newline] ||= false
end
options.delete(:invalid) options.delete(:invalid)
options.delete(:undef) options.delete(:undef)
options.delete(:replace) options.delete(:replace)
options.delete_if {|k, _| /newline\z/.match?(k)}
begin begin
f = File.open(filename, mode, **file_opts) f = File.open(filename, mode, **file_opts)
@ -1746,6 +1869,7 @@ class CSV
row_sep: :auto, row_sep: :auto,
quote_char: '"', quote_char: '"',
field_size_limit: nil, field_size_limit: nil,
max_field_size: nil,
converters: nil, converters: nil,
unconverted_fields: nil, unconverted_fields: nil,
headers: false, headers: false,
@ -1788,11 +1912,14 @@ class CSV
@initial_header_converters = header_converters @initial_header_converters = header_converters
@initial_write_converters = write_converters @initial_write_converters = write_converters
if max_field_size.nil? and field_size_limit
max_field_size = field_size_limit - 1
end
@parser_options = { @parser_options = {
column_separator: col_sep, column_separator: col_sep,
row_separator: row_sep, row_separator: row_sep,
quote_character: quote_char, quote_character: quote_char,
field_size_limit: field_size_limit, max_field_size: max_field_size,
unconverted_fields: unconverted_fields, unconverted_fields: unconverted_fields,
headers: headers, headers: headers,
return_headers: return_headers, return_headers: return_headers,
@ -1860,10 +1987,24 @@ class CSV
# Returns the limit for field size; used for parsing; # Returns the limit for field size; used for parsing;
# see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]: # see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]:
# CSV.new('').field_size_limit # => nil # CSV.new('').field_size_limit # => nil
#
# Deprecated since 3.2.3. Use +max_field_size+ instead.
def field_size_limit def field_size_limit
parser.field_size_limit parser.field_size_limit
end end
# :call-seq:
# csv.max_field_size -> integer or nil
#
# Returns the limit for field size; used for parsing;
# see {Option +max_field_size+}[#class-CSV-label-Option+max_field_size]:
# CSV.new('').max_field_size # => nil
#
# The value is read from the underlying parser object.
#
# Since 3.2.3.
def max_field_size
parser.max_field_size
end
# :call-seq: # :call-seq:
# csv.skip_lines -> regexp or nil # csv.skip_lines -> regexp or nil
# #
@ -1994,7 +2135,7 @@ class CSV
end end
# :call-seq: # :call-seq:
# csv.encoding -> endcoding # csv.encoding -> encoding
# #
# Returns the encoding used for parsing and generating; # Returns the encoding used for parsing and generating;
# see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]: # see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]:

View file

@ -4,20 +4,7 @@ require "stringio"
class CSV class CSV
module InputRecordSeparator module InputRecordSeparator
class << self class << self
is_input_record_separator_deprecated = false if RUBY_VERSION >= "3.0.0"
verbose, $VERBOSE = $VERBOSE, true
stderr, $stderr = $stderr, StringIO.new
input_record_separator = $INPUT_RECORD_SEPARATOR
begin
$INPUT_RECORD_SEPARATOR = "\r\n"
is_input_record_separator_deprecated = (not $stderr.string.empty?)
ensure
$INPUT_RECORD_SEPARATOR = input_record_separator
$stderr = stderr
$VERBOSE = verbose
end
if is_input_record_separator_deprecated
def value def value
"\n" "\n"
end end

View file

@ -27,6 +27,10 @@ class CSV
class InvalidEncoding < StandardError class InvalidEncoding < StandardError
end end
# Raised when the parser reaches an internal state that should never
# occur (for example, a kept scanner that differs from the current
# scanner while no keep buffer exists). It signals a bug in the parser
# rather than a problem in the input data.
class UnexpectedError < StandardError
end
# #
# CSV::Scanner receives a CSV output, scans it and return the content. # CSV::Scanner receives a CSV output, scans it and return the content.
# It also controls the life cycle of the object with its methods +keep_start+, # It also controls the life cycle of the object with its methods +keep_start+,
@ -78,10 +82,10 @@ class CSV
# +keep_end+, +keep_back+, +keep_drop+. # +keep_end+, +keep_back+, +keep_drop+.
# #
# CSV::InputsScanner.scan() tries to match with pattern at the current position. # CSV::InputsScanner.scan() tries to match with pattern at the current position.
# If there's a match, the scanner advances the “scan pointer” and returns the matched string. # If there's a match, the scanner advances the "scan pointer" and returns the matched string.
# Otherwise, the scanner returns nil. # Otherwise, the scanner returns nil.
# #
# CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer). # CSV::InputsScanner.rest() returns the "rest" of the string (i.e. everything after the scan pointer).
# If there is no more data (eos? = true), it returns "". # If there is no more data (eos? = true), it returns "".
# #
class InputsScanner class InputsScanner
@ -96,11 +100,13 @@ class CSV
end end
def each_line(row_separator) def each_line(row_separator)
return enum_for(__method__, row_separator) unless block_given?
buffer = nil buffer = nil
input = @scanner.rest input = @scanner.rest
position = @scanner.pos position = @scanner.pos
offset = 0 offset = 0
n_row_separator_chars = row_separator.size n_row_separator_chars = row_separator.size
# trace(__method__, :start, line, input)
while true while true
input.each_line(row_separator) do |line| input.each_line(row_separator) do |line|
@scanner.pos += line.bytesize @scanner.pos += line.bytesize
@ -140,25 +146,28 @@ class CSV
end end
def scan(pattern) def scan(pattern)
# trace(__method__, pattern, :start)
value = @scanner.scan(pattern) value = @scanner.scan(pattern)
# trace(__method__, pattern, :done, :last, value) if @last_scanner
return value if @last_scanner return value if @last_scanner
if value read_chunk if value and @scanner.eos?
read_chunk if @scanner.eos? # trace(__method__, pattern, :done, value)
return value value
else
nil
end
end end
def scan_all(pattern) def scan_all(pattern)
# trace(__method__, pattern, :start)
value = @scanner.scan(pattern) value = @scanner.scan(pattern)
# trace(__method__, pattern, :done, :last, value) if @last_scanner
return value if @last_scanner return value if @last_scanner
return nil if value.nil? return nil if value.nil?
while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern)) while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern))
# trace(__method__, pattern, :sub, sub_value)
value << sub_value value << sub_value
end end
# trace(__method__, pattern, :done, value)
value value
end end
@ -167,68 +176,126 @@ class CSV
end end
def keep_start def keep_start
@keeps.push([@scanner.pos, nil]) # trace(__method__, :start)
adjust_last_keep
@keeps.push([@scanner, @scanner.pos, nil])
# trace(__method__, :done)
end end
def keep_end def keep_end
start, buffer = @keeps.pop # trace(__method__, :start)
scanner, start, buffer = @keeps.pop
if scanner == @scanner
keep = @scanner.string.byteslice(start, @scanner.pos - start) keep = @scanner.string.byteslice(start, @scanner.pos - start)
else
keep = @scanner.string.byteslice(0, @scanner.pos)
end
if buffer if buffer
buffer << keep buffer << keep
keep = buffer keep = buffer
end end
# trace(__method__, :done, keep)
keep keep
end end
def keep_back def keep_back
start, buffer = @keeps.pop # trace(__method__, :start)
scanner, start, buffer = @keeps.pop
if buffer if buffer
# trace(__method__, :rescan, start, buffer)
string = @scanner.string string = @scanner.string
if scanner == @scanner
keep = string.byteslice(start, string.bytesize - start) keep = string.byteslice(start, string.bytesize - start)
else
keep = string
end
if keep and not keep.empty? if keep and not keep.empty?
@inputs.unshift(StringIO.new(keep)) @inputs.unshift(StringIO.new(keep))
@last_scanner = false @last_scanner = false
end end
@scanner = StringScanner.new(buffer) @scanner = StringScanner.new(buffer)
else else
if @scanner != scanner
message = "scanners are different but no buffer: "
message += "#{@scanner.inspect}(#{@scanner.object_id}): "
message += "#{scanner.inspect}(#{scanner.object_id})"
raise UnexpectedError, message
end
# trace(__method__, :repos, start, buffer)
@scanner.pos = start @scanner.pos = start
end end
read_chunk if @scanner.eos? read_chunk if @scanner.eos?
end end
def keep_drop def keep_drop
@keeps.pop _, _, buffer = @keeps.pop
# trace(__method__, :done, :empty) unless buffer
return unless buffer
last_keep = @keeps.last
# trace(__method__, :done, :no_last_keep) unless last_keep
return unless last_keep
if last_keep[2]
last_keep[2] << buffer
else
last_keep[2] = buffer
end
# trace(__method__, :done)
end end
def rest def rest
@scanner.rest @scanner.rest
end end
# Forwards +pattern+ to the current underlying scanner's +check+ and
# returns its result. Unlike #scan, this method never calls +read_chunk+,
# so only the chunk that is currently loaded is inspected.
def check(pattern)
@scanner.check(pattern)
end
private private
# Debugging aid: pretty-prints the given +args+ followed by a snapshot of
# the scanner state (the current scanner, its string and position when a
# scanner exists, and the stack of keeps). The commented-out
# <tt># trace(...)</tt> lines elsewhere in this class can be enabled to
# use it.
def trace(*args)
  scanner = @scanner
  snapshot = args + [scanner, scanner&.string, scanner&.pos, @keeps]
  pp(snapshot)
end
# Saves the part of the current scanner's string that belongs to the most
# recent keep into that keep's buffer (slot 2 of the keep entry).
#
# Does nothing when there is no keep. When the keep was started on a
# different scanner than the current one, the data is taken from the
# beginning of the current string. The data is appended to an existing
# buffer, or stored as a fresh copy when the keep has no buffer yet.
def adjust_last_keep
  # trace(__method__, :start)

  last_keep = @keeps.last
  # trace(__method__, :done, :empty) if last_keep.nil?
  return if last_keep.nil?

  kept_scanner, offset, kept_buffer = last_keep
  current_string = @scanner.string
  # A keep recorded against another scanner covers the current string
  # from its very beginning.
  offset = 0 if @scanner != kept_scanner

  chunk =
    if offset == 0 and @scanner.eos?
      # The whole string was consumed: reuse it instead of slicing.
      current_string
    else
      current_string.byteslice(offset, @scanner.pos - offset)
    end
  return unless chunk

  if kept_buffer
    kept_buffer << chunk
  else
    # Copy so later appends never touch the scanner's own string.
    last_keep[2] = chunk.dup
  end

  # trace(__method__, :done)
end
def read_chunk def read_chunk
return false if @last_scanner return false if @last_scanner
unless @keeps.empty? adjust_last_keep
keep = @keeps.last
keep_start = keep[0]
string = @scanner.string
keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
if keep_data
keep_buffer = keep[1]
if keep_buffer
keep_buffer << keep_data
else
keep[1] = keep_data.dup
end
end
keep[0] = 0
end
input = @inputs.first input = @inputs.first
case input case input
when StringIO when StringIO
string = input.read string = input.read
raise InvalidEncoding unless string.valid_encoding? raise InvalidEncoding unless string.valid_encoding?
# trace(__method__, :stringio, string)
@scanner = StringScanner.new(string) @scanner = StringScanner.new(string)
@inputs.shift @inputs.shift
@last_scanner = @inputs.empty? @last_scanner = @inputs.empty?
@ -237,6 +304,7 @@ class CSV
chunk = input.gets(@row_separator, @chunk_size) chunk = input.gets(@row_separator, @chunk_size)
if chunk if chunk
raise InvalidEncoding unless chunk.valid_encoding? raise InvalidEncoding unless chunk.valid_encoding?
# trace(__method__, :chunk, chunk)
@scanner = StringScanner.new(chunk) @scanner = StringScanner.new(chunk)
if input.respond_to?(:eof?) and input.eof? if input.respond_to?(:eof?) and input.eof?
@inputs.shift @inputs.shift
@ -244,6 +312,7 @@ class CSV
end end
true true
else else
# trace(__method__, :no_chunk)
@scanner = StringScanner.new("".encode(@encoding)) @scanner = StringScanner.new("".encode(@encoding))
@inputs.shift @inputs.shift
@last_scanner = @inputs.empty? @last_scanner = @inputs.empty?
@ -278,7 +347,11 @@ class CSV
end end
def field_size_limit def field_size_limit
@field_size_limit @max_field_size&.succ
end
# Returns the configured maximum field size (the value stored in
# <tt>@max_field_size</tt>), or +nil+ when no maximum was set.
def max_field_size
@max_field_size
end
def skip_lines def skip_lines
@ -346,6 +419,16 @@ class CSV
end end
message = "Invalid byte sequence in #{@encoding}" message = "Invalid byte sequence in #{@encoding}"
raise MalformedCSVError.new(message, lineno) raise MalformedCSVError.new(message, lineno)
rescue UnexpectedError => error
if @scanner
ignore_broken_line
lineno = @lineno
else
lineno = @lineno + 1
end
message = "This should not be happen: #{error.message}: "
message += "Please report this to https://github.com/ruby/csv/issues"
raise MalformedCSVError.new(message, lineno)
end end
end end
@ -390,7 +473,7 @@ class CSV
@backslash_quote = false @backslash_quote = false
end end
@unconverted_fields = @options[:unconverted_fields] @unconverted_fields = @options[:unconverted_fields]
@field_size_limit = @options[:field_size_limit] @max_field_size = @options[:max_field_size]
@skip_blanks = @options[:skip_blanks] @skip_blanks = @options[:skip_blanks]
@fields_converter = @options[:fields_converter] @fields_converter = @options[:fields_converter]
@header_fields_converter = @options[:header_fields_converter] @header_fields_converter = @options[:header_fields_converter]
@ -729,9 +812,7 @@ class CSV
sample[0, 128].index(@quote_character) sample[0, 128].index(@quote_character)
end end
SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes") class UnoptimizedStringIO # :nodoc:
if SCANNER_TEST
class UnoptimizedStringIO
def initialize(string) def initialize(string)
@io = StringIO.new(string, "rb:#{string.encoding}") @io = StringIO.new(string, "rb:#{string.encoding}")
end end
@ -749,8 +830,10 @@ class CSV
end end
end end
SCANNER_TEST_CHUNK_SIZE = SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10) if SCANNER_TEST
SCANNER_TEST_CHUNK_SIZE_NAME = "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"
SCANNER_TEST_CHUNK_SIZE_VALUE = ENV[SCANNER_TEST_CHUNK_SIZE_NAME]
def build_scanner def build_scanner
inputs = @samples.collect do |sample| inputs = @samples.collect do |sample|
UnoptimizedStringIO.new(sample) UnoptimizedStringIO.new(sample)
@ -760,10 +843,17 @@ class CSV
else else
inputs << @input inputs << @input
end end
begin
chunk_size_value = ENV[SCANNER_TEST_CHUNK_SIZE_NAME]
rescue # Ractor::IsolationError
# Ractor on Ruby 3.0 can't read ENV value.
chunk_size_value = SCANNER_TEST_CHUNK_SIZE_VALUE
end
chunk_size = Integer((chunk_size_value || "1"), 10)
InputsScanner.new(inputs, InputsScanner.new(inputs,
@encoding, @encoding,
@row_separator, @row_separator,
chunk_size: SCANNER_TEST_CHUNK_SIZE) chunk_size: chunk_size)
end end
else else
def build_scanner def build_scanner
@ -826,6 +916,14 @@ class CSV
end end
end end
# Checks +field+ against the configured maximum field size.
#
# Returns +nil+ without doing anything when no maximum is configured or
# when <tt>field.size</tt> does not exceed it. Otherwise calls
# +ignore_broken_line+ and raises MalformedCSVError with a
# "Field size exceeded" message and the current line number.
def validate_field_size(field)
  limit = @max_field_size
  return unless limit

  size = field.size
  return if size <= limit

  ignore_broken_line
  raise MalformedCSVError.new("Field size exceeded: #{size} > #{limit}", @lineno)
end
def parse_no_quote(&block) def parse_no_quote(&block)
@scanner.each_line(@row_separator) do |line| @scanner.each_line(@row_separator) do |line|
next if @skip_lines and skip_line?(line) next if @skip_lines and skip_line?(line)
@ -838,6 +936,11 @@ class CSV
else else
line = strip_value(line) line = strip_value(line)
row = line.split(@split_column_separator, -1) row = line.split(@split_column_separator, -1)
if @max_field_size
row.each do |column|
validate_field_size(column)
end
end
n_columns = row.size n_columns = row.size
i = 0 i = 0
while i < n_columns while i < n_columns
@ -893,6 +996,7 @@ class CSV
@need_robust_parsing = true @need_robust_parsing = true
return parse_quotable_robust(&block) return parse_quotable_robust(&block)
end end
validate_field_size(row[i])
end end
i += 1 i += 1
end end
@ -916,10 +1020,7 @@ class CSV
value = parse_column_value value = parse_column_value
if value if value
@scanner.scan_all(@strip_value) if @strip_value @scanner.scan_all(@strip_value) if @strip_value
if @field_size_limit and value.size >= @field_size_limit validate_field_size(value)
ignore_broken_line
raise MalformedCSVError.new("Field size exceeded", @lineno)
end
end end
if parse_column_end if parse_column_end
row << value row << value
@ -940,8 +1041,14 @@ class CSV
break break
else else
if @quoted_column_value if @quoted_column_value
ignore_broken_line if liberal_parsing? and (new_line = @scanner.check(@line_end))
message =
"Illegal end-of-line sequence outside of a quoted field " +
"<#{new_line.inspect}>"
else
message = "Any value after quoted field isn't allowed" message = "Any value after quoted field isn't allowed"
end
ignore_broken_line
raise MalformedCSVError.new(message, @lineno) raise MalformedCSVError.new(message, @lineno)
elsif @unquoted_column_value and elsif @unquoted_column_value and
(new_line = @scanner.scan(@line_end)) (new_line = @scanner.scan(@line_end))

View file

@ -3,30 +3,105 @@
require "forwardable" require "forwardable"
class CSV class CSV
# = \CSV::Row
# A \CSV::Row instance represents a \CSV table row.
# (see {class CSV}[../CSV.html]).
# #
# A CSV::Row is part Array and part Hash. It retains an order for the fields # The instance may have:
# and allows duplicates just as an Array would, but also allows you to access # - Fields: each is an object, not necessarily a \String.
# fields by name just as you could if they were in a Hash. # - Headers: each serves as a key, and also need not be a \String.
# #
# All rows returned by CSV will be constructed from this class, if header row # === Instance Methods
# processing is activated. #
# \CSV::Row has three groups of instance methods:
# - Its own internally defined instance methods.
# - Methods included by module Enumerable.
# - Methods delegated to class Array:
# * Array#empty?
# * Array#length
# * Array#size
#
# == Creating a \CSV::Row Instance
#
# Commonly, a new \CSV::Row instance is created by parsing \CSV source
# that has headers:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.each {|row| p row }
# Output:
# #<CSV::Row "Name":"foo" "Value":"0">
# #<CSV::Row "Name":"bar" "Value":"1">
# #<CSV::Row "Name":"baz" "Value":"2">
#
# You can also create a row directly. See ::new.
#
# == Headers
#
# Like a \CSV::Table, a \CSV::Row has headers.
#
# A \CSV::Row that was created by parsing \CSV source
# inherits its headers from the table:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# row = table.first
# row.headers # => ["Name", "Value"]
#
# You can also create a new row with headers;
# like the keys in a \Hash, the headers need not be Strings:
# row = CSV::Row.new([:name, :value], ['foo', 0])
# row.headers # => [:name, :value]
#
# The new row retains its headers even if added to a table
# that has headers:
# table << row # => #<CSV::Table mode:col_or_row row_count:5>
# row.headers # => [:name, :value]
# row[:name] # => "foo"
# row['Name'] # => nil
#
#
#
# == Accessing Fields
#
# You may access a field in a \CSV::Row with either its \Integer index
# (\Array-style) or its header (\Hash-style).
#
# Fetch a field using method #[]:
# row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
# row[1] # => 0
# row['Value'] # => 0
#
# Set a field using method #[]=:
# row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
# row # => #<CSV::Row "Name":"foo" "Value":0>
# row[0] = 'bar'
# row['Value'] = 1
# row # => #<CSV::Row "Name":"bar" "Value":1>
# #
class Row class Row
# :call-seq:
# CSV::Row.new(headers, fields, header_row = false) -> csv_row
# #
# Constructs a new CSV::Row from +headers+ and +fields+, which are expected # Returns the new \CSV::Row instance constructed from
# to be Arrays. If one Array is shorter than the other, it will be padded # arguments +headers+ and +fields+; both should be Arrays;
# with +nil+ objects. # note that the fields need not be Strings:
# row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
# row # => #<CSV::Row "Name":"foo" "Value":0>
# #
# The optional +header_row+ parameter can be set to +true+ to indicate, via # If the \Array lengths are different, the shorter is +nil+-filled:
# CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header # row = CSV::Row.new(['Name', 'Value', 'Date', 'Size'], ['foo', 0])
# row. Otherwise, the row assumes to be a field row. # row # => #<CSV::Row "Name":"foo" "Value":0 "Date":nil "Size":nil>
# #
# A CSV::Row object supports the following Array methods through delegation: # Each \CSV::Row object is either a <i>field row</i> or a <i>header row</i>;
# # by default, a new row is a field row; for the row created above:
# * empty?() # row.field_row? # => true
# * length() # row.header_row? # => false
# * size()
# #
# If the optional argument +header_row+ is given as +true+,
# the created row is a header row:
# row = CSV::Row.new(['Name', 'Value'], ['foo', 0], header_row = true)
# row # => #<CSV::Row "Name":"foo" "Value":0>
# row.field_row? # => false
# row.header_row? # => true
def initialize(headers, fields, header_row = false) def initialize(headers, fields, header_row = false)
@header_row = header_row @header_row = header_row
headers.each { |h| h.freeze if h.is_a? String } headers.each { |h| h.freeze if h.is_a? String }
@ -48,6 +123,10 @@ class CSV
extend Forwardable extend Forwardable
def_delegators :@row, :empty?, :length, :size def_delegators :@row, :empty?, :length, :size
# :call-seq:
# row.initialize_copy(other_row) -> self
#
# Calls superclass method.
def initialize_copy(other) def initialize_copy(other)
super_return_value = super super_return_value = super
@row = @row.collect(&:dup) @row = @row.collect(&:dup)
@ -71,7 +150,7 @@ class CSV
end end
# :call-seq: # :call-seq:
# row.headers # row.headers -> array_of_headers
# #
# Returns the headers for this row: # Returns the headers for this row:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
@ -83,9 +162,9 @@ class CSV
end end
# :call-seq: # :call-seq:
# field(index) # field(index) -> value
# field(header) # field(header) -> value
# field(header, offset) # field(header, offset) -> value
# #
# Returns the field value for the given +index+ or +header+. # Returns the field value for the given +index+ or +header+.
# #
@ -137,9 +216,9 @@ class CSV
# #
# :call-seq: # :call-seq:
# fetch(header) # fetch(header) -> value
# fetch(header, default) # fetch(header, default) -> value
# fetch(header) {|row| ... } # fetch(header) {|row| ... } -> value
# #
# Returns the field value as specified by +header+. # Returns the field value as specified by +header+.
# #
@ -193,7 +272,7 @@ class CSV
end end
# :call-seq: # :call-seq:
# row.has_key?(header) # row.has_key?(header) -> true or false
# #
# Returns +true+ if there is a field with the given +header+, # Returns +true+ if there is a field with the given +header+,
# +false+ otherwise. # +false+ otherwise.
@ -403,7 +482,7 @@ class CSV
end end
# :call-seq: # :call-seq:
# self.fields(*specifiers) # self.fields(*specifiers) -> array_of_fields
# #
# Returns field values per the given +specifiers+, which may be any mixture of: # Returns field values per the given +specifiers+, which may be any mixture of:
# - \Integer index. # - \Integer index.
@ -471,15 +550,26 @@ class CSV
end end
alias_method :values_at, :fields alias_method :values_at, :fields
#
# :call-seq: # :call-seq:
# index( header ) # index(header) -> index
# index( header, offset ) # index(header, offset) -> index
# #
# This method will return the index of a field with the provided +header+. # Returns the index for the given header, if it exists;
# The +offset+ can be used to locate duplicate header names, as described in # otherwise returns +nil+.
# CSV::Row.field().
# #
# With the single argument +header+, returns the index
# of the first-found field with the given +header+:
# source = "Name,Name,Name\nFoo,Bar,Baz\n"
# table = CSV.parse(source, headers: true)
# row = table[0]
# row.index('Name') # => 0
# row.index('NAME') # => nil
#
# With arguments +header+ and +offset+,
# returns the index of the first-found field with given +header+,
# but ignoring the first +offset+ fields:
# row.index('Name', 1) # => 1
# row.index('Name', 3) # => nil
def index(header, minimum_index = 0) def index(header, minimum_index = 0)
# find the pair # find the pair
index = headers[minimum_index..-1].index(header) index = headers[minimum_index..-1].index(header)
@ -487,24 +577,36 @@ class CSV
index.nil? ? nil : index + minimum_index index.nil? ? nil : index + minimum_index
end end
# :call-seq:
# row.field?(value) -> true or false
# #
# Returns +true+ if +data+ matches a field in this row, and +false+ # Returns +true+ if +value+ is a field in this row, +false+ otherwise:
# otherwise. # source = "Name,Name,Name\nFoo,Bar,Baz\n"
# # table = CSV.parse(source, headers: true)
# row = table[0]
# row.field?('Bar') # => true
# row.field?('BAR') # => false
def field?(data) def field?(data)
fields.include? data fields.include? data
end end
include Enumerable include Enumerable
# :call-seq:
# row.each {|header, value| ... } -> self
# #
# Yields each pair of the row as header and field tuples (much like # Calls the block with each header-value pair; returns +self+:
# iterating over a Hash). This method returns the row for chaining. # source = "Name,Name,Name\nFoo,Bar,Baz\n"
# # table = CSV.parse(source, headers: true)
# If no block is given, an Enumerator is returned. # row = table[0]
# # row.each {|header, value| p [header, value] }
# Support for Enumerable. # Output:
# ["Name", "Foo"]
# ["Name", "Bar"]
# ["Name", "Baz"]
# #
# If no block is given, returns a new Enumerator:
# row.each # => #<Enumerator: #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz">:each>
def each(&block) def each(&block)
return enum_for(__method__) { size } unless block_given? return enum_for(__method__) { size } unless block_given?
@ -515,10 +617,19 @@ class CSV
alias_method :each_pair, :each alias_method :each_pair, :each
# :call-seq:
# row == other -> true or false
# #
# Returns +true+ if this row contains the same headers and fields in the # Returns +true+ if +other+ is a \CSV::Row that has the same
# same order as +other+. # fields (headers and values) in the same order as +self+;
# # otherwise returns +false+:
# source = "Name,Name,Name\nFoo,Bar,Baz\n"
# table = CSV.parse(source, headers: true)
# row = table[0]
# other_row = table[0]
# row == other_row # => true
# other_row = table[1]
# row == other_row # => false
def ==(other) def ==(other)
return @row == other.row if other.is_a? CSV::Row return @row == other.row if other.is_a? CSV::Row
@row == other @row == other
@ -548,8 +659,30 @@ class CSV
end end
alias_method :to_hash, :to_h alias_method :to_hash, :to_h
# :call-seq:
#   row.deconstruct_keys(keys) -> hash
#
# Returns a \Hash suitable for pattern matching with hash patterns.
#
# When +keys+ is +nil+, returns the result of #to_h;
# otherwise returns a new \Hash that maps each element of +keys+
# to <tt>self[key]</tt>.
def deconstruct_keys(keys)
  return to_h if keys.nil?

  keys.to_h { |requested| [requested, self[requested]] }
end
alias_method :to_ary, :to_a alias_method :to_ary, :to_a
# :call-seq:
#   row.deconstruct -> array
#
# Returns an \Array suitable for pattern matching with array patterns:
# the result of calling #fields with no specifiers, that is,
# the values of the row.
def deconstruct
fields
end
# :call-seq: # :call-seq:
# row.to_csv -> csv_string # row.to_csv -> csv_string
# #

View file

@ -3,31 +3,199 @@
require "forwardable" require "forwardable"
class CSV class CSV
# = \CSV::Table
# A \CSV::Table instance represents \CSV data.
# (see {class CSV}[../CSV.html]).
# #
# A CSV::Table is a two-dimensional data structure for representing CSV # The instance may have:
# documents. Tables allow you to work with the data by row or column, # - Rows: each is a CSV::Row object.
# manipulate the data, and even convert the results back to CSV, if needed. # - Headers: names for the columns.
# #
# All tables returned by CSV will be constructed from this class, if header # === Instance Methods
# row processing is activated.
# #
# \CSV::Table has three groups of instance methods:
# - Its own internally defined instance methods.
# - Methods included by module Enumerable.
# - Methods delegated to class Array:
# * Array#empty?
# * Array#length
# * Array#size
#
# == Creating a \CSV::Table Instance
#
# Commonly, a new \CSV::Table instance is created by parsing \CSV source
# using headers:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.class # => CSV::Table
#
# You can also create an instance directly. See ::new.
#
# == Headers
#
# If a table has headers, the headers serve as labels for the columns of data.
# Each header serves as the label for its column.
#
# The headers for a \CSV::Table object are stored as an \Array of Strings.
#
# Commonly, headers are defined in the first row of \CSV source:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.headers # => ["Name", "Value"]
#
# If no headers are defined, the \Array is empty:
# table = CSV::Table.new([])
# table.headers # => []
#
# == Access Modes
#
# \CSV::Table provides three modes for accessing table data:
# - \Row mode.
# - Column mode.
# - Mixed mode (the default for a new table).
#
# The access mode for a \CSV::Table instance affects the behavior
# of some of its instance methods:
# - #[]
# - #[]=
# - #delete
# - #delete_if
# - #each
# - #values_at
#
# === \Row Mode
#
# Set a table to row mode with method #by_row!:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.by_row! # => #<CSV::Table mode:row row_count:4>
#
# Specify a single row by an \Integer index:
# # Get a row.
# table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
# # Set a row, then get it.
# table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3])
# table[1] # => #<CSV::Row "Name":"bam" "Value":3>
#
# Specify a sequence of rows by a \Range:
# # Get rows.
# table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">]
# # Set rows, then get them.
# table[1..2] = [
# CSV::Row.new(['Name', 'Value'], ['bat', 4]),
# CSV::Row.new(['Name', 'Value'], ['bad', 5]),
# ]
# table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]]
#
# === Column Mode
#
# Set a table to column mode with method #by_col!:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.by_col! # => #<CSV::Table mode:col row_count:4>
#
# Specify a column by an \Integer index:
# # Get a column.
# table[0]
# # Set a column, then get it.
# table[0] = ['FOO', 'BAR', 'BAZ']
# table[0] # => ["FOO", "BAR", "BAZ"]
#
# Specify a column by its \String header:
# # Get a column.
# table['Name'] # => ["FOO", "BAR", "BAZ"]
# # Set a column, then get it.
# table['Name'] = ['Foo', 'Bar', 'Baz']
# table['Name'] # => ["Foo", "Bar", "Baz"]
#
# === Mixed Mode
#
# In mixed mode, you can refer to either rows or columns:
# - An \Integer index refers to a row.
# - A \Range index refers to multiple rows.
# - A \String index refers to a column.
#
# Set a table to mixed mode with method #by_col_or_row!:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
#
# Specify a single row by an \Integer index:
# # Get a row.
# table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
# # Set a row, then get it.
# table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3])
# table[1] # => #<CSV::Row "Name":"bam" "Value":3>
#
# Specify a sequence of rows by a \Range:
# # Get rows.
# table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">]
# # Set rows, then get them.
# table[1] = CSV::Row.new(['Name', 'Value'], ['bat', 4])
# table[2] = CSV::Row.new(['Name', 'Value'], ['bad', 5])
# table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]]
#
# Specify a column by its \String header:
# # Get a column.
# table['Name'] # => ["foo", "bat", "bad"]
# # Set a column, then get it.
# table['Name'] = ['Foo', 'Bar', 'Baz']
# table['Name'] # => ["Foo", "Bar", "Baz"]
class Table class Table
# :call-seq:
# CSV::Table.new(array_of_rows, headers: nil) -> csv_table
# #
# Constructs a new CSV::Table from +array_of_rows+, which are expected # Returns a new \CSV::Table object.
# to be CSV::Row objects. All rows are assumed to have the same headers.
# #
# The optional +headers+ parameter can be set to Array of headers. # - Argument +array_of_rows+ must be an \Array of CSV::Row objects.
# If headers aren't set, headers are fetched from CSV::Row objects. # - Argument +headers+, if given, may be an \Array of Strings.
# Otherwise, headers() method will return headers being set in
# headers argument.
# #
# A CSV::Table object supports the following Array methods through # ---
# delegation:
# #
# * empty?() # Create an empty \CSV::Table object:
# * length() # table = CSV::Table.new([])
# * size() # table # => #<CSV::Table mode:col_or_row row_count:1>
# #
# Create a non-empty \CSV::Table object:
# rows = [
# CSV::Row.new([], []),
# CSV::Row.new([], []),
# CSV::Row.new([], []),
# ]
# table = CSV::Table.new(rows)
# table # => #<CSV::Table mode:col_or_row row_count:4>
#
# ---
#
# If argument +headers+ is an \Array of Strings,
# those Strings become the table's headers:
# table = CSV::Table.new([], headers: ['Name', 'Age'])
# table.headers # => ["Name", "Age"]
#
# If argument +headers+ is not given and the table has rows,
# the headers are taken from the first row:
# rows = [
# CSV::Row.new(['Foo', 'Bar'], []),
# CSV::Row.new(['foo', 'bar'], []),
# CSV::Row.new(['FOO', 'BAR'], []),
# ]
# table = CSV::Table.new(rows)
# table.headers # => ["Foo", "Bar"]
#
# If argument +headers+ is not given and the table is empty (has no rows),
# the headers are also empty:
# table = CSV::Table.new([])
# table.headers # => []
#
# ---
#
# Raises an exception if argument +array_of_rows+ is not an \Array object:
# # Raises NoMethodError (undefined method `first' for :foo:Symbol):
# CSV::Table.new(:foo)
#
# Raises an exception if an element of +array_of_rows+ is not a \CSV::Row object:
# # Raises NoMethodError (undefined method `headers' for :foo:Symbol):
# CSV::Table.new([:foo])
def initialize(array_of_rows, headers: nil) def initialize(array_of_rows, headers: nil)
@table = array_of_rows @table = array_of_rows
@headers = headers @headers = headers
@ -54,88 +222,141 @@ class CSV
extend Forwardable extend Forwardable
def_delegators :@table, :empty?, :length, :size def_delegators :@table, :empty?, :length, :size
# :call-seq:
# table.by_col -> table_dup
# #
# Returns a duplicate table object, in column mode. This is handy for # Returns a duplicate of +self+, in column mode
# chaining in a single call without changing the table mode, but be aware # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]):
# that this method can consume a fair amount of memory for bigger data sets. # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.mode # => :col_or_row
# dup_table = table.by_col
# dup_table.mode # => :col
# dup_table.equal?(table) # => false # It's a dup
# #
# This method returns the duplicate table for chaining. Don't chain # This may be used to chain method calls without changing the mode
# destructive methods (like []=()) this way though, since you are working # (but also will affect performance and memory usage):
# with a duplicate. # dup_table.by_col['Name']
# #
# Also note that changes to the duplicate table will not affect the original.
def by_col def by_col
self.class.new(@table.dup).by_col! self.class.new(@table.dup).by_col!
end end
# :call-seq:
# table.by_col! -> self
# #
# Switches the mode of this table to column mode. All calls to indexing and # Sets the mode for +self+ to column mode
# iteration methods will work with columns until the mode is changed again. # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]); returns +self+:
# # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# This method returns the table and is safe to chain. # table = CSV.parse(source, headers: true)
# # table.mode # => :col_or_row
# table1 = table.by_col!
# table.mode # => :col
# table1.equal?(table) # => true # Returned self
def by_col! def by_col!
@mode = :col @mode = :col
self self
end end
# :call-seq:
# table.by_col_or_row -> table_dup
# #
# Returns a duplicate table object, in mixed mode. This is handy for # Returns a duplicate of +self+, in mixed mode
# chaining in a single call without changing the table mode, but be aware # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]):
# that this method can consume a fair amount of memory for bigger data sets. # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true).by_col!
# table.mode # => :col
# dup_table = table.by_col_or_row
# dup_table.mode # => :col_or_row
# dup_table.equal?(table) # => false # It's a dup
# #
# This method returns the duplicate table for chaining. Don't chain # This may be used to chain method calls without changing the mode
# destructive methods (like []=()) this way though, since you are working # (but also will affect performance and memory usage):
# with a duplicate. # dup_table.by_col_or_row['Name']
# #
# Also note that changes to the duplicate table will not affect the original.
def by_col_or_row def by_col_or_row
self.class.new(@table.dup).by_col_or_row! self.class.new(@table.dup).by_col_or_row!
end end
# :call-seq:
# table.by_col_or_row! -> self
# #
# Switches the mode of this table to mixed mode. All calls to indexing and # Sets the mode for +self+ to mixed mode
# iteration methods will use the default intelligent indexing system until # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]); returns +self+:
# the mode is changed again. In mixed mode an index is assumed to be a row # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# reference while anything else is assumed to be column access by headers. # table = CSV.parse(source, headers: true).by_col!
# # table.mode # => :col
# This method returns the table and is safe to chain. # table1 = table.by_col_or_row!
# # table.mode # => :col_or_row
# table1.equal?(table) # => true # Returned self
def by_col_or_row! def by_col_or_row!
@mode = :col_or_row @mode = :col_or_row
self self
end end
# :call-seq:
# table.by_row -> table_dup
# #
# Returns a duplicate table object, in row mode. This is handy for chaining # Returns a duplicate of +self+, in row mode
# in a single call without changing the table mode, but be aware that this # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]):
# method can consume a fair amount of memory for bigger data sets. # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.mode # => :col_or_row
# dup_table = table.by_row
# dup_table.mode # => :row
# dup_table.equal?(table) # => false # It's a dup
# #
# This method returns the duplicate table for chaining. Don't chain # This may be used to chain method calls without changing the mode
# destructive methods (like []=()) this way though, since you are working # (but also will affect performance and memory usage):
# with a duplicate. # dup_table.by_row[1]
# #
# Also note that changes to the duplicate table will not affect the original.
def by_row def by_row
self.class.new(@table.dup).by_row! self.class.new(@table.dup).by_row!
end end
# :call-seq:
# table.by_row! -> self
# #
# Switches the mode of this table to row mode. All calls to indexing and # Sets the mode for +self+ to row mode
# iteration methods will work with rows until the mode is changed again. # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]); returns +self+:
# # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# This method returns the table and is safe to chain. # table = CSV.parse(source, headers: true)
# # table.mode # => :col_or_row
# table1 = table.by_row!
# table.mode # => :row
# table1.equal?(table) # => true # Returned self
def by_row! def by_row!
@mode = :row @mode = :row
self self
end end
# :call-seq:
# table.headers -> array_of_headers
# #
# Returns the headers for the first row of this table (assumed to match all # Returns a new \Array containing the \String headers for the table.
# other rows). The headers Array passed to CSV::Table.new is returned for
# empty tables.
# #
# If the table is not empty, returns the headers from the first row:
# rows = [
# CSV::Row.new(['Foo', 'Bar'], []),
# CSV::Row.new(['FOO', 'BAR'], []),
# CSV::Row.new(['foo', 'bar'], []),
# ]
# table = CSV::Table.new(rows)
# table.headers # => ["Foo", "Bar"]
# table.delete(0)
# table.headers # => ["FOO", "BAR"]
# table.delete(0)
# table.headers # => ["foo", "bar"]
#
# If the table is empty, returns a copy of the headers in the table itself:
# table.delete(0)
# table.headers # => ["Foo", "Bar"]
def headers def headers
if @table.empty? if @table.empty?
@headers.dup @headers.dup
@ -145,17 +366,21 @@ class CSV
end end
# :call-seq: # :call-seq:
# table[n] -> row # table[n] -> row or column_data
# table[range] -> array_of_rows # table[range] -> array_of_rows or array_of_column_data
# table[header] -> array_of_fields # table[header] -> array_of_column_data
# #
# Returns data from the table; does not modify the table. # Returns data from the table; does not modify the table.
# #
# --- # ---
# #
# The expression <tt>table[n]</tt>, where +n+ is a non-negative \Integer, # Fetch a \Row by Its \Integer Index::
# returns the +n+th row of the table, if that row exists, # - Form: <tt>table[n]</tt>, +n+ an integer.
# and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>: # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
# - Return value: _nth_ row of the table, if that row exists;
# otherwise +nil+.
#
# Returns the _nth_ row of the table if that row exists:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true) # table = CSV.parse(source, headers: true)
# table.by_row! # => #<CSV::Table mode:row row_count:4> # table.by_row! # => #<CSV::Table mode:row row_count:4>
@ -168,20 +393,45 @@ class CSV
# #
# Returns +nil+ if +n+ is too large or too small: # Returns +nil+ if +n+ is too large or too small:
# table[4] # => nil # table[4] # => nil
# table[-4] => nil # table[-4] # => nil
# #
# Raises an exception if the access mode is <tt>:row</tt> # Raises an exception if the access mode is <tt>:row</tt>
# and +n+ is not an # and +n+ is not an \Integer:
# {Integer-convertible object}[https://docs.ruby-lang.org/en/master/implicit_conversion_rdoc.html#label-Integer-Convertible+Objects].
# table.by_row! # => #<CSV::Table mode:row row_count:4> # table.by_row! # => #<CSV::Table mode:row row_count:4>
# # Raises TypeError (no implicit conversion of String into Integer): # # Raises TypeError (no implicit conversion of String into Integer):
# table['Name'] # table['Name']
# #
# --- # ---
# #
# The expression <tt>table[range]</tt>, where +range+ is a Range object, # Fetch a Column by Its \Integer Index::
# returns rows from the table, beginning at row <tt>range.first</tt>, # - Form: <tt>table[n]</tt>, +n+ an \Integer.
# if those rows exist, and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>: # - Access mode: <tt>:col</tt>.
# - Return value: _nth_ column of the table, if that column exists;
# otherwise an \Array of +nil+ fields of length <tt>self.size</tt>.
#
# Returns the _nth_ column of the table if that column exists:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.by_col! # => #<CSV::Table mode:col row_count:4>
# table[1] # => ["0", "1", "2"]
#
# Counts backward from the last column if +n+ is negative:
# table[-2] # => ["foo", "bar", "baz"]
#
# Returns an \Array of +nil+ fields if +n+ is too large or too small:
# table[4] # => [nil, nil, nil]
# table[-4] # => [nil, nil, nil]
#
# ---
#
# Fetch Rows by \Range::
# - Form: <tt>table[range]</tt>, +range+ a \Range object.
# - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
# - Return value: rows from the table, beginning at row <tt>range.start</tt>,
# if those rows exist.
#
# Returns rows from the table, beginning at row <tt>range.first</tt>,
# if those rows exist:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true) # table = CSV.parse(source, headers: true)
# table.by_row! # => #<CSV::Table mode:row row_count:4> # table.by_row! # => #<CSV::Table mode:row row_count:4>
@ -191,7 +441,7 @@ class CSV
# rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1"> # rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1">
# rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
# #
# If there are too few rows, returns all from <tt>range.first</tt> to the end: # If there are too few rows, returns all from <tt>range.start</tt> to the end:
# rows = table[1..50] # => #<CSV::Row "Name":"bar" "Value":"1"> # rows = table[1..50] # => #<CSV::Row "Name":"bar" "Value":"1">
# rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
# #
@ -211,9 +461,41 @@ class CSV
# #
# --- # ---
# #
# The expression <tt>table[header]</tt>, where +header+ is a \String, # Fetch Columns by \Range::
# returns column values (\Array of \Strings) if the column exists # - Form: <tt>table[range]</tt>, +range+ a \Range object.
# and if the access mode is <tt>:col</tt> or <tt>:col_or_row</tt>: # - Access mode: <tt>:col</tt>.
# - Return value: column data from the table, beginning at column <tt>range.start</tt>,
# if those columns exist.
#
# Returns column values from the table, if the column exists;
# the values are arranged by row:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.by_col!
# table[0..1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
#
# Special case: if <tt>range.start == headers.size</tt>,
# returns an \Array (size: <tt>table.size</tt>) of empty \Arrays:
# table[table.headers.size..50] # => [[], [], []]
#
# If <tt>range.end</tt> is negative, calculates the ending index from the end:
# table[0..-1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
#
# If <tt>range.start</tt> is negative, calculates the starting index from the end:
# table[-2..2] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
#
# If <tt>range.start</tt> is larger than <tt>table.size</tt>,
# returns an \Array of +nil+ values:
# table[4..4] # => [nil, nil, nil]
#
# ---
#
# Fetch a Column by Its \String Header::
# - Form: <tt>table[header]</tt>, +header+ a \String header.
# - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt>
# - Return value: column data from the table, if that +header+ exists.
#
# Returns column values from the table, if the column exists:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true) # table = CSV.parse(source, headers: true)
# table.by_col! # => #<CSV::Table mode:col row_count:4> # table.by_col! # => #<CSV::Table mode:col row_count:4>
@ -238,22 +520,132 @@ class CSV
end end
end end
# :call-seq:
# table[n] = row -> row
# table[n] = field_or_array_of_fields -> field_or_array_of_fields
# table[header] = field_or_array_of_fields -> field_or_array_of_fields
# #
# In the default mixed mode, this method assigns rows for index access and # Puts data onto the table.
# columns for header access. You can force the index association by first
# calling by_col!() or by_row!().
# #
# Rows may be set to an Array of values (which will inherit the table's # ---
# headers()) or a CSV::Row.
# #
# Columns may be set to a single value, which is copied to each row of the # Set a \Row by Its \Integer Index::
# column, or an Array of values. Arrays of values are assigned to rows top # - Form: <tt>table[n] = row</tt>, +n+ an \Integer,
# to bottom in row major order. Excess values are ignored and if the Array # +row+ a \CSV::Row instance or an \Array of fields.
# does not have a value for each row the extra rows will receive a +nil+. # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
# - Return value: +row+.
# #
# Assigning to an existing column or row clobbers the data. Assigning to # If the row exists, it is replaced:
# new columns creates them at the right end of the table. # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# new_row = CSV::Row.new(['Name', 'Value'], ['bat', 3])
# table.by_row! # => #<CSV::Table mode:row row_count:4>
# return_value = table[0] = new_row
# return_value.equal?(new_row) # => true # Returned the row
# table[0].to_h # => {"Name"=>"bat", "Value"=>3}
# #
# With access mode <tt>:col_or_row</tt>:
# table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
# table[0] = CSV::Row.new(['Name', 'Value'], ['bam', 4])
# table[0].to_h # => {"Name"=>"bam", "Value"=>4}
#
# With an \Array instead of a \CSV::Row, inherits headers from the table:
# array = ['bad', 5]
# return_value = table[0] = array
# return_value.equal?(array) # => true # Returned the array
# table[0].to_h # => {"Name"=>"bad", "Value"=>5}
#
# If the row does not exist, extends the table by adding rows,
# filling any intervening rows with +nil+ as needed:
# table.size # => 3
# table[5] = ['bag', 6]
# table.size # => 6
# table[3] # => nil
# table[4] # => nil
# table[5].to_h # => {"Name"=>"bag", "Value"=>6}
#
# Note that the +nil+ rows are actually +nil+, not a row of +nil+ fields.
#
# ---
#
# Set a Column by Its \Integer Index::
# - Form: <tt>table[n] = array_of_fields</tt>, +n+ an \Integer,
# +array_of_fields+ an \Array of \String fields.
# - Access mode: <tt>:col</tt>.
# - Return value: +array_of_fields+.
#
# If the column exists, it is replaced:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# new_col = [3, 4, 5]
# table.by_col! # => #<CSV::Table mode:col row_count:4>
# return_value = table[1] = new_col
# return_value.equal?(new_col) # => true # Returned the column
# table[1] # => [3, 4, 5]
# # The rows, as revised:
# table.by_row! # => #<CSV::Table mode:row row_count:4>
# table[0].to_h # => {"Name"=>"foo", "Value"=>3}
# table[1].to_h # => {"Name"=>"bar", "Value"=>4}
# table[2].to_h # => {"Name"=>"baz", "Value"=>5}
# table.by_col! # => #<CSV::Table mode:col row_count:4>
#
# If there are too few values, fills with +nil+ values:
# table[1] = [0]
# table[1] # => [0, nil, nil]
#
# If there are too many values, ignores the extra values:
# table[1] = [0, 1, 2, 3, 4]
# table[1] # => [0, 1, 2]
#
# If a single value is given, replaces all fields in the column with that value:
# table[1] = 'bat'
# table[1] # => ["bat", "bat", "bat"]
#
# ---
#
# Set a Column by Its \String Header::
# - Form: <tt>table[header] = field_or_array_of_fields</tt>,
# +header+ a \String header, +field_or_array_of_fields+ a field value
# or an \Array of \String fields.
# - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt>.
# - Return value: +field_or_array_of_fields+.
#
# If the column exists, it is replaced:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# new_col = [3, 4, 5]
# table.by_col! # => #<CSV::Table mode:col row_count:4>
# return_value = table['Value'] = new_col
# return_value.equal?(new_col) # => true # Returned the column
# table['Value'] # => [3, 4, 5]
# # The rows, as revised:
# table.by_row! # => #<CSV::Table mode:row row_count:4>
# table[0].to_h # => {"Name"=>"foo", "Value"=>3}
# table[1].to_h # => {"Name"=>"bar", "Value"=>4}
# table[2].to_h # => {"Name"=>"baz", "Value"=>5}
# table.by_col! # => #<CSV::Table mode:col row_count:4>
#
# If there are too few values, fills with +nil+ values:
# table['Value'] = [0]
# table['Value'] # => [0, nil, nil]
#
# If there are too many values, ignores the extra values:
# table['Value'] = [0, 1, 2, 3, 4]
# table['Value'] # => [0, 1, 2]
#
# If the column does not exist, extends the table by adding columns:
# table['Note'] = ['x', 'y', 'z']
# table['Note'] # => ["x", "y", "z"]
# # The rows, as revised:
# table.by_row!
# table[0].to_h # => {"Name"=>"foo", "Value"=>0, "Note"=>"x"}
# table[1].to_h # => {"Name"=>"bar", "Value"=>1, "Note"=>"y"}
# table[2].to_h # => {"Name"=>"baz", "Value"=>2, "Note"=>"z"}
# table.by_col!
#
# If a single value is given, replaces all fields in the column with that value:
# table['Value'] = 'bat'
# table['Value'] # => ["bat", "bat", "bat"]
def []=(index_or_header, value) def []=(index_or_header, value)
if @mode == :row or # by index if @mode == :row or # by index
(@mode == :col_or_row and index_or_header.is_a? Integer) (@mode == :col_or_row and index_or_header.is_a? Integer)
@ -463,6 +855,9 @@ class CSV
end end
end end
# :call-seq:
# table.delete_if {|row_or_column| ... } -> self
#
# Removes rows or columns for which the block returns a truthy value; # Removes rows or columns for which the block returns a truthy value;
# returns +self+. # returns +self+.
# #
@ -506,6 +901,9 @@ class CSV
include Enumerable include Enumerable
# :call-seq:
# table.each {|row_or_column| ... } -> self
#
# Calls the block with each row or column; returns +self+. # Calls the block with each row or column; returns +self+.
# #
# When the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>, # When the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>,
@ -534,7 +932,9 @@ class CSV
return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given? return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given?
if @mode == :col if @mode == :col
headers.each { |header| yield([header, self[header]]) } headers.each.with_index do |header, i|
yield([header, @table.map {|row| row[header, i]}])
end
else else
@table.each(&block) @table.each(&block)
end end
@ -542,6 +942,9 @@ class CSV
self # for chaining self # for chaining
end end
# :call-seq:
# table == other_table -> true or false
#
# Returns +true+ if each row of +self+ <tt>==</tt> # Returns +true+ if each row of +self+ <tt>==</tt>
# the corresponding row of +other_table+, otherwise, +false+. # the corresponding row of +other_table+, otherwise, +false+.
# #
@ -565,10 +968,14 @@ class CSV
@table == other @table == other
end end
# :call-seq:
# table.to_a -> array_of_arrays
# #
# Returns the table as an Array of Arrays. Headers will be the first row, # Returns the table as an \Array of \Arrays;
# then all of the field rows will follow. # the headers are in the first row:
# # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.to_a # => [["Name", "Value"], ["foo", "0"], ["bar", "1"], ["baz", "2"]]
def to_a def to_a
array = [headers] array = [headers]
@table.each do |row| @table.each do |row|
@ -578,16 +985,29 @@ class CSV
array array
end end
# :call-seq:
# table.to_csv(**options) -> csv_string
# #
# Returns the table as a complete CSV String. Headers will be listed first, # Returns the table as a \CSV string.
# then all of the field rows. # See {Options for Generating}[../CSV.html#class-CSV-label-Options+for+Generating].
# #
# This method assumes you want the Table.headers(), unless you explicitly # Defaults option +write_headers+ to +true+:
# pass <tt>:write_headers => false</tt>. # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.to_csv # => "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# #
def to_csv(write_headers: true, **options) # Omits the headers if option +write_headers+ is given as +false+
# (see {Option +write_headers+}[../CSV.html#class-CSV-label-Option+write_headers]):
# table.to_csv(write_headers: false) # => "foo,0\nbar,1\nbaz,2\n"
#
# Limits the number of rows if option +limit+ is given, for example +2+:
# table.to_csv(limit: 2) # => "Name,Value\nfoo,0\nbar,1\n"
def to_csv(write_headers: true, limit: nil, **options)
array = write_headers ? [headers.to_csv(**options)] : [] array = write_headers ? [headers.to_csv(**options)] : []
@table.each do |row| limit ||= @table.size
limit = @table.size + 1 + limit if limit < 0
limit = 0 if limit < 0
@table.first(limit).each do |row|
array.push(row.fields.to_csv(**options)) unless row.header_row? array.push(row.fields.to_csv(**options)) unless row.header_row?
end end
@ -613,9 +1033,24 @@ class CSV
end end
end end
# Shows the mode and size of this table in a US-ASCII String. # :call-seq:
# table.inspect -> string
#
# Returns a <tt>US-ASCII</tt>-encoded \String showing table:
# - Class: <tt>CSV::Table</tt>.
# - Access mode: <tt>:row</tt>, <tt>:col</tt>, or <tt>:col_or_row</tt>.
# - Size: Row count, including the header row.
#
# Example:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.inspect # => "#<CSV::Table mode:col_or_row row_count:4>\nName,Value\nfoo,0\nbar,1\nbaz,2\n"
#
def inspect def inspect
"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII") inspected = +"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
summary = to_csv(limit: 5)
inspected << "\n" << summary if summary.encoding.ascii_compatible?
inspected
end end
end end
end end

View file

@ -2,5 +2,5 @@
class CSV class CSV
# The version of the installed library. # The version of the installed library.
VERSION = "3.2.2" VERSION = "3.2.3"
end end

View file

@ -205,6 +205,16 @@ class TestCSVInterfaceRead < Test::Unit::TestCase
end end
end end
# Checks that CSV.open accepts newline-related options: first
# +universal_newline: true+ against the existing input file read with a
# tab column separator, then +newline: :universal+ after the file has been
# rewritten with mixed "\r\n" / "\n" line endings. Both reads must yield @rows.
def test_open_with_newline
  CSV.open(@input.path, col_sep: "\t", universal_newline: true) do |reader|
    assert_equal(@rows, reader.to_a)
  end

  mixed_line_endings = "1,2,3\r\n" "4,5\n"
  File.binwrite(@input.path, mixed_line_endings)
  CSV.open(@input.path, newline: :universal) do |reader|
    assert_equal(@rows, reader.to_a)
  end
end
def test_parse def test_parse
assert_equal(@rows, assert_equal(@rows,
CSV.parse(@data, col_sep: "\t", row_sep: "\r\n")) CSV.parse(@data, col_sep: "\t", row_sep: "\r\n"))

View file

@ -199,6 +199,32 @@ line,5,jkl
field_size_limit: 2048 ) field_size_limit: 2048 )
end end
# A quoted field parses successfully when +field_size_limit+ is one more
# than the field's size.
def test_field_size_limit_max_allowed
  field = "abcde"
  parsed = CSV.parse("\"#{field}\"", field_size_limit: field.size + 1)
  assert_equal([[field]], parsed)
end
# A quoted field must be rejected when +field_size_limit+ equals the
# field's size.
def test_field_size_limit_quote_simple
  field = "abcde"
  quoted = "\"#{field}\""
  assert_parse_errors_out(quoted, field_size_limit: field.size)
end
# An unquoted field (quote character left at its default) must be rejected
# when +field_size_limit+ equals the field's size.
def test_field_size_limit_no_quote_implicitly
  field = "abcde"
  unquoted = "#{field}"
  assert_parse_errors_out(unquoted, field_size_limit: field.size)
end
# With quoting disabled (+quote_char: nil+), a field must be rejected when
# +field_size_limit+ equals the field's size.
def test_field_size_limit_no_quote_explicitly
  field = "abcde"
  unquoted = "#{field}"
  assert_parse_errors_out(unquoted,
                          field_size_limit: field.size,
                          quote_char: nil)
end
def test_field_size_limit_in_extended_column_not_exceeding def test_field_size_limit_in_extended_column_not_exceeding
data = <<~DATA data = <<~DATA
"a","b" "a","b"
@ -221,6 +247,59 @@ line,5,jkl
assert_parse_errors_out(data, field_size_limit: 5) assert_parse_errors_out(data, field_size_limit: 5)
end end
# A row whose last quoted field is BIG_DATA must make parsing error out
# when +max_field_size+ is 2048.
def test_max_field_size_controls_lookahead
  oversized_row = 'valid,fields,"' + BIG_DATA + '"'
  assert_parse_errors_out(oversized_row, max_field_size: 2048)
end
# A quoted field parses successfully when +max_field_size+ equals the
# field's size.
def test_max_field_size_max_allowed
  field = "abcde"
  parsed = CSV.parse("\"#{field}\"", max_field_size: field.size)
  assert_equal([[field]], parsed)
end
# A quoted field must be rejected when +max_field_size+ is one less than
# the field's size.
def test_max_field_size_quote_simple
  field = "abcde"
  quoted = "\"#{field}\""
  assert_parse_errors_out(quoted, max_field_size: field.size - 1)
end
# An unquoted field (quote character left at its default) must be rejected
# when +max_field_size+ is one less than the field's size.
def test_max_field_size_no_quote_implicitly
  field = "abcde"
  unquoted = "#{field}"
  assert_parse_errors_out(unquoted, max_field_size: field.size - 1)
end
# With quoting disabled (+quote_char: nil+), a field must be rejected when
# +max_field_size+ is one less than the field's size.
def test_max_field_size_no_quote_explicitly
  field = "abcde"
  unquoted = "#{field}"
  assert_parse_errors_out(unquoted,
                          max_field_size: field.size - 1,
                          quote_char: nil)
end
# A quoted field spanning several lines ("\n2\n", three characters) must
# parse without CSV::MalformedCSVError when +max_field_size+ is 3.
def test_max_field_size_in_extended_column_not_exceeding
  multiline_csv = <<~DATA
    "a","b"
    "
    2
    ",""
  DATA
  assert_nothing_raised(CSV::MalformedCSVError) do
    CSV.parse(multiline_csv, max_field_size: 3)
  end
end
# A quoted field spanning several lines ("\n2345\n") must make parsing
# error out when +max_field_size+ is 4.
def test_max_field_size_in_extended_column_exceeding
  multiline_csv = <<~DATA
    "a","b"
    "
    2345
    ",""
  DATA
  assert_parse_errors_out(multiline_csv, max_field_size: 4)
end
def test_row_sep_auto_cr def test_row_sep_auto_cr
assert_equal([["a"]], CSV.parse("a\r")) assert_equal([["a"]], CSV.parse("a\r"))
end end
@ -246,11 +325,7 @@ line,5,jkl
private private
def assert_parse_errors_out(data, **options) def assert_parse_errors_out(data, **options)
assert_raise(CSV::MalformedCSVError) do assert_raise(CSV::MalformedCSVError) do
timeout = 0.2 Timeout.timeout(0.2) do
if defined?(RubyVM::MJIT.enabled?) and RubyVM::MJIT.enabled?
timeout = 5 # for --jit-wait
end
Timeout.timeout(timeout) do
CSV.parse(data, **options) CSV.parse(data, **options)
fail("Parse didn't error out") fail("Parse didn't error out")
end end

View file

@ -218,6 +218,13 @@ A,B,C
assert_equal([:one, :two_three], csv.headers) assert_equal([:one, :two_three], csv.headers)
end end
# The +:symbol_raw+ header converter must turn the headers "a b" and "c d"
# into the symbols :"a b" and :"c d".
def test_builtin_symbol_raw_converter
  parsed = CSV.parse("a b,c d",
                     headers: true,
                     return_headers: true,
                     header_converters: :symbol_raw)
  assert_equal([:"a b", :"c d"], parsed.headers)
end
def test_builtin_symbol_converter_with_punctuation def test_builtin_symbol_converter_with_punctuation
csv = CSV.parse( "One, Two & Three ($)", headers: true, csv = CSV.parse( "One, Two & Three ($)", headers: true,
return_headers: true, return_headers: true,
@ -228,7 +235,7 @@ A,B,C
def test_builtin_converters_with_blank_header def test_builtin_converters_with_blank_header
csv = CSV.parse( "one,,three", headers: true, csv = CSV.parse( "one,,three", headers: true,
return_headers: true, return_headers: true,
header_converters: [:downcase, :symbol] ) header_converters: [:downcase, :symbol, :symbol_raw] )
assert_equal([:one, nil, :three], csv.headers) assert_equal([:one, nil, :three], csv.headers)
end end

View file

@ -0,0 +1,63 @@
require_relative "../helper"
# Exercises CSV::Parser::InputsScanner's keep_start / keep_back / keep_drop
# bookkeeping when the input is read in chunks smaller than the scanned text.
class TestCSVParseInputsScanner < Test::Unit::TestCase
  include Helper

  # Two nested keep_start calls followed by two keep_back calls must rewind
  # to the very beginning, so the next scan sees "abcdefg" again.
  def test_scan_keep_over_chunks_nested_back
    scanner = new_chunked_scanner("abcdefghijklmnl", chunk_size: 2)
    scanner.keep_start
    assert_equal("abc", scanner.scan_all(/[a-c]+/))
    scanner.keep_start
    assert_equal("def", scanner.scan_all(/[d-f]+/))
    2.times { scanner.keep_back }
    assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
  end

  # Dropping the inner keep and then going back on the outer one must also
  # rewind to the beginning of the input.
  def test_scan_keep_over_chunks_nested_drop_back
    scanner = new_chunked_scanner("abcdefghijklmnl", chunk_size: 3)
    scanner.keep_start
    assert_equal("ab", scanner.scan(/../))
    scanner.keep_start
    assert_equal("c", scanner.scan(/./))
    assert_equal("d", scanner.scan(/./))
    scanner.keep_drop
    scanner.keep_back
    assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
  end

  # each_line with a one-unit chunk size over input containing a multibyte
  # character: after keep_back the second line must be scannable again.
  def test_each_line_keep_over_chunks_multibyte
    scanner = new_chunked_scanner("ab\n\u{3000}a\n", chunk_size: 1)
    lines = scanner.each_line("\n")
    assert_equal("ab\n", lines.next)
    scanner.keep_start
    assert_equal("\u{3000}a\n", lines.next)
    scanner.keep_back
    assert_equal("\u{3000}a\n", scanner.scan_all(/[^,]+/))
  end

  # each_line where the first line is exactly as long as the chunk size;
  # the final keep_back carries no assertion and only has to not raise.
  def test_each_line_keep_over_chunks_fit_chunk_size
    scanner = new_chunked_scanner("\na", chunk_size: 1)
    lines = scanner.each_line("\n")
    assert_equal("\n", lines.next)
    scanner.keep_start
    assert_equal("a", lines.next)
    scanner.keep_back
  end

  private

  # Builds an InputsScanner over a single UnoptimizedStringIO holding +text+,
  # with UTF-8 encoding, a nil third argument and the given +chunk_size+.
  def new_chunked_scanner(text, chunk_size:)
    input = CSV::Parser::UnoptimizedStringIO.new(text)
    CSV::Parser::InputsScanner.new([input],
                                   Encoding::UTF_8,
                                   nil,
                                   chunk_size: chunk_size)
  end
end

View file

@ -28,6 +28,17 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase
CSV.parse_line(input, liberal_parsing: true)) CSV.parse_line(input, liberal_parsing: true))
end end
# With +liberal_parsing+, a bare "\n" right after a quoted field (while the
# other rows end in "\r\n") must raise CSV::MalformedCSVError for line 2,
# and the following row must still be readable afterwards.
def test_endline_after_quoted_field_end
  parser = CSV.new("A\r\n\"B\"\nC\r\n", liberal_parsing: true)
  assert_equal(["A"], parser.gets)
  raised = assert_raise(CSV::MalformedCSVError) { parser.gets }
  assert_equal('Illegal end-of-line sequence outside of a quoted field <"\n"> in line 2.',
               raised.message)
  assert_equal(["C"], parser.gets)
end
def test_quote_after_column_separator def test_quote_after_column_separator
error = assert_raise(CSV::MalformedCSVError) do error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true) CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true)

27
test/csv/test_patterns.rb Normal file
View file

@ -0,0 +1,27 @@
# frozen_string_literal: true
require_relative "helper"
# Checks that a CSV::Row can be the subject of +case+/+in+ pattern matching,
# using both hash patterns and array patterns.
class TestCSVPatternMatching < Test::Unit::TestCase
  # A hash pattern binds the fields stored under the named headers.
  def test_hash
    case sample_row
    in B: second, C: third
      assert_equal([2, 3], [second, third])
    end
  end

  # A hash pattern with a double-splat collects the remaining header/field pairs.
  def test_hash_rest
    case sample_row
    in B: second, **others
      assert_equal([2, { A: 1, C: 3 }], [second, others])
    end
  end

  # An array pattern matches against the row's fields; here the last one.
  def test_array
    case sample_row
    in *, final
      assert_equal(3, final)
    end
  end

  private

  # A fresh row with symbol headers :A, :B, :C and fields 1, 2, 3.
  def sample_row
    CSV::Row.new(%i{A B C}, [1, 2, 3])
  end
end

View file

@ -274,6 +274,22 @@ A,B,C,Type,Index
@table.each { |row| assert_instance_of(CSV::Row, row) } @table.each { |row| assert_instance_of(CSV::Row, row) }
end end
# In column mode, #each must yield one [header, values] pair per column
# position, even when headers are duplicated ("a", "a") or blank (nil, nil).
def test_each_by_col_duplicated_headers
  duplicated = CSV.parse(<<~CSV, headers: true)
    a,a,,,b
    1,2,3,4,5
    11,12,13,14,15
  CSV
  expected_columns = [
    ["a", ["1", "11"]],
    ["a", ["2", "12"]],
    [nil, ["3", "13"]],
    [nil, ["4", "14"]],
    ["b", ["5", "15"]],
  ]
  assert_equal(expected_columns, duplicated.by_col.each.to_a)
end
def test_each_split def test_each_split
yielded_values = [] yielded_values = []
@table.each do |column1, column2, column3| @table.each do |column1, column2, column3|
@ -320,6 +336,43 @@ A,B,C
assert_equal(csv, @header_table.to_csv) assert_equal(csv, @header_table.to_csv)
end end
# +limit: 2+ must emit the header line followed by the first two rows only.
def test_to_csv_limit_positive
  expected = <<~CSV
    A,B,C
    1,2,3
    4,5,6
  CSV
  assert_equal(expected, @table.to_csv(limit: 2))
end
# +limit: 5+ on @table must emit the header line and all three rows.
def test_to_csv_limit_positive_over
  expected = <<~CSV
    A,B,C
    1,2,3
    4,5,6
    7,8,9
  CSV
  assert_equal(expected, @table.to_csv(limit: 5))
end
# +limit: 0+ must emit only the header line.
def test_to_csv_limit_zero
  expected = <<~CSV
    A,B,C
  CSV
  assert_equal(expected, @table.to_csv(limit: 0))
end
# +limit: -2+ must emit the header line followed by the first two rows
# (the last row is left out).
def test_to_csv_limit_negative
  expected = <<~CSV
    A,B,C
    1,2,3
    4,5,6
  CSV
  assert_equal(expected, @table.to_csv(limit: -2))
end
# +limit: -5+ must emit only the header line.
def test_to_csv_limit_negative_over
  expected = <<~CSV
    A,B,C
  CSV
  assert_equal(expected, @table.to_csv(limit: -5))
end
def test_append def test_append
# verify that we can chain the call # verify that we can chain the call
assert_equal(@table, @table << [10, 11, 12]) assert_equal(@table, @table << [10, 11, 12])
@ -552,6 +605,24 @@ A
"inspect() was not ASCII compatible." ) "inspect() was not ASCII compatible." )
end end
# #inspect on a table built from @rows plus three extra rows must show the
# summary line (row_count:7) followed by the headers and only the first
# five data rows.
def test_inspect_with_rows
  extra_rows = [
    [101, 102, 103],
    [104, 105, 106],
    [107, 108, 109],
  ].map { |fields| CSV::Row.new(%w{A B C}, fields) }
  enlarged = CSV::Table.new(@rows + extra_rows)
  inspected = enlarged.inspect
  expected = <<~CSV
    #<CSV::Table mode:col_or_row row_count:7>
    A,B,C
    1,2,3
    4,5,6
    7,8,9
    101,102,103
    104,105,106
  CSV
  assert_equal(expected, inspected)
end
def test_dig_mixed def test_dig_mixed
# by row # by row
assert_equal(@rows[0], @table.dig(0)) assert_equal(@rows[0], @table.dig(0))