Merge csv-3.2.3

This commit is contained in:
Hiroshi SHIBATA 2022-08-25 10:49:13 +09:00 committed by nagachika
parent a9bf13a4df
commit c69fffe67d
13 changed files with 1431 additions and 364 deletions

View file

@ -48,7 +48,7 @@
#
# === Interface
#
# * CSV now uses Hash-style parameters to set options.
# * CSV now uses keyword parameters to set options.
# * CSV no longer has generate_row() or parse_row().
# * The old CSV's Reader and Writer classes have been dropped.
# * CSV::open() is now more like Ruby's open().
@ -104,7 +104,18 @@ require_relative "csv/writer"
using CSV::MatchP if CSV.const_defined?(:MatchP)
# == \CSV
# \CSV (comma-separated variables) data is a text representation of a table:
#
# === In a Hurry?
#
# If you are familiar with \CSV data and have a particular task in mind,
# you may want to go directly to the:
# - {Recipes for CSV}[doc/csv/recipes/recipes_rdoc.html].
#
# Otherwise, read on here, about the API: classes, methods, and constants.
#
# === \CSV Data
#
# \CSV (comma-separated values) data is a text representation of a table:
# - A _row_ _separator_ delimits table rows.
# A common row separator is the newline character <tt>"\n"</tt>.
# - A _column_ _separator_ delimits fields in a row.
@ -346,7 +357,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
# - +row_sep+: Specifies the row separator; used to delimit rows.
# - +col_sep+: Specifies the column separator; used to delimit fields.
# - +quote_char+: Specifies the quote character; used to quote fields.
# - +field_size_limit+: Specifies the maximum field size allowed.
# - +field_size_limit+: Specifies the maximum field size + 1 allowed.
# Deprecated since 3.2.3. Use +max_field_size+ instead.
# - +max_field_size+: Specifies the maximum field size allowed.
# - +converters+: Specifies the field converters to be used.
# - +unconverted_fields+: Specifies whether unconverted fields are to be available.
# - +headers+: Specifies whether data contains headers,
@ -703,7 +716,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
# Header converters operate only on headers (and not on other rows).
#
# There are three ways to use header \converters;
# these examples use built-in header converter +:dowhcase+,
# these examples use built-in header converter +:downcase+,
# which downcases each parsed header.
#
# - Option +header_converters+ with a singleton parsing method:
@ -915,7 +928,8 @@ class CSV
symbol: lambda { |h|
h.encode(ConverterEncoding).downcase.gsub(/[^\s\w]+/, "").strip.
gsub(/\s+/, "_").to_sym
}
},
symbol_raw: lambda { |h| h.encode(ConverterEncoding).to_sym }
}
# Default values for method options.
@ -926,6 +940,7 @@ class CSV
quote_char: '"',
# For parsing.
field_size_limit: nil,
max_field_size: nil,
converters: nil,
unconverted_fields: nil,
headers: false,
@ -1007,60 +1022,185 @@ class CSV
end
# :call-seq:
# filter(**options) {|row| ... }
# filter(in_string, **options) {|row| ... }
# filter(in_io, **options) {|row| ... }
# filter(in_string, out_string, **options) {|row| ... }
# filter(in_string, out_io, **options) {|row| ... }
# filter(in_io, out_string, **options) {|row| ... }
# filter(in_io, out_io, **options) {|row| ... }
# filter(in_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
# filter(in_string_or_io, out_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
# filter(**options) {|row| ... } -> array_of_arrays or csv_table
#
# Reads \CSV input and writes \CSV output.
# - Parses \CSV from a source (\String, \IO stream, or ARGF).
# - Calls the given block with each parsed row:
# - Without headers, each row is an \Array.
# - With headers, each row is a CSV::Row.
# - Generates \CSV to an output (\String, \IO stream, or STDOUT).
# - Returns the parsed source:
# - Without headers, an \Array of \Arrays.
# - With headers, a CSV::Table.
#
# For each input row:
# - Forms the data into:
# - A CSV::Row object, if headers are in use.
# - An \Array of Arrays, otherwise.
# - Calls the block with that object.
# - Appends the block's return value to the output.
# When +in_string_or_io+ is given, but not +out_string_or_io+,
# parses from the given +in_string_or_io+
# and generates to STDOUT.
#
# Arguments:
# * \CSV source:
# * Argument +in_string+, if given, should be a \String object;
# it will be put into a new StringIO object positioned at the beginning.
# * Argument +in_io+, if given, should be an IO object that is
# open for reading; on return, the IO object will be closed.
# * If neither +in_string+ nor +in_io+ is given,
# the input stream defaults to {ARGF}[https://ruby-doc.org/core/ARGF.html].
# * \CSV output:
# * Argument +out_string+, if given, should be a \String object;
# it will be put into a new StringIO object positioned at the beginning.
# * Argument +out_io+, if given, should be an IO object that is
# ppen for writing; on return, the IO object will be closed.
# * If neither +out_string+ nor +out_io+ is given,
# the output stream defaults to <tt>$stdout</tt>.
# * Argument +options+ should be keyword arguments.
# - Each argument name that is prefixed with +in_+ or +input_+
# is stripped of its prefix and is treated as an option
# for parsing the input.
# Option +input_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>.
# - Each argument name that is prefixed with +out_+ or +output_+
# is stripped of its prefix and is treated as an option
# for generating the output.
# Option +output_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>.
# - Each argument not prefixed as above is treated as an option
# both for parsing the input and for generating the output.
# - See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
# and {Options for Generating}[#class-CSV-label-Options+for+Generating].
# \String input without headers:
#
# Example:
# in_string = "foo,0\nbar,1\nbaz,2\n"
# in_string = "foo,0\nbar,1\nbaz,2"
# CSV.filter(in_string) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
#
# Output (to STDOUT):
#
# FOO,0
# BAR,-1
# BAZ,-2
#
# \String input with headers:
#
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
# CSV.filter(in_string, headers: true) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end # => #<CSV::Table mode:col_or_row row_count:4>
#
# Output (to STDOUT):
#
# Name,Value
# FOO,0
# BAR,-1
# BAZ,-2
#
# \IO stream input without headers:
#
# File.write('t.csv', "foo,0\nbar,1\nbaz,2")
# File.open('t.csv') do |in_io|
# CSV.filter(in_io) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
#
# Output (to STDOUT):
#
# FOO,0
# BAR,-1
# BAZ,-2
#
# \IO stream input with headers:
#
# File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
# File.open('t.csv') do |in_io|
# CSV.filter(in_io, headers: true) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end
# end # => #<CSV::Table mode:col_or_row row_count:4>
#
# Output (to STDOUT):
#
# Name,Value
# FOO,0
# BAR,-1
# BAZ,-2
#
# When both +in_string_or_io+ and +out_string_or_io+ are given,
# parses from +in_string_or_io+ and generates to +out_string_or_io+.
#
# \String output without headers:
#
# in_string = "foo,0\nbar,1\nbaz,2"
# out_string = ''
# CSV.filter(in_string, out_string) do |row|
# row[0] = row[0].upcase
# row[1] *= 4
# end
# out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n"
# row[0].upcase!
# row[1] = - row[1].to_i
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
# out_string # => "FOO,0\nBAR,-1\nBAZ,-2\n"
#
# \String output with headers:
#
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
# out_string = ''
# CSV.filter(in_string, out_string, headers: true) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end # => #<CSV::Table mode:col_or_row row_count:4>
# out_string # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
#
# \IO stream output without headers:
#
# in_string = "foo,0\nbar,1\nbaz,2"
# File.open('t.csv', 'w') do |out_io|
# CSV.filter(in_string, out_io) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
# File.read('t.csv') # => "FOO,0\nBAR,-1\nBAZ,-2\n"
#
# \IO stream output with headers:
#
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
# File.open('t.csv', 'w') do |out_io|
# CSV.filter(in_string, out_io, headers: true) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end
# end # => #<CSV::Table mode:col_or_row row_count:4>
# File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
#
# When neither +in_string_or_io+ nor +out_string_or_io+ given,
# parses from {ARGF}[https://docs.ruby-lang.org/en/master/ARGF.html]
# and generates to STDOUT.
#
# Without headers:
#
# # Put Ruby code into a file.
# ruby = <<-EOT
# require 'csv'
# CSV.filter do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end
# EOT
# File.write('t.rb', ruby)
# # Put some CSV into a file.
# File.write('t.csv', "foo,0\nbar,1\nbaz,2")
# # Run the Ruby code with CSV filename as argument.
# system(Gem.ruby, "t.rb", "t.csv")
#
# Output (to STDOUT):
#
# FOO,0
# BAR,-1
# BAZ,-2
#
# With headers:
#
# # Put Ruby code into a file.
# ruby = <<-EOT
# require 'csv'
# CSV.filter(headers: true) do |row|
# row[0].upcase!
# row[1] = - row[1].to_i
# end
# EOT
# File.write('t.rb', ruby)
# # Put some CSV into a file.
# File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
# # Run the Ruby code with CSV filename as argument.
# system(Gem.ruby, "t.rb", "t.csv")
#
# Output (to STDOUT):
#
# Name,Value
# FOO,0
# BAR,-1
# BAZ,-2
#
# Arguments:
#
# * Argument +in_string_or_io+ must be a \String or an \IO stream.
# * Argument +out_string_or_io+ must be a \String or an \IO stream.
# * Arguments <tt>**options</tt> must be keyword options.
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
def filter(input=nil, output=nil, **options)
# parse options for input, output, or both
in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value}
@ -1107,19 +1247,78 @@ class CSV
#
# :call-seq:
# foreach(path, mode='r', **options) {|row| ... )
# foreach(io, mode='r', **options {|row| ... )
# foreach(path, mode='r', headers: ..., **options) {|row| ... )
# foreach(io, mode='r', headers: ..., **options {|row| ... )
# foreach(path, mode='r', **options) -> new_enumerator
# foreach(io, mode='r', **options -> new_enumerator
# foreach(path_or_io, mode='r', **options) {|row| ... )
# foreach(path_or_io, mode='r', **options) -> new_enumerator
#
# Calls the block with each row read from source +path+ or +io+.
# Calls the block with each row read from source +path_or_io+.
#
# * Argument +path+, if given, must be the path to a file.
# :include: ../doc/csv/arguments/io.rdoc
# \Path input without headers:
#
# string = "foo,0\nbar,1\nbaz,2\n"
# in_path = 't.csv'
# File.write(in_path, string)
# CSV.foreach(in_path) {|row| p row }
#
# Output:
#
# ["foo", "0"]
# ["bar", "1"]
# ["baz", "2"]
#
# \Path input with headers:
#
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# in_path = 't.csv'
# File.write(in_path, string)
# CSV.foreach(in_path, headers: true) {|row| p row }
#
# Output:
#
# <CSV::Row "Name":"foo" "Value":"0">
# <CSV::Row "Name":"bar" "Value":"1">
# <CSV::Row "Name":"baz" "Value":"2">
#
# \IO stream input without headers:
#
# string = "foo,0\nbar,1\nbaz,2\n"
# path = 't.csv'
# File.write(path, string)
# File.open('t.csv') do |in_io|
# CSV.foreach(in_io) {|row| p row }
# end
#
# Output:
#
# ["foo", "0"]
# ["bar", "1"]
# ["baz", "2"]
#
# \IO stream input with headers:
#
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# path = 't.csv'
# File.write(path, string)
# File.open('t.csv') do |in_io|
# CSV.foreach(in_io, headers: true) {|row| p row }
# end
#
# Output:
#
# <CSV::Row "Name":"foo" "Value":"0">
# <CSV::Row "Name":"bar" "Value":"1">
# <CSV::Row "Name":"baz" "Value":"2">
#
# With no block given, returns an \Enumerator:
#
# string = "foo,0\nbar,1\nbaz,2\n"
# path = 't.csv'
# File.write(path, string)
# CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")>
#
# Arguments:
# * Argument +path_or_io+ must be a file path or an \IO stream.
# * Argument +mode+, if given, must be a \File mode
# See {Open Mode}[IO.html#method-c-new-label-Open+Mode].
# See {Open Mode}[https://ruby-doc.org/core/IO.html#method-c-new-label-Open+Mode].
# * Arguments <tt>**options</tt> must be keyword options.
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
# * This method optionally accepts an additional <tt>:encoding</tt> option
@ -1132,86 +1331,6 @@ class CSV
# encoding: 'UTF-32BE:UTF-8'
# would read +UTF-32BE+ data from the file
# but transcode it to +UTF-8+ before parsing.
#
# ====== Without Option +headers+
#
# Without option +headers+, returns each row as an \Array object.
#
# These examples assume prior execution of:
# string = "foo,0\nbar,1\nbaz,2\n"
# path = 't.csv'
# File.write(path, string)
#
# Read rows from a file at +path+:
# CSV.foreach(path) {|row| p row }
# Output:
# ["foo", "0"]
# ["bar", "1"]
# ["baz", "2"]
#
# Read rows from an \IO object:
# File.open(path) do |file|
# CSV.foreach(file) {|row| p row }
# end
#
# Output:
# ["foo", "0"]
# ["bar", "1"]
# ["baz", "2"]
#
# Returns a new \Enumerator if no block given:
# CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")>
# CSV.foreach(File.open(path)) # => #<Enumerator: CSV:foreach(#<File:t.csv>, "r")>
#
# Issues a warning if an encoding is unsupported:
# CSV.foreach(File.open(path), encoding: 'foo:bar') {|row| }
# Output:
# warning: Unsupported encoding foo ignored
# warning: Unsupported encoding bar ignored
#
# ====== With Option +headers+
#
# With {option +headers+}[#class-CSV-label-Option+headers],
# returns each row as a CSV::Row object.
#
# These examples assume prior execution of:
# string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
# path = 't.csv'
# File.write(path, string)
#
# Read rows from a file at +path+:
# CSV.foreach(path, headers: true) {|row| p row }
#
# Output:
# #<CSV::Row "Name":"foo" "Count":"0">
# #<CSV::Row "Name":"bar" "Count":"1">
# #<CSV::Row "Name":"baz" "Count":"2">
#
# Read rows from an \IO object:
# File.open(path) do |file|
# CSV.foreach(file, headers: true) {|row| p row }
# end
#
# Output:
# #<CSV::Row "Name":"foo" "Count":"0">
# #<CSV::Row "Name":"bar" "Count":"1">
# #<CSV::Row "Name":"baz" "Count":"2">
#
# ---
#
# Raises an exception if +path+ is a \String, but not the path to a readable file:
# # Raises Errno::ENOENT (No such file or directory @ rb_sysopen - nosuch.csv):
# CSV.foreach('nosuch.csv') {|row| }
#
# Raises an exception if +io+ is an \IO object, but not open for reading:
# io = File.open(path, 'w') {|row| }
# # Raises TypeError (no implicit conversion of nil into String):
# CSV.foreach(io) {|row| }
#
# Raises an exception if +mode+ is invalid:
# # Raises ArgumentError (invalid access mode nosuch):
# CSV.foreach(path, 'nosuch') {|row| }
#
def foreach(path, mode="r", **options, &block)
return to_enum(__method__, path, mode, **options) unless block_given?
open(path, mode, **options) do |csv|
@ -1357,7 +1476,7 @@ class CSV
# open(io, mode = "rb", **options ) { |csv| ... } -> object
#
# possible options elements:
# hash form:
# keyword form:
# :invalid => nil # raise error on invalid byte sequence (default)
# :invalid => :replace # replace invalid byte sequence
# :undef => :replace # replace undefined conversion
@ -1424,10 +1543,14 @@ class CSV
def open(filename, mode="r", **options)
# wrap a File opened with the remaining +args+ with no newline
# decorator
file_opts = {universal_newline: false}.merge(options)
file_opts = options.dup
unless file_opts.key?(:newline)
file_opts[:universal_newline] ||= false
end
options.delete(:invalid)
options.delete(:undef)
options.delete(:replace)
options.delete_if {|k, _| /newline\z/.match?(k)}
begin
f = File.open(filename, mode, **file_opts)
@ -1746,6 +1869,7 @@ class CSV
row_sep: :auto,
quote_char: '"',
field_size_limit: nil,
max_field_size: nil,
converters: nil,
unconverted_fields: nil,
headers: false,
@ -1788,11 +1912,14 @@ class CSV
@initial_header_converters = header_converters
@initial_write_converters = write_converters
if max_field_size.nil? and field_size_limit
max_field_size = field_size_limit - 1
end
@parser_options = {
column_separator: col_sep,
row_separator: row_sep,
quote_character: quote_char,
field_size_limit: field_size_limit,
max_field_size: max_field_size,
unconverted_fields: unconverted_fields,
headers: headers,
return_headers: return_headers,
@ -1860,10 +1987,24 @@ class CSV
# Returns the limit for field size; used for parsing;
# see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]:
# CSV.new('').field_size_limit # => nil
#
# Deprecated since 3.2.3. Use +max_field_size+ instead.
def field_size_limit
parser.field_size_limit
end
# :call-seq:
# csv.max_field_size -> integer or nil
#
# Returns the limit for field size; used for parsing;
# see {Option +max_field_size+}[#class-CSV-label-Option+max_field_size]:
# CSV.new('').max_field_size # => nil
#
# Since 3.2.3.
def max_field_size
parser.max_field_size
end
# :call-seq:
# csv.skip_lines -> regexp or nil
#
@ -1994,7 +2135,7 @@ class CSV
end
# :call-seq:
# csv.encoding -> endcoding
# csv.encoding -> encoding
#
# Returns the encoding used for parsing and generating;
# see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]: