mirror of
https://github.com/ruby/ruby.git
synced 2025-09-21 03:24:00 +02:00
Merge csv-3.2.3
This commit is contained in:
parent
a9bf13a4df
commit
c69fffe67d
13 changed files with 1431 additions and 364 deletions
|
@ -4,20 +4,7 @@ require "stringio"
|
|||
class CSV
|
||||
module InputRecordSeparator
|
||||
class << self
|
||||
is_input_record_separator_deprecated = false
|
||||
verbose, $VERBOSE = $VERBOSE, true
|
||||
stderr, $stderr = $stderr, StringIO.new
|
||||
input_record_separator = $INPUT_RECORD_SEPARATOR
|
||||
begin
|
||||
$INPUT_RECORD_SEPARATOR = "\r\n"
|
||||
is_input_record_separator_deprecated = (not $stderr.string.empty?)
|
||||
ensure
|
||||
$INPUT_RECORD_SEPARATOR = input_record_separator
|
||||
$stderr = stderr
|
||||
$VERBOSE = verbose
|
||||
end
|
||||
|
||||
if is_input_record_separator_deprecated
|
||||
if RUBY_VERSION >= "3.0.0"
|
||||
def value
|
||||
"\n"
|
||||
end
|
||||
|
|
|
@ -27,6 +27,10 @@ class CSV
|
|||
class InvalidEncoding < StandardError
|
||||
end
|
||||
|
||||
# Raised when an unexpected case happens.
|
||||
class UnexpectedError < StandardError
|
||||
end
|
||||
|
||||
#
|
||||
# CSV::Scanner receives a CSV output, scans it and returns the content.
|
||||
# It also controls the life cycle of the object with its methods +keep_start+,
|
||||
|
@ -78,10 +82,10 @@ class CSV
|
|||
# +keep_end+, +keep_back+, +keep_drop+.
|
||||
#
|
||||
# CSV::InputsScanner.scan() tries to match with pattern at the current position.
|
||||
# If there's a match, the scanner advances the “scan pointer” and returns the matched string.
|
||||
# If there's a match, the scanner advances the "scan pointer" and returns the matched string.
|
||||
# Otherwise, the scanner returns nil.
|
||||
#
|
||||
# CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
|
||||
# CSV::InputsScanner.rest() returns the "rest" of the string (i.e. everything after the scan pointer).
|
||||
# If there is no more data (eos? = true), it returns "".
|
||||
#
|
||||
class InputsScanner
|
||||
|
@ -96,11 +100,13 @@ class CSV
|
|||
end
|
||||
|
||||
def each_line(row_separator)
|
||||
return enum_for(__method__, row_separator) unless block_given?
|
||||
buffer = nil
|
||||
input = @scanner.rest
|
||||
position = @scanner.pos
|
||||
offset = 0
|
||||
n_row_separator_chars = row_separator.size
|
||||
# trace(__method__, :start, line, input)
|
||||
while true
|
||||
input.each_line(row_separator) do |line|
|
||||
@scanner.pos += line.bytesize
|
||||
|
@ -140,25 +146,28 @@ class CSV
|
|||
end
|
||||
|
||||
def scan(pattern)
|
||||
# trace(__method__, pattern, :start)
|
||||
value = @scanner.scan(pattern)
|
||||
# trace(__method__, pattern, :done, :last, value) if @last_scanner
|
||||
return value if @last_scanner
|
||||
|
||||
if value
|
||||
read_chunk if @scanner.eos?
|
||||
return value
|
||||
else
|
||||
nil
|
||||
end
|
||||
read_chunk if value and @scanner.eos?
|
||||
# trace(__method__, pattern, :done, value)
|
||||
value
|
||||
end
|
||||
|
||||
def scan_all(pattern)
|
||||
# trace(__method__, pattern, :start)
|
||||
value = @scanner.scan(pattern)
|
||||
# trace(__method__, pattern, :done, :last, value) if @last_scanner
|
||||
return value if @last_scanner
|
||||
|
||||
return nil if value.nil?
|
||||
while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern))
|
||||
# trace(__method__, pattern, :sub, sub_value)
|
||||
value << sub_value
|
||||
end
|
||||
# trace(__method__, pattern, :done, value)
|
||||
value
|
||||
end
|
||||
|
||||
|
@ -167,68 +176,126 @@ class CSV
|
|||
end
|
||||
|
||||
def keep_start
|
||||
@keeps.push([@scanner.pos, nil])
|
||||
# trace(__method__, :start)
|
||||
adjust_last_keep
|
||||
@keeps.push([@scanner, @scanner.pos, nil])
|
||||
# trace(__method__, :done)
|
||||
end
|
||||
|
||||
def keep_end
|
||||
start, buffer = @keeps.pop
|
||||
keep = @scanner.string.byteslice(start, @scanner.pos - start)
|
||||
# trace(__method__, :start)
|
||||
scanner, start, buffer = @keeps.pop
|
||||
if scanner == @scanner
|
||||
keep = @scanner.string.byteslice(start, @scanner.pos - start)
|
||||
else
|
||||
keep = @scanner.string.byteslice(0, @scanner.pos)
|
||||
end
|
||||
if buffer
|
||||
buffer << keep
|
||||
keep = buffer
|
||||
end
|
||||
# trace(__method__, :done, keep)
|
||||
keep
|
||||
end
|
||||
|
||||
def keep_back
|
||||
start, buffer = @keeps.pop
|
||||
# trace(__method__, :start)
|
||||
scanner, start, buffer = @keeps.pop
|
||||
if buffer
|
||||
# trace(__method__, :rescan, start, buffer)
|
||||
string = @scanner.string
|
||||
keep = string.byteslice(start, string.bytesize - start)
|
||||
if scanner == @scanner
|
||||
keep = string.byteslice(start, string.bytesize - start)
|
||||
else
|
||||
keep = string
|
||||
end
|
||||
if keep and not keep.empty?
|
||||
@inputs.unshift(StringIO.new(keep))
|
||||
@last_scanner = false
|
||||
end
|
||||
@scanner = StringScanner.new(buffer)
|
||||
else
|
||||
if @scanner != scanner
|
||||
message = "scanners are different but no buffer: "
|
||||
message += "#{@scanner.inspect}(#{@scanner.object_id}): "
|
||||
message += "#{scanner.inspect}(#{scanner.object_id})"
|
||||
raise UnexpectedError, message
|
||||
end
|
||||
# trace(__method__, :repos, start, buffer)
|
||||
@scanner.pos = start
|
||||
end
|
||||
read_chunk if @scanner.eos?
|
||||
end
|
||||
|
||||
def keep_drop
|
||||
@keeps.pop
|
||||
_, _, buffer = @keeps.pop
|
||||
# trace(__method__, :done, :empty) unless buffer
|
||||
return unless buffer
|
||||
|
||||
last_keep = @keeps.last
|
||||
# trace(__method__, :done, :no_last_keep) unless last_keep
|
||||
return unless last_keep
|
||||
|
||||
if last_keep[2]
|
||||
last_keep[2] << buffer
|
||||
else
|
||||
last_keep[2] = buffer
|
||||
end
|
||||
# trace(__method__, :done)
|
||||
end
|
||||
|
||||
def rest
|
||||
@scanner.rest
|
||||
end
|
||||
|
||||
def check(pattern)
|
||||
@scanner.check(pattern)
|
||||
end
|
||||
|
||||
private
|
||||
def trace(*args)
|
||||
pp([*args, @scanner, @scanner&.string, @scanner&.pos, @keeps])
|
||||
end
|
||||
|
||||
def adjust_last_keep
|
||||
# trace(__method__, :start)
|
||||
|
||||
keep = @keeps.last
|
||||
# trace(__method__, :done, :empty) if keep.nil?
|
||||
return if keep.nil?
|
||||
|
||||
scanner, start, buffer = keep
|
||||
string = @scanner.string
|
||||
if @scanner != scanner
|
||||
start = 0
|
||||
end
|
||||
if start == 0 and @scanner.eos?
|
||||
keep_data = string
|
||||
else
|
||||
keep_data = string.byteslice(start, @scanner.pos - start)
|
||||
end
|
||||
if keep_data
|
||||
if buffer
|
||||
buffer << keep_data
|
||||
else
|
||||
keep[2] = keep_data.dup
|
||||
end
|
||||
end
|
||||
|
||||
# trace(__method__, :done)
|
||||
end
|
||||
|
||||
def read_chunk
|
||||
return false if @last_scanner
|
||||
|
||||
unless @keeps.empty?
|
||||
keep = @keeps.last
|
||||
keep_start = keep[0]
|
||||
string = @scanner.string
|
||||
keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
|
||||
if keep_data
|
||||
keep_buffer = keep[1]
|
||||
if keep_buffer
|
||||
keep_buffer << keep_data
|
||||
else
|
||||
keep[1] = keep_data.dup
|
||||
end
|
||||
end
|
||||
keep[0] = 0
|
||||
end
|
||||
adjust_last_keep
|
||||
|
||||
input = @inputs.first
|
||||
case input
|
||||
when StringIO
|
||||
string = input.read
|
||||
raise InvalidEncoding unless string.valid_encoding?
|
||||
# trace(__method__, :stringio, string)
|
||||
@scanner = StringScanner.new(string)
|
||||
@inputs.shift
|
||||
@last_scanner = @inputs.empty?
|
||||
|
@ -237,6 +304,7 @@ class CSV
|
|||
chunk = input.gets(@row_separator, @chunk_size)
|
||||
if chunk
|
||||
raise InvalidEncoding unless chunk.valid_encoding?
|
||||
# trace(__method__, :chunk, chunk)
|
||||
@scanner = StringScanner.new(chunk)
|
||||
if input.respond_to?(:eof?) and input.eof?
|
||||
@inputs.shift
|
||||
|
@ -244,6 +312,7 @@ class CSV
|
|||
end
|
||||
true
|
||||
else
|
||||
# trace(__method__, :no_chunk)
|
||||
@scanner = StringScanner.new("".encode(@encoding))
|
||||
@inputs.shift
|
||||
@last_scanner = @inputs.empty?
|
||||
|
@ -278,7 +347,11 @@ class CSV
|
|||
end
|
||||
|
||||
def field_size_limit
|
||||
@field_size_limit
|
||||
@max_field_size&.succ
|
||||
end
|
||||
|
||||
def max_field_size
|
||||
@max_field_size
|
||||
end
|
||||
|
||||
def skip_lines
|
||||
|
@ -346,6 +419,16 @@ class CSV
|
|||
end
|
||||
message = "Invalid byte sequence in #{@encoding}"
|
||||
raise MalformedCSVError.new(message, lineno)
|
||||
rescue UnexpectedError => error
|
||||
if @scanner
|
||||
ignore_broken_line
|
||||
lineno = @lineno
|
||||
else
|
||||
lineno = @lineno + 1
|
||||
end
|
||||
message = "This should not be happen: #{error.message}: "
|
||||
message += "Please report this to https://github.com/ruby/csv/issues"
|
||||
raise MalformedCSVError.new(message, lineno)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -390,7 +473,7 @@ class CSV
|
|||
@backslash_quote = false
|
||||
end
|
||||
@unconverted_fields = @options[:unconverted_fields]
|
||||
@field_size_limit = @options[:field_size_limit]
|
||||
@max_field_size = @options[:max_field_size]
|
||||
@skip_blanks = @options[:skip_blanks]
|
||||
@fields_converter = @options[:fields_converter]
|
||||
@header_fields_converter = @options[:header_fields_converter]
|
||||
|
@ -729,28 +812,28 @@ class CSV
|
|||
sample[0, 128].index(@quote_character)
|
||||
end
|
||||
|
||||
SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
|
||||
if SCANNER_TEST
|
||||
class UnoptimizedStringIO
|
||||
def initialize(string)
|
||||
@io = StringIO.new(string, "rb:#{string.encoding}")
|
||||
end
|
||||
|
||||
def gets(*args)
|
||||
@io.gets(*args)
|
||||
end
|
||||
|
||||
def each_line(*args, &block)
|
||||
@io.each_line(*args, &block)
|
||||
end
|
||||
|
||||
def eof?
|
||||
@io.eof?
|
||||
end
|
||||
class UnoptimizedStringIO # :nodoc:
|
||||
def initialize(string)
|
||||
@io = StringIO.new(string, "rb:#{string.encoding}")
|
||||
end
|
||||
|
||||
SCANNER_TEST_CHUNK_SIZE =
|
||||
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
|
||||
def gets(*args)
|
||||
@io.gets(*args)
|
||||
end
|
||||
|
||||
def each_line(*args, &block)
|
||||
@io.each_line(*args, &block)
|
||||
end
|
||||
|
||||
def eof?
|
||||
@io.eof?
|
||||
end
|
||||
end
|
||||
|
||||
SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
|
||||
if SCANNER_TEST
|
||||
SCANNER_TEST_CHUNK_SIZE_NAME = "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"
|
||||
SCANNER_TEST_CHUNK_SIZE_VALUE = ENV[SCANNER_TEST_CHUNK_SIZE_NAME]
|
||||
def build_scanner
|
||||
inputs = @samples.collect do |sample|
|
||||
UnoptimizedStringIO.new(sample)
|
||||
|
@ -760,10 +843,17 @@ class CSV
|
|||
else
|
||||
inputs << @input
|
||||
end
|
||||
begin
|
||||
chunk_size_value = ENV[SCANNER_TEST_CHUNK_SIZE_NAME]
|
||||
rescue # Ractor::IsolationError
|
||||
# Ractor on Ruby 3.0 can't read ENV value.
|
||||
chunk_size_value = SCANNER_TEST_CHUNK_SIZE_VALUE
|
||||
end
|
||||
chunk_size = Integer((chunk_size_value || "1"), 10)
|
||||
InputsScanner.new(inputs,
|
||||
@encoding,
|
||||
@row_separator,
|
||||
chunk_size: SCANNER_TEST_CHUNK_SIZE)
|
||||
chunk_size: chunk_size)
|
||||
end
|
||||
else
|
||||
def build_scanner
|
||||
|
@ -826,6 +916,14 @@ class CSV
|
|||
end
|
||||
end
|
||||
|
||||
def validate_field_size(field)
|
||||
return unless @max_field_size
|
||||
return if field.size <= @max_field_size
|
||||
ignore_broken_line
|
||||
message = "Field size exceeded: #{field.size} > #{@max_field_size}"
|
||||
raise MalformedCSVError.new(message, @lineno)
|
||||
end
|
||||
|
||||
def parse_no_quote(&block)
|
||||
@scanner.each_line(@row_separator) do |line|
|
||||
next if @skip_lines and skip_line?(line)
|
||||
|
@ -838,6 +936,11 @@ class CSV
|
|||
else
|
||||
line = strip_value(line)
|
||||
row = line.split(@split_column_separator, -1)
|
||||
if @max_field_size
|
||||
row.each do |column|
|
||||
validate_field_size(column)
|
||||
end
|
||||
end
|
||||
n_columns = row.size
|
||||
i = 0
|
||||
while i < n_columns
|
||||
|
@ -893,6 +996,7 @@ class CSV
|
|||
@need_robust_parsing = true
|
||||
return parse_quotable_robust(&block)
|
||||
end
|
||||
validate_field_size(row[i])
|
||||
end
|
||||
i += 1
|
||||
end
|
||||
|
@ -916,10 +1020,7 @@ class CSV
|
|||
value = parse_column_value
|
||||
if value
|
||||
@scanner.scan_all(@strip_value) if @strip_value
|
||||
if @field_size_limit and value.size >= @field_size_limit
|
||||
ignore_broken_line
|
||||
raise MalformedCSVError.new("Field size exceeded", @lineno)
|
||||
end
|
||||
validate_field_size(value)
|
||||
end
|
||||
if parse_column_end
|
||||
row << value
|
||||
|
@ -940,8 +1041,14 @@ class CSV
|
|||
break
|
||||
else
|
||||
if @quoted_column_value
|
||||
if liberal_parsing? and (new_line = @scanner.check(@line_end))
|
||||
message =
|
||||
"Illegal end-of-line sequence outside of a quoted field " +
|
||||
"<#{new_line.inspect}>"
|
||||
else
|
||||
message = "Any value after quoted field isn't allowed"
|
||||
end
|
||||
ignore_broken_line
|
||||
message = "Any value after quoted field isn't allowed"
|
||||
raise MalformedCSVError.new(message, @lineno)
|
||||
elsif @unquoted_column_value and
|
||||
(new_line = @scanner.scan(@line_end))
|
||||
|
|
221
lib/csv/row.rb
221
lib/csv/row.rb
|
@ -3,30 +3,105 @@
|
|||
require "forwardable"
|
||||
|
||||
class CSV
|
||||
# = \CSV::Row
|
||||
# A \CSV::Row instance represents a \CSV table row.
|
||||
# (see {class CSV}[../CSV.html]).
|
||||
#
|
||||
# A CSV::Row is part Array and part Hash. It retains an order for the fields
|
||||
# and allows duplicates just as an Array would, but also allows you to access
|
||||
# fields by name just as you could if they were in a Hash.
|
||||
# The instance may have:
|
||||
# - Fields: each is an object, not necessarily a \String.
|
||||
# - Headers: each serves as a key, and also need not be a \String.
|
||||
#
|
||||
# All rows returned by CSV will be constructed from this class, if header row
|
||||
# processing is activated.
|
||||
# === Instance Methods
|
||||
#
|
||||
# \CSV::Row has three groups of instance methods:
|
||||
# - Its own internally defined instance methods.
|
||||
# - Methods included by module Enumerable.
|
||||
# - Methods delegated to class Array:
|
||||
# * Array#empty?
|
||||
# * Array#length
|
||||
# * Array#size
|
||||
#
|
||||
# == Creating a \CSV::Row Instance
|
||||
#
|
||||
# Commonly, a new \CSV::Row instance is created by parsing \CSV source
|
||||
# that has headers:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.each {|row| p row }
|
||||
# Output:
|
||||
# #<CSV::Row "Name":"foo" "Value":"0">
|
||||
# #<CSV::Row "Name":"bar" "Value":"1">
|
||||
# #<CSV::Row "Name":"baz" "Value":"2">
|
||||
#
|
||||
# You can also create a row directly. See ::new.
|
||||
#
|
||||
# == Headers
|
||||
#
|
||||
# Like a \CSV::Table, a \CSV::Row has headers.
|
||||
#
|
||||
# A \CSV::Row that was created by parsing \CSV source
|
||||
# inherits its headers from the table:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# row = table.first
|
||||
# row.headers # => ["Name", "Value"]
|
||||
#
|
||||
# You can also create a new row with headers;
|
||||
# like the keys in a \Hash, the headers need not be Strings:
|
||||
# row = CSV::Row.new([:name, :value], ['foo', 0])
|
||||
# row.headers # => [:name, :value]
|
||||
#
|
||||
# The new row retains its headers even if added to a table
|
||||
# that has headers:
|
||||
# table << row # => #<CSV::Table mode:col_or_row row_count:5>
|
||||
# row.headers # => [:name, :value]
|
||||
# row[:name] # => "foo"
|
||||
# row['Name'] # => nil
|
||||
#
|
||||
#
|
||||
#
|
||||
# == Accessing Fields
|
||||
#
|
||||
# You may access a field in a \CSV::Row with either its \Integer index
|
||||
# (\Array-style) or its header (\Hash-style).
|
||||
#
|
||||
# Fetch a field using method #[]:
|
||||
# row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
|
||||
# row[1] # => 0
|
||||
# row['Value'] # => 0
|
||||
#
|
||||
# Set a field using method #[]=:
|
||||
# row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
|
||||
# row # => #<CSV::Row "Name":"foo" "Value":0>
|
||||
# row[0] = 'bar'
|
||||
# row['Value'] = 1
|
||||
# row # => #<CSV::Row "Name":"bar" "Value":1>
|
||||
#
|
||||
class Row
|
||||
# :call-seq:
|
||||
# CSV::Row.new(headers, fields, header_row = false) -> csv_row
|
||||
#
|
||||
# Constructs a new CSV::Row from +headers+ and +fields+, which are expected
|
||||
# to be Arrays. If one Array is shorter than the other, it will be padded
|
||||
# with +nil+ objects.
|
||||
# Returns the new \CSV::Row instance constructed from
|
||||
# arguments +headers+ and +fields+; both should be Arrays;
|
||||
# note that the fields need not be Strings:
|
||||
# row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
|
||||
# row # => #<CSV::Row "Name":"foo" "Value":0>
|
||||
#
|
||||
# The optional +header_row+ parameter can be set to +true+ to indicate, via
|
||||
# CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
|
||||
# row. Otherwise, the row assumes to be a field row.
|
||||
# If the \Array lengths are different, the shorter is +nil+-filled:
|
||||
# row = CSV::Row.new(['Name', 'Value', 'Date', 'Size'], ['foo', 0])
|
||||
# row # => #<CSV::Row "Name":"foo" "Value":0 "Date":nil "Size":nil>
|
||||
#
|
||||
# A CSV::Row object supports the following Array methods through delegation:
|
||||
#
|
||||
# * empty?()
|
||||
# * length()
|
||||
# * size()
|
||||
# Each \CSV::Row object is either a <i>field row</i> or a <i>header row</i>;
|
||||
# by default, a new row is a field row; for the row created above:
|
||||
# row.field_row? # => true
|
||||
# row.header_row? # => false
|
||||
#
|
||||
# If the optional argument +header_row+ is given as +true+,
|
||||
# the created row is a header row:
|
||||
# row = CSV::Row.new(['Name', 'Value'], ['foo', 0], header_row = true)
|
||||
# row # => #<CSV::Row "Name":"foo" "Value":0>
|
||||
# row.field_row? # => false
|
||||
# row.header_row? # => true
|
||||
def initialize(headers, fields, header_row = false)
|
||||
@header_row = header_row
|
||||
headers.each { |h| h.freeze if h.is_a? String }
|
||||
|
@ -48,6 +123,10 @@ class CSV
|
|||
extend Forwardable
|
||||
def_delegators :@row, :empty?, :length, :size
|
||||
|
||||
# :call-seq:
|
||||
# row.initialize_copy(other_row) -> self
|
||||
#
|
||||
# Calls superclass method.
|
||||
def initialize_copy(other)
|
||||
super_return_value = super
|
||||
@row = @row.collect(&:dup)
|
||||
|
@ -71,7 +150,7 @@ class CSV
|
|||
end
|
||||
|
||||
# :call-seq:
|
||||
# row.headers
|
||||
# row.headers -> array_of_headers
|
||||
#
|
||||
# Returns the headers for this row:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
|
@ -83,9 +162,9 @@ class CSV
|
|||
end
|
||||
|
||||
# :call-seq:
|
||||
# field(index)
|
||||
# field(header)
|
||||
# field(header, offset)
|
||||
# field(index) -> value
|
||||
# field(header) -> value
|
||||
# field(header, offset) -> value
|
||||
#
|
||||
# Returns the field value for the given +index+ or +header+.
|
||||
#
|
||||
|
@ -137,9 +216,9 @@ class CSV
|
|||
|
||||
#
|
||||
# :call-seq:
|
||||
# fetch(header)
|
||||
# fetch(header, default)
|
||||
# fetch(header) {|row| ... }
|
||||
# fetch(header) -> value
|
||||
# fetch(header, default) -> value
|
||||
# fetch(header) {|row| ... } -> value
|
||||
#
|
||||
# Returns the field value as specified by +header+.
|
||||
#
|
||||
|
@ -193,7 +272,7 @@ class CSV
|
|||
end
|
||||
|
||||
# :call-seq:
|
||||
# row.has_key?(header)
|
||||
# row.has_key?(header) -> true or false
|
||||
#
|
||||
# Returns +true+ if there is a field with the given +header+,
|
||||
# +false+ otherwise.
|
||||
|
@ -320,7 +399,7 @@ class CSV
|
|||
end
|
||||
|
||||
# :call-seq:
|
||||
# row.push(*values) ->self
|
||||
# row.push(*values) -> self
|
||||
#
|
||||
# Appends each of the given +values+ to +self+ as a field; returns +self+:
|
||||
# source = "Name,Name,Name\nFoo,Bar,Baz\n"
|
||||
|
@ -403,7 +482,7 @@ class CSV
|
|||
end
|
||||
|
||||
# :call-seq:
|
||||
# self.fields(*specifiers)
|
||||
# self.fields(*specifiers) -> array_of_fields
|
||||
#
|
||||
# Returns field values per the given +specifiers+, which may be any mixture of:
|
||||
# - \Integer index.
|
||||
|
@ -471,15 +550,26 @@ class CSV
|
|||
end
|
||||
alias_method :values_at, :fields
|
||||
|
||||
#
|
||||
# :call-seq:
|
||||
# index( header )
|
||||
# index( header, offset )
|
||||
# index(header) -> index
|
||||
# index(header, offset) -> index
|
||||
#
|
||||
# This method will return the index of a field with the provided +header+.
|
||||
# The +offset+ can be used to locate duplicate header names, as described in
|
||||
# CSV::Row.field().
|
||||
# Returns the index for the given header, if it exists;
|
||||
# otherwise returns +nil+.
|
||||
#
|
||||
# With the single argument +header+, returns the index
|
||||
# of the first-found field with the given +header+:
|
||||
# source = "Name,Name,Name\nFoo,Bar,Baz\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# row = table[0]
|
||||
# row.index('Name') # => 0
|
||||
# row.index('NAME') # => nil
|
||||
#
|
||||
# With arguments +header+ and +offset+,
|
||||
# returns the index of the first-found field with given +header+,
|
||||
# but ignoring the first +offset+ fields:
|
||||
# row.index('Name', 1) # => 1
|
||||
# row.index('Name', 3) # => nil
|
||||
def index(header, minimum_index = 0)
|
||||
# find the pair
|
||||
index = headers[minimum_index..-1].index(header)
|
||||
|
@ -487,24 +577,36 @@ class CSV
|
|||
index.nil? ? nil : index + minimum_index
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# row.field?(value) -> true or false
|
||||
#
|
||||
# Returns +true+ if +data+ matches a field in this row, and +false+
|
||||
# otherwise.
|
||||
#
|
||||
# Returns +true+ if +value+ is a field in this row, +false+ otherwise:
|
||||
# source = "Name,Name,Name\nFoo,Bar,Baz\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# row = table[0]
|
||||
# row.field?('Bar') # => true
|
||||
# row.field?('BAR') # => false
|
||||
def field?(data)
|
||||
fields.include? data
|
||||
end
|
||||
|
||||
include Enumerable
|
||||
|
||||
# :call-seq:
|
||||
# row.each {|header, value| ... } -> self
|
||||
#
|
||||
# Yields each pair of the row as header and field tuples (much like
|
||||
# iterating over a Hash). This method returns the row for chaining.
|
||||
#
|
||||
# If no block is given, an Enumerator is returned.
|
||||
#
|
||||
# Support for Enumerable.
|
||||
# Calls the block with each header-value pair; returns +self+:
|
||||
# source = "Name,Name,Name\nFoo,Bar,Baz\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# row = table[0]
|
||||
# row.each {|header, value| p [header, value] }
|
||||
# Output:
|
||||
# ["Name", "Foo"]
|
||||
# ["Name", "Bar"]
|
||||
# ["Name", "Baz"]
|
||||
#
|
||||
# If no block is given, returns a new Enumerator:
|
||||
# row.each # => #<Enumerator: #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz">:each>
|
||||
def each(&block)
|
||||
return enum_for(__method__) { size } unless block_given?
|
||||
|
||||
|
@ -515,10 +617,19 @@ class CSV
|
|||
|
||||
alias_method :each_pair, :each
|
||||
|
||||
# :call-seq:
|
||||
# row == other -> true or false
|
||||
#
|
||||
# Returns +true+ if this row contains the same headers and fields in the
|
||||
# same order as +other+.
|
||||
#
|
||||
# Returns +true+ if +other+ is a \CSV::Row that has the same
|
||||
# fields (headers and values) in the same order as +self+;
|
||||
# otherwise returns +false+:
|
||||
# source = "Name,Name,Name\nFoo,Bar,Baz\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# row = table[0]
|
||||
# other_row = table[0]
|
||||
# row == other_row # => true
|
||||
# other_row = table[1]
|
||||
# row == other_row # => false
|
||||
def ==(other)
|
||||
return @row == other.row if other.is_a? CSV::Row
|
||||
@row == other
|
||||
|
@ -548,8 +659,30 @@ class CSV
|
|||
end
|
||||
alias_method :to_hash, :to_h
|
||||
|
||||
# :call-seq:
|
||||
# row.deconstruct_keys(keys) -> hash
|
||||
#
|
||||
# Returns a new \Hash suitable for pattern matching containing only the
|
||||
# keys specified as an argument.
|
||||
def deconstruct_keys(keys)
|
||||
if keys.nil?
|
||||
to_h
|
||||
else
|
||||
keys.to_h { |key| [key, self[key]] }
|
||||
end
|
||||
end
|
||||
|
||||
alias_method :to_ary, :to_a
|
||||
|
||||
# :call-seq:
|
||||
# row.deconstruct -> array
|
||||
#
|
||||
# Returns a new \Array suitable for pattern matching containing the values
|
||||
# of the row.
|
||||
def deconstruct
|
||||
fields
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# row.to_csv -> csv_string
|
||||
#
|
||||
|
|
623
lib/csv/table.rb
623
lib/csv/table.rb
|
@ -3,31 +3,199 @@
|
|||
require "forwardable"
|
||||
|
||||
class CSV
|
||||
# = \CSV::Table
|
||||
# A \CSV::Table instance represents \CSV data.
|
||||
# (see {class CSV}[../CSV.html]).
|
||||
#
|
||||
# A CSV::Table is a two-dimensional data structure for representing CSV
|
||||
# documents. Tables allow you to work with the data by row or column,
|
||||
# manipulate the data, and even convert the results back to CSV, if needed.
|
||||
# The instance may have:
|
||||
# - Rows: each is a CSV::Row object.
|
||||
# - Headers: names for the columns.
|
||||
#
|
||||
# All tables returned by CSV will be constructed from this class, if header
|
||||
# row processing is activated.
|
||||
# === Instance Methods
|
||||
#
|
||||
# \CSV::Table has three groups of instance methods:
|
||||
# - Its own internally defined instance methods.
|
||||
# - Methods included by module Enumerable.
|
||||
# - Methods delegated to class Array:
|
||||
# * Array#empty?
|
||||
# * Array#length
|
||||
# * Array#size
|
||||
#
|
||||
# == Creating a \CSV::Table Instance
|
||||
#
|
||||
# Commonly, a new \CSV::Table instance is created by parsing \CSV source
|
||||
# using headers:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.class # => CSV::Table
|
||||
#
|
||||
# You can also create an instance directly. See ::new.
|
||||
#
|
||||
# == Headers
|
||||
#
|
||||
# If a table has headers, the headers serve as labels for the columns of data.
|
||||
# Each header serves as the label for its column.
|
||||
#
|
||||
# The headers for a \CSV::Table object are stored as an \Array of Strings.
|
||||
#
|
||||
# Commonly, headers are defined in the first row of \CSV source:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.headers # => ["Name", "Value"]
|
||||
#
|
||||
# If no headers are defined, the \Array is empty:
|
||||
# table = CSV::Table.new([])
|
||||
# table.headers # => []
|
||||
#
|
||||
# == Access Modes
|
||||
#
|
||||
# \CSV::Table provides three modes for accessing table data:
|
||||
# - \Row mode.
|
||||
# - Column mode.
|
||||
# - Mixed mode (the default for a new table).
|
||||
#
|
||||
# The access mode for a \CSV::Table instance affects the behavior
|
||||
# of some of its instance methods:
|
||||
# - #[]
|
||||
# - #[]=
|
||||
# - #delete
|
||||
# - #delete_if
|
||||
# - #each
|
||||
# - #values_at
|
||||
#
|
||||
# === \Row Mode
|
||||
#
|
||||
# Set a table to row mode with method #by_row!:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.by_row! # => #<CSV::Table mode:row row_count:4>
|
||||
#
|
||||
# Specify a single row by an \Integer index:
|
||||
# # Get a row.
|
||||
# table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
|
||||
# # Set a row, then get it.
|
||||
# table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3])
|
||||
# table[1] # => #<CSV::Row "Name":"bam" "Value":3>
|
||||
#
|
||||
# Specify a sequence of rows by a \Range:
|
||||
# # Get rows.
|
||||
# table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">]
|
||||
# # Set rows, then get them.
|
||||
# table[1..2] = [
|
||||
# CSV::Row.new(['Name', 'Value'], ['bat', 4]),
|
||||
# CSV::Row.new(['Name', 'Value'], ['bad', 5]),
|
||||
# ]
|
||||
# table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]]
|
||||
#
|
||||
# === Column Mode
|
||||
#
|
||||
# Set a table to column mode with method #by_col!:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.by_col! # => #<CSV::Table mode:col row_count:4>
|
||||
#
|
||||
# Specify a column by an \Integer index:
|
||||
# # Get a column.
|
||||
# table[0]
|
||||
# # Set a column, then get it.
|
||||
# table[0] = ['FOO', 'BAR', 'BAZ']
|
||||
# table[0] # => ["FOO", "BAR", "BAZ"]
|
||||
#
|
||||
# Specify a column by its \String header:
|
||||
# # Get a column.
|
||||
# table['Name'] # => ["FOO", "BAR", "BAZ"]
|
||||
# # Set a column, then get it.
|
||||
# table['Name'] = ['Foo', 'Bar', 'Baz']
|
||||
# table['Name'] # => ["Foo", "Bar", "Baz"]
|
||||
#
|
||||
# === Mixed Mode
|
||||
#
|
||||
# In mixed mode, you can refer to either rows or columns:
|
||||
# - An \Integer index refers to a row.
|
||||
# - A \Range index refers to multiple rows.
|
||||
# - A \String index refers to a column.
|
||||
#
|
||||
# Set a table to mixed mode with method #by_col_or_row!:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
|
||||
#
|
||||
# Specify a single row by an \Integer index:
|
||||
# # Get a row.
|
||||
# table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
|
||||
# # Set a row, then get it.
|
||||
# table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3])
|
||||
# table[1] # => #<CSV::Row "Name":"bam" "Value":3>
|
||||
#
|
||||
# Specify a sequence of rows by a \Range:
|
||||
# # Get rows.
|
||||
# table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">]
|
||||
# # Set rows, then get them.
|
||||
# table[1] = CSV::Row.new(['Name', 'Value'], ['bat', 4])
|
||||
# table[2] = CSV::Row.new(['Name', 'Value'], ['bad', 5])
|
||||
# table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]]
|
||||
#
|
||||
# Specify a column by its \String header:
|
||||
# # Get a column.
|
||||
# table['Name'] # => ["foo", "bat", "bad"]
|
||||
# # Set a column, then get it.
|
||||
# table['Name'] = ['Foo', 'Bar', 'Baz']
|
||||
# table['Name'] # => ["Foo", "Bar", "Baz"]
|
||||
class Table
|
||||
# :call-seq:
|
||||
# CSV::Table.new(array_of_rows, headers: nil) -> csv_table
|
||||
#
|
||||
# Constructs a new CSV::Table from +array_of_rows+, which are expected
|
||||
# to be CSV::Row objects. All rows are assumed to have the same headers.
|
||||
# Returns a new \CSV::Table object.
|
||||
#
|
||||
# The optional +headers+ parameter can be set to Array of headers.
|
||||
# If headers aren't set, headers are fetched from CSV::Row objects.
|
||||
# Otherwise, headers() method will return headers being set in
|
||||
# headers argument.
|
||||
# - Argument +array_of_rows+ must be an \Array of CSV::Row objects.
|
||||
# - Argument +headers+, if given, may be an \Array of Strings.
|
||||
#
|
||||
# A CSV::Table object supports the following Array methods through
|
||||
# delegation:
|
||||
# ---
|
||||
#
|
||||
# * empty?()
|
||||
# * length()
|
||||
# * size()
|
||||
# Create an empty \CSV::Table object:
|
||||
# table = CSV::Table.new([])
|
||||
# table # => #<CSV::Table mode:col_or_row row_count:1>
|
||||
#
|
||||
# Create a non-empty \CSV::Table object:
|
||||
# rows = [
|
||||
# CSV::Row.new([], []),
|
||||
# CSV::Row.new([], []),
|
||||
# CSV::Row.new([], []),
|
||||
# ]
|
||||
# table = CSV::Table.new(rows)
|
||||
# table # => #<CSV::Table mode:col_or_row row_count:4>
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# If argument +headers+ is an \Array of Strings,
|
||||
# those Strings become the table's headers:
|
||||
# table = CSV::Table.new([], headers: ['Name', 'Age'])
|
||||
# table.headers # => ["Name", "Age"]
|
||||
#
|
||||
# If argument +headers+ is not given and the table has rows,
|
||||
# the headers are taken from the first row:
|
||||
# rows = [
|
||||
# CSV::Row.new(['Foo', 'Bar'], []),
|
||||
# CSV::Row.new(['foo', 'bar'], []),
|
||||
# CSV::Row.new(['FOO', 'BAR'], []),
|
||||
# ]
|
||||
# table = CSV::Table.new(rows)
|
||||
# table.headers # => ["Foo", "Bar"]
|
||||
#
|
||||
# If argument +headers+ is not given and the table is empty (has no rows),
|
||||
# the headers are also empty:
|
||||
# table = CSV::Table.new([])
|
||||
# table.headers # => []
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# Raises an exception if argument +array_of_rows+ is not an \Array object:
|
||||
# # Raises NoMethodError (undefined method `first' for :foo:Symbol):
|
||||
# CSV::Table.new(:foo)
|
||||
#
|
||||
# Raises an exception if an element of +array_of_rows+ is not a \CSV::Row object:
|
||||
# # Raises NoMethodError (undefined method `headers' for :foo:Symbol):
|
||||
# CSV::Table.new([:foo])
|
||||
def initialize(array_of_rows, headers: nil)
|
||||
@table = array_of_rows
|
||||
@headers = headers
|
||||
|
@ -54,88 +222,141 @@ class CSV
|
|||
extend Forwardable
|
||||
def_delegators :@table, :empty?, :length, :size
|
||||
|
||||
# :call-seq:
|
||||
# table.by_col -> table_dup
|
||||
#
|
||||
# Returns a duplicate table object, in column mode. This is handy for
|
||||
# chaining in a single call without changing the table mode, but be aware
|
||||
# that this method can consume a fair amount of memory for bigger data sets.
|
||||
# Returns a duplicate of +self+, in column mode
|
||||
# (see {Column Mode}[#class-CSV::Table-label-Column+Mode]):
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.mode # => :col_or_row
|
||||
# dup_table = table.by_col
|
||||
# dup_table.mode # => :col
|
||||
# dup_table.equal?(table) # => false # It's a dup
|
||||
#
|
||||
# This method returns the duplicate table for chaining. Don't chain
|
||||
# destructive methods (like []=()) this way though, since you are working
|
||||
# with a duplicate.
|
||||
# This may be used to chain method calls without changing the mode
|
||||
# (but also will affect performance and memory usage):
|
||||
# dup_table.by_col['Name']
|
||||
#
|
||||
# Also note that changes to the duplicate table will not affect the original.
|
||||
def by_col
|
||||
self.class.new(@table.dup).by_col!
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# table.by_col! -> self
|
||||
#
|
||||
# Switches the mode of this table to column mode. All calls to indexing and
|
||||
# iteration methods will work with columns until the mode is changed again.
|
||||
#
|
||||
# This method returns the table and is safe to chain.
|
||||
#
|
||||
# Sets the mode for +self+ to column mode
|
||||
# (see {Column Mode}[#class-CSV::Table-label-Column+Mode]); returns +self+:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.mode # => :col_or_row
|
||||
# table1 = table.by_col!
|
||||
# table.mode # => :col
|
||||
# table1.equal?(table) # => true # Returned self
|
||||
def by_col!
|
||||
@mode = :col
|
||||
|
||||
self
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# table.by_col_or_row -> table_dup
|
||||
#
|
||||
# Returns a duplicate table object, in mixed mode. This is handy for
|
||||
# chaining in a single call without changing the table mode, but be aware
|
||||
# that this method can consume a fair amount of memory for bigger data sets.
|
||||
# Returns a duplicate of +self+, in mixed mode
|
||||
# (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]):
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true).by_col!
|
||||
# table.mode # => :col
|
||||
# dup_table = table.by_col_or_row
|
||||
# dup_table.mode # => :col_or_row
|
||||
# dup_table.equal?(table) # => false # It's a dup
|
||||
#
|
||||
# This method returns the duplicate table for chaining. Don't chain
|
||||
# destructive methods (like []=()) this way though, since you are working
|
||||
# with a duplicate.
|
||||
# This may be used to chain method calls without changing the mode
|
||||
# (but also will affect performance and memory usage):
|
||||
# dup_table.by_col_or_row['Name']
|
||||
#
|
||||
# Also note that changes to the duplicate table will not affect the original.
|
||||
def by_col_or_row
|
||||
self.class.new(@table.dup).by_col_or_row!
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# table.by_col_or_row! -> self
|
||||
#
|
||||
# Switches the mode of this table to mixed mode. All calls to indexing and
|
||||
# iteration methods will use the default intelligent indexing system until
|
||||
# the mode is changed again. In mixed mode an index is assumed to be a row
|
||||
# reference while anything else is assumed to be column access by headers.
|
||||
#
|
||||
# This method returns the table and is safe to chain.
|
||||
#
|
||||
# Sets the mode for +self+ to mixed mode
|
||||
# (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]); returns +self+:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true).by_col!
|
||||
# table.mode # => :col
|
||||
# table1 = table.by_col_or_row!
|
||||
# table.mode # => :col_or_row
|
||||
# table1.equal?(table) # => true # Returned self
|
||||
def by_col_or_row!
|
||||
@mode = :col_or_row
|
||||
|
||||
self
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# table.by_row -> table_dup
|
||||
#
|
||||
# Returns a duplicate table object, in row mode. This is handy for chaining
|
||||
# in a single call without changing the table mode, but be aware that this
|
||||
# method can consume a fair amount of memory for bigger data sets.
|
||||
# Returns a duplicate of +self+, in row mode
|
||||
# (see {Row Mode}[#class-CSV::Table-label-Row+Mode]):
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.mode # => :col_or_row
|
||||
# dup_table = table.by_row
|
||||
# dup_table.mode # => :row
|
||||
# dup_table.equal?(table) # => false # It's a dup
|
||||
#
|
||||
# This method returns the duplicate table for chaining. Don't chain
|
||||
# destructive methods (like []=()) this way though, since you are working
|
||||
# with a duplicate.
|
||||
# This may be used to chain method calls without changing the mode
|
||||
# (but also will affect performance and memory usage):
|
||||
# dup_table.by_row[1]
|
||||
#
|
||||
# Also note that changes to the duplicate table will not affect the original.
|
||||
def by_row
|
||||
self.class.new(@table.dup).by_row!
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# table.by_row! -> self
|
||||
#
|
||||
# Switches the mode of this table to row mode. All calls to indexing and
|
||||
# iteration methods will work with rows until the mode is changed again.
|
||||
#
|
||||
# This method returns the table and is safe to chain.
|
||||
#
|
||||
# Sets the mode for +self+ to row mode
|
||||
# (see {Row Mode}[#class-CSV::Table-label-Row+Mode]); returns +self+:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.mode # => :col_or_row
|
||||
# table1 = table.by_row!
|
||||
# table.mode # => :row
|
||||
# table1.equal?(table) # => true # Returned self
|
||||
def by_row!
|
||||
@mode = :row
|
||||
|
||||
self
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# table.headers -> array_of_headers
|
||||
#
|
||||
# Returns the headers for the first row of this table (assumed to match all
|
||||
# other rows). The headers Array passed to CSV::Table.new is returned for
|
||||
# empty tables.
|
||||
# Returns a new \Array containing the \String headers for the table.
|
||||
#
|
||||
# If the table is not empty, returns the headers from the first row:
|
||||
# rows = [
|
||||
# CSV::Row.new(['Foo', 'Bar'], []),
|
||||
# CSV::Row.new(['FOO', 'BAR'], []),
|
||||
# CSV::Row.new(['foo', 'bar'], []),
|
||||
# ]
|
||||
# table = CSV::Table.new(rows)
|
||||
# table.headers # => ["Foo", "Bar"]
|
||||
# table.delete(0)
|
||||
# table.headers # => ["FOO", "BAR"]
|
||||
# table.delete(0)
|
||||
# table.headers # => ["foo", "bar"]
|
||||
#
|
||||
# If the table is empty, returns a copy of the headers in the table itself:
|
||||
# table.delete(0)
|
||||
# table.headers # => ["Foo", "Bar"]
|
||||
def headers
|
||||
if @table.empty?
|
||||
@headers.dup
|
||||
|
@ -145,17 +366,21 @@ class CSV
|
|||
end
|
||||
|
||||
# :call-seq:
|
||||
# table[n] -> row
|
||||
# table[range] -> array_of_rows
|
||||
# table[header] -> array_of_fields
|
||||
# table[n] -> row or column_data
|
||||
# table[range] -> array_of_rows or array_of_column_data
|
||||
# table[header] -> array_of_column_data
|
||||
#
|
||||
# Returns data from the table; does not modify the table.
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# The expression <tt>table[n]</tt>, where +n+ is a non-negative \Integer,
|
||||
# returns the +n+th row of the table, if that row exists,
|
||||
# and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>:
|
||||
# Fetch a \Row by Its \Integer Index::
|
||||
# - Form: <tt>table[n]</tt>, +n+ an integer.
|
||||
# - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
|
||||
# - Return value: _nth_ row of the table, if that row exists;
|
||||
# otherwise +nil+.
|
||||
#
|
||||
# Returns the _nth_ row of the table if that row exists:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.by_row! # => #<CSV::Table mode:row row_count:4>
|
||||
|
@ -168,20 +393,45 @@ class CSV
|
|||
#
|
||||
# Returns +nil+ if +n+ is too large or too small:
|
||||
# table[4] # => nil
|
||||
# table[-4] => nil
|
||||
# table[-4] # => nil
|
||||
#
|
||||
# Raises an exception if the access mode is <tt>:row</tt>
|
||||
# and +n+ is not an
|
||||
# {Integer-convertible object}[https://docs.ruby-lang.org/en/master/implicit_conversion_rdoc.html#label-Integer-Convertible+Objects].
|
||||
# and +n+ is not an \Integer:
|
||||
# table.by_row! # => #<CSV::Table mode:row row_count:4>
|
||||
# # Raises TypeError (no implicit conversion of String into Integer):
|
||||
# table['Name']
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# The expression <tt>table[range]</tt>, where +range+ is a Range object,
|
||||
# returns rows from the table, beginning at row <tt>range.first</tt>,
|
||||
# if those rows exist, and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>:
|
||||
# Fetch a Column by Its \Integer Index::
|
||||
# - Form: <tt>table[n]</tt>, +n+ an \Integer.
|
||||
# - Access mode: <tt>:col</tt>.
|
||||
# - Return value: _nth_ column of the table, if that column exists;
|
||||
# otherwise an \Array of +nil+ fields of length <tt>self.size</tt>.
|
||||
#
|
||||
# Returns the _nth_ column of the table if that column exists:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.by_col! # => #<CSV::Table mode:col row_count:4>
|
||||
# table[1] # => ["0", "1", "2"]
|
||||
#
|
||||
# Counts backward from the last column if +n+ is negative:
|
||||
# table[-2] # => ["foo", "bar", "baz"]
|
||||
#
|
||||
# Returns an \Array of +nil+ fields if +n+ is too large or too small:
|
||||
# table[4] # => [nil, nil, nil]
|
||||
# table[-4] # => [nil, nil, nil]
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# Fetch Rows by \Range::
|
||||
# - Form: <tt>table[range]</tt>, +range+ a \Range object.
|
||||
# - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
|
||||
# - Return value: rows from the table, beginning at row <tt>range.start</tt>,
|
||||
# if those rows exist.
|
||||
#
|
||||
# Returns rows from the table, beginning at row <tt>range.first</tt>,
|
||||
# if those rows exist:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.by_row! # => #<CSV::Table mode:row row_count:4>
|
||||
|
@ -191,11 +441,11 @@ class CSV
|
|||
# rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1">
|
||||
# rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
|
||||
#
|
||||
# If there are too few rows, returns all from <tt>range.first</tt> to the end:
|
||||
# If there are too few rows, returns all from <tt>range.start</tt> to the end:
|
||||
# rows = table[1..50] # => #<CSV::Row "Name":"bar" "Value":"1">
|
||||
# rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
|
||||
#
|
||||
# Special case: if <tt>range.start == table.size</tt>, returns an empty \Array:
|
||||
# Special case: if <tt>range.start == table.size</tt>, returns an empty \Array:
|
||||
# table[table.size..50] # => []
|
||||
#
|
||||
# If <tt>range.end</tt> is negative, calculates the ending index from the end:
|
||||
|
@ -211,9 +461,41 @@ class CSV
|
|||
#
|
||||
# ---
|
||||
#
|
||||
# The expression <tt>table[header]</tt>, where +header+ is a \String,
|
||||
# returns column values (\Array of \Strings) if the column exists
|
||||
# and if the access mode is <tt>:col</tt> or <tt>:col_or_row</tt>:
|
||||
# Fetch Columns by \Range::
|
||||
# - Form: <tt>table[range]</tt>, +range+ a \Range object.
|
||||
# - Access mode: <tt>:col</tt>.
|
||||
# - Return value: column data from the table, beginning at column <tt>range.start</tt>,
|
||||
# if those columns exist.
|
||||
#
|
||||
# Returns column values from the table, if the column exists;
|
||||
# the values are arranged by row:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.by_col!
|
||||
# table[0..1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
||||
#
|
||||
# Special case: if <tt>range.start == headers.size</tt>,
|
||||
# returns an \Array (size: <tt>table.size</tt>) of empty \Arrays:
|
||||
# table[table.headers.size..50] # => [[], [], []]
|
||||
#
|
||||
# If <tt>range.end</tt> is negative, calculates the ending index from the end:
|
||||
# table[0..-1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
||||
#
|
||||
# If <tt>range.start</tt> is negative, calculates the starting index from the end:
|
||||
# table[-2..2] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
||||
#
|
||||
# If <tt>range.start</tt> is larger than <tt>table.size</tt>,
|
||||
# returns an \Array of +nil+ values:
|
||||
# table[4..4] # => [nil, nil, nil]
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# Fetch a Column by Its \String Header::
|
||||
# - Form: <tt>table[header]</tt>, +header+ a \String header.
|
||||
# - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt>.
|
||||
# - Return value: column data from the table, if that +header+ exists.
|
||||
#
|
||||
# Returns column values from the table, if the column exists:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.by_col! # => #<CSV::Table mode:col row_count:4>
|
||||
|
@ -238,22 +520,132 @@ class CSV
|
|||
end
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# table[n] = row -> row
|
||||
# table[n] = field_or_array_of_fields -> field_or_array_of_fields
|
||||
# table[header] = field_or_array_of_fields -> field_or_array_of_fields
|
||||
#
|
||||
# In the default mixed mode, this method assigns rows for index access and
|
||||
# columns for header access. You can force the index association by first
|
||||
# calling by_col!() or by_row!().
|
||||
# Puts data onto the table.
|
||||
#
|
||||
# Rows may be set to an Array of values (which will inherit the table's
|
||||
# headers()) or a CSV::Row.
|
||||
# ---
|
||||
#
|
||||
# Columns may be set to a single value, which is copied to each row of the
|
||||
# column, or an Array of values. Arrays of values are assigned to rows top
|
||||
# to bottom in row major order. Excess values are ignored and if the Array
|
||||
# does not have a value for each row the extra rows will receive a +nil+.
|
||||
# Set a \Row by Its \Integer Index::
|
||||
# - Form: <tt>table[n] = row</tt>, +n+ an \Integer,
|
||||
# +row+ a \CSV::Row instance or an \Array of fields.
|
||||
# - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
|
||||
# - Return value: +row+.
|
||||
#
|
||||
# Assigning to an existing column or row clobbers the data. Assigning to
|
||||
# new columns creates them at the right end of the table.
|
||||
# If the row exists, it is replaced:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# new_row = CSV::Row.new(['Name', 'Value'], ['bat', 3])
|
||||
# table.by_row! # => #<CSV::Table mode:row row_count:4>
|
||||
# return_value = table[0] = new_row
|
||||
# return_value.equal?(new_row) # => true # Returned the row
|
||||
# table[0].to_h # => {"Name"=>"bat", "Value"=>3}
|
||||
#
|
||||
# With access mode <tt>:col_or_row</tt>:
|
||||
# table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
|
||||
# table[0] = CSV::Row.new(['Name', 'Value'], ['bam', 4])
|
||||
# table[0].to_h # => {"Name"=>"bam", "Value"=>4}
|
||||
#
|
||||
# With an \Array instead of a \CSV::Row, inherits headers from the table:
|
||||
# array = ['bad', 5]
|
||||
# return_value = table[0] = array
|
||||
# return_value.equal?(array) # => true # Returned the array
|
||||
# table[0].to_h # => {"Name"=>"bad", "Value"=>5}
|
||||
#
|
||||
# If the row does not exist, extends the table by adding rows,
|
||||
# filling any intervening rows with +nil+ as needed:
|
||||
# table.size # => 3
|
||||
# table[5] = ['bag', 6]
|
||||
# table.size # => 6
|
||||
# table[3] # => nil
|
||||
# table[4] # => nil
|
||||
# table[5].to_h # => {"Name"=>"bag", "Value"=>6}
|
||||
#
|
||||
# Note that the +nil+ rows are actually +nil+, not a row of +nil+ fields.
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# Set a Column by Its \Integer Index::
|
||||
# - Form: <tt>table[n] = array_of_fields</tt>, +n+ an \Integer,
|
||||
# +array_of_fields+ an \Array of \String fields.
|
||||
# - Access mode: <tt>:col</tt>.
|
||||
# - Return value: +array_of_fields+.
|
||||
#
|
||||
# If the column exists, it is replaced:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# new_col = [3, 4, 5]
|
||||
# table.by_col! # => #<CSV::Table mode:col row_count:4>
|
||||
# return_value = table[1] = new_col
|
||||
# return_value.equal?(new_col) # => true # Returned the column
|
||||
# table[1] # => [3, 4, 5]
|
||||
# # The rows, as revised:
|
||||
# table.by_row! # => #<CSV::Table mode:row row_count:4>
|
||||
# table[0].to_h # => {"Name"=>"foo", "Value"=>3}
|
||||
# table[1].to_h # => {"Name"=>"bar", "Value"=>4}
|
||||
# table[2].to_h # => {"Name"=>"baz", "Value"=>5}
|
||||
# table.by_col! # => #<CSV::Table mode:col row_count:4>
|
||||
#
|
||||
# If there are too few values, fills with +nil+ values:
|
||||
# table[1] = [0]
|
||||
# table[1] # => [0, nil, nil]
|
||||
#
|
||||
# If there are too many values, ignores the extra values:
|
||||
# table[1] = [0, 1, 2, 3, 4]
|
||||
# table[1] # => [0, 1, 2]
|
||||
#
|
||||
# If a single value is given, replaces all fields in the column with that value:
|
||||
# table[1] = 'bat'
|
||||
# table[1] # => ["bat", "bat", "bat"]
|
||||
#
|
||||
# ---
|
||||
#
|
||||
# Set a Column by Its \String Header::
|
||||
# - Form: <tt>table[header] = field_or_array_of_fields</tt>,
|
||||
# +header+ a \String header, +field_or_array_of_fields+ a field value
|
||||
# or an \Array of \String fields.
|
||||
# - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt>.
|
||||
# - Return value: +field_or_array_of_fields+.
|
||||
#
|
||||
# If the column exists, it is replaced:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# new_col = [3, 4, 5]
|
||||
# table.by_col! # => #<CSV::Table mode:col row_count:4>
|
||||
# return_value = table['Value'] = new_col
|
||||
# return_value.equal?(new_col) # => true # Returned the column
|
||||
# table['Value'] # => [3, 4, 5]
|
||||
# # The rows, as revised:
|
||||
# table.by_row! # => #<CSV::Table mode:row row_count:4>
|
||||
# table[0].to_h # => {"Name"=>"foo", "Value"=>3}
|
||||
# table[1].to_h # => {"Name"=>"bar", "Value"=>4}
|
||||
# table[2].to_h # => {"Name"=>"baz", "Value"=>5}
|
||||
# table.by_col! # => #<CSV::Table mode:col row_count:4>
|
||||
#
|
||||
# If there are too few values, fills with +nil+ values:
|
||||
# table['Value'] = [0]
|
||||
# table['Value'] # => [0, nil, nil]
|
||||
#
|
||||
# If there are too many values, ignores the extra values:
|
||||
# table['Value'] = [0, 1, 2, 3, 4]
|
||||
# table['Value'] # => [0, 1, 2]
|
||||
#
|
||||
# If the column does not exist, extends the table by adding columns:
|
||||
# table['Note'] = ['x', 'y', 'z']
|
||||
# table['Note'] # => ["x", "y", "z"]
|
||||
# # The rows, as revised:
|
||||
# table.by_row!
|
||||
# table[0].to_h # => {"Name"=>"foo", "Value"=>0, "Note"=>"x"}
|
||||
# table[1].to_h # => {"Name"=>"bar", "Value"=>1, "Note"=>"y"}
|
||||
# table[2].to_h # => {"Name"=>"baz", "Value"=>2, "Note"=>"z"}
|
||||
# table.by_col!
|
||||
#
|
||||
# If a single value is given, replaces all fields in the column with that value:
|
||||
# table['Value'] = 'bat'
|
||||
# table['Value'] # => ["bat", "bat", "bat"]
|
||||
def []=(index_or_header, value)
|
||||
if @mode == :row or # by index
|
||||
(@mode == :col_or_row and index_or_header.is_a? Integer)
|
||||
|
@ -463,6 +855,9 @@ class CSV
|
|||
end
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# table.delete_if {|row_or_column| ... } -> self
|
||||
#
|
||||
# Removes rows or columns for which the block returns a truthy value;
|
||||
# returns +self+.
|
||||
#
|
||||
|
@ -506,6 +901,9 @@ class CSV
|
|||
|
||||
include Enumerable
|
||||
|
||||
# :call-seq:
|
||||
# table.each {|row_or_column| ... } -> self
|
||||
#
|
||||
# Calls the block with each row or column; returns +self+.
|
||||
#
|
||||
# When the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>,
|
||||
|
@ -534,7 +932,9 @@ class CSV
|
|||
return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given?
|
||||
|
||||
if @mode == :col
|
||||
headers.each { |header| yield([header, self[header]]) }
|
||||
headers.each.with_index do |header, i|
|
||||
yield([header, @table.map {|row| row[header, i]}])
|
||||
end
|
||||
else
|
||||
@table.each(&block)
|
||||
end
|
||||
|
@ -542,6 +942,9 @@ class CSV
|
|||
self # for chaining
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# table == other_table -> true or false
|
||||
#
|
||||
# Returns +true+ if each row of +self+ <tt>==</tt>
|
||||
# the corresponding row of +other_table+, otherwise, +false+.
|
||||
#
|
||||
|
@ -565,10 +968,14 @@ class CSV
|
|||
@table == other
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# table.to_a -> array_of_arrays
|
||||
#
|
||||
# Returns the table as an Array of Arrays. Headers will be the first row,
|
||||
# then all of the field rows will follow.
|
||||
#
|
||||
# Returns the table as an \Array of \Arrays;
|
||||
# the headers are in the first row:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.to_a # => [["Name", "Value"], ["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
||||
def to_a
|
||||
array = [headers]
|
||||
@table.each do |row|
|
||||
|
@ -578,16 +985,29 @@ class CSV
|
|||
array
|
||||
end
|
||||
|
||||
# :call-seq:
|
||||
# table.to_csv(**options) -> csv_string
|
||||
#
|
||||
# Returns the table as a complete CSV String. Headers will be listed first,
|
||||
# then all of the field rows.
|
||||
# Returns the table as a \CSV string.
|
||||
# See {Options for Generating}[../CSV.html#class-CSV-label-Options+for+Generating].
|
||||
#
|
||||
# This method assumes you want the Table.headers(), unless you explicitly
|
||||
# pass <tt>:write_headers => false</tt>.
|
||||
# Defaults option +write_headers+ to +true+:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.to_csv # => "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
#
|
||||
def to_csv(write_headers: true, **options)
|
||||
# Omits the headers if option +write_headers+ is given as +false+
|
||||
# (see {Option +write_headers+}[../CSV.html#class-CSV-label-Option+write_headers]):
|
||||
# table.to_csv(write_headers: false) # => "foo,0\nbar,1\nbaz,2\n"
|
||||
#
|
||||
# Limits the number of rows if option +limit+ is given (here, +2+):
|
||||
# table.to_csv(limit: 2) # => "Name,Value\nfoo,0\nbar,1\n"
|
||||
def to_csv(write_headers: true, limit: nil, **options)
|
||||
array = write_headers ? [headers.to_csv(**options)] : []
|
||||
@table.each do |row|
|
||||
limit ||= @table.size
|
||||
limit = @table.size + 1 + limit if limit < 0
|
||||
limit = 0 if limit < 0
|
||||
@table.first(limit).each do |row|
|
||||
array.push(row.fields.to_csv(**options)) unless row.header_row?
|
||||
end
|
||||
|
||||
|
@ -613,9 +1033,24 @@ class CSV
|
|||
end
|
||||
end
|
||||
|
||||
# Shows the mode and size of this table in a US-ASCII String.
|
||||
# :call-seq:
|
||||
# table.inspect -> string
|
||||
#
|
||||
# Returns a <tt>US-ASCII</tt>-encoded \String showing table:
|
||||
# - Class: <tt>CSV::Table</tt>.
|
||||
# - Access mode: <tt>:row</tt>, <tt>:col</tt>, or <tt>:col_or_row</tt>.
|
||||
# - Size: Row count, including the header row.
|
||||
#
|
||||
# Example:
|
||||
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
# table = CSV.parse(source, headers: true)
|
||||
# table.inspect # => "#<CSV::Table mode:col_or_row row_count:4>\nName,Value\nfoo,0\nbar,1\nbaz,2\n"
|
||||
#
|
||||
def inspect
|
||||
"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII")
|
||||
inspected = +"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
|
||||
summary = to_csv(limit: 5)
|
||||
inspected << "\n" << summary if summary.encoding.ascii_compatible?
|
||||
inspected
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -2,5 +2,5 @@
|
|||
|
||||
class CSV
|
||||
# The version of the installed library.
|
||||
VERSION = "3.2.2"
|
||||
VERSION = "3.2.3"
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue