* lib/rexml: Merge fixes since 1.8.6 made solely on the ruby_1_8_6

branch.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@16067 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
knu 2008-04-18 07:07:11 +00:00
parent 40e7794993
commit 2f1fa7e402
28 changed files with 1785 additions and 1373 deletions

View file

@ -1,3 +1,8 @@
Fri Apr 18 16:01:37 2008 Akinori MUSHA <knu@iDaemons.org>
* lib/rexml: Merge fixes since 1.8.6 made solely on the ruby_1_8_6
branch.
Fri Apr 18 07:56:18 2008 Hidetoshi NAGAI <nagai@ai.kyutech.ac.jp> Fri Apr 18 07:56:18 2008 Hidetoshi NAGAI <nagai@ai.kyutech.ac.jp>
* ext/tk/lib/tk.rb, ext/tk/lib/tk/scrollbar.rb, ext/tk/lib/tk/scale.rb: * ext/tk/lib/tk.rb, ext/tk/lib/tk/scrollbar.rb, ext/tk/lib/tk/scale.rb:

View file

@ -18,25 +18,41 @@ module REXML
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
# Constructor. # Constructor.
# FIXME: The parser doesn't catch illegal characters in attributes
#
# first::
# Either: an Attribute, which this new attribute will become a
# clone of; or a String, which is the name of this attribute
# second::
# If +first+ is an Attribute, then this may be an Element, or nil.
# If nil, then the Element parent of this attribute is the parent
# of the +first+ Attribute. If the first argument is a String,
# then this must also be a String, and is the content of the attribute.
# If this is the content, it must be fully normalized (contain no
# illegal characters).
# parent::
# Ignored unless +first+ is a String; otherwise, may be the Element
# parent of this attribute, or nil.
#
# #
# Attribute.new( attribute_to_clone ) # Attribute.new( attribute_to_clone )
# Attribute.new( source ) # Attribute.new( attribute_to_clone, parent_element )
# Attribute.new( "attr", "attr_value" ) # Attribute.new( "attr", "attr_value" )
# Attribute.new( "attr", "attr_value", parent_element ) # Attribute.new( "attr", "attr_value", parent_element )
def initialize( first, second=nil, parent=nil ) def initialize( first, second=nil, parent=nil )
@normalized = @unnormalized = @element = nil @normalized = @unnormalized = @element = nil
if first.kind_of? Attribute if first.kind_of? Attribute
self.name = first.expanded_name self.name = first.expanded_name
@value = first.value @unnormalized = first.value
if second.kind_of? Element if second.kind_of? Element
@element = second @element = second
else else
@element = first.element @element = first.element
end end
elsif first.kind_of? String elsif first.kind_of? String
@element = parent if parent.kind_of? Element @element = parent
self.name = first self.name = first
@value = second.to_s @normalized = second.to_s
else else
raise "illegal argument #{first.class.name} to Attribute constructor" raise "illegal argument #{first.class.name} to Attribute constructor"
end end
@ -72,7 +88,7 @@ module REXML
# Returns true if other is an Attribute and has the same name and value, # Returns true if other is an Attribute and has the same name and value,
# false otherwise. # false otherwise.
def ==( other ) def ==( other )
other.kind_of?(Attribute) and other.name==name and other.value==@value other.kind_of?(Attribute) and other.name==name and other.value==value
end end
# Creates (and returns) a hash from both the name and value # Creates (and returns) a hash from both the name and value
@ -87,7 +103,11 @@ module REXML
# b = Attribute.new( "ns:x", "y" ) # b = Attribute.new( "ns:x", "y" )
# b.to_string # -> "ns:x='y'" # b.to_string # -> "ns:x='y'"
def to_string def to_string
"#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'" if @element and @element.context and @element.context[:attribute_quote] == :quote
%Q^#@expanded_name="#{to_s().gsub(/"/, '&quote;')}"^
else
"#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
end
end end
# Returns the attribute value, with entities replaced # Returns the attribute value, with entities replaced
@ -100,8 +120,9 @@ module REXML
doctype = doc.doctype if doc doctype = doc.doctype if doc
end end
@normalized = Text::normalize( @unnormalized, doctype )
@unnormalized = nil @unnormalized = nil
@normalized = Text::normalize( @value, doctype ) @normalized
end end
# Returns the UNNORMALIZED value of this attribute. That is, entities # Returns the UNNORMALIZED value of this attribute. That is, entities
@ -113,8 +134,9 @@ module REXML
doc = @element.document doc = @element.document
doctype = doc.doctype if doc doctype = doc.doctype if doc
end end
@unnormalized = Text::unnormalize( @normalized, doctype )
@normalized = nil @normalized = nil
@unnormalized = Text::unnormalize( @value, doctype ) @unnormalized
end end
# Returns a copy of this attribute # Returns a copy of this attribute

View file

@ -39,31 +39,26 @@ module REXML
@string @string
end end
# == DEPRECATED
# See the rexml/formatters package
#
# Generates XML output of this object # Generates XML output of this object
# #
# output:: # output::
# Where to write the string. Defaults to $stdout # Where to write the string. Defaults to $stdout
# indent:: # indent::
# An integer. If -1, no indenting will be used; otherwise, the # The amount to indent this node by
# indentation will be this number of spaces, and children will be
# indented an additional amount. Defaults to -1.
# transitive:: # transitive::
# If transitive is true and indent is >= 0, then the output will be # Ignored
# pretty-printed in such a way that the added whitespace does not affect
# the absolute *value* of the document -- that is, it leaves the value
# and number of Text nodes in the document unchanged.
# ie_hack:: # ie_hack::
# Internet Explorer is the worst piece of crap to have ever been # Ignored
# written, with the possible exception of Windows itself. Since IE is
# unable to parse proper XML, we have to provide a hack to generate XML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags.
# #
# _Examples_ # _Examples_
# c = CData.new( " Some text " ) # c = CData.new( " Some text " )
# c.write( $stdout ) #-> <![CDATA[ Some text ]]> # c.write( $stdout ) #-> <![CDATA[ Some text ]]>
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
#indent( output, indent ) unless transitive Kernel.warn( "#{self.class.name}.write is deprecated" )
indent( output, indent )
output << START output << START
output << @string output << @string
output << STOP output << STOP

View file

@ -34,6 +34,9 @@ module REXML
Comment.new self Comment.new self
end end
# == DEPRECATED
# See REXML::Formatters
#
# output:: # output::
# Where to write the string # Where to write the string
# indent:: # indent::
@ -45,6 +48,7 @@ module REXML
# ie_hack:: # ie_hack::
# Needed for conformity to the child API, but not used by this class. # Needed for conformity to the child API, but not used by this class.
def write( output, indent=-1, transitive=false, ie_hack=false ) def write( output, indent=-1, transitive=false, ie_hack=false )
Kernel.warn("Comment.write is deprecated. See REXML::Formatters")
indent( output, indent ) indent( output, indent )
output << START output << START
output << @string output << @string

View file

@ -98,38 +98,30 @@ module REXML
# output:: # output::
# Where to write the string # Where to write the string
# indent:: # indent::
# An integer. If -1, no indenting will be used; otherwise, the # An integer. If -1, no indentation will be used; otherwise, the
# indentation will be this number of spaces, and children will be # indentation will be this number of spaces, and children will be
# indented an additional amount. # indented an additional amount.
# transitive:: # transitive::
# If transitive is true and indent is >= 0, then the output will be # Ignored
# pretty-printed in such a way that the added whitespace does not affect
# the absolute *value* of the document -- that is, it leaves the value
# and number of Text nodes in the document unchanged.
# ie_hack:: # ie_hack::
# Internet Explorer is the worst piece of crap to have ever been # Ignored
# written, with the possible exception of Windows itself. Since IE is
# unable to parse proper XML, we have to provide a hack to generate XML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags.
#
def write( output, indent=0, transitive=false, ie_hack=false ) def write( output, indent=0, transitive=false, ie_hack=false )
f = REXML::Formatters::Default.new
indent( output, indent ) indent( output, indent )
output << START output << START
output << ' ' output << ' '
output << @name output << @name
output << " #@external_id" if @external_id output << " #@external_id" if @external_id
output << " #@long_name" if @long_name output << " #{@long_name.inspect}" if @long_name
output << " #@uri" if @uri output << " #{@uri.inspect}" if @uri
unless @children.empty? unless @children.empty?
next_indent = indent + 1 next_indent = indent + 1
output << ' [' output << ' ['
child = nil # speed child = nil # speed
@children.each { |child| @children.each { |child|
output << "\n" output << "\n"
child.write( output, next_indent ) f.write( child, output )
} }
#output << ' '*next_indent
output << "\n]" output << "\n]"
end end
output << STOP output << STOP
@ -219,8 +211,10 @@ module REXML
@string+'>' @string+'>'
end end
# == DEPRECATED
# See REXML::Formatters
#
def write( output, indent ) def write( output, indent )
output << (' '*indent) if indent > 0
output << to_s output << to_s
end end
end end
@ -264,7 +258,6 @@ module REXML
end end
def write( output, indent=-1 ) def write( output, indent=-1 )
output << (' '*indent) if indent > 0
output << to_s output << to_s
end end

View file

@ -31,9 +31,6 @@ module REXML
# to be sources of valid XML documents. # to be sources of valid XML documents.
# @param context if supplied, contains the context of the document; # @param context if supplied, contains the context of the document;
# this should be a Hash. # this should be a Hash.
# NOTE that I'm not sure what the context is for; I cloned it out of
# the Electric XML API (in which it also seems to do nothing), and it
# is now legacy. It may do something, someday... it may disappear.
def initialize( source = nil, context = {} ) def initialize( source = nil, context = {} )
super() super()
@context = context @context = context
@ -69,6 +66,7 @@ module REXML
def add( child ) def add( child )
if child.kind_of? XMLDecl if child.kind_of? XMLDecl
@children.unshift child @children.unshift child
child.parent = self
elsif child.kind_of? DocType elsif child.kind_of? DocType
# Find first Element or DocType node and insert the decl right # Find first Element or DocType node and insert the decl right
# before it. If there is no such node, just insert the child at the # before it. If there is no such node, just insert the child at the
@ -142,42 +140,59 @@ module REXML
xml_decl().stand_alone? xml_decl().stand_alone?
end end
# Write the XML tree out, optionally with indent. This writes out the # Write the XML tree out, optionally with indent. This writes out the
# entire XML document, including XML declarations, doctype declarations, # entire XML document, including XML declarations, doctype declarations,
# and processing instructions (if any are given). # and processing instructions (if any are given).
# A controversial point is whether Document should always write the XML #
# declaration (<?xml version='1.0'?>) whether or not one is given by the # A controversial point is whether Document should always write the XML
# user (or source document). REXML does not write one if one was not # declaration (<?xml version='1.0'?>) whether or not one is given by the
# specified, because it adds unneccessary bandwidth to applications such # user (or source document). REXML does not write one if one was not
# as XML-RPC. # specified, because it adds unneccessary bandwidth to applications such
# # as XML-RPC.
# #
# output:: # See also the classes in the rexml/formatters package for the proper way
# output an object which supports '<< string'; this is where the # to change the default formatting of XML output
# document will be written. #
# indent:: # _Examples_
# An integer. If -1, no indenting will be used; otherwise, the # Document.new("<a><b/></a>").serialize
# indentation will be this number of spaces, and children will be #
# indented an additional amount. Defaults to -1 # output_string = ""
# transitive:: # tr = Transitive.new( output_string )
# If transitive is true and indent is >= 0, then the output will be # Document.new("<a><b/></a>").serialize( tr )
# pretty-printed in such a way that the added whitespace does not affect #
# the absolute *value* of the document -- that is, it leaves the value # output::
# and number of Text nodes in the document unchanged. # output an object which supports '<< string'; this is where the
# ie_hack:: # document will be written.
# Internet Explorer is the worst piece of crap to have ever been # indent::
# written, with the possible exception of Windows itself. Since IE is # An integer. If -1, no indenting will be used; otherwise, the
# unable to parse proper XML, we have to provide a hack to generate XML # indentation will be twice this number of spaces, and children will be
# that IE's limited abilities can handle. This hack inserts a space # indented an additional amount. For a value of 3, every item will be
# before the /> on empty tags. Defaults to false # indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) # trans::
output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output) # If transitive is true and indent is >= 0, then the output will be
@children.each { |node| # pretty-printed in such a way that the added whitespace does not affect
indent( output, indent ) if node.node_type == :element # the absolute *value* of the document -- that is, it leaves the value
if node.write( output, indent, transitive, ie_hack ) # and number of Text nodes in the document unchanged.
output << "\n" unless indent<0 or node == @children[-1] # ie_hack::
# Internet Explorer is the worst piece of crap to have ever been
# written, with the possible exception of Windows itself. Since IE is
# unable to parse proper XML, we have to provide a hack to generate XML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags. Defaults to false
def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
output = Output.new( output, xml_decl.encoding )
end
formatter = if indent > -1
if trans
REXML::Formatters::Transitive.new( indent, ie_hack )
else
REXML::Formatters::Pretty.new( indent, ie_hack )
end
else
REXML::Formatters::Default.new( ie_hack )
end end
} formatter.write( self, output )
end end

File diff suppressed because it is too large Load diff

View file

@ -56,10 +56,15 @@ module REXML
def check_encoding str def check_encoding str
# We have to recognize UTF-16, LSB UTF-16, and UTF-8 # We have to recognize UTF-16, LSB UTF-16, and UTF-8
return UTF_16 if /\A\xfe\xff/n =~ str if str[0] == 0xfe && str[1] == 0xff
return UNILE if /\A\xff\xfe/n =~ str str[0,2] = ""
str =~ /^\s*<?xml\s*version\s*=\s*(['"]).*?\2\s*encoding\s*=\s*(["'])(.*?)\2/um return UTF_16
return $1.upcase if $1 elsif str[0] == 0xff && str[1] == 0xfe
str[0,2] = ""
return UNILE
end
str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
return $3.upcase if $3
return UTF_8 return UTF_8
end end
end end

View file

@ -3,9 +3,15 @@
# #
module REXML module REXML
module Encoding module Encoding
@@__REXML_encoding_methods = %q~ register( "CP-1252" ) do |o|
class << o
alias encode encode_cp1252
alias decode decode_cp1252
end
end
# Convert from UTF-8 # Convert from UTF-8
def encode content def encode_cp1252(content)
array_utf8 = content.unpack('U*') array_utf8 = content.unpack('U*')
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -54,7 +60,7 @@ module REXML
end end
# Convert to UTF-8 # Convert to UTF-8
def decode(str) def decode_cp1252(str)
array_latin9 = str.unpack('C*') array_latin9 = str.unpack('C*')
array_enc = [] array_enc = []
array_latin9.each do |num| array_latin9.each do |num|
@ -93,6 +99,5 @@ module REXML
end end
array_enc.pack('U*') array_enc.pack('U*')
end end
~
end end
end end

View file

@ -3,9 +3,13 @@
# #
module REXML module REXML
module Encoding module Encoding
@@__REXML_encoding_methods = %q~ register("ISO-8859-15") do |o|
alias encode to_iso_8859_15
alias decode from_iso_8859_15
end
# Convert from UTF-8 # Convert from UTF-8
def to_iso_8859_15 content def to_iso_8859_15(content)
array_utf8 = content.unpack('U*') array_utf8 = content.unpack('U*')
array_enc = [] array_enc = []
array_utf8.each do |num| array_utf8.each do |num|
@ -64,6 +68,5 @@ module REXML
end end
array_enc.pack('U*') array_enc.pack('U*')
end end
~
end end
end end

View file

@ -16,7 +16,7 @@ module REXML
end end
def decode_utf16(str) def decode_utf16(str)
str = str[2..-1] if /^\376\377/ =~ str str = str[2..-1] if /^\376\377/n =~ str
array_enc=str.unpack('C*') array_enc=str.unpack('C*')
array_utf8 = [] array_utf8 = []
0.step(array_enc.size-1, 2){|i| 0.step(array_enc.size-1, 2){|i|

View file

@ -89,6 +89,12 @@ module REXML
# Write out a fully formed, correct entity definition (assuming the Entity # Write out a fully formed, correct entity definition (assuming the Entity
# object itself is valid.) # object itself is valid.)
#
# out::
# An object implementing <TT>&lt;&lt;</TT> to which the entity will be
# output
# indent::
# *DEPRECATED* and ignored
def write out, indent=-1 def write out, indent=-1
out << '<!ENTITY ' out << '<!ENTITY '
out << '% ' if @reference out << '% ' if @reference

View file

@ -0,0 +1,109 @@
module REXML
  module Formatters
    # Serializes nodes to an output without inserting any formatting
    # whitespace.  The per-node-type +write_*+ methods are protected hooks
    # that subclasses override to change the layout.
    class Default
      # Prints out the XML document with no formatting -- except if ie_hack is
      # set.
      #
      # ie_hack::
      #   If set to true, then inserts whitespace before the close of an empty
      #   tag, so that IE's bad XML parser doesn't choke.
      def initialize( ie_hack=false )
        @ie_hack = ie_hack
      end

      # Writes the node to some output.
      #
      # node::
      #   The node to write
      # output::
      #   A class implementing <TT>&lt;&lt;</TT>.  Pass in an Output object to
      #   change the output encoding.
      #
      # Raises an Exception ("XML FORMATTING ERROR") when +node+ is not one of
      # the node classes handled below.
      def write( node, output )
        case node

        when Document
          # Wrap the target so non-UTF-8 documents are transcoded on output,
          # unless the caller already supplied an Output.
          if node.xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
            output = Output.new( output, node.xml_decl.encoding )
          end
          write_document( node, output )

        when Element
          write_element( node, output )

        when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
             Attribute, AttlistDecl
          # These nodes serialize themselves; -1 is passed as their indent
          # argument.
          node.write( output,-1 )

        when Instruction
          write_instruction( node, output )

        when DocType, XMLDecl
          node.write( output )

        when Comment
          write_comment( node, output )

        when CData
          write_cdata( node, output )

        when Text
          write_text( node, output )

        else
          raise Exception.new("XML FORMATTING ERROR")

        end
      end

      protected

      # Writes every child of the document, in order.
      def write_document( node, output )
        node.children.each { |child| write( child, output ) }
      end

      # Writes the start tag with its attributes, then either closes it as an
      # empty tag (preceded by a space when ie_hack is set) or writes the
      # children followed by the end tag.
      def write_element( node, output )
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          node.children.each { |child|
            write( child, output )
          }
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes the text node's to_s value unchanged.
      def write_text( node, output )
        output << node.to_s()
      end

      # Writes the comment between the Comment::START and Comment::STOP
      # delimiters.
      def write_comment( node, output )
        output << Comment::START
        output << node.to_s
        output << Comment::STOP
      end

      # Writes the CDATA section between the CData::START and CData::STOP
      # delimiters.
      def write_cdata( node, output )
        output << CData::START
        output << node.to_s
        output << CData::STOP
      end

      # Writes a processing instruction.  The first backslash is stripped from
      # Instruction::START / Instruction::STOP before they are emitted.
      #
      # The separating space and the content are written only when the
      # instruction has content; appending a nil content would otherwise
      # raise a TypeError.
      def write_instruction( node, output )
        output << Instruction::START.sub(/\\/u, '')
        output << node.target
        content = node.content
        if content
          output << ' '
          output << content
        end
        output << Instruction::STOP.sub(/\\/u, '')
      end
    end
  end
end

View file

@ -0,0 +1,137 @@
require 'rexml/formatters/default'
module REXML
  module Formatters
    # Pretty-prints an XML document.  This destroys whitespace in text nodes
    # and will insert carriage returns and indentations.
    #
    # TODO: Add an option to print attributes on new lines
    class Pretty < Default

      # If compact is set to true, then the formatter will attempt to use as
      # little space as possible
      attr_accessor :compact
      # The width of a page.  Used for formatting text
      attr_accessor :width

      # Create a new pretty printer.
      #
      # indentation::
      #   An integer greater than 0.  The indentation of each level will be
      #   this number of spaces.  If this is < 1, the behavior of this object
      #   is undefined.  Defaults to 2.
      # ie_hack::
      #   If true, the printer will insert whitespace before closing empty
      #   tags, thereby allowing Internet Explorer's feeble XML parser to
      #   function. Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
        @width = 80
        # Explicitly off by default so the reader never sees an unset ivar.
        @compact = false
      end

      protected

      # Writes an element at the current indentation level, putting each
      # non-blank child on its own line one level deeper.  In compact mode an
      # element whose children are all Text is kept on one line when the
      # rendered text is shorter than +width+.
      def write_element(node, output)
        output << ' '*@level
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          if @ie_hack
            output << " "
          end
          output << "/"
        else
          output << ">"
          # If compact and all children are text, and if the formatted output
          # is less than the specified width, then try to print everything on
          # one line
          skip = false
          if compact
            if node.children.all? { |c| c.kind_of?(Text) }
              string = ""
              # Render the text at level 0 so it carries no indentation,
              # then restore the real level.
              old_level = @level
              @level = 0
              node.children.each { |child| write( child, string ) }
              @level = old_level
              if string.length < @width
                output << string
                skip = true
              end
            end
          end
          unless skip
            output << "\n"
            @level += @indentation
            node.children.each { |child|
              # Whitespace-only text nodes are dropped.
              next if child.kind_of?(Text) and child.to_s.strip.length == 0
              write( child, output )
              output << "\n"
            }
            @level -= @indentation
            output << ' '*@level
          end
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes a text node with every whitespace character turned into a
      # space, runs of spaces collapsed, and the result wrapped to the
      # configured +width+ (less the current indentation) and indented.
      def write_text( node, output )
        # Non-mutating calls: the string handed back by node.to_s is left
        # untouched (and may safely be frozen).
        s = node.to_s().gsub(/\s/,' ').squeeze(" ")
        s = wrap(s, @width - @level)
        s = indent_text(s, @level, " ", true)
        output << (' '*@level + s)
      end

      # Writes a comment, indented to the current level.
      def write_comment( node, output)
        output << ' ' * @level
        super
      end

      # Writes a CDATA section, indented to the current level.
      def write_cdata( node, output)
        output << ' ' * @level
        super
      end

      # Writes the document's children, separating them with newlines.
      def write_document( node, output )
        # Ok, this is a bit odd.  All XML documents have an XML declaration,
        # but it may not write itself if the user didn't specifically add it,
        # either through the API or in the input document.  If it doesn't write
        # itself, then we don't need a carriage return... which makes this
        # logic more complex.
        node.children.each { |child|
          next if child == node.children[-1] and child.instance_of?(Text)
          # The first child is only asked for +writethis+ when it responds to
          # it; NOTE(review): presumably only XMLDecl does -- confirm.  Any
          # other first child is treated as having been written.
          unless child == node.children[0] or child.instance_of?(Text) or
            (child == node.children[1] and
             node.children[0].respond_to?(:writethis) and
             !node.children[0].writethis)
            output << "\n"
          end
          write( child, output )
        }
      end

      private

      # Inserts +style+ repeated +level+ times after every newline in
      # +string+.  Returns +string+ unchanged when +level+ is negative.
      # +indentfirstline+ is accepted but not used.
      def indent_text(string, level=1, style="\t", indentfirstline=true)
        return string if level < 0
        string.gsub(/\n/, "\n#{style*level}")
      end

      # Breaks +string+ into lines of at most +width+ characters, splitting at
      # the last space at or before the cutoff.  A stretch with no such space
      # is left unbroken rather than raising.  Iterative, so the number of
      # lines is not limited by the call stack.
      def wrap(string, width)
        parts = []
        while string.length > width && (place = string.rindex(' ', width))
          parts << string[0...place]
          string = string[place+1..-1]
        end
        parts << string
        parts.join("\n")
      end

    end
  end
end

View file

@ -0,0 +1,56 @@
require 'rexml/formatters/pretty'
module REXML
  module Formatters
    # The Transitive formatter writes an XML document that parses to an
    # identical document as the source document.  This means that no extra
    # whitespace nodes are inserted, and whitespace within text nodes is
    # preserved.  Within these constraints, the document is pretty-printed,
    # with whitespace inserted into the metadata to introduce formatting.
    #
    # Note that this is only useful if the original XML is not already
    # formatted.  Since this formatter does not alter whitespace nodes, the
    # results of formatting already formatted XML will be odd.
    class Transitive < Default
      # Create a new transitive printer.
      #
      # indentation::
      #   The number of spaces added per nesting level.  Defaults to 2.
      # ie_hack::
      #   Accepted so that this formatter can be constructed with the same
      #   (indentation, ie_hack) arguments as the other indenting formatter;
      #   the value is stored but write_element here does not consult it.
      #   Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
      end

      protected

      # Writes an element with the line break and indentation placed inside
      # the tags (just before the closing '>' or '/>'), so that no new text
      # nodes are introduced between elements.
      def write_element( node, output )
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        output << "\n"
        output << ' '*@level
        if node.children.empty?
          output << "/"
        else
          output << ">"
          @level += @indentation
          node.children.each { |child|
            write( child, output )
          }
          @level -= @indentation
          output << "</#{node.expanded_name}"
          output << "\n"
          output << ' '*@level
        end
        output << ">"
      end

      # Writes the text node's to_s value unchanged.
      def write_text( node, output )
        output << node.to_s()
      end
    end
  end
end

View file

@ -339,7 +339,6 @@ module REXML
object.to_f object.to_f
else else
str = string( object ) str = string( object )
#puts "STRING OF #{object.inspect} = #{str}"
# If XPath ever gets scientific notation... # If XPath ever gets scientific notation...
#if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/ #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/
if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/ if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/

View file

@ -38,7 +38,11 @@ module REXML
Instruction.new self Instruction.new self
end end
# == DEPRECATED
# See the rexml/formatters package
#
def write writer, indent=-1, transitive=false, ie_hack=false def write writer, indent=-1, transitive=false, ie_hack=false
Kernel.warn( "#{self.class.name}.write is deprecated" )
indent(writer, indent) indent(writer, indent)
writer << START.sub(/\\/u, '') writer << START.sub(/\\/u, '')
writer << @target writer << @target

View file

@ -1,4 +1,6 @@
require "rexml/parseexception" require "rexml/parseexception"
require "rexml/formatters/pretty"
require "rexml/formatters/default"
module REXML module REXML
# Represents a node in the tree. Nodes are never encountered except as # Represents a node in the tree. Nodes are never encountered except as
@ -18,10 +20,19 @@ module REXML
@parent[ ind - 1 ] @parent[ ind - 1 ]
end end
def to_s indent=-1 # indent::
rv = "" # *DEPRECATED* This parameter is now ignored. See the formatters in the
write rv,indent # REXML::Formatters package for changing the output style.
rv def to_s indent=nil
unless indent.nil?
Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
f = REXML::Formatters::Pretty.new( indent )
f.write( self, rv, indent )
else
f = REXML::Formatters::Default.new
f.write( self, rv = "" )
end
return rv
end end
def indent to, ind def indent to, ind

View file

@ -1,5 +1,7 @@
require 'rexml/parseexception' require 'rexml/parseexception'
require 'rexml/undefinednamespaceexception'
require 'rexml/source' require 'rexml/source'
require 'set'
module REXML module REXML
module Parsers module Parsers
@ -24,7 +26,8 @@ module REXML
# Nat Price gave me some good ideas for the API. # Nat Price gave me some good ideas for the API.
class BaseParser class BaseParser
NCNAME_STR= '[\w:][\-\w\d.]*' NCNAME_STR= '[\w:][\-\w\d.]*'
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
NAMECHAR = '[\-\w\d\.:]' NAMECHAR = '[\-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)" NAME = "([\\w:]#{NAMECHAR}*)"
@ -35,7 +38,7 @@ module REXML
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u CDATA_START = /\A<!\[CDATA\[/u
@ -45,7 +48,7 @@ module REXML
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
@ -53,7 +56,7 @@ module REXML
STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
ENTITY_START = /^\s*<!ENTITY/ ENTITY_START = /^\s*<!ENTITY/
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
ELEMENTDECL_START = /^\s*<!ELEMENT/um ELEMENTDECL_START = /^\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
@ -133,6 +136,7 @@ module REXML
@tags = [] @tags = []
@stack = [] @stack = []
@entities = [] @entities = []
@nsstack = []
end end
def position def position
@ -188,6 +192,7 @@ module REXML
end end
return [ :end_document ] if empty? return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0 return @stack.shift if @stack.size > 0
#STDERR.puts @source.encoding
@source.read if @source.buffer.size<2 @source.read if @source.buffer.size<2
#STDERR.puts "BUFFER = #{@source.buffer.inspect}" #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
if @document_status == nil if @document_status == nil
@ -213,14 +218,15 @@ module REXML
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
when DOCTYPE_START when DOCTYPE_START
md = @source.match( DOCTYPE_PATTERN, true ) md = @source.match( DOCTYPE_PATTERN, true )
@nsstack.unshift(curr_ns=Set.new)
identity = md[1] identity = md[1]
close = md[2] close = md[2]
identity =~ IDENTITY identity =~ IDENTITY
name = $1 name = $1
raise REXML::ParseException("DOCTYPE is missing a name") if name.nil? raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
pub_sys = $2.nil? ? nil : $2.strip pub_sys = $2.nil? ? nil : $2.strip
long_name = $3.nil? ? nil : $3.strip long_name = $4.nil? ? nil : $4.strip
uri = $4.nil? ? nil : $4.strip uri = $6.nil? ? nil : $6.strip
args = [ :start_doctype, name, pub_sys, long_name, uri ] args = [ :start_doctype, name, pub_sys, long_name, uri ]
if close == ">" if close == ">"
@document_status = :after_doctype @document_status = :after_doctype
@ -288,6 +294,9 @@ module REXML
val = attdef[3] val = attdef[3]
val = attdef[4] if val == "#FIXED " val = attdef[4] if val == "#FIXED "
pairs[attdef[0]] = val pairs[attdef[0]] = val
if attdef[0] =~ /^xmlns:(.*)/
@nsstack[0] << $1
end
end end
end end
return [ :attlistdecl, element, pairs, contents ] return [ :attlistdecl, element, pairs, contents ]
@ -312,6 +321,7 @@ module REXML
begin begin
if @source.buffer[0] == ?< if @source.buffer[0] == ?<
if @source.buffer[1] == ?/ if @source.buffer[1] == ?/
@nsstack.shift
last_tag = @tags.pop last_tag = @tags.pop
#md = @source.match_to_consume( '>', CLOSE_MATCH) #md = @source.match_to_consume( '>', CLOSE_MATCH)
md = @source.match( CLOSE_MATCH, true ) md = @source.match( CLOSE_MATCH, true )
@ -345,19 +355,47 @@ module REXML
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
raise REXML::ParseException.new("malformed XML: missing tag start", @source) raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end end
attrs = [] attributes = {}
if md[2].size > 0 prefixes = Set.new
attrs = md[2].scan( ATTRIBUTE_PATTERN ) prefixes << md[2] if md[2]
@nsstack.unshift(curr_ns=Set.new)
if md[4].size > 0
attrs = md[4].scan( ATTRIBUTE_PATTERN )
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
attrs.each { |a,b,c,d,e|
if b == "xmlns"
if c == "xml"
if d != "http://www.w3.org/XML/1998/namespace"
msg = "The 'xml' prefix must not be bound to any other namespace "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
raise REXML::ParseException.new( msg, @source, self )
end
elsif c == "xmlns"
msg = "The 'xmlns' prefix must not be declared "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
raise REXML::ParseException.new( msg, @source, self)
end
curr_ns << c
elsif b
prefixes << b unless b == "xml"
end
attributes[a] = e
}
end end
if md[4] # Verify that all of the prefixes have been defined
for prefix in prefixes
unless @nsstack.find{|k| k.member?(prefix)}
raise UndefinedNamespaceException.new(prefix,@source,self)
end
end
if md[6]
@closed = md[1] @closed = md[1]
@nsstack.shift
else else
@tags.push( md[1] ) @tags.push( md[1] )
end end
attributes = {}
attrs.each { |a,b,c| attributes[a] = c }
return [ :start_element, md[1], attributes ] return [ :start_element, md[1], attributes ]
end end
else else
@ -371,6 +409,8 @@ module REXML
# return PullEvent.new( :text, md[1], unnormalized ) # return PullEvent.new( :text, md[1], unnormalized )
return [ :text, md[1] ] return [ :text, md[1] ]
end end
rescue REXML::UndefinedNamespaceException
raise
rescue REXML::ParseException rescue REXML::ParseException
raise raise
rescue Exception, NameError => error rescue Exception, NameError => error

View file

@ -94,6 +94,8 @@ module REXML
when :end_document when :end_document
handle( :end_document ) handle( :end_document )
break break
when :start_doctype
handle( :doctype, *event[1..-1])
when :end_doctype when :end_doctype
context = context[1] context = context[1]
when :start_element when :start_element
@ -167,7 +169,7 @@ module REXML
when :entitydecl when :entitydecl
@entities[ event[1] ] = event[2] if event.size == 3 @entities[ event[1] ] = event[2] if event.size == 3
handle( *event ) handle( *event )
when :processing_instruction, :comment, :doctype, :attlistdecl, when :processing_instruction, :comment, :attlistdecl,
:elementdecl, :cdata, :notationdecl, :xmldecl :elementdecl, :cdata, :notationdecl, :xmldecl
handle( *event ) handle( *event )
end end

View file

@ -1,4 +1,5 @@
require 'rexml/validation/validationexception' require 'rexml/validation/validationexception'
require 'rexml/undefinednamespaceexception'
module REXML module REXML
module Parsers module Parsers
@ -29,8 +30,7 @@ module REXML
return return
when :start_element when :start_element
tag_stack.push(event[1]) tag_stack.push(event[1])
# find the observers for namespaces el = @build_context = @build_context.add_element( event[1], event[2] )
@build_context = @build_context.add_element( event[1], event[2] )
when :end_element when :end_element
tag_stack.pop tag_stack.pop
@build_context = @build_context.parent @build_context = @build_context.parent
@ -86,6 +86,8 @@ module REXML
end end
rescue REXML::Validation::ValidationException rescue REXML::Validation::ValidationException
raise raise
rescue REXML::UndefinedNamespaceException
raise
rescue rescue
raise ParseException.new( $!.message, @parser.source, @parser, $! ) raise ParseException.new( $!.message, @parser.source, @parser, $! )
end end

View file

@ -551,7 +551,7 @@ module REXML
end end
end end
#puts "BEFORE WITH '#{rest}'" #puts "BEFORE WITH '#{rest}'"
rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/ rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
parsed.concat(n) parsed.concat(n)
return rest return rest
end end

View file

@ -1,3 +1,4 @@
# -*- encoding: utf-8 -*-
# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby. # REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
# #
# REXML is a _pure_ Ruby, XML 1.0 conforming, # REXML is a _pure_ Ruby, XML 1.0 conforming,
@ -10,8 +11,9 @@
# #
# Main page:: http://www.germane-software.com/software/rexml # Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom> # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
# Version:: 3.1.6 # Version:: 3.1.7.2
# Date:: 2006/335 # Date:: 2007/275
# Revision:: $Revision$
# #
# This API documentation can be downloaded from the REXML home page, or can # This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc] # be accessed online[http://www.germane-software.com/software/rexml_doc]
@ -20,9 +22,10 @@
# or can be accessed # or can be accessed
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html] # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML module REXML
COPYRIGHT = "Copyright © 2001-2006 Sean Russell <ser@germane-software.com>" COPYRIGHT = "Copyright \xC2\xA9 2001-2006 Sean Russell <ser@germane-software.com>"
DATE = "2006/335" VERSION = "3.1.7.2"
VERSION = "3.1.6" DATE = "2007/275"
REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip
Copyright = COPYRIGHT Copyright = COPYRIGHT
Version = VERSION Version = VERSION

View file

@ -1,139 +1,140 @@
require 'rexml/encoding' require 'rexml/encoding'
module REXML module REXML
# Generates Source-s.  USE THIS CLASS: obtain Source objects through
# SourceFactory.create_from rather than instantiating them directly.
class SourceFactory
  # Wraps +arg+ in the Source implementation that matches its type.
  #
  # arg::
  #   A String (wrapped in a new Source); an object that responds to
  #   +read+, +readline+, +nil?+ and +eof?+ (wrapped in a new IOSource);
  #   or an existing Source (returned unchanged).
  #
  # Returns the resulting Source.  Raises a RuntimeError when +arg+ is
  # none of the accepted kinds.
  def SourceFactory::create_from(arg)
    return Source.new(arg) if arg.kind_of?(String)

    # Duck-typed IO check; tested before the Source check, exactly as the
    # branches were ordered originally.
    io_like = [:read, :readline, :nil?, :eof?].all? { |meth| arg.respond_to?(meth) }
    return IOSource.new(arg) if io_like

    return arg if arg.kind_of?(Source)

    raise "#{arg.class} is not a valid input stream. It must walk \n"+
      "like either a String, an IO, or a Source."
  end
end
# A Source can be searched for patterns, and wraps buffers and other
# objects and provides consumption of text
class Source
  include Encoding
  # The current buffer (what we're going to read next)
  attr_reader :buffer
  # Initialised to 0 by the constructor; nothing in this class updates it.
  attr_reader :line
  attr_reader :encoding

  # Constructor
  # @param arg must be a String, and should be a valid XML document
  # @param encoding if non-null, sets the encoding of the source to this
  # value, overriding all encoding detection
  def initialize(arg, encoding=nil)
    @orig = @buffer = arg
    if encoding
      self.encoding = encoding
    else
      self.encoding = check_encoding( @buffer )
    end
    @line = 0
  end

  # Inherited from Encoding
  # Overridden to support optimized en/decoding.  Does nothing further when
  # the inherited setter returns a false value.  Otherwise caches the
  # encoded form of '>' as the line break and, for any encoding other than
  # UTF_8, decodes the buffer and flags later input for decoding.
  def encoding=(enc)
    return unless super
    @line_break = encode( '>' )
    if enc != UTF_8
      @buffer = decode(@buffer)
      @to_utf = true
    else
      @to_utf = false
    end
  end

  # Scans the source for a given pattern.  Note, that this is not your
  # usual scan() method.  For one thing, the pattern argument has some
  # requirements; for another, the source can be consumed.  You can easily
  # confuse this method.  Originally, the patterns were easier
  # to construct and this method more robust, because this method
  # generated search regexes on the fly; however, this was
  # computationally expensive and slowed down the entire REXML package
  # considerably, since this is by far the most commonly called method.
  # @param pattern must be a Regexp, and must be in the form of
  # /^\s*(#{your pattern, with no groups})(.*)/.
  # @param cons if true and something matched, the buffer is replaced by
  # the text following the last match.
  # @return the result of String#scan on the buffer (an empty Array when
  # nothing matched), or nil if the buffer itself is nil.
  def scan(pattern, cons=false)
    return nil if @buffer.nil?
    rv = @buffer.scan(pattern)
    # $' is the text after the last match made by String#scan above.
    @buffer = $' if cons and rv.size>0
    rv
  end

  # No-op here: a plain Source has nothing further to read.
  def read
  end

  # Drops everything up to and including the first match of +pattern+.
  # The buffer is left untouched when the pattern does not match.
  def consume( pattern )
    @buffer = $' if pattern.match( @buffer )
  end

  # Matches +pattern+ against the buffer without consuming anything.
  # +char+ is unused by this implementation.
  # @return the MatchData, or nil when there is no match
  def match_to( char, pattern )
    return pattern.match(@buffer)
  end

  # Matches +pattern+ against the buffer and, on success, consumes the
  # text up to and including the match.  +char+ is unused by this
  # implementation.
  #
  # The buffer is only replaced when the match succeeds.  Assigning the
  # post-match text unconditionally would set the buffer to nil after a
  # failed match, because there is no post-match text in that case.
  # @return the MatchData, or nil when there is no match
  def match_to_consume( char, pattern )
    md = pattern.match(@buffer)
    @buffer = md.post_match if md
    return md
  end

  # Matches +pattern+ against the buffer.
  # @param cons if true and the pattern matched, the buffer is replaced by
  # the text following the match.
  # @return the MatchData, or nil when there is no match
  def match(pattern, cons=false)
    md = pattern.match(@buffer)
    @buffer = $' if cons and md
    return md
  end

  # @return true if the Source is exhausted
  def empty?
    @buffer == ""
  end

  # @return the index of the first occurrence of the remaining buffer
  # within the original text
  def position
    @orig.index( @buffer )
  end

  # @return the current line in the source
  # NOTE(review): @orig.split with no argument splits on whitespace, and
  # grep with a String selects entries equal to the first 31 characters of
  # the buffer, so the value returned is the index of a whitespace-
  # separated token, not obviously a line number -- confirm the intent.
  def current_line
    lines = @orig.split
    res = lines.grep @buffer[0..30]
    res = res[-1] if res.kind_of? Array
    lines.index( res ) if res
  end
end
# A Source that wraps an IO. See the Source class for method # A Source that wraps an IO. See the Source class for method
# documentation # documentation
class IOSource < Source class IOSource < Source
#attr_reader :block_size #attr_reader :block_size
# block_size has been deprecated # block_size has been deprecated
def initialize(arg, block_size=500, encoding=nil) def initialize(arg, block_size=500, encoding=nil)
@er_source = @source = arg @er_source = @source = arg
@to_utf = false @to_utf = false
# Determining the encoding is a deceptively difficult issue to resolve. # Determining the encoding is a deceptively difficult issue to resolve.
# First, we check the first two bytes for UTF-16. Then we # First, we check the first two bytes for UTF-16. Then we
# assume that the encoding is at least ASCII enough for the '>', and # assume that the encoding is at least ASCII enough for the '>', and
@ -145,88 +146,94 @@ module REXML
str = @source.read( 2 ) str = @source.read( 2 )
if encoding if encoding
self.encoding = encoding self.encoding = encoding
elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str elsif 0xfe == str[0] && 0xff == str[1]
self.encoding = check_encoding( str ) @line_break = "\000>"
elsif 0xff == str[0] && 0xfe == str[1]
@line_break = ">\000"
elsif 0xef == str[0] && 0xbb == str[1]
str += @source.read(1)
str = '' if (0xbf == str[2])
@line_break = ">"
else else
@line_break = '>' @line_break = ">"
end end
super str+@source.readline( @line_break ) super str+@source.readline( @line_break )
end end
# Scans the buffer via the inherited scan; when that finds nothing, keeps
# appending chunks read from the underlying source (each chunk ends at
# @line_break) until the pattern matches the buffer or the source is
# given up on, then scans once more.  The result is tainted before it is
# returned.
#
# This deliberately mirrors the refill loop in match(); the original
# author kept the duplication because this form is slightly faster.
def scan(pattern, cons=false)
  found = super
  return found.taint unless found.size == 0

  until @buffer =~ pattern or @source.nil?
    begin
      # READLINE OPT
      chunk = @source.readline(@line_break)
      chunk = decode(chunk) if @to_utf and chunk
      @buffer << chunk
    rescue Iconv::IllegalSequence
      # Encoding errors are the one failure that is passed on to the caller.
      raise
    rescue
      # Any other failure while reading is treated as the end of the input.
      @source = nil
    end
  end

  found = super
  found.taint
end
# Reads the next chunk (up to and including @line_break) from the
# underlying source and appends it to the buffer, decoding it first when
# the source is flagged for decoding.
#
# Any StandardError raised while reading or decoding (EOFError at the end
# of the input included) marks the source as exhausted by setting @source
# to nil.  Only StandardError is rescued, so Interrupt, SystemExit and
# similar process-level exceptions are passed on instead of being
# mistaken for the end of the input.
# NOTE(review): scan() re-raises Iconv::IllegalSequence, whereas this
# method treats it as the end of the input -- confirm that is intended.
def read
  begin
    str = @source.readline(@line_break)
    str = decode(str) if @to_utf and str
    @buffer << str
  rescue StandardError
    @source = nil
  end
end
# Delegates to match() with consumption switched on and returns whatever
# match() returns.
def consume( pattern )
  return match( pattern, true )
end
# Matches +pattern+ against the buffer; while there is no match and the
# underlying source is still available, appends the next chunk (read up
# to @line_break, decoded when the source is flagged for decoding) and
# tries again.
#
# pattern:: the Regexp to match
# cons:: when true, a successful match replaces the buffer with the text
#        that follows the match
#
# Returns the tainted MatchData, or nil when the pattern never matched.
# Any StandardError raised while refilling sets @source to nil, which
# ends the loop.
def match( pattern, cons=false )
  md = pattern.match(@buffer)
  @buffer = md.post_match if cons and md
  until md or not @source
    begin
      piece = @source.readline(@line_break)
      piece = decode(piece) if @to_utf and piece
      @buffer << piece
      md = pattern.match(@buffer)
      @buffer = md.post_match if cons and md
    rescue
      @source = nil
    end
  end
  md.taint
  md
end
# True only when the inherited check reports an empty buffer and the
# underlying source is either gone (nil) or at end of file.
def empty?
  return false unless super
  @source.nil? || @source.eof?
end
# Returns 0 when the wrapped IO's stat reports a pipe; otherwise returns
# the IO's current pos.
def position
  if @er_source.stat.pipe?
    0
  else
    @er_source.pos
  end
end
# @return the current line in the source # @return the current line in the source
def current_line def current_line
begin begin
pos = @er_source.pos # The byte position in the source pos = @er_source.pos # The byte position in the source
lineno = @er_source.lineno # The XML < position in the source lineno = @er_source.lineno # The XML < position in the source
@er_source.rewind @er_source.rewind
line = 0 # The \r\n position in the source line = 0 # The \r\n position in the source
begin begin
while @er_source.pos < pos while @er_source.pos < pos
@er_source.readline @er_source.readline
@ -238,7 +245,7 @@ module REXML
pos = -1 pos = -1
line = -1 line = -1
end end
[pos, lineno, line] [pos, lineno, line]
end end
end end
end end

View file

@ -211,16 +211,17 @@ module REXML
return new_string return new_string
end end
# == DEPRECATED
# See REXML::Formatters
#
def write( writer, indent=-1, transitive=false, ie_hack=false ) def write( writer, indent=-1, transitive=false, ie_hack=false )
s = to_s() Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
if not (@parent and @parent.whitespace) then formatter = if indent > -1
s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all REXML::Formatters::Pretty.new( indent )
if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0 else
s = indent_text(s, indent, @parent.context[:indentstyle], false) REXML::Formatters::Default.new
end end
s.squeeze!(" \n\t") if @parent and !@parent.whitespace formatter.write( self, writer )
end
writer << s
end end
# FIXME # FIXME

View file

@ -0,0 +1,8 @@
require 'rexml/parseexception'
module REXML
  # Raised when a namespace prefix is used without having been declared.
  class UndefinedNamespaceException < ParseException
    # prefix::
    #   The undeclared prefix; it is interpolated into the message.
    # source::
    #   The source being parsed when the prefix was found.
    # parser::
    #   The parser that found the prefix.
    #
    # +source+ and +parser+ are forwarded to ParseException so that the
    # exception carries them, instead of being accepted and then dropped.
    def initialize( prefix, source, parser )
      super( "Undefined prefix #{prefix} found", source, parser )
    end
  end
end

View file

@ -13,7 +13,7 @@ module REXML
STOP = '\?>'; STOP = '\?>';
attr_accessor :version, :standalone attr_accessor :version, :standalone
attr_reader :writeencoding attr_reader :writeencoding, :writethis
def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil) def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
@writethis = true @writethis = true
@ -37,9 +37,14 @@ module REXML
XMLDecl.new(self) XMLDecl.new(self)
end end
def write writer, indent=-1, transitive=false, ie_hack=false # indent::
# Ignored. There must be no whitespace before an XML declaration
# transitive::
# Ignored
# ie_hack::
# Ignored
def write(writer, indent=-1, transitive=false, ie_hack=false)
return nil unless @writethis or writer.kind_of? Output return nil unless @writethis or writer.kind_of? Output
indent( writer, indent )
writer << START.sub(/\\/u, '') writer << START.sub(/\\/u, '')
if writer.kind_of? Output if writer.kind_of? Output
writer << " #{content writer.encoding}" writer << " #{content writer.encoding}"

View file

@ -160,6 +160,7 @@ module REXML
node_types = ELEMENTS node_types = ELEMENTS
return nodeset if path_stack.length == 0 || nodeset.length == 0 return nodeset if path_stack.length == 0 || nodeset.length == 0
while path_stack.length > 0 while path_stack.length > 0
#puts "#"*5
#puts "Path stack = #{path_stack.inspect}" #puts "Path stack = #{path_stack.inspect}"
#puts "Nodeset is #{nodeset.inspect}" #puts "Nodeset is #{nodeset.inspect}"
if nodeset.length == 0 if nodeset.length == 0
@ -351,7 +352,8 @@ module REXML
when :following_sibling when :following_sibling
#puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}" #puts "FOLLOWING_SIBLING 1: nodeset = #{nodeset}"
results = [] results = []
for node in nodeset nodeset.each do |node|
next if node.parent.nil?
all_siblings = node.parent.children all_siblings = node.parent.children
current_index = all_siblings.index( node ) current_index = all_siblings.index( node )
following_siblings = all_siblings[ current_index+1 .. -1 ] following_siblings = all_siblings[ current_index+1 .. -1 ]
@ -362,13 +364,14 @@ module REXML
when :preceding_sibling when :preceding_sibling
results = [] results = []
for node in nodeset nodeset.each do |node|
next if node.parent.nil?
all_siblings = node.parent.children all_siblings = node.parent.children
current_index = all_siblings.index( node ) current_index = all_siblings.index( node )
preceding_siblings = all_siblings[ 0 .. current_index-1 ].reverse preceding_siblings = all_siblings[ 0, current_index ].reverse
#results += expr( path_stack.dclone, preceding_siblings ) results += preceding_siblings
end end
nodeset = preceding_siblings || [] nodeset = results
node_types = ELEMENTS node_types = ELEMENTS
when :preceding when :preceding
@ -389,15 +392,21 @@ module REXML
node_types = ELEMENTS node_types = ELEMENTS
when :namespace when :namespace
#puts "In :namespace"
new_nodeset = [] new_nodeset = []
prefix = path_stack.shift prefix = path_stack.shift
for node in nodeset for node in nodeset
if (node.node_type == :element or node.node_type == :attribute) if (node.node_type == :element or node.node_type == :attribute)
if (node.node_type == :element) if @namespaces
namespaces = @namespaces
elsif (node.node_type == :element)
namespaces = node.namespaces namespaces = node.namespaces
else else
namespaces = node.element.namesapces namespaces = node.element.namesapces
end end
#puts "Namespaces = #{namespaces.inspect}"
#puts "Prefix = #{prefix.inspect}"
#puts "Node.namespace = #{node.namespace}"
if (node.namespace == namespaces[prefix]) if (node.namespace == namespaces[prefix])
new_nodeset << node new_nodeset << node
end end