merge revision(s) 35766:

* io.c (io_strip_bom): check EOF.  [Bug #6487][ruby-core:45203]


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_3@35782 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2012-05-25 01:39:30 +00:00
parent 046883d2ca
commit cb1a793510
4 changed files with 89 additions and 36 deletions

View file

@ -1,3 +1,7 @@
Fri May 25 10:38:06 2012 Nobuyoshi Nakada <nobu@ruby-lang.org>
* io.c (io_strip_bom): check EOF. [Bug #6487][ruby-core:45203]
Fri May 25 10:36:38 2012 Nobuyoshi Nakada <nobu@ruby-lang.org>
* parse.y (f_arglist): should reset lexical states after empty

68
io.c
View file

@ -4900,65 +4900,63 @@ static void io_encoding_set(rb_io_t *, VALUE, VALUE, VALUE);
static int static int
io_strip_bom(VALUE io) io_strip_bom(VALUE io)
{ {
int b1, b2, b3, b4; VALUE b1, b2, b3, b4;
switch (b1 = FIX2INT(rb_io_getbyte(io))) {
case 0xEF: if (NIL_P(b1 = rb_io_getbyte(io))) return 0;
b2 = FIX2INT(rb_io_getbyte(io)); switch (b1) {
if (b2 == 0xBB) { case INT2FIX(0xEF):
b3 = FIX2INT(rb_io_getbyte(io)); if (NIL_P(b2 = rb_io_getbyte(io))) break;
if (b3 == 0xBF) { if (b2 == INT2FIX(0xBB) && !NIL_P(b3 = rb_io_getbyte(io))) {
if (b3 == INT2FIX(0xBF)) {
return rb_utf8_encindex(); return rb_utf8_encindex();
} }
rb_io_ungetbyte(io, INT2FIX(b3)); rb_io_ungetbyte(io, b3);
} }
rb_io_ungetbyte(io, INT2FIX(b2)); rb_io_ungetbyte(io, b2);
break; break;
case 0xFE: case INT2FIX(0xFE):
b2 = FIX2INT(rb_io_getbyte(io)); if (NIL_P(b2 = rb_io_getbyte(io))) break;
if (b2 == 0xFF) { if (b2 == INT2FIX(0xFF)) {
return rb_enc_find_index("UTF-16BE"); return rb_enc_find_index("UTF-16BE");
} }
rb_io_ungetbyte(io, INT2FIX(b2)); rb_io_ungetbyte(io, b2);
break; break;
case 0xFF: case INT2FIX(0xFF):
b2 = FIX2INT(rb_io_getbyte(io)); if (NIL_P(b2 = rb_io_getbyte(io))) break;
if (b2 == 0xFE) { if (b2 == INT2FIX(0xFE)) {
b3 = FIX2INT(rb_io_getbyte(io)); b3 = rb_io_getbyte(io);
if (b3 == 0) { if (b3 == INT2FIX(0) && !NIL_P(b4 = rb_io_getbyte(io))) {
b4 = FIX2INT(rb_io_getbyte(io)); if (b4 == INT2FIX(0)) {
if (b4 == 0) {
return rb_enc_find_index("UTF-32LE"); return rb_enc_find_index("UTF-32LE");
} }
rb_io_ungetbyte(io, INT2FIX(b4)); rb_io_ungetbyte(io, b4);
rb_io_ungetbyte(io, b3);
} }
else { else {
rb_io_ungetbyte(io, INT2FIX(b3)); rb_io_ungetbyte(io, b3);
return rb_enc_find_index("UTF-16LE"); return rb_enc_find_index("UTF-16LE");
} }
rb_io_ungetbyte(io, INT2FIX(b3));
} }
rb_io_ungetbyte(io, INT2FIX(b2)); rb_io_ungetbyte(io, b2);
break; break;
case 0: case INT2FIX(0):
b2 = FIX2INT(rb_io_getbyte(io)); if (NIL_P(b2 = rb_io_getbyte(io))) break;
if (b2 == 0) { if (b2 == INT2FIX(0) && !NIL_P(b3 = rb_io_getbyte(io))) {
b3 = FIX2INT(rb_io_getbyte(io)); if (b3 == INT2FIX(0xFE) && !NIL_P(b4 = rb_io_getbyte(io))) {
if (b3 == 0xFE) { if (b4 == INT2FIX(0xFF)) {
b4 = FIX2INT(rb_io_getbyte(io));
if (b4 == 0xFF) {
return rb_enc_find_index("UTF-32BE"); return rb_enc_find_index("UTF-32BE");
} }
rb_io_ungetbyte(io, INT2FIX(b4)); rb_io_ungetbyte(io, b4);
} }
rb_io_ungetbyte(io, INT2FIX(b3)); rb_io_ungetbyte(io, b3);
} }
rb_io_ungetbyte(io, INT2FIX(b2)); rb_io_ungetbyte(io, b2);
break; break;
} }
rb_io_ungetbyte(io, INT2FIX(b1)); rb_io_ungetbyte(io, b1);
return 0; return 0;
} }

View file

@ -37,6 +37,57 @@ class TestFile < Test::Unit::TestCase
include TestEOF::Seek include TestEOF::Seek
# Regression test for [ruby-core:45203]: reading an empty file must not
# raise, either with a plain encoding or with the BOM-stripping mode.
def test_empty_file_bom
  bug6487 = '[ruby-core:45203]'
  f = Tempfile.new(__method__.to_s)
  begin
    # Close the handle but keep the (empty) file on disk for File.read.
    f.close
    assert File.exist?(f.path)
    assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:utf-8')}
    assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:bom|utf-8')}
  ensure
    # Unlink the temp file even when one of the assertions above fails.
    f.close(true)
  end
end
# Helper for the BOM tests (regression for [ruby-core:45203]).
#
# +bytes+ is an array of string chunks that together form a complete BOM;
# +name+ is used as the temp file's base name.
#
# For every chunk except the last, the chunks written so far followed by a
# single space are expected to be read back unchanged through
# 'rb:bom|utf-8' (i.e. an incomplete BOM is not consumed and no error is
# raised).  Once the final chunk is written the file holds only the BOM and
# the read is expected to return an empty string.
def assert_bom(bytes, name)
  bug6487 = '[ruby-core:45203]'
  f = Tempfile.new(name.to_s)
  begin
    f.sync = true
    expected = ""
    result = nil
    bytes[0...-1].each do |x|
      f.write x
      # Add a trailing space, then step back over it so that the next
      # write replaces the space instead of following it.
      f.write ' '
      f.pos -= 1
      expected << x
      assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
      assert_equal("#{expected} ".force_encoding("utf-8"), result)
    end
    f.write bytes[-1]
    assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
    assert_equal '', result, "valid bom"
  ensure
    # Unlink the temp file even when one of the assertions above fails.
    f.close(true)
  end
end
# Runs the BOM check with the three-byte sequence EF BB BF, one byte per chunk.
def test_bom_8
  chunks = ["\xEF", "\xBB", "\xBF"]
  assert_bom(chunks, __method__)
end
# Runs the BOM check with the two-byte sequence FE FF, one byte per chunk.
def test_bom_16be
  chunks = ["\xFE", "\xFF"]
  assert_bom(chunks, __method__)
end
# Runs the BOM check with the two-byte sequence FF FE, one byte per chunk.
def test_bom_16le
  chunks = ["\xFF", "\xFE"]
  assert_bom(chunks, __method__)
end
# Runs the BOM check with the four-byte sequence 00 00 FE FF, one byte per chunk.
def test_bom_32be
  chunks = ["\0", "\0", "\xFE", "\xFF"]
  assert_bom(chunks, __method__)
end
# Runs the BOM check with the four-byte sequence FF FE 00 00.
# The first three bytes are written as a single chunk -- presumably because
# FF FE alone is already the complete sequence used by test_bom_16le.
def test_bom_32le
  chunks = ["\xFF\xFE\0", "\0"]
  assert_bom(chunks, __method__)
end
def test_truncate_wbuf def test_truncate_wbuf
f = Tempfile.new("test-truncate") f = Tempfile.new("test-truncate")
f.print "abc" f.print "abc"

View file

@ -1,5 +1,5 @@
#define RUBY_VERSION "1.9.3" #define RUBY_VERSION "1.9.3"
#define RUBY_PATCHLEVEL 227 #define RUBY_PATCHLEVEL 228
#define RUBY_RELEASE_DATE "2012-05-25" #define RUBY_RELEASE_DATE "2012-05-25"
#define RUBY_RELEASE_YEAR 2012 #define RUBY_RELEASE_YEAR 2012