* lib/net/http/response.rb: Automatically inflate gzip and
  deflate-encoded response bodies.  [Feature #6492]
* lib/net/http/generic_request.rb:  Automatically accept gzip and
  deflate content-encoding for requests.  [Feature #6494]
* lib/net/http/request.rb:  Updated documentation for #6494.
* lib/net/http.rb:  Updated documentation for #6492 and #6494, removed
  Content-Encoding handling now present in Net::HTTPResponse.
* test/net/http/test_httpresponse.rb:  Tests for #6492
* test/net/http/test_http_request.rb:  Tests for #6494
* test/open-uri/test_open-uri.rb (test_content_encoding):  Updated test
  for automatic content-encoding handling.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@36473 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
drbrain 2012-07-19 22:43:38 +00:00
parent ef19dcf96d
commit b1a0509b54
8 changed files with 388 additions and 46 deletions

View file

@ -1,3 +1,17 @@
Fri Jul 20 07:40:32 2012 Eric Hodel <drbrain@segment7.net>
* lib/net/http/response.rb: Automatically inflate gzip and
deflate-encoded response bodies. [Feature #6492]
* lib/net/http/generic_request.rb: Automatically accept gzip and
deflate content-encoding for requests. [Feature #6494]
* lib/net/http/request.rb: Updated documentation for #6494.
* lib/net/http.rb: Updated documentation for #6492 and #6494, removed
Content-Encoding handling now present in Net::HTTPResponse.
* test/net/http/test_httpresponse.rb: Tests for #6492
* test/net/http/test_http_request.rb: Tests for #6494
* test/open-uri/test_open-uri.rb (test_content_encoding): Updated test
for automatic content-encoding handling.
Fri Jul 20 03:42:54 2012 NARUSE, Yui <naruse@ruby-lang.org> Fri Jul 20 03:42:54 2012 NARUSE, Yui <naruse@ruby-lang.org>
* thread_pthread.c: use #ifdef, not #if. * thread_pthread.c: use #ifdef, not #if.

View file

@ -283,6 +283,14 @@ module Net #:nodoc:
# See Net::HTTP::Proxy for further details and examples such as proxies that # See Net::HTTP::Proxy for further details and examples such as proxies that
# require a username and password. # require a username and password.
# #
# === Compression
#
# Net::HTTP automatically adds Accept-Encoding for compression of response
# bodies and automatically decompresses gzip and deflate responses unless a
# Range header was sent.
#
# Compression can be disabled through the Accept-Encoding: identity header.
#
# == HTTP Request Classes # == HTTP Request Classes
# #
# Here is the HTTP request class hierarchy. # Here is the HTTP request class hierarchy.
@ -602,7 +610,6 @@ module Net #:nodoc:
@use_ssl = false @use_ssl = false
@ssl_context = nil @ssl_context = nil
@enable_post_connection_check = true @enable_post_connection_check = true
@compression = nil
@sspi_enabled = false @sspi_enabled = false
SSL_IVNAMES.each do |ivname| SSL_IVNAMES.each do |ivname|
instance_variable_set ivname, nil instance_variable_set ivname, nil
@ -1052,28 +1059,10 @@ module Net #:nodoc:
initheader = initheader.merge({ initheader = initheader.merge({
"accept-encoding" => "gzip;q=1.0,deflate;q=0.6,identity;q=0.3" "accept-encoding" => "gzip;q=1.0,deflate;q=0.6,identity;q=0.3"
}) })
@compression = true
end end
end end
request(Get.new(path, initheader)) {|r| request(Get.new(path, initheader)) {|r|
if r.key?("content-encoding") and @compression r.read_body dest, &block
@compression = nil # Clear it till next set.
the_body = r.read_body dest, &block
case r["content-encoding"]
when "gzip"
r.body= Zlib::GzipReader.new(StringIO.new(the_body), encoding: "ASCII-8BIT").read
r.delete("content-encoding")
when "deflate"
r.body= Zlib::Inflate.inflate(the_body);
r.delete("content-encoding")
when "identity"
; # nothing needed
else
; # Don't do anything dramatic, unless we need to later
end
else
r.read_body dest, &block
end
res = r res = r
} }
res res

View file

@ -14,6 +14,18 @@ class Net::HTTPGenericRequest
raise ArgumentError, "no HTTP request path given" unless path raise ArgumentError, "no HTTP request path given" unless path
raise ArgumentError, "HTTP request path is empty" if path.empty? raise ArgumentError, "HTTP request path is empty" if path.empty?
@path = path @path = path
if @response_has_body and Net::HTTP::HAVE_ZLIB then
if !initheader ||
!initheader.keys.any? { |k|
%w[accept-encoding range].include? k.downcase
} then
initheader = initheader ? initheader.dup : {}
initheader["accept-encoding"] =
"gzip;q=1.0,deflate;q=0.6,identity;q=0.3"
end
end
initialize_http_header initheader initialize_http_header initheader
self['Accept'] ||= '*/*' self['Accept'] ||= '*/*'
self['User-Agent'] ||= 'Ruby' self['User-Agent'] ||= 'Ruby'

View file

@ -4,7 +4,12 @@
# subclasses: Net::HTTP::Get, Net::HTTP::Post, Net::HTTP::Head. # subclasses: Net::HTTP::Get, Net::HTTP::Post, Net::HTTP::Head.
# #
class Net::HTTPRequest < Net::HTTPGenericRequest class Net::HTTPRequest < Net::HTTPGenericRequest
# Creates HTTP request object. # Creates an HTTP request object for +path+.
#
# +initheader+ are the default headers to use. Net::HTTP adds
# Accept-Encoding to enable compression of the response body unless
# Accept-Encoding or Range are supplied in +initheader+.
def initialize(path, initheader = nil) def initialize(path, initheader = nil)
super self.class::METHOD, super self.class::METHOD,
self.class::REQUEST_HAS_BODY, self.class::REQUEST_HAS_BODY,

View file

@ -222,25 +222,70 @@ class Net::HTTPResponse
private private
def read_body_0(dest) ##
if chunked? # Checks for a supported Content-Encoding header and yields an Inflate
read_chunked dest # wrapper for this response's socket when zlib is present. If the
return # Content-Encoding is unsupported or zlib is missing the plain socket is
# yielded.
#
# If a Content-Range header is present a plain socket is yielded as the
# bytes in the range may not be a complete deflate block.
def inflater # :nodoc:
return yield @socket unless Net::HTTP::HAVE_ZLIB
return yield @socket if self['content-range']
case self['content-encoding']
when 'deflate', 'gzip', 'x-gzip' then
self.delete 'content-encoding'
inflate_body_io = Inflater.new(@socket)
begin
yield inflate_body_io
ensure
inflate_body_io.finish
end
when 'none', 'identity' then
self.delete 'content-encoding'
yield @socket
else
yield @socket
end end
clen = content_length()
if clen
@socket.read clen, dest, true # ignore EOF
return
end
clen = range_length()
if clen
@socket.read clen, dest
return
end
@socket.read_all dest
end end
def read_chunked(dest) def read_body_0(dest)
inflater do |inflate_body_io|
if chunked?
read_chunked dest, inflate_body_io
return
end
@socket = inflate_body_io
clen = content_length()
if clen
@socket.read clen, dest, true # ignore EOF
return
end
clen = range_length()
if clen
@socket.read clen, dest
return
end
@socket.read_all dest
end
end
##
# read_chunked reads from +@socket+ for chunk-size, chunk-extension, CRLF,
# etc. and +chunk_data_io+ for chunk-data which may be deflate or gzip
# encoded.
#
# See RFC 2616 section 3.6.1 for definitions
def read_chunked(dest, chunk_data_io) # :nodoc:
len = nil len = nil
total = 0 total = 0
while true while true
@ -250,7 +295,7 @@ class Net::HTTPResponse
len = hexlen.hex len = hexlen.hex
break if len == 0 break if len == 0
begin begin
@socket.read len, dest chunk_data_io.read len, dest
ensure ensure
total += len total += len
@socket.read 2 # \r\n @socket.read 2 # \r\n
@ -266,8 +311,8 @@ class Net::HTTPResponse
end end
def procdest(dest, block) def procdest(dest, block)
raise ArgumentError, 'both arg and block given for HTTP method' \ raise ArgumentError, 'both arg and block given for HTTP method' if
if dest and block dest and block
if block if block
Net::ReadAdapter.new(block) Net::ReadAdapter.new(block)
else else
@ -275,5 +320,71 @@ class Net::HTTPResponse
end end
end end
##
# Inflater is a wrapper around Net::BufferedIO that transparently inflates
# zlib and gzip streams.
class Inflater # :nodoc:

  ##
  # Creates a new Inflater wrapping +socket+.
  #
  # +socket+ must respond to #read(clen, dest, ignore_eof) and
  # #read_all(dest), writing what it reads to +dest+ with <<.

  def initialize socket
    @socket = socket
    # Adding 32 to the window bits makes zlib detect zlib vs. gzip framing
    # from the stream header automatically.
    @inflate = Zlib::Inflate.new(32 + Zlib::MAX_WBITS)
  end

  ##
  # Finishes the inflate stream.
  #
  # Does nothing when no compressed input was ever fed to the stream (for
  # example an encoded response with an empty body): finishing a stream
  # that never received data makes zlib report a buffer error.

  def finish
    return if @inflate.total_in.zero?

    @inflate.finish
  end

  ##
  # Returns a Net::ReadAdapter that inflates each read chunk into +dest+.
  #
  # This allows a large response body to be inflated without storing the
  # entire body in memory.

  def inflate_adapter(dest)
    block = proc do |compressed_chunk|
      @inflate.inflate(compressed_chunk) do |chunk|
        dest << chunk
      end
    end

    Net::ReadAdapter.new(block)
  end

  ##
  # Reads +clen+ compressed bytes from the socket, inflates them, then
  # writes the inflated data to +dest+.  +ignore_eof+ is passed down to the
  # wrapped socket's #read.
  #
  # +clen+ counts compressed bytes, so the amount written to +dest+ usually
  # differs from (and may exceed) +clen+.  At this time there is no way for
  # a user of Net::HTTPResponse to read a specific number of bytes from the
  # HTTP response body, so this internal API does not return the same
  # number of bytes as were requested.
  #
  # See https://bugs.ruby-lang.org/issues/6492 for further discussion.

  def read clen, dest, ignore_eof = false
    temp_dest = inflate_adapter(dest)

    @socket.read clen, temp_dest, ignore_eof
  end

  ##
  # Reads the rest of the socket, inflates it, then writes it to +dest+.

  def read_all dest
    temp_dest = inflate_adapter(dest)

    @socket.read_all temp_dest
  end

end
end end

View file

@ -0,0 +1,57 @@
require 'net/http'
require 'test/unit'
require 'stringio'
class HTTPRequestTest < Test::Unit::TestCase
def test_initialize_GET
req = Net::HTTP::Get.new '/'
assert_equal 'GET', req.method
refute req.request_body_permitted?
assert req.response_body_permitted?
expected = {
'accept' => %w[*/*],
'user-agent' => %w[Ruby],
}
expected['accept-encoding'] = %w[gzip;q=1.0,deflate;q=0.6,identity;q=0.3] if
Net::HTTP::HAVE_ZLIB
assert_equal expected, req.to_hash
end
def test_initialize_GET_range
req = Net::HTTP::Get.new '/', 'Range' => 'bytes=0-9'
assert_equal 'GET', req.method
refute req.request_body_permitted?
assert req.response_body_permitted?
expected = {
'accept' => %w[*/*],
'user-agent' => %w[Ruby],
'range' => %w[bytes=0-9],
}
assert_equal expected, req.to_hash
end
def test_initialize_HEAD
req = Net::HTTP::Head.new '/'
assert_equal 'HEAD', req.method
refute req.request_body_permitted?
refute req.response_body_permitted?
expected = {
'accept' => %w[*/*],
'user-agent' => %w[Ruby],
}
assert_equal expected, req.to_hash
end
end

View file

@ -4,7 +4,7 @@ require 'stringio'
class HTTPResponseTest < Test::Unit::TestCase class HTTPResponseTest < Test::Unit::TestCase
def test_singleline_header def test_singleline_header
io = dummy_io(<<EOS.gsub(/\n/, "\r\n")) io = dummy_io(<<EOS)
HTTP/1.1 200 OK HTTP/1.1 200 OK
Content-Length: 5 Content-Length: 5
Connection: close Connection: close
@ -17,7 +17,7 @@ EOS
end end
def test_multiline_header def test_multiline_header
io = dummy_io(<<EOS.gsub(/\n/, "\r\n")) io = dummy_io(<<EOS)
HTTP/1.1 200 OK HTTP/1.1 200 OK
X-Foo: XXX X-Foo: XXX
YYY YYY
@ -32,9 +32,163 @@ EOS
assert_equal('XXX YYY', res.header['x-bar']) assert_equal('XXX YYY', res.header['x-bar'])
end end
# Reads a fixed-length, unencoded body and expects it back as a String.
def test_read_body
  # HTTP requires an empty line between the header section and the body.
  io = dummy_io(<<EOS)
HTTP/1.1 200 OK
Connection: close
Content-Length: 5

hello
EOS

  res = Net::HTTPResponse.read_new(io)

  body = nil

  res.reading_body io, true do
    body = res.read_body
  end

  assert_equal 'hello', body
end
# Reads a fixed-length, unencoded body through the block form of
# read_body, collecting the yielded chunks.
def test_read_body_block
  # HTTP requires an empty line between the header section and the body.
  io = dummy_io(<<EOS)
HTTP/1.1 200 OK
Connection: close
Content-Length: 5

hello
EOS

  res = Net::HTTPResponse.read_new(io)

  body = ''

  res.reading_body io, true do
    res.read_body do |chunk|
      body << chunk
    end
  end

  assert_equal 'hello', body
end
# A deflate-encoded body with a Content-Length is expected to be inflated
# transparently by read_body.
def test_read_body_content_encoding_deflate
  # HTTP requires an empty line between the header section and the body.
  # The 13 body bytes are the zlib-deflated form of "hello".
  io = dummy_io(<<EOS)
HTTP/1.1 200 OK
Connection: close
Content-Encoding: deflate
Content-Length: 13

x\x9C\xCBH\xCD\xC9\xC9\a\x00\x06,\x02\x15
EOS

  res = Net::HTTPResponse.read_new(io)

  body = nil

  res.reading_body io, true do
    body = res.read_body
  end

  assert_equal 'hello', body
end
# A deflate-encoded body delivered with chunked transfer-encoding is
# expected to be inflated across chunk boundaries.
def test_read_body_content_encoding_deflate_chunked
  # HTTP requires an empty line between the header section and the body,
  # and a chunked body ends with the "0" last-chunk followed by an empty
  # line (RFC 2616 section 3.6.1).
  io = dummy_io(<<EOS)
HTTP/1.1 200 OK
Connection: close
Content-Encoding: deflate
Transfer-Encoding: chunked

6
x\x9C\xCBH\xCD\xC9
7
\xC9\a\x00\x06,\x02\x15
0

EOS

  res = Net::HTTPResponse.read_new(io)

  body = nil

  res.reading_body io, true do
    body = res.read_body
  end

  assert_equal 'hello', body
end
# A deflate-encoded body without Content-Length (read until the connection
# closes) is expected to be inflated transparently.
def test_read_body_content_encoding_deflate_no_length
  # HTTP requires an empty line between the header section and the body.
  io = dummy_io(<<EOS)
HTTP/1.1 200 OK
Connection: close
Content-Encoding: deflate

x\x9C\xCBH\xCD\xC9\xC9\a\x00\x06,\x02\x15
EOS

  res = Net::HTTPResponse.read_new(io)

  body = nil

  res.reading_body io, true do
    body = res.read_body
  end

  assert_equal 'hello', body
end
# When a Content-Range header is present the body is expected to come back
# untouched: the bytes of a range may not form a complete compressed stream.
def test_read_body_content_encoding_deflate_content_range
  # HTTP requires an empty line between the header section and the body.
  # The 10 body bytes are only the leading bytes of a gzip stream.
  io = dummy_io(<<EOS)
HTTP/1.1 200 OK
Accept-Ranges: bytes
Connection: close
Content-Encoding: gzip
Content-Length: 10
Content-Range: bytes 0-9/55

\x1F\x8B\b\x00\x00\x00\x00\x00\x00\x03
EOS

  res = Net::HTTPResponse.read_new(io)

  body = nil

  res.reading_body io, true do
    body = res.read_body
  end

  assert_equal "\x1F\x8B\b\x00\x00\x00\x00\x00\x00\x03", body
end
# Reads a fixed-length, unencoded body into a caller-supplied String.
def test_read_body_string
  # HTTP requires an empty line between the header section and the body.
  io = dummy_io(<<EOS)
HTTP/1.1 200 OK
Connection: close
Content-Length: 5

hello
EOS

  res = Net::HTTPResponse.read_new(io)

  body = ''

  res.reading_body io, true do
    res.read_body body
  end

  assert_equal 'hello', body
end
private private
def dummy_io(str) def dummy_io(str)
str = str.gsub(/\n/, "\r\n")
Net::BufferedIO.new(StringIO.new(str)) Net::BufferedIO.new(StringIO.new(str))
end end
end end

View file

@ -488,12 +488,12 @@ class TestOpenURI < Test::Unit::TestCase
srv.mount_proc("/data2/") {|req, res| res.body = content_gz; res['content-encoding'] = 'gzip'; res.chunked = true } srv.mount_proc("/data2/") {|req, res| res.body = content_gz; res['content-encoding'] = 'gzip'; res.chunked = true }
srv.mount_proc("/noce/") {|req, res| res.body = content_gz } srv.mount_proc("/noce/") {|req, res| res.body = content_gz }
open("#{url}/data/") {|f| open("#{url}/data/") {|f|
assert_equal ['gzip'], f.content_encoding assert_equal [], f.content_encoding
assert_equal(content_gz, f.read.force_encoding("ascii-8bit")) assert_equal(content, f.read)
} }
open("#{url}/data2/") {|f| open("#{url}/data2/") {|f|
assert_equal ['gzip'], f.content_encoding assert_equal [], f.content_encoding
assert_equal(content_gz, f.read.force_encoding("ascii-8bit")) assert_equal(content, f.read)
} }
open("#{url}/noce/") {|f| open("#{url}/noce/") {|f|
assert_equal [], f.content_encoding assert_equal [], f.content_encoding