When reading from stdin, put a wrapper around the IO object

The purpose of this commit is to fix Bug #21188.  We need to detect when
stdin has reached EOF.  Unfortunately we can't _call_ the `eof?` method
on the IO object because it will block.

Here is a short script to demonstrate the issue:

```ruby
x = STDIN.gets
puts x
puts STDIN.eof?
```

If you run the script, then type some characters (but _NOT_ a newline),
then hit Ctrl-D twice, it will print the input string.  Unfortunately,
calling `eof?` will try to read from STDIN again causing us to need a
3rd Ctrl-D to exit the program.

Before introducing the EOF callback to Prism, the input loop looked
kind of like this:

```ruby
loop do
  str = STDIN.gets
  process(str)

  if str.nil?
    p :DONE
  end
end
```

That loop required 3 Ctrl-Ds to exit.  If we naively changed it to
something like this:

```ruby
loop do
  str = STDIN.gets
  process(str)

  if STDIN.eof?
    p :DONE
  end
end
```

It would still require 3 Ctrl-D because `eof?` would block.  In this
patch, we're wrapping the IO object, checking the buffer for a newline
and length, and then using that to simulate a non-blocking eof? method.

This commit wraps STDIN and emulates a non-blocking `eof` function.

[Bug #21188]
This commit is contained in:
Aaron Patterson 2025-07-17 15:20:20 -07:00 committed by Aaron Patterson
parent 1c6b36af18
commit 89d89fa49d
5 changed files with 66 additions and 13 deletions

View file

@ -86,6 +86,7 @@ module Prism
end
callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer
callback :pm_parse_stream_feof_t, [:pointer], :int
enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY]
enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE]
@ -101,7 +102,7 @@ module Prism
"pm_string_query_local",
"pm_string_query_constant",
"pm_string_query_method_name",
[:pm_parse_stream_fgets_t]
[:pm_parse_stream_fgets_t, :pm_parse_stream_feof_t]
)
load_exported_functions_from(
@ -281,12 +282,14 @@ module Prism
end
}
eof_callback = -> (_) { stream.eof? }
# In the pm_serialize_parse_stream function it accepts a pointer to the
# IO object as a void* and then passes it through to the callback as the
# third argument, but it never touches it itself. As such, since we have
# access to the IO object already through the closure of the lambda, we
# can pass a null pointer here and not worry.
LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options))
LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, eof_callback, dump_options(options))
Prism.load(source, buffer.read, options.fetch(:freeze, false))
end
end

View file

@ -994,6 +994,14 @@ profile_file(int argc, VALUE *argv, VALUE self) {
return Qnil;
}
static int
parse_stream_eof(void *stream) {
if (rb_funcall((VALUE) stream, rb_intern("eof?"), 0)) {
return 1;
}
return 0;
}
/**
* An implementation of fgets that is suitable for use with Ruby IO objects.
*/
@ -1034,7 +1042,7 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
pm_parser_t parser;
pm_buffer_t buffer;
pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options);
pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options);
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
VALUE source = pm_source_new(&parser, encoding, options.freeze);

View file

@ -22848,7 +22848,7 @@ pm_parse(pm_parser_t *parser) {
* otherwise return true.
*/
static bool
pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) {
pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
#define LINE_SIZE 4096
char line[LINE_SIZE];
@ -22884,6 +22884,12 @@ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t
if (strncmp(line, "__END__\r\n", 9) == 0) return false;
break;
}
// All data should be read via gets. If the string returned by gets
// _doesn't_ end with a newline, then we assume we hit EOF condition.
if (stream_feof(stream)) {
break;
}
}
return true;
@ -22919,16 +22925,17 @@ pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
* can stream stdin in to Ruby so we need to support a streaming API.
*/
PRISM_EXPORTED_FUNCTION pm_node_t *
pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) {
pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
pm_buffer_init(buffer);
bool eof = pm_parse_stream_read(buffer, stream, stream_fgets);
bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
pm_node_t *node = pm_parse(parser);
while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
pm_node_destroy(parser, node);
eof = pm_parse_stream_read(buffer, stream, stream_fgets);
eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
pm_parser_free(parser);
pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
@ -23020,13 +23027,13 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
* given stream into to the given buffer.
*/
PRISM_EXPORTED_FUNCTION void
pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) {
pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
pm_parser_t parser;
pm_options_t options = { 0 };
pm_options_read(&options, data);
pm_buffer_t parser_buffer;
pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options);
pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
pm_serialize_header(buffer);
pm_serialize_content(&parser, node, buffer);
pm_buffer_append_byte(buffer, '\0');

View file

@ -87,6 +87,13 @@ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
*/
typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream);
/**
 * This function is used in pm_parse_stream to check whether a stream is EOF.
 * It closely mirrors that of feof so that feof can be used as the
 * default implementation.
 *
 * @param stream The opaque stream pointer that was given to pm_parse_stream.
 * @return Non-zero if the stream has hit EOF (reading stops), zero otherwise.
 */
typedef int (pm_parse_stream_feof_t)(void *stream);
/**
* Parse a stream of Ruby source and return the tree.
*
@ -94,10 +101,11 @@ typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream);
* @param buffer The buffer to use.
* @param stream The stream to parse.
* @param stream_fgets The function to use to read from the stream.
* @param stream_feof The function to use to determine if the stream has hit eof.
* @param options The optional options to use when parsing.
* @return The AST representing the source.
*/
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options);
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options);
// We optionally support serializing to a binary string. For systems that don't
// want or need this functionality, it can be turned off with the
@ -111,9 +119,10 @@ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buff
* @param buffer The buffer to serialize to.
* @param stream The stream to parse.
* @param stream_fgets The function to use to read from the stream.
* @param stream_feof The function to use to tell if the stream has hit eof.
* @param data The optional data to pass to the parser.
*/
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data);
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data);
/**
* Serialize the given list of comments to the given buffer.

View file

@ -11492,6 +11492,18 @@ pm_parse_string(pm_parse_result_t *result, VALUE source, VALUE filepath, VALUE *
return pm_parse_process(result, node, script_lines);
}
/**
 * Pairs the stdin IO object with an EOF flag so that the EOF callback given
 * to pm_parse_stream can answer without calling `eof?` on the IO object.
 */
struct rb_stdin_wrapper {
/* The Ruby IO object that pm_parse_stdin_fgets calls `gets` on. */
VALUE rb_stdin;
/* Set to 1 by pm_parse_stdin_fgets once a short read without a trailing
 * newline is seen; returned as-is by pm_parse_stdin_eof. */
int eof_seen;
};
/**
 * EOF callback for pm_parse_stream when reading from stdin. Performs no I/O:
 * it only reports the flag stored in the wrapper.
 *
 * @param stream Pointer to the struct rb_stdin_wrapper for this parse.
 * @return The wrapper's eof_seen value (non-zero once EOF has been recorded).
 */
static int
pm_parse_stdin_eof(void *stream)
{
    struct rb_stdin_wrapper *wrapper = stream;
    int seen = wrapper->eof_seen;

    return seen;
}
/**
* An implementation of fgets that is suitable for use with Ruby IO objects.
*/
@ -11500,7 +11512,9 @@ pm_parse_stdin_fgets(char *string, int size, void *stream)
{
RUBY_ASSERT(size > 0);
VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1));
struct rb_stdin_wrapper * wrapped_stdin = (struct rb_stdin_wrapper *)stream;
VALUE line = rb_funcall(wrapped_stdin->rb_stdin, rb_intern("gets"), 1, INT2FIX(size - 1));
if (NIL_P(line)) {
return NULL;
}
@ -11511,6 +11525,13 @@ pm_parse_stdin_fgets(char *string, int size, void *stream)
memcpy(string, cstr, length);
string[length] = '\0';
// We're reading strings from stdin via gets. We'll assume that if the
// string is smaller than the requested length, and doesn't end with a
// newline, that we hit EOF.
if (length < (size - 1) && string[length - 1] != '\n') {
wrapped_stdin->eof_seen = 1;
}
return string;
}
@ -11527,8 +11548,13 @@ pm_parse_stdin(pm_parse_result_t *result)
{
pm_options_frozen_string_literal_init(&result->options);
struct rb_stdin_wrapper wrapped_stdin = {
rb_stdin,
0
};
pm_buffer_t buffer;
pm_node_t *node = pm_parse_stream(&result->parser, &buffer, (void *) rb_stdin, pm_parse_stdin_fgets, &result->options);
pm_node_t *node = pm_parse_stream(&result->parser, &buffer, (void *) &wrapped_stdin, pm_parse_stdin_fgets, pm_parse_stdin_eof, &result->options);
// Copy the allocated buffer contents into the input string so that it gets
// freed. At this point we've handed over ownership, so we don't need to