When reading from stdin, put a wrapper around the IO object

The purpose of this commit is to fix Bug #21188.  We need to detect when
stdin has run into an EOF condition.  Unfortunately we can't _call_ the
`eof?` method on the IO object because it will block.

Here is a short script to demonstrate the issue:

```ruby
x = STDIN.gets
puts x
puts STDIN.eof?
```

If you run the script, then type some characters (but _NOT_ a newline),
then hit Ctrl-D twice, it will print the input string.  Unfortunately,
calling `eof?` will try to read from STDIN again causing us to need a
3rd Ctrl-D to exit the program.

Before introducing the EOF callback to Prism, the input loop looked
kind of like this:

```ruby
loop do
  str = STDIN.gets
  process(str)

  if str.nil?
    p :DONE
  end
end
```

Which required 3 Ctrl-D to exit.  If we naively changed it to something
like this:

```ruby
loop do
  str = STDIN.gets
  process(str)

  if STDIN.eof?
    p :DONE
  end
end
```

It would still require 3 Ctrl-D because `eof?` would block.  In this
patch, we're wrapping the IO object, checking the buffer for a newline
and length, and then using that to simulate a non-blocking eof? method.

This commit wraps STDIN and emulates a non-blocking `eof` function.

[Bug #21188]
This commit is contained in:
Aaron Patterson 2025-07-17 15:20:20 -07:00 committed by Aaron Patterson
parent 1c6b36af18
commit 89d89fa49d
5 changed files with 66 additions and 13 deletions

View file

@ -86,6 +86,7 @@ module Prism
end end
callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer
callback :pm_parse_stream_feof_t, [:pointer], :int
enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY] enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY]
enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE] enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE]
@ -101,7 +102,7 @@ module Prism
"pm_string_query_local", "pm_string_query_local",
"pm_string_query_constant", "pm_string_query_constant",
"pm_string_query_method_name", "pm_string_query_method_name",
[:pm_parse_stream_fgets_t] [:pm_parse_stream_fgets_t, :pm_parse_stream_feof_t]
) )
load_exported_functions_from( load_exported_functions_from(
@ -281,12 +282,14 @@ module Prism
end end
} }
eof_callback = -> (_) { stream.eof? }
# In the pm_serialize_parse_stream function it accepts a pointer to the # In the pm_serialize_parse_stream function it accepts a pointer to the
# IO object as a void* and then passes it through to the callback as the # IO object as a void* and then passes it through to the callback as the
# third argument, but it never touches it itself. As such, since we have # third argument, but it never touches it itself. As such, since we have
# access to the IO object already through the closure of the lambda, we # access to the IO object already through the closure of the lambda, we
# can pass a null pointer here and not worry. # can pass a null pointer here and not worry.
LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options)) LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, eof_callback, dump_options(options))
Prism.load(source, buffer.read, options.fetch(:freeze, false)) Prism.load(source, buffer.read, options.fetch(:freeze, false))
end end
end end

View file

@ -994,6 +994,14 @@ profile_file(int argc, VALUE *argv, VALUE self) {
return Qnil; return Qnil;
} }
/**
 * An implementation of feof that is suitable for use with Ruby IO objects.
 * Calls eof? on the stream and reports the result as a C int.
 *
 * @param stream The Ruby object (a VALUE passed as void *) to query.
 * @return 1 if eof? returned a truthy value, 0 otherwise.
 */
static int
parse_stream_eof(void *stream) {
    // rb_funcall returns a VALUE, and Qnil is a non-zero VALUE, so a plain C
    // truth test would treat a nil result as "at EOF". RTEST applies Ruby
    // truthiness instead.
    return RTEST(rb_funcall((VALUE) stream, rb_intern("eof?"), 0)) ? 1 : 0;
}
/** /**
* An implementation of fgets that is suitable for use with Ruby IO objects. * An implementation of fgets that is suitable for use with Ruby IO objects.
*/ */
@ -1034,7 +1042,7 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
pm_parser_t parser; pm_parser_t parser;
pm_buffer_t buffer; pm_buffer_t buffer;
pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options); pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options);
rb_encoding *encoding = rb_enc_find(parser.encoding->name); rb_encoding *encoding = rb_enc_find(parser.encoding->name);
VALUE source = pm_source_new(&parser, encoding, options.freeze); VALUE source = pm_source_new(&parser, encoding, options.freeze);

View file

@ -22848,7 +22848,7 @@ pm_parse(pm_parser_t *parser) {
* otherwise return true. * otherwise return true.
*/ */
static bool static bool
pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) { pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
#define LINE_SIZE 4096 #define LINE_SIZE 4096
char line[LINE_SIZE]; char line[LINE_SIZE];
@ -22884,6 +22884,12 @@ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t
if (strncmp(line, "__END__\r\n", 9) == 0) return false; if (strncmp(line, "__END__\r\n", 9) == 0) return false;
break; break;
} }
// All data should be read via gets. If the string returned by gets
// _doesn't_ end with a newline, then we assume we hit EOF condition.
if (stream_feof(stream)) {
break;
}
} }
return true; return true;
@ -22919,16 +22925,17 @@ pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
* can stream stdin in to Ruby so we need to support a streaming API. * can stream stdin in to Ruby so we need to support a streaming API.
*/ */
PRISM_EXPORTED_FUNCTION pm_node_t * PRISM_EXPORTED_FUNCTION pm_node_t *
pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) { pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
pm_buffer_init(buffer); pm_buffer_init(buffer);
bool eof = pm_parse_stream_read(buffer, stream, stream_fgets); bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
pm_node_t *node = pm_parse(parser); pm_node_t *node = pm_parse(parser);
while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) { while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
pm_node_destroy(parser, node); pm_node_destroy(parser, node);
eof = pm_parse_stream_read(buffer, stream, stream_fgets); eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
pm_parser_free(parser); pm_parser_free(parser);
pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
@ -23020,13 +23027,13 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
* given stream into the given buffer. * given stream into the given buffer.
*/ */
PRISM_EXPORTED_FUNCTION void PRISM_EXPORTED_FUNCTION void
pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) { pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
pm_parser_t parser; pm_parser_t parser;
pm_options_t options = { 0 }; pm_options_t options = { 0 };
pm_options_read(&options, data); pm_options_read(&options, data);
pm_buffer_t parser_buffer; pm_buffer_t parser_buffer;
pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options); pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
pm_serialize_header(buffer); pm_serialize_header(buffer);
pm_serialize_content(&parser, node, buffer); pm_serialize_content(&parser, node, buffer);
pm_buffer_append_byte(buffer, '\0'); pm_buffer_append_byte(buffer, '\0');

View file

@ -87,6 +87,13 @@ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
*/ */
typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream); typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream);
/**
 * This function type is used in pm_parse_stream to check whether a stream has
 * reached EOF. Its shape mirrors that of feof so that feof can be used as the
 * default implementation.
 *
 * @param stream The stream to check, as passed to pm_parse_stream.
 * @return Non-zero if the stream has hit EOF, zero otherwise.
 */
typedef int (pm_parse_stream_feof_t)(void *stream);
/** /**
* Parse a stream of Ruby source and return the tree. * Parse a stream of Ruby source and return the tree.
* *
@ -94,10 +101,11 @@ typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream);
* @param buffer The buffer to use. * @param buffer The buffer to use.
* @param stream The stream to parse. * @param stream The stream to parse.
* @param stream_fgets The function to use to read from the stream. * @param stream_fgets The function to use to read from the stream.
* @param stream_feof The function to use to determine if the stream has hit eof.
* @param options The optional options to use when parsing. * @param options The optional options to use when parsing.
* @return The AST representing the source. * @return The AST representing the source.
*/ */
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options); PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options);
// We optionally support serializing to a binary string. For systems that don't // We optionally support serializing to a binary string. For systems that don't
// want or need this functionality, it can be turned off with the // want or need this functionality, it can be turned off with the
@ -111,9 +119,10 @@ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buff
* @param buffer The buffer to serialize to. * @param buffer The buffer to serialize to.
* @param stream The stream to parse. * @param stream The stream to parse.
* @param stream_fgets The function to use to read from the stream. * @param stream_fgets The function to use to read from the stream.
* @param stream_feof The function to use to tell if the stream has hit eof.
* @param data The optional data to pass to the parser. * @param data The optional data to pass to the parser.
*/ */
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data); PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data);
/** /**
* Serialize the given list of comments to the given buffer. * Serialize the given list of comments to the given buffer.

View file

@ -11492,6 +11492,18 @@ pm_parse_string(pm_parse_result_t *result, VALUE source, VALUE filepath, VALUE *
return pm_parse_process(result, node, script_lines); return pm_parse_process(result, node, script_lines);
} }
/**
 * Wraps the stdin IO object together with the EOF state tracked while reading
 * from it, so both can be passed to the stream callbacks as a single pointer.
 */
struct rb_stdin_wrapper {
// The IO object that gets is called on.
VALUE rb_stdin;
// Set to 1 once a read returns a short line with no trailing newline;
// reported back to the parser by pm_parse_stdin_eof.
int eof_seen;
};
static int
pm_parse_stdin_eof(void *stream)
{
struct rb_stdin_wrapper * wrapped_stdin = (struct rb_stdin_wrapper *)stream;
return wrapped_stdin->eof_seen;
}
/** /**
* An implementation of fgets that is suitable for use with Ruby IO objects. * An implementation of fgets that is suitable for use with Ruby IO objects.
*/ */
@ -11500,7 +11512,9 @@ pm_parse_stdin_fgets(char *string, int size, void *stream)
{ {
RUBY_ASSERT(size > 0); RUBY_ASSERT(size > 0);
VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1)); struct rb_stdin_wrapper * wrapped_stdin = (struct rb_stdin_wrapper *)stream;
VALUE line = rb_funcall(wrapped_stdin->rb_stdin, rb_intern("gets"), 1, INT2FIX(size - 1));
if (NIL_P(line)) { if (NIL_P(line)) {
return NULL; return NULL;
} }
@ -11511,6 +11525,13 @@ pm_parse_stdin_fgets(char *string, int size, void *stream)
memcpy(string, cstr, length); memcpy(string, cstr, length);
string[length] = '\0'; string[length] = '\0';
// We're reading strings from stdin via gets. We'll assume that if the
// string is smaller than the requested length, and doesn't end with a
// newline, that we hit EOF.
if (length < (size - 1) && string[length - 1] != '\n') {
wrapped_stdin->eof_seen = 1;
}
return string; return string;
} }
@ -11527,8 +11548,13 @@ pm_parse_stdin(pm_parse_result_t *result)
{ {
pm_options_frozen_string_literal_init(&result->options); pm_options_frozen_string_literal_init(&result->options);
struct rb_stdin_wrapper wrapped_stdin = {
rb_stdin,
0
};
pm_buffer_t buffer; pm_buffer_t buffer;
pm_node_t *node = pm_parse_stream(&result->parser, &buffer, (void *) rb_stdin, pm_parse_stdin_fgets, &result->options); pm_node_t *node = pm_parse_stream(&result->parser, &buffer, (void *) &wrapped_stdin, pm_parse_stdin_fgets, pm_parse_stdin_eof, &result->options);
// Copy the allocated buffer contents into the input string so that it gets // Copy the allocated buffer contents into the input string so that it gets
// freed. At this point we've handed over ownership, so we don't need to // freed. At this point we've handed over ownership, so we don't need to