diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 1e1bf8b1c8..5ae177055f 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -86,6 +86,7 @@ module Prism end callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer + callback :pm_parse_stream_feof_t, [:pointer], :int enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY] enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE] @@ -101,7 +102,7 @@ module Prism "pm_string_query_local", "pm_string_query_constant", "pm_string_query_method_name", - [:pm_parse_stream_fgets_t] + [:pm_parse_stream_fgets_t, :pm_parse_stream_feof_t] ) load_exported_functions_from( @@ -281,12 +282,14 @@ module Prism end } + eof_callback = -> (_) { stream.eof? ? 1 : 0 } + # In the pm_serialize_parse_stream function it accepts a pointer to the # IO object as a void* and then passes it through to the callback as the # third argument, but it never touches it itself. As such, since we have # access to the IO object already through the closure of the lambda, we # can pass a null pointer here and not worry. - LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options)) + LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, eof_callback, dump_options(options)) Prism.load(source, buffer.read, options.fetch(:freeze, false)) end end diff --git a/prism/extension.c b/prism/extension.c index 1533ca7bb3..83415d0c29 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -994,6 +994,14 @@ profile_file(int argc, VALUE *argv, VALUE self) { return Qnil; } +/** Returns 1 when calling eof? on the Ruby object passed as the stream yields a truthy value, 0 otherwise. */ +static int +parse_stream_eof(void *stream) { + // rb_funcall returns a VALUE, so test Ruby truthiness with RTEST rather than a raw C comparison. + VALUE at_eof = rb_funcall((VALUE) stream, rb_intern("eof?"), 0); + return RTEST(at_eof) ? 1 : 0; +} + /** * An implementation of fgets that is suitable for use with Ruby IO objects. 
*/ @@ -1034,7 +1042,7 @@ parse_stream(int argc, VALUE *argv, VALUE self) { pm_parser_t parser; pm_buffer_t buffer; - pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options); + pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options); rb_encoding *encoding = rb_enc_find(parser.encoding->name); VALUE source = pm_source_new(&parser, encoding, options.freeze); diff --git a/prism/prism.c b/prism/prism.c index ec8f84fb6b..d01c2a0766 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -22848,7 +22848,7 @@ pm_parse(pm_parser_t *parser) { * otherwise return true. */ static bool -pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) { +pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) { #define LINE_SIZE 4096 char line[LINE_SIZE]; @@ -22884,6 +22884,12 @@ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t if (strncmp(line, "__END__\r\n", 9) == 0) return false; break; } + + // All data should be read via gets. If the string returned by gets + // _doesn't_ end with a newline, then we assume we hit EOF condition. + if (stream_feof(stream)) { + break; + } } return true; @@ -22919,16 +22925,17 @@ pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) { * can stream stdin in to Ruby so we need to support a streaming API. 
*/ PRISM_EXPORTED_FUNCTION pm_node_t * -pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) { +pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) { pm_buffer_init(buffer); - bool eof = pm_parse_stream_read(buffer, stream, stream_fgets); + bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof); + pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); pm_node_t *node = pm_parse(parser); while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) { pm_node_destroy(parser, node); - eof = pm_parse_stream_read(buffer, stream, stream_fgets); + eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof); pm_parser_free(parser); pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); @@ -23020,13 +23027,13 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons * given stream into to the given buffer. 
*/ PRISM_EXPORTED_FUNCTION void -pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) { +pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) { pm_parser_t parser; pm_options_t options = { 0 }; pm_options_read(&options, data); pm_buffer_t parser_buffer; - pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options); + pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options); pm_serialize_header(buffer); pm_serialize_content(&parser, node, buffer); pm_buffer_append_byte(buffer, '\0'); diff --git a/prism/prism.h b/prism/prism.h index 317568aa0c..a6f22f1a5a 100644 --- a/prism/prism.h +++ b/prism/prism.h @@ -87,6 +87,13 @@ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser); */ typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream); +/** + * This function is used in pm_parse_stream to check whether a stream is EOF. + * It closely mirrors that of feof so that feof can be used as the + * default implementation. + */ +typedef int (pm_parse_stream_feof_t)(void *stream); + /** * Parse a stream of Ruby source and return the tree. * @@ -94,10 +101,11 @@ typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream); * @param buffer The buffer to use. * @param stream The stream to parse. * @param stream_fgets The function to use to read from the stream. + * @param stream_feof The function to use to determine if the stream has hit eof. * @param options The optional options to use when parsing. * @return The AST representing the source. 
*/ -PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options); +PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options); // We optionally support serializing to a binary string. For systems that don't // want or need this functionality, it can be turned off with the @@ -111,9 +119,10 @@ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buff * @param buffer The buffer to serialize to. * @param stream The stream to parse. * @param stream_fgets The function to use to read from the stream. + * @param stream_feof The function to use to tell if the stream has hit eof. * @param data The optional data to pass to the parser. */ -PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data); +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data); /** * Serialize the given list of comments to the given buffer. diff --git a/prism_compile.c b/prism_compile.c index 17db5e58c8..2f5bb4ebe3 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -11492,6 +11492,18 @@ pm_parse_string(pm_parse_result_t *result, VALUE source, VALUE filepath, VALUE * return pm_parse_process(result, node, script_lines); } +struct rb_stdin_wrapper { // stream state that pm_parse_stdin hands to pm_parse_stream as its void *stream argument + VALUE rb_stdin; // object that pm_parse_stdin_fgets calls gets on + int eof_seen; // starts at 0; set to 1 by pm_parse_stdin_fgets once it assumes EOF was hit +}; + +static int +pm_parse_stdin_eof(void *stream) // EOF callback given to pm_parse_stream: reports the wrapper's eof_seen flag +{ + struct rb_stdin_wrapper * wrapped_stdin = (struct rb_stdin_wrapper *)stream; + return wrapped_stdin->eof_seen; +} + /** * An implementation of fgets that is suitable for use with Ruby IO objects. 
*/ @@ -11500,7 +11512,9 @@ pm_parse_stdin_fgets(char *string, int size, void *stream) { RUBY_ASSERT(size > 0); - VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1)); + struct rb_stdin_wrapper * wrapped_stdin = (struct rb_stdin_wrapper *)stream; + + VALUE line = rb_funcall(wrapped_stdin->rb_stdin, rb_intern("gets"), 1, INT2FIX(size - 1)); if (NIL_P(line)) { return NULL; } @@ -11511,6 +11525,13 @@ pm_parse_stdin_fgets(char *string, int size, void *stream) memcpy(string, cstr, length); string[length] = '\0'; + // We're reading strings from stdin via gets. We'll assume that if the + // string is non-empty (so string[length - 1] is in bounds), shorter than + // the requested length, and doesn't end with a newline, that we hit EOF. + if (length > 0 && length < (size - 1) && string[length - 1] != '\n') { + wrapped_stdin->eof_seen = 1; + } + return string; } @@ -11527,8 +11548,13 @@ pm_parse_stdin(pm_parse_result_t *result) { pm_options_frozen_string_literal_init(&result->options); + struct rb_stdin_wrapper wrapped_stdin = { + rb_stdin, + 0 + }; + pm_buffer_t buffer; - pm_node_t *node = pm_parse_stream(&result->parser, &buffer, (void *) rb_stdin, pm_parse_stdin_fgets, &result->options); + pm_node_t *node = pm_parse_stream(&result->parser, &buffer, (void *) &wrapped_stdin, pm_parse_stdin_fgets, pm_parse_stdin_eof, &result->options); // Copy the allocated buffer contents into the input string so that it gets // freed. At this point we've handed over ownership, so we don't need to