ruby/ext/-test-/file/newline_conv.c
KJ Tsanaktsidis 31371b2e24 Fix CRLF -> LF conversion on read for rb_io_fdopen & rb_file_open
When opening a file with `File.open`, and then setting the encoding with
`IO#set_encoding`, it still correctly performs CRLF -> LF conversion on
Windows when reading files with a CRLF line ending in them (in text
mode).

However, if the file is instead opened with either the `rb_io_fdopen` or
`rb_file_open` APIs from C, the CRLF conversion is _NOT_ set up
correctly; it works if the encoding is not specified, but if
`IO#set_encoding` is called, the conversion stops happening. This seems
to be because the encflags never get ECONV_DEFAULT_NEWLINE_DECORATOR
set in these codepaths.

Concretely, this means that the conversion doesn't happen in the
following circumstances:
  * When loading ruby files with require (that calls rb_io_fdopen)
  * When parsing ruby files with RubyVM::AbstractSyntaxTree (that calls
    rb_file_open).
This then causes the ErrorHighlight tests to fail on windows if git has
checked them out with CRLF line endings - the error messages it's
testing wind up with literal \r\n sequences in them because the iseq
text from the parser contains un-newline-converted strings.

This commit fixes the problem by copy-pasting the relevant snippet which
sets this up in `rb_io_extract_modeenc` (for the File.open path) into
the relevant codepaths for `rb_io_fdopen` and `rb_file_open`.

[Bug #20101]
2024-01-10 21:02:23 +11:00

73 lines
2 KiB
C

#include "ruby/ruby.h"
#include "ruby/io.h"
#include <fcntl.h>
/*
 * Opens +filename+ through rb_file_open() using a mode string assembled from
 * two symbols:
 *
 *   read_or_write  - :read selects "r", :write selects "w"
 *   binary_or_text - :binary appends "b", :text appends nothing
 *
 * Any other symbol for either argument raises ArgumentError.  Returns
 * whatever rb_file_open() returns.
 */
static VALUE
open_with_rb_file_open(VALUE self, VALUE filename, VALUE read_or_write, VALUE binary_or_text)
{
    /* Room for the direction char, an optional 'b', and the terminating NUL. */
    char mode[3] = { 0 };

    /* Validate the direction first so its error takes precedence. */
    const ID direction = rb_sym2id(read_or_write);
    const int reading = (direction == rb_intern("read"));
    if (!reading && direction != rb_intern("write")) {
        rb_raise(rb_eArgError, "read_or_write param must be :read or :write");
    }
    mode[0] = reading ? 'r' : 'w';

    /* :text leaves mode[1] as NUL, so only :binary needs an explicit write. */
    const ID flavor = rb_sym2id(binary_or_text);
    if (flavor == rb_intern("binary")) {
        mode[1] = 'b';
    }
    else if (flavor != rb_intern("text")) {
        rb_raise(rb_eArgError, "binary_or_text param must be :binary or :text");
    }

    return rb_file_open(StringValueCStr(filename), mode);
}
/*
 * Opens +filename+ with rb_cloexec_open() and wraps the resulting descriptor
 * with rb_io_fdopen(), passing the same open(2)-style flags to both.
 *
 *   read_or_write  - :read selects O_RDONLY, :write selects O_WRONLY
 *   binary_or_text - :binary adds O_BINARY where that flag is defined,
 *                    :text adds nothing
 *
 * Any other symbol for either argument raises ArgumentError.  Raises IOError
 * when the descriptor cannot be opened.
 */
static VALUE
open_with_rb_io_fdopen(VALUE self, VALUE filename, VALUE read_or_write, VALUE binary_or_text)
{
    int flags = 0;

    /* Validate the direction first so its error takes precedence. */
    const ID direction = rb_sym2id(read_or_write);
    if (direction == rb_intern("read")) {
        flags = O_RDONLY;
    }
    else if (direction == rb_intern("write")) {
        flags = O_WRONLY;
    }
    else {
        rb_raise(rb_eArgError, "read_or_write param must be :read or :write");
    }

    const ID flavor = rb_sym2id(binary_or_text);
    if (flavor == rb_intern("binary")) {
        /* O_BINARY is not available everywhere; without it :binary is a no-op. */
#if defined(O_BINARY)
        flags |= O_BINARY;
#endif
    }
    else if (flavor != rb_intern("text")) {
        rb_raise(rb_eArgError, "binary_or_text param must be :binary or :text");
    }

    int fd = rb_cloexec_open(StringValueCStr(filename), flags, 0);
    if (fd < 0) {
        rb_raise(rb_eIOError, "failed to open the file");
    }
    rb_update_max_fd(fd);

    return rb_io_fdopen(fd, flags, StringValueCStr(filename));
}
/*
 * Defines the NewlineConv module under +module+ and registers its two
 * module functions, rb_file_open and rb_io_fdopen, each taking three
 * arguments.
 */
void
Init_newline_conv(VALUE module)
{
    VALUE mod = rb_define_module_under(module, "NewlineConv");

    rb_define_module_function(mod, "rb_file_open", open_with_rb_file_open, 3);
    rb_define_module_function(mod, "rb_io_fdopen", open_with_rb_io_fdopen, 3);
}