reuse open(2) from rb_file_load_ok on POSIX-like systems

When loading Ruby source files, we can save the result of
successful opens, as open(2)/openat(2) are fairly expensive
syscalls.  This also avoids a time-of-check-to-time-of-use
(TOCTTOU) problem.

This reduces open(2) syscalls during `require', and should be
most apparent when users have a small $LOAD_PATH.  Users with
a large $LOAD_PATH will benefit less since there'll be more
open(2) failures due to ENOENT.

With `strace -c -e openat ruby -e exit' under Linux, this
results in a ~14% reduction of openat(2) syscalls
(glibc uses openat(2) to implement open(2)).

 % time     seconds  usecs/call     calls    errors syscall
 ------ ----------- ----------- --------- --------- ----------------
   0.00    0.000000           0       296       110 openat
   0.00    0.000000           0       254       110 openat

Additionally, the introduction of `struct ruby_file_load_state'
may make future optimizations more apparent.

This change cannot benefit binary (.so) loading since the
dlopen(3) API requires a filename and I'm not aware of an
alternative that takes a pre-existing FD.  In typical
situations, Ruby source files outnumber .so files.
This commit is contained in:
Eric Wong 2023-02-24 18:05:36 +00:00 committed by Eric Wong
parent 6e6992e5db
commit 35136e1e9c
6 changed files with 101 additions and 27 deletions

38
file.c
View file

@ -6359,7 +6359,7 @@ ruby_is_fd_loadable(int fd)
#ifndef _WIN32
int
rb_file_load_ok(const char *path)
rb_file_load_ok(const char *path, struct ruby_file_load_state *fls)
{
int ret = 1;
/*
@ -6381,10 +6381,21 @@ rb_file_load_ok(const char *path)
}
rb_update_max_fd(fd);
ret = ruby_is_fd_loadable(fd);
(void)close(fd);
#if defined(DOSISH) || defined(__CYGWIN__)
fls = NULL; /* need to set xflag via open_load_file */
#endif
if (ret && fls) {
/* TODO: avoid path object alloc in rb_io_fdopen */
fls->filev = rb_io_fdopen(fd, mode, path);
fls->is_fifo = ret < 0 ? 1 : 0;
fls->is_nonblock = mode == O_RDONLY ? 0 : 1;
} else {
(void)close(fd);
}
return ret;
}
#endif
#endif /* !_WIN32 */
static int
is_explicit_relative(const char *path)
@ -6409,6 +6420,13 @@ copy_path_class(VALUE path, VALUE orig)
/*
 * Compatibility wrapper that keeps the original two-argument signature.
 * Delegates to ruby_find_file_ext() with a NULL load state, so the caller
 * does not receive an already-opened file for the path that was found.
 * Returns the value of ruby_find_file_ext() unchanged.
 */
int
rb_find_file_ext(VALUE *filep, const char *const *ext)
{
return ruby_find_file_ext(filep, ext, NULL);
}
int
ruby_find_file_ext(VALUE *filep, const char *const *ext,
struct ruby_file_load_state *fls)
{
const char *f = StringValueCStr(*filep);
VALUE fname = *filep, load_path, tmp;
@ -6429,7 +6447,7 @@ rb_find_file_ext(VALUE *filep, const char *const *ext)
fnlen = RSTRING_LEN(fname);
for (i=0; ext[i]; i++) {
rb_str_cat2(fname, ext[i]);
if (rb_file_load_ok(RSTRING_PTR(fname))) {
if (rb_file_load_ok(RSTRING_PTR(fname), fls)) {
*filep = copy_path_class(fname, *filep);
return (int)(i+1);
}
@ -6454,7 +6472,7 @@ rb_find_file_ext(VALUE *filep, const char *const *ext)
RB_GC_GUARD(str) = rb_get_path(str);
if (RSTRING_LEN(str) == 0) continue;
rb_file_expand_path_internal(fname, str, 0, 0, tmp);
if (rb_file_load_ok(RSTRING_PTR(tmp))) {
if (rb_file_load_ok(RSTRING_PTR(tmp), fls)) {
*filep = copy_path_class(tmp, *filep);
return (int)(j+1);
}
@ -6468,6 +6486,12 @@ rb_find_file_ext(VALUE *filep, const char *const *ext)
/*
 * Compatibility wrapper that keeps the original one-argument signature.
 * Delegates to ruby_find_file() with a NULL load state, so the caller
 * does not receive an already-opened file for the path that was found.
 * Returns the value of ruby_find_file() unchanged.
 */
VALUE
rb_find_file(VALUE path)
{
return ruby_find_file(path, NULL);
}
VALUE
ruby_find_file(VALUE path, struct ruby_file_load_state *fls)
{
VALUE tmp, load_path;
const char *f = StringValueCStr(path);
@ -6481,7 +6505,7 @@ rb_find_file(VALUE path)
}
if (expanded || rb_is_absolute_path(f) || is_explicit_relative(f)) {
if (!rb_file_load_ok(f)) return 0;
if (!rb_file_load_ok(f, fls)) return 0;
if (!expanded)
path = copy_path_class(file_expand_path_1(path), path);
return path;
@ -6499,7 +6523,7 @@ rb_find_file(VALUE path)
if (RSTRING_LEN(str) > 0) {
rb_file_expand_path_internal(path, str, 0, 0, tmp);
f = RSTRING_PTR(tmp);
if (rb_file_load_ok(f)) goto found;
if (rb_file_load_ok(f, fls)) goto found;
}
}
rb_str_resize(tmp, 0);