Allow encodings to be autoloaded through transcoding functions

Make sure VM lock is not held when calling `load_transcoder_entry`, as
that causes deadlock inside ractors. `String#encode` now works inside
ractors, among others.

Atomic load the rb_encoding_list

Without this, wbcheck would sometimes hit a missing write barrier.

Co-authored-by: John Hawthorn <john.hawthorn@shopify.com>

Hold VM lock when iterating over global_enc_table.names

This st_table can be inserted into at runtime when autoloading
encodings.

minor optimization when calling Encoding.list
This commit is contained in:
Luke Gruber 2025-08-06 14:30:03 -04:00 committed by John Hawthorn
parent 31e8a9fced
commit 1afc07e815
4 changed files with 157 additions and 94 deletions

View file

@ -340,7 +340,7 @@ transcode_search_path(const char *sname, const char *dname,
bfs.queue_last_ptr = &q->next;
bfs.queue = q;
bfs.visited = st_init_strcasetable();
bfs.visited = st_init_strcasetable(); // due to base encodings, we need to do search in a loop
st_add_direct(bfs.visited, (st_data_t)sname, (st_data_t)NULL);
RB_VM_LOCKING() {
@ -351,14 +351,14 @@ transcode_search_path(const char *sname, const char *dname,
bfs.queue_last_ptr = &bfs.queue;
}
lookup_res = st_lookup(transcoder_table, (st_data_t)q->enc, &val);
lookup_res = st_lookup(transcoder_table, (st_data_t)q->enc, &val); // src => table2
if (!lookup_res) {
xfree(q);
continue;
}
table2 = (st_table *)val;
if (st_lookup(table2, (st_data_t)dname, &val)) {
if (st_lookup(table2, (st_data_t)dname, &val)) { // dest => econv
st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
xfree(q);
found = true;
@ -411,8 +411,7 @@ int rb_require_internal_silent(VALUE fname);
static const rb_transcoder *
load_transcoder_entry(transcoder_entry_t *entry)
{
// changes result of entry->transcoder depending on if it's required or not, so needs lock
ASSERT_vm_locking();
ASSERT_vm_unlocking();
if (entry->transcoder)
return entry->transcoder;
@ -427,7 +426,7 @@ load_transcoder_entry(transcoder_entry_t *entry)
memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len);
rb_str_set_len(fn, total_len);
OBJ_FREEZE(fn);
rb_require_internal_silent(fn);
rb_require_internal_silent(fn); // Sets entry->transcoder
}
if (entry->transcoder)
@ -981,7 +980,6 @@ rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
{
rb_econv_t *ec;
int i, ret;
ASSERT_vm_locking();
for (i = 0; i < n; i++) {
const rb_transcoder *tr;
@ -1026,10 +1024,8 @@ rb_econv_open0(const char *sname, const char *dname, int ecflags)
transcoder_entry_t **entries = NULL;
int num_trans;
rb_econv_t *ec;
ASSERT_vm_locking();
/* Just check if sname and dname are defined */
/* (This check is needed?) */
// loads encodings if not loaded already
if (*sname) rb_enc_find_index(sname);
if (*dname) rb_enc_find_index(dname);
@ -1117,15 +1113,13 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
if (num_decorators == -1)
return NULL;
RB_VM_LOCKING() {
ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
if (ec) {
for (i = 0; i < num_decorators; i++) {
if (rb_econv_decorate_at_last(ec, decorators[i]) == -1) {
rb_econv_close(ec);
ec = NULL;
break;
}
ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
if (ec) {
for (i = 0; i < num_decorators; i++) {
if (rb_econv_decorate_at_last(ec, decorators[i]) == -1) {
rb_econv_close(ec);
ec = NULL;
break;
}
}
}
@ -1960,12 +1954,9 @@ rb_econv_add_converter(rb_econv_t *ec, const char *sname, const char *dname, int
if (ec->started != 0)
return -1;
RB_VM_LOCKING() {
entry = get_transcoder_entry(sname, dname);
if (entry) {
tr = load_transcoder_entry(entry);
}
entry = get_transcoder_entry(sname, dname);
if (entry) {
tr = load_transcoder_entry(entry);
}
return tr ? rb_econv_add_transcoder_at(ec, tr, n) : -1;
@ -2681,21 +2672,19 @@ rb_econv_open_opts(const char *source_encoding, const char *destination_encoding
replacement = rb_hash_aref(opthash, sym_replace);
}
RB_VM_LOCKING() {
ec = rb_econv_open(source_encoding, destination_encoding, ecflags);
if (ec) {
if (!NIL_P(replacement)) {
int ret;
rb_encoding *enc = rb_enc_get(replacement);
ec = rb_econv_open(source_encoding, destination_encoding, ecflags);
if (ec) {
if (!NIL_P(replacement)) {
int ret;
rb_encoding *enc = rb_enc_get(replacement);
ret = rb_econv_set_replacement(ec,
(const unsigned char *)RSTRING_PTR(replacement),
RSTRING_LEN(replacement),
rb_enc_name(enc));
if (ret == -1) {
rb_econv_close(ec);
ec = NULL;
}
ret = rb_econv_set_replacement(ec,
(const unsigned char *)RSTRING_PTR(replacement),
RSTRING_LEN(replacement),
rb_enc_name(enc));
if (ret == -1) {
rb_econv_close(ec);
ec = NULL;
}
}
}
@ -3132,10 +3121,8 @@ decorate_convpath(VALUE convpath, int ecflags)
const char *dname = rb_enc_name(rb_to_encoding(RARRAY_AREF(pair, 1)));
transcoder_entry_t *entry;
const rb_transcoder *tr;
RB_VM_LOCKING() {
entry = get_transcoder_entry(sname, dname);
tr = load_transcoder_entry(entry);
}
entry = get_transcoder_entry(sname, dname);
tr = load_transcoder_entry(entry);
if (!tr)
return -1;
if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&