Get transcoding to work across ractors by locking certain operations

Ex: `str.encode` and `str.encode!` work across ractors now.

The global table `transcoder_table` needs a lock around each st_lookup/st_insert, and it's a two-level
table so the second level also needs to be locked around insertion/deletion. In addition to this, the
transcoder entries (values in the second-level hash table) need to be locked around retrieving them and
loading them as they are loaded lazily.

The transcoding objects (`Encoding::Converter`) can't be made shareable, so their operations don't need to be locked.
This commit is contained in:
Luke Gruber 2025-06-12 15:13:08 -04:00 committed by John Hawthorn
parent ed3fd94e77
commit 4f4408e989
2 changed files with 144 additions and 113 deletions

View file

@ -19515,6 +19515,7 @@ transcode.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
transcode.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
transcode.$(OBJEXT): {$(VPATH)}config.h
transcode.$(OBJEXT): {$(VPATH)}constant.h
transcode.$(OBJEXT): {$(VPATH)}debug_counter.h
transcode.$(OBJEXT): {$(VPATH)}defines.h
transcode.$(OBJEXT): {$(VPATH)}encoding.h
transcode.$(OBJEXT): {$(VPATH)}id.h
@ -19678,6 +19679,8 @@ transcode.$(OBJEXT): {$(VPATH)}st.h
transcode.$(OBJEXT): {$(VPATH)}subst.h
transcode.$(OBJEXT): {$(VPATH)}transcode.c
transcode.$(OBJEXT): {$(VPATH)}transcode_data.h
transcode.$(OBJEXT): {$(VPATH)}vm_debug.h
transcode.$(OBJEXT): {$(VPATH)}vm_sync.h
util.$(OBJEXT): $(hdrdir)/ruby/ruby.h
util.$(OBJEXT): $(top_srcdir)/internal/array.h
util.$(OBJEXT): $(top_srcdir)/internal/compilers.h

View file

@ -20,6 +20,7 @@
#include "internal/string.h"
#include "internal/transcode.h"
#include "ruby/encoding.h"
#include "vm_sync.h"
#include "transcode_data.h"
#include "id.h"
@ -209,19 +210,21 @@ make_transcoder_entry(const char *sname, const char *dname)
st_data_t val;
st_table *table2;
if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
val = (st_data_t)st_init_strcasetable();
st_add_direct(transcoder_table, (st_data_t)sname, val);
}
table2 = (st_table *)val;
if (!st_lookup(table2, (st_data_t)dname, &val)) {
transcoder_entry_t *entry = ALLOC(transcoder_entry_t);
entry->sname = sname;
entry->dname = dname;
entry->lib = NULL;
entry->transcoder = NULL;
val = (st_data_t)entry;
st_add_direct(table2, (st_data_t)dname, val);
RB_VM_LOCKING() {
if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
val = (st_data_t)st_init_strcasetable();
st_add_direct(transcoder_table, (st_data_t)sname, val);
}
table2 = (st_table *)val;
if (!st_lookup(table2, (st_data_t)dname, &val)) {
transcoder_entry_t *entry = ALLOC(transcoder_entry_t);
entry->sname = sname;
entry->dname = dname;
entry->lib = NULL;
entry->transcoder = NULL;
val = (st_data_t)entry;
st_add_direct(table2, (st_data_t)dname, val);
}
}
return (transcoder_entry_t *)val;
}
@ -229,15 +232,15 @@ make_transcoder_entry(const char *sname, const char *dname)
static transcoder_entry_t *
get_transcoder_entry(const char *sname, const char *dname)
{
st_data_t val;
st_data_t val = 0;
st_table *table2;
if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
return NULL;
}
table2 = (st_table *)val;
if (!st_lookup(table2, (st_data_t)dname, &val)) {
return NULL;
RB_VM_LOCKING() {
if (st_lookup(transcoder_table, (st_data_t)sname, &val)) {
table2 = (st_table *)val;
if (!st_lookup(table2, (st_data_t)dname, &val)) {
val = 0;
}
}
}
return (transcoder_entry_t *)val;
}
@ -250,13 +253,14 @@ rb_register_transcoder(const rb_transcoder *tr)
transcoder_entry_t *entry;
entry = make_transcoder_entry(sname, dname);
if (entry->transcoder) {
rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
sname, dname);
RB_VM_LOCKING() {
entry = make_transcoder_entry(sname, dname);
if (entry->transcoder) {
rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
sname, dname);
}
entry->transcoder = tr;
}
entry->transcoder = tr;
}
static void
@ -323,8 +327,9 @@ transcode_search_path(const char *sname, const char *dname,
search_path_queue_t *q;
st_data_t val;
st_table *table2;
int found;
int pathlen = -1;
bool found = false;
bool lookup_res;
if (encoding_equal(sname, dname))
return -1;
@ -338,34 +343,36 @@ transcode_search_path(const char *sname, const char *dname,
bfs.visited = st_init_strcasetable();
st_add_direct(bfs.visited, (st_data_t)sname, (st_data_t)NULL);
while (bfs.queue) {
q = bfs.queue;
bfs.queue = q->next;
if (!bfs.queue)
bfs.queue_last_ptr = &bfs.queue;
RB_VM_LOCKING() {
while (bfs.queue) {
q = bfs.queue;
bfs.queue = q->next;
if (!bfs.queue) {
bfs.queue_last_ptr = &bfs.queue;
}
if (!st_lookup(transcoder_table, (st_data_t)q->enc, &val)) {
lookup_res = st_lookup(transcoder_table, (st_data_t)q->enc, &val);
if (!lookup_res) {
xfree(q);
continue;
}
table2 = (st_table *)val;
if (st_lookup(table2, (st_data_t)dname, &val)) {
st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
xfree(q);
found = true;
break;
}
bfs.base_enc = q->enc;
st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
bfs.base_enc = NULL;
xfree(q);
continue;
}
table2 = (st_table *)val;
if (st_lookup(table2, (st_data_t)dname, &val)) {
st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
xfree(q);
found = 1;
goto cleanup;
}
bfs.base_enc = q->enc;
st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
bfs.base_enc = NULL;
xfree(q);
}
found = 0;
cleanup:
while (bfs.queue) {
q = bfs.queue;
bfs.queue = q->next;
@ -404,6 +411,8 @@ int rb_require_internal_silent(VALUE fname);
static const rb_transcoder *
load_transcoder_entry(transcoder_entry_t *entry)
{
// changes result of entry->transcoder depending on if it's required or not, so needs lock
ASSERT_vm_locking();
if (entry->transcoder)
return entry->transcoder;
@ -972,6 +981,7 @@ rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
{
rb_econv_t *ec;
int i, ret;
ASSERT_vm_locking();
for (i = 0; i < n; i++) {
const rb_transcoder *tr;
@ -1016,6 +1026,7 @@ rb_econv_open0(const char *sname, const char *dname, int ecflags)
transcoder_entry_t **entries = NULL;
int num_trans;
rb_econv_t *ec;
ASSERT_vm_locking();
/* Just check if sname and dname are defined */
/* (This check is needed?) */
@ -1106,19 +1117,23 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
if (num_decorators == -1)
return NULL;
ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
if (!ec)
return NULL;
for (i = 0; i < num_decorators; i++)
if (rb_econv_decorate_at_last(ec, decorators[i]) == -1) {
rb_econv_close(ec);
return NULL;
RB_VM_LOCKING() {
ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
if (ec) {
for (i = 0; i < num_decorators; i++) {
if (rb_econv_decorate_at_last(ec, decorators[i]) == -1) {
rb_econv_close(ec);
ec = NULL;
break;
}
}
}
}
ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
return ec;
if (ec) {
ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
}
return ec; // can be NULL
}
static int
@ -1815,26 +1830,29 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
{
st_data_t v;
st_table *table2;
struct asciicompat_encoding_t data;
struct asciicompat_encoding_t data = {0};
if (!st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v))
return NULL;
table2 = (st_table *)v;
RB_VM_LOCKING() {
if (st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v)) {
table2 = (st_table *)v;
/*
* Assumption:
* There is at most one transcoder for
* converting from ASCII incompatible encoding.
*
* For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others.
*/
if (table2->num_entries == 1) {
data.ascii_incompat_name = ascii_incompat_name;
data.ascii_compat_name = NULL;
st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
}
/*
* Assumption:
* There is at most one transcoder for
* converting from ASCII incompatible encoding.
*
* For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others.
*/
if (table2->num_entries != 1)
return NULL;
}
data.ascii_incompat_name = ascii_incompat_name;
data.ascii_compat_name = NULL;
st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
return data.ascii_compat_name;
}
return data.ascii_compat_name; // can be NULL
}
/*
@ -1937,19 +1955,20 @@ static int
rb_econv_add_converter(rb_econv_t *ec, const char *sname, const char *dname, int n)
{
transcoder_entry_t *entry;
const rb_transcoder *tr;
const rb_transcoder *tr = NULL;
if (ec->started != 0)
return -1;
entry = get_transcoder_entry(sname, dname);
if (!entry)
return -1;
RB_VM_LOCKING() {
entry = get_transcoder_entry(sname, dname);
if (entry) {
tr = load_transcoder_entry(entry);
}
tr = load_transcoder_entry(entry);
if (!tr) return -1;
}
return rb_econv_add_transcoder_at(ec, tr, n);
return tr ? rb_econv_add_transcoder_at(ec, tr, n) : -1;
}
static int
@ -2662,24 +2681,25 @@ rb_econv_open_opts(const char *source_encoding, const char *destination_encoding
replacement = rb_hash_aref(opthash, sym_replace);
}
ec = rb_econv_open(source_encoding, destination_encoding, ecflags);
if (!ec)
return ec;
RB_VM_LOCKING() {
ec = rb_econv_open(source_encoding, destination_encoding, ecflags);
if (ec) {
if (!NIL_P(replacement)) {
int ret;
rb_encoding *enc = rb_enc_get(replacement);
if (!NIL_P(replacement)) {
int ret;
rb_encoding *enc = rb_enc_get(replacement);
ret = rb_econv_set_replacement(ec,
(const unsigned char *)RSTRING_PTR(replacement),
RSTRING_LEN(replacement),
rb_enc_name(enc));
if (ret == -1) {
rb_econv_close(ec);
return NULL;
ret = rb_econv_set_replacement(ec,
(const unsigned char *)RSTRING_PTR(replacement),
RSTRING_LEN(replacement),
rb_enc_name(enc));
if (ret == -1) {
rb_econv_close(ec);
ec = NULL;
}
}
}
}
return ec;
return ec; // can be NULL
}
static int
@ -2979,9 +2999,11 @@ static rb_encoding *
make_encoding(const char *name)
{
rb_encoding *enc;
enc = rb_enc_find(name);
if (!enc)
enc = make_dummy_encoding(name);
RB_VM_LOCKING() {
enc = rb_enc_find(name);
if (!enc)
enc = make_dummy_encoding(name);
}
return enc;
}
@ -3014,17 +3036,19 @@ econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
{
const char *arg_name, *result_name;
rb_encoding *arg_enc, *result_enc;
VALUE enc = Qnil;
enc_arg(&arg, &arg_name, &arg_enc);
result_name = rb_econv_asciicompat_encoding(arg_name);
RB_VM_LOCKING() {
result_name = rb_econv_asciicompat_encoding(arg_name);
if (result_name == NULL)
return Qnil;
result_enc = make_encoding(result_name);
return rb_enc_from_encoding(result_enc);
if (result_name) {
result_enc = make_encoding(result_name);
enc = rb_enc_from_encoding(result_enc);
}
}
return enc;
}
static void
@ -3105,8 +3129,12 @@ decorate_convpath(VALUE convpath, int ecflags)
if (RB_TYPE_P(pair, T_ARRAY)) {
const char *sname = rb_enc_name(rb_to_encoding(RARRAY_AREF(pair, 0)));
const char *dname = rb_enc_name(rb_to_encoding(RARRAY_AREF(pair, 1)));
transcoder_entry_t *entry = get_transcoder_entry(sname, dname);
const rb_transcoder *tr = load_transcoder_entry(entry);
transcoder_entry_t *entry;
const rb_transcoder *tr;
RB_VM_LOCKING() {
entry = get_transcoder_entry(sname, dname);
tr = load_transcoder_entry(entry);
}
if (!tr)
return -1;
if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&