Make rb_enc_autoload_p atomic

Using `encoding->max_enc_len` as a way to check whether the encoding
has been loaded isn't atomic: the field is neither written atomically
nor written last while the encoding is being set up.

Instead, we can use a dedicated atomic value inside the encoding table.
This commit is contained in:
Jean Boussier 2025-07-10 12:38:15 +02:00
parent 51252ef8d7
commit 1fb4929ace
2 changed files with 38 additions and 8 deletions

View file

@ -24,6 +24,7 @@
#include "internal/string.h" #include "internal/string.h"
#include "internal/vm.h" #include "internal/vm.h"
#include "regenc.h" #include "regenc.h"
#include "ruby/atomic.h"
#include "ruby/encoding.h" #include "ruby/encoding.h"
#include "ruby/util.h" #include "ruby/util.h"
#include "ruby_assert.h" #include "ruby_assert.h"
@ -60,6 +61,7 @@ VALUE rb_cEncoding;
static VALUE rb_encoding_list; static VALUE rb_encoding_list;
struct rb_encoding_entry { struct rb_encoding_entry {
rb_atomic_t loaded;
const char *name; const char *name;
rb_encoding *enc; rb_encoding *enc;
rb_encoding *base; rb_encoding *base;
@ -344,6 +346,8 @@ enc_table_expand(struct enc_table *enc_table, int newsize)
static int static int
enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_encoding *base_encoding) enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_encoding *base_encoding)
{ {
ASSERT_vm_locking();
struct rb_encoding_entry *ent = &enc_table->list[index]; struct rb_encoding_entry *ent = &enc_table->list[index];
rb_raw_encoding *encoding; rb_raw_encoding *encoding;
@ -358,6 +362,7 @@ enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_enc
if (!encoding) { if (!encoding) {
encoding = xmalloc(sizeof(rb_encoding)); encoding = xmalloc(sizeof(rb_encoding));
} }
if (base_encoding) { if (base_encoding) {
*encoding = *base_encoding; *encoding = *base_encoding;
} }
@ -370,12 +375,18 @@ enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_enc
st_insert(enc_table->names, (st_data_t)name, (st_data_t)index); st_insert(enc_table->names, (st_data_t)name, (st_data_t)index);
enc_list_update(index, encoding); enc_list_update(index, encoding);
// max_enc_len is used to mark a fully loaded encoding.
RUBY_ATOMIC_SET(ent->loaded, encoding->max_enc_len);
return index; return index;
} }
static int static int
enc_register(struct enc_table *enc_table, const char *name, rb_encoding *encoding) enc_register(struct enc_table *enc_table, const char *name, rb_encoding *encoding)
{ {
ASSERT_vm_locking();
int index = enc_table->count; int index = enc_table->count;
enc_table->count = enc_table_expand(enc_table, index + 1); enc_table->count = enc_table_expand(enc_table, index + 1);
@ -431,6 +442,7 @@ rb_enc_register(const char *name, rb_encoding *encoding)
int int
enc_registered(struct enc_table *enc_table, const char *name) enc_registered(struct enc_table *enc_table, const char *name)
{ {
ASSERT_vm_locking();
st_data_t idx = 0; st_data_t idx = 0;
if (!name) return -1; if (!name) return -1;
@ -637,6 +649,7 @@ enc_dup_name(st_data_t name)
static int static int
enc_alias_internal(struct enc_table *enc_table, const char *alias, int idx) enc_alias_internal(struct enc_table *enc_table, const char *alias, int idx)
{ {
ASSERT_vm_locking();
return st_insert2(enc_table->names, (st_data_t)alias, (st_data_t)idx, return st_insert2(enc_table->names, (st_data_t)alias, (st_data_t)idx,
enc_dup_name); enc_dup_name);
} }
@ -688,6 +701,7 @@ rb_encdb_alias(const char *alias, const char *orig)
static void static void
rb_enc_init(struct enc_table *enc_table) rb_enc_init(struct enc_table *enc_table)
{ {
ASSERT_vm_locking();
enc_table_expand(enc_table, ENCODING_COUNT + 1); enc_table_expand(enc_table, ENCODING_COUNT + 1);
if (!enc_table->names) { if (!enc_table->names) {
enc_table->names = st_init_strcasetable_with_size(ENCODING_LIST_CAPA); enc_table->names = st_init_strcasetable_with_size(ENCODING_LIST_CAPA);
@ -810,11 +824,22 @@ rb_enc_autoload(rb_encoding *enc)
return i; return i;
} }
/*
 * Report whether +enc+ is still waiting to be autoloaded.
 *
 * Looks up the encoding's slot in global_enc_table and atomically reads
 * that slot's `loaded` flag. Returns true while the flag is zero and
 * false once it holds a non-zero value.
 */
bool
rb_enc_autoload_p(rb_encoding *enc)
{
    const int index = ENC_TO_ENCINDEX(enc);

    /* The index derived from enc must map back to the very same encoding. */
    RUBY_ASSERT(enc == rb_enc_from_index(index));

    if (RUBY_ATOMIC_LOAD(global_enc_table.list[index].loaded)) {
        return false;
    }
    return true;
}
/* Return encoding index or UNSPECIFIED_ENCODING from encoding name */ /* Return encoding index or UNSPECIFIED_ENCODING from encoding name */
int int
rb_enc_find_index(const char *name) rb_enc_find_index(const char *name)
{ {
int i = enc_registered(&global_enc_table, name); int i;
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
i = enc_registered(enc_table, name);
}
rb_encoding *enc; rb_encoding *enc;
if (i < 0) { if (i < 0) {
@ -1495,13 +1520,15 @@ rb_locale_encindex(void)
if (idx < 0) idx = ENCINDEX_UTF_8; if (idx < 0) idx = ENCINDEX_UTF_8;
if (enc_registered(&global_enc_table, "locale") < 0) { GLOBAL_ENC_TABLE_LOCKING(enc_table) {
if (enc_registered(enc_table, "locale") < 0) {
# if defined _WIN32 # if defined _WIN32
void Init_w32_codepage(void); void Init_w32_codepage(void);
Init_w32_codepage(); Init_w32_codepage();
# endif # endif
GLOBAL_ENC_TABLE_LOCKING(enc_table) { GLOBAL_ENC_TABLE_LOCKING(enc_table) {
enc_alias_internal(enc_table, "locale", idx); enc_alias_internal(enc_table, "locale", idx);
}
} }
} }
@ -1517,7 +1544,10 @@ rb_locale_encoding(void)
int int
rb_filesystem_encindex(void) rb_filesystem_encindex(void)
{ {
int idx = enc_registered(&global_enc_table, "filesystem"); int idx;
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
idx = enc_registered(enc_table, "filesystem");
}
if (idx < 0) idx = ENCINDEX_ASCII_8BIT; if (idx < 0) idx = ENCINDEX_ASCII_8BIT;
return idx; return idx;
} }

View file

@ -11,7 +11,6 @@
#include "ruby/ruby.h" /* for ID */ #include "ruby/ruby.h" /* for ID */
#include "ruby/encoding.h" /* for rb_encoding */ #include "ruby/encoding.h" /* for rb_encoding */
#define rb_enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
#define rb_is_usascii_enc(enc) ((enc) == rb_usascii_encoding()) #define rb_is_usascii_enc(enc) ((enc) == rb_usascii_encoding())
#define rb_is_ascii8bit_enc(enc) ((enc) == rb_ascii8bit_encoding()) #define rb_is_ascii8bit_enc(enc) ((enc) == rb_ascii8bit_encoding())
#define rb_is_locale_enc(enc) ((enc) == rb_locale_encoding()) #define rb_is_locale_enc(enc) ((enc) == rb_locale_encoding())
@ -24,6 +23,7 @@ rb_encoding *rb_enc_check_str(VALUE str1, VALUE str2);
int rb_encdb_replicate(const char *alias, const char *orig); int rb_encdb_replicate(const char *alias, const char *orig);
int rb_encdb_alias(const char *alias, const char *orig); int rb_encdb_alias(const char *alias, const char *orig);
int rb_enc_autoload(rb_encoding *enc); int rb_enc_autoload(rb_encoding *enc);
bool rb_enc_autoload_p(rb_encoding *enc);
int rb_encdb_dummy(const char *name); int rb_encdb_dummy(const char *name);
void rb_encdb_declare(const char *name); void rb_encdb_declare(const char *name);
void rb_enc_set_base(const char *name, const char *orig); void rb_enc_set_base(const char *name, const char *orig);