mirror of
https://github.com/ruby/ruby.git
synced 2025-08-15 05:29:10 +02:00
set.c: Store set_table->bins
at the end of set_table->entries
This saves one pointer in `struct set_table`, which would allow
`Set` objects to still fit in 80B TypedData slots even if RTypedData
grows from 32B to 40B.
The existing set benchmarks seem to show this doesn't have a very
significant impact. Smaller sets are a bit faster, larger sets
a bit slower.
It seems consistent over multiple runs, but it's unclear how much
of that is just within the margin of error.
```
compare-ruby: ruby 3.5.0dev (2025-08-12T02:14:57Z master 428937a536) +YJIT +PRISM [arm64-darwin24]
built-ruby: ruby 3.5.0dev (2025-08-12T07:22:26Z set-entries-bounds da30024fdc) +YJIT +PRISM [arm64-darwin24]
warming up........
| |compare-ruby|built-ruby|
|:------------------------|-----------:|---------:|
|new_0 | 15.459M| 15.823M|
| | -| 1.02x|
|new_10 | 3.484M| 3.574M|
| | -| 1.03x|
|new_100 | 546.992k| 564.679k|
| | -| 1.03x|
|new_1000 | 49.391k| 48.169k|
| | 1.03x| -|
|aref_0 | 18.643M| 19.350M|
| | -| 1.04x|
|aref_10 | 5.941M| 6.006M|
| | -| 1.01x|
|aref_100 | 822.197k| 814.219k|
| | 1.01x| -|
|aref_1000 | 83.230k| 79.411k|
| | 1.05x| -|
```
This commit is contained in:
parent
507b1e4bde
commit
85c52079aa
4 changed files with 73 additions and 45 deletions
|
@ -15,13 +15,16 @@ struct set_table {
|
|||
const struct st_hash_type *type;
|
||||
/* Number of entries currently in the table. */
|
||||
st_index_t num_entries;
|
||||
/* Array of bins used for access by keys. */
|
||||
st_index_t *bins;
|
||||
|
||||
/* Start and bound index of entries in array entries.
|
||||
entries_start and entries_bound are in interval
|
||||
[0,allocated_entries]. */
|
||||
st_index_t entries_start, entries_bound;
|
||||
/* Array of size 2^entry_power. */
|
||||
|
||||
/**
|
||||
* Array of size 2^entry_power.
|
||||
* Followed by st_index_t *bins, Array of bins used for access by keys.
|
||||
*/
|
||||
set_table_entry *entries;
|
||||
};
|
||||
|
||||
|
|
1
set.c
1
set.c
|
@ -139,7 +139,6 @@ set_mark(void *ptr)
|
|||
static void
|
||||
set_free_embedded(struct set_object *sobj)
|
||||
{
|
||||
free((&sobj->table)->bins);
|
||||
free((&sobj->table)->entries);
|
||||
}
|
||||
|
||||
|
|
94
st.c
94
st.c
|
@ -2395,18 +2395,44 @@ set_get_allocated_entries(const set_table *tab)
|
|||
return ((st_index_t) 1)<<tab->entry_power;
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
set_allocated_entries_size(const set_table *tab)
|
||||
{
|
||||
return set_get_allocated_entries(tab) * sizeof(set_table_entry);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
set_has_bins(const set_table *tab)
|
||||
{
|
||||
return tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS;
|
||||
}
|
||||
|
||||
/* Return size of the allocated bins of table TAB. */
|
||||
static inline st_index_t
|
||||
set_bins_size(const set_table *tab)
|
||||
{
|
||||
if (set_has_bins(tab)) {
|
||||
return features[tab->entry_power].bins_words * sizeof (st_index_t);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline st_index_t *
|
||||
set_bins_ptr(const set_table *tab)
|
||||
{
|
||||
if (set_has_bins(tab)) {
|
||||
return (st_index_t *)(((char *)tab->entries) + set_allocated_entries_size(tab));
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Mark all bins of table TAB as empty. */
|
||||
static void
|
||||
set_initialize_bins(set_table *tab)
|
||||
{
|
||||
memset(tab->bins, 0, set_bins_size(tab));
|
||||
memset(set_bins_ptr(tab), 0, set_bins_size(tab));
|
||||
}
|
||||
|
||||
/* Make table TAB empty. */
|
||||
|
@ -2415,7 +2441,7 @@ set_make_tab_empty(set_table *tab)
|
|||
{
|
||||
tab->num_entries = 0;
|
||||
tab->entries_start = tab->entries_bound = 0;
|
||||
if (tab->bins != NULL)
|
||||
if (set_bins_ptr(tab) != NULL)
|
||||
set_initialize_bins(tab);
|
||||
}
|
||||
|
||||
|
@ -2443,13 +2469,13 @@ set_init_existing_table_with_size(set_table *tab, const struct st_hash_type *typ
|
|||
tab->entry_power = n;
|
||||
tab->bin_power = features[n].bin_power;
|
||||
tab->size_ind = features[n].size_ind;
|
||||
if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
|
||||
tab->bins = NULL;
|
||||
else {
|
||||
tab->bins = (st_index_t *) malloc(set_bins_size(tab));
|
||||
|
||||
size_t memsize = 0;
|
||||
if (set_has_bins(tab)) {
|
||||
memsize += set_bins_size(tab);
|
||||
}
|
||||
tab->entries = (set_table_entry *) malloc(set_get_allocated_entries(tab)
|
||||
* sizeof(set_table_entry));
|
||||
memsize += set_get_allocated_entries(tab) * sizeof(set_table_entry);
|
||||
tab->entries = (set_table_entry *)malloc(memsize);
|
||||
set_make_tab_empty(tab);
|
||||
tab->rebuilds_num = 0;
|
||||
return tab;
|
||||
|
@ -2499,7 +2525,6 @@ set_table_clear(set_table *tab)
|
|||
void
|
||||
set_free_table(set_table *tab)
|
||||
{
|
||||
free(tab->bins);
|
||||
free(tab->entries);
|
||||
free(tab);
|
||||
}
|
||||
|
@ -2509,7 +2534,7 @@ size_t
|
|||
set_memsize(const set_table *tab)
|
||||
{
|
||||
return(sizeof(set_table)
|
||||
+ (tab->bins == NULL ? 0 : set_bins_size(tab))
|
||||
+ (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS ? 0 : set_bins_size(tab))
|
||||
+ set_get_allocated_entries(tab) * sizeof(set_table_entry));
|
||||
}
|
||||
|
||||
|
@ -2542,7 +2567,7 @@ set_rebuild_table(set_table *tab)
|
|||
|| tab->num_entries < (1 << MINIMAL_POWER2)) {
|
||||
/* Compaction: */
|
||||
tab->num_entries = 0;
|
||||
if (tab->bins != NULL)
|
||||
if (set_has_bins(tab))
|
||||
set_initialize_bins(tab);
|
||||
set_rebuild_table_with(tab, tab);
|
||||
}
|
||||
|
@ -2572,7 +2597,7 @@ set_rebuild_table_with(set_table *const new_tab, set_table *const tab)
|
|||
new_entries = new_tab->entries;
|
||||
|
||||
ni = 0;
|
||||
bins = new_tab->bins;
|
||||
bins = set_bins_ptr(new_tab);
|
||||
size_ind = set_get_size_ind(new_tab);
|
||||
st_index_t bound = tab->entries_bound;
|
||||
set_table_entry *entries = tab->entries;
|
||||
|
@ -2602,8 +2627,6 @@ set_rebuild_move_table(set_table *const new_tab, set_table *const tab)
|
|||
tab->entry_power = new_tab->entry_power;
|
||||
tab->bin_power = new_tab->bin_power;
|
||||
tab->size_ind = new_tab->size_ind;
|
||||
free(tab->bins);
|
||||
tab->bins = new_tab->bins;
|
||||
free(tab->entries);
|
||||
tab->entries = new_tab->entries;
|
||||
free(new_tab);
|
||||
|
@ -2688,7 +2711,7 @@ set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
|
|||
perturb = hash_value;
|
||||
#endif
|
||||
for (;;) {
|
||||
bin = get_bin(tab->bins, set_get_size_ind(tab), ind);
|
||||
bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
|
||||
if (! EMPTY_OR_DELETED_BIN_P(bin)) {
|
||||
DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
|
||||
if (EXPECT(rebuilt_p, 0))
|
||||
|
@ -2732,7 +2755,7 @@ set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
|
|||
perturb = hash_value;
|
||||
#endif
|
||||
for (;;) {
|
||||
bin = get_bin(tab->bins, set_get_size_ind(tab), ind);
|
||||
bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
|
||||
if (! EMPTY_OR_DELETED_BIN_P(bin)) {
|
||||
DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
|
||||
if (EXPECT(rebuilt_p, 0))
|
||||
|
@ -2773,7 +2796,7 @@ set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t ke
|
|||
perturb = hash_value;
|
||||
#endif
|
||||
for (;;) {
|
||||
bin = get_bin(tab->bins, set_get_size_ind(tab), ind);
|
||||
bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
|
||||
if (EMPTY_OR_DELETED_BIN_P(bin))
|
||||
return ind;
|
||||
#ifdef QUADRATIC_PROBE
|
||||
|
@ -2787,7 +2810,7 @@ set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t ke
|
|||
|
||||
/* Mark I-th bin of table TAB as empty, in other words not
|
||||
corresponding to any entry. */
|
||||
#define MARK_SET_BIN_EMPTY(tab, i) (set_bin((tab)->bins, set_get_size_ind(tab), i, EMPTY_BIN))
|
||||
#define MARK_SET_BIN_EMPTY(tab, i) (set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, EMPTY_BIN))
|
||||
|
||||
/* Return index of table TAB bin for HASH_VALUE and KEY through
|
||||
BIN_IND and the pointed value as the function result. Reserve the
|
||||
|
@ -2823,7 +2846,7 @@ set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
|
|||
firset_deleted_bin_ind = UNDEFINED_BIN_IND;
|
||||
entries = tab->entries;
|
||||
for (;;) {
|
||||
entry_index = get_bin(tab->bins, set_get_size_ind(tab), ind);
|
||||
entry_index = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
|
||||
if (EMPTY_BIN_P(entry_index)) {
|
||||
tab->num_entries++;
|
||||
entry_index = UNDEFINED_ENTRY_IND;
|
||||
|
@ -2863,7 +2886,7 @@ set_table_lookup(set_table *tab, st_data_t key)
|
|||
st_hash_t hash = set_do_hash(key, tab);
|
||||
|
||||
retry:
|
||||
if (tab->bins == NULL) {
|
||||
if (!set_has_bins(tab)) {
|
||||
bin = set_find_entry(tab, hash, key);
|
||||
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
|
||||
goto retry;
|
||||
|
@ -2907,7 +2930,7 @@ set_insert(set_table *tab, st_data_t key)
|
|||
hash_value = set_do_hash(key, tab);
|
||||
retry:
|
||||
set_rebuild_table_if_necessary(tab);
|
||||
if (tab->bins == NULL) {
|
||||
if (!set_has_bins(tab)) {
|
||||
bin = set_find_entry(tab, hash_value, key);
|
||||
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
|
||||
goto retry;
|
||||
|
@ -2930,7 +2953,7 @@ set_insert(set_table *tab, st_data_t key)
|
|||
entry->hash = hash_value;
|
||||
entry->key = key;
|
||||
if (bin_ind != UNDEFINED_BIN_IND)
|
||||
set_bin(tab->bins, set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
|
||||
set_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
|
@ -2941,18 +2964,9 @@ static set_table *
|
|||
set_replace(set_table *new_tab, set_table *old_tab)
|
||||
{
|
||||
*new_tab = *old_tab;
|
||||
if (old_tab->bins == NULL)
|
||||
new_tab->bins = NULL;
|
||||
else {
|
||||
new_tab->bins = (st_index_t *) malloc(set_bins_size(old_tab));
|
||||
}
|
||||
new_tab->entries = (set_table_entry *) malloc(set_get_allocated_entries(old_tab)
|
||||
* sizeof(set_table_entry));
|
||||
MEMCPY(new_tab->entries, old_tab->entries, set_table_entry,
|
||||
set_get_allocated_entries(old_tab));
|
||||
if (old_tab->bins != NULL)
|
||||
MEMCPY(new_tab->bins, old_tab->bins, char, set_bins_size(old_tab));
|
||||
|
||||
size_t memsize = set_allocated_entries_size(old_tab) + set_bins_size(old_tab);
|
||||
new_tab->entries = (set_table_entry *)malloc(memsize);
|
||||
MEMCPY(new_tab->entries, old_tab->entries, char, memsize);
|
||||
return new_tab;
|
||||
}
|
||||
|
||||
|
@ -2991,7 +3005,7 @@ set_update_range_for_deleted(set_table *tab, st_index_t n)
|
|||
corresponding to deleted entries. */
|
||||
#define MARK_SET_BIN_DELETED(tab, i) \
|
||||
do { \
|
||||
set_bin((tab)->bins, set_get_size_ind(tab), i, DELETED_BIN); \
|
||||
set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, DELETED_BIN); \
|
||||
} while (0)
|
||||
|
||||
/* Delete entry with KEY from table TAB, and return non-zero. If
|
||||
|
@ -3006,7 +3020,7 @@ set_table_delete(set_table *tab, st_data_t *key)
|
|||
|
||||
hash = set_do_hash(*key, tab);
|
||||
retry:
|
||||
if (tab->bins == NULL) {
|
||||
if (!set_has_bins(tab)) {
|
||||
bin = set_find_entry(tab, hash, *key);
|
||||
if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
|
||||
goto retry;
|
||||
|
@ -3021,7 +3035,7 @@ set_table_delete(set_table *tab, st_data_t *key)
|
|||
if (bin_ind == UNDEFINED_BIN_IND) {
|
||||
return 0;
|
||||
}
|
||||
bin = get_bin(tab->bins, set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
|
||||
bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
|
||||
MARK_SET_BIN_DELETED(tab, bin_ind);
|
||||
}
|
||||
entry = &tab->entries[bin];
|
||||
|
@ -3052,7 +3066,7 @@ set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
|
|||
st_index_t i, rebuilds_num;
|
||||
st_hash_t hash;
|
||||
st_data_t key;
|
||||
int error_p, packed_p = tab->bins == NULL;
|
||||
int error_p, packed_p = !set_has_bins(tab);
|
||||
|
||||
entries = tab->entries;
|
||||
/* The bound can change inside the loop even without rebuilding
|
||||
|
@ -3074,7 +3088,7 @@ set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
|
|||
if (rebuilds_num != tab->rebuilds_num) {
|
||||
retry:
|
||||
entries = tab->entries;
|
||||
packed_p = tab->bins == NULL;
|
||||
packed_p = !set_has_bins(tab);
|
||||
if (packed_p) {
|
||||
i = set_find_entry(tab, hash, key);
|
||||
if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
|
||||
|
@ -3122,7 +3136,7 @@ set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
|
|||
goto again;
|
||||
if (bin_ind == UNDEFINED_BIN_IND)
|
||||
break;
|
||||
bin = get_bin(tab->bins, set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
|
||||
bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
|
||||
MARK_SET_BIN_DELETED(tab, bin_ind);
|
||||
}
|
||||
curr_entry_ptr = &entries[bin];
|
||||
|
|
|
@ -924,6 +924,18 @@ class TC_Set < Test::Unit::TestCase
|
|||
end
|
||||
end;
|
||||
end
|
||||
|
||||
# Fills a set with the integers 0...10_000, replaces it with its own
# duplicate, and checks that every integer is still found in the copy.
# NOTE(review): presumably 10_000 elements is meant to be large enough
# to exercise the table's bins path -- confirm against st.c.
def test_larger_sets
  numbers = Set.new
  10_000.times { |n| numbers << n }

  numbers = numbers.dup

  10_000.times { |n| assert_includes numbers, n }
end
|
||||
end
|
||||
|
||||
class TC_Enumerable < Test::Unit::TestCase
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue