Ruby 3.3.2p78 (2024-05-30 revision e5a195edf62fe1bf7146a191da13fa1c4fecbd71)
pm_constant_pool.c
2
6void
7pm_constant_id_list_init(pm_constant_id_list_t *list) {
8 list->ids = NULL;
9 list->size = 0;
10 list->capacity = 0;
11}
12
17bool
18pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
19 if (list->size >= list->capacity) {
20 list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
21 list->ids = (pm_constant_id_t *) realloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
22 if (list->ids == NULL) return false;
23 }
24
25 list->ids[list->size++] = id;
26 return true;
27}
28
32bool
33pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
34 for (size_t index = 0; index < list->size; index++) {
35 if (list->ids[index] == id) return true;
36 }
37 return false;
38}
39
43size_t
44pm_constant_id_list_memsize(pm_constant_id_list_t *list) {
45 return sizeof(pm_constant_id_list_t) + (list->capacity * sizeof(pm_constant_id_t));
46}
47
51void
52pm_constant_id_list_free(pm_constant_id_list_t *list) {
53 if (list->ids != NULL) {
54 free(list->ids);
55 }
56}
57
/**
 * Hash a run of bytes. The accumulator starts at 5381 and, for each byte, is
 * multiplied by 33 and has the byte added to it, wrapping at 32 bits.
 *
 * @param start Pointer to the first byte.
 * @param length Number of bytes to hash.
 * @return The 32-bit hash value (5381 when length is 0).
 */
static inline uint32_t
pm_constant_pool_hash(const uint8_t *start, size_t length) {
    uint32_t hash = 5381;
    size_t offset = 0;

    while (offset < length) {
        // Multiplying by 33 is the same as (hash << 5) + hash.
        hash = (hash * 33u) + start[offset];
        offset++;
    }

    return hash;
}
73
/**
 * Round a value up to the nearest power of two. Values that already are a
 * power of two are returned unchanged, and 0 maps to 1 (2^0).
 *
 * @param v The value to round up.
 * @return The smallest power of two that is greater than or equal to v.
 */
static uint32_t
next_power_of_two(uint32_t v) {
    // Handle zero up front so that the decrement below cannot wrap around.
    if (v == 0) return 1;

    // Smear the highest set bit of (v - 1) into every lower position, which
    // yields a value of the form 2^k - 1; adding one gives the power of two.
    uint32_t filled = v - 1;
    for (unsigned int shift = 1; shift < 32; shift *= 2) {
        filled |= filled >> shift;
    }

    return filled + 1;
}
93
#ifndef NDEBUG
/**
 * Check whether a size is a power of two. Only compiled when assertions are
 * enabled, since it is used exclusively inside assert().
 *
 * Zero is rejected explicitly: the bit trick below is also satisfied by 0,
 * but a capacity of 0 would produce a mask of all ones and send bucket
 * lookups out of bounds, so the assertions must not let it through.
 *
 * @param size The value to test.
 * @return true if size is 1, 2, 4, 8, ...; false otherwise (including 0).
 */
static bool
is_power_of_two(uint32_t size) {
    return size != 0 && (size & (size - 1)) == 0;
}
#endif
100
104static inline bool
105pm_constant_pool_resize(pm_constant_pool_t *pool) {
106 assert(is_power_of_two(pool->capacity));
107
108 uint32_t next_capacity = pool->capacity * 2;
109 if (next_capacity < pool->capacity) return false;
110
111 const uint32_t mask = next_capacity - 1;
112 const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
113
114 void *next = calloc(next_capacity, element_size);
115 if (next == NULL) return false;
116
117 pm_constant_pool_bucket_t *next_buckets = next;
118 pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
119
120 // For each bucket in the current constant pool, find the index in the
121 // next constant pool, and insert it.
122 for (uint32_t index = 0; index < pool->capacity; index++) {
123 pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
124
125 // If an id is set on this constant, then we know we have content here.
126 // In this case we need to insert it into the next constant pool.
127 if (bucket->id != 0) {
128 uint32_t next_index = bucket->hash & mask;
129
130 // This implements linear scanning to find the next available slot
131 // in case this index is already taken. We don't need to bother
132 // comparing the values since we know that the hash is unique.
133 while (next_buckets[next_index].id != 0) {
134 next_index = (next_index + 1) & mask;
135 }
136
137 // Here we copy over the entire bucket, which includes the id so
138 // that they are consistent between resizes.
139 next_buckets[next_index] = *bucket;
140 }
141 }
142
143 // The constants are stable with respect to hash table resizes.
144 memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
145
146 // pool->constants and pool->buckets are allocated out of the same chunk
147 // of memory, with the buckets coming first.
148 free(pool->buckets);
149 pool->constants = next_constants;
150 pool->buckets = next_buckets;
151 pool->capacity = next_capacity;
152 return true;
153}
154
158bool
159pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
160 const uint32_t maximum = (~((uint32_t) 0));
161 if (capacity >= ((maximum / 2) + 1)) return false;
162
163 capacity = next_power_of_two(capacity);
164 const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
165 void *memory = calloc(capacity, element_size);
166 if (memory == NULL) return false;
167
168 pool->buckets = memory;
169 pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
170 pool->size = 0;
171 pool->capacity = capacity;
172 return true;
173}
174
179pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
180 assert(constant_id > 0 && constant_id <= pool->size);
181 return &pool->constants[constant_id - 1];
182}
183
187static inline pm_constant_id_t
188pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
189 if (pool->size >= (pool->capacity / 4 * 3)) {
190 if (!pm_constant_pool_resize(pool)) return 0;
191 }
192
193 assert(is_power_of_two(pool->capacity));
194 const uint32_t mask = pool->capacity - 1;
195
196 uint32_t hash = pm_constant_pool_hash(start, length);
197 uint32_t index = hash & mask;
199
200 while (bucket = &pool->buckets[index], bucket->id != 0) {
201 // If there is a collision, then we need to check if the content is the
202 // same as the content we are trying to insert. If it is, then we can
203 // return the id of the existing constant.
204 pm_constant_t *constant = &pool->constants[bucket->id - 1];
205
206 if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
207 // Since we have found a match, we need to check if this is
208 // attempting to insert a shared or an owned constant. We want to
209 // prefer shared constants since they don't require allocations.
210 if (type == PM_CONSTANT_POOL_BUCKET_OWNED) {
211 // If we're attempting to insert an owned constant and we have
212 // an existing constant, then either way we don't want the given
213 // memory. Either it's duplicated with the existing constant or
214 // it's not necessary because we have a shared version.
215 free((void *) start);
216 } else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
217 // If we're attempting to insert a shared constant and the
218 // existing constant is owned, then we can free the owned
219 // constant and replace it with the shared constant.
220 free((void *) constant->start);
221 constant->start = start;
222 bucket->type = (unsigned int) (PM_CONSTANT_POOL_BUCKET_DEFAULT & 0x3);
223 }
224
225 return bucket->id;
226 }
227
228 index = (index + 1) & mask;
229 }
230
231 // IDs are allocated starting at 1, since the value 0 denotes a non-existant
232 // constant.
233 uint32_t id = ++pool->size;
234 assert(pool->size < ((uint32_t) (1 << 30)));
235
236 *bucket = (pm_constant_pool_bucket_t) {
237 .id = (unsigned int) (id & 0x3fffffff),
238 .type = (unsigned int) (type & 0x3),
239 .hash = hash
240 };
241
242 pool->constants[id - 1] = (pm_constant_t) {
243 .start = start,
244 .length = length,
245 };
246
247 return id;
248}
249
255pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
256 return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
257}
258
265pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
266 return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
267}
268
274pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
275 return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
276}
277
281void
282pm_constant_pool_free(pm_constant_pool_t *pool) {
283 // For each constant in the current constant pool, free the contents if the
284 // contents are owned.
285 for (uint32_t index = 0; index < pool->capacity; index++) {
286 pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
287
288 // If an id is set on this constant, then we know we have content here.
289 if (bucket->id != 0 && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
290 pm_constant_t *constant = &pool->constants[bucket->id - 1];
291 free((void *) constant->start);
292 }
293 }
294
295 free(pool->buckets);
296}
A data structure that stores a set of strings.
static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT
By default, each constant is a slice of the source.
unsigned int pm_constant_pool_bucket_type_t
The type of bucket in the constant pool hash map.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED
An owned constant is one for which memory has been allocated.
static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT
A constant constant is known at compile time.
A list of constant IDs.
size_t size
The number of constant ids in the list.
size_t capacity
The number of constant ids that have been allocated in the list.
pm_constant_id_t * ids
The constant ids in the list.
A bucket in the hash map.
uint32_t hash
The hash of the bucket.
unsigned int id
The incremental ID used for indexing back into the pool.
pm_constant_pool_bucket_type_t type
The type of the bucket, which determines how to free it.
The overall constant pool, which stores constants found while parsing.
uint32_t capacity
The number of buckets that have been allocated in the hash map.
pm_constant_pool_bucket_t * buckets
The buckets in the hash map.
uint32_t size
The number of buckets in the hash map.
pm_constant_t * constants
The constants that are stored in the buckets.
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.