xref: /ext-ds/src/ds/ds_htable.c (revision daf9b1ec)
1 #include "../common.h"
2 
3 #include "ds_htable.h"
4 #include "ds_set.h"
5 #include "ds_vector.h"
6 
7 #include "../php/classes/php_hashable_ce.h"
8 
ds_htable_allocate_buckets(uint32_t capacity)9 static inline ds_htable_bucket_t *ds_htable_allocate_buckets(uint32_t capacity)
10 {
11     return ecalloc(capacity, sizeof(ds_htable_bucket_t));
12 }
13 
ds_htable_reallocate_buckets(ds_htable_t * table,uint32_t capacity)14 static inline ds_htable_bucket_t *ds_htable_reallocate_buckets(ds_htable_t *table, uint32_t capacity)
15 {
16     return erealloc(table->buckets, capacity * sizeof(ds_htable_bucket_t));
17 }
18 
ds_htable_allocate_lookup(uint32_t capacity)19 static inline uint32_t *ds_htable_allocate_lookup(uint32_t capacity)
20 {
21     return emalloc(capacity * sizeof(uint32_t));
22 }
23 
ds_htable_reallocate_lookup(uint32_t * lookup,uint32_t capacity)24 static inline uint32_t *ds_htable_reallocate_lookup(uint32_t *lookup, uint32_t capacity)
25 {
26     return erealloc(lookup, capacity * sizeof(uint32_t));
27 }
28 
ds_htable_reset_lookup(ds_htable_t * table)29 static inline void ds_htable_reset_lookup(ds_htable_t *table)
30 {
31     memset(table->lookup, DS_HTABLE_INVALID_INDEX, table->capacity * sizeof(uint32_t));
32 }
33 
ds_htable_realloc(ds_htable_t * table,uint32_t capacity)34 static inline void ds_htable_realloc(ds_htable_t *table, uint32_t capacity)
35 {
36     table->buckets  = ds_htable_reallocate_buckets(table, capacity);
37     table->lookup   = ds_htable_reallocate_lookup(table->lookup, capacity);
38     table->capacity = capacity;
39 }
40 
ds_htable_rehash(ds_htable_t * table)41 static void ds_htable_rehash(ds_htable_t *table)
42 {
43     const uint32_t mask = table->capacity - 1;
44 
45     ds_htable_reset_lookup(table);
46 
47     // Rehash removes all deleted buckets, so we can reset min deleted.
48     table->min_deleted = table->capacity;
49 
50     // No need to rehash if the table is empty.
51     if (table->size == 0) {
52         table->next = 0;
53         return;
54 
55     } else {
56         uint32_t index  = 0;
57         ds_htable_bucket_t *bucket = table->buckets;
58 
59         if (DS_HTABLE_IS_PACKED(table)) { // No deleted buckets
60             do {
61                 DS_HTABLE_BUCKET_REHASH(table, bucket, mask, index);
62                 bucket++;
63             } while (++index < table->next);
64 
65         } else {
66             // There are deleted buckets
67             do {
68                 if (DS_HTABLE_BUCKET_DELETED(bucket)) {
69                     uint32_t j = index;
70                     ds_htable_bucket_t *q = bucket;
71 
72                     while (++index < table->next) {
73                         if ( ! DS_HTABLE_BUCKET_DELETED(++bucket)) {
74                             *q = *bucket;
75 
76                             DS_HTABLE_BUCKET_REHASH(table, q, mask, j);
77                             q++;
78                             j++;
79                         }
80                     }
81 
82                     table->next = j;
83                     break;
84                 }
85                 DS_HTABLE_BUCKET_REHASH(table, bucket, mask, index);
86                 bucket++;
87 
88             } while (++index < table->next);
89         }
90     }
91 }
92 
ds_htable_pack(ds_htable_t * table)93 static void ds_htable_pack(ds_htable_t *table)
94 {
95     if ( ! DS_HTABLE_IS_PACKED(table)) {
96         ds_htable_bucket_t *end = table->buckets + table->next;
97         ds_htable_bucket_t *src = table->buckets + table->min_deleted;
98         ds_htable_bucket_t *dst = src;
99 
100         while (++src != end) {
101             if ( ! DS_HTABLE_BUCKET_DELETED(src)) {
102                 if (dst != src) *dst = *src;
103                 dst++;
104             }
105         }
106 
107         table->next = table->size;
108         table->min_deleted = table->capacity;
109     }
110 }
111 
ds_htable_auto_truncate(ds_htable_t * table)112 static inline void ds_htable_auto_truncate(ds_htable_t *table)
113 {
114     const uint32_t capacity = table->capacity;
115 
116     if (table->size <= (capacity / 4) && (capacity / 2) >= DS_HTABLE_MIN_CAPACITY) {
117         ds_htable_pack(table);
118         ds_htable_realloc(table, capacity / 2);
119         ds_htable_rehash(table);
120     }
121 }
122 
ds_htable_with_capacity(uint32_t capacity)123 static ds_htable_t *ds_htable_with_capacity(uint32_t capacity)
124 {
125     ds_htable_t *table = ecalloc(1, sizeof(ds_htable_t));
126 
127     table->buckets     = ds_htable_allocate_buckets(capacity);
128     table->lookup      = ds_htable_allocate_lookup(capacity);
129     table->capacity    = capacity;
130     table->min_deleted = capacity;
131     table->size        = 0;
132     table->next        = 0;
133 
134     ds_htable_reset_lookup(table);
135     return table;
136 }
137 
ds_htable()138 ds_htable_t *ds_htable()
139 {
140     return ds_htable_with_capacity(DS_HTABLE_MIN_CAPACITY);
141 }
142 
ds_htable_copy(ds_htable_t * _src,ds_htable_t * _dst)143 static void ds_htable_copy(ds_htable_t *_src, ds_htable_t *_dst)
144 {
145     ds_htable_bucket_t *src = _src->buckets;
146     ds_htable_bucket_t *end = _src->buckets + _src->next;
147     ds_htable_bucket_t *dst = _dst->buckets;
148 
149     memcpy(_dst->lookup, _src->lookup, _src->capacity * sizeof(uint32_t));
150 
151     for (; src != end; ++src, ++dst) {
152         if ( ! DS_HTABLE_BUCKET_DELETED(src)) {
153             DS_HTABLE_BUCKET_COPY(dst, src);
154         } else {
155             DS_HTABLE_BUCKET_DELETE(dst);
156         }
157     }
158 }
159 
ds_htable_clone(ds_htable_t * src)160 ds_htable_t *ds_htable_clone(ds_htable_t *src)
161 {
162     ds_htable_t *dst = ecalloc(1, sizeof(ds_htable_t));
163 
164     dst->buckets     = ds_htable_allocate_buckets(src->capacity);
165     dst->lookup      = ds_htable_allocate_lookup(src->capacity);
166     dst->capacity    = src->capacity;
167     dst->size        = src->size;
168     dst->next        = src->next;
169     dst->min_deleted = src->min_deleted;
170 
171     ds_htable_copy(src, dst);
172     return dst;
173 }
174 
implements_hashable(zval * key)175 static inline bool implements_hashable(zval *key) {
176     return Z_TYPE_P(key) == IS_OBJECT && instanceof_function(Z_OBJCE_P(key), hashable_ce);
177 }
178 
user_hashable_equals(zval * a,zval * b)179  static inline bool user_hashable_equals(zval *a, zval *b)
180  {
181     if (Z_OBJCE_P(a) != Z_OBJCE_P(b)) {
182         return false;
183 
184     } else {
185         zval equals;
186 #if PHP_VERSION_ID >= 80000
187         zend_call_method_with_1_params(Z_OBJ_P(a), Z_OBJCE_P(a), NULL, "equals", &equals, b);
188 #else
189         zend_call_method_with_1_params(a, Z_OBJCE_P(a), NULL, "equals", &equals, b);
190 #endif
191         return Z_TYPE(equals) == IS_TRUE;
192      }
193  }
194 
key_is_identical(zval * key,zval * other)195 static inline bool key_is_identical(zval *key, zval *other)
196 {
197     if (Z_TYPE_P(key) == IS_OBJECT && implements_hashable(key)) {
198         return Z_TYPE_P(other) == IS_OBJECT && user_hashable_equals(key, other);
199     }
200 
201     return zend_is_identical(key, other);
202 }
203 
ds_htable_bucket_key_match(ds_htable_bucket_t * bucket,zval * key)204 static inline bool ds_htable_bucket_key_match(ds_htable_bucket_t *bucket, zval *key)
205 {
206     return key_is_identical(&bucket->key, key);
207 }
208 
get_string_hash(zend_string * str)209 static inline uint32_t get_string_hash(zend_string *str)
210 {
211     return ZSTR_HASH(str);
212 }
213 
get_string_zval_hash(zval * value)214 static inline uint32_t get_string_zval_hash(zval *value)
215 {
216     return get_string_hash(Z_STR_P(value));
217 }
218 
get_array_hash(zval * array)219 static uint32_t get_array_hash(zval *array)
220 {
221     uint32_t                   hash;
222     php_serialize_data_t       var_hash;
223     smart_str                  buffer = {0};
224 
225     PHP_VAR_SERIALIZE_INIT(var_hash);
226     php_var_serialize(&buffer, array, &var_hash);
227     PHP_VAR_SERIALIZE_DESTROY(var_hash);
228 
229     smart_str_0(&buffer);
230 
231     if (buffer.s) {
232         hash = get_string_hash(buffer.s);
233         zend_string_free(buffer.s);
234     } else {
235         hash = 0;
236     }
237 
238     return hash;
239 }
240 
get_spl_object_hash(zval * obj)241 static inline uint32_t get_spl_object_hash(zval *obj)
242 {
243     return Z_OBJ_HANDLE_P(obj);
244 }
245 
get_resource_hash(zval * resource)246 static inline uint32_t get_resource_hash(zval *resource)
247 {
248     return Z_RES_HANDLE_P(resource);
249 }
250 
get_double_hash(zval * value)251 static inline uint32_t get_double_hash(zval *value)
252 {
253     union ulld {
254         unsigned long long ull;
255         double d;
256     } hack;
257 
258     hack.d = Z_DVAL_P(value);
259     if (hack.d == -0.0) {
260         hack.d = 0.0;
261     }
262 
263     return (uint32_t)(hack.ull ^ (hack.ull >> 32));
264 }
265 
get_object_hash(zval * obj)266 static uint32_t get_object_hash(zval *obj)
267 {
268     if (implements_hashable(obj)) {
269         zval hash;
270 #if PHP_VERSION_ID >= 80000
271         zend_call_method_with_0_params(Z_OBJ_P(obj), Z_OBJCE_P(obj), NULL, "hash", &hash);
272 #else
273         zend_call_method_with_0_params(obj, Z_OBJCE_P(obj), NULL, "hash", &hash);
274 #endif
275 
276         switch (Z_TYPE(hash)) {
277             case IS_LONG:
278                 return Z_LVAL(hash);
279 
280             case IS_DOUBLE:
281                 return get_double_hash(&hash);
282 
283             case IS_STRING:
284                 return get_string_zval_hash(&hash);
285 
286             case IS_TRUE:
287                 return 1;
288 
289             case IS_FALSE:
290             case IS_NULL:
291                 return 0;
292 
293             default:
294                 OBJ_HASH_MUST_BE_SCALAR(&hash);
295                 return 0;
296         }
297     }
298 
299     // Not instance of Hashable, so use spl_object_hash
300     return get_spl_object_hash(obj);
301 }
302 
get_hash(zval * value)303 static uint32_t get_hash(zval *value)
304 {
305     switch (Z_TYPE_P(value)) {
306         case IS_LONG:
307             return Z_LVAL_P(value);
308 
309         case IS_DOUBLE:
310             return get_double_hash(value);
311 
312         case IS_STRING:
313             return get_string_zval_hash(value);
314 
315         case IS_TRUE:
316             return 1;
317 
318         case IS_FALSE:
319         case IS_NULL:
320             return 0;
321 
322         case IS_OBJECT:
323             return get_object_hash(value);
324 
325         case IS_ARRAY:
326             return get_array_hash(value);
327 
328         case IS_RESOURCE:
329             return get_resource_hash(value);
330 
331         case IS_REFERENCE:
332             return get_hash(Z_REFVAL_P(value));
333 
334         default:
335             return 0;
336     }
337 }
338 
ds_htable_lookup_bucket_by_hash(ds_htable_t * table,zval * key,const uint32_t hash)339 static ds_htable_bucket_t *ds_htable_lookup_bucket_by_hash(
340     ds_htable_t     *table,
341     zval            *key,
342     const uint32_t   hash
343 ) {
344     ds_htable_bucket_t *bucket;
345     uint32_t index;
346 
347     // Begin by finding the start of the collision chain by using the lookup
348     // array and the hash to determine the first bucket's buffer index.
349     // Follow the chain until the next bucket in the chain is invalid.
350     for (
351         index  = DS_HTABLE_BUCKET_LOOKUP(table, hash);
352         index != DS_HTABLE_INVALID_INDEX;
353         index  = DS_HTABLE_BUCKET_NEXT(bucket)
354     ) {
355         bucket = &table->buckets[index];
356 
357         if (DS_HTABLE_BUCKET_HASH(bucket) == hash) {
358             if (ds_htable_bucket_key_match(bucket, key)) {
359                 return bucket;
360             }
361         }
362     }
363 
364     return NULL;
365 }
366 
ds_htable_lookup_by_key(ds_htable_t * table,zval * key)367 ds_htable_bucket_t *ds_htable_lookup_by_key(ds_htable_t *table, zval *key)
368 {
369     return ds_htable_lookup_bucket_by_hash(table, key, get_hash(key));
370 }
371 
ds_htable_lookup_by_position(ds_htable_t * table,uint32_t position)372 ds_htable_bucket_t *ds_htable_lookup_by_position(ds_htable_t *table, uint32_t position)
373 {
374     if (table->size == 0 || position >= table->size) {
375         return NULL;
376 
377     // A packed table or contiguous section allows us to do a direct lookup.
378     } else if (DS_HTABLE_IS_PACKED(table) || position < table->min_deleted) {
379         return &table->buckets[position];
380 
381     } else {
382         uint32_t index;
383 
384         ds_htable_bucket_t *bucket;
385         ds_htable_bucket_t *stop;
386 
387         // Skip ahead if to the first deleted bucket if we know we can.
388         if (table->min_deleted <= position) {
389             index = table->min_deleted;
390         } else {
391             index = 0;
392         }
393 
394         bucket = &table->buckets[index];
395         stop   = &table->buckets[table->next];
396 
397         // Scan through the buckets skipping deleted ones until we've counted
398         // enough valid buckets to have reached the one at the given position.
399         for (; bucket < stop; ++bucket) {
400             if (DS_HTABLE_BUCKET_DELETED(bucket)) {
401                 continue;
402             }
403 
404             if (position == index) {
405                 return bucket;
406             }
407 
408             index++;
409         }
410     }
411 
412     return NULL;
413 }
414 
ds_htable_lookup_by_value(ds_htable_t * table,zval * value)415 ds_htable_bucket_t *ds_htable_lookup_by_value(ds_htable_t *table, zval *value)
416 {
417     ds_htable_bucket_t *bucket;
418 
419     DS_HTABLE_FOREACH_BUCKET(table, bucket) {
420         if (zend_is_identical(value, &bucket->value)) {
421             return bucket;
422         }
423     }
424     DS_HTABLE_FOREACH_END();
425 
426     return NULL;
427 }
428 
ds_htable_isset(ds_htable_t * table,zval * key,bool check_empty)429 bool ds_htable_isset(ds_htable_t *table, zval *key, bool check_empty)
430 {
431     ds_htable_bucket_t *bucket = ds_htable_lookup_by_key(table, key);
432 
433     return bucket && ds_zval_isset(&bucket->value, check_empty);
434 }
435 
ds_htable_get(ds_htable_t * table,zval * key)436 zval *ds_htable_get(ds_htable_t *table, zval *key)
437 {
438     ds_htable_bucket_t *bucket = ds_htable_lookup_by_key(table, key);
439 
440     return bucket ? &bucket->value : NULL;
441 }
442 
ds_htable_has_key(ds_htable_t * table,zval * key)443 bool ds_htable_has_key(ds_htable_t *table, zval *key)
444 {
445     return ds_htable_lookup_by_key(table, key) != NULL;
446 }
447 
ds_htable_has_keys(ds_htable_t * table,VA_PARAMS)448 bool ds_htable_has_keys(ds_htable_t *table, VA_PARAMS)
449 {
450     while (argc-- > 0) {
451         if ( ! ds_htable_has_key(table, argv++)) {
452             return false;
453         }
454     }
455 
456     return true;
457 }
458 
ds_htable_has_value(ds_htable_t * table,zval * value)459 bool ds_htable_has_value(ds_htable_t *table, zval *value)
460 {
461     return ds_htable_lookup_by_value(table, value) != NULL;
462 }
463 
ds_htable_has_values(ds_htable_t * table,VA_PARAMS)464 bool ds_htable_has_values(ds_htable_t *table, VA_PARAMS)
465 {
466     while (argc-- > 0) {
467         if ( ! ds_htable_lookup_by_value(table, argv++)) {
468             return false;
469         }
470     }
471 
472     return true;
473 }
474 
ds_htable_clear_buffer(ds_htable_t * table)475 static void ds_htable_clear_buffer(ds_htable_t *table)
476 {
477     ds_htable_bucket_t *bucket;
478 
479     if (table->size == 0) {
480         return;
481     }
482 
483     DS_HTABLE_FOREACH_BUCKET(table, bucket) {
484         DS_HTABLE_BUCKET_DELETE(bucket);
485     }
486     DS_HTABLE_FOREACH_END();
487 
488     table->size = 0;
489     table->next = 0;
490     table->min_deleted = table->capacity;
491 }
492 
ds_htable_clear(ds_htable_t * table)493 void ds_htable_clear(ds_htable_t *table)
494 {
495     ds_htable_clear_buffer(table);
496 
497     if (table->capacity > DS_HTABLE_MIN_CAPACITY) {
498         ds_htable_realloc(table, DS_HTABLE_MIN_CAPACITY);
499     }
500 
501     ds_htable_reset_lookup(table);
502 
503     table->min_deleted = table->capacity;
504 }
505 
ds_htable_free(ds_htable_t * table)506 void ds_htable_free(ds_htable_t *table)
507 {
508     ds_htable_clear_buffer(table);
509 
510     efree(table->buckets);
511     efree(table->lookup);
512     efree(table);
513 }
514 
ds_htable_sort_ex(ds_htable_t * table,compare_func_t compare_func)515 static inline void ds_htable_sort_ex(ds_htable_t *table, compare_func_t compare_func)
516 {
517     ds_htable_pack(table);
518     qsort(table->buckets, table->size, sizeof(ds_htable_bucket_t), compare_func);
519     ds_htable_rehash(table);
520 }
521 
ds_htable_sort(ds_htable_t * table,compare_func_t compare_func)522 void ds_htable_sort(ds_htable_t *table, compare_func_t compare_func)
523 {
524     ds_htable_sort_ex(table, compare_func);
525 }
526 
user_compare_by_value(const void * a,const void * b)527 static int user_compare_by_value(const void *a, const void *b)
528 {
529     zval params[2];
530     zval retval;
531 
532     ds_htable_bucket_t *x = ((ds_htable_bucket_t*)a);
533     ds_htable_bucket_t *y = ((ds_htable_bucket_t*)b);
534 
535     ZVAL_COPY_VALUE(&params[0], &x->value);
536     ZVAL_COPY_VALUE(&params[1], &y->value);
537 
538     DSG(user_compare_fci).param_count = 2;
539     DSG(user_compare_fci).params      = params;
540     DSG(user_compare_fci).retval      = &retval;
541 
542     if (zend_call_function(&DSG(user_compare_fci), &DSG(user_compare_fci_cache)) == SUCCESS) {
543         return zval_get_long(&retval);
544     }
545 
546     return 0;
547 }
548 
user_compare_by_key(const void * a,const void * b)549 static int user_compare_by_key(const void *a, const void *b)
550 {
551     zval params[2];
552     zval retval;
553 
554     ZVAL_COPY_VALUE(&params[0], (zval*) a);
555     ZVAL_COPY_VALUE(&params[1], (zval*) b);
556 
557     DSG(user_compare_fci).param_count = 2;
558     DSG(user_compare_fci).params      = params;
559     DSG(user_compare_fci).retval      = &retval;
560 
561     if (zend_call_function(&DSG(user_compare_fci), &DSG(user_compare_fci_cache)) == SUCCESS) {
562         return zval_get_long(&retval);
563     }
564 
565     return 0;
566 }
567 
compare_by_key(const void * a,const void * b)568 static int compare_by_key(const void *a, const void *b)
569 {
570     zval retval;
571 
572     if (compare_function(&retval, (zval*) a, (zval*) b) == SUCCESS) {
573         return zval_get_long(&retval);
574     }
575 
576     return 0;
577 }
578 
compare_by_value(const void * a,const void * b)579 static int compare_by_value(const void *a, const void *b)
580 {
581     zval retval;
582     ds_htable_bucket_t *x = (ds_htable_bucket_t*) a;
583     ds_htable_bucket_t *y = (ds_htable_bucket_t*) b;
584 
585     if (compare_function(&retval, &x->value, &y->value) == SUCCESS) {
586         return zval_get_long(&retval);
587     }
588 
589     return 0;
590 }
591 
ds_htable_sort_by_key(ds_htable_t * table)592 void ds_htable_sort_by_key(ds_htable_t *table)
593 {
594     ds_htable_sort(table, compare_by_key);
595 }
596 
ds_htable_sort_by_value(ds_htable_t * table)597 void ds_htable_sort_by_value(ds_htable_t *table)
598 {
599     ds_htable_sort(table, compare_by_value);
600 }
601 
ds_htable_sort_callback_by_key(ds_htable_t * table)602 void ds_htable_sort_callback_by_key(ds_htable_t *table)
603 {
604     ds_htable_sort(table, user_compare_by_key);
605 }
606 
ds_htable_sort_callback_by_value(ds_htable_t * table)607 void ds_htable_sort_callback_by_value(ds_htable_t *table)
608 {
609     ds_htable_sort(table, user_compare_by_value);
610 }
611 
ds_htable_increase_capacity(ds_htable_t * table)612 static inline void ds_htable_increase_capacity(ds_htable_t *table)
613 {
614     if (table->next > table->size + (table->size >> 5)) {
615         ds_htable_rehash(table);
616         return;
617     }
618 
619     ds_htable_realloc(table, table->capacity << 1);
620     ds_htable_rehash(table);
621 }
622 
ds_htable_get_capacity_for_size(zend_long size)623 static inline zend_long ds_htable_get_capacity_for_size(zend_long size)
624 {
625     return ds_next_power_of_2(size, DS_HTABLE_MIN_CAPACITY);
626 }
627 
ds_htable_ensure_capacity(ds_htable_t * table,uint32_t capacity)628 void ds_htable_ensure_capacity(ds_htable_t *table, uint32_t capacity)
629 {
630     capacity = ds_htable_get_capacity_for_size(capacity);
631 
632     if (capacity > table->capacity) {
633         ds_htable_realloc(table, capacity);
634         ds_htable_rehash(table);
635     }
636 }
637 
638 /**
639  * Adds a bucket to the table knowing that its key doesn't already exist.
640  */
ds_htable_put_distinct_bucket(ds_htable_t * table,ds_htable_bucket_t * bucket)641 static void ds_htable_put_distinct_bucket(ds_htable_t *table, ds_htable_bucket_t *bucket)
642 {
643     ds_htable_bucket_t *next = &table->buckets[table->next];
644 
645     DS_HTABLE_BUCKET_COPY(next, bucket);
646     DS_HTABLE_BUCKET_REHASH(table, next, table->capacity - 1, table->next);
647 
648     table->next++;
649     table->size++;
650 
651     if (table->next == table->capacity) {
652         ds_htable_increase_capacity(table);
653     }
654 }
655 
ds_htable_init_next_bucket(ds_htable_t * table,zval * key,zval * value,const uint32_t hash)656 static ds_htable_bucket_t *ds_htable_init_next_bucket(
657     ds_htable_t *table,
658     zval *key,
659     zval *value,
660     const uint32_t hash
661 ) {
662     ds_htable_bucket_t *bucket = &table->buckets[table->next];
663 
664     DS_HTABLE_BUCKET_HASH(bucket) = hash;
665     DS_HTABLE_BUCKET_REHASH(table, bucket, table->capacity - 1, table->next);
666     ZVAL_COPY(&bucket->key, key);
667 
668     // Only attempt to copy the value if provided.
669     if (value) {
670         ZVAL_COPY(&bucket->value, value);
671     } else {
672         ZVAL_NULL(&bucket->value);
673     }
674 
675     table->next++;
676     table->size++;
677     return bucket;
678 }
679 
ds_htable_lookup_or_next(ds_htable_t * table,zval * key,ds_htable_bucket_t ** bucket)680 bool ds_htable_lookup_or_next(ds_htable_t *table, zval *key, ds_htable_bucket_t **bucket)
681 {
682     const uint32_t hash = get_hash(key);
683 
684     // Attempt to find the bucket
685     if ((*bucket = ds_htable_lookup_bucket_by_hash(table, key, hash))) {
686         return true;
687     }
688 
689     if (table->next == table->capacity) {
690         ds_htable_increase_capacity(table);
691     }
692 
693     // Bucket couldn't be found, so create as new bucket in the buffer.
694     *bucket = ds_htable_init_next_bucket(table, key, NULL, hash);
695     return false;
696 }
697 
ds_htable_put(ds_htable_t * table,zval * key,zval * value)698 void ds_htable_put(ds_htable_t *table, zval *key, zval *value)
699 {
700     ds_htable_bucket_t *bucket;
701 
702     // Attempt to find the bucket or initialize it as a new bucket.
703     bool found = ds_htable_lookup_or_next(table, key, &bucket);
704 
705     // If found, destruct the current value so that we can replace it.
706     if (found) {
707         DTOR_AND_UNDEF(&bucket->value);
708     }
709     if (value) {
710         ZVAL_COPY(&bucket->value, value);
711     }
712 }
713 
ds_htable_values(ds_htable_t * table)714 zval *ds_htable_values(ds_htable_t *table)
715 {
716     zval *buffer = ds_allocate_zval_buffer(table->size);
717     zval *target = buffer;
718     zval *value;
719 
720     DS_HTABLE_FOREACH_VALUE(table, value) {
721         ZVAL_COPY(target++, value);
722     }
723     DS_HTABLE_FOREACH_END();
724 
725     return buffer;
726 }
727 
ds_htable_last(ds_htable_t * table)728 ds_htable_bucket_t *ds_htable_last(ds_htable_t *table)
729 {
730     if (table->size == 0) {
731         return NULL;
732 
733     } else {
734         ds_htable_bucket_t *bucket = &table->buckets[table->next - 1];
735         while (DS_HTABLE_BUCKET_DELETED(bucket)) {
736             bucket--;
737         }
738 
739         return bucket;
740     }
741 }
742 
ds_htable_first(ds_htable_t * table)743 ds_htable_bucket_t *ds_htable_first(ds_htable_t *table)
744 {
745     if (table->size == 0) {
746         return NULL;
747 
748     } else {
749         ds_htable_bucket_t *bucket = table->buckets;
750         while (DS_HTABLE_BUCKET_DELETED(bucket)) {
751             bucket++;
752         }
753 
754         return bucket;
755     }
756 }
757 
ds_htable_join_keys(ds_htable_t * table,const char * glue,const size_t len)758 zend_string *ds_htable_join_keys(ds_htable_t *table, const char* glue, const size_t len)
759 {
760     smart_str str = {0};
761 
762     if (table->size == 0) {
763         return ZSTR_EMPTY_ALLOC();
764     }
765 
766     if (table->size == 1) {
767         return zval_get_string(&ds_htable_last(table)->key);
768     }
769 
770     if (glue && len) {
771         ds_htable_bucket_t *pos = table->buckets;
772         ds_htable_bucket_t *end = ds_htable_last(table);
773         do {
774             if ( ! DS_HTABLE_BUCKET_DELETED(pos)) {
775                 smart_str_appendz(&str, &pos->key);
776                 smart_str_appendl(&str, glue, len);
777             }
778         } while (++pos != end);
779 
780         // Append last keys
781         smart_str_appendz(&str, &end->key);
782 
783     } else {
784         // No glue, so just append the keys.
785         zval *key;
786         DS_HTABLE_FOREACH_KEY(table, key) {
787             smart_str_appendz(&str, key);
788         }
789         DS_HTABLE_FOREACH_END();
790     }
791 
792     smart_str_0(&str);
793     return str.s;
794 }
795 
ds_htable_remove(ds_htable_t * table,zval * key,zval * return_value)796 int ds_htable_remove(ds_htable_t *table, zval *key, zval *return_value)
797 {
798     uint32_t hash  = get_hash(key);
799     uint32_t index = DS_HTABLE_BUCKET_LOOKUP(table, hash);
800 
801     ds_htable_bucket_t *bucket = NULL;
802     ds_htable_bucket_t *prev   = NULL;
803 
804     for (; index != DS_HTABLE_INVALID_INDEX; index = DS_HTABLE_BUCKET_NEXT(bucket)) {
805         bucket = &table->buckets[index];
806 
807         if (DS_HTABLE_BUCKET_HASH(bucket) != hash || ! ds_htable_bucket_key_match(bucket, key)) {
808             prev = bucket;
809             continue;
810         }
811 
812         if (return_value) {
813             ZVAL_COPY(return_value, &bucket->value);
814         }
815 
816         // Clear the lookup if there wasn't a collision chain,
817         // otherwise remove the link in the chain.
818         if (prev == NULL) {
819             DS_HTABLE_BUCKET_LOOKUP(table, hash) = DS_HTABLE_BUCKET_NEXT(bucket);
820         } else {
821             DS_HTABLE_BUCKET_NEXT(prev) = DS_HTABLE_BUCKET_NEXT(bucket);
822         }
823 
824         DS_HTABLE_BUCKET_DELETE(bucket);
825 
826         // If we're removing the last bucket, we might be able to move the
827         // next open slot back if preceeding buckets are also deleted.
828         if (index == table->next - 1 && table->size > 1) {
829             do {
830                 table->next--;
831                 bucket--;
832             }
833             while (DS_HTABLE_BUCKET_DELETED(bucket));
834         }
835 
836         // Update the left-most deleted index.
837         if (index < table->min_deleted) {
838             table->min_deleted = index;
839         }
840 
841         table->size--;
842 
843         // Check whether the buffer should be truncated.
844         ds_htable_auto_truncate(table);
845 
846         return SUCCESS;
847     }
848 
849     if (return_value) {
850         ZVAL_NULL(return_value);
851     }
852 
853     return FAILURE;
854 }
855 
ds_htable_slice(ds_htable_t * table,zend_long index,zend_long length)856 ds_htable_t *ds_htable_slice(ds_htable_t *table, zend_long index, zend_long length)
857 {
858     ds_normalize_slice_args(&index, &length, table->size);
859 
860     if (length == 0) {
861         return ds_htable();
862 
863     } else {
864         ds_htable_t *slice = ds_htable_with_capacity(length);
865 
866         /**
867          * If the table doesn't have any deleted buckets or if the first deleted
868          * bucket comes after the slice we're after, we can safely seek
869          * to the index and copy each bucket.
870          */
871         if (DS_HTABLE_IS_PACKED(table) ||
872             ((uint32_t) (index + length)) <= table->min_deleted) {
873 
874             ds_htable_bucket_t *src = &table->buckets[index];
875 
876             for (; length-- > 0; src++) {
877                 ds_htable_init_next_bucket(
878                     slice, &src->key, &src->value, DS_HTABLE_BUCKET_HASH(src));
879             }
880 
881         /**
882          * If the table does have deleted buckets but the first one comes after
883          * the index of the slice, we can safely seek to the index.
884          */
885         } else if ((uint32_t) index < table->min_deleted) {
886             ds_htable_bucket_t *src = &table->buckets[index];
887 
888             for (;;) {
889                 ds_htable_init_next_bucket(
890                     slice, &src->key, &src->value, DS_HTABLE_BUCKET_HASH(src));
891 
892                 if (--length == 0) {
893                     break;
894                 }
895 
896                 // Skip deleted buckets
897                 while (DS_HTABLE_BUCKET_DELETED(++src));
898             }
899 
900         /**
901          * Otherwise there are deleted buckets but they're in the slice range.
902          * The best we can do is just skip any deleted buckets we encounter.
903          */
904         } else {
905             zend_long seek = 0;
906             ds_htable_bucket_t *src = table->buckets;
907 
908             // We have to seek iteratively until we reach the index
909             for (; seek < index; ++src) {
910                 if ( ! DS_HTABLE_BUCKET_DELETED(src)) {
911                     seek++;
912                 }
913             }
914 
915             // We're at the index, so gather across.
916             for (; length > 0; src++) {
917                 if (DS_HTABLE_BUCKET_DELETED(src)) {
918                     continue;
919                 }
920 
921                 ds_htable_init_next_bucket(
922                     slice, &src->key, &src->value, DS_HTABLE_BUCKET_HASH(src));
923 
924                 length--;
925             }
926         }
927 
928         return slice;
929     }
930 }
931 
ds_htable_apply(ds_htable_t * table,FCI_PARAMS)932 void ds_htable_apply(ds_htable_t *table, FCI_PARAMS)
933 {
934     zval retval;
935     ds_htable_bucket_t *bucket;
936 
937     DS_HTABLE_FOREACH_BUCKET(table, bucket) {
938         fci.param_count = 2;
939         fci.params      = (zval*) bucket;
940         fci.retval      = &retval;
941 
942         if (zend_call_function(&fci, &fci_cache) == FAILURE || Z_ISUNDEF(retval)) {
943             return;
944         }
945 
946         zval_ptr_dtor(&bucket->value);
947         ZVAL_COPY_VALUE(&bucket->value, &retval);
948     }
949     DS_HTABLE_FOREACH_END();
950 }
951 
ds_htable_map(ds_htable_t * table,FCI_PARAMS)952 ds_htable_t *ds_htable_map(ds_htable_t *table, FCI_PARAMS)
953 {
954     ds_htable_bucket_t *bucket;
955     zval retval;
956 
957     ds_htable_t *mapped = ds_htable_with_capacity(table->capacity);
958 
959     DS_HTABLE_FOREACH_BUCKET(table, bucket) {
960         fci.param_count = 2;
961         fci.params      = (zval*) bucket;
962         fci.retval      = &retval;
963 
964         if (zend_call_function(&fci, &fci_cache) == FAILURE || Z_ISUNDEF(retval)) {
965             ds_htable_free(mapped);
966             zval_ptr_dtor(&retval);
967             return NULL;
968         }
969 
970         ds_htable_init_next_bucket(
971             mapped, &bucket->key, &retval, DS_HTABLE_BUCKET_HASH(bucket));
972 
973         zval_ptr_dtor(&retval);
974     }
975     DS_HTABLE_FOREACH_END();
976 
977     return mapped;
978 }
979 
ds_htable_filter(ds_htable_t * table)980 ds_htable_t *ds_htable_filter(ds_htable_t *table)
981 {
982     ds_htable_bucket_t *bucket;
983     ds_htable_t *filtered = ds_htable_with_capacity(table->capacity);
984 
985     DS_HTABLE_FOREACH_BUCKET(table, bucket) {
986         if (zend_is_true(&bucket->value)) {
987             ds_htable_init_next_bucket(
988                 filtered,
989                 &bucket->key,
990                 &bucket->value,
991                 DS_HTABLE_BUCKET_HASH(bucket));
992         }
993     }
994     DS_HTABLE_FOREACH_END();
995 
996     return filtered;
997 }
998 
ds_htable_filter_callback(ds_htable_t * table,FCI_PARAMS)999 ds_htable_t *ds_htable_filter_callback(ds_htable_t *table, FCI_PARAMS)
1000 {
1001     ds_htable_bucket_t *src;
1002     zval retval;
1003 
1004     ds_htable_t *filtered = ds_htable_with_capacity(table->capacity);
1005 
1006     DS_HTABLE_FOREACH_BUCKET(table, src) {
1007         fci.param_count = 2;
1008         fci.params      = (zval*) src;
1009         fci.retval      = &retval;
1010 
1011         if (zend_call_function(&fci, &fci_cache) == FAILURE || Z_ISUNDEF(retval)) {
1012             ds_htable_free(filtered);
1013             zval_ptr_dtor(&retval);
1014             return NULL;
1015         }
1016 
1017         // Add as next key => value if the return value is true
1018         if (zend_is_true(&retval)) {
1019             ds_htable_init_next_bucket(
1020                 filtered, &src->key, &src->value, DS_HTABLE_BUCKET_HASH(src));
1021         }
1022 
1023         zval_ptr_dtor(&retval);
1024     }
1025     DS_HTABLE_FOREACH_END();
1026 
1027     return filtered;
1028 }
1029 
ds_htable_reduce(ds_htable_t * table,FCI_PARAMS,zval * initial,zval * return_value)1030 void ds_htable_reduce(ds_htable_t *table, FCI_PARAMS, zval *initial, zval *return_value)
1031 {
1032     zval *key, *value;
1033     zval carry;
1034     zval params[3];
1035 
1036     if (initial == NULL) {
1037         ZVAL_NULL(&carry);
1038     } else {
1039         ZVAL_COPY_VALUE(&carry, initial);
1040     }
1041 
1042     DS_HTABLE_FOREACH_KEY_VALUE(table, key, value) {
1043         ZVAL_COPY_VALUE(&params[0], &carry);
1044         ZVAL_COPY_VALUE(&params[1], key);
1045         ZVAL_COPY_VALUE(&params[2], value);
1046 
1047         fci.param_count = 3;
1048         fci.params      = params;
1049         fci.retval      = &carry;
1050 
1051         if (zend_call_function(&fci, &fci_cache) == FAILURE || Z_ISUNDEF(carry)) {
1052             ZVAL_NULL(return_value);
1053             return;
1054         }
1055 
1056         Z_TRY_DELREF_P(&carry);
1057     }
1058     DS_HTABLE_FOREACH_END();
1059     ZVAL_COPY(return_value, &carry);
1060 }
1061 
ds_htable_xor(ds_htable_t * table,ds_htable_t * other)1062 ds_htable_t *ds_htable_xor(ds_htable_t *table, ds_htable_t *other)
1063 {
1064     ds_htable_bucket_t *bucket;
1065     ds_htable_t *xor = ds_htable();
1066 
1067     DS_HTABLE_FOREACH_BUCKET(table, bucket) {
1068         if ( ! ds_htable_has_key(other, &bucket->key)) {
1069             ds_htable_put_distinct_bucket(xor, bucket);
1070         }
1071     }
1072     DS_HTABLE_FOREACH_END();
1073 
1074     DS_HTABLE_FOREACH_BUCKET(other, bucket) {
1075         if ( ! ds_htable_has_key(table, &bucket->key)) {
1076             ds_htable_put_distinct_bucket(xor, bucket);
1077         }
1078     }
1079     DS_HTABLE_FOREACH_END();
1080 
1081     return xor;
1082 }
1083 
ds_htable_diff(ds_htable_t * table,ds_htable_t * other)1084 ds_htable_t *ds_htable_diff(ds_htable_t *table, ds_htable_t *other)
1085 {
1086     ds_htable_bucket_t *bucket;
1087     ds_htable_t *diff = ds_htable();
1088 
1089     DS_HTABLE_FOREACH_BUCKET(table, bucket) {
1090         if ( ! ds_htable_has_key(other, &bucket->key)) {
1091             ds_htable_put_distinct_bucket(diff, bucket);
1092         }
1093     }
1094     DS_HTABLE_FOREACH_END();
1095 
1096     return diff;
1097 }
1098 
ds_htable_intersect(ds_htable_t * table,ds_htable_t * other)1099 ds_htable_t *ds_htable_intersect(ds_htable_t *table, ds_htable_t *other)
1100 {
1101     ds_htable_bucket_t *bucket;
1102 
1103     ds_htable_t *intersection = ds_htable();
1104 
1105     DS_HTABLE_FOREACH_BUCKET(table, bucket) {
1106         if (ds_htable_has_key(other, &bucket->key)) {
1107             ds_htable_put_distinct_bucket(intersection, bucket);
1108         }
1109     }
1110     DS_HTABLE_FOREACH_END();
1111 
1112     return intersection;
1113 }
1114 
ds_htable_merge(ds_htable_t * table,ds_htable_t * other)1115 ds_htable_t *ds_htable_merge(ds_htable_t *table, ds_htable_t *other)
1116 {
1117     ds_htable_bucket_t *bucket;
1118     ds_htable_t *merged = ds_htable_clone(table);
1119 
1120     DS_HTABLE_FOREACH_BUCKET(other, bucket) {
1121         ds_htable_put(merged, &bucket->key, &bucket->value);
1122     }
1123     DS_HTABLE_FOREACH_END();
1124 
1125     return merged;
1126 }
1127 
ds_htable_reverse(ds_htable_t * table)1128 void ds_htable_reverse(ds_htable_t *table)
1129 {
1130     ds_htable_pack(table);
1131     {
1132         ds_htable_bucket_t *a = table->buckets;
1133         ds_htable_bucket_t *b = table->buckets + table->size - 1;
1134 
1135         for (; a < b; ++a, --b) {
1136             ds_htable_bucket_t c = *a;
1137             *a = *b;
1138             *b = c;
1139         }
1140     }
1141     ds_htable_rehash(table);
1142 }
1143 
ds_htable_reversed(ds_htable_t * table)1144 ds_htable_t *ds_htable_reversed(ds_htable_t *table)
1145 {
1146     ds_htable_t *reversed = ds_htable_with_capacity(table->capacity);
1147 
1148     ds_htable_bucket_t *src = NULL;
1149     ds_htable_bucket_t *dst = reversed->buckets;
1150 
1151     const uint32_t mask = reversed->capacity - 1;
1152 
1153     DS_HTABLE_FOREACH_BUCKET_REVERSED(table, src) {
1154         uint32_t *lookup = &reversed->lookup[DS_HTABLE_BUCKET_HASH(src) & mask];
1155 
1156         DS_HTABLE_BUCKET_COPY(dst, src);
1157         DS_HTABLE_BUCKET_NEXT(dst) = *lookup;
1158         *lookup = reversed->next++;
1159         dst++;
1160     }
1161     DS_HTABLE_FOREACH_END();
1162 
1163     reversed->size = table->size;
1164     return reversed;
1165 }
1166 
ds_htable_to_array(ds_htable_t * table,zval * return_value)1167 void ds_htable_to_array(ds_htable_t *table, zval *return_value)
1168 {
1169     HashTable *array;
1170     zval *key;
1171     zval *val;
1172 
1173     array_init_size(return_value, table->size);
1174     array = Z_ARR_P(return_value);
1175 
1176     DS_HTABLE_FOREACH_KEY_VALUE(table, key, val) {
1177         array_set_zval_key(array, key, val);
1178     }
1179     DS_HTABLE_FOREACH_END();
1180 }
1181 
ds_htable_serialize(ds_htable_t * table,unsigned char ** buffer,size_t * length,zend_serialize_data * data)1182 int ds_htable_serialize(ds_htable_t *table, unsigned char **buffer, size_t *length, zend_serialize_data *data)
1183 {
1184     php_serialize_data_t serialize_data = (php_serialize_data_t) data;
1185     PHP_VAR_SERIALIZE_INIT(serialize_data);
1186 
1187     if (table->size == 0) {
1188         SERIALIZE_SET_ZSTR(ZSTR_EMPTY_ALLOC());
1189 
1190     } else {
1191         zval *key, *value;
1192 
1193         smart_str buf = {0};
1194 
1195         DS_HTABLE_FOREACH_KEY_VALUE(table, key, value) {
1196             php_var_serialize(&buf, key, &serialize_data);
1197             php_var_serialize(&buf, value, &serialize_data);
1198         }
1199         DS_HTABLE_FOREACH_END();
1200 
1201         smart_str_0(&buf);
1202 
1203         SERIALIZE_SET_ZSTR(buf.s);
1204         zend_string_release(buf.s);
1205     }
1206 
1207     PHP_VAR_SERIALIZE_DESTROY(serialize_data);
1208     return SUCCESS;
1209 }
1210 
ds_htable_unserialize(ds_htable_t * table,const unsigned char * buffer,size_t length,zend_unserialize_data * data)1211 int ds_htable_unserialize(ds_htable_t *table, const unsigned char *buffer, size_t length, zend_unserialize_data *data)
1212 {
1213     php_unserialize_data_t unserialize_data = (php_unserialize_data_t) data;
1214 
1215     const unsigned char *pos = buffer;
1216     const unsigned char *end = buffer + length;
1217 
1218     PHP_VAR_UNSERIALIZE_INIT(unserialize_data);
1219 
1220     while (pos != end) {
1221 
1222         zval *key   = var_tmp_var(&unserialize_data);
1223         zval *value = var_tmp_var(&unserialize_data);
1224 
1225         if (php_var_unserialize(key, &pos, end, &unserialize_data)) {
1226             var_push_dtor(&unserialize_data, key);
1227         } else {
1228             goto error;
1229         }
1230 
1231         if (php_var_unserialize(value, &pos, end, &unserialize_data)) {
1232             var_push_dtor(&unserialize_data, value);
1233         } else {
1234             goto error;
1235         }
1236 
1237         ds_htable_put(table, key, value);
1238     }
1239 
1240     PHP_VAR_UNSERIALIZE_DESTROY(unserialize_data);
1241     return SUCCESS;
1242 
1243 error:
1244     PHP_VAR_UNSERIALIZE_DESTROY(unserialize_data);
1245     UNSERIALIZE_ERROR();
1246     return FAILURE;
1247 }
1248