xref: /php-src/ext/dom/lexbor/lexbor/core/hash.c (revision f0934090)
1 /*
2  * Copyright (C) 2019 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #define LEXBOR_HASH_EXTERN
8 #include "lexbor/core/hash.h"
9 #undef LEXBOR_HASH_EXTERN
10 
11 #include "lexbor/core/str.h"
12 
13 #define LEXBOR_STR_RES_MAP_LOWERCASE
14 #define LEXBOR_STR_RES_MAP_UPPERCASE
15 #include "lexbor/core/str_res.h"
16 
17 
18 /* Insert variable. */
19 const lexbor_hash_insert_t lexbor_hash_insert_var = {
20     .hash = lexbor_hash_make_id,
21     .copy = lexbor_hash_copy,
22     .cmp = lexbor_str_data_ncmp
23 };
24 
25 const lexbor_hash_insert_t lexbor_hash_insert_lower_var = {
26     .hash = lexbor_hash_make_id_lower,
27     .copy = lexbor_hash_copy_lower,
28     .cmp = lexbor_str_data_nlocmp_right
29 };
30 
31 const lexbor_hash_insert_t lexbor_hash_insert_upper_var = {
32     .hash = lexbor_hash_make_id_upper,
33     .copy = lexbor_hash_copy_upper,
34     .cmp = lexbor_str_data_nupcmp_right
35 };
36 
37 LXB_API const lexbor_hash_insert_t
38 *lexbor_hash_insert_raw = &lexbor_hash_insert_var;
39 
40 LXB_API const lexbor_hash_insert_t
41 *lexbor_hash_insert_lower = &lexbor_hash_insert_lower_var;
42 
43 LXB_API const lexbor_hash_insert_t
44 *lexbor_hash_insert_upper = &lexbor_hash_insert_upper_var;
45 
46 /* Search variable. */
47 const lexbor_hash_search_t lexbor_hash_search_var = {
48     .hash = lexbor_hash_make_id,
49     .cmp = lexbor_str_data_ncmp
50 };
51 
52 const lexbor_hash_search_t lexbor_hash_search_lower_var = {
53     .hash = lexbor_hash_make_id_lower,
54     .cmp = lexbor_str_data_nlocmp_right
55 };
56 
57 const lexbor_hash_search_t lexbor_hash_search_upper_var = {
58     .hash = lexbor_hash_make_id_upper,
59     .cmp = lexbor_str_data_nupcmp_right
60 };
61 
62 LXB_API const lexbor_hash_search_t
63 *lexbor_hash_search_raw = &lexbor_hash_search_var;
64 
65 LXB_API const lexbor_hash_search_t
66 *lexbor_hash_search_lower = &lexbor_hash_search_lower_var;
67 
68 LXB_API const lexbor_hash_search_t
69 *lexbor_hash_search_upper = &lexbor_hash_search_upper_var;
70 
71 
72 lxb_inline lexbor_hash_entry_t **
lexbor_hash_table_create(lexbor_hash_t * hash)73 lexbor_hash_table_create(lexbor_hash_t *hash)
74 {
75     return lexbor_calloc(hash->table_size, sizeof(lexbor_hash_entry_t *));
76 }
77 
78 lxb_inline void
lexbor_hash_table_clean(lexbor_hash_t * hash)79 lexbor_hash_table_clean(lexbor_hash_t *hash)
80 {
81     memset(hash->table, 0, sizeof(lexbor_hash_t *) * hash->table_size);
82 }
83 
84 lxb_inline lexbor_hash_entry_t **
lexbor_hash_table_destroy(lexbor_hash_t * hash)85 lexbor_hash_table_destroy(lexbor_hash_t *hash)
86 {
87     if (hash->table != NULL) {
88         return lexbor_free(hash->table);
89     }
90 
91     return NULL;
92 }
93 
94 lxb_inline lexbor_hash_entry_t *
_lexbor_hash_entry_create(lexbor_hash_t * hash,const lexbor_hash_copy_f copy_func,const lxb_char_t * key,size_t length)95 _lexbor_hash_entry_create(lexbor_hash_t *hash, const lexbor_hash_copy_f copy_func,
96                           const lxb_char_t *key, size_t length)
97 {
98     lexbor_hash_entry_t *entry = lexbor_dobject_calloc(hash->entries);
99     if (entry == NULL) {
100         return NULL;
101     }
102 
103     entry->length = length;
104 
105     if (copy_func(hash, entry, key, length) != LXB_STATUS_OK) {
106         lexbor_dobject_free(hash->entries, entry);
107         return NULL;
108     }
109 
110     return entry;
111 }
112 
113 lexbor_hash_t *
lexbor_hash_create(void)114 lexbor_hash_create(void)
115 {
116     return lexbor_calloc(1, sizeof(lexbor_hash_t));
117 }
118 
119 lxb_status_t
lexbor_hash_init(lexbor_hash_t * hash,size_t table_size,size_t struct_size)120 lexbor_hash_init(lexbor_hash_t *hash, size_t table_size, size_t struct_size)
121 {
122     lxb_status_t status;
123     size_t chunk_size;
124 
125     if (hash == NULL) {
126         return LXB_STATUS_ERROR_OBJECT_IS_NULL;
127     }
128 
129     if (table_size < LEXBOR_HASH_TABLE_MIN_SIZE) {
130         table_size = LEXBOR_HASH_TABLE_MIN_SIZE;
131     }
132 
133     chunk_size = table_size / 2;
134 
135     hash->table_size = table_size;
136 
137     hash->entries = lexbor_dobject_create();
138     status = lexbor_dobject_init(hash->entries, chunk_size, struct_size);
139     if (status != LXB_STATUS_OK) {
140         return status;
141     }
142 
143     hash->mraw = lexbor_mraw_create();
144     status = lexbor_mraw_init(hash->mraw, chunk_size * 12);
145     if (status != LXB_STATUS_OK) {
146         return status;
147     }
148 
149     hash->table = lexbor_hash_table_create(hash);
150     if (hash->table == NULL) {
151         return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
152     }
153 
154     hash->struct_size = struct_size;
155 
156     return LXB_STATUS_OK;
157 }
158 
159 void
lexbor_hash_clean(lexbor_hash_t * hash)160 lexbor_hash_clean(lexbor_hash_t *hash)
161 {
162     lexbor_dobject_clean(hash->entries);
163     lexbor_mraw_clean(hash->mraw);
164     lexbor_hash_table_clean(hash);
165 }
166 
167 lexbor_hash_t *
lexbor_hash_destroy(lexbor_hash_t * hash,bool destroy_obj)168 lexbor_hash_destroy(lexbor_hash_t *hash, bool destroy_obj)
169 {
170     if (hash == NULL) {
171         return NULL;
172     }
173 
174     hash->entries = lexbor_dobject_destroy(hash->entries, true);
175     hash->mraw = lexbor_mraw_destroy(hash->mraw, true);
176     hash->table = lexbor_hash_table_destroy(hash);
177 
178     if (destroy_obj) {
179         return lexbor_free(hash);
180     }
181 
182     return hash;
183 }
184 
185 void *
lexbor_hash_insert(lexbor_hash_t * hash,const lexbor_hash_insert_t * insert,const lxb_char_t * key,size_t length)186 lexbor_hash_insert(lexbor_hash_t *hash, const lexbor_hash_insert_t *insert,
187                    const lxb_char_t *key, size_t length)
188 {
189     lxb_char_t *str;
190     uint32_t hash_id, table_idx;
191     lexbor_hash_entry_t *entry;
192 
193     hash_id = insert->hash(key, length);
194     table_idx = hash_id % hash->table_size;
195 
196     entry = hash->table[table_idx];
197 
198     if (entry == NULL) {
199         entry = _lexbor_hash_entry_create(hash, insert->copy, key, length);
200         hash->table[table_idx] = entry;
201 
202         return entry;
203     }
204 
205     do {
206         str = lexbor_hash_entry_str(entry);
207 
208         if (entry->length == length && insert->cmp(str, key, length)) {
209             return entry;
210         }
211 
212         if (entry->next == NULL) {
213             break;
214         }
215 
216         entry = entry->next;
217     }
218     while (1);
219 
220     entry->next = _lexbor_hash_entry_create(hash, insert->copy, key, length);
221 
222     return entry->next;
223 }
224 
225 void *
lexbor_hash_insert_by_entry(lexbor_hash_t * hash,lexbor_hash_entry_t * entry,const lexbor_hash_search_t * search,const lxb_char_t * key,size_t length)226 lexbor_hash_insert_by_entry(lexbor_hash_t *hash, lexbor_hash_entry_t *entry,
227                             const lexbor_hash_search_t *search,
228                             const lxb_char_t *key, size_t length)
229 {
230     lxb_char_t *str;
231     uint32_t hash_id, table_idx;
232     lexbor_hash_entry_t *item;
233 
234     hash_id = search->hash(key, length);
235     table_idx = hash_id % hash->table_size;
236 
237     item = hash->table[table_idx];
238 
239     if (item == NULL) {
240         hash->table[table_idx] = entry;
241 
242         return entry;
243     }
244 
245     do {
246         str = lexbor_hash_entry_str(item);
247 
248         if (item->length == length && search->cmp(str, key, length)) {
249             return item;
250         }
251 
252         if (item->next == NULL) {
253             break;
254         }
255 
256         item = item->next;
257     }
258     while (1);
259 
260     item->next = entry;
261 
262     return entry;
263 }
264 
265 void
lexbor_hash_remove(lexbor_hash_t * hash,const lexbor_hash_search_t * search,const lxb_char_t * key,size_t length)266 lexbor_hash_remove(lexbor_hash_t *hash, const lexbor_hash_search_t *search,
267                    const lxb_char_t *key, size_t length)
268 {
269     lexbor_hash_remove_by_hash_id(hash, search->hash(key, length),
270                                   key, length, search->cmp);
271 }
272 
273 void *
lexbor_hash_search(lexbor_hash_t * hash,const lexbor_hash_search_t * search,const lxb_char_t * key,size_t length)274 lexbor_hash_search(lexbor_hash_t *hash, const lexbor_hash_search_t *search,
275                    const lxb_char_t *key, size_t length)
276 {
277     return lexbor_hash_search_by_hash_id(hash, search->hash(key, length),
278                                          key, length, search->cmp);
279 }
280 
281 void
lexbor_hash_remove_by_hash_id(lexbor_hash_t * hash,uint32_t hash_id,const lxb_char_t * key,size_t length,const lexbor_hash_cmp_f cmp_func)282 lexbor_hash_remove_by_hash_id(lexbor_hash_t *hash, uint32_t hash_id,
283                               const lxb_char_t *key, size_t length,
284                               const lexbor_hash_cmp_f cmp_func)
285 {
286     uint32_t table_idx;
287     lxb_char_t *str;
288     lexbor_hash_entry_t *entry, *prev;
289 
290     table_idx = hash_id % hash->table_size;
291     entry = hash->table[table_idx];
292     prev = NULL;
293 
294     while (entry != NULL) {
295         str = lexbor_hash_entry_str(entry);
296 
297         if (entry->length == length && cmp_func(str, key, length)) {
298             if (prev == NULL) {
299                 hash->table[table_idx] = entry->next;
300             }
301             else {
302                 prev->next = entry->next;
303             }
304 
305             if (length > LEXBOR_HASH_SHORT_SIZE) {
306                 lexbor_mraw_free(hash->mraw, entry->u.long_str);
307             }
308 
309             lexbor_dobject_free(hash->entries, entry);
310 
311             return;
312         }
313 
314         prev = entry;
315         entry = entry->next;
316     }
317 }
318 
319 void *
lexbor_hash_search_by_hash_id(lexbor_hash_t * hash,uint32_t hash_id,const lxb_char_t * key,size_t length,const lexbor_hash_cmp_f cmp_func)320 lexbor_hash_search_by_hash_id(lexbor_hash_t *hash, uint32_t hash_id,
321                               const lxb_char_t *key, size_t length,
322                               const lexbor_hash_cmp_f cmp_func)
323 {
324     lxb_char_t *str;
325     lexbor_hash_entry_t *entry;
326 
327     entry = hash->table[ hash_id % hash->table_size ];
328 
329     while (entry != NULL) {
330         str = lexbor_hash_entry_str(entry);
331 
332         if (entry->length == length && cmp_func(str, key, length)) {
333             return entry;
334         }
335 
336         entry = entry->next;
337     }
338 
339     return NULL;
340 }
341 
342 uint32_t
lexbor_hash_make_id(const lxb_char_t * key,size_t length)343 lexbor_hash_make_id(const lxb_char_t *key, size_t length)
344 {
345     size_t i;
346     uint32_t hash_id;
347 
348     for (i = hash_id = 0; i < length; i++) {
349         hash_id += key[i];
350         hash_id += (hash_id << 10);
351         hash_id ^= (hash_id >> 6);
352     }
353 
354     hash_id += (hash_id << 3);
355     hash_id ^= (hash_id >> 11);
356     hash_id += (hash_id << 15);
357 
358     return hash_id;
359 }
360 
361 uint32_t
lexbor_hash_make_id_lower(const lxb_char_t * key,size_t length)362 lexbor_hash_make_id_lower(const lxb_char_t *key, size_t length)
363 {
364     size_t i;
365     uint32_t hash_id;
366 
367     for (i = hash_id = 0; i < length; i++) {
368         hash_id += lexbor_str_res_map_lowercase[ key[i] ];
369         hash_id += (hash_id << 10);
370         hash_id ^= (hash_id >> 6);
371     }
372 
373     hash_id += (hash_id << 3);
374     hash_id ^= (hash_id >> 11);
375     hash_id += (hash_id << 15);
376 
377     return hash_id;
378 }
379 
380 uint32_t
lexbor_hash_make_id_upper(const lxb_char_t * key,size_t length)381 lexbor_hash_make_id_upper(const lxb_char_t *key, size_t length)
382 {
383     size_t i;
384     uint32_t hash_id;
385 
386     for (i = hash_id = 0; i < length; i++) {
387         hash_id += lexbor_str_res_map_uppercase[ key[i] ];
388         hash_id += (hash_id << 10);
389         hash_id ^= (hash_id >> 6);
390     }
391 
392     hash_id += (hash_id << 3);
393     hash_id ^= (hash_id >> 11);
394     hash_id += (hash_id << 15);
395 
396     return hash_id;
397 }
398 
399 lxb_status_t
lexbor_hash_copy(lexbor_hash_t * hash,lexbor_hash_entry_t * entry,const lxb_char_t * key,size_t length)400 lexbor_hash_copy(lexbor_hash_t *hash, lexbor_hash_entry_t *entry,
401                  const lxb_char_t *key, size_t length)
402 {
403     lxb_char_t *to;
404 
405     if (length <= LEXBOR_HASH_SHORT_SIZE) {
406         to = entry->u.short_str;
407     }
408     else {
409         entry->u.long_str = lexbor_mraw_alloc(hash->mraw, length + 1);
410         if (entry->u.long_str == NULL) {
411             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
412         }
413 
414         to = entry->u.long_str;
415     }
416 
417     memcpy(to, key, length);
418 
419     to[length] = '\0';
420 
421     return LXB_STATUS_OK;
422 }
423 
424 lxb_status_t
lexbor_hash_copy_lower(lexbor_hash_t * hash,lexbor_hash_entry_t * entry,const lxb_char_t * key,size_t length)425 lexbor_hash_copy_lower(lexbor_hash_t *hash, lexbor_hash_entry_t *entry,
426                        const lxb_char_t *key, size_t length)
427 {
428     lxb_char_t *to;
429 
430     if (length <= LEXBOR_HASH_SHORT_SIZE) {
431         to = entry->u.short_str;
432     }
433     else {
434         entry->u.long_str = lexbor_mraw_alloc(hash->mraw, length + 1);
435         if (entry->u.long_str == NULL) {
436             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
437         }
438 
439         to = entry->u.long_str;
440     }
441 
442     for (size_t i = 0; i < length; i++) {
443         to[i] = lexbor_str_res_map_lowercase[ key[i] ];
444     }
445 
446     to[length] = '\0';
447 
448     return LXB_STATUS_OK;
449 }
450 
451 lxb_status_t
lexbor_hash_copy_upper(lexbor_hash_t * hash,lexbor_hash_entry_t * entry,const lxb_char_t * key,size_t length)452 lexbor_hash_copy_upper(lexbor_hash_t *hash, lexbor_hash_entry_t *entry,
453                        const lxb_char_t *key, size_t length)
454 {
455     lxb_char_t *to;
456 
457     if (length <= LEXBOR_HASH_SHORT_SIZE) {
458         to = entry->u.short_str;
459     }
460     else {
461         entry->u.long_str = lexbor_mraw_alloc(hash->mraw, length + 1);
462         if (entry->u.long_str == NULL) {
463             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
464         }
465 
466         to = entry->u.long_str;
467     }
468 
469     for (size_t i = 0; i < length; i++) {
470         to[i] = lexbor_str_res_map_uppercase[ key[i] ];
471     }
472 
473     to[length] = '\0';
474 
475     return LXB_STATUS_OK;
476 }
477