1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (String table)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 */
7
8 #include "ir.h"
9 #include "ir_private.h"
10
11 typedef struct _ir_strtab_bucket {
12 uint32_t h;
13 uint32_t len;
14 const char *str;
15 uint32_t next;
16 ir_ref val;
17 } ir_strtab_bucket;
18
ir_str_hash(const char * str,size_t len)19 static uint32_t ir_str_hash(const char *str, size_t len)
20 {
21 size_t i;
22 uint32_t h = 5381;
23
24 for (i = 0; i < len; i++) {
25 h = ((h << 5) + h) + *str;
26 str++;
27 }
28 return h | 0x10000000;
29 }
30
ir_strtab_hash_size(uint32_t size)31 static uint32_t ir_strtab_hash_size(uint32_t size)
32 {
33 /* Use big enough power of 2 */
34 size -= 1;
35 size |= (size >> 1);
36 size |= (size >> 2);
37 size |= (size >> 4);
38 size |= (size >> 8);
39 size |= (size >> 16);
40 return size + 1;
41 }
42
ir_strtab_resize(ir_strtab * strtab)43 static void ir_strtab_resize(ir_strtab *strtab)
44 {
45 uint32_t old_hash_size = (uint32_t)(-(int32_t)strtab->mask);
46 char *old_data = strtab->data;
47 uint32_t size = strtab->size * 2;
48 uint32_t hash_size = ir_strtab_hash_size(size);
49 char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_strtab_bucket));
50 ir_strtab_bucket *p;
51 uint32_t pos, i;
52
53 memset(data, IR_INVALID_IDX, hash_size * sizeof(uint32_t));
54 strtab->data = data + (hash_size * sizeof(uint32_t));
55 strtab->mask = (uint32_t)(-(int32_t)hash_size);
56 strtab->size = size;
57
58 memcpy(strtab->data, old_data, strtab->count * sizeof(ir_strtab_bucket));
59 ir_mem_free(old_data - (old_hash_size * sizeof(uint32_t)));
60
61 i = strtab->count;
62 pos = 0;
63 p = (ir_strtab_bucket*)strtab->data;
64 do {
65 uint32_t h = p->h | strtab->mask;
66 p->next = ((uint32_t*)strtab->data)[(int32_t)h];
67 ((uint32_t*)strtab->data)[(int32_t)h] = pos;
68 pos += sizeof(ir_strtab_bucket);
69 p++;
70 } while (--i);
71 }
72
ir_strtab_grow_buf(ir_strtab * strtab,uint32_t len)73 static void ir_strtab_grow_buf(ir_strtab *strtab, uint32_t len)
74 {
75 intptr_t old = (intptr_t)strtab->buf;
76
77 do {
78 strtab->buf_size *= 2;
79 } while (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1));
80
81 strtab->buf = ir_mem_realloc(strtab->buf, strtab->buf_size);
82 if ((intptr_t)strtab->buf != old) {
83 intptr_t offset = (intptr_t)strtab->buf - old;
84 ir_strtab_bucket *p = (ir_strtab_bucket*)strtab->data;
85 uint32_t i;
86 for (i = strtab->count; i > 0; i--) {
87 p->str += offset;
88 p++;
89 }
90 }
91 }
92
ir_strtab_init(ir_strtab * strtab,uint32_t size,uint32_t buf_size)93 void ir_strtab_init(ir_strtab *strtab, uint32_t size, uint32_t buf_size)
94 {
95 IR_ASSERT(size > 0);
96 uint32_t hash_size = ir_strtab_hash_size(size);
97 char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_strtab_bucket));
98 memset(data, IR_INVALID_IDX, hash_size * sizeof(uint32_t));
99 strtab->data = (data + (hash_size * sizeof(uint32_t)));
100 strtab->mask = (uint32_t)(-(int32_t)hash_size);
101 strtab->size = size;
102 strtab->count = 0;
103 strtab->pos = 0;
104 if (buf_size) {
105 strtab->buf = ir_mem_malloc(buf_size);
106 strtab->buf_size = buf_size;
107 strtab->buf_top = 0;
108 } else {
109 strtab->buf = NULL;
110 strtab->buf_size = 0;
111 strtab->buf_top = 0;
112 }
113 }
114
ir_strtab_find(const ir_strtab * strtab,const char * str,uint32_t len)115 ir_ref ir_strtab_find(const ir_strtab *strtab, const char *str, uint32_t len)
116 {
117 uint32_t h = ir_str_hash(str, len);
118 const char *data = (const char*)strtab->data;
119 uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)];
120 ir_strtab_bucket *p;
121
122 while (pos != IR_INVALID_IDX) {
123 p = (ir_strtab_bucket*)(data + pos);
124 if (p->h == h
125 && p->len == len
126 && memcmp(p->str, str, len) == 0) {
127 return p->val;
128 }
129 pos = p->next;
130 }
131 return 0;
132 }
133
ir_strtab_lookup(ir_strtab * strtab,const char * str,uint32_t len,ir_ref val)134 ir_ref ir_strtab_lookup(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val)
135 {
136 uint32_t h = ir_str_hash(str, len);
137 char *data = (char*)strtab->data;
138 uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)];
139 ir_strtab_bucket *p;
140
141 while (pos != IR_INVALID_IDX) {
142 p = (ir_strtab_bucket*)(data + pos);
143 if (p->h == h
144 && p->len == len
145 && memcmp(p->str, str, len) == 0) {
146 return p->val;
147 }
148 pos = p->next;
149 }
150
151 IR_ASSERT(val != 0);
152
153 if (UNEXPECTED(strtab->count >= strtab->size)) {
154 ir_strtab_resize(strtab);
155 data = strtab->data;
156 }
157
158 if (strtab->buf) {
159 if (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1)) {
160 ir_strtab_grow_buf(strtab, len + 1);
161 }
162
163 memcpy(strtab->buf + strtab->buf_top, str, len);
164 strtab->buf[strtab->buf_top + len] = 0;
165 str = (const char*)strtab->buf + strtab->buf_top;
166 strtab->buf_top += len + 1;
167 }
168
169 pos = strtab->pos;
170 strtab->pos += sizeof(ir_strtab_bucket);
171 strtab->count++;
172 p = (ir_strtab_bucket*)(data + pos);
173 p->h = h;
174 p->len = len;
175 p->str = str;
176 h |= strtab->mask;
177 p->next = ((uint32_t*)data)[(int32_t)h];
178 ((uint32_t*)data)[(int32_t)h] = pos;
179 p->val = val;
180 return val;
181 }
182
ir_strtab_update(ir_strtab * strtab,const char * str,uint32_t len,ir_ref val)183 ir_ref ir_strtab_update(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val)
184 {
185 uint32_t h = ir_str_hash(str, len);
186 char *data = (char*)strtab->data;
187 uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)];
188 ir_strtab_bucket *p;
189
190 while (pos != IR_INVALID_IDX) {
191 p = (ir_strtab_bucket*)(data + pos);
192 if (p->h == h
193 && p->len == len
194 && memcmp(p->str, str, len) == 0) {
195 return p->val = val;
196 }
197 pos = p->next;
198 }
199 return 0;
200 }
201
ir_strtab_str(const ir_strtab * strtab,ir_ref idx)202 const char *ir_strtab_str(const ir_strtab *strtab, ir_ref idx)
203 {
204 IR_ASSERT(idx >= 0 && (uint32_t)idx < strtab->count);
205 return ((const ir_strtab_bucket*)strtab->data)[idx].str;
206 }
207
ir_strtab_strl(const ir_strtab * strtab,ir_ref idx,size_t * len)208 const char *ir_strtab_strl(const ir_strtab *strtab, ir_ref idx, size_t *len)
209 {
210 const ir_strtab_bucket *b = ((const ir_strtab_bucket*)strtab->data) + idx;
211 IR_ASSERT(idx >= 0 && (uint32_t)idx < strtab->count);
212 *len = b->len;
213 return b->str;
214 }
215
ir_strtab_free(ir_strtab * strtab)216 void ir_strtab_free(ir_strtab *strtab)
217 {
218 uint32_t hash_size = (uint32_t)(-(int32_t)strtab->mask);
219 char *data = (char*)strtab->data - (hash_size * sizeof(uint32_t));
220 ir_mem_free(data);
221 strtab->data = NULL;
222 if (strtab->buf) {
223 ir_mem_free(strtab->buf);
224 strtab->buf = NULL;
225 }
226 }
227
ir_strtab_apply(const ir_strtab * strtab,ir_strtab_apply_t func)228 void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func)
229 {
230 uint32_t i;
231
232 for (i = 0; i < strtab->count; i++) {
233 const ir_strtab_bucket *b = &((ir_strtab_bucket*)strtab->data)[i];
234 func(b->str, b->len, b->val);
235 }
236 }
237