1 /*
2 +----------------------------------------------------------------------+
3 | Zend Engine |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 2.00 of the Zend license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.zend.com/license/2_00.txt. |
11 | If you did not receive a copy of the Zend license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@zend.com so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Dmitry Stogov <dmitry@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #ifndef ZEND_STRING_H
20 #define ZEND_STRING_H
21
22 #include "zend.h"
23
24 BEGIN_EXTERN_C()
25
26 typedef void (*zend_string_copy_storage_func_t)(void);
27 typedef zend_string *(ZEND_FASTCALL *zend_new_interned_string_func_t)(zend_string *str);
28 typedef zend_string *(ZEND_FASTCALL *zend_string_init_interned_func_t)(const char *str, size_t size, int permanent);
29
30 ZEND_API extern zend_new_interned_string_func_t zend_new_interned_string;
31 ZEND_API extern zend_string_init_interned_func_t zend_string_init_interned;
32
33 ZEND_API zend_ulong ZEND_FASTCALL zend_string_hash_func(zend_string *str);
34 ZEND_API zend_ulong ZEND_FASTCALL zend_hash_func(const char *str, size_t len);
35 ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_string *str);
36
37 ZEND_API void zend_interned_strings_init(void);
38 ZEND_API void zend_interned_strings_dtor(void);
39 ZEND_API void zend_interned_strings_activate(void);
40 ZEND_API void zend_interned_strings_deactivate(void);
41 ZEND_API void zend_interned_strings_set_request_storage_handlers(zend_new_interned_string_func_t handler, zend_string_init_interned_func_t init_handler);
42 ZEND_API void zend_interned_strings_set_permanent_storage_copy_handlers(zend_string_copy_storage_func_t copy_handler, zend_string_copy_storage_func_t restore_handler);
43 ZEND_API void zend_interned_strings_switch_storage(zend_bool request);
44
45 ZEND_API extern zend_string *zend_empty_string;
46 ZEND_API extern zend_string *zend_one_char_string[256];
47 ZEND_API extern zend_string **zend_known_strings;
48
END_EXTERN_C()49 END_EXTERN_C()
50
51 /* Shortcuts */
52
/* Field accessors; each expands to an lvalue on the given zend_string pointer. */
#define ZSTR_VAL(zstr)  (zstr)->val   /* character buffer */
#define ZSTR_LEN(zstr)  (zstr)->len   /* stored length */
#define ZSTR_H(zstr)    (zstr)->h     /* cached hash, 0 when not computed */
/* Hash of the string: the cached value if present, otherwise computed on demand. */
#define ZSTR_HASH(zstr) zend_string_hash_val(zstr)
57
58 /* Compatibility macros */
59
/* Older, unprefixed spellings; each one forwards to its ZSTR_* equivalent. */
#define IS_INTERNED(s)                         ZSTR_IS_INTERNED(s)
#define STR_EMPTY_ALLOC()                      ZSTR_EMPTY_ALLOC()
#define _STR_HEADER_SIZE                       _ZSTR_HEADER_SIZE
#define STR_ALLOCA_ALLOC(str, _len, use_heap)  ZSTR_ALLOCA_ALLOC(str, _len, use_heap)
#define STR_ALLOCA_INIT(str, s, len, use_heap) ZSTR_ALLOCA_INIT(str, s, len, use_heap)
#define STR_ALLOCA_FREE(str, use_heap)         ZSTR_ALLOCA_FREE(str, use_heap)
66
67 /*---*/
68
/* Non-zero when the IS_STR_INTERNED flag is set in the string's GC flags. */
#define ZSTR_IS_INTERNED(s) (GC_FLAGS(s) & IS_STR_INTERNED)

/* The shared empty string object. */
#define ZSTR_EMPTY_ALLOC() zend_empty_string
/*
 * Entry `c` of the 256-element one-character string table. `c` is used as
 * the index without any conversion.
 * NOTE(review): callers presumably must pass a value in 0..255 - confirm.
 */
#define ZSTR_CHAR(c) zend_one_char_string[c]
/* Entry `idx` of the known-strings table (see zend_known_string_id). */
#define ZSTR_KNOWN(idx) zend_known_strings[idx]
74
/* Byte offset of the character buffer (`val`) within zend_string. */
#define _ZSTR_HEADER_SIZE XtOffsetOf(zend_string, val)

/*
 * Number of bytes needed for a zend_string that stores `len` characters:
 * the header, the characters themselves, and one extra byte (the slot where
 * zend_string_init() writes the terminating '\0').
 *
 * `len` is parenthesized so that expression arguments containing operators
 * of lower precedence than `+` (shifts, bitwise ops, ?:) expand correctly.
 */
#define _ZSTR_STRUCT_SIZE(len) (_ZSTR_HEADER_SIZE + (len) + 1)
78
/*
 * Obtains storage for a string of `_len` characters through do_alloca() and
 * initializes its header: refcount 1, type IS_STRING, no cached hash, length
 * `_len`. The character buffer itself is left unwritten.
 *
 * str      - lvalue that receives the zend_string pointer
 * _len     - character count; expanded twice, so it must be free of side
 *            effects. It is parenthesized at each use so that arbitrary
 *            expressions expand with the intended precedence.
 * use_heap - forwarded to do_alloca(); pass the same variable to
 *            ZSTR_ALLOCA_FREE() when releasing the string.
 */
#define ZSTR_ALLOCA_ALLOC(str, _len, use_heap) do { \
	(str) = (zend_string *)do_alloca(ZEND_MM_ALIGNED_SIZE_EX(_ZSTR_STRUCT_SIZE((_len)), 8), (use_heap)); \
	GC_SET_REFCOUNT(str, 1); \
	GC_TYPE_INFO(str) = IS_STRING; \
	zend_string_forget_hash_val(str); \
	ZSTR_LEN(str) = (_len); \
} while (0)
86
/*
 * ZSTR_ALLOCA_ALLOC() followed by a copy of `len` bytes from `s` into the
 * new string and a '\0' written right after them.
 * `len` is expanded more than once, so it must not have side effects.
 */
#define ZSTR_ALLOCA_INIT(str, s, len, use_heap) do { \
	ZSTR_ALLOCA_ALLOC(str, len, use_heap); \
	memcpy(ZSTR_VAL(str), (s), (len)); \
	ZSTR_VAL(str)[(len)] = '\0'; \
} while (0)

/* Releases a string obtained with ZSTR_ALLOCA_ALLOC() / ZSTR_ALLOCA_INIT(). */
#define ZSTR_ALLOCA_FREE(str, use_heap) free_alloca(str, use_heap)
94
95 /*---*/
96
97 static zend_always_inline zend_ulong zend_string_hash_val(zend_string *s)
98 {
99 return ZSTR_H(s) ? ZSTR_H(s) : zend_string_hash_func(s);
100 }
101
zend_string_forget_hash_val(zend_string * s)102 static zend_always_inline void zend_string_forget_hash_val(zend_string *s)
103 {
104 ZSTR_H(s) = 0;
105 }
106
zend_string_refcount(const zend_string * s)107 static zend_always_inline uint32_t zend_string_refcount(const zend_string *s)
108 {
109 if (!ZSTR_IS_INTERNED(s)) {
110 return GC_REFCOUNT(s);
111 }
112 return 1;
113 }
114
zend_string_addref(zend_string * s)115 static zend_always_inline uint32_t zend_string_addref(zend_string *s)
116 {
117 if (!ZSTR_IS_INTERNED(s)) {
118 return GC_ADDREF(s);
119 }
120 return 1;
121 }
122
zend_string_delref(zend_string * s)123 static zend_always_inline uint32_t zend_string_delref(zend_string *s)
124 {
125 if (!ZSTR_IS_INTERNED(s)) {
126 return GC_DELREF(s);
127 }
128 return 1;
129 }
130
zend_string_alloc(size_t len,int persistent)131 static zend_always_inline zend_string *zend_string_alloc(size_t len, int persistent)
132 {
133 zend_string *ret = (zend_string *)pemalloc(ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
134
135 GC_SET_REFCOUNT(ret, 1);
136 GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << GC_FLAGS_SHIFT);
137 zend_string_forget_hash_val(ret);
138 ZSTR_LEN(ret) = len;
139 return ret;
140 }
141
zend_string_safe_alloc(size_t n,size_t m,size_t l,int persistent)142 static zend_always_inline zend_string *zend_string_safe_alloc(size_t n, size_t m, size_t l, int persistent)
143 {
144 zend_string *ret = (zend_string *)safe_pemalloc(n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
145
146 GC_SET_REFCOUNT(ret, 1);
147 GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << GC_FLAGS_SHIFT);
148 zend_string_forget_hash_val(ret);
149 ZSTR_LEN(ret) = (n * m) + l;
150 return ret;
151 }
152
zend_string_init(const char * str,size_t len,int persistent)153 static zend_always_inline zend_string *zend_string_init(const char *str, size_t len, int persistent)
154 {
155 zend_string *ret = zend_string_alloc(len, persistent);
156
157 memcpy(ZSTR_VAL(ret), str, len);
158 ZSTR_VAL(ret)[len] = '\0';
159 return ret;
160 }
161
zend_string_copy(zend_string * s)162 static zend_always_inline zend_string *zend_string_copy(zend_string *s)
163 {
164 if (!ZSTR_IS_INTERNED(s)) {
165 GC_ADDREF(s);
166 }
167 return s;
168 }
169
zend_string_dup(zend_string * s,int persistent)170 static zend_always_inline zend_string *zend_string_dup(zend_string *s, int persistent)
171 {
172 if (ZSTR_IS_INTERNED(s)) {
173 return s;
174 } else {
175 return zend_string_init(ZSTR_VAL(s), ZSTR_LEN(s), persistent);
176 }
177 }
178
zend_string_realloc(zend_string * s,size_t len,int persistent)179 static zend_always_inline zend_string *zend_string_realloc(zend_string *s, size_t len, int persistent)
180 {
181 zend_string *ret;
182
183 if (!ZSTR_IS_INTERNED(s)) {
184 if (EXPECTED(GC_REFCOUNT(s) == 1)) {
185 ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
186 ZSTR_LEN(ret) = len;
187 zend_string_forget_hash_val(ret);
188 return ret;
189 }
190 }
191 ret = zend_string_alloc(len, persistent);
192 memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN(len, ZSTR_LEN(s)) + 1);
193 if (!ZSTR_IS_INTERNED(s)) {
194 GC_DELREF(s);
195 }
196 return ret;
197 }
198
zend_string_extend(zend_string * s,size_t len,int persistent)199 static zend_always_inline zend_string *zend_string_extend(zend_string *s, size_t len, int persistent)
200 {
201 zend_string *ret;
202
203 ZEND_ASSERT(len >= ZSTR_LEN(s));
204 if (!ZSTR_IS_INTERNED(s)) {
205 if (EXPECTED(GC_REFCOUNT(s) == 1)) {
206 ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
207 ZSTR_LEN(ret) = len;
208 zend_string_forget_hash_val(ret);
209 return ret;
210 }
211 }
212 ret = zend_string_alloc(len, persistent);
213 memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), ZSTR_LEN(s) + 1);
214 if (!ZSTR_IS_INTERNED(s)) {
215 GC_DELREF(s);
216 }
217 return ret;
218 }
219
zend_string_truncate(zend_string * s,size_t len,int persistent)220 static zend_always_inline zend_string *zend_string_truncate(zend_string *s, size_t len, int persistent)
221 {
222 zend_string *ret;
223
224 ZEND_ASSERT(len <= ZSTR_LEN(s));
225 if (!ZSTR_IS_INTERNED(s)) {
226 if (EXPECTED(GC_REFCOUNT(s) == 1)) {
227 ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
228 ZSTR_LEN(ret) = len;
229 zend_string_forget_hash_val(ret);
230 return ret;
231 }
232 }
233 ret = zend_string_alloc(len, persistent);
234 memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), len + 1);
235 if (!ZSTR_IS_INTERNED(s)) {
236 GC_DELREF(s);
237 }
238 return ret;
239 }
240
zend_string_safe_realloc(zend_string * s,size_t n,size_t m,size_t l,int persistent)241 static zend_always_inline zend_string *zend_string_safe_realloc(zend_string *s, size_t n, size_t m, size_t l, int persistent)
242 {
243 zend_string *ret;
244
245 if (!ZSTR_IS_INTERNED(s)) {
246 if (GC_REFCOUNT(s) == 1) {
247 ret = (zend_string *)safe_perealloc(s, n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
248 ZSTR_LEN(ret) = (n * m) + l;
249 zend_string_forget_hash_val(ret);
250 return ret;
251 }
252 }
253 ret = zend_string_safe_alloc(n, m, l, persistent);
254 memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN((n * m) + l, ZSTR_LEN(s)) + 1);
255 if (!ZSTR_IS_INTERNED(s)) {
256 GC_DELREF(s);
257 }
258 return ret;
259 }
260
zend_string_free(zend_string * s)261 static zend_always_inline void zend_string_free(zend_string *s)
262 {
263 if (!ZSTR_IS_INTERNED(s)) {
264 ZEND_ASSERT(GC_REFCOUNT(s) <= 1);
265 pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
266 }
267 }
268
zend_string_efree(zend_string * s)269 static zend_always_inline void zend_string_efree(zend_string *s)
270 {
271 ZEND_ASSERT(!ZSTR_IS_INTERNED(s));
272 ZEND_ASSERT(GC_REFCOUNT(s) <= 1);
273 ZEND_ASSERT(!(GC_FLAGS(s) & IS_STR_PERSISTENT));
274 efree(s);
275 }
276
zend_string_release(zend_string * s)277 static zend_always_inline void zend_string_release(zend_string *s)
278 {
279 if (!ZSTR_IS_INTERNED(s)) {
280 if (GC_DELREF(s) == 0) {
281 pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
282 }
283 }
284 }
285
zend_string_release_ex(zend_string * s,int persistent)286 static zend_always_inline void zend_string_release_ex(zend_string *s, int persistent)
287 {
288 if (!ZSTR_IS_INTERNED(s)) {
289 if (GC_DELREF(s) == 0) {
290 if (persistent) {
291 ZEND_ASSERT(GC_FLAGS(s) & IS_STR_PERSISTENT);
292 free(s);
293 } else {
294 ZEND_ASSERT(!(GC_FLAGS(s) & IS_STR_PERSISTENT));
295 efree(s);
296 }
297 }
298 }
299 }
300
301 #if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64__) && !defined(__ILP32__)))
302 BEGIN_EXTERN_C()
303 ZEND_API zend_bool ZEND_FASTCALL zend_string_equal_val(zend_string *s1, zend_string *s2);
END_EXTERN_C()304 END_EXTERN_C()
305 #else
306 static zend_always_inline zend_bool zend_string_equal_val(zend_string *s1, zend_string *s2)
307 {
308 return !memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), ZSTR_LEN(s1));
309 }
310 #endif
311
312 static zend_always_inline zend_bool zend_string_equal_content(zend_string *s1, zend_string *s2)
313 {
314 return ZSTR_LEN(s1) == ZSTR_LEN(s2) && zend_string_equal_val(s1, s2);
315 }
316
zend_string_equals(zend_string * s1,zend_string * s2)317 static zend_always_inline zend_bool zend_string_equals(zend_string *s1, zend_string *s2)
318 {
319 return s1 == s2 || zend_string_equal_content(s1, s2);
320 }
321
/*
 * Comparison helpers implemented as macros. Each one checks the lengths
 * first and expands its arguments more than once, so arguments must be free
 * of side effects. The *_literal forms take the length from sizeof(), so
 * their second argument must be a string literal (or char array).
 */

/* Two zend_strings, compared with zend_binary_strcasecmp(). */
#define zend_string_equals_ci(s1, s2) \
	(ZSTR_LEN(s1) == ZSTR_LEN(s2) && !zend_binary_strcasecmp(ZSTR_VAL(s1), ZSTR_LEN(s1), ZSTR_VAL(s2), ZSTR_LEN(s2)))

/* zend_string against a literal, compared with zend_binary_strcasecmp(). */
#define zend_string_equals_literal_ci(str, c) \
	(ZSTR_LEN(str) == sizeof(c) - 1 && !zend_binary_strcasecmp(ZSTR_VAL(str), ZSTR_LEN(str), (c), sizeof(c) - 1))

/* zend_string against a literal, compared byte-for-byte with memcmp(). */
#define zend_string_equals_literal(str, literal) \
	(ZSTR_LEN(str) == sizeof(literal)-1 && !memcmp(ZSTR_VAL(str), literal, sizeof(literal) - 1))
330
331 /*
332 * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
333 *
334 * This is Daniel J. Bernstein's popular `times 33' hash function as
335 * posted by him years ago on comp.lang.c. It basically uses a function
336 * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
337 * known hash functions for strings. Because it is both computed very
338 * fast and distributes very well.
339 *
340 * The magic of number 33, i.e. why it works better than many other
341 * constants, prime or not, has never been adequately explained by
342 * anyone. So I try an explanation: if one experimentally tests all
343 * multipliers between 1 and 256 (as RSE did now) one detects that even
344 * numbers are not useable at all. The remaining 128 odd numbers
345 * (except for the number 1) work more or less all equally well. They
346 * all distribute in an acceptable way and this way fill a hash table
347 * with an average percent of approx. 86%.
348 *
349 * If one compares the Chi^2 values of the variants, the number 33 not
350 * even has the best value. But the number 33 and a few other equally
351 * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
352 * advantage to the remaining numbers in the large set of possible
353 * multipliers: their multiply operation can be replaced by a faster
354 * operation based on just one shift plus either a single addition
355 * or subtraction operation. And because a hash function has to both
356 * distribute good _and_ has to be very fast to compute, those few
357 * numbers should be preferred and seems to be the reason why Daniel J.
358 * Bernstein also preferred it.
359 *
360 *
361 * -- Ralf S. Engelschall <rse@engelschall.com>
362 */
363
zend_inline_hash_func(const char * str,size_t len)364 static zend_always_inline zend_ulong zend_inline_hash_func(const char *str, size_t len)
365 {
366 zend_ulong hash = Z_UL(5381);
367
368 /* variant with the hash unrolled eight times */
369 for (; len >= 8; len -= 8) {
370 hash = ((hash << 5) + hash) + *str++;
371 hash = ((hash << 5) + hash) + *str++;
372 hash = ((hash << 5) + hash) + *str++;
373 hash = ((hash << 5) + hash) + *str++;
374 hash = ((hash << 5) + hash) + *str++;
375 hash = ((hash << 5) + hash) + *str++;
376 hash = ((hash << 5) + hash) + *str++;
377 hash = ((hash << 5) + hash) + *str++;
378 }
379 switch (len) {
380 case 7: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
381 case 6: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
382 case 5: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
383 case 4: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
384 case 3: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
385 case 2: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
386 case 1: hash = ((hash << 5) + hash) + *str++; break;
387 case 0: break;
388 EMPTY_SWITCH_DEFAULT_CASE()
389 }
390
391 /* Hash value can't be zero, so we always set the high bit */
392 #if SIZEOF_ZEND_LONG == 8
393 return hash | Z_UL(0x8000000000000000);
394 #elif SIZEOF_ZEND_LONG == 4
395 return hash | Z_UL(0x80000000);
396 #else
397 # error "Unknown SIZEOF_ZEND_LONG"
398 #endif
399 }
400
/*
 * X-macro listing every known string as an (identifier, literal) pair.
 * Invoke it with a two-argument macro to generate code per entry; the enum
 * zend_known_string_id is produced this way. The order of the entries
 * determines the enum values, so new entries belong at the end.
 */
#define ZEND_KNOWN_STRINGS(X) \
	X(ZEND_STR_FILE,                  "file") \
	X(ZEND_STR_LINE,                  "line") \
	X(ZEND_STR_FUNCTION,              "function") \
	X(ZEND_STR_CLASS,                 "class") \
	X(ZEND_STR_OBJECT,                "object") \
	X(ZEND_STR_TYPE,                  "type") \
	X(ZEND_STR_OBJECT_OPERATOR,       "->") \
	X(ZEND_STR_PAAMAYIM_NEKUDOTAYIM,  "::") \
	X(ZEND_STR_ARGS,                  "args") \
	X(ZEND_STR_UNKNOWN,               "unknown") \
	X(ZEND_STR_EVAL,                  "eval") \
	X(ZEND_STR_INCLUDE,               "include") \
	X(ZEND_STR_REQUIRE,               "require") \
	X(ZEND_STR_INCLUDE_ONCE,          "include_once") \
	X(ZEND_STR_REQUIRE_ONCE,          "require_once") \
	X(ZEND_STR_SCALAR,                "scalar") \
	X(ZEND_STR_ERROR_REPORTING,       "error_reporting") \
	X(ZEND_STR_STATIC,                "static") \
	X(ZEND_STR_THIS,                  "this") \
	X(ZEND_STR_VALUE,                 "value") \
	X(ZEND_STR_KEY,                   "key") \
	X(ZEND_STR_MAGIC_AUTOLOAD,        "__autoload") \
	X(ZEND_STR_MAGIC_INVOKE,          "__invoke") \
	X(ZEND_STR_PREVIOUS,              "previous") \
	X(ZEND_STR_CODE,                  "code") \
	X(ZEND_STR_MESSAGE,               "message") \
	X(ZEND_STR_SEVERITY,              "severity") \
	X(ZEND_STR_STRING,                "string") \
	X(ZEND_STR_TRACE,                 "trace") \
	X(ZEND_STR_SCHEME,                "scheme") \
	X(ZEND_STR_HOST,                  "host") \
	X(ZEND_STR_PORT,                  "port") \
	X(ZEND_STR_USER,                  "user") \
	X(ZEND_STR_PASS,                  "pass") \
	X(ZEND_STR_PATH,                  "path") \
	X(ZEND_STR_QUERY,                 "query") \
	X(ZEND_STR_FRAGMENT,              "fragment") \
	X(ZEND_STR_NULL,                  "NULL") \
	X(ZEND_STR_BOOLEAN,               "boolean") \
	X(ZEND_STR_INTEGER,               "integer") \
	X(ZEND_STR_DOUBLE,                "double") \
	X(ZEND_STR_ARRAY,                 "array") \
	X(ZEND_STR_RESOURCE,              "resource") \
	X(ZEND_STR_CLOSED_RESOURCE,       "resource (closed)") \
	X(ZEND_STR_NAME,                  "name") \
	X(ZEND_STR_ARGV,                  "argv") \
	X(ZEND_STR_ARGC,                  "argc")
450
451 typedef enum _zend_known_string_id {
452 #define _ZEND_STR_ID(id, str) id,
453 ZEND_KNOWN_STRINGS(_ZEND_STR_ID)
454 #undef _ZEND_STR_ID
455 ZEND_STR_LAST_KNOWN
456 } zend_known_string_id;
457
458 #endif /* ZEND_STRING_H */
459
460 /*
461 * Local variables:
462 * tab-width: 4
463 * c-basic-offset: 4
464 * indent-tabs-mode: t
465 * End:
466 * vim600: sw=4 ts=4 fdm=marker
467 * vim<600: sw=4 ts=4
468 */
469