xref: /php-src/Zend/zend_string.h (revision 5c3a6eae)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) Zend Technologies Ltd. (http://www.zend.com)           |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Dmitry Stogov <dmitry@php.net>                              |
16    +----------------------------------------------------------------------+
17 */
18 
19 #ifndef ZEND_STRING_H
20 #define ZEND_STRING_H
21 
22 #include "zend_types.h"
23 #include "zend_gc.h"
24 #include "zend_alloc.h"
25 
26 BEGIN_EXTERN_C()
27 
28 typedef void (*zend_string_copy_storage_func_t)(void);
29 typedef zend_string *(ZEND_FASTCALL *zend_new_interned_string_func_t)(zend_string *str);
30 typedef zend_string *(ZEND_FASTCALL *zend_string_init_interned_func_t)(const char *str, size_t size, bool permanent);
31 typedef zend_string *(ZEND_FASTCALL *zend_string_init_existing_interned_func_t)(const char *str, size_t size, bool permanent);
32 
33 ZEND_API extern zend_new_interned_string_func_t zend_new_interned_string;
34 ZEND_API extern zend_string_init_interned_func_t zend_string_init_interned;
35 /* Init an interned string if it already exists, but do not create a new one if it does not. */
36 ZEND_API extern zend_string_init_existing_interned_func_t zend_string_init_existing_interned;
37 
38 ZEND_API zend_ulong ZEND_FASTCALL zend_string_hash_func(zend_string *str);
39 ZEND_API zend_ulong ZEND_FASTCALL zend_hash_func(const char *str, size_t len);
40 ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_string *str);
41 
42 ZEND_API zend_string *zend_string_concat2(
43 	const char *str1, size_t str1_len,
44 	const char *str2, size_t str2_len);
45 ZEND_API zend_string *zend_string_concat3(
46 	const char *str1, size_t str1_len,
47 	const char *str2, size_t str2_len,
48 	const char *str3, size_t str3_len);
49 
50 ZEND_API void zend_interned_strings_init(void);
51 ZEND_API void zend_interned_strings_dtor(void);
52 ZEND_API void zend_interned_strings_activate(void);
53 ZEND_API void zend_interned_strings_deactivate(void);
54 ZEND_API void zend_interned_strings_set_request_storage_handlers(
55 	zend_new_interned_string_func_t handler,
56 	zend_string_init_interned_func_t init_handler,
57 	zend_string_init_existing_interned_func_t init_existing_handler);
58 ZEND_API void zend_interned_strings_switch_storage(bool request);
59 
60 ZEND_API extern zend_string  *zend_empty_string;
61 ZEND_API extern zend_string  *zend_one_char_string[256];
62 ZEND_API extern zend_string **zend_known_strings;
63 
END_EXTERN_C()64 END_EXTERN_C()
65 
66 /* Shortcuts */
67 
/* Direct member access on a zend_string pointer. */
#define ZSTR_VAL(s)   (s)->val
#define ZSTR_LEN(s)   (s)->len
#define ZSTR_H(s)     (s)->h
/* Hash of the string, obtained through zend_string_hash_val(). */
#define ZSTR_HASH(s)  zend_string_hash_val(s)
72 
73 /* Compatibility macros */
74 
/* Old spellings, each forwarding to its ZSTR_-prefixed counterpart. */
#define IS_INTERNED(zs)                      ZSTR_IS_INTERNED(zs)
#define STR_EMPTY_ALLOC()                    ZSTR_EMPTY_ALLOC()
#define _STR_HEADER_SIZE                     _ZSTR_HEADER_SIZE
#define STR_ALLOCA_ALLOC(zs, n, heap)        ZSTR_ALLOCA_ALLOC(zs, n, heap)
#define STR_ALLOCA_INIT(zs, src, n, heap)    ZSTR_ALLOCA_INIT(zs, src, n, heap)
#define STR_ALLOCA_FREE(zs, heap)            ZSTR_ALLOCA_FREE(zs, heap)
81 
82 /*---*/
83 
/* Non-zero when the corresponding GC flag is set on the string. */
#define ZSTR_IS_INTERNED(zs)    (GC_FLAGS(zs) & IS_STR_INTERNED)
#define ZSTR_IS_VALID_UTF8(zs)  (GC_FLAGS(zs) & IS_STR_VALID_UTF8)
86 
/* Properties, encoded as flags, that still hold on the string produced by
 * concatenating two strings which both have them.
 * Example: concatenating two UTF-8 strings yields another UTF-8 string. */
#define ZSTR_COPYABLE_CONCAT_PROPERTIES (IS_STR_VALID_UTF8)

/* Copyable concat properties set on a single string. */
#define ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(zs) \
	(GC_FLAGS(zs) & ZSTR_COPYABLE_CONCAT_PROPERTIES)
/* Copyable concat properties set on both strings. */
#define ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(zs1, zs2) \
	(GC_FLAGS(zs1) & GC_FLAGS(zs2) & ZSTR_COPYABLE_CONCAT_PROPERTIES)
95 
/* Adds to `out` the copyable concat properties found on `in`.
 * The macro-local names carry a `_zstr_ccp_` prefix so that a caller
 * argument spelled `properties` or `_out` is not captured by the locals. */
#define ZSTR_COPY_CONCAT_PROPERTIES(out, in) do { \
	zend_string *_zstr_ccp_out = (out); \
	uint32_t _zstr_ccp_props = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES((in)); \
	GC_ADD_FLAGS(_zstr_ccp_out, _zstr_ccp_props); \
} while (0)

/* Adds to `out` the copyable concat properties present on both `in1` and `in2`. */
#define ZSTR_COPY_CONCAT_PROPERTIES_BOTH(out, in1, in2) do { \
	zend_string *_zstr_ccp_out = (out); \
	uint32_t _zstr_ccp_props = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH((in1), (in2)); \
	GC_ADD_FLAGS(_zstr_ccp_out, _zstr_ccp_props); \
} while (0)
107 
/* Accessors for the shared string objects declared in this header. */
#define ZSTR_EMPTY_ALLOC()  zend_empty_string
#define ZSTR_CHAR(ch)       zend_one_char_string[ch]
#define ZSTR_KNOWN(id)      zend_known_strings[id]
111 
/* Offset of the character data inside zend_string. */
#define _ZSTR_HEADER_SIZE XtOffsetOf(zend_string, val)

/* Bytes needed for a string of `len` characters: header, payload and one
 * extra byte. `len` is parenthesised so that any expression (for example a
 * conditional expression) can be passed as the argument. */
#define _ZSTR_STRUCT_SIZE(len) (_ZSTR_HEADER_SIZE + (len) + 1)

/* Largest length for which the aligned header plus one byte still fits in a size_t. */
#define ZSTR_MAX_OVERHEAD (ZEND_MM_ALIGNED_SIZE(_ZSTR_HEADER_SIZE + 1))
#define ZSTR_MAX_LEN (SIZE_MAX - ZSTR_MAX_OVERHEAD)
118 
/* Obtains storage for a string of length `n` through do_alloca() and fills in
 * the header (refcount 1, type GC_STRING, no cached hash). The character data
 * is not written. */
#define ZSTR_ALLOCA_ALLOC(zs, n, heap) do { \
	(zs) = (zend_string *)do_alloca(ZEND_MM_ALIGNED_SIZE_EX(_ZSTR_STRUCT_SIZE(n), 8), (heap)); \
	GC_SET_REFCOUNT(zs, 1); \
	GC_TYPE_INFO(zs) = GC_STRING; \
	ZSTR_H(zs) = 0; \
	ZSTR_LEN(zs) = n; \
} while (0)

/* As ZSTR_ALLOCA_ALLOC, then copies `n` bytes from `src` and NUL-terminates. */
#define ZSTR_ALLOCA_INIT(zs, src, n, heap) do { \
	ZSTR_ALLOCA_ALLOC(zs, n, heap); \
	memcpy(ZSTR_VAL(zs), (src), (n)); \
	ZSTR_VAL(zs)[(n)] = '\0'; \
} while (0)

/* Releases storage obtained with the two macros above. */
#define ZSTR_ALLOCA_FREE(zs, heap) free_alloca(zs, heap)
134 
/* zend_string_init() with the length taken from strlen() of the argument. */
#define ZSTR_INIT_LITERAL(lit, is_persistent) \
	(zend_string_init((lit), strlen(lit), (is_persistent)))
136 
137 /*---*/
138 
139 static zend_always_inline zend_ulong zend_string_hash_val(zend_string *s)
140 {
141 	return ZSTR_H(s) ? ZSTR_H(s) : zend_string_hash_func(s);
142 }
143 
zend_string_forget_hash_val(zend_string * s)144 static zend_always_inline void zend_string_forget_hash_val(zend_string *s)
145 {
146 	ZSTR_H(s) = 0;
147 	GC_DEL_FLAGS(s, IS_STR_VALID_UTF8);
148 }
149 
zend_string_refcount(const zend_string * s)150 static zend_always_inline uint32_t zend_string_refcount(const zend_string *s)
151 {
152 	if (!ZSTR_IS_INTERNED(s)) {
153 		return GC_REFCOUNT(s);
154 	}
155 	return 1;
156 }
157 
zend_string_addref(zend_string * s)158 static zend_always_inline uint32_t zend_string_addref(zend_string *s)
159 {
160 	if (!ZSTR_IS_INTERNED(s)) {
161 		return GC_ADDREF(s);
162 	}
163 	return 1;
164 }
165 
zend_string_delref(zend_string * s)166 static zend_always_inline uint32_t zend_string_delref(zend_string *s)
167 {
168 	if (!ZSTR_IS_INTERNED(s)) {
169 		return GC_DELREF(s);
170 	}
171 	return 1;
172 }
173 
zend_string_alloc(size_t len,bool persistent)174 static zend_always_inline zend_string *zend_string_alloc(size_t len, bool persistent)
175 {
176 	zend_string *ret = (zend_string *)pemalloc(ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
177 
178 	GC_SET_REFCOUNT(ret, 1);
179 	GC_TYPE_INFO(ret) = GC_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << GC_FLAGS_SHIFT);
180 	ZSTR_H(ret) = 0;
181 	ZSTR_LEN(ret) = len;
182 	return ret;
183 }
184 
zend_string_safe_alloc(size_t n,size_t m,size_t l,bool persistent)185 static zend_always_inline zend_string *zend_string_safe_alloc(size_t n, size_t m, size_t l, bool persistent)
186 {
187 	zend_string *ret = (zend_string *)safe_pemalloc(n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
188 
189 	GC_SET_REFCOUNT(ret, 1);
190 	GC_TYPE_INFO(ret) = GC_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << GC_FLAGS_SHIFT);
191 	ZSTR_H(ret) = 0;
192 	ZSTR_LEN(ret) = (n * m) + l;
193 	return ret;
194 }
195 
zend_string_init(const char * str,size_t len,bool persistent)196 static zend_always_inline zend_string *zend_string_init(const char *str, size_t len, bool persistent)
197 {
198 	zend_string *ret = zend_string_alloc(len, persistent);
199 
200 	memcpy(ZSTR_VAL(ret), str, len);
201 	ZSTR_VAL(ret)[len] = '\0';
202 	return ret;
203 }
204 
zend_string_init_fast(const char * str,size_t len)205 static zend_always_inline zend_string *zend_string_init_fast(const char *str, size_t len)
206 {
207 	if (len > 1) {
208 		return zend_string_init(str, len, 0);
209 	} else if (len == 0) {
210 		return zend_empty_string;
211 	} else /* if (len == 1) */ {
212 		return ZSTR_CHAR((zend_uchar) *str);
213 	}
214 }
215 
zend_string_copy(zend_string * s)216 static zend_always_inline zend_string *zend_string_copy(zend_string *s)
217 {
218 	if (!ZSTR_IS_INTERNED(s)) {
219 		GC_ADDREF(s);
220 	}
221 	return s;
222 }
223 
zend_string_dup(zend_string * s,bool persistent)224 static zend_always_inline zend_string *zend_string_dup(zend_string *s, bool persistent)
225 {
226 	if (ZSTR_IS_INTERNED(s)) {
227 		return s;
228 	} else {
229 		return zend_string_init(ZSTR_VAL(s), ZSTR_LEN(s), persistent);
230 	}
231 }
232 
zend_string_separate(zend_string * s,bool persistent)233 static zend_always_inline zend_string *zend_string_separate(zend_string *s, bool persistent)
234 {
235 	if (ZSTR_IS_INTERNED(s) || GC_REFCOUNT(s) > 1) {
236 		if (!ZSTR_IS_INTERNED(s)) {
237 			GC_DELREF(s);
238 		}
239 		return zend_string_init(ZSTR_VAL(s), ZSTR_LEN(s), persistent);
240 	}
241 
242 	zend_string_forget_hash_val(s);
243 	return s;
244 }
245 
zend_string_realloc(zend_string * s,size_t len,bool persistent)246 static zend_always_inline zend_string *zend_string_realloc(zend_string *s, size_t len, bool persistent)
247 {
248 	zend_string *ret;
249 
250 	if (!ZSTR_IS_INTERNED(s)) {
251 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
252 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
253 			ZSTR_LEN(ret) = len;
254 			zend_string_forget_hash_val(ret);
255 			return ret;
256 		}
257 	}
258 	ret = zend_string_alloc(len, persistent);
259 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN(len, ZSTR_LEN(s)) + 1);
260 	if (!ZSTR_IS_INTERNED(s)) {
261 		GC_DELREF(s);
262 	}
263 	return ret;
264 }
265 
zend_string_extend(zend_string * s,size_t len,bool persistent)266 static zend_always_inline zend_string *zend_string_extend(zend_string *s, size_t len, bool persistent)
267 {
268 	zend_string *ret;
269 
270 	ZEND_ASSERT(len >= ZSTR_LEN(s));
271 	if (!ZSTR_IS_INTERNED(s)) {
272 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
273 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
274 			ZSTR_LEN(ret) = len;
275 			zend_string_forget_hash_val(ret);
276 			return ret;
277 		}
278 	}
279 	ret = zend_string_alloc(len, persistent);
280 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), ZSTR_LEN(s) + 1);
281 	if (!ZSTR_IS_INTERNED(s)) {
282 		GC_DELREF(s);
283 	}
284 	return ret;
285 }
286 
zend_string_truncate(zend_string * s,size_t len,bool persistent)287 static zend_always_inline zend_string *zend_string_truncate(zend_string *s, size_t len, bool persistent)
288 {
289 	zend_string *ret;
290 
291 	ZEND_ASSERT(len <= ZSTR_LEN(s));
292 	if (!ZSTR_IS_INTERNED(s)) {
293 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
294 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
295 			ZSTR_LEN(ret) = len;
296 			zend_string_forget_hash_val(ret);
297 			return ret;
298 		}
299 	}
300 	ret = zend_string_alloc(len, persistent);
301 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), len + 1);
302 	if (!ZSTR_IS_INTERNED(s)) {
303 		GC_DELREF(s);
304 	}
305 	return ret;
306 }
307 
zend_string_safe_realloc(zend_string * s,size_t n,size_t m,size_t l,bool persistent)308 static zend_always_inline zend_string *zend_string_safe_realloc(zend_string *s, size_t n, size_t m, size_t l, bool persistent)
309 {
310 	zend_string *ret;
311 
312 	if (!ZSTR_IS_INTERNED(s)) {
313 		if (GC_REFCOUNT(s) == 1) {
314 			ret = (zend_string *)safe_perealloc(s, n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
315 			ZSTR_LEN(ret) = (n * m) + l;
316 			zend_string_forget_hash_val(ret);
317 			return ret;
318 		}
319 	}
320 	ret = zend_string_safe_alloc(n, m, l, persistent);
321 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN((n * m) + l, ZSTR_LEN(s)) + 1);
322 	if (!ZSTR_IS_INTERNED(s)) {
323 		GC_DELREF(s);
324 	}
325 	return ret;
326 }
327 
zend_string_free(zend_string * s)328 static zend_always_inline void zend_string_free(zend_string *s)
329 {
330 	if (!ZSTR_IS_INTERNED(s)) {
331 		ZEND_ASSERT(GC_REFCOUNT(s) <= 1);
332 		pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
333 	}
334 }
335 
zend_string_efree(zend_string * s)336 static zend_always_inline void zend_string_efree(zend_string *s)
337 {
338 	ZEND_ASSERT(!ZSTR_IS_INTERNED(s));
339 	ZEND_ASSERT(GC_REFCOUNT(s) <= 1);
340 	ZEND_ASSERT(!(GC_FLAGS(s) & IS_STR_PERSISTENT));
341 	efree(s);
342 }
343 
zend_string_release(zend_string * s)344 static zend_always_inline void zend_string_release(zend_string *s)
345 {
346 	if (!ZSTR_IS_INTERNED(s)) {
347 		if (GC_DELREF(s) == 0) {
348 			pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
349 		}
350 	}
351 }
352 
zend_string_release_ex(zend_string * s,bool persistent)353 static zend_always_inline void zend_string_release_ex(zend_string *s, bool persistent)
354 {
355 	if (!ZSTR_IS_INTERNED(s)) {
356 		if (GC_DELREF(s) == 0) {
357 			if (persistent) {
358 				ZEND_ASSERT(GC_FLAGS(s) & IS_STR_PERSISTENT);
359 				free(s);
360 			} else {
361 				ZEND_ASSERT(!(GC_FLAGS(s) & IS_STR_PERSISTENT));
362 				efree(s);
363 			}
364 		}
365 	}
366 }
367 
zend_string_equals_cstr(const zend_string * s1,const char * s2,size_t s2_length)368 static zend_always_inline bool zend_string_equals_cstr(const zend_string *s1, const char *s2, size_t s2_length)
369 {
370 	return ZSTR_LEN(s1) == s2_length && !memcmp(ZSTR_VAL(s1), s2, s2_length);
371 }
372 
373 #if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64__) && !defined(__ILP32__)))
374 BEGIN_EXTERN_C()
375 ZEND_API bool ZEND_FASTCALL zend_string_equal_val(const zend_string *s1, const zend_string *s2);
END_EXTERN_C()376 END_EXTERN_C()
377 #else
378 static zend_always_inline bool zend_string_equal_val(const zend_string *s1, const zend_string *s2)
379 {
380 	return !memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), ZSTR_LEN(s1));
381 }
382 #endif
383 
384 static zend_always_inline bool zend_string_equal_content(const zend_string *s1, const zend_string *s2)
385 {
386 	return ZSTR_LEN(s1) == ZSTR_LEN(s2) && zend_string_equal_val(s1, s2);
387 }
388 
zend_string_equals(const zend_string * s1,const zend_string * s2)389 static zend_always_inline bool zend_string_equals(const zend_string *s1, const zend_string *s2)
390 {
391 	return s1 == s2 || zend_string_equal_content(s1, s2);
392 }
393 
/* Equal lengths and zend_binary_strcasecmp() reporting no difference. */
#define zend_string_equals_ci(a, b) \
	(ZSTR_LEN(a) == ZSTR_LEN(b) && !zend_binary_strcasecmp(ZSTR_VAL(a), ZSTR_LEN(a), ZSTR_VAL(b), ZSTR_LEN(b)))

/* Same, against a string literal (the `"" lit` pasting rejects non-literals). */
#define zend_string_equals_literal_ci(zs, lit) \
	(ZSTR_LEN(zs) == sizeof("" lit) - 1 && !zend_binary_strcasecmp(ZSTR_VAL(zs), ZSTR_LEN(zs), (lit), sizeof(lit) - 1))

/* Case-sensitive comparison against a string literal. */
#define zend_string_equals_literal(zs, lit) \
	zend_string_equals_cstr(zs, "" lit, sizeof(lit) - 1)
402 
zend_string_starts_with_cstr(const zend_string * str,const char * prefix,size_t prefix_length)403 static zend_always_inline bool zend_string_starts_with_cstr(const zend_string *str, const char *prefix, size_t prefix_length)
404 {
405 	return ZSTR_LEN(str) >= prefix_length && !memcmp(ZSTR_VAL(str), prefix, prefix_length);
406 }
407 
zend_string_starts_with(const zend_string * str,const zend_string * prefix)408 static zend_always_inline bool zend_string_starts_with(const zend_string *str, const zend_string *prefix)
409 {
410 	return zend_string_starts_with_cstr(str, ZSTR_VAL(prefix), ZSTR_LEN(prefix));
411 }
412 
/* Case-sensitive prefix test; the prefix length is taken with strlen(). */
#define zend_string_starts_with_literal(zs, lit) \
	zend_string_starts_with_cstr(zs, lit, strlen(lit))
415 
zend_string_starts_with_cstr_ci(const zend_string * str,const char * prefix,size_t prefix_length)416 static zend_always_inline bool zend_string_starts_with_cstr_ci(const zend_string *str, const char *prefix, size_t prefix_length)
417 {
418 	return ZSTR_LEN(str) >= prefix_length && !strncasecmp(ZSTR_VAL(str), prefix, prefix_length);
419 }
420 
zend_string_starts_with_ci(const zend_string * str,const zend_string * prefix)421 static zend_always_inline bool zend_string_starts_with_ci(const zend_string *str, const zend_string *prefix)
422 {
423 	return zend_string_starts_with_cstr_ci(str, ZSTR_VAL(prefix), ZSTR_LEN(prefix));
424 }
425 
/* Case-insensitive prefix test against `prefix`, whose length is taken with
 * strlen(). Dispatches to the _ci helper: the case-sensitive
 * zend_string_starts_with_cstr() would make this macro behave exactly like
 * zend_string_starts_with_literal(). */
#define zend_string_starts_with_literal_ci(str, prefix) \
	zend_string_starts_with_cstr_ci(str, prefix, strlen(prefix))
428 
429 /*
430  * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
431  *
432  * This is Daniel J. Bernstein's popular `times 33' hash function as
433  * posted by him years ago on comp.lang.c. It basically uses a function
434  * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
435  * known hash functions for strings. Because it is both computed very
436  * fast and distributes very well.
437  *
438  * The magic of number 33, i.e. why it works better than many other
439  * constants, prime or not, has never been adequately explained by
440  * anyone. So I try an explanation: if one experimentally tests all
441  * multipliers between 1 and 256 (as RSE did now) one detects that even
442  * numbers are not usable at all. The remaining 128 odd numbers
443  * (except for the number 1) work more or less all equally well. They
444  * all distribute in an acceptable way and this way fill a hash table
445  * with an average percent of approx. 86%.
446  *
447  * If one compares the Chi^2 values of the variants, the number 33 not
448  * even has the best value. But the number 33 and a few other equally
449  * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
450  * advantage to the remaining numbers in the large set of possible
451  * multipliers: their multiply operation can be replaced by a faster
452  * operation based on just one shift plus either a single addition
453  * or subtraction operation. And because a hash function has to both
454  * distribute good _and_ has to be very fast to compute, those few
455  * numbers should be preferred and seems to be the reason why Daniel J.
456  * Bernstein also preferred it.
457  *
458  *
459  *                  -- Ralf S. Engelschall <rse@engelschall.com>
460  */
461 
zend_inline_hash_func(const char * str,size_t len)462 static zend_always_inline zend_ulong zend_inline_hash_func(const char *str, size_t len)
463 {
464 	zend_ulong hash = Z_UL(5381);
465 
466 #if defined(_WIN32) || defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
467 	/* Version with multiplication works better on modern CPU */
468 	for (; len >= 8; len -= 8, str += 8) {
469 # if defined(__aarch64__) && !defined(WORDS_BIGENDIAN)
470 		/* On some architectures it is beneficial to load 8 bytes at a
471 		   time and extract each byte with a bit field extract instr. */
472 		uint64_t chunk;
473 
474 		memcpy(&chunk, str, sizeof(chunk));
475 		hash =
476 			hash                        * 33 * 33 * 33 * 33 +
477 			((chunk >> (8 * 0)) & 0xff) * 33 * 33 * 33 +
478 			((chunk >> (8 * 1)) & 0xff) * 33 * 33 +
479 			((chunk >> (8 * 2)) & 0xff) * 33 +
480 			((chunk >> (8 * 3)) & 0xff);
481 		hash =
482 			hash                        * 33 * 33 * 33 * 33 +
483 			((chunk >> (8 * 4)) & 0xff) * 33 * 33 * 33 +
484 			((chunk >> (8 * 5)) & 0xff) * 33 * 33 +
485 			((chunk >> (8 * 6)) & 0xff) * 33 +
486 			((chunk >> (8 * 7)) & 0xff);
487 # else
488 		hash =
489 			hash   * Z_L(33 * 33 * 33 * 33) +
490 			str[0] * Z_L(33 * 33 * 33) +
491 			str[1] * Z_L(33 * 33) +
492 			str[2] * Z_L(33) +
493 			str[3];
494 		hash =
495 			hash   * Z_L(33 * 33 * 33 * 33) +
496 			str[4] * Z_L(33 * 33 * 33) +
497 			str[5] * Z_L(33 * 33) +
498 			str[6] * Z_L(33) +
499 			str[7];
500 # endif
501 	}
502 	if (len >= 4) {
503 		hash =
504 			hash   * Z_L(33 * 33 * 33 * 33) +
505 			str[0] * Z_L(33 * 33 * 33) +
506 			str[1] * Z_L(33 * 33) +
507 			str[2] * Z_L(33) +
508 			str[3];
509 		len -= 4;
510 		str += 4;
511 	}
512 	if (len >= 2) {
513 		if (len > 2) {
514 			hash =
515 				hash   * Z_L(33 * 33 * 33) +
516 				str[0] * Z_L(33 * 33) +
517 				str[1] * Z_L(33) +
518 				str[2];
519 		} else {
520 			hash =
521 				hash   * Z_L(33 * 33) +
522 				str[0] * Z_L(33) +
523 				str[1];
524 		}
525 	} else if (len != 0) {
526 		hash = hash * Z_L(33) + *str;
527 	}
528 #else
529 	/* variant with the hash unrolled eight times */
530 	for (; len >= 8; len -= 8) {
531 		hash = ((hash << 5) + hash) + *str++;
532 		hash = ((hash << 5) + hash) + *str++;
533 		hash = ((hash << 5) + hash) + *str++;
534 		hash = ((hash << 5) + hash) + *str++;
535 		hash = ((hash << 5) + hash) + *str++;
536 		hash = ((hash << 5) + hash) + *str++;
537 		hash = ((hash << 5) + hash) + *str++;
538 		hash = ((hash << 5) + hash) + *str++;
539 	}
540 	switch (len) {
541 		case 7: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
542 		case 6: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
543 		case 5: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
544 		case 4: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
545 		case 3: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
546 		case 2: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
547 		case 1: hash = ((hash << 5) + hash) + *str++; break;
548 		case 0: break;
549 EMPTY_SWITCH_DEFAULT_CASE()
550 	}
551 #endif
552 
553 	/* Hash value can't be zero, so we always set the high bit */
554 #if SIZEOF_ZEND_LONG == 8
555 	return hash | Z_UL(0x8000000000000000);
556 #elif SIZEOF_ZEND_LONG == 4
557 	return hash | Z_UL(0x80000000);
558 #else
559 # error "Unknown SIZEOF_ZEND_LONG"
560 #endif
561 }
562 
/* X-macro list of (identifier, literal) pairs. The argument `X` is invoked
 * once per pair, in the order listed; that order fixes the values of the
 * zend_known_string_id enumerators generated from this list. */
#define ZEND_KNOWN_STRINGS(X) \
	X(ZEND_STR_FILE,                   "file") \
	X(ZEND_STR_LINE,                   "line") \
	X(ZEND_STR_FUNCTION,               "function") \
	X(ZEND_STR_CLASS,                  "class") \
	X(ZEND_STR_OBJECT,                 "object") \
	X(ZEND_STR_TYPE,                   "type") \
	X(ZEND_STR_OBJECT_OPERATOR,        "->") \
	X(ZEND_STR_PAAMAYIM_NEKUDOTAYIM,   "::") \
	X(ZEND_STR_ARGS,                   "args") \
	X(ZEND_STR_UNKNOWN,                "unknown") \
	X(ZEND_STR_UNKNOWN_CAPITALIZED,    "Unknown") \
	X(ZEND_STR_EVAL,                   "eval") \
	X(ZEND_STR_INCLUDE,                "include") \
	X(ZEND_STR_REQUIRE,                "require") \
	X(ZEND_STR_INCLUDE_ONCE,           "include_once") \
	X(ZEND_STR_REQUIRE_ONCE,           "require_once") \
	X(ZEND_STR_SCALAR,                 "scalar") \
	X(ZEND_STR_ERROR_REPORTING,        "error_reporting") \
	X(ZEND_STR_STATIC,                 "static") \
	X(ZEND_STR_THIS,                   "this") \
	X(ZEND_STR_VALUE,                  "value") \
	X(ZEND_STR_KEY,                    "key") \
	X(ZEND_STR_MAGIC_INVOKE,           "__invoke") \
	X(ZEND_STR_PREVIOUS,               "previous") \
	X(ZEND_STR_CODE,                   "code") \
	X(ZEND_STR_MESSAGE,                "message") \
	X(ZEND_STR_SEVERITY,               "severity") \
	X(ZEND_STR_STRING,                 "string") \
	X(ZEND_STR_TRACE,                  "trace") \
	X(ZEND_STR_SCHEME,                 "scheme") \
	X(ZEND_STR_HOST,                   "host") \
	X(ZEND_STR_PORT,                   "port") \
	X(ZEND_STR_USER,                   "user") \
	X(ZEND_STR_PASS,                   "pass") \
	X(ZEND_STR_PATH,                   "path") \
	X(ZEND_STR_QUERY,                  "query") \
	X(ZEND_STR_FRAGMENT,               "fragment") \
	X(ZEND_STR_NULL,                   "NULL") \
	X(ZEND_STR_BOOLEAN,                "boolean") \
	X(ZEND_STR_INTEGER,                "integer") \
	X(ZEND_STR_DOUBLE,                 "double") \
	X(ZEND_STR_ARRAY,                  "array") \
	X(ZEND_STR_RESOURCE,               "resource") \
	X(ZEND_STR_CLOSED_RESOURCE,        "resource (closed)") \
	X(ZEND_STR_NAME,                   "name") \
	X(ZEND_STR_ARGV,                   "argv") \
	X(ZEND_STR_ARGC,                   "argc") \
	X(ZEND_STR_ARRAY_CAPITALIZED,      "Array") \
	X(ZEND_STR_BOOL,                   "bool") \
	X(ZEND_STR_INT,                    "int") \
	X(ZEND_STR_FLOAT,                  "float") \
	X(ZEND_STR_CALLABLE,               "callable") \
	X(ZEND_STR_ITERABLE,               "iterable") \
	X(ZEND_STR_VOID,                   "void") \
	X(ZEND_STR_NEVER,                  "never") \
	X(ZEND_STR_FALSE,                  "false") \
	X(ZEND_STR_TRUE,                   "true") \
	X(ZEND_STR_NULL_LOWERCASE,         "null") \
	X(ZEND_STR_MIXED,                  "mixed") \
	X(ZEND_STR_TRAVERSABLE,            "Traversable") \
	X(ZEND_STR_SLEEP,                  "__sleep") \
	X(ZEND_STR_WAKEUP,                 "__wakeup") \
	X(ZEND_STR_CASES,                  "cases") \
	X(ZEND_STR_FROM,                   "from") \
	X(ZEND_STR_TRYFROM,                "tryFrom") \
	X(ZEND_STR_TRYFROM_LOWERCASE,      "tryfrom") \
	X(ZEND_STR_AUTOGLOBAL_SERVER,      "_SERVER") \
	X(ZEND_STR_AUTOGLOBAL_ENV,         "_ENV") \
	X(ZEND_STR_AUTOGLOBAL_REQUEST,     "_REQUEST") \
	X(ZEND_STR_COUNT,                  "count") \
	X(ZEND_STR_SENSITIVEPARAMETER,     "SensitiveParameter") \
	X(ZEND_STR_CONST_EXPR_PLACEHOLDER, "[constant expression]") \

637 
638 typedef enum _zend_known_string_id {
639 #define _ZEND_STR_ID(id, str) id,
640 ZEND_KNOWN_STRINGS(_ZEND_STR_ID)
641 #undef _ZEND_STR_ID
642 	ZEND_STR_LAST_KNOWN
643 } zend_known_string_id;
644 
645 #endif /* ZEND_STRING_H */
646