xref: /PHP-7.3/Zend/zend_string.h (revision 573ad182)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Dmitry Stogov <dmitry@php.net>                              |
16    +----------------------------------------------------------------------+
17 */
18 
19 #ifndef ZEND_STRING_H
20 #define ZEND_STRING_H
21 
22 #include "zend.h"
23 
24 BEGIN_EXTERN_C()
25 
26 typedef void (*zend_string_copy_storage_func_t)(void);
27 typedef zend_string *(ZEND_FASTCALL *zend_new_interned_string_func_t)(zend_string *str);
28 typedef zend_string *(ZEND_FASTCALL *zend_string_init_interned_func_t)(const char *str, size_t size, int permanent);
29 
30 ZEND_API extern zend_new_interned_string_func_t zend_new_interned_string;
31 ZEND_API extern zend_string_init_interned_func_t zend_string_init_interned;
32 
33 ZEND_API zend_ulong ZEND_FASTCALL zend_string_hash_func(zend_string *str);
34 ZEND_API zend_ulong ZEND_FASTCALL zend_hash_func(const char *str, size_t len);
35 ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_string *str);
36 
37 ZEND_API void zend_interned_strings_init(void);
38 ZEND_API void zend_interned_strings_dtor(void);
39 ZEND_API void zend_interned_strings_activate(void);
40 ZEND_API void zend_interned_strings_deactivate(void);
41 ZEND_API void zend_interned_strings_set_request_storage_handlers(zend_new_interned_string_func_t handler, zend_string_init_interned_func_t init_handler);
42 ZEND_API void zend_interned_strings_set_permanent_storage_copy_handlers(zend_string_copy_storage_func_t copy_handler, zend_string_copy_storage_func_t restore_handler);
43 ZEND_API void zend_interned_strings_switch_storage(zend_bool request);
44 
45 ZEND_API extern zend_string  *zend_empty_string;
46 ZEND_API extern zend_string  *zend_one_char_string[256];
47 ZEND_API extern zend_string **zend_known_strings;
48 
END_EXTERN_C()49 END_EXTERN_C()
50 
51 /* Shortcuts */
52 
/* Direct member access on a zend_string pointer. */
#define ZSTR_VAL(zstr)   (zstr)->val
#define ZSTR_LEN(zstr)   (zstr)->len
#define ZSTR_H(zstr)     (zstr)->h

/* Hash of the string, obtained through zend_string_hash_val(). */
#define ZSTR_HASH(zstr)  zend_string_hash_val(zstr)
57 
58 /* Compatibility macros */
59 
/* Older, unprefixed spellings; each one forwards unchanged to its ZSTR_ counterpart. */
#define IS_INTERNED(s)                          ZSTR_IS_INTERNED(s)
#define STR_EMPTY_ALLOC()                       ZSTR_EMPTY_ALLOC()
#define _STR_HEADER_SIZE                        _ZSTR_HEADER_SIZE
#define STR_ALLOCA_ALLOC(str, _len, use_heap)   ZSTR_ALLOCA_ALLOC(str, _len, use_heap)
#define STR_ALLOCA_INIT(str, s, len, use_heap)  ZSTR_ALLOCA_INIT(str, s, len, use_heap)
#define STR_ALLOCA_FREE(str, use_heap)          ZSTR_ALLOCA_FREE(str, use_heap)
66 
67 /*---*/
68 
/* Non-zero when the IS_STR_INTERNED bit is set in the string's GC flags. */
#define ZSTR_IS_INTERNED(s)  (GC_FLAGS(s) & IS_STR_INTERNED)

/* Accessors for the shared string objects declared earlier in this header. */
#define ZSTR_EMPTY_ALLOC()   zend_empty_string
#define ZSTR_CHAR(c)         zend_one_char_string[c]   /* table has 256 entries; c is used as the index as-is */
#define ZSTR_KNOWN(idx)      zend_known_strings[idx]
74 
/* Byte offset of the character payload (`val`) inside zend_string. */
#define _ZSTR_HEADER_SIZE XtOffsetOf(zend_string, val)

/*
 * Number of bytes needed for a zend_string holding `len` payload bytes:
 * header + payload + 1 (the helpers in this header store a '\0' at val[len]).
 *
 * `len` is parenthesized so that expression arguments with low-precedence
 * operators (for example `a ? b : c`) are added as a single operand instead
 * of being re-associated with the surrounding `+`.
 */
#define _ZSTR_STRUCT_SIZE(len) (_ZSTR_HEADER_SIZE + (len) + 1)
78 
/*
 * Obtain storage for a zend_string of `_len` payload bytes through do_alloca()
 * and initialise its header: refcount 1, type info IS_STRING, hash cleared,
 * length `_len`. The payload itself is not written.
 *
 *   str       lvalue of type zend_string *, receives the new string
 *   _len      payload length; evaluated twice, so it must be free of side effects
 *   use_heap  forwarded to do_alloca(); pass the same object to ZSTR_ALLOCA_FREE()
 *
 * `_len` is parenthesized at every use so that expression arguments expand
 * correctly, both in the size computation and in the length assignment.
 */
#define ZSTR_ALLOCA_ALLOC(str, _len, use_heap) do { \
	(str) = (zend_string *)do_alloca(ZEND_MM_ALIGNED_SIZE_EX(_ZSTR_STRUCT_SIZE((_len)), 8), (use_heap)); \
	GC_SET_REFCOUNT(str, 1); \
	GC_TYPE_INFO(str) = IS_STRING; \
	zend_string_forget_hash_val(str); \
	ZSTR_LEN(str) = (_len); \
} while (0)
86 
/*
 * ZSTR_ALLOCA_ALLOC() followed by copying `len` bytes from `s` into the new
 * string and storing a terminating '\0' at index `len`.
 *
 * `len` is evaluated more than once and must be free of side effects. It is
 * forwarded parenthesized so that expression arguments keep their meaning
 * inside the allocation-size computation.
 */
#define ZSTR_ALLOCA_INIT(str, s, len, use_heap) do { \
	ZSTR_ALLOCA_ALLOC(str, (len), use_heap); \
	memcpy(ZSTR_VAL(str), (s), (len)); \
	ZSTR_VAL(str)[(len)] = '\0'; \
} while (0)

/* Release a string obtained with ZSTR_ALLOCA_ALLOC()/ZSTR_ALLOCA_INIT(); forwards to free_alloca(). */
#define ZSTR_ALLOCA_FREE(str, use_heap) free_alloca(str, use_heap)
94 
95 /*---*/
96 
97 static zend_always_inline zend_ulong zend_string_hash_val(zend_string *s)
98 {
99 	return ZSTR_H(s) ? ZSTR_H(s) : zend_string_hash_func(s);
100 }
101 
zend_string_forget_hash_val(zend_string * s)102 static zend_always_inline void zend_string_forget_hash_val(zend_string *s)
103 {
104 	ZSTR_H(s) = 0;
105 }
106 
zend_string_refcount(const zend_string * s)107 static zend_always_inline uint32_t zend_string_refcount(const zend_string *s)
108 {
109 	if (!ZSTR_IS_INTERNED(s)) {
110 		return GC_REFCOUNT(s);
111 	}
112 	return 1;
113 }
114 
zend_string_addref(zend_string * s)115 static zend_always_inline uint32_t zend_string_addref(zend_string *s)
116 {
117 	if (!ZSTR_IS_INTERNED(s)) {
118 		return GC_ADDREF(s);
119 	}
120 	return 1;
121 }
122 
zend_string_delref(zend_string * s)123 static zend_always_inline uint32_t zend_string_delref(zend_string *s)
124 {
125 	if (!ZSTR_IS_INTERNED(s)) {
126 		return GC_DELREF(s);
127 	}
128 	return 1;
129 }
130 
zend_string_alloc(size_t len,int persistent)131 static zend_always_inline zend_string *zend_string_alloc(size_t len, int persistent)
132 {
133 	zend_string *ret = (zend_string *)pemalloc(ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
134 
135 	GC_SET_REFCOUNT(ret, 1);
136 	GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << GC_FLAGS_SHIFT);
137 	zend_string_forget_hash_val(ret);
138 	ZSTR_LEN(ret) = len;
139 	return ret;
140 }
141 
zend_string_safe_alloc(size_t n,size_t m,size_t l,int persistent)142 static zend_always_inline zend_string *zend_string_safe_alloc(size_t n, size_t m, size_t l, int persistent)
143 {
144 	zend_string *ret = (zend_string *)safe_pemalloc(n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
145 
146 	GC_SET_REFCOUNT(ret, 1);
147 	GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << GC_FLAGS_SHIFT);
148 	zend_string_forget_hash_val(ret);
149 	ZSTR_LEN(ret) = (n * m) + l;
150 	return ret;
151 }
152 
zend_string_init(const char * str,size_t len,int persistent)153 static zend_always_inline zend_string *zend_string_init(const char *str, size_t len, int persistent)
154 {
155 	zend_string *ret = zend_string_alloc(len, persistent);
156 
157 	memcpy(ZSTR_VAL(ret), str, len);
158 	ZSTR_VAL(ret)[len] = '\0';
159 	return ret;
160 }
161 
zend_string_copy(zend_string * s)162 static zend_always_inline zend_string *zend_string_copy(zend_string *s)
163 {
164 	if (!ZSTR_IS_INTERNED(s)) {
165 		GC_ADDREF(s);
166 	}
167 	return s;
168 }
169 
zend_string_dup(zend_string * s,int persistent)170 static zend_always_inline zend_string *zend_string_dup(zend_string *s, int persistent)
171 {
172 	if (ZSTR_IS_INTERNED(s)) {
173 		return s;
174 	} else {
175 		return zend_string_init(ZSTR_VAL(s), ZSTR_LEN(s), persistent);
176 	}
177 }
178 
zend_string_realloc(zend_string * s,size_t len,int persistent)179 static zend_always_inline zend_string *zend_string_realloc(zend_string *s, size_t len, int persistent)
180 {
181 	zend_string *ret;
182 
183 	if (!ZSTR_IS_INTERNED(s)) {
184 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
185 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
186 			ZSTR_LEN(ret) = len;
187 			zend_string_forget_hash_val(ret);
188 			return ret;
189 		}
190 	}
191 	ret = zend_string_alloc(len, persistent);
192 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN(len, ZSTR_LEN(s)) + 1);
193 	if (!ZSTR_IS_INTERNED(s)) {
194 		GC_DELREF(s);
195 	}
196 	return ret;
197 }
198 
zend_string_extend(zend_string * s,size_t len,int persistent)199 static zend_always_inline zend_string *zend_string_extend(zend_string *s, size_t len, int persistent)
200 {
201 	zend_string *ret;
202 
203 	ZEND_ASSERT(len >= ZSTR_LEN(s));
204 	if (!ZSTR_IS_INTERNED(s)) {
205 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
206 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
207 			ZSTR_LEN(ret) = len;
208 			zend_string_forget_hash_val(ret);
209 			return ret;
210 		}
211 	}
212 	ret = zend_string_alloc(len, persistent);
213 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), ZSTR_LEN(s) + 1);
214 	if (!ZSTR_IS_INTERNED(s)) {
215 		GC_DELREF(s);
216 	}
217 	return ret;
218 }
219 
zend_string_truncate(zend_string * s,size_t len,int persistent)220 static zend_always_inline zend_string *zend_string_truncate(zend_string *s, size_t len, int persistent)
221 {
222 	zend_string *ret;
223 
224 	ZEND_ASSERT(len <= ZSTR_LEN(s));
225 	if (!ZSTR_IS_INTERNED(s)) {
226 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
227 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
228 			ZSTR_LEN(ret) = len;
229 			zend_string_forget_hash_val(ret);
230 			return ret;
231 		}
232 	}
233 	ret = zend_string_alloc(len, persistent);
234 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), len + 1);
235 	if (!ZSTR_IS_INTERNED(s)) {
236 		GC_DELREF(s);
237 	}
238 	return ret;
239 }
240 
zend_string_safe_realloc(zend_string * s,size_t n,size_t m,size_t l,int persistent)241 static zend_always_inline zend_string *zend_string_safe_realloc(zend_string *s, size_t n, size_t m, size_t l, int persistent)
242 {
243 	zend_string *ret;
244 
245 	if (!ZSTR_IS_INTERNED(s)) {
246 		if (GC_REFCOUNT(s) == 1) {
247 			ret = (zend_string *)safe_perealloc(s, n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
248 			ZSTR_LEN(ret) = (n * m) + l;
249 			zend_string_forget_hash_val(ret);
250 			return ret;
251 		}
252 	}
253 	ret = zend_string_safe_alloc(n, m, l, persistent);
254 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN((n * m) + l, ZSTR_LEN(s)) + 1);
255 	if (!ZSTR_IS_INTERNED(s)) {
256 		GC_DELREF(s);
257 	}
258 	return ret;
259 }
260 
zend_string_free(zend_string * s)261 static zend_always_inline void zend_string_free(zend_string *s)
262 {
263 	if (!ZSTR_IS_INTERNED(s)) {
264 		ZEND_ASSERT(GC_REFCOUNT(s) <= 1);
265 		pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
266 	}
267 }
268 
zend_string_efree(zend_string * s)269 static zend_always_inline void zend_string_efree(zend_string *s)
270 {
271 	ZEND_ASSERT(!ZSTR_IS_INTERNED(s));
272 	ZEND_ASSERT(GC_REFCOUNT(s) <= 1);
273 	ZEND_ASSERT(!(GC_FLAGS(s) & IS_STR_PERSISTENT));
274 	efree(s);
275 }
276 
zend_string_release(zend_string * s)277 static zend_always_inline void zend_string_release(zend_string *s)
278 {
279 	if (!ZSTR_IS_INTERNED(s)) {
280 		if (GC_DELREF(s) == 0) {
281 			pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
282 		}
283 	}
284 }
285 
zend_string_release_ex(zend_string * s,int persistent)286 static zend_always_inline void zend_string_release_ex(zend_string *s, int persistent)
287 {
288 	if (!ZSTR_IS_INTERNED(s)) {
289 		if (GC_DELREF(s) == 0) {
290 			if (persistent) {
291 				ZEND_ASSERT(GC_FLAGS(s) & IS_STR_PERSISTENT);
292 				free(s);
293 			} else {
294 				ZEND_ASSERT(!(GC_FLAGS(s) & IS_STR_PERSISTENT));
295 				efree(s);
296 			}
297 		}
298 	}
299 }
300 
301 #if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64__) && !defined(__ILP32__)))
302 BEGIN_EXTERN_C()
303 ZEND_API zend_bool ZEND_FASTCALL zend_string_equal_val(zend_string *s1, zend_string *s2);
END_EXTERN_C()304 END_EXTERN_C()
305 #else
306 static zend_always_inline zend_bool zend_string_equal_val(zend_string *s1, zend_string *s2)
307 {
308 	return !memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), ZSTR_LEN(s1));
309 }
310 #endif
311 
312 static zend_always_inline zend_bool zend_string_equal_content(zend_string *s1, zend_string *s2)
313 {
314 	return ZSTR_LEN(s1) == ZSTR_LEN(s2) && zend_string_equal_val(s1, s2);
315 }
316 
zend_string_equals(zend_string * s1,zend_string * s2)317 static zend_always_inline zend_bool zend_string_equals(zend_string *s1, zend_string *s2)
318 {
319 	return s1 == s2 || zend_string_equal_content(s1, s2);
320 }
321 
/*
 * Comparison macros. Arguments are expanded several times, so they must be
 * free of side effects. The *_literal variants take the length from
 * sizeof(), so their second argument has to be a string literal (or char array).
 */

/* Equal lengths and zend_binary_strcasecmp() reports no difference. */
#define zend_string_equals_ci(s1, s2) \
	(ZSTR_LEN(s1) == ZSTR_LEN(s2) \
	 && zend_binary_strcasecmp(ZSTR_VAL(s1), ZSTR_LEN(s1), ZSTR_VAL(s2), ZSTR_LEN(s2)) == 0)

/* Same as above, against a literal `c`. */
#define zend_string_equals_literal_ci(str, c) \
	(ZSTR_LEN(str) == sizeof(c) - 1 \
	 && zend_binary_strcasecmp(ZSTR_VAL(str), ZSTR_LEN(str), (c), sizeof(c) - 1) == 0)

/* Byte-exact comparison against a literal. */
#define zend_string_equals_literal(str, literal) \
	(ZSTR_LEN(str) == sizeof(literal) - 1 \
	 && memcmp(ZSTR_VAL(str), literal, sizeof(literal) - 1) == 0)
330 
331 /*
332  * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
333  *
334  * This is Daniel J. Bernstein's popular `times 33' hash function as
335  * posted by him years ago on comp.lang.c. It basically uses a function
336  * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
337  * known hash functions for strings. Because it is both computed very
338  * fast and distributes very well.
339  *
340  * The magic of number 33, i.e. why it works better than many other
341  * constants, prime or not, has never been adequately explained by
342  * anyone. So I try an explanation: if one experimentally tests all
343  * multipliers between 1 and 256 (as RSE did now) one detects that even
344  * numbers are not useable at all. The remaining 128 odd numbers
345  * (except for the number 1) work more or less all equally well. They
346  * all distribute in an acceptable way and this way fill a hash table
347  * with an average percent of approx. 86%.
348  *
349  * If one compares the Chi^2 values of the variants, the number 33 not
350  * even has the best value. But the number 33 and a few other equally
351  * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
352  * advantage to the remaining numbers in the large set of possible
353  * multipliers: their multiply operation can be replaced by a faster
354  * operation based on just one shift plus either a single addition
355  * or subtraction operation. And because a hash function has to both
356  * distribute good _and_ has to be very fast to compute, those few
357  * numbers should be preferred and seems to be the reason why Daniel J.
358  * Bernstein also preferred it.
359  *
360  *
361  *                  -- Ralf S. Engelschall <rse@engelschall.com>
362  */
363 
zend_inline_hash_func(const char * str,size_t len)364 static zend_always_inline zend_ulong zend_inline_hash_func(const char *str, size_t len)
365 {
366 	zend_ulong hash = Z_UL(5381);
367 
368 	/* variant with the hash unrolled eight times */
369 	for (; len >= 8; len -= 8) {
370 		hash = ((hash << 5) + hash) + *str++;
371 		hash = ((hash << 5) + hash) + *str++;
372 		hash = ((hash << 5) + hash) + *str++;
373 		hash = ((hash << 5) + hash) + *str++;
374 		hash = ((hash << 5) + hash) + *str++;
375 		hash = ((hash << 5) + hash) + *str++;
376 		hash = ((hash << 5) + hash) + *str++;
377 		hash = ((hash << 5) + hash) + *str++;
378 	}
379 	switch (len) {
380 		case 7: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
381 		case 6: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
382 		case 5: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
383 		case 4: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
384 		case 3: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
385 		case 2: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
386 		case 1: hash = ((hash << 5) + hash) + *str++; break;
387 		case 0: break;
388 EMPTY_SWITCH_DEFAULT_CASE()
389 	}
390 
391 	/* Hash value can't be zero, so we always set the high bit */
392 #if SIZEOF_ZEND_LONG == 8
393 	return hash | Z_UL(0x8000000000000000);
394 #elif SIZEOF_ZEND_LONG == 4
395 	return hash | Z_UL(0x80000000);
396 #else
397 # error "Unknown SIZEOF_ZEND_LONG"
398 #endif
399 }
400 
/*
 * X-macro table of well-known strings. `_` is invoked once per row as
 * _(identifier, "text"). The enum below is generated from the identifiers,
 * so row order determines the numeric value of each id.
 * The trailing backslash on the last row is intentional; keep the blank
 * line that follows the table.
 */
#define ZEND_KNOWN_STRINGS(_) \
	_(ZEND_STR_FILE,                   "file") \
	_(ZEND_STR_LINE,                   "line") \
	_(ZEND_STR_FUNCTION,               "function") \
	_(ZEND_STR_CLASS,                  "class") \
	_(ZEND_STR_OBJECT,                 "object") \
	_(ZEND_STR_TYPE,                   "type") \
	_(ZEND_STR_OBJECT_OPERATOR,        "->") \
	_(ZEND_STR_PAAMAYIM_NEKUDOTAYIM,   "::") \
	_(ZEND_STR_ARGS,                   "args") \
	_(ZEND_STR_UNKNOWN,                "unknown") \
	_(ZEND_STR_EVAL,                   "eval") \
	_(ZEND_STR_INCLUDE,                "include") \
	_(ZEND_STR_REQUIRE,                "require") \
	_(ZEND_STR_INCLUDE_ONCE,           "include_once") \
	_(ZEND_STR_REQUIRE_ONCE,           "require_once") \
	_(ZEND_STR_SCALAR,                 "scalar") \
	_(ZEND_STR_ERROR_REPORTING,        "error_reporting") \
	_(ZEND_STR_STATIC,                 "static") \
	_(ZEND_STR_THIS,                   "this") \
	_(ZEND_STR_VALUE,                  "value") \
	_(ZEND_STR_KEY,                    "key") \
	_(ZEND_STR_MAGIC_AUTOLOAD,         "__autoload") \
	_(ZEND_STR_MAGIC_INVOKE,           "__invoke") \
	_(ZEND_STR_PREVIOUS,               "previous") \
	_(ZEND_STR_CODE,                   "code") \
	_(ZEND_STR_MESSAGE,                "message") \
	_(ZEND_STR_SEVERITY,               "severity") \
	_(ZEND_STR_STRING,                 "string") \
	_(ZEND_STR_TRACE,                  "trace") \
	_(ZEND_STR_SCHEME,                 "scheme") \
	_(ZEND_STR_HOST,                   "host") \
	_(ZEND_STR_PORT,                   "port") \
	_(ZEND_STR_USER,                   "user") \
	_(ZEND_STR_PASS,                   "pass") \
	_(ZEND_STR_PATH,                   "path") \
	_(ZEND_STR_QUERY,                  "query") \
	_(ZEND_STR_FRAGMENT,               "fragment") \
	_(ZEND_STR_NULL,                   "NULL") \
	_(ZEND_STR_BOOLEAN,                "boolean") \
	_(ZEND_STR_INTEGER,                "integer") \
	_(ZEND_STR_DOUBLE,                 "double") \
	_(ZEND_STR_ARRAY,                  "array") \
	_(ZEND_STR_RESOURCE,               "resource") \
	_(ZEND_STR_CLOSED_RESOURCE,        "resource (closed)") \
	_(ZEND_STR_NAME,                   "name") \
	_(ZEND_STR_ARGV,                   "argv") \
	_(ZEND_STR_ARGC,                   "argc") \


/*
 * One enumerator per table row, in table order, followed by
 * ZEND_STR_LAST_KNOWN, whose value therefore equals the number of rows.
 */
enum _zend_known_string_id {
#define _ZEND_STR_ID(id, str) id,
ZEND_KNOWN_STRINGS(_ZEND_STR_ID)
#undef _ZEND_STR_ID
	ZEND_STR_LAST_KNOWN
};

typedef enum _zend_known_string_id zend_known_string_id;
457 
458 #endif /* ZEND_STRING_H */
459 
460 /*
461  * Local variables:
462  * tab-width: 4
463  * c-basic-offset: 4
464  * indent-tabs-mode: t
465  * End:
466  * vim600: sw=4 ts=4 fdm=marker
467  * vim<600: sw=4 ts=4
468  */
469