xref: /PHP-7.2/Zend/zend_string.h (revision 7a7ec01a)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Dmitry Stogov <dmitry@zend.com>                             |
16    +----------------------------------------------------------------------+
17 */
18 
19 /* $Id: $ */
20 
21 #ifndef ZEND_STRING_H
22 #define ZEND_STRING_H
23 
24 #include "zend.h"
25 
BEGIN_EXTERN_C()

/* Callback taking no arguments and returning nothing; this is the handler
 * type accepted by zend_interned_strings_set_permanent_storage_copy_handler(). */
typedef void (*zend_string_copy_storage_func_t)(void);
/* Callback that receives a zend_string and returns a zend_string; this is
 * the type of the zend_new_interned_string hook declared below. */
typedef zend_string *(*zend_new_interned_string_func_t)(zend_string *str);

/* Global hook of the callback type above; zend_string_init_interned() in
 * this header passes every freshly built string through it. */
ZEND_API extern zend_new_interned_string_func_t zend_new_interned_string;

/* Out-of-line hash over `len` bytes at `str`; zend_string_hash_val() in this
 * header calls it to fill a string's cached hash. */
ZEND_API zend_ulong zend_hash_func(const char *str, size_t len);
/* Interned-string management entry points.
 * NOTE(review): their lifecycle semantics live in the implementation file
 * and are not visible from this header -- consult it before relying on
 * ordering or ownership. */
ZEND_API void zend_interned_strings_init(void);
ZEND_API void zend_interned_strings_dtor(void);
ZEND_API void zend_interned_strings_activate(void);
ZEND_API void zend_interned_strings_deactivate(void);
ZEND_API zend_string *zend_interned_string_find_permanent(zend_string *str);
ZEND_API void zend_interned_strings_set_request_storage_handler(zend_new_interned_string_func_t handler);
ZEND_API void zend_interned_strings_set_permanent_storage_copy_handler(zend_string_copy_storage_func_t handler);
ZEND_API void zend_interned_strings_switch_storage(void);

/* Shared string objects, exposed through ZSTR_EMPTY_ALLOC(), ZSTR_CHAR(c)
 * and ZSTR_KNOWN(idx) respectively. */
ZEND_API extern zend_string  *zend_empty_string;
ZEND_API extern zend_string  *zend_one_char_string[256];
ZEND_API extern zend_string **zend_known_strings;

END_EXTERN_C()
48 
/*
 * Field accessors for zend_string.
 *
 * ZSTR_VAL, ZSTR_LEN and ZSTR_H expand to the `val`, `len` and `h` members
 * of the pointed-to string and remain usable as lvalues. ZSTR_HASH goes
 * through zend_string_hash_val(), which computes the hash on demand.
 */

#define ZSTR_VAL(zstr)  ((zstr)->val)
#define ZSTR_LEN(zstr)  ((zstr)->len)
#define ZSTR_H(zstr)    ((zstr)->h)
#define ZSTR_HASH(zstr) (zend_string_hash_val(zstr))
55 
/* Compatibility macros: older un-prefixed spellings, each forwarding
 * unchanged to the ZSTR_-prefixed macro of the same meaning. */

#define IS_INTERNED(s)	ZSTR_IS_INTERNED(s)
#define STR_EMPTY_ALLOC()	ZSTR_EMPTY_ALLOC()
#define _STR_HEADER_SIZE _ZSTR_HEADER_SIZE
#define STR_ALLOCA_ALLOC(str, _len, use_heap) ZSTR_ALLOCA_ALLOC(str, _len, use_heap)
#define STR_ALLOCA_INIT(str, s, len, use_heap) ZSTR_ALLOCA_INIT(str, s, len, use_heap)
#define STR_ALLOCA_FREE(str, use_heap) ZSTR_ALLOCA_FREE(str, use_heap)
64 
65 /*---*/
66 
/* Non-zero when the IS_STR_INTERNED bit is set in the string's GC flags. */
#define ZSTR_IS_INTERNED(s)					(GC_FLAGS(s) & IS_STR_INTERNED)

/* Yields the shared zend_empty_string pointer; despite the name, the macro
 * itself performs no allocation. */
#define ZSTR_EMPTY_ALLOC() zend_empty_string
/* Element `c` of the 256-entry zend_one_char_string table. */
#define ZSTR_CHAR(c) zend_one_char_string[c]
/* Element `idx` of zend_known_strings (presumably indexed by a
 * zend_known_string_id value -- verify against the table's initialiser). */
#define ZSTR_KNOWN(idx) zend_known_strings[idx]

/* Offset of the `val` member within zend_string, i.e. the number of bytes
 * that precede the character data. */
#define _ZSTR_HEADER_SIZE XtOffsetOf(zend_string, val)
74 
/* Bytes needed for a zend_string holding `len` characters: the header, the
 * characters themselves, and one extra byte for the trailing NUL.
 * `len` is parenthesised so that arguments containing low-precedence
 * operators (e.g. `a << 1`, `c ? x : y`) are summed as a single operand. */
#define _ZSTR_STRUCT_SIZE(len) (_ZSTR_HEADER_SIZE + (len) + 1)
76 
/*
 * Obtains storage for a string of `_len` characters through do_alloca()
 * (size rounded with ZEND_MM_ALIGNED_SIZE_EX(..., 8)) and assigns it to `str`.
 * The header is initialised: refcount 1, type info IS_STRING, cached hash
 * cleared, length set to `_len`. The character data and the terminating NUL
 * are NOT written. `use_heap` is handed to do_alloca() and must later be
 * passed to ZSTR_ALLOCA_FREE(). `str` and `_len` are expanded more than
 * once, so pass side-effect-free expressions.
 */
#define ZSTR_ALLOCA_ALLOC(str, _len, use_heap) do { \
	(str) = (zend_string *)do_alloca(ZEND_MM_ALIGNED_SIZE_EX(_ZSTR_STRUCT_SIZE(_len), 8), (use_heap)); \
	GC_REFCOUNT(str) = 1; \
	GC_TYPE_INFO(str) = IS_STRING; \
	zend_string_forget_hash_val(str); \
	ZSTR_LEN(str) = _len; \
} while (0)
84 
/*
 * ZSTR_ALLOCA_ALLOC() followed by copying `len` bytes from `s` into the new
 * string and writing a NUL terminator at index `len`. `str` and `len` are
 * expanded several times, so pass side-effect-free expressions.
 */
#define ZSTR_ALLOCA_INIT(str, s, len, use_heap) do { \
	ZSTR_ALLOCA_ALLOC(str, len, use_heap); \
	memcpy(ZSTR_VAL(str), (s), (len)); \
	ZSTR_VAL(str)[(len)] = '\0'; \
} while (0)
90 
/* Releases storage obtained with ZSTR_ALLOCA_ALLOC()/ZSTR_ALLOCA_INIT() by
 * forwarding `str` and the same `use_heap` variable to free_alloca(). */
#define ZSTR_ALLOCA_FREE(str, use_heap) free_alloca(str, use_heap)
92 
93 /*---*/
94 
95 static zend_always_inline zend_ulong zend_string_hash_val(zend_string *s)
96 {
97 	if (!ZSTR_H(s)) {
98 		ZSTR_H(s) = zend_hash_func(ZSTR_VAL(s), ZSTR_LEN(s));
99 	}
100 	return ZSTR_H(s);
101 }
102 
zend_string_forget_hash_val(zend_string * s)103 static zend_always_inline void zend_string_forget_hash_val(zend_string *s)
104 {
105 	ZSTR_H(s) = 0;
106 }
107 
zend_string_refcount(const zend_string * s)108 static zend_always_inline uint32_t zend_string_refcount(const zend_string *s)
109 {
110 	if (!ZSTR_IS_INTERNED(s)) {
111 		return GC_REFCOUNT(s);
112 	}
113 	return 1;
114 }
115 
zend_string_addref(zend_string * s)116 static zend_always_inline uint32_t zend_string_addref(zend_string *s)
117 {
118 	if (!ZSTR_IS_INTERNED(s)) {
119 		return ++GC_REFCOUNT(s);
120 	}
121 	return 1;
122 }
123 
zend_string_delref(zend_string * s)124 static zend_always_inline uint32_t zend_string_delref(zend_string *s)
125 {
126 	if (!ZSTR_IS_INTERNED(s)) {
127 		return --GC_REFCOUNT(s);
128 	}
129 	return 1;
130 }
131 
zend_string_alloc(size_t len,int persistent)132 static zend_always_inline zend_string *zend_string_alloc(size_t len, int persistent)
133 {
134 	zend_string *ret = (zend_string *)pemalloc(ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
135 
136 	GC_REFCOUNT(ret) = 1;
137 #if 1
138 	/* optimized single assignment */
139 	GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << 8);
140 #else
141 	GC_TYPE(ret) = IS_STRING;
142 	GC_FLAGS(ret) = (persistent ? IS_STR_PERSISTENT : 0);
143 	GC_INFO(ret) = 0;
144 #endif
145 	zend_string_forget_hash_val(ret);
146 	ZSTR_LEN(ret) = len;
147 	return ret;
148 }
149 
zend_string_safe_alloc(size_t n,size_t m,size_t l,int persistent)150 static zend_always_inline zend_string *zend_string_safe_alloc(size_t n, size_t m, size_t l, int persistent)
151 {
152 	zend_string *ret = (zend_string *)safe_pemalloc(n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
153 
154 	GC_REFCOUNT(ret) = 1;
155 #if 1
156 	/* optimized single assignment */
157 	GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << 8);
158 #else
159 	GC_TYPE(ret) = IS_STRING;
160 	GC_FLAGS(ret) = (persistent ? IS_STR_PERSISTENT : 0);
161 	GC_INFO(ret) = 0;
162 #endif
163 	zend_string_forget_hash_val(ret);
164 	ZSTR_LEN(ret) = (n * m) + l;
165 	return ret;
166 }
167 
zend_string_init(const char * str,size_t len,int persistent)168 static zend_always_inline zend_string *zend_string_init(const char *str, size_t len, int persistent)
169 {
170 	zend_string *ret = zend_string_alloc(len, persistent);
171 
172 	memcpy(ZSTR_VAL(ret), str, len);
173 	ZSTR_VAL(ret)[len] = '\0';
174 	return ret;
175 }
176 
zend_string_init_interned(const char * str,size_t len,int persistent)177 static zend_always_inline zend_string *zend_string_init_interned(const char *str, size_t len, int persistent)
178 {
179 	zend_string *ret = zend_string_init(str, len, persistent);
180 
181 	return zend_new_interned_string(ret);
182 }
183 
zend_string_copy(zend_string * s)184 static zend_always_inline zend_string *zend_string_copy(zend_string *s)
185 {
186 	if (!ZSTR_IS_INTERNED(s)) {
187 		GC_REFCOUNT(s)++;
188 	}
189 	return s;
190 }
191 
zend_string_dup(zend_string * s,int persistent)192 static zend_always_inline zend_string *zend_string_dup(zend_string *s, int persistent)
193 {
194 	if (ZSTR_IS_INTERNED(s)) {
195 		return s;
196 	} else {
197 		return zend_string_init(ZSTR_VAL(s), ZSTR_LEN(s), persistent);
198 	}
199 }
200 
zend_string_realloc(zend_string * s,size_t len,int persistent)201 static zend_always_inline zend_string *zend_string_realloc(zend_string *s, size_t len, int persistent)
202 {
203 	zend_string *ret;
204 
205 	if (!ZSTR_IS_INTERNED(s)) {
206 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
207 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
208 			ZSTR_LEN(ret) = len;
209 			zend_string_forget_hash_val(ret);
210 			return ret;
211 		} else {
212 			GC_REFCOUNT(s)--;
213 		}
214 	}
215 	ret = zend_string_alloc(len, persistent);
216 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN(len, ZSTR_LEN(s)) + 1);
217 	return ret;
218 }
219 
zend_string_extend(zend_string * s,size_t len,int persistent)220 static zend_always_inline zend_string *zend_string_extend(zend_string *s, size_t len, int persistent)
221 {
222 	zend_string *ret;
223 
224 	ZEND_ASSERT(len >= ZSTR_LEN(s));
225 	if (!ZSTR_IS_INTERNED(s)) {
226 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
227 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
228 			ZSTR_LEN(ret) = len;
229 			zend_string_forget_hash_val(ret);
230 			return ret;
231 		} else {
232 			GC_REFCOUNT(s)--;
233 		}
234 	}
235 	ret = zend_string_alloc(len, persistent);
236 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), ZSTR_LEN(s) + 1);
237 	return ret;
238 }
239 
zend_string_truncate(zend_string * s,size_t len,int persistent)240 static zend_always_inline zend_string *zend_string_truncate(zend_string *s, size_t len, int persistent)
241 {
242 	zend_string *ret;
243 
244 	ZEND_ASSERT(len <= ZSTR_LEN(s));
245 	if (!ZSTR_IS_INTERNED(s)) {
246 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
247 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
248 			ZSTR_LEN(ret) = len;
249 			zend_string_forget_hash_val(ret);
250 			return ret;
251 		} else {
252 			GC_REFCOUNT(s)--;
253 		}
254 	}
255 	ret = zend_string_alloc(len, persistent);
256 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), len + 1);
257 	return ret;
258 }
259 
zend_string_safe_realloc(zend_string * s,size_t n,size_t m,size_t l,int persistent)260 static zend_always_inline zend_string *zend_string_safe_realloc(zend_string *s, size_t n, size_t m, size_t l, int persistent)
261 {
262 	zend_string *ret;
263 
264 	if (!ZSTR_IS_INTERNED(s)) {
265 		if (GC_REFCOUNT(s) == 1) {
266 			ret = (zend_string *)safe_perealloc(s, n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
267 			ZSTR_LEN(ret) = (n * m) + l;
268 			zend_string_forget_hash_val(ret);
269 			return ret;
270 		} else {
271 			GC_REFCOUNT(s)--;
272 		}
273 	}
274 	ret = zend_string_safe_alloc(n, m, l, persistent);
275 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN((n * m) + l, ZSTR_LEN(s)) + 1);
276 	return ret;
277 }
278 
zend_string_free(zend_string * s)279 static zend_always_inline void zend_string_free(zend_string *s)
280 {
281 	if (!ZSTR_IS_INTERNED(s)) {
282 		ZEND_ASSERT(GC_REFCOUNT(s) <= 1);
283 		pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
284 	}
285 }
286 
zend_string_release(zend_string * s)287 static zend_always_inline void zend_string_release(zend_string *s)
288 {
289 	if (!ZSTR_IS_INTERNED(s)) {
290 		if (--GC_REFCOUNT(s) == 0) {
291 			pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
292 		}
293 	}
294 }
295 
296 
zend_string_equals(zend_string * s1,zend_string * s2)297 static zend_always_inline zend_bool zend_string_equals(zend_string *s1, zend_string *s2)
298 {
299 	return s1 == s2 || (ZSTR_LEN(s1) == ZSTR_LEN(s2) && !memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), ZSTR_LEN(s1)));
300 }
301 
/* Non-zero when s1 and s2 have equal length and zend_binary_strcasecmp()
 * reports them equal. Arguments are expanded more than once. */
#define zend_string_equals_ci(s1, s2) \
	(ZSTR_LEN(s1) == ZSTR_LEN(s2) && !zend_binary_strcasecmp(ZSTR_VAL(s1), ZSTR_LEN(s1), ZSTR_VAL(s2), ZSTR_LEN(s2)))

/* As above, comparing against `c`, whose length is taken as sizeof(c) - 1;
 * `c` must therefore be a string literal or char array, not a pointer. */
#define zend_string_equals_literal_ci(str, c) \
	(ZSTR_LEN(str) == sizeof(c) - 1 && !zend_binary_strcasecmp(ZSTR_VAL(str), ZSTR_LEN(str), (c), sizeof(c) - 1))

/* Non-zero when `str` has length sizeof(literal) - 1 and its bytes match
 * `literal` under memcmp(); `literal` must be a string literal or char
 * array, not a pointer. */
#define zend_string_equals_literal(str, literal) \
	(ZSTR_LEN(str) == sizeof(literal)-1 && !memcmp(ZSTR_VAL(str), literal, sizeof(literal) - 1))
310 
311 /*
312  * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
313  *
314  * This is Daniel J. Bernstein's popular `times 33' hash function as
315  * posted by him years ago on comp.lang.c. It basically uses a function
316  * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
317  * known hash functions for strings. Because it is both computed very
318  * fast and distributes very well.
319  *
320  * The magic of number 33, i.e. why it works better than many other
321  * constants, prime or not, has never been adequately explained by
322  * anyone. So I try an explanation: if one experimentally tests all
323  * multipliers between 1 and 256 (as RSE did now) one detects that even
324  * numbers are not useable at all. The remaining 128 odd numbers
325  * (except for the number 1) work more or less all equally well. They
326  * all distribute in an acceptable way and this way fill a hash table
327  * with an average percent of approx. 86%.
328  *
329  * If one compares the Chi^2 values of the variants, the number 33 not
330  * even has the best value. But the number 33 and a few other equally
331  * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
332  * advantage to the remaining numbers in the large set of possible
333  * multipliers: their multiply operation can be replaced by a faster
334  * operation based on just one shift plus either a single addition
335  * or subtraction operation. And because a hash function has to both
336  * distribute good _and_ has to be very fast to compute, those few
337  * numbers should be preferred and seems to be the reason why Daniel J.
338  * Bernstein also preferred it.
339  *
340  *
341  *                  -- Ralf S. Engelschall <rse@engelschall.com>
342  */
343 
zend_inline_hash_func(const char * str,size_t len)344 static zend_always_inline zend_ulong zend_inline_hash_func(const char *str, size_t len)
345 {
346 	zend_ulong hash = Z_UL(5381);
347 
348 	/* variant with the hash unrolled eight times */
349 	for (; len >= 8; len -= 8) {
350 		hash = ((hash << 5) + hash) + *str++;
351 		hash = ((hash << 5) + hash) + *str++;
352 		hash = ((hash << 5) + hash) + *str++;
353 		hash = ((hash << 5) + hash) + *str++;
354 		hash = ((hash << 5) + hash) + *str++;
355 		hash = ((hash << 5) + hash) + *str++;
356 		hash = ((hash << 5) + hash) + *str++;
357 		hash = ((hash << 5) + hash) + *str++;
358 	}
359 	switch (len) {
360 		case 7: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
361 		case 6: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
362 		case 5: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
363 		case 4: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
364 		case 3: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
365 		case 2: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
366 		case 1: hash = ((hash << 5) + hash) + *str++; break;
367 		case 0: break;
368 EMPTY_SWITCH_DEFAULT_CASE()
369 	}
370 
371 	/* Hash value can't be zero, so we always set the high bit */
372 #if SIZEOF_ZEND_LONG == 8
373 	return hash | Z_UL(0x8000000000000000);
374 #elif SIZEOF_ZEND_LONG == 4
375 	return hash | Z_UL(0x80000000);
376 #else
377 # error "Unknown SIZEOF_ZEND_LONG"
378 #endif
379 }
380 
/*
 * X-macro table of well-known strings. Each entry pairs an identifier with
 * a string literal; ZEND_KNOWN_STRINGS(_) expands to `_(id, "text")` for
 * every entry, in the order listed. The zend_known_string_id enum is
 * generated from this list, so the position of an entry determines its
 * numeric id.
 */
#define ZEND_KNOWN_STRINGS(_) \
	_(ZEND_STR_FILE,                   "file") \
	_(ZEND_STR_LINE,                   "line") \
	_(ZEND_STR_FUNCTION,               "function") \
	_(ZEND_STR_CLASS,                  "class") \
	_(ZEND_STR_OBJECT,                 "object") \
	_(ZEND_STR_TYPE,                   "type") \
	_(ZEND_STR_OBJECT_OPERATOR,        "->") \
	_(ZEND_STR_PAAMAYIM_NEKUDOTAYIM,   "::") \
	_(ZEND_STR_ARGS,                   "args") \
	_(ZEND_STR_UNKNOWN,                "unknown") \
	_(ZEND_STR_EVAL,                   "eval") \
	_(ZEND_STR_INCLUDE,                "include") \
	_(ZEND_STR_REQUIRE,                "require") \
	_(ZEND_STR_INCLUDE_ONCE,           "include_once") \
	_(ZEND_STR_REQUIRE_ONCE,           "require_once") \
	_(ZEND_STR_SCALAR,                 "scalar") \
	_(ZEND_STR_ERROR_REPORTING,        "error_reporting") \
	_(ZEND_STR_STATIC,                 "static") \
	_(ZEND_STR_THIS,                   "this") \
	_(ZEND_STR_VALUE,                  "value") \
	_(ZEND_STR_KEY,                    "key") \
	_(ZEND_STR_MAGIC_AUTOLOAD,         "__autoload") \
	_(ZEND_STR_MAGIC_INVOKE,           "__invoke") \
	_(ZEND_STR_PREVIOUS,               "previous") \
	_(ZEND_STR_CODE,                   "code") \
	_(ZEND_STR_MESSAGE,                "message") \
	_(ZEND_STR_SEVERITY,               "severity") \
	_(ZEND_STR_STRING,                 "string") \
	_(ZEND_STR_TRACE,                  "trace") \
	_(ZEND_STR_SCHEME,                 "scheme") \
	_(ZEND_STR_HOST,                   "host") \
	_(ZEND_STR_PORT,                   "port") \
	_(ZEND_STR_USER,                   "user") \
	_(ZEND_STR_PASS,                   "pass") \
	_(ZEND_STR_PATH,                   "path") \
	_(ZEND_STR_QUERY,                  "query") \
	_(ZEND_STR_FRAGMENT,               "fragment") \
	_(ZEND_STR_NULL,                   "NULL") \
	_(ZEND_STR_BOOLEAN,                "boolean") \
	_(ZEND_STR_INTEGER,                "integer") \
	_(ZEND_STR_DOUBLE,                 "double") \
	_(ZEND_STR_ARRAY,                  "array") \
	_(ZEND_STR_RESOURCE,               "resource") \
	_(ZEND_STR_CLOSED_RESOURCE,        "resource (closed)") \
426 
427 
/*
 * Numeric ids for the entries of ZEND_KNOWN_STRINGS, generated by expanding
 * the table with a helper that emits only the identifier of each entry.
 * Ids start at 0 and follow the table order; ZEND_STR_LAST_KNOWN comes
 * after the final entry and therefore equals the number of known strings.
 */
typedef enum _zend_known_string_id {
#define _ZEND_STR_ID(id, str) id,
ZEND_KNOWN_STRINGS(_ZEND_STR_ID)
#undef _ZEND_STR_ID
	ZEND_STR_LAST_KNOWN
} zend_known_string_id;
434 
435 #endif /* ZEND_STRING_H */
436 
437 /*
438  * Local variables:
439  * tab-width: 4
440  * c-basic-offset: 4
441  * indent-tabs-mode: t
442  * End:
443  * vim600: sw=4 ts=4 fdm=marker
444  * vim<600: sw=4 ts=4
445  */
446