xref: /PHP-7.1/Zend/zend_string.h (revision ccd4716e)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1998-2018 Zend Technologies Ltd. (http://www.zend.com) |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Dmitry Stogov <dmitry@zend.com>                             |
16    +----------------------------------------------------------------------+
17 */
18 
19 /* $Id: $ */
20 
21 #ifndef ZEND_STRING_H
22 #define ZEND_STRING_H
23 
24 #include "zend.h"
25 
26 BEGIN_EXTERN_C()
27 
/*
 * Interned-string hooks. These are function pointers, so the actual
 * implementation is assigned elsewhere (not visible in this header).
 * NOTE(review): judging by the names, the first maps a string to its interned
 * copy and the other two save/restore the interned-string table - confirm
 * against zend_string.c.
 */
ZEND_API extern zend_string *(*zend_new_interned_string)(zend_string *str);
ZEND_API extern void (*zend_interned_strings_snapshot)(void);
ZEND_API extern void (*zend_interned_strings_restore)(void);

/* Hash of the `len` bytes at `str`; zend_string_hash_val() uses it to fill the cached hash. */
ZEND_API zend_ulong zend_hash_func(const char *str, size_t len);
/* Start-up / shutdown of the interned-string subsystem (engine-internal, not ZEND_API). */
void zend_interned_strings_init(void);
void zend_interned_strings_dtor(void);
/* NOTE(review): presumably hands back the table of known strings and its size via the out-parameters - confirm. */
void zend_known_interned_strings_init(zend_string ***, uint32_t *);
36 
END_EXTERN_C()37 END_EXTERN_C()
38 
39 /* Shortcuts */
40 
/* Character data of a zend_string (the `val` member). */
#define ZSTR_VAL(zstr)  (zstr)->val
/* Length of a zend_string (the `len` member). */
#define ZSTR_LEN(zstr)  (zstr)->len
/* Raw cached hash (the `h` member); 0 is treated as "not computed" by zend_string_hash_val(). */
#define ZSTR_H(zstr)    (zstr)->h
/* Hash of the string, computed and cached on demand. */
#define ZSTR_HASH(zstr) zend_string_hash_val(zstr)
45 
46 /* Compatibility macros */
47 
/* Un-prefixed legacy names; each one simply forwards to its ZSTR_* counterpart. */
#define IS_INTERNED(s)	ZSTR_IS_INTERNED(s)
#define STR_EMPTY_ALLOC()	ZSTR_EMPTY_ALLOC()
#define _STR_HEADER_SIZE _ZSTR_HEADER_SIZE
#define STR_ALLOCA_ALLOC(str, _len, use_heap) ZSTR_ALLOCA_ALLOC(str, _len, use_heap)
#define STR_ALLOCA_INIT(str, s, len, use_heap) ZSTR_ALLOCA_INIT(str, s, len, use_heap)
#define STR_ALLOCA_FREE(str, use_heap) ZSTR_ALLOCA_FREE(str, use_heap)
54 
55 /*---*/
56 
/* Non-zero when the IS_STR_INTERNED flag is set in the string's GC flags. */
#define ZSTR_IS_INTERNED(s)					(GC_FLAGS(s) & IS_STR_INTERNED)

/* Yields the compiler-globals `empty_string` entry; nothing is allocated here. */
#define ZSTR_EMPTY_ALLOC()				CG(empty_string)

/* Byte offset of the character data inside zend_string, i.e. the size of the header before `val`. */
#define _ZSTR_HEADER_SIZE XtOffsetOf(zend_string, val)
62 
/*
 * Number of bytes needed for a zend_string holding `len` characters:
 * header + characters + one byte for the trailing NUL.
 * `len` is parenthesized so that expression arguments (e.g. `a << b`,
 * `c ? x : y`) are not re-associated with the surrounding additions.
 */
#define _ZSTR_STRUCT_SIZE(len) (_ZSTR_HEADER_SIZE + (len) + 1)
64 
/*
 * ZSTR_ALLOCA_ALLOC(str, _len, use_heap)
 *
 * Obtains storage for a string of `_len` characters through do_alloca()
 * (size rounded up to a multiple of 8) and initializes the header:
 * refcount 1, type IS_STRING with no flags, hash not computed, length `_len`.
 * The character data is left uninitialized. `use_heap` is handed to
 * do_alloca() and must be the same object later given to ZSTR_ALLOCA_FREE().
 *
 * `_len` is parenthesized at every use so that expression arguments keep
 * their meaning. Note that `str` and `_len` are evaluated more than once.
 */
#define ZSTR_ALLOCA_ALLOC(str, _len, use_heap) do { \
	(str) = (zend_string *)do_alloca(ZEND_MM_ALIGNED_SIZE_EX(_ZSTR_STRUCT_SIZE((_len)), 8), (use_heap)); \
	GC_REFCOUNT(str) = 1; \
	GC_TYPE_INFO(str) = IS_STRING; \
	zend_string_forget_hash_val(str); \
	ZSTR_LEN(str) = (_len); \
} while (0)

/*
 * ZSTR_ALLOCA_INIT(str, s, len, use_heap)
 *
 * Same as ZSTR_ALLOCA_ALLOC(), then copies `len` bytes from `s` into the new
 * string and appends the terminating NUL. `len` is evaluated several times.
 */
#define ZSTR_ALLOCA_INIT(str, s, len, use_heap) do { \
	ZSTR_ALLOCA_ALLOC(str, len, use_heap); \
	memcpy(ZSTR_VAL(str), (s), (len)); \
	ZSTR_VAL(str)[(len)] = '\0'; \
} while (0)

/* Releases storage obtained with ZSTR_ALLOCA_ALLOC()/ZSTR_ALLOCA_INIT(). */
#define ZSTR_ALLOCA_FREE(str, use_heap) free_alloca(str, use_heap)
80 
81 /*---*/
82 
83 static zend_always_inline zend_ulong zend_string_hash_val(zend_string *s)
84 {
85 	if (!ZSTR_H(s)) {
86 		ZSTR_H(s) = zend_hash_func(ZSTR_VAL(s), ZSTR_LEN(s));
87 	}
88 	return ZSTR_H(s);
89 }
90 
zend_string_forget_hash_val(zend_string * s)91 static zend_always_inline void zend_string_forget_hash_val(zend_string *s)
92 {
93 	ZSTR_H(s) = 0;
94 }
95 
zend_string_refcount(const zend_string * s)96 static zend_always_inline uint32_t zend_string_refcount(const zend_string *s)
97 {
98 	if (!ZSTR_IS_INTERNED(s)) {
99 		return GC_REFCOUNT(s);
100 	}
101 	return 1;
102 }
103 
zend_string_addref(zend_string * s)104 static zend_always_inline uint32_t zend_string_addref(zend_string *s)
105 {
106 	if (!ZSTR_IS_INTERNED(s)) {
107 		return ++GC_REFCOUNT(s);
108 	}
109 	return 1;
110 }
111 
zend_string_delref(zend_string * s)112 static zend_always_inline uint32_t zend_string_delref(zend_string *s)
113 {
114 	if (!ZSTR_IS_INTERNED(s)) {
115 		return --GC_REFCOUNT(s);
116 	}
117 	return 1;
118 }
119 
zend_string_alloc(size_t len,int persistent)120 static zend_always_inline zend_string *zend_string_alloc(size_t len, int persistent)
121 {
122 	zend_string *ret = (zend_string *)pemalloc(ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
123 
124 	GC_REFCOUNT(ret) = 1;
125 #if 1
126 	/* optimized single assignment */
127 	GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << 8);
128 #else
129 	GC_TYPE(ret) = IS_STRING;
130 	GC_FLAGS(ret) = (persistent ? IS_STR_PERSISTENT : 0);
131 	GC_INFO(ret) = 0;
132 #endif
133 	zend_string_forget_hash_val(ret);
134 	ZSTR_LEN(ret) = len;
135 	return ret;
136 }
137 
zend_string_safe_alloc(size_t n,size_t m,size_t l,int persistent)138 static zend_always_inline zend_string *zend_string_safe_alloc(size_t n, size_t m, size_t l, int persistent)
139 {
140 	zend_string *ret = (zend_string *)safe_pemalloc(n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
141 
142 	GC_REFCOUNT(ret) = 1;
143 #if 1
144 	/* optimized single assignment */
145 	GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << 8);
146 #else
147 	GC_TYPE(ret) = IS_STRING;
148 	GC_FLAGS(ret) = (persistent ? IS_STR_PERSISTENT : 0);
149 	GC_INFO(ret) = 0;
150 #endif
151 	zend_string_forget_hash_val(ret);
152 	ZSTR_LEN(ret) = (n * m) + l;
153 	return ret;
154 }
155 
zend_string_init(const char * str,size_t len,int persistent)156 static zend_always_inline zend_string *zend_string_init(const char *str, size_t len, int persistent)
157 {
158 	zend_string *ret = zend_string_alloc(len, persistent);
159 
160 	memcpy(ZSTR_VAL(ret), str, len);
161 	ZSTR_VAL(ret)[len] = '\0';
162 	return ret;
163 }
164 
zend_string_copy(zend_string * s)165 static zend_always_inline zend_string *zend_string_copy(zend_string *s)
166 {
167 	if (!ZSTR_IS_INTERNED(s)) {
168 		GC_REFCOUNT(s)++;
169 	}
170 	return s;
171 }
172 
zend_string_dup(zend_string * s,int persistent)173 static zend_always_inline zend_string *zend_string_dup(zend_string *s, int persistent)
174 {
175 	if (ZSTR_IS_INTERNED(s)) {
176 		return s;
177 	} else {
178 		return zend_string_init(ZSTR_VAL(s), ZSTR_LEN(s), persistent);
179 	}
180 }
181 
zend_string_realloc(zend_string * s,size_t len,int persistent)182 static zend_always_inline zend_string *zend_string_realloc(zend_string *s, size_t len, int persistent)
183 {
184 	zend_string *ret;
185 
186 	if (!ZSTR_IS_INTERNED(s)) {
187 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
188 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
189 			ZSTR_LEN(ret) = len;
190 			zend_string_forget_hash_val(ret);
191 			return ret;
192 		} else {
193 			GC_REFCOUNT(s)--;
194 		}
195 	}
196 	ret = zend_string_alloc(len, persistent);
197 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN(len, ZSTR_LEN(s)) + 1);
198 	return ret;
199 }
200 
zend_string_extend(zend_string * s,size_t len,int persistent)201 static zend_always_inline zend_string *zend_string_extend(zend_string *s, size_t len, int persistent)
202 {
203 	zend_string *ret;
204 
205 	ZEND_ASSERT(len >= ZSTR_LEN(s));
206 	if (!ZSTR_IS_INTERNED(s)) {
207 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
208 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
209 			ZSTR_LEN(ret) = len;
210 			zend_string_forget_hash_val(ret);
211 			return ret;
212 		} else {
213 			GC_REFCOUNT(s)--;
214 		}
215 	}
216 	ret = zend_string_alloc(len, persistent);
217 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), ZSTR_LEN(s) + 1);
218 	return ret;
219 }
220 
zend_string_truncate(zend_string * s,size_t len,int persistent)221 static zend_always_inline zend_string *zend_string_truncate(zend_string *s, size_t len, int persistent)
222 {
223 	zend_string *ret;
224 
225 	ZEND_ASSERT(len <= ZSTR_LEN(s));
226 	if (!ZSTR_IS_INTERNED(s)) {
227 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
228 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
229 			ZSTR_LEN(ret) = len;
230 			zend_string_forget_hash_val(ret);
231 			return ret;
232 		} else {
233 			GC_REFCOUNT(s)--;
234 		}
235 	}
236 	ret = zend_string_alloc(len, persistent);
237 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), len + 1);
238 	return ret;
239 }
240 
zend_string_safe_realloc(zend_string * s,size_t n,size_t m,size_t l,int persistent)241 static zend_always_inline zend_string *zend_string_safe_realloc(zend_string *s, size_t n, size_t m, size_t l, int persistent)
242 {
243 	zend_string *ret;
244 
245 	if (!ZSTR_IS_INTERNED(s)) {
246 		if (GC_REFCOUNT(s) == 1) {
247 			ret = (zend_string *)safe_perealloc(s, n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
248 			ZSTR_LEN(ret) = (n * m) + l;
249 			zend_string_forget_hash_val(ret);
250 			return ret;
251 		} else {
252 			GC_REFCOUNT(s)--;
253 		}
254 	}
255 	ret = zend_string_safe_alloc(n, m, l, persistent);
256 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN((n * m) + l, ZSTR_LEN(s)) + 1);
257 	return ret;
258 }
259 
zend_string_free(zend_string * s)260 static zend_always_inline void zend_string_free(zend_string *s)
261 {
262 	if (!ZSTR_IS_INTERNED(s)) {
263 		ZEND_ASSERT(GC_REFCOUNT(s) <= 1);
264 		pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
265 	}
266 }
267 
zend_string_release(zend_string * s)268 static zend_always_inline void zend_string_release(zend_string *s)
269 {
270 	if (!ZSTR_IS_INTERNED(s)) {
271 		if (--GC_REFCOUNT(s) == 0) {
272 			pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
273 		}
274 	}
275 }
276 
277 
zend_string_equals(zend_string * s1,zend_string * s2)278 static zend_always_inline zend_bool zend_string_equals(zend_string *s1, zend_string *s2)
279 {
280 	return s1 == s2 || (ZSTR_LEN(s1) == ZSTR_LEN(s2) && !memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), ZSTR_LEN(s1)));
281 }
282 
/*
 * Case-insensitive equality of two zend_strings: lengths must match and
 * zend_binary_strcasecmp() must report no difference.
 * Both arguments are evaluated more than once.
 */
#define zend_string_equals_ci(s1, s2) \
	(ZSTR_LEN(s1) == ZSTR_LEN(s2) && !zend_binary_strcasecmp(ZSTR_VAL(s1), ZSTR_LEN(s1), ZSTR_VAL(s2), ZSTR_LEN(s2)))

/*
 * Case-insensitive comparison of a zend_string with `c`. The length of `c` is
 * taken as sizeof(c) - 1, so `c` has to be a string literal or char array,
 * not a pointer.
 */
#define zend_string_equals_literal_ci(str, c) \
	(ZSTR_LEN(str) == sizeof(c) - 1 && !zend_binary_strcasecmp(ZSTR_VAL(str), ZSTR_LEN(str), (c), sizeof(c) - 1))

/*
 * Case-sensitive comparison of a zend_string with `literal`; the same
 * sizeof-based length rule applies.
 */
#define zend_string_equals_literal(str, literal) \
	(ZSTR_LEN(str) == sizeof(literal)-1 && !memcmp(ZSTR_VAL(str), literal, sizeof(literal) - 1))
291 
292 /*
293  * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
294  *
295  * This is Daniel J. Bernstein's popular `times 33' hash function as
296  * posted by him years ago on comp.lang.c. It basically uses a function
297  * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
298  * known hash functions for strings. Because it is both computed very
299  * fast and distributes very well.
300  *
301  * The magic of number 33, i.e. why it works better than many other
302  * constants, prime or not, has never been adequately explained by
303  * anyone. So I try an explanation: if one experimentally tests all
304  * multipliers between 1 and 256 (as RSE did now) one detects that even
305  * numbers are not useable at all. The remaining 128 odd numbers
306  * (except for the number 1) work more or less all equally well. They
307  * all distribute in an acceptable way and this way fill a hash table
308  * with an average percent of approx. 86%.
309  *
310  * If one compares the Chi^2 values of the variants, the number 33 not
311  * even has the best value. But the number 33 and a few other equally
312  * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
313  * advantage to the remaining numbers in the large set of possible
314  * multipliers: their multiply operation can be replaced by a faster
315  * operation based on just one shift plus either a single addition
316  * or subtraction operation. And because a hash function has to both
317  * distribute good _and_ has to be very fast to compute, those few
318  * numbers should be preferred and seems to be the reason why Daniel J.
319  * Bernstein also preferred it.
320  *
321  *
322  *                  -- Ralf S. Engelschall <rse@engelschall.com>
323  */
324 
zend_inline_hash_func(const char * str,size_t len)325 static zend_always_inline zend_ulong zend_inline_hash_func(const char *str, size_t len)
326 {
327 	zend_ulong hash = Z_UL(5381);
328 
329 	/* variant with the hash unrolled eight times */
330 	for (; len >= 8; len -= 8) {
331 		hash = ((hash << 5) + hash) + *str++;
332 		hash = ((hash << 5) + hash) + *str++;
333 		hash = ((hash << 5) + hash) + *str++;
334 		hash = ((hash << 5) + hash) + *str++;
335 		hash = ((hash << 5) + hash) + *str++;
336 		hash = ((hash << 5) + hash) + *str++;
337 		hash = ((hash << 5) + hash) + *str++;
338 		hash = ((hash << 5) + hash) + *str++;
339 	}
340 	switch (len) {
341 		case 7: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
342 		case 6: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
343 		case 5: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
344 		case 4: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
345 		case 3: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
346 		case 2: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
347 		case 1: hash = ((hash << 5) + hash) + *str++; break;
348 		case 0: break;
349 EMPTY_SWITCH_DEFAULT_CASE()
350 	}
351 
352 	/* Hash value can't be zero, so we always set the high bit */
353 #if SIZEOF_ZEND_LONG == 8
354 	return hash | Z_UL(0x8000000000000000);
355 #elif SIZEOF_ZEND_LONG == 4
356 	return hash | Z_UL(0x80000000);
357 #else
358 # error "Unknown SIZEOF_ZEND_LONG"
359 #endif
360 }
361 
362 #ifdef ZTS
zend_zts_interned_string_init(const char * val,size_t len)363 static zend_always_inline zend_string* zend_zts_interned_string_init(const char *val, size_t len)
364 {
365 	zend_string *str;
366 
367 	str = zend_string_init(val, len, 1);
368 
369 	zend_string_hash_val(str);
370 	GC_FLAGS(str) |= IS_STR_INTERNED;
371 	return str;
372 }
373 
zend_zts_interned_string_free(zend_string ** s)374 static zend_always_inline void zend_zts_interned_string_free(zend_string **s)
375 {
376 	if (NULL != *s) {
377 		free(*s);
378 		*s = NULL;
379 	}
380 }
381 #endif
382 
/*
 * X-macro listing the known strings as (identifier, literal) pairs.
 * The caller supplies a two-argument macro `_` that is applied to every pair;
 * the zend_known_string_id enum below is generated this way.
 */
#define ZEND_KNOWN_STRINGS(_) \
	_(ZEND_STR_FILE,                   "file") \
	_(ZEND_STR_LINE,                   "line") \
	_(ZEND_STR_FUNCTION,               "function") \
	_(ZEND_STR_CLASS,                  "class") \
	_(ZEND_STR_OBJECT,                 "object") \
	_(ZEND_STR_TYPE,                   "type") \
	_(ZEND_STR_OBJECT_OPERATOR,        "->") \
	_(ZEND_STR_PAAMAYIM_NEKUDOTAYIM,   "::") \
	_(ZEND_STR_ARGS,                   "args") \
	_(ZEND_STR_UNKNOWN,                "unknown") \
	_(ZEND_STR_EVAL,                   "eval") \
	_(ZEND_STR_INCLUDE,                "include") \
	_(ZEND_STR_REQUIRE,                "require") \
	_(ZEND_STR_INCLUDE_ONCE,           "include_once") \
	_(ZEND_STR_REQUIRE_ONCE,           "require_once") \
	_(ZEND_STR_SCALAR,                 "scalar") \
	_(ZEND_STR_ERROR_REPORTING,        "error_reporting") \
	_(ZEND_STR_STATIC,                 "static") \
	_(ZEND_STR_THIS,                   "this") \
	_(ZEND_STR_VALUE,                  "value") \
	_(ZEND_STR_KEY,                    "key") \
	_(ZEND_STR_MAGIC_AUTOLOAD,         "__autoload") \
	_(ZEND_STR_MAGIC_INVOKE,           "__invoke") \
	_(ZEND_STR_PREVIOUS,               "previous") \
	_(ZEND_STR_CODE,                   "code") \
	_(ZEND_STR_MESSAGE,                "message") \
	_(ZEND_STR_SEVERITY,               "severity") \
	_(ZEND_STR_STRING,                 "string") \
	_(ZEND_STR_TRACE,                  "trace") \

414 
/*
 * One enumerator per ZEND_KNOWN_STRINGS entry, in list order (so the first
 * entry has value 0). ZEND_STR_LAST_KNOWN comes after the last entry and
 * therefore equals the number of known strings.
 */
typedef enum _zend_known_string_id {
/* keep only the identifier of each (id, literal) pair */
#define _ZEND_STR_ID(id, str) id,
ZEND_KNOWN_STRINGS(_ZEND_STR_ID)
#undef _ZEND_STR_ID
	ZEND_STR_LAST_KNOWN
} zend_known_string_id;
421 
/*
 * Takes an array of `count` C strings and returns a uint32_t.
 * NOTE(review): presumably registers the strings as additional known interned
 * strings and returns an index/offset into the known-strings table - confirm
 * against the definition in zend_string.c.
 */
ZEND_API uint32_t zend_intern_known_strings(const char **strings, uint32_t count);
423 
424 #endif /* ZEND_STRING_H */
425 
426 /*
427  * Local variables:
428  * tab-width: 4
429  * c-basic-offset: 4
430  * indent-tabs-mode: t
431  * End:
432  */
433