xref: /PHP-7.0/Zend/zend_string.h (revision 478f119a)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1998-2017 Zend Technologies Ltd. (http://www.zend.com) |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Dmitry Stogov <dmitry@zend.com>                             |
16    +----------------------------------------------------------------------+
17 */
18 
19 /* $Id: $ */
20 
21 #ifndef ZEND_STRING_H
22 #define ZEND_STRING_H
23 
24 #include "zend.h"
25 
26 BEGIN_EXTERN_C()
27 
28 ZEND_API extern zend_string *(*zend_new_interned_string)(zend_string *str);
29 ZEND_API extern void (*zend_interned_strings_snapshot)(void);
30 ZEND_API extern void (*zend_interned_strings_restore)(void);
31 
32 ZEND_API zend_ulong zend_hash_func(const char *str, size_t len);
33 void zend_interned_strings_init(void);
34 void zend_interned_strings_dtor(void);
35 
END_EXTERN_C()36 END_EXTERN_C()
37 
38 /* Shortcuts */
39 
/*
 * Field accessors for a zend_string pointer. Each expansion is fully
 * parenthesized so it behaves as a single operand wherever it is pasted;
 * the field accessors remain usable as lvalues (e.g. ZSTR_LEN(s) = n).
 */
#define ZSTR_VAL(zstr)  ((zstr)->val)   /* character data */
#define ZSTR_LEN(zstr)  ((zstr)->len)   /* stored length */
#define ZSTR_H(zstr)    ((zstr)->h)     /* cached hash, 0 = not computed yet */
/* Hash of the string, computed on first use via zend_string_hash_val(). */
#define ZSTR_HASH(zstr) (zend_string_hash_val(zstr))
44 
45 /* Compatibility macros */
46 
/* Old spellings without the Z prefix; each one forwards, argument for
 * argument, to its ZSTR_* counterpart. */
#define IS_INTERNED(zs)                       ZSTR_IS_INTERNED(zs)
#define STR_EMPTY_ALLOC()                     ZSTR_EMPTY_ALLOC()
#define _STR_HEADER_SIZE                      _ZSTR_HEADER_SIZE
#define STR_ALLOCA_ALLOC(zs, n, heap)         ZSTR_ALLOCA_ALLOC(zs, n, heap)
#define STR_ALLOCA_INIT(zs, src, n, heap)     ZSTR_ALLOCA_INIT(zs, src, n, heap)
#define STR_ALLOCA_FREE(zs, heap)             ZSTR_ALLOCA_FREE(zs, heap)
53 
54 /*---*/
55 
/* Non-zero when the string's GC flags contain IS_STR_INTERNED. */
#define ZSTR_IS_INTERNED(s)					(GC_FLAGS(s) & IS_STR_INTERNED)

/* Expands to CG(empty_string); the macro itself allocates nothing. */
#define ZSTR_EMPTY_ALLOC()				CG(empty_string)

/* Offset of the character data inside zend_string, i.e. the header size. */
#define _ZSTR_HEADER_SIZE XtOffsetOf(zend_string, val)

/*
 * Bytes needed for a string of `len` characters: header + payload + 1 for
 * the terminating NUL. `len` is parenthesized so that an expression
 * argument (e.g. a conditional) is added as a whole instead of being
 * re-associated with the surrounding `+`.
 */
#define _ZSTR_STRUCT_SIZE(len) (_ZSTR_HEADER_SIZE + (len) + 1)
63 
/*
 * Obtain a zend_string for `nbytes` characters through do_alloca() and
 * initialise its header: refcount 1, type IS_STRING, no cached hash,
 * length = nbytes. The character data is left unwritten.
 * `on_heap` is handed to do_alloca() and must later be given to
 * ZSTR_ALLOCA_FREE(). Arguments are evaluated more than once.
 */
#define ZSTR_ALLOCA_ALLOC(zs, nbytes, on_heap) do { \
	(zs) = (zend_string *)do_alloca(ZEND_MM_ALIGNED_SIZE_EX(_ZSTR_STRUCT_SIZE(nbytes), 8), (on_heap)); \
	GC_TYPE_INFO(zs) = IS_STRING; \
	GC_REFCOUNT(zs) = 1; \
	zend_string_forget_hash_val(zs); \
	ZSTR_LEN(zs) = nbytes; \
} while (0)

/*
 * ZSTR_ALLOCA_ALLOC() followed by copying `n` bytes from `src` and
 * writing the terminating NUL at index n.
 */
#define ZSTR_ALLOCA_INIT(zs, src, n, on_heap) do { \
	ZSTR_ALLOCA_ALLOC(zs, n, on_heap); \
	memcpy(ZSTR_VAL(zs), (src), (n)); \
	ZSTR_VAL(zs)[(n)] = '\0'; \
} while (0)

/* Give back a string obtained with ZSTR_ALLOCA_ALLOC()/ZSTR_ALLOCA_INIT(). */
#define ZSTR_ALLOCA_FREE(zs, on_heap) free_alloca(zs, on_heap)
79 
80 /*---*/
81 
82 static zend_always_inline zend_ulong zend_string_hash_val(zend_string *s)
83 {
84 	if (!ZSTR_H(s)) {
85 		ZSTR_H(s) = zend_hash_func(ZSTR_VAL(s), ZSTR_LEN(s));
86 	}
87 	return ZSTR_H(s);
88 }
89 
zend_string_forget_hash_val(zend_string * s)90 static zend_always_inline void zend_string_forget_hash_val(zend_string *s)
91 {
92 	ZSTR_H(s) = 0;
93 }
94 
zend_string_refcount(const zend_string * s)95 static zend_always_inline uint32_t zend_string_refcount(const zend_string *s)
96 {
97 	if (!ZSTR_IS_INTERNED(s)) {
98 		return GC_REFCOUNT(s);
99 	}
100 	return 1;
101 }
102 
zend_string_addref(zend_string * s)103 static zend_always_inline uint32_t zend_string_addref(zend_string *s)
104 {
105 	if (!ZSTR_IS_INTERNED(s)) {
106 		return ++GC_REFCOUNT(s);
107 	}
108 	return 1;
109 }
110 
zend_string_delref(zend_string * s)111 static zend_always_inline uint32_t zend_string_delref(zend_string *s)
112 {
113 	if (!ZSTR_IS_INTERNED(s)) {
114 		return --GC_REFCOUNT(s);
115 	}
116 	return 1;
117 }
118 
zend_string_alloc(size_t len,int persistent)119 static zend_always_inline zend_string *zend_string_alloc(size_t len, int persistent)
120 {
121 	zend_string *ret = (zend_string *)pemalloc(ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
122 
123 	GC_REFCOUNT(ret) = 1;
124 #if 1
125 	/* optimized single assignment */
126 	GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << 8);
127 #else
128 	GC_TYPE(ret) = IS_STRING;
129 	GC_FLAGS(ret) = (persistent ? IS_STR_PERSISTENT : 0);
130 	GC_INFO(ret) = 0;
131 #endif
132 	zend_string_forget_hash_val(ret);
133 	ZSTR_LEN(ret) = len;
134 	return ret;
135 }
136 
zend_string_safe_alloc(size_t n,size_t m,size_t l,int persistent)137 static zend_always_inline zend_string *zend_string_safe_alloc(size_t n, size_t m, size_t l, int persistent)
138 {
139 	zend_string *ret = (zend_string *)safe_pemalloc(n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
140 
141 	GC_REFCOUNT(ret) = 1;
142 #if 1
143 	/* optimized single assignment */
144 	GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << 8);
145 #else
146 	GC_TYPE(ret) = IS_STRING;
147 	GC_FLAGS(ret) = (persistent ? IS_STR_PERSISTENT : 0);
148 	GC_INFO(ret) = 0;
149 #endif
150 	zend_string_forget_hash_val(ret);
151 	ZSTR_LEN(ret) = (n * m) + l;
152 	return ret;
153 }
154 
zend_string_init(const char * str,size_t len,int persistent)155 static zend_always_inline zend_string *zend_string_init(const char *str, size_t len, int persistent)
156 {
157 	zend_string *ret = zend_string_alloc(len, persistent);
158 
159 	memcpy(ZSTR_VAL(ret), str, len);
160 	ZSTR_VAL(ret)[len] = '\0';
161 	return ret;
162 }
163 
zend_string_copy(zend_string * s)164 static zend_always_inline zend_string *zend_string_copy(zend_string *s)
165 {
166 	if (!ZSTR_IS_INTERNED(s)) {
167 		GC_REFCOUNT(s)++;
168 	}
169 	return s;
170 }
171 
zend_string_dup(zend_string * s,int persistent)172 static zend_always_inline zend_string *zend_string_dup(zend_string *s, int persistent)
173 {
174 	if (ZSTR_IS_INTERNED(s)) {
175 		return s;
176 	} else {
177 		return zend_string_init(ZSTR_VAL(s), ZSTR_LEN(s), persistent);
178 	}
179 }
180 
zend_string_realloc(zend_string * s,size_t len,int persistent)181 static zend_always_inline zend_string *zend_string_realloc(zend_string *s, size_t len, int persistent)
182 {
183 	zend_string *ret;
184 
185 	if (!ZSTR_IS_INTERNED(s)) {
186 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
187 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
188 			ZSTR_LEN(ret) = len;
189 			zend_string_forget_hash_val(ret);
190 			return ret;
191 		} else {
192 			GC_REFCOUNT(s)--;
193 		}
194 	}
195 	ret = zend_string_alloc(len, persistent);
196 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN(len, ZSTR_LEN(s)) + 1);
197 	return ret;
198 }
199 
zend_string_extend(zend_string * s,size_t len,int persistent)200 static zend_always_inline zend_string *zend_string_extend(zend_string *s, size_t len, int persistent)
201 {
202 	zend_string *ret;
203 
204 	ZEND_ASSERT(len >= ZSTR_LEN(s));
205 	if (!ZSTR_IS_INTERNED(s)) {
206 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
207 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
208 			ZSTR_LEN(ret) = len;
209 			zend_string_forget_hash_val(ret);
210 			return ret;
211 		} else {
212 			GC_REFCOUNT(s)--;
213 		}
214 	}
215 	ret = zend_string_alloc(len, persistent);
216 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), ZSTR_LEN(s) + 1);
217 	return ret;
218 }
219 
zend_string_truncate(zend_string * s,size_t len,int persistent)220 static zend_always_inline zend_string *zend_string_truncate(zend_string *s, size_t len, int persistent)
221 {
222 	zend_string *ret;
223 
224 	ZEND_ASSERT(len <= ZSTR_LEN(s));
225 	if (!ZSTR_IS_INTERNED(s)) {
226 		if (EXPECTED(GC_REFCOUNT(s) == 1)) {
227 			ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
228 			ZSTR_LEN(ret) = len;
229 			zend_string_forget_hash_val(ret);
230 			return ret;
231 		} else {
232 			GC_REFCOUNT(s)--;
233 		}
234 	}
235 	ret = zend_string_alloc(len, persistent);
236 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), len + 1);
237 	return ret;
238 }
239 
zend_string_safe_realloc(zend_string * s,size_t n,size_t m,size_t l,int persistent)240 static zend_always_inline zend_string *zend_string_safe_realloc(zend_string *s, size_t n, size_t m, size_t l, int persistent)
241 {
242 	zend_string *ret;
243 
244 	if (!ZSTR_IS_INTERNED(s)) {
245 		if (GC_REFCOUNT(s) == 1) {
246 			ret = (zend_string *)safe_perealloc(s, n, m, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(l)), persistent);
247 			ZSTR_LEN(ret) = (n * m) + l;
248 			zend_string_forget_hash_val(ret);
249 			return ret;
250 		} else {
251 			GC_REFCOUNT(s)--;
252 		}
253 	}
254 	ret = zend_string_safe_alloc(n, m, l, persistent);
255 	memcpy(ZSTR_VAL(ret), ZSTR_VAL(s), MIN((n * m) + l, ZSTR_LEN(s)) + 1);
256 	return ret;
257 }
258 
zend_string_free(zend_string * s)259 static zend_always_inline void zend_string_free(zend_string *s)
260 {
261 	if (!ZSTR_IS_INTERNED(s)) {
262 		ZEND_ASSERT(GC_REFCOUNT(s) <= 1);
263 		pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
264 	}
265 }
266 
zend_string_release(zend_string * s)267 static zend_always_inline void zend_string_release(zend_string *s)
268 {
269 	if (!ZSTR_IS_INTERNED(s)) {
270 		if (--GC_REFCOUNT(s) == 0) {
271 			pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
272 		}
273 	}
274 }
275 
276 
zend_string_equals(zend_string * s1,zend_string * s2)277 static zend_always_inline zend_bool zend_string_equals(zend_string *s1, zend_string *s2)
278 {
279 	return s1 == s2 || (ZSTR_LEN(s1) == ZSTR_LEN(s2) && !memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), ZSTR_LEN(s1)));
280 }
281 
/*
 * Equality helpers implemented as macros; every argument is evaluated
 * more than once, so pass side-effect-free expressions.
 */

/* Two zend_strings, compared with zend_binary_strcasecmp() once the
 * lengths are known to match. */
#define zend_string_equals_ci(a, b) \
	(ZSTR_LEN(a) == ZSTR_LEN(b) && \
	 zend_binary_strcasecmp(ZSTR_VAL(a), ZSTR_LEN(a), ZSTR_VAL(b), ZSTR_LEN(b)) == 0)

/* A zend_string against a string literal (length taken from sizeof),
 * compared with zend_binary_strcasecmp(). */
#define zend_string_equals_literal_ci(zs, lit) \
	(ZSTR_LEN(zs) == sizeof(lit) - 1 && \
	 zend_binary_strcasecmp(ZSTR_VAL(zs), ZSTR_LEN(zs), (lit), sizeof(lit) - 1) == 0)

/* A zend_string against a string literal (length taken from sizeof),
 * compared byte for byte. */
#define zend_string_equals_literal(zs, lit) \
	(ZSTR_LEN(zs) == sizeof(lit) - 1 && \
	 memcmp(ZSTR_VAL(zs), (lit), sizeof(lit) - 1) == 0)
290 
291 /*
292  * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
293  *
294  * This is Daniel J. Bernstein's popular `times 33' hash function as
295  * posted by him years ago on comp.lang.c. It basically uses a function
296  * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
297  * known hash functions for strings. Because it is both computed very
298  * fast and distributes very well.
299  *
300  * The magic of number 33, i.e. why it works better than many other
301  * constants, prime or not, has never been adequately explained by
302  * anyone. So I try an explanation: if one experimentally tests all
303  * multipliers between 1 and 256 (as RSE did now) one detects that even
304  * numbers are not useable at all. The remaining 128 odd numbers
305  * (except for the number 1) work more or less all equally well. They
306  * all distribute in an acceptable way and this way fill a hash table
307  * with an average percent of approx. 86%.
308  *
309  * If one compares the Chi^2 values of the variants, the number 33 not
310  * even has the best value. But the number 33 and a few other equally
311  * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
312  * advantage to the remaining numbers in the large set of possible
313  * multipliers: their multiply operation can be replaced by a faster
314  * operation based on just one shift plus either a single addition
315  * or subtraction operation. And because a hash function has to both
316  * distribute good _and_ has to be very fast to compute, those few
317  * numbers should be preferred and seems to be the reason why Daniel J.
318  * Bernstein also preferred it.
319  *
320  *
321  *                  -- Ralf S. Engelschall <rse@engelschall.com>
322  */
323 
zend_inline_hash_func(const char * str,size_t len)324 static zend_always_inline zend_ulong zend_inline_hash_func(const char *str, size_t len)
325 {
326 	register zend_ulong hash = Z_UL(5381);
327 
328 	/* variant with the hash unrolled eight times */
329 	for (; len >= 8; len -= 8) {
330 		hash = ((hash << 5) + hash) + *str++;
331 		hash = ((hash << 5) + hash) + *str++;
332 		hash = ((hash << 5) + hash) + *str++;
333 		hash = ((hash << 5) + hash) + *str++;
334 		hash = ((hash << 5) + hash) + *str++;
335 		hash = ((hash << 5) + hash) + *str++;
336 		hash = ((hash << 5) + hash) + *str++;
337 		hash = ((hash << 5) + hash) + *str++;
338 	}
339 	switch (len) {
340 		case 7: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
341 		case 6: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
342 		case 5: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
343 		case 4: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
344 		case 3: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
345 		case 2: hash = ((hash << 5) + hash) + *str++; /* fallthrough... */
346 		case 1: hash = ((hash << 5) + hash) + *str++; break;
347 		case 0: break;
348 EMPTY_SWITCH_DEFAULT_CASE()
349 	}
350 
351 	/* Hash value can't be zero, so we always set the high bit */
352 #if SIZEOF_ZEND_LONG == 8
353 	return hash | Z_UL(0x8000000000000000);
354 #elif SIZEOF_ZEND_LONG == 4
355 	return hash | Z_UL(0x80000000);
356 #else
357 # error "Unknown SIZEOF_ZEND_LONG"
358 #endif
359 }
360 
zend_interned_empty_string_init(zend_string ** s)361 static zend_always_inline void zend_interned_empty_string_init(zend_string **s)
362 {
363 	zend_string *str;
364 
365 	str = zend_string_alloc(sizeof("")-1, 1);
366 	ZSTR_VAL(str)[0] = '\000';
367 
368 #ifndef ZTS
369 	*s = zend_new_interned_string(str);
370 #else
371 	zend_string_hash_val(str);
372 	GC_FLAGS(str) |= IS_STR_INTERNED;
373 	*s = str;
374 #endif
375 }
376 
zend_interned_empty_string_free(zend_string ** s)377 static zend_always_inline void zend_interned_empty_string_free(zend_string **s)
378 {
379 	if (NULL != *s) {
380 		free(*s);
381 		*s = NULL;
382 	}
383 }
384 
385 #endif /* ZEND_STRING_H */
386 
387 /*
388  * Local variables:
389  * tab-width: 4
390  * c-basic-offset: 4
391  * indent-tabs-mode: t
392  * End:
393  */
394