xref: /PHP-8.2/Zend/zend_string.c (revision 99504aa1)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) Zend Technologies Ltd. (http://www.zend.com)           |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Dmitry Stogov <dmitry@php.net>                              |
16    +----------------------------------------------------------------------+
17 */
18 
19 #include "zend.h"
20 #include "zend_globals.h"
21 
22 #ifdef HAVE_VALGRIND
23 # include "valgrind/callgrind.h"
24 #endif
25 
26 #if __has_feature(memory_sanitizer)
27 # include <sanitizer/msan_interface.h>
28 #endif
29 
/* Currently active interning entry points. zend_interned_strings_init() points
 * them at the permanent implementations; zend_interned_strings_switch_storage()
 * swaps them between the permanent and the request handlers. */
ZEND_API zend_new_interned_string_func_t zend_new_interned_string;
ZEND_API zend_string_init_interned_func_t zend_string_init_interned;
ZEND_API zend_string_init_existing_interned_func_t zend_string_init_existing_interned;

/* Forward declarations of the built-in permanent and request implementations. */
static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string *str);
static zend_string* ZEND_FASTCALL zend_new_interned_string_request(zend_string *str);
static zend_string* ZEND_FASTCALL zend_string_init_interned_permanent(const char *str, size_t size, bool permanent);
static zend_string* ZEND_FASTCALL zend_string_init_existing_interned_permanent(const char *str, size_t size, bool permanent);
static zend_string* ZEND_FASTCALL zend_string_init_interned_request(const char *str, size_t size, bool permanent);
static zend_string* ZEND_FASTCALL zend_string_init_existing_interned_request(const char *str, size_t size, bool permanent);

/* Strings interned during the startup phase. The table is shared by all
   threads and is not freed until process exit. Adding permanent strings
   after startup would still be possible, at the cost of locking in
   thread-safe builds. */
static HashTable interned_strings_permanent;

/* Handlers installed by zend_interned_strings_switch_storage(true). They default
 * to the built-in request implementations and can be replaced through
 * zend_interned_strings_set_request_storage_handlers(). */
static zend_new_interned_string_func_t interned_string_request_handler = zend_new_interned_string_request;
static zend_string_init_interned_func_t interned_string_init_request_handler = zend_string_init_interned_request;
static zend_string_init_existing_interned_func_t interned_string_init_existing_request_handler = zend_string_init_existing_interned_request;

/* Filled in by zend_interned_strings_init(): the interned empty string, one
 * interned single-byte string per byte value, and an array of interned strings
 * indexed like known_strings[]. */
ZEND_API zend_string  *zend_empty_string = NULL;
ZEND_API zend_string  *zend_one_char_string[256];
ZEND_API zend_string **zend_known_strings = NULL;
54 
zend_string_hash_func(zend_string * str)55 ZEND_API zend_ulong ZEND_FASTCALL zend_string_hash_func(zend_string *str)
56 {
57 	return ZSTR_H(str) = zend_hash_func(ZSTR_VAL(str), ZSTR_LEN(str));
58 }
59 
zend_hash_func(const char * str,size_t len)60 ZEND_API zend_ulong ZEND_FASTCALL zend_hash_func(const char *str, size_t len)
61 {
62 	return zend_inline_hash_func(str, len);
63 }
64 
/* Element destructor of the interned string tables: releases the string held
 * by zv with pefree(), picking the allocator from the string's
 * IS_STR_PERSISTENT flag. */
static void _str_dtor(zval *zv)
{
	zend_string *interned = Z_STR_P(zv);
	const uint32_t persistent = GC_FLAGS(interned) & IS_STR_PERSISTENT;

	pefree(interned, persistent);
}
70 
/* C string literals of all known strings: _ZEND_STR_DSC() reduces every
 * (id, str) entry of ZEND_KNOWN_STRINGS to its literal. The trailing NULL
 * is a sentinel and is excluded wherever the element count is computed. */
static const char *known_strings[] = {
#define _ZEND_STR_DSC(id, str) str,
ZEND_KNOWN_STRINGS(_ZEND_STR_DSC)
#undef _ZEND_STR_DSC
	NULL
};
77 
zend_init_interned_strings_ht(HashTable * interned_strings,bool permanent)78 static zend_always_inline void zend_init_interned_strings_ht(HashTable *interned_strings, bool permanent)
79 {
80 	zend_hash_init(interned_strings, 1024, NULL, _str_dtor, permanent);
81 	if (permanent) {
82 		zend_hash_real_init_mixed(interned_strings);
83 	}
84 }
85 
zend_interned_strings_init(void)86 ZEND_API void zend_interned_strings_init(void)
87 {
88 	char s[2];
89 	unsigned int i;
90 	zend_string *str;
91 
92 	interned_string_request_handler = zend_new_interned_string_request;
93 	interned_string_init_request_handler = zend_string_init_interned_request;
94 	interned_string_init_existing_request_handler = zend_string_init_existing_interned_request;
95 
96 	zend_empty_string = NULL;
97 	zend_known_strings = NULL;
98 
99 	zend_init_interned_strings_ht(&interned_strings_permanent, 1);
100 
101 	zend_new_interned_string = zend_new_interned_string_permanent;
102 	zend_string_init_interned = zend_string_init_interned_permanent;
103 	zend_string_init_existing_interned = zend_string_init_existing_interned_permanent;
104 
105 	/* interned empty string */
106 	str = zend_string_alloc(sizeof("")-1, 1);
107 	ZSTR_VAL(str)[0] = '\000';
108 	zend_empty_string = zend_new_interned_string_permanent(str);
109 
110 	s[1] = 0;
111 	for (i = 0; i < 256; i++) {
112 		s[0] = i;
113 		zend_one_char_string[i] = zend_new_interned_string_permanent(zend_string_init(s, 1, 1));
114 	}
115 
116 	/* known strings */
117 	zend_known_strings = pemalloc(sizeof(zend_string*) * ((sizeof(known_strings) / sizeof(known_strings[0]) - 1)), 1);
118 	for (i = 0; i < (sizeof(known_strings) / sizeof(known_strings[0])) - 1; i++) {
119 		str = zend_string_init(known_strings[i], strlen(known_strings[i]), 1);
120 		zend_known_strings[i] = zend_new_interned_string_permanent(str);
121 	}
122 }
123 
zend_interned_strings_dtor(void)124 ZEND_API void zend_interned_strings_dtor(void)
125 {
126 	zend_hash_destroy(&interned_strings_permanent);
127 
128 	free(zend_known_strings);
129 	zend_known_strings = NULL;
130 }
131 
zend_interned_string_ht_lookup_ex(zend_ulong h,const char * str,size_t size,HashTable * interned_strings)132 static zend_always_inline zend_string *zend_interned_string_ht_lookup_ex(zend_ulong h, const char *str, size_t size, HashTable *interned_strings)
133 {
134 	uint32_t nIndex;
135 	uint32_t idx;
136 	Bucket *p;
137 
138 	nIndex = h | interned_strings->nTableMask;
139 	idx = HT_HASH(interned_strings, nIndex);
140 	while (idx != HT_INVALID_IDX) {
141 		p = HT_HASH_TO_BUCKET(interned_strings, idx);
142 		if ((p->h == h) && zend_string_equals_cstr(p->key, str, size)) {
143 			return p->key;
144 		}
145 		idx = Z_NEXT(p->val);
146 	}
147 
148 	return NULL;
149 }
150 
zend_interned_string_ht_lookup(zend_string * str,HashTable * interned_strings)151 static zend_always_inline zend_string *zend_interned_string_ht_lookup(zend_string *str, HashTable *interned_strings)
152 {
153 	zend_ulong h = ZSTR_H(str);
154 	uint32_t nIndex;
155 	uint32_t idx;
156 	Bucket *p;
157 
158 	nIndex = h | interned_strings->nTableMask;
159 	idx = HT_HASH(interned_strings, nIndex);
160 	while (idx != HT_INVALID_IDX) {
161 		p = HT_HASH_TO_BUCKET(interned_strings, idx);
162 		if ((p->h == h) && zend_string_equal_content(p->key, str)) {
163 			return p->key;
164 		}
165 		idx = Z_NEXT(p->val);
166 	}
167 
168 	return NULL;
169 }
170 
171 /* This function might be not thread safe at least because it would update the
172    hash val in the passed string. Be sure it is called in the appropriate context. */
zend_add_interned_string(zend_string * str,HashTable * interned_strings,uint32_t flags)173 static zend_always_inline zend_string *zend_add_interned_string(zend_string *str, HashTable *interned_strings, uint32_t flags)
174 {
175 	zval val;
176 
177 	GC_SET_REFCOUNT(str, 1);
178 	GC_ADD_FLAGS(str, IS_STR_INTERNED | flags);
179 
180 	ZVAL_INTERNED_STR(&val, str);
181 
182 	zend_hash_add_new(interned_strings, str, &val);
183 
184 	return str;
185 }
186 
zend_interned_string_find_permanent(zend_string * str)187 ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_string *str)
188 {
189 	zend_string_hash_val(str);
190 	return zend_interned_string_ht_lookup(str, &interned_strings_permanent);
191 }
192 
zend_new_interned_string_permanent(zend_string * str)193 static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string *str)
194 {
195 	zend_string *ret;
196 
197 	if (ZSTR_IS_INTERNED(str)) {
198 		return str;
199 	}
200 
201 	zend_string_hash_val(str);
202 	ret = zend_interned_string_ht_lookup(str, &interned_strings_permanent);
203 	if (ret) {
204 		zend_string_release(str);
205 		return ret;
206 	}
207 
208 	ZEND_ASSERT(GC_FLAGS(str) & GC_PERSISTENT);
209 	if (GC_REFCOUNT(str) > 1) {
210 		zend_ulong h = ZSTR_H(str);
211 		zend_string_delref(str);
212 		str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 1);
213 		ZSTR_H(str) = h;
214 	}
215 
216 	return zend_add_interned_string(str, &interned_strings_permanent, IS_STR_PERMANENT);
217 }
218 
zend_new_interned_string_request(zend_string * str)219 static zend_string* ZEND_FASTCALL zend_new_interned_string_request(zend_string *str)
220 {
221 	zend_string *ret;
222 
223 	if (ZSTR_IS_INTERNED(str)) {
224 		return str;
225 	}
226 
227 	zend_string_hash_val(str);
228 
229 	/* Check for permanent strings, the table is readonly at this point. */
230 	ret = zend_interned_string_ht_lookup(str, &interned_strings_permanent);
231 	if (ret) {
232 		zend_string_release(str);
233 		return ret;
234 	}
235 
236 	ret = zend_interned_string_ht_lookup(str, &CG(interned_strings));
237 	if (ret) {
238 		zend_string_release(str);
239 		return ret;
240 	}
241 
242 	/* Create a short living interned, freed after the request. */
243 #if ZEND_RC_DEBUG
244 	if (zend_rc_debug) {
245 		/* PHP shouldn't create persistent interned string during request,
246 		 * but at least dl() may do this */
247 		ZEND_ASSERT(!(GC_FLAGS(str) & GC_PERSISTENT));
248 	}
249 #endif
250 	if (GC_REFCOUNT(str) > 1) {
251 		zend_ulong h = ZSTR_H(str);
252 		zend_string_delref(str);
253 		str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
254 		ZSTR_H(str) = h;
255 	}
256 
257 	ret = zend_add_interned_string(str, &CG(interned_strings), 0);
258 
259 	return ret;
260 }
261 
zend_string_init_interned_permanent(const char * str,size_t size,bool permanent)262 static zend_string* ZEND_FASTCALL zend_string_init_interned_permanent(const char *str, size_t size, bool permanent)
263 {
264 	zend_string *ret;
265 	zend_ulong h = zend_inline_hash_func(str, size);
266 
267 	ret = zend_interned_string_ht_lookup_ex(h, str, size, &interned_strings_permanent);
268 	if (ret) {
269 		return ret;
270 	}
271 
272 	ZEND_ASSERT(permanent);
273 	ret = zend_string_init(str, size, permanent);
274 	ZSTR_H(ret) = h;
275 	return zend_add_interned_string(ret, &interned_strings_permanent, IS_STR_PERMANENT);
276 }
277 
zend_string_init_existing_interned_permanent(const char * str,size_t size,bool permanent)278 static zend_string* ZEND_FASTCALL zend_string_init_existing_interned_permanent(const char *str, size_t size, bool permanent)
279 {
280 	zend_ulong h = zend_inline_hash_func(str, size);
281 	zend_string *ret = zend_interned_string_ht_lookup_ex(h, str, size, &interned_strings_permanent);
282 	if (ret) {
283 		return ret;
284 	}
285 
286 	ZEND_ASSERT(permanent);
287 	ret = zend_string_init(str, size, permanent);
288 	ZSTR_H(ret) = h;
289 	return ret;
290 }
291 
zend_string_init_interned_request(const char * str,size_t size,bool permanent)292 static zend_string* ZEND_FASTCALL zend_string_init_interned_request(const char *str, size_t size, bool permanent)
293 {
294 	zend_string *ret;
295 	zend_ulong h = zend_inline_hash_func(str, size);
296 
297 	/* Check for permanent strings, the table is readonly at this point. */
298 	ret = zend_interned_string_ht_lookup_ex(h, str, size, &interned_strings_permanent);
299 	if (ret) {
300 		return ret;
301 	}
302 
303 	ret = zend_interned_string_ht_lookup_ex(h, str, size, &CG(interned_strings));
304 	if (ret) {
305 		return ret;
306 	}
307 
308 #if ZEND_RC_DEBUG
309 	if (zend_rc_debug) {
310 		/* PHP shouldn't create persistent interned string during request,
311 		 * but at least dl() may do this */
312 		ZEND_ASSERT(!permanent);
313 	}
314 #endif
315 	ret = zend_string_init(str, size, permanent);
316 	ZSTR_H(ret) = h;
317 
318 	/* Create a short living interned, freed after the request. */
319 	return zend_add_interned_string(ret, &CG(interned_strings), 0);
320 }
321 
zend_string_init_existing_interned_request(const char * str,size_t size,bool permanent)322 static zend_string* ZEND_FASTCALL zend_string_init_existing_interned_request(const char *str, size_t size, bool permanent)
323 {
324 	zend_ulong h = zend_inline_hash_func(str, size);
325 	zend_string *ret = zend_interned_string_ht_lookup_ex(h, str, size, &interned_strings_permanent);
326 	if (ret) {
327 		return ret;
328 	}
329 
330 	ret = zend_interned_string_ht_lookup_ex(h, str, size, &CG(interned_strings));
331 	if (ret) {
332 		return ret;
333 	}
334 
335 	ZEND_ASSERT(!permanent);
336 	ret = zend_string_init(str, size, permanent);
337 	ZSTR_H(ret) = h;
338 	return ret;
339 }
340 
zend_interned_strings_activate(void)341 ZEND_API void zend_interned_strings_activate(void)
342 {
343 	zend_init_interned_strings_ht(&CG(interned_strings), 0);
344 }
345 
zend_interned_strings_deactivate(void)346 ZEND_API void zend_interned_strings_deactivate(void)
347 {
348 	zend_hash_destroy(&CG(interned_strings));
349 }
350 
zend_interned_strings_set_request_storage_handlers(zend_new_interned_string_func_t handler,zend_string_init_interned_func_t init_handler,zend_string_init_existing_interned_func_t init_existing_handler)351 ZEND_API void zend_interned_strings_set_request_storage_handlers(zend_new_interned_string_func_t handler, zend_string_init_interned_func_t init_handler, zend_string_init_existing_interned_func_t init_existing_handler)
352 {
353 	interned_string_request_handler = handler;
354 	interned_string_init_request_handler = init_handler;
355 	interned_string_init_existing_request_handler = init_existing_handler;
356 }
357 
zend_interned_strings_switch_storage(bool request)358 ZEND_API void zend_interned_strings_switch_storage(bool request)
359 {
360 	if (request) {
361 		zend_new_interned_string = interned_string_request_handler;
362 		zend_string_init_interned = interned_string_init_request_handler;
363 		zend_string_init_existing_interned = interned_string_init_existing_request_handler;
364 	} else {
365 		zend_new_interned_string = zend_new_interned_string_permanent;
366 		zend_string_init_interned = zend_string_init_interned_permanent;
367 		zend_string_init_existing_interned = zend_string_init_existing_interned_permanent;
368 	}
369 }
370 
371 #if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64__) && !defined(__ILP32__)))
372 /* Even if we don't build with valgrind support, include the symbol so that valgrind available
373  * only at runtime will not result in false positives. */
374 #ifndef I_REPLACE_SONAME_FNNAME_ZU
375 # define I_REPLACE_SONAME_FNNAME_ZU(soname, fnname) _vgr00000ZU_ ## soname ## _ ## fnname
376 #endif
377 
378 /* See GH-9068 */
379 #if __has_attribute(noipa)
380 # define NOIPA __attribute__((noipa))
381 #else
382 # define NOIPA
383 #endif
384 
I_REPLACE_SONAME_FNNAME_ZU(NONE,zend_string_equal_val)385 ZEND_API bool ZEND_FASTCALL I_REPLACE_SONAME_FNNAME_ZU(NONE,zend_string_equal_val)(const zend_string *s1, const zend_string *s2)
386 {
387 	return !memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), ZSTR_LEN(s1));
388 }
389 #endif
390 
391 #if defined(__GNUC__) && defined(__i386__)
/*
 * i386 implementation of zend_string_equal_val() in GCC inline assembly.
 *
 * Compares the first ZSTR_LEN(s1) bytes of both strings' data, 4 bytes per
 * iteration; only the length of s1 is consulted. Returns 1 when the data
 * matches and 0 otherwise.
 *
 * The bytes of s2 are addressed as ptr + delta, where delta is the distance
 * between the two zend_string structures, so one advancing pointer serves both
 * strings. Whole 4-byte words are loaded even when fewer bytes remain; if such
 * a last word differs, the bytes beyond the remaining length are shifted out
 * before the result is tested for zero.
 */
ZEND_API zend_never_inline NOIPA bool ZEND_FASTCALL zend_string_equal_val(const zend_string *s1, const zend_string *s2)
{
	const char *ptr = ZSTR_VAL(s1);
	size_t delta = (const char*)s2 - (const char*)s1;
	size_t len = ZSTR_LEN(s1);
	zend_ulong ret;

	__asm__ (
		".LL0%=:\n\t"                    /* word loop */
		"movl (%2,%3), %0\n\t"           /* ret = current word of s2 */
		"xorl (%2), %0\n\t"              /* ret ^= current word of s1 */
		"jne .LL1%=\n\t"                 /* words differ */
		"addl $0x4, %2\n\t"
		"subl $0x4, %1\n\t"
		"ja .LL0%=\n\t"                  /* more than 4 bytes were left: next word */
		"movl $0x1, %0\n\t"              /* every word matched: equal */
		"jmp .LL3%=\n\t"
		".LL1%=:\n\t"
		"cmpl $0x4,%1\n\t"
		"jb .LL2%=\n\t"                  /* partial last word: check valid bytes only */
		"xorl %0, %0\n\t"                /* difference inside a full word: not equal */
		"jmp .LL3%=\n\t"
		".LL2%=:\n\t"
		"negl %1\n\t"
		"lea 0x20(,%1,8), %1\n\t"        /* shift count = 32 - 8 * len */
		"shll %b1, %0\n\t"               /* drop the bytes past the remaining length */
		"sete %b0\n\t"                   /* equal iff no differing bit is left */
		"movzbl %b0, %0\n\t"
		".LL3%=:\n"
		: "=&a"(ret),                    /* %0 */
		  "+c"(len),                     /* %1, its low byte is used as shift count */
		  "+r"(ptr)                      /* %2 */
		: "r"(delta)                     /* %3 */
		: "cc");
	return ret;
}
428 
429 #elif defined(__GNUC__) && defined(__x86_64__) && !defined(__ILP32__)
/*
 * x86-64 implementation of zend_string_equal_val() in GCC inline assembly.
 *
 * Compares the first ZSTR_LEN(s1) bytes of both strings' data, 8 bytes per
 * iteration; only the length of s1 is consulted. Returns 1 when the data
 * matches and 0 otherwise.
 *
 * The bytes of s2 are addressed as ptr + delta, where delta is the distance
 * between the two zend_string structures, so one advancing pointer serves both
 * strings. Whole 8-byte words are loaded even when fewer bytes remain; if such
 * a last word differs, the bytes beyond the remaining length are shifted out
 * before the result is tested for zero.
 */
ZEND_API zend_never_inline NOIPA bool ZEND_FASTCALL zend_string_equal_val(const zend_string *s1, const zend_string *s2)
{
	const char *ptr = ZSTR_VAL(s1);
	size_t delta = (const char*)s2 - (const char*)s1;
	size_t len = ZSTR_LEN(s1);
	zend_ulong ret;

	__asm__ (
		".LL0%=:\n\t"                    /* word loop */
		"movq (%2,%3), %0\n\t"           /* ret = current word of s2 */
		"xorq (%2), %0\n\t"              /* ret ^= current word of s1 */
		"jne .LL1%=\n\t"                 /* words differ */
		"addq $0x8, %2\n\t"
		"subq $0x8, %1\n\t"
		"ja .LL0%=\n\t"                  /* more than 8 bytes were left: next word */
		"movq $0x1, %0\n\t"              /* every word matched: equal */
		"jmp .LL3%=\n\t"
		".LL1%=:\n\t"
		"cmpq $0x8,%1\n\t"
		"jb .LL2%=\n\t"                  /* partial last word: check valid bytes only */
		"xorq %0, %0\n\t"                /* difference inside a full word: not equal */
		"jmp .LL3%=\n\t"
		".LL2%=:\n\t"
		"negq %1\n\t"
		"lea 0x40(,%1,8), %1\n\t"        /* shift count = 64 - 8 * len */
		"shlq %b1, %0\n\t"               /* drop the bytes past the remaining length */
		"sete %b0\n\t"                   /* equal iff no differing bit is left */
		"movzbq %b0, %0\n\t"
		".LL3%=:\n"
		: "=&a"(ret),                    /* %0 */
		  "+c"(len),                     /* %1, its low byte is used as shift count */
		  "+r"(ptr)                      /* %2 */
		: "r"(delta)                     /* %3 */
		: "cc");
	return ret;
}
466 #endif
467 
zend_string_concat2(const char * str1,size_t str1_len,const char * str2,size_t str2_len)468 ZEND_API zend_string *zend_string_concat2(
469 		const char *str1, size_t str1_len,
470 		const char *str2, size_t str2_len)
471 {
472 	size_t len = str1_len + str2_len;
473 	zend_string *res = zend_string_alloc(len, 0);
474 
475 	memcpy(ZSTR_VAL(res), str1, str1_len);
476 	memcpy(ZSTR_VAL(res) + str1_len, str2, str2_len);
477 	ZSTR_VAL(res)[len] = '\0';
478 
479 	return res;
480 }
481 
zend_string_concat3(const char * str1,size_t str1_len,const char * str2,size_t str2_len,const char * str3,size_t str3_len)482 ZEND_API zend_string *zend_string_concat3(
483 		const char *str1, size_t str1_len,
484 		const char *str2, size_t str2_len,
485 		const char *str3, size_t str3_len)
486 {
487 	size_t len = str1_len + str2_len + str3_len;
488 	zend_string *res = zend_string_alloc(len, 0);
489 
490 	memcpy(ZSTR_VAL(res), str1, str1_len);
491 	memcpy(ZSTR_VAL(res) + str1_len, str2, str2_len);
492 	memcpy(ZSTR_VAL(res) + str1_len + str2_len, str3, str3_len);
493 	ZSTR_VAL(res)[len] = '\0';
494 
495 	return res;
496 }
497 
498 /* strlcpy and strlcat are not intercepted by msan, so we need to do it ourselves. */
499 #if __has_feature(memory_sanitizer)
500 static size_t (*libc_strlcpy)(char *__restrict, const char *__restrict, size_t);
strlcpy(char * __restrict dest,const char * __restrict src,size_t n)501 size_t strlcpy(char *__restrict dest, const char *__restrict src, size_t n)
502 {
503 	if (!libc_strlcpy) {
504 		libc_strlcpy = dlsym(RTLD_NEXT, "strlcpy");
505 	}
506 	size_t result = libc_strlcpy(dest, src, n);
507 	__msan_unpoison_string(dest);
508 	return result;
509 }
510 static size_t (*libc_strlcat)(char *__restrict, const char *__restrict, size_t);
strlcat(char * __restrict dest,const char * restrict src,size_t n)511 size_t strlcat (char *__restrict dest, const char *restrict src, size_t n)
512 {
513 	if (!libc_strlcat) {
514 		libc_strlcat = dlsym(RTLD_NEXT, "strlcat");
515 	}
516 	size_t result = libc_strlcat(dest, src, n);
517 	__msan_unpoison_string(dest);
518 	return result;
519 }
520 #endif
521