xref: /PHP-8.4/Zend/zend_string.c (revision e2e2b3ab)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) Zend Technologies Ltd. (http://www.zend.com)           |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Dmitry Stogov <dmitry@php.net>                              |
16    +----------------------------------------------------------------------+
17 */
18 
19 #include "zend.h"
20 #include "zend_globals.h"
21 
22 #ifdef HAVE_VALGRIND
23 # include "valgrind/callgrind.h"
24 #endif
25 
26 #if __has_feature(memory_sanitizer)
27 # include <sanitizer/msan_interface.h>
28 #endif
29 
30 ZEND_API zend_new_interned_string_func_t zend_new_interned_string;
31 ZEND_API zend_string_init_interned_func_t zend_string_init_interned;
32 ZEND_API zend_string_init_existing_interned_func_t zend_string_init_existing_interned;
33 
34 static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string *str);
35 static zend_string* ZEND_FASTCALL zend_new_interned_string_request(zend_string *str);
36 static zend_string* ZEND_FASTCALL zend_string_init_interned_permanent(const char *str, size_t size, bool permanent);
37 static zend_string* ZEND_FASTCALL zend_string_init_existing_interned_permanent(const char *str, size_t size, bool permanent);
38 static zend_string* ZEND_FASTCALL zend_string_init_interned_request(const char *str, size_t size, bool permanent);
39 static zend_string* ZEND_FASTCALL zend_string_init_existing_interned_request(const char *str, size_t size, bool permanent);
40 
41 /* Any strings interned in the startup phase. Common to all the threads,
42    won't be free'd until process exit. If we want an ability to
43    add permanent strings even after startup, it would be still
44    possible on costs of locking in the thread safe builds. */
45 static HashTable interned_strings_permanent;
46 
47 static zend_new_interned_string_func_t interned_string_request_handler = zend_new_interned_string_request;
48 static zend_string_init_interned_func_t interned_string_init_request_handler = zend_string_init_interned_request;
49 static zend_string_init_existing_interned_func_t interned_string_init_existing_request_handler = zend_string_init_existing_interned_request;
50 
51 ZEND_API zend_string  *zend_empty_string = NULL;
52 ZEND_API zend_string  *zend_one_char_string[256];
53 ZEND_API zend_string **zend_known_strings = NULL;
54 
zend_string_hash_func(zend_string * str)55 ZEND_API zend_ulong ZEND_FASTCALL zend_string_hash_func(zend_string *str)
56 {
57 	return ZSTR_H(str) = zend_hash_func(ZSTR_VAL(str), ZSTR_LEN(str));
58 }
59 
zend_hash_func(const char * str,size_t len)60 ZEND_API zend_ulong ZEND_FASTCALL zend_hash_func(const char *str, size_t len)
61 {
62 	return zend_inline_hash_func(str, len);
63 }
64 
/* Destructor installed on the interned-string tables: frees the string held
   in zv, selecting the allocator from the string's IS_STR_PERSISTENT flag. */
static void _str_dtor(zval *zv)
{
	zend_string *victim = Z_STR_P(zv);
	const bool is_persistent = (GC_FLAGS(victim) & IS_STR_PERSISTENT) != 0;

	pefree(victim, is_persistent);
}
70 
71 static const char *known_strings[] = {
72 #define _ZEND_STR_DSC(id, str) str,
73 ZEND_KNOWN_STRINGS(_ZEND_STR_DSC)
74 #undef _ZEND_STR_DSC
75 	NULL
76 };
77 
zend_init_interned_strings_ht(HashTable * interned_strings,bool permanent)78 static zend_always_inline void zend_init_interned_strings_ht(HashTable *interned_strings, bool permanent)
79 {
80 	zend_hash_init(interned_strings, 1024, NULL, _str_dtor, permanent);
81 	if (permanent) {
82 		zend_hash_real_init_mixed(interned_strings);
83 	}
84 }
85 
zend_interned_strings_init(void)86 ZEND_API void zend_interned_strings_init(void)
87 {
88 	char s[2];
89 	unsigned int i;
90 	zend_string *str;
91 
92 	interned_string_request_handler = zend_new_interned_string_request;
93 	interned_string_init_request_handler = zend_string_init_interned_request;
94 	interned_string_init_existing_request_handler = zend_string_init_existing_interned_request;
95 
96 	zend_empty_string = NULL;
97 	zend_known_strings = NULL;
98 
99 	zend_init_interned_strings_ht(&interned_strings_permanent, 1);
100 
101 	zend_new_interned_string = zend_new_interned_string_permanent;
102 	zend_string_init_interned = zend_string_init_interned_permanent;
103 	zend_string_init_existing_interned = zend_string_init_existing_interned_permanent;
104 
105 	/* interned empty string */
106 	str = zend_string_alloc(sizeof("")-1, 1);
107 	ZSTR_VAL(str)[0] = '\000';
108 	zend_empty_string = zend_new_interned_string_permanent(str);
109 	GC_ADD_FLAGS(zend_empty_string, IS_STR_VALID_UTF8);
110 
111 	s[1] = 0;
112 	for (i = 0; i < 256; i++) {
113 		s[0] = i;
114 		zend_one_char_string[i] = zend_new_interned_string_permanent(zend_string_init(s, 1, 1));
115 		if (i < 0x80) {
116 			GC_ADD_FLAGS(zend_one_char_string[i], IS_STR_VALID_UTF8);
117 		}
118 	}
119 
120 	/* known strings */
121 	zend_known_strings = pemalloc(sizeof(zend_string*) * ((sizeof(known_strings) / sizeof(known_strings[0]) - 1)), 1);
122 	for (i = 0; i < (sizeof(known_strings) / sizeof(known_strings[0])) - 1; i++) {
123 		str = zend_string_init(known_strings[i], strlen(known_strings[i]), 1);
124 		zend_known_strings[i] = zend_new_interned_string_permanent(str);
125 		GC_ADD_FLAGS(zend_known_strings[i], IS_STR_VALID_UTF8);
126 	}
127 }
128 
zend_interned_strings_dtor(void)129 ZEND_API void zend_interned_strings_dtor(void)
130 {
131 	zend_hash_destroy(&interned_strings_permanent);
132 
133 	free(zend_known_strings);
134 	zend_known_strings = NULL;
135 }
136 
zend_interned_string_ht_lookup_ex(zend_ulong h,const char * str,size_t size,HashTable * interned_strings)137 static zend_always_inline zend_string *zend_interned_string_ht_lookup_ex(zend_ulong h, const char *str, size_t size, HashTable *interned_strings)
138 {
139 	uint32_t nIndex;
140 	uint32_t idx;
141 	Bucket *p;
142 
143 	nIndex = h | interned_strings->nTableMask;
144 	idx = HT_HASH(interned_strings, nIndex);
145 	while (idx != HT_INVALID_IDX) {
146 		p = HT_HASH_TO_BUCKET(interned_strings, idx);
147 		if ((p->h == h) && zend_string_equals_cstr(p->key, str, size)) {
148 			return p->key;
149 		}
150 		idx = Z_NEXT(p->val);
151 	}
152 
153 	return NULL;
154 }
155 
zend_interned_string_ht_lookup(zend_string * str,HashTable * interned_strings)156 static zend_always_inline zend_string *zend_interned_string_ht_lookup(zend_string *str, HashTable *interned_strings)
157 {
158 	zend_ulong h = ZSTR_H(str);
159 	uint32_t nIndex;
160 	uint32_t idx;
161 	Bucket *p;
162 
163 	nIndex = h | interned_strings->nTableMask;
164 	idx = HT_HASH(interned_strings, nIndex);
165 	while (idx != HT_INVALID_IDX) {
166 		p = HT_HASH_TO_BUCKET(interned_strings, idx);
167 		if ((p->h == h) && zend_string_equal_content(p->key, str)) {
168 			return p->key;
169 		}
170 		idx = Z_NEXT(p->val);
171 	}
172 
173 	return NULL;
174 }
175 
176 /* This function might be not thread safe at least because it would update the
177    hash val in the passed string. Be sure it is called in the appropriate context. */
zend_add_interned_string(zend_string * str,HashTable * interned_strings,uint32_t flags)178 static zend_always_inline zend_string *zend_add_interned_string(zend_string *str, HashTable *interned_strings, uint32_t flags)
179 {
180 	zval val;
181 
182 	GC_SET_REFCOUNT(str, 1);
183 	GC_ADD_FLAGS(str, IS_STR_INTERNED | flags);
184 
185 	ZVAL_INTERNED_STR(&val, str);
186 
187 	zend_hash_add_new(interned_strings, str, &val);
188 
189 	return str;
190 }
191 
zend_interned_string_find_permanent(zend_string * str)192 ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_string *str)
193 {
194 	zend_string_hash_val(str);
195 	return zend_interned_string_ht_lookup(str, &interned_strings_permanent);
196 }
197 
zend_init_string_for_interning(zend_string * str,bool persistent)198 static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent)
199 {
200 	uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(str);
201 	zend_ulong h = ZSTR_H(str);
202 	zend_string_delref(str);
203 	str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), persistent);
204 	GC_ADD_FLAGS(str, flags);
205 	ZSTR_H(str) = h;
206 	return str;
207 }
208 
zend_new_interned_string_permanent(zend_string * str)209 static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string *str)
210 {
211 	zend_string *ret;
212 
213 	if (ZSTR_IS_INTERNED(str)) {
214 		return str;
215 	}
216 
217 	zend_string_hash_val(str);
218 	ret = zend_interned_string_ht_lookup(str, &interned_strings_permanent);
219 	if (ret) {
220 		zend_string_release(str);
221 		return ret;
222 	}
223 
224 	ZEND_ASSERT(GC_FLAGS(str) & GC_PERSISTENT);
225 	if (GC_REFCOUNT(str) > 1) {
226 		str = zend_init_string_for_interning(str, true);
227 	}
228 
229 	return zend_add_interned_string(str, &interned_strings_permanent, IS_STR_PERMANENT);
230 }
231 
zend_new_interned_string_request(zend_string * str)232 static zend_string* ZEND_FASTCALL zend_new_interned_string_request(zend_string *str)
233 {
234 	zend_string *ret;
235 
236 	if (ZSTR_IS_INTERNED(str)) {
237 		return str;
238 	}
239 
240 	zend_string_hash_val(str);
241 
242 	/* Check for permanent strings, the table is readonly at this point. */
243 	ret = zend_interned_string_ht_lookup(str, &interned_strings_permanent);
244 	if (ret) {
245 		zend_string_release(str);
246 		return ret;
247 	}
248 
249 	ret = zend_interned_string_ht_lookup(str, &CG(interned_strings));
250 	if (ret) {
251 		zend_string_release(str);
252 		return ret;
253 	}
254 
255 	/* Create a short living interned, freed after the request. */
256 #if ZEND_RC_DEBUG
257 	if (zend_rc_debug) {
258 		/* PHP shouldn't create persistent interned string during request,
259 		 * but at least dl() may do this */
260 		ZEND_ASSERT(!(GC_FLAGS(str) & GC_PERSISTENT));
261 	}
262 #endif
263 	if (GC_REFCOUNT(str) > 1) {
264 		str = zend_init_string_for_interning(str, false);
265 	}
266 
267 	ret = zend_add_interned_string(str, &CG(interned_strings), 0);
268 
269 	return ret;
270 }
271 
zend_string_init_interned_permanent(const char * str,size_t size,bool permanent)272 static zend_string* ZEND_FASTCALL zend_string_init_interned_permanent(const char *str, size_t size, bool permanent)
273 {
274 	zend_string *ret;
275 	zend_ulong h = zend_inline_hash_func(str, size);
276 
277 	ret = zend_interned_string_ht_lookup_ex(h, str, size, &interned_strings_permanent);
278 	if (ret) {
279 		return ret;
280 	}
281 
282 	ZEND_ASSERT(permanent);
283 	ret = zend_string_init(str, size, permanent);
284 	ZSTR_H(ret) = h;
285 	return zend_add_interned_string(ret, &interned_strings_permanent, IS_STR_PERMANENT);
286 }
287 
zend_string_init_existing_interned_permanent(const char * str,size_t size,bool permanent)288 static zend_string* ZEND_FASTCALL zend_string_init_existing_interned_permanent(const char *str, size_t size, bool permanent)
289 {
290 	zend_ulong h = zend_inline_hash_func(str, size);
291 	zend_string *ret = zend_interned_string_ht_lookup_ex(h, str, size, &interned_strings_permanent);
292 	if (ret) {
293 		return ret;
294 	}
295 
296 	ZEND_ASSERT(permanent);
297 	ret = zend_string_init(str, size, permanent);
298 	ZSTR_H(ret) = h;
299 	return ret;
300 }
301 
zend_string_init_interned_request(const char * str,size_t size,bool permanent)302 static zend_string* ZEND_FASTCALL zend_string_init_interned_request(const char *str, size_t size, bool permanent)
303 {
304 	zend_string *ret;
305 	zend_ulong h = zend_inline_hash_func(str, size);
306 
307 	/* Check for permanent strings, the table is readonly at this point. */
308 	ret = zend_interned_string_ht_lookup_ex(h, str, size, &interned_strings_permanent);
309 	if (ret) {
310 		return ret;
311 	}
312 
313 	ret = zend_interned_string_ht_lookup_ex(h, str, size, &CG(interned_strings));
314 	if (ret) {
315 		return ret;
316 	}
317 
318 #if ZEND_RC_DEBUG
319 	if (zend_rc_debug) {
320 		/* PHP shouldn't create persistent interned string during request,
321 		 * but at least dl() may do this */
322 		ZEND_ASSERT(!permanent);
323 	}
324 #endif
325 	ret = zend_string_init(str, size, permanent);
326 	ZSTR_H(ret) = h;
327 
328 	/* Create a short living interned, freed after the request. */
329 	return zend_add_interned_string(ret, &CG(interned_strings), 0);
330 }
331 
zend_string_init_existing_interned_request(const char * str,size_t size,bool permanent)332 static zend_string* ZEND_FASTCALL zend_string_init_existing_interned_request(const char *str, size_t size, bool permanent)
333 {
334 	zend_ulong h = zend_inline_hash_func(str, size);
335 	zend_string *ret = zend_interned_string_ht_lookup_ex(h, str, size, &interned_strings_permanent);
336 	if (ret) {
337 		return ret;
338 	}
339 
340 	ret = zend_interned_string_ht_lookup_ex(h, str, size, &CG(interned_strings));
341 	if (ret) {
342 		return ret;
343 	}
344 
345 	ZEND_ASSERT(!permanent);
346 	ret = zend_string_init(str, size, permanent);
347 	ZSTR_H(ret) = h;
348 	return ret;
349 }
350 
zend_interned_strings_activate(void)351 ZEND_API void zend_interned_strings_activate(void)
352 {
353 	zend_init_interned_strings_ht(&CG(interned_strings), 0);
354 }
355 
zend_interned_strings_deactivate(void)356 ZEND_API void zend_interned_strings_deactivate(void)
357 {
358 	zend_hash_destroy(&CG(interned_strings));
359 }
360 
zend_interned_strings_set_request_storage_handlers(zend_new_interned_string_func_t handler,zend_string_init_interned_func_t init_handler,zend_string_init_existing_interned_func_t init_existing_handler)361 ZEND_API void zend_interned_strings_set_request_storage_handlers(zend_new_interned_string_func_t handler, zend_string_init_interned_func_t init_handler, zend_string_init_existing_interned_func_t init_existing_handler)
362 {
363 	interned_string_request_handler = handler;
364 	interned_string_init_request_handler = init_handler;
365 	interned_string_init_existing_request_handler = init_existing_handler;
366 }
367 
zend_interned_strings_switch_storage(bool request)368 ZEND_API void zend_interned_strings_switch_storage(bool request)
369 {
370 	if (request) {
371 		zend_new_interned_string = interned_string_request_handler;
372 		zend_string_init_interned = interned_string_init_request_handler;
373 		zend_string_init_existing_interned = interned_string_init_existing_request_handler;
374 	} else {
375 		zend_new_interned_string = zend_new_interned_string_permanent;
376 		zend_string_init_interned = zend_string_init_interned_permanent;
377 		zend_string_init_existing_interned = zend_string_init_existing_interned_permanent;
378 	}
379 }
380 
381 #if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64__) && !defined(__ILP32__)))
/* Emit the replacement symbol even when PHP is built without valgrind
 * support, so that running under a valgrind that is only present at runtime
 * does not produce false positives. */
#if !defined(I_REPLACE_SONAME_FNNAME_ZU)
# define I_REPLACE_SONAME_FNNAME_ZU(soname, fnname) _vgr00000ZU_ ## soname ## _ ## fnname
#endif

/* NOIPA expands to the noipa attribute where the compiler has it and to
 * nothing elsewhere. See GH-9068. */
#if __has_attribute(noipa)
# define NOIPA __attribute__((noipa))
#else
# define NOIPA
#endif
394 
I_REPLACE_SONAME_FNNAME_ZU(NONE,zend_string_equal_val)395 ZEND_API bool ZEND_FASTCALL I_REPLACE_SONAME_FNNAME_ZU(NONE,zend_string_equal_val)(const zend_string *s1, const zend_string *s2)
396 {
397 	return !memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), ZSTR_LEN(s1));
398 }
399 #endif
400 
401 #if defined(__GNUC__) && defined(__i386__)
zend_string_equal_val(const zend_string * s1,const zend_string * s2)402 ZEND_API zend_never_inline NOIPA bool ZEND_FASTCALL zend_string_equal_val(const zend_string *s1, const zend_string *s2)
403 {
404 	const char *ptr = ZSTR_VAL(s1);
405 	uintptr_t delta = (uintptr_t) s2 - (uintptr_t) s1;
406 	size_t len = ZSTR_LEN(s1);
407 	zend_ulong ret;
408 
409 	__asm__ (
410 		"0:\n\t"
411 		"movl (%2,%3), %0\n\t"
412 		"xorl (%2), %0\n\t"
413 		"jne 1f\n\t"
414 		"addl $0x4, %2\n\t"
415 		"subl $0x4, %1\n\t"
416 		"ja 0b\n\t"
417 		"movl $0x1, %0\n\t"
418 		"jmp 3f\n\t"
419 		"1:\n\t"
420 		"cmpl $0x4,%1\n\t"
421 		"jb 2f\n\t"
422 		"xorl %0, %0\n\t"
423 		"jmp 3f\n\t"
424 		"2:\n\t"
425 		"negl %1\n\t"
426 		"lea 0x20(,%1,8), %1\n\t"
427 		"shll %b1, %0\n\t"
428 		"sete %b0\n\t"
429 		"movzbl %b0, %0\n\t"
430 		"3:\n"
431 		: "=&a"(ret),
432 		  "+c"(len),
433 		  "+r"(ptr)
434 		: "r"(delta)
435 		: "cc");
436 	return ret;
437 }
438 
439 #elif defined(__GNUC__) && defined(__x86_64__) && !defined(__ILP32__)
zend_string_equal_val(const zend_string * s1,const zend_string * s2)440 ZEND_API zend_never_inline NOIPA bool ZEND_FASTCALL zend_string_equal_val(const zend_string *s1, const zend_string *s2)
441 {
442 	const char *ptr = ZSTR_VAL(s1);
443 	uintptr_t delta = (uintptr_t) s2 - (uintptr_t) s1;
444 	size_t len = ZSTR_LEN(s1);
445 	zend_ulong ret;
446 
447 	__asm__ (
448 		"0:\n\t"
449 		"movq (%2,%3), %0\n\t"
450 		"xorq (%2), %0\n\t"
451 		"jne 1f\n\t"
452 		"addq $0x8, %2\n\t"
453 		"subq $0x8, %1\n\t"
454 		"ja 0b\n\t"
455 		"movq $0x1, %0\n\t"
456 		"jmp 3f\n\t"
457 		"1:\n\t"
458 		"cmpq $0x8,%1\n\t"
459 		"jb 2f\n\t"
460 		"xorq %0, %0\n\t"
461 		"jmp 3f\n\t"
462 		"2:\n\t"
463 		"negq %1\n\t"
464 		"lea 0x40(,%1,8), %1\n\t"
465 		"shlq %b1, %0\n\t"
466 		"sete %b0\n\t"
467 		"movzbq %b0, %0\n\t"
468 		"3:\n"
469 		: "=&a"(ret),
470 		  "+c"(len),
471 		  "+r"(ptr)
472 		: "r"(delta)
473 		: "cc");
474 	return ret;
475 }
476 #endif
477 
zend_string_concat2(const char * str1,size_t str1_len,const char * str2,size_t str2_len)478 ZEND_API zend_string *zend_string_concat2(
479 		const char *str1, size_t str1_len,
480 		const char *str2, size_t str2_len)
481 {
482 	size_t len = str1_len + str2_len;
483 	zend_string *res = zend_string_alloc(len, 0);
484 
485 	memcpy(ZSTR_VAL(res), str1, str1_len);
486 	memcpy(ZSTR_VAL(res) + str1_len, str2, str2_len);
487 	ZSTR_VAL(res)[len] = '\0';
488 
489 	return res;
490 }
491 
zend_string_concat3(const char * str1,size_t str1_len,const char * str2,size_t str2_len,const char * str3,size_t str3_len)492 ZEND_API zend_string *zend_string_concat3(
493 		const char *str1, size_t str1_len,
494 		const char *str2, size_t str2_len,
495 		const char *str3, size_t str3_len)
496 {
497 	size_t len = str1_len + str2_len + str3_len;
498 	zend_string *res = zend_string_alloc(len, 0);
499 
500 	memcpy(ZSTR_VAL(res), str1, str1_len);
501 	memcpy(ZSTR_VAL(res) + str1_len, str2, str2_len);
502 	memcpy(ZSTR_VAL(res) + str1_len + str2_len, str3, str3_len);
503 	ZSTR_VAL(res)[len] = '\0';
504 
505 	return res;
506 }
507 
508 /* strlcpy and strlcat are not intercepted by msan, so we need to do it ourselves. */
509 #if __has_feature(memory_sanitizer)
510 static size_t (*libc_strlcpy)(char *__restrict, const char *__restrict, size_t);
strlcpy(char * __restrict dest,const char * __restrict src,size_t n)511 size_t strlcpy(char *__restrict dest, const char *__restrict src, size_t n)
512 {
513 	if (!libc_strlcpy) {
514 		libc_strlcpy = dlsym(RTLD_NEXT, "strlcpy");
515 	}
516 	size_t result = libc_strlcpy(dest, src, n);
517 	__msan_unpoison_string(dest);
518 	return result;
519 }
520 static size_t (*libc_strlcat)(char *__restrict, const char *__restrict, size_t);
strlcat(char * __restrict dest,const char * restrict src,size_t n)521 size_t strlcat (char *__restrict dest, const char *restrict src, size_t n)
522 {
523 	if (!libc_strlcat) {
524 		libc_strlcat = dlsym(RTLD_NEXT, "strlcat");
525 	}
526 	size_t result = libc_strlcat(dest, src, n);
527 	__msan_unpoison_string(dest);
528 	return result;
529 }
530 #endif
531