xref: /PHP-8.0/ext/json/json_encoder.c (revision 2d2c001c)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | http://www.php.net/license/3_01.txt                                  |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Author: Omar Kilani <omar@php.net>                                   |
14   |         Jakub Zelenka <bukka@php.net>                                |
15   +----------------------------------------------------------------------+
16 */
17 
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include "php.h"
23 #include "php_ini.h"
24 #include "ext/standard/info.h"
25 #include "ext/standard/html.h"
26 #include "zend_smart_str.h"
27 #include "php_json.h"
28 #include "php_json_encoder.h"
29 #include <zend_exceptions.h>
30 
31 static const char digits[] = "0123456789abcdef";
32 
33 static int php_json_escape_string(
34 		smart_str *buf,	const char *s, size_t len,
35 		int options, php_json_encoder *encoder);
36 
/*
 * Decide whether a PHP array is emitted as a JSON array or a JSON object.
 *
 * Returns PHP_JSON_OUTPUT_ARRAY when the array is empty or when its keys are
 * exactly the integers 0, 1, 2, ... in iteration order. Returns
 * PHP_JSON_OUTPUT_OBJECT as soon as a string key or an out-of-sequence integer
 * key is encountered.
 */
static int php_json_determine_array_type(zval *val) /* {{{ */
{
	HashTable *myht = Z_ARRVAL_P(val);
	/* Kept unsigned so that a very large element count can never be read as
	 * a non-positive value and silently skip the key scan below. */
	uint32_t count = myht ? zend_hash_num_elements(myht) : 0;

	if (count > 0) {
		zend_string *key;
		zend_ulong index;
		zend_ulong expected = 0;

		/* Shortcut: a packed table without holes is reported as a list
		 * without walking its keys. */
		if (HT_IS_PACKED(myht) && HT_IS_WITHOUT_HOLES(myht)) {
			return PHP_JSON_OUTPUT_ARRAY;
		}

		ZEND_HASH_FOREACH_KEY(myht, index, key) {
			/* A string key, or an integer key that is not the next one in
			 * sequence, forces object output. */
			if (key || index != expected) {
				return PHP_JSON_OUTPUT_OBJECT;
			}
			expected++;
		} ZEND_HASH_FOREACH_END();
	}

	return PHP_JSON_OUTPUT_ARRAY;
}
/* }}} */
67 
68 /* {{{ Pretty printing support functions */
69 
/* Append the formatting character `c` to `buf`, but only when PHP_JSON_PRETTY_PRINT is set in `options`. */
static inline void php_json_pretty_print_char(smart_str *buf, int options, char c) /* {{{ */
{
	if (!(options & PHP_JSON_PRETTY_PRINT)) {
		return;
	}

	smart_str_appendc(buf, c);
}
/* }}} */
77 
/*
 * When PHP_JSON_PRETTY_PRINT is set in `options`, append four spaces to `buf`
 * for every nesting level recorded in encoder->depth. Does nothing otherwise.
 */
static inline void php_json_pretty_print_indent(smart_str *buf, int options, php_json_encoder *encoder) /* {{{ */
{
	int levels_left;

	if (!(options & PHP_JSON_PRETTY_PRINT)) {
		return;
	}

	for (levels_left = encoder->depth; levels_left > 0; --levels_left) {
		smart_str_appendl(buf, "    ", 4);
	}
}
/* }}} */
89 
90 /* }}} */
91 
/* Return 1 when `d` is neither infinite nor NaN, 0 otherwise. */
static inline int php_json_is_valid_double(double d) /* {{{ */
{
	if (zend_isinf(d) || zend_isnan(d)) {
		return 0;
	}

	return 1;
}
/* }}} */
97 
/*
 * Append the textual form of `d` to `buf`.
 *
 * The number is formatted by php_gcvt() with the configured
 * serialize_precision, passing '.' and 'e' as the formatting characters.
 * When PHP_JSON_PRESERVE_ZERO_FRACTION is set and the formatted text contains
 * no '.', the suffix ".0" is added, provided it still fits in the local buffer.
 */
static inline void php_json_encode_double(smart_str *buf, double d, int options) /* {{{ */
{
	char text[PHP_DOUBLE_MAX_LENGTH];
	size_t used;

	php_gcvt(d, (int)PG(serialize_precision), '.', 'e', text);
	used = strlen(text);

	/* Two extra characters plus the terminator must still fit in text[]. */
	if ((options & PHP_JSON_PRESERVE_ZERO_FRACTION)
			&& strchr(text, '.') == NULL
			&& used < PHP_DOUBLE_MAX_LENGTH - 2) {
		text[used] = '.';
		text[used + 1] = '0';
		text[used + 2] = '\0';
		used += 2;
	}

	smart_str_appendl(buf, text, used);
}
/* }}} */
113 
/*
 * NULL-tolerant wrappers around GC_TRY_PROTECT_RECURSION() and
 * GC_TRY_UNPROTECT_RECURSION(): the underlying macro is only invoked when the
 * given hash table pointer is non-NULL.
 */
#define PHP_JSON_HASH_PROTECT_RECURSION(ht) \
	do { \
		if ((ht) != NULL) { \
			GC_TRY_PROTECT_RECURSION(ht); \
		} \
	} while (0)

#define PHP_JSON_HASH_UNPROTECT_RECURSION(ht) \
	do { \
		if ((ht) != NULL) { \
			GC_TRY_UNPROTECT_RECURSION(ht); \
		} \
	} while (0)
127 
/*
 * Encode a PHP array, or the property table an object exposes for
 * ZEND_PROP_PURPOSE_JSON, into `buf`.
 *
 * Arrays are written as a JSON array or object according to
 * php_json_determine_array_type(), unless PHP_JSON_FORCE_OBJECT is set;
 * anything that is not an array is always written as a JSON object.
 *
 * Returns SUCCESS, or FAILURE when:
 *  - the table is already marked as being visited (error_code is set to
 *    PHP_JSON_ERROR_RECURSION and "null" is appended);
 *  - encoding a member fails and PHP_JSON_PARTIAL_OUTPUT_ON_ERROR is not set;
 *  - encoder->depth exceeds encoder->max_depth (PHP_JSON_ERROR_DEPTH) and
 *    PHP_JSON_PARTIAL_OUTPUT_ON_ERROR is not set. This check runs after the
 *    members have been encoded.
 */
static int php_json_encode_array(smart_str *buf, zval *val, int options, php_json_encoder *encoder) /* {{{ */
{
	HashTable *myht, *prop_ht;
	int r;
	int need_comma = 0;
	/* Unsigned on purpose: a very large element count must not be read as a
	 * non-positive number, which would emit an empty container. */
	uint32_t count;

	if (Z_TYPE_P(val) == IS_ARRAY) {
		myht = Z_ARRVAL_P(val);
		prop_ht = NULL;
		r = (options & PHP_JSON_FORCE_OBJECT) ? PHP_JSON_OUTPUT_OBJECT : php_json_determine_array_type(val);
	} else {
		/* prop_ht remembers the table that has to be handed back through
		 * zend_release_properties() on every exit path. */
		prop_ht = myht = zend_get_properties_for(val, ZEND_PROP_PURPOSE_JSON);
		r = PHP_JSON_OUTPUT_OBJECT;
	}

	if (myht && GC_IS_RECURSIVE(myht)) {
		encoder->error_code = PHP_JSON_ERROR_RECURSION;
		smart_str_appendl(buf, "null", 4);
		zend_release_properties(prop_ht);
		return FAILURE;
	}

	PHP_JSON_HASH_PROTECT_RECURSION(myht);

	smart_str_appendc(buf, (r == PHP_JSON_OUTPUT_ARRAY) ? '[' : '{');

	++encoder->depth;

	count = myht ? zend_hash_num_elements(myht) : 0;

	if (count > 0) {
		zend_string *key;
		zval *data;
		zend_ulong index;

		ZEND_HASH_FOREACH_KEY_VAL_IND(myht, index, key, data) {
			if (r == PHP_JSON_OUTPUT_OBJECT && key
					&& ZSTR_VAL(key)[0] == '\0' && ZSTR_LEN(key) > 0
					&& Z_TYPE_P(val) == IS_OBJECT) {
				/* Skip protected and private members. */
				continue;
			}

			/* Separator and pretty-print prefix, shared by every element. */
			if (need_comma) {
				smart_str_appendc(buf, ',');
			} else {
				need_comma = 1;
			}

			php_json_pretty_print_char(buf, options, '\n');
			php_json_pretty_print_indent(buf, options, encoder);

			if (r == PHP_JSON_OUTPUT_OBJECT) {
				if (key) {
					/* Keys are always written as strings, so
					 * PHP_JSON_NUMERIC_CHECK is masked out here. If the key
					 * cannot be escaped in partial-output mode, the escaper
					 * has appended the 4-byte "null"; replace it with an
					 * empty string key. */
					if (php_json_escape_string(buf, ZSTR_VAL(key), ZSTR_LEN(key),
								options & ~PHP_JSON_NUMERIC_CHECK, encoder) == FAILURE &&
							(options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) &&
							buf->s) {
						ZSTR_LEN(buf->s) -= 4;
						smart_str_appendl(buf, "\"\"", 2);
					}
				} else {
					/* Integer keys become quoted decimal strings. */
					smart_str_appendc(buf, '"');
					smart_str_append_long(buf, (zend_long) index);
					smart_str_appendc(buf, '"');
				}

				smart_str_appendc(buf, ':');
				php_json_pretty_print_char(buf, options, ' ');
			}

			if (php_json_encode_zval(buf, data, options, encoder) == FAILURE &&
					!(options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR)) {
				PHP_JSON_HASH_UNPROTECT_RECURSION(myht);
				zend_release_properties(prop_ht);
				return FAILURE;
			}
		} ZEND_HASH_FOREACH_END();
	}

	PHP_JSON_HASH_UNPROTECT_RECURSION(myht);

	if (encoder->depth > encoder->max_depth) {
		encoder->error_code = PHP_JSON_ERROR_DEPTH;
		if (!(options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR)) {
			zend_release_properties(prop_ht);
			return FAILURE;
		}
	}
	--encoder->depth;

	/* Only keep closing bracket on same line for empty arrays/objects */
	if (need_comma) {
		php_json_pretty_print_char(buf, options, '\n');
		php_json_pretty_print_indent(buf, options, encoder);
	}

	smart_str_appendc(buf, (r == PHP_JSON_OUTPUT_ARRAY) ? ']' : '}');

	zend_release_properties(prop_ht);
	return SUCCESS;
}
/* }}} */
254 
/*
 * Append the six-character escape \uXXXX for one 16-bit code unit, written
 * with lowercase hexadecimal digits.
 */
static inline void php_json_append_unicode_escape(smart_str *buf, unsigned int unit)
{
	char *out = smart_str_extend(buf, 6);

	out[0] = '\\';
	out[1] = 'u';
	out[2] = digits[(unit >> 12) & 0xf];
	out[3] = digits[(unit >> 8) & 0xf];
	out[4] = digits[(unit >> 4) & 0xf];
	out[5] = digits[unit & 0xf];
}

/*
 * Append the string `s` of `len` bytes to `buf` as a quoted JSON string.
 *
 * - An empty input yields "".
 * - With PHP_JSON_NUMERIC_CHECK, input recognised by is_numeric_string() as an
 *   integer, or as a double that is neither infinite nor NaN, is written as a
 *   bare number instead.
 * - Bytes that need no treatment are copied in runs; the remaining bytes are
 *   escaped according to the PHP_JSON_HEX_*, PHP_JSON_UNESCAPED_* and
 *   PHP_JSON_INVALID_UTF8_* flags in `options`.
 *
 * Returns SUCCESS, or FAILURE on invalid UTF-8 when neither
 * PHP_JSON_INVALID_UTF8_IGNORE nor PHP_JSON_INVALID_UTF8_SUBSTITUTE is set. In
 * that case the buffer is rolled back to its length on entry,
 * encoder->error_code is set to PHP_JSON_ERROR_UTF8 and, with
 * PHP_JSON_PARTIAL_OUTPUT_ON_ERROR, "null" is appended.
 */
static int php_json_escape_string(
		smart_str *buf, const char *s, size_t len,
		int options, php_json_encoder *encoder) /* {{{ */
{
	int utf8_status;
	unsigned int cp;
	size_t run, rollback_len;

	if (len == 0) {
		smart_str_appendl(buf, "\"\"", 2);
		return SUCCESS;
	}

	if (options & PHP_JSON_NUMERIC_CHECK) {
		double dval;
		zend_long lval;

		switch (is_numeric_string(s, len, &lval, &dval, 0)) {
			case IS_LONG:
				smart_str_append_long(buf, lval);
				return SUCCESS;

			case IS_DOUBLE:
				if (php_json_is_valid_double(dval)) {
					php_json_encode_double(buf, dval, options);
					return SUCCESS;
				}
				break;

			default:
				break;
		}
	}

	/* Remember where this string starts so a UTF-8 failure can undo it. */
	rollback_len = buf->s ? ZSTR_LEN(buf->s) : 0;

	/* pre-allocate for string length plus 2 quotes */
	smart_str_alloc(buf, len+2, 0);
	smart_str_appendc(buf, '"');

	/* `run` counts the bytes at the front of `s` that are waiting to be
	 * copied to the output unchanged. */
	run = 0;

	do {
		/* One bit per byte value; a set bit marks a byte that cannot be
		 * copied verbatim (control characters, the characters handled in the
		 * switch below, and every byte >= 0x80). */
		static const uint32_t charmap[8] = {
			0xffffffff, 0x500080c4, 0x10000000, 0x00000000,
			0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff};

		cp = (unsigned char)s[run];
		if (EXPECTED(!ZEND_BIT_TEST(charmap, cp))) {
			/* Plain byte: extend the pending run. */
			run++;
			len--;
			if (len == 0) {
				smart_str_appendl(buf, s, run);
				break;
			}
			continue;
		}

		/* Flush the pending run before treating the special byte. */
		if (run) {
			smart_str_appendl(buf, s, run);
			s += run;
			run = 0;
		}

		cp = (unsigned char)s[0];
		if (EXPECTED(cp < 0x80)) {
			/* Single-byte character with a dedicated escape. */
			s++;
			switch (cp) {
				case '"':
					if (options & PHP_JSON_HEX_QUOT) {
						smart_str_appendl(buf, "\\u0022", 6);
					} else {
						smart_str_appendl(buf, "\\\"", 2);
					}
					break;

				case '\\':
					smart_str_appendl(buf, "\\\\", 2);
					break;

				case '/':
					if (options & PHP_JSON_UNESCAPED_SLASHES) {
						smart_str_appendc(buf, '/');
					} else {
						smart_str_appendl(buf, "\\/", 2);
					}
					break;

				case '\b':
					smart_str_appendl(buf, "\\b", 2);
					break;

				case '\f':
					smart_str_appendl(buf, "\\f", 2);
					break;

				case '\n':
					smart_str_appendl(buf, "\\n", 2);
					break;

				case '\r':
					smart_str_appendl(buf, "\\r", 2);
					break;

				case '\t':
					smart_str_appendl(buf, "\\t", 2);
					break;

				case '<':
					if (options & PHP_JSON_HEX_TAG) {
						smart_str_appendl(buf, "\\u003C", 6);
					} else {
						smart_str_appendc(buf, '<');
					}
					break;

				case '>':
					if (options & PHP_JSON_HEX_TAG) {
						smart_str_appendl(buf, "\\u003E", 6);
					} else {
						smart_str_appendc(buf, '>');
					}
					break;

				case '&':
					if (options & PHP_JSON_HEX_AMP) {
						smart_str_appendl(buf, "\\u0026", 6);
					} else {
						smart_str_appendc(buf, '&');
					}
					break;

				case '\'':
					if (options & PHP_JSON_HEX_APOS) {
						smart_str_appendl(buf, "\\u0027", 6);
					} else {
						smart_str_appendc(buf, '\'');
					}
					break;

				default:
					/* Remaining control characters: \u00XX. */
					ZEND_ASSERT(cp < ' ');
					php_json_append_unicode_escape(buf, cp);
					break;
			}
			len--;
			continue;
		}

		/* Multi-byte sequence: decode one character; `run` receives the
		 * number of bytes consumed. */
		cp = php_next_utf8_char((unsigned char *)s, len, &run, &utf8_status);

		/* check whether UTF8 character is correct */
		if (UNEXPECTED(utf8_status != SUCCESS)) {
			if (!(options & (PHP_JSON_INVALID_UTF8_IGNORE | PHP_JSON_INVALID_UTF8_SUBSTITUTE))) {
				ZSTR_LEN(buf->s) = rollback_len;
				encoder->error_code = PHP_JSON_ERROR_UTF8;
				if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
					smart_str_appendl(buf, "null", 4);
				}
				return FAILURE;
			}

			/* PHP_JSON_INVALID_UTF8_IGNORE wins when both flags are set:
			 * the invalid bytes are dropped without a replacement. */
			if (!(options & PHP_JSON_INVALID_UTF8_IGNORE)) {
				/* Use Unicode character 'REPLACEMENT CHARACTER' (U+FFFD) */
				if (options & PHP_JSON_UNESCAPED_UNICODE) {
					smart_str_appendl(buf, "\xef\xbf\xbd", 3);
				} else {
					smart_str_appendl(buf, "\\ufffd", 6);
				}
			}
		} else if ((options & PHP_JSON_UNESCAPED_UNICODE)
				&& ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS)
					|| (cp != 0x2028 && cp != 0x2029))) {
			/* Copy the raw bytes. U+2028/U+2029 are still escaped unless
			 * both JSON_UNESCAPED_UNICODE and
			 * JSON_UNESCAPED_LINE_TERMINATORS were provided. */
			smart_str_appendl(buf, s, run);
		} else {
			/* From http://en.wikipedia.org/wiki/UTF16 */
			if (cp >= 0x10000) {
				unsigned int offset = cp - 0x10000;

				/* High surrogate first, then fall through to the low one. */
				php_json_append_unicode_escape(buf, (unsigned short)((offset >> 10) | 0xd800));
				cp = (unsigned short)((offset & 0x3ff) | 0xdc00);
			}
			php_json_append_unicode_escape(buf, cp);
		}

		s += run;
		len -= run;
		run = 0;
	} while (len);

	smart_str_appendc(buf, '"');

	return SUCCESS;
}
/* }}} */
468 
/*
 * Encode an object through its jsonSerialize() method.
 *
 * The method is called on `val` and its return value is encoded in place of
 * the object. When the method returns the very same object, the object is
 * encoded directly with php_json_encode_array().
 *
 * Returns the result of encoding the return value, or FAILURE when the
 * object's property table is already marked as being visited
 * (PHP_JSON_ERROR_RECURSION), when the call fails or yields no value, or when
 * an exception is pending afterwards. On each of these failures "null" is
 * appended if PHP_JSON_PARTIAL_OUTPUT_ON_ERROR is set.
 */
static int php_json_encode_serializable_object(smart_str *buf, zval *val, int options, php_json_encoder *encoder) /* {{{ */
{
	zend_class_entry *ce = Z_OBJCE_P(val);
	HashTable *guard_ht = Z_OBJPROP_P(val);
	zval retval, fname;
	int return_code;
	int returned_self;

	if (guard_ht && GC_IS_RECURSIVE(guard_ht)) {
		encoder->error_code = PHP_JSON_ERROR_RECURSION;
		if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
			smart_str_appendl(buf, "null", 4);
		}
		return FAILURE;
	}

	PHP_JSON_HASH_PROTECT_RECURSION(guard_ht);

	ZVAL_STRING(&fname, "jsonSerialize");

	if (call_user_function(NULL, val, &fname, &retval, 0, NULL) == FAILURE
			|| Z_TYPE(retval) == IS_UNDEF) {
		/* Make sure the caller sees an exception for the failed call. */
		if (!EG(exception)) {
			zend_throw_exception_ex(NULL, 0, "Failed calling %s::jsonSerialize()", ZSTR_VAL(ce->name));
		}
		goto failed;
	}

	if (EG(exception)) {
		/* Error already raised */
		zval_ptr_dtor(&retval);
		goto failed;
	}

	returned_self = (Z_TYPE(retval) == IS_OBJECT) && (Z_OBJ(retval) == Z_OBJ_P(val));

	if (returned_self) {
		/* Handle the case where jsonSerialize does: return $this; by going
		 * straight to encode array. The guard is lifted first. */
		PHP_JSON_HASH_UNPROTECT_RECURSION(guard_ht);
		return_code = php_json_encode_array(buf, &retval, options, encoder);
	} else {
		/* All other types, encode as normal */
		return_code = php_json_encode_zval(buf, &retval, options, encoder);
		PHP_JSON_HASH_UNPROTECT_RECURSION(guard_ht);
	}

	zval_ptr_dtor(&retval);
	zval_ptr_dtor(&fname);

	return return_code;

failed:
	/* Shared failure tail: release the method name, optionally emit "null",
	 * lift the recursion guard. */
	zval_ptr_dtor(&fname);

	if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
		smart_str_appendl(buf, "null", 4);
	}
	PHP_JSON_HASH_UNPROTECT_RECURSION(guard_ht);
	return FAILURE;
}
/* }}} */
530 
/*
 * Encode an arbitrary zval into `buf`, dispatching on its type.
 *
 * References are unwrapped first. null, booleans and integers are written
 * directly; strings go through php_json_escape_string(); objects that are
 * instances of php_json_serializable_ce go through
 * php_json_encode_serializable_object(); other objects and arrays go through
 * php_json_encode_array().
 *
 * An infinite or NaN double sets PHP_JSON_ERROR_INF_OR_NAN and writes "0", but
 * the function still returns SUCCESS. Any other type sets
 * PHP_JSON_ERROR_UNSUPPORTED_TYPE and returns FAILURE, writing "null" when
 * PHP_JSON_PARTIAL_OUTPUT_ON_ERROR is set.
 */
int php_json_encode_zval(smart_str *buf, zval *val, int options, php_json_encoder *encoder) /* {{{ */
{
	/* Follow references until the underlying value is reached. */
	while (Z_TYPE_P(val) == IS_REFERENCE) {
		val = Z_REFVAL_P(val);
	}

	switch (Z_TYPE_P(val))
	{
		case IS_NULL:
			smart_str_appendl(buf, "null", 4);
			return SUCCESS;

		case IS_TRUE:
			smart_str_appendl(buf, "true", 4);
			return SUCCESS;

		case IS_FALSE:
			smart_str_appendl(buf, "false", 5);
			return SUCCESS;

		case IS_LONG:
			smart_str_append_long(buf, Z_LVAL_P(val));
			return SUCCESS;

		case IS_DOUBLE:
			if (!php_json_is_valid_double(Z_DVAL_P(val))) {
				encoder->error_code = PHP_JSON_ERROR_INF_OR_NAN;
				smart_str_appendc(buf, '0');
				return SUCCESS;
			}
			php_json_encode_double(buf, Z_DVAL_P(val), options);
			return SUCCESS;

		case IS_STRING:
			return php_json_escape_string(buf, Z_STRVAL_P(val), Z_STRLEN_P(val), options, encoder);

		case IS_OBJECT:
			if (instanceof_function(Z_OBJCE_P(val), php_json_serializable_ce)) {
				return php_json_encode_serializable_object(buf, val, options, encoder);
			}
			/* fallthrough -- Non-serializable object */
		case IS_ARRAY: {
			/* Avoid modifications (and potential freeing) of the array through a reference when a
			 * jsonSerialize() method is invoked. */
			zval held;
			int status;

			ZVAL_COPY(&held, val);
			status = php_json_encode_array(buf, &held, options, encoder);
			zval_ptr_dtor_nogc(&held);
			return status;
		}

		default:
			encoder->error_code = PHP_JSON_ERROR_UNSUPPORTED_TYPE;
			if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
				smart_str_appendl(buf, "null", 4);
			}
			return FAILURE;
	}
}
/* }}} */
594