xref: /PHP-5.3/ext/standard/var_unserializer.re (revision a2045ff3)
1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 5                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2013 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Author: Sascha Schumann <sascha@schumann.cx>                         |
16  +----------------------------------------------------------------------+
17*/
18
19/* $Id$ */
20
21#include "php.h"
22#include "ext/standard/php_var.h"
23#include "php_incomplete_class.h"
24
25/* {{{ reference-handling for unserializer: var_* */
/* Number of zval pointers stored in each var_entries chunk. */
26#define VAR_ENTRIES_MAX 1024
27
/*
 * One chunk of a singly linked list of zval pointers. The var_* functions
 * below fill data[] in order, count the filled entries in used_slots, and
 * link a freshly allocated chunk through next once every existing chunk
 * holds VAR_ENTRIES_MAX entries.
 */
28typedef struct {
29	zval *data[VAR_ENTRIES_MAX];
30	long used_slots;
31	void *next;
32} var_entries;
33
34static inline void var_push(php_unserialize_data_t *var_hashx, zval **rval)
35{
36	var_entries *var_hash = var_hashx->first, *prev = NULL;
37
38	while (var_hash && var_hash->used_slots == VAR_ENTRIES_MAX) {
39		prev = var_hash;
40		var_hash = var_hash->next;
41	}
42
43	if (!var_hash) {
44		var_hash = emalloc(sizeof(var_entries));
45		var_hash->used_slots = 0;
46		var_hash->next = 0;
47
48		if (!var_hashx->first)
49			var_hashx->first = var_hash;
50		else
51			prev->next = var_hash;
52	}
53
54	var_hash->data[var_hash->used_slots++] = *rval;
55}
56
57PHPAPI void var_push_dtor(php_unserialize_data_t *var_hashx, zval **rval)
58{
59	var_entries *var_hash = var_hashx->first_dtor, *prev = NULL;
60
61	while (var_hash && var_hash->used_slots == VAR_ENTRIES_MAX) {
62		prev = var_hash;
63		var_hash = var_hash->next;
64	}
65
66	if (!var_hash) {
67		var_hash = emalloc(sizeof(var_entries));
68		var_hash->used_slots = 0;
69		var_hash->next = 0;
70
71		if (!var_hashx->first_dtor)
72			var_hashx->first_dtor = var_hash;
73		else
74			prev->next = var_hash;
75	}
76
77	Z_ADDREF_PP(rval);
78	var_hash->data[var_hash->used_slots++] = *rval;
79}
80
81PHPAPI void var_replace(php_unserialize_data_t *var_hashx, zval *ozval, zval **nzval)
82{
83	long i;
84	var_entries *var_hash = var_hashx->first;
85
86	while (var_hash) {
87		for (i = 0; i < var_hash->used_slots; i++) {
88			if (var_hash->data[i] == ozval) {
89				var_hash->data[i] = *nzval;
90				/* do not break here */
91			}
92		}
93		var_hash = var_hash->next;
94	}
95}
96
97static int var_access(php_unserialize_data_t *var_hashx, long id, zval ***store)
98{
99	var_entries *var_hash = var_hashx->first;
100
101	while (id >= VAR_ENTRIES_MAX && var_hash && var_hash->used_slots == VAR_ENTRIES_MAX) {
102		var_hash = var_hash->next;
103		id -= VAR_ENTRIES_MAX;
104	}
105
106	if (!var_hash) return !SUCCESS;
107
108	if (id < 0 || id >= var_hash->used_slots) return !SUCCESS;
109
110	*store = &var_hash->data[id];
111
112	return SUCCESS;
113}
114
115PHPAPI void var_destroy(php_unserialize_data_t *var_hashx)
116{
117	void *next;
118	long i;
119	var_entries *var_hash = var_hashx->first;
120
121	while (var_hash) {
122		next = var_hash->next;
123		efree(var_hash);
124		var_hash = next;
125	}
126
127	var_hash = var_hashx->first_dtor;
128
129	while (var_hash) {
130		for (i = 0; i < var_hash->used_slots; i++) {
131			zval_ptr_dtor(&var_hash->data[i]);
132		}
133		next = var_hash->next;
134		efree(var_hash);
135		var_hash = next;
136	}
137}
138
139/* }}} */
140
/*
 * Decode the body of an escaped ("S:") string.
 *
 * Produces *len output bytes from the input at *p. An input byte other than
 * a backslash is copied as is; a backslash must be followed by exactly two
 * hexadecimal digits, which together encode one output byte.
 *
 * p      - in/out cursor; advanced past every input byte that was consumed
 *          (also partially advanced when decoding fails midway).
 * len    - in: number of bytes to decode; out: number of bytes decoded.
 * maxlen - number of input bytes available starting at *p.
 *
 * Returns a newly allocated, NUL-terminated buffer that the caller must
 * efree(), or NULL when the input ends early or an escape is malformed.
 */
static char *unserialize_str(const unsigned char **p, size_t *len, size_t maxlen)
{
	size_t i, j;
	char *str;
	const unsigned char *end = *p + maxlen;

	/* Pointer arithmetic wrapped around: maxlen cannot be trusted. */
	if (end < *p) {
		return NULL;
	}

	str = safe_emalloc(*len, 1, 1);

	for (i = 0; i < *len; i++) {
		if (*p >= end) {
			efree(str);
			return NULL;
		}
		if (**p != '\\') {
			str[i] = (char)**p;
		} else {
			unsigned char ch = 0;

			for (j = 0; j < 2; j++) {
				(*p)++;
				/* A truncated escape must not read at or beyond end. */
				if (*p >= end) {
					efree(str);
					return NULL;
				}
				if (**p >= '0' && **p <= '9') {
					ch = (ch << 4) + (**p -'0');
				} else if (**p >= 'a' && **p <= 'f') {
					ch = (ch << 4) + (**p -'a'+10);
				} else if (**p >= 'A' && **p <= 'F') {
					ch = (ch << 4) + (**p -'A'+10);
				} else {
					efree(str);
					return NULL;
				}
			}
			str[i] = (char)ch;
		}
		(*p)++;
	}
	str[i] = 0;
	*len = i;
	return str;
}
183
/*
 * Scanner configuration for re2c. The scanner works on unsigned bytes and
 * uses the local variables named cursor, limit and marker of the function
 * that contains the rules. YYFILL expands to an empty statement, so no
 * refill code runs when the scanner asks for more input.
 */
184#define YYFILL(n) do { } while (0)
185#define YYCTYPE unsigned char
186#define YYCURSOR cursor
187#define YYLIMIT limit
188#define YYMARKER marker
189
190
/*
 * Named patterns used by the scanner rules:
 *   uiv    - digits with an optional leading '+'
 *   iv     - digits with an optional leading '+' or '-'
 *   nv     - decimal number containing a '.', optionally signed
 *   nvexp  - iv or nv followed by 'e' or 'E' and an exponent
 *   any    - any single byte
 *   object - the letter 'O' or 'C'
 */
191/*!re2c
192uiv = [+]? [0-9]+;
193iv = [+-]? [0-9]+;
194nv = [+-]? ([0-9]* "." [0-9]+|[0-9]+ "." [0-9]*);
195nvexp = (iv | nv) [eE] [+-]? iv;
196any = [\000-\377];
197object = [OC];
198*/
199
200
201
/*
 * Parse an optionally signed run of decimal digits starting at p.
 *
 * p - first byte to examine; a single leading '+' or '-' is accepted.
 * q - if non-NULL, receives the address of the first byte after the digits.
 *
 * Returns the parsed value, or 0 when no digit follows the optional sign.
 * No range checking is performed: the value is accumulated in unsigned
 * arithmetic, so input that does not fit in a long wraps around instead of
 * triggering signed-overflow undefined behaviour, and the most negative
 * long can be parsed and negated safely.
 */
static inline long parse_iv2(const unsigned char *p, const unsigned char **q)
{
	unsigned long magnitude = 0;
	int neg = 0;

	switch (*p) {
		case '-':
			neg = 1;
			/* fall-through */
		case '+':
			p++;
			break;
		default:
			break;
	}

	while (*p >= '0' && *p <= '9') {
		magnitude = magnitude * 10 + (unsigned long)(*p - '0');
		p++;
	}

	if (q) {
		*q = p;
	}

	/* Negate in the unsigned domain; -LONG_MIN is not representable as a long. */
	if (neg) {
		magnitude = 0UL - magnitude;
	}

	return (long)magnitude;
}
229
/*
 * Parse an optionally signed decimal integer at p. Same as parse_iv2() when
 * the caller does not need to know where the digits end.
 */
static inline long parse_iv(const unsigned char *p)
{
	const unsigned char **no_end_wanted = NULL;

	return parse_iv2(p, no_end_wanted);
}
234
/*
 * Parse an unsigned decimal integer at p, allowing one leading '+'.
 * Scanning stops at the first byte that is not a digit; the caller (the
 * re2c scanner) has already matched the digits, so no length is passed.
 * Overflow is not detected.
 */
static inline size_t parse_uiv(const unsigned char *p)
{
	const unsigned char *scan = p;
	size_t value = 0;

	if (*scan == '+') {
		scan++;
	}

	for (; *scan >= '0' && *scan <= '9'; scan++) {
		value = value * 10 + (size_t)(*scan - (unsigned char)'0');
	}

	return value;
}
256
/*
 * Parameter list shared by php_var_unserialize() and its helpers:
 *   rval     - address of the zval pointer that receives the value
 *   p        - in/out read cursor into the serialized data
 *   max      - upper bound of the serialized data
 *   var_hash - back-reference bookkeeping; may be NULL
 */
257#define UNSERIALIZE_PARAMETER zval **rval, const unsigned char **p, const unsigned char *max, php_unserialize_data_t *var_hash TSRMLS_DC
/* Forwards the parameters above, unchanged, to another helper. */
258#define UNSERIALIZE_PASSTHRU rval, p, max, var_hash TSRMLS_CC
259
260static inline int process_nested_data(UNSERIALIZE_PARAMETER, HashTable *ht, long elements, int objprops)
261{
262	while (elements-- > 0) {
263		zval *key, *data, **old_data;
264
265		ALLOC_INIT_ZVAL(key);
266
267		if (!php_var_unserialize(&key, p, max, NULL TSRMLS_CC)) {
268			zval_dtor(key);
269			FREE_ZVAL(key);
270			return 0;
271		}
272
273		if (Z_TYPE_P(key) != IS_LONG && Z_TYPE_P(key) != IS_STRING) {
274			zval_dtor(key);
275			FREE_ZVAL(key);
276			return 0;
277		}
278
279		ALLOC_INIT_ZVAL(data);
280
281		if (!php_var_unserialize(&data, p, max, var_hash TSRMLS_CC)) {
282			zval_dtor(key);
283			FREE_ZVAL(key);
284			zval_dtor(data);
285			FREE_ZVAL(data);
286			return 0;
287		}
288
289		if (!objprops) {
290			switch (Z_TYPE_P(key)) {
291			case IS_LONG:
292				if (zend_hash_index_find(ht, Z_LVAL_P(key), (void **)&old_data)==SUCCESS) {
293					var_push_dtor(var_hash, old_data);
294				}
295				zend_hash_index_update(ht, Z_LVAL_P(key), &data, sizeof(data), NULL);
296				break;
297			case IS_STRING:
298				if (zend_symtable_find(ht, Z_STRVAL_P(key), Z_STRLEN_P(key) + 1, (void **)&old_data)==SUCCESS) {
299					var_push_dtor(var_hash, old_data);
300				}
301				zend_symtable_update(ht, Z_STRVAL_P(key), Z_STRLEN_P(key) + 1, &data, sizeof(data), NULL);
302				break;
303			}
304		} else {
305			/* object properties should include no integers */
306			convert_to_string(key);
307			zend_hash_update(ht, Z_STRVAL_P(key), Z_STRLEN_P(key) + 1, &data,
308					sizeof data, NULL);
309		}
310
311		zval_dtor(key);
312		FREE_ZVAL(key);
313
314		if (elements && *(*p-1) != ';' && *(*p-1) != '}') {
315			(*p)--;
316			return 0;
317		}
318	}
319
320	return 1;
321}
322
323static inline int finish_nested_data(UNSERIALIZE_PARAMETER)
324{
325	if (*((*p)++) == '}')
326		return 1;
327
328#if SOMETHING_NEW_MIGHT_LEAD_TO_CRASH_ENABLE_IF_YOU_ARE_BRAVE
329	zval_ptr_dtor(rval);
330#endif
331	return 0;
332}
333
334static inline int object_custom(UNSERIALIZE_PARAMETER, zend_class_entry *ce)
335{
336	long datalen;
337
338	datalen = parse_iv2((*p) + 2, p);
339
340	(*p) += 2;
341
342	if (datalen < 0 || (*p) + datalen >= max) {
343		zend_error(E_WARNING, "Insufficient data for unserializing - %ld required, %ld present", datalen, (long)(max - (*p)));
344		return 0;
345	}
346
347	if (ce->unserialize == NULL) {
348		zend_error(E_WARNING, "Class %s has no unserializer", ce->name);
349		object_init_ex(*rval, ce);
350	} else if (ce->unserialize(rval, ce, (const unsigned char*)*p, datalen, (zend_unserialize_data *)var_hash TSRMLS_CC) != SUCCESS) {
351		return 0;
352	}
353
354	(*p) += datalen;
355
356	return finish_nested_data(UNSERIALIZE_PASSTHRU);
357}
358
359static inline long object_common1(UNSERIALIZE_PARAMETER, zend_class_entry *ce)
360{
361	long elements;
362
363	elements = parse_iv2((*p) + 2, p);
364
365	(*p) += 2;
366
367	object_init_ex(*rval, ce);
368	return elements;
369}
370
371static inline int object_common2(UNSERIALIZE_PARAMETER, long elements)
372{
373	zval *retval_ptr = NULL;
374	zval fname;
375
376	if (!process_nested_data(UNSERIALIZE_PASSTHRU, Z_OBJPROP_PP(rval), elements, 1)) {
377		return 0;
378	}
379
380	if (Z_OBJCE_PP(rval) != PHP_IC_ENTRY &&
381		zend_hash_exists(&Z_OBJCE_PP(rval)->function_table, "__wakeup", sizeof("__wakeup"))) {
382		INIT_PZVAL(&fname);
383		ZVAL_STRINGL(&fname, "__wakeup", sizeof("__wakeup") - 1, 0);
384		call_user_function_ex(CG(function_table), rval, &fname, &retval_ptr, 0, 0, 1, NULL TSRMLS_CC);
385	}
386
387	if (retval_ptr)
388		zval_ptr_dtor(&retval_ptr);
389
390	return finish_nested_data(UNSERIALIZE_PASSTHRU);
391
392}
393
/*
 * Unserialize one value from the data at *p into *rval.
 *
 * rval     - address of the zval pointer that receives the value.
 * p        - in/out read cursor; rules that succeed store the position
 *            after the consumed input, several failure paths store the
 *            position at which parsing stopped.
 * max      - upper bound of the input, used for the length checks of
 *            strings, class names and custom object payloads.
 * var_hash - back-reference bookkeeping, or NULL. Without it the "R:" and
 *            "r:" forms are rejected.
 *
 * Returns 1 on success and 0 on failure.
 *
 * The scanner rules handle, in order: "R:" and "r:" back-references (slot
 * numbers in the input are 1-based), "N;" null, "b:" boolean, "i:" integer
 * (switching to a double for values too large for a 32-bit long), "d:"
 * doubles including NAN/INF/-INF, "s:" raw strings, "S:" escaped strings,
 * "a:" arrays, "o:" objects of the standard class, "O:"/"C:" objects of a
 * named class (consulting unserialize_callback_func and falling back to the
 * incomplete-class placeholder when the class cannot be found), a stray "}"
 * (notice, failure), and any other byte (failure).
 */
394PHPAPI int php_var_unserialize(UNSERIALIZE_PARAMETER)
395{
396	const unsigned char *cursor, *limit, *marker, *start;
397	zval **rval_ref;
398
	/* limit is set to the start position; YYFILL is a no-op in this file. */
399	limit = cursor = *p;
400
	/* Every value except an "R:" reference gets its own back-reference slot. */
401	if (var_hash && cursor[0] != 'R') {
402		var_push(var_hash, rval);
403	}
404
	/* Remember where the token begins; the rule actions parse from here. */
405	start = cursor;
406
407
408
409/*!re2c
410
411"R:" iv ";"		{
412	long id;
413
414 	*p = YYCURSOR;
415	if (!var_hash) return 0;
416
417	id = parse_iv(start + 2) - 1;
418	if (id == -1 || var_access(var_hash, id, &rval_ref) != SUCCESS) {
419		return 0;
420	}
421
422	if (*rval != NULL) {
423		zval_ptr_dtor(rval);
424	}
425	*rval = *rval_ref;
426	Z_ADDREF_PP(rval);
427	Z_SET_ISREF_PP(rval);
428
429	return 1;
430}
431
432"r:" iv ";"		{
433	long id;
434
435 	*p = YYCURSOR;
436	if (!var_hash) return 0;
437
438	id = parse_iv(start + 2) - 1;
439	if (id == -1 || var_access(var_hash, id, &rval_ref) != SUCCESS) {
440		return 0;
441	}
442
443	if (*rval == *rval_ref) return 0;
444
445	if (*rval != NULL) {
446		zval_ptr_dtor(rval);
447	}
448	*rval = *rval_ref;
449	Z_ADDREF_PP(rval);
450	Z_UNSET_ISREF_PP(rval);
451
452	return 1;
453}
454
455"N;"	{
456	*p = YYCURSOR;
457	INIT_PZVAL(*rval);
458	ZVAL_NULL(*rval);
459	return 1;
460}
461
462"b:" [01] ";"	{
463	*p = YYCURSOR;
464	INIT_PZVAL(*rval);
465	ZVAL_BOOL(*rval, parse_iv(start + 2));
466	return 1;
467}
468
469"i:" iv ";"	{
470#if SIZEOF_LONG == 4
471	int digits = YYCURSOR - start - 3;
472
473	if (start[2] == '-' || start[2] == '+') {
474		digits--;
475	}
476
477	/* Use double for large long values that were serialized on a 64-bit system */
478	if (digits >= MAX_LENGTH_OF_LONG - 1) {
479		if (digits == MAX_LENGTH_OF_LONG - 1) {
480			int cmp = strncmp(YYCURSOR - MAX_LENGTH_OF_LONG, long_min_digits, MAX_LENGTH_OF_LONG - 1);
481
482			if (!(cmp < 0 || (cmp == 0 && start[2] == '-'))) {
483				goto use_double;
484			}
485		} else {
486			goto use_double;
487		}
488	}
489#endif
490	*p = YYCURSOR;
491	INIT_PZVAL(*rval);
492	ZVAL_LONG(*rval, parse_iv(start + 2));
493	return 1;
494}
495
496"d:" ("NAN" | "-"? "INF") ";"	{
497	*p = YYCURSOR;
498	INIT_PZVAL(*rval);
499
500	if (!strncmp(start + 2, "NAN", 3)) {
501		ZVAL_DOUBLE(*rval, php_get_nan());
502	} else if (!strncmp(start + 2, "INF", 3)) {
503		ZVAL_DOUBLE(*rval, php_get_inf());
504	} else if (!strncmp(start + 2, "-INF", 4)) {
505		ZVAL_DOUBLE(*rval, -php_get_inf());
506	}
507
508	return 1;
509}
510
511"d:" (iv | nv | nvexp) ";"	{
512#if SIZEOF_LONG == 4
513use_double:
514#endif
515	*p = YYCURSOR;
516	INIT_PZVAL(*rval);
517	ZVAL_DOUBLE(*rval, zend_strtod((const char *)start + 2, NULL));
518	return 1;
519}
520
521"s:" uiv ":" ["] 	{
522	size_t len, maxlen;
523	char *str;
524
525	len = parse_uiv(start + 2);
526	maxlen = max - YYCURSOR;
527	if (maxlen < len) {
528		*p = start + 2;
529		return 0;
530	}
531
532	str = (char*)YYCURSOR;
533
534	YYCURSOR += len;
535
536	if (*(YYCURSOR) != '"') {
537		*p = YYCURSOR;
538		return 0;
539	}
540
541	YYCURSOR += 2;
542	*p = YYCURSOR;
543
544	INIT_PZVAL(*rval);
545	ZVAL_STRINGL(*rval, str, len, 1);
546	return 1;
547}
548
549"S:" uiv ":" ["] 	{
550	size_t len, maxlen;
551	char *str;
552
553	len = parse_uiv(start + 2);
554	maxlen = max - YYCURSOR;
555	if (maxlen < len) {
556		*p = start + 2;
557		return 0;
558	}
559
560	if ((str = unserialize_str(&YYCURSOR, &len, maxlen)) == NULL) {
561		return 0;
562	}
563
564	if (*(YYCURSOR) != '"') {
565		efree(str);
566		*p = YYCURSOR;
567		return 0;
568	}
569
570	YYCURSOR += 2;
571	*p = YYCURSOR;
572
573	INIT_PZVAL(*rval);
574	ZVAL_STRINGL(*rval, str, len, 0);
575	return 1;
576}
577
578"a:" uiv ":" "{" {
579	long elements = parse_iv(start + 2);
580	/* use iv() not uiv() in order to check data range */
581	*p = YYCURSOR;
582
583	if (elements < 0) {
584		return 0;
585	}
586
587	INIT_PZVAL(*rval);
588
589	array_init_size(*rval, elements);
590
591	if (!process_nested_data(UNSERIALIZE_PASSTHRU, Z_ARRVAL_PP(rval), elements, 0)) {
592		return 0;
593	}
594
595	return finish_nested_data(UNSERIALIZE_PASSTHRU);
596}
597
598"o:" iv ":" ["] {
599
600	INIT_PZVAL(*rval);
601
602	return object_common2(UNSERIALIZE_PASSTHRU,
603			object_common1(UNSERIALIZE_PASSTHRU, ZEND_STANDARD_CLASS_DEF_PTR));
604}
605
606object ":" uiv ":" ["]	{
607	size_t len, len2, len3, maxlen;
608	long elements;
609	char *class_name;
610	zend_class_entry *ce;
611	zend_class_entry **pce;
612	int incomplete_class = 0;
613
614	int custom_object = 0;
615
616	zval *user_func;
617	zval *retval_ptr;
618	zval **args[1];
619	zval *arg_func_name;
620
621	if (*start == 'C') {
622		custom_object = 1;
623	}
624
625	INIT_PZVAL(*rval);
626	len2 = len = parse_uiv(start + 2);
627	maxlen = max - YYCURSOR;
628	if (maxlen < len || len == 0) {
629		*p = start + 2;
630		return 0;
631	}
632
633	class_name = (char*)YYCURSOR;
634
635	YYCURSOR += len;
636
637	if (*(YYCURSOR) != '"') {
638		*p = YYCURSOR;
639		return 0;
640	}
641	if (*(YYCURSOR+1) != ':') {
642		*p = YYCURSOR+1;
643		return 0;
644	}
645
646	len3 = strspn(class_name, "0123456789_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377\\");
647	if (len3 != len)
648	{
649		*p = YYCURSOR + len3 - len;
650		return 0;
651	}
652
653	class_name = estrndup(class_name, len);
654
655	do {
656		/* Try to find class directly */
657		if (zend_lookup_class(class_name, len2, &pce TSRMLS_CC) == SUCCESS) {
658			ce = *pce;
659			break;
660		}
661
662		/* Check for unserialize callback */
663		if ((PG(unserialize_callback_func) == NULL) || (PG(unserialize_callback_func)[0] == '\0')) {
664			incomplete_class = 1;
665			ce = PHP_IC_ENTRY;
666			break;
667		}
668
669		/* Call unserialize callback */
670		MAKE_STD_ZVAL(user_func);
671		ZVAL_STRING(user_func, PG(unserialize_callback_func), 1);
672		args[0] = &arg_func_name;
673		MAKE_STD_ZVAL(arg_func_name);
674		ZVAL_STRING(arg_func_name, class_name, 1);
675		if (call_user_function_ex(CG(function_table), NULL, user_func, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) != SUCCESS) {
676			php_error_docref(NULL TSRMLS_CC, E_WARNING, "defined (%s) but not found", user_func->value.str.val);
677			incomplete_class = 1;
678			ce = PHP_IC_ENTRY;
679			zval_ptr_dtor(&user_func);
680			zval_ptr_dtor(&arg_func_name);
681			break;
682		}
683		if (retval_ptr) {
684			zval_ptr_dtor(&retval_ptr);
685		}
686
687		/* The callback function may have defined the class */
688		if (zend_lookup_class(class_name, len2, &pce TSRMLS_CC) == SUCCESS) {
689			ce = *pce;
690		} else {
691			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Function %s() hasn't defined the class it was called for", user_func->value.str.val);
692			incomplete_class = 1;
693			ce = PHP_IC_ENTRY;
694		}
695
696		zval_ptr_dtor(&user_func);
697		zval_ptr_dtor(&arg_func_name);
698		break;
699	} while (1);
700
701	*p = YYCURSOR;
702
703	if (custom_object) {
704		int ret = object_custom(UNSERIALIZE_PASSTHRU, ce);
705
706		if (ret && incomplete_class) {
707			php_store_class_name(*rval, class_name, len2);
708		}
709		efree(class_name);
710		return ret;
711	}
712
713	elements = object_common1(UNSERIALIZE_PASSTHRU, ce);
714
715	if (incomplete_class) {
716		php_store_class_name(*rval, class_name, len2);
717	}
718	efree(class_name);
719
720	return object_common2(UNSERIALIZE_PASSTHRU, elements);
721}
722
723"}" {
724	/* this is the case where we have less data than planned */
725	php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Unexpected end of serialized data");
726	return 0; /* not sure if it should be 0 or 1 here? */
727}
728
729any	{ return 0; }
730
731*/
732
733	return 0;
734}
735