xref: /PHP-8.3/ext/xml/xml.c (revision 6c82ca21)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Stig Sæther Bakken <ssb@php.net>                            |
14    |          Thies C. Arntzen <thies@thieso.net>                         |
15    |          Sterling Hughes <sterling@php.net>                          |
16    +----------------------------------------------------------------------+
17  */
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include "php.h"
24 
25 #include "zend_variables.h"
26 #include "ext/standard/info.h"
27 #include "ext/standard/html.h"
28 
29 #ifdef HAVE_XML
30 
31 #include "php_xml.h"
32 # include "ext/standard/head.h"
33 #ifdef LIBXML_EXPAT_COMPAT
34 #include "ext/libxml/php_libxml.h"
35 #endif
36 
37 #include "xml_arginfo.h"
38 
39 /* Short-term TODO list:
40  * - Implement XML_ExternalEntityParserCreate()
41  * - XML_SetCommentHandler
42  * - XML_SetCdataSectionHandler
43  * - XML_SetParamEntityParsing
44  */
45 
46 /* Long-term TODO list:
47  * - Fix the expat library so you can install your own memory manager
48  *   functions
49  */
50 
51 /* Known bugs:
52  * - Weird things happen with <![CDATA[]]> sections.
53  */
54 
/* Module globals of the xml extension. */
ZEND_BEGIN_MODULE_GLOBALS(xml)
	XML_Char *default_encoding; /* assigned "UTF-8" by PHP_GINIT_FUNCTION(xml) */
ZEND_END_MODULE_GLOBALS(xml)

ZEND_DECLARE_MODULE_GLOBALS(xml)

/* Shorthand for reading/writing one field of the xml module globals. */
#define XML(v) ZEND_MODULE_GLOBALS_ACCESSOR(xml, v)
62 
/* State attached to every XMLParser object.
 *
 * The embedded zend_object `std` is the last member; xml_parser_from_obj()
 * walks back from it with XtOffsetOf(xml_parser, std) to recover this struct.
 */
typedef struct {
	int case_folding;          /* non-zero: _xml_decode_tag() upper-cases decoded names */
	XML_Parser parser;         /* underlying parser handle, released via XML_ParserFree() */
	XML_Char *target_encoding; /* encoding name passed to xml_utf8_decode() for output strings */

	/* Reference to the object itself, for convenience.
	 * It is not owned, do not release it. */
	zval index;

	/* We return a pointer to these zvals in get_gc(), so it's
	 * important that a) they are adjacent b) object is the first
	 * and c) the number of zvals is kept up to date. */
#define XML_PARSER_NUM_ZVALS 12
	zval object;                       /* used as fci.object when a handler is invoked */
	zval startElementHandler;
	zval endElementHandler;
	zval characterDataHandler;
	zval processingInstructionHandler;
	zval defaultHandler;
	zval unparsedEntityDeclHandler;
	zval notationDeclHandler;
	zval externalEntityRefHandler;
	zval unknownEncodingHandler;
	zval startNamespaceDeclHandler;
	zval endNamespaceDeclHandler;

	/* One function pointer per handler above. They are forwarded to
	 * xml_call_handler(), which does not read them in this file. */
	zend_function *startElementPtr;
	zend_function *endElementPtr;
	zend_function *characterDataPtr;
	zend_function *processingInstructionPtr;
	zend_function *defaultPtr;
	zend_function *unparsedEntityDeclPtr;
	zend_function *notationDeclPtr;
	zend_function *externalEntityRefPtr;
	zend_function *unknownEncodingPtr;
	zend_function *startNamespaceDeclPtr;
	zend_function *endNamespaceDeclPtr;

	zval data;       /* when defined, the element/cdata callbacks append tag records to this array */
	zval info;       /* when defined, _xml_add_to_info() records tag-name => record indexes here */
	int level;       /* current element depth: ++ in the start handler, -- in the end handler */
	int toffset;     /* number of leading characters skipped from tag names (see SKIP_TAGSTART) */
	int curtag;      /* running record index stored by _xml_add_to_info() */
	zval *ctag;      /* record inserted into `data` by the most recent start-element callback */
	char **ltags;    /* ltags[level-1] holds an estrdup()'d copy of the open tag name at that depth */
	int lastwasopen; /* 1 right after an "open" record was stored; reset to 0 by the end handler */
	int skipwhite;   /* non-zero: character data made only of ' ', '\t', '\n' starts no new value */
	int isparsing;   /* NOTE(review): not referenced in the visible part of this file */

	XML_Char *baseURI; /* efree()'d in xml_parser_free_obj() when set */

	zend_object std;
} xml_parser;
116 
117 
/* One supported target encoding: its name plus optional single-character
 * conversion callbacks (NULL when no conversion function is provided). */
typedef struct {
	XML_Char *name;                                    /* encoding name, matched case-insensitively by xml_get_encoding() */
	char (*decoding_function)(unsigned short);         /* code point -> byte in this encoding */
	unsigned short (*encoding_function)(unsigned char); /* byte in this encoding -> code point */
} xml_encoding;
123 
124 /* {{{ dynamically loadable module stuff */
/* Only emitted when the extension is built with COMPILE_DL_XML defined. */
#ifdef COMPILE_DL_XML
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(xml)
#endif /* COMPILE_DL_XML */
131 /* }}} */
132 
/* Deepest element nesting for which tag records are kept in parser->data /
 * parser->ltags; deeper elements only trigger a "Maximum depth exceeded" warning. */
#define XML_MAXLEVEL 255 /* XXX this should be dynamic */

/* Skips the first parser->toffset characters of `str`, clamped to the string
 * length so the result never points past the terminating NUL.
 * Caveats: relies on a variable named `parser` being in scope at the call
 * site, evaluates `str` more than once, and compares the int toffset against
 * the size_t returned by strlen(). */
#define SKIP_TAGSTART(str) ((str) + (parser->toffset > strlen(str) ? strlen(str) : parser->toffset))

/* Class entry and object handlers of XMLParser, both filled in by PHP_MINIT_FUNCTION(xml). */
static zend_class_entry *xml_parser_ce;
static zend_object_handlers xml_parser_object_handlers;
139 
140 /* {{{ function prototypes */
/* Module lifecycle hooks */
PHP_MINIT_FUNCTION(xml);
PHP_MINFO_FUNCTION(xml);
static PHP_GINIT_FUNCTION(xml);

/* XMLParser object handlers */
static zend_object *xml_parser_create_object(zend_class_entry *class_type);
static void xml_parser_free_obj(zend_object *object);
static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n);
static zend_function *xml_parser_get_constructor(zend_object *object);

/* Internal helpers */
static zend_string *xml_utf8_decode(const XML_Char *, size_t, const XML_Char *);
static void xml_set_handler(zval *, zval *);
inline static unsigned short xml_encode_iso_8859_1(unsigned char);
inline static char xml_decode_iso_8859_1(unsigned short);
inline static unsigned short xml_encode_us_ascii(unsigned char);
inline static char xml_decode_us_ascii(unsigned short);
static void xml_call_handler(xml_parser *, zval *, zend_function *, int, zval *, zval *);
static void _xml_xmlchar_zval(const XML_Char *, int, const XML_Char *, zval *);
static int _xml_xmlcharlen(const XML_Char *);
static void _xml_add_to_info(xml_parser *parser, const char *name);
inline static zend_string *_xml_decode_tag(xml_parser *parser, const XML_Char *tag);

/* Parser callbacks; the first argument carries the xml_parser (directly as
 * user data, or via XML_GetUserData() for the external entity callback). */
void _xml_startElementHandler(void *, const XML_Char *, const XML_Char **);
void _xml_endElementHandler(void *, const XML_Char *);
void _xml_characterDataHandler(void *, const XML_Char *, int);
void _xml_processingInstructionHandler(void *, const XML_Char *, const XML_Char *);
void _xml_defaultHandler(void *, const XML_Char *, int);
void _xml_unparsedEntityDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
void _xml_notationDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
int  _xml_externalEntityRefHandler(XML_Parser, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);

void _xml_startNamespaceDeclHandler(void *, const XML_Char *, const XML_Char *);
void _xml_endNamespaceDeclHandler(void *, const XML_Char *);
173 /* }}} */
174 
/* In LIBXML_EXPAT_COMPAT builds the module declares "libxml" as a required dependency. */
#ifdef LIBXML_EXPAT_COMPAT
static const zend_module_dep xml_deps[] = {
	ZEND_MOD_REQUIRED("libxml")
	ZEND_MOD_END
};
#endif
181 
/* Module descriptor: only the startup, info and globals-constructor hooks are
 * provided; every other lifecycle slot is NULL. */
zend_module_entry xml_module_entry = {
#ifdef LIBXML_EXPAT_COMPAT
	STANDARD_MODULE_HEADER_EX, NULL,
	xml_deps,
#else
	STANDARD_MODULE_HEADER,
#endif
	"xml",                /* extension name */
	ext_functions,        /* extension function list */
	PHP_MINIT(xml),       /* extension-wide startup function */
	NULL,                 /* extension-wide shutdown function */
	NULL,                 /* per-request startup function */
	NULL,                 /* per-request shutdown function */
	PHP_MINFO(xml),       /* information function */
	PHP_XML_VERSION,
	PHP_MODULE_GLOBALS(xml), /* globals descriptor */
	PHP_GINIT(xml),          /* globals ctor */
	NULL,                    /* globals dtor */
	NULL,                    /* post deactivate */
	STANDARD_MODULE_PROPERTIES_EX
};
203 
/* Supported target encodings, terminated by an entry whose name is NULL.
 * ISO-8859-1 and US-ASCII carry per-character conversion callbacks; UTF-8 has
 * none, so xml_utf8_decode() returns UTF-8 input unchanged for it.
 */
const xml_encoding xml_encodings[] = {
	{ (XML_Char *)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
	{ (XML_Char *)"US-ASCII",   xml_decode_us_ascii,   xml_encode_us_ascii   },
	{ (XML_Char *)"UTF-8",      NULL,                  NULL                  },
	{ (XML_Char *)NULL,         NULL,                  NULL                  }
};

/* Allocator callbacks (emalloc/erealloc/efree wrappers); filled in by PHP_MINIT_FUNCTION(xml). */
static XML_Memory_Handling_Suite php_xml_mem_hdlrs;
215 
216 /* }}} */
217 
218 /* {{{ startup, shutdown and info functions */
PHP_GINIT_FUNCTION(xml)219 static PHP_GINIT_FUNCTION(xml)
220 {
221 #if defined(COMPILE_DL_XML) && defined(ZTS)
222 	ZEND_TSRMLS_CACHE_UPDATE();
223 #endif
224 	xml_globals->default_encoding = (XML_Char*)"UTF-8";
225 }
226 
/* malloc-style callback for the parser's memory suite, backed by emalloc(). */
static void *php_xml_malloc_wrapper(size_t sz)
{
	void *block = emalloc(sz);

	return block;
}
231 
/* realloc-style callback for the parser's memory suite, backed by erealloc(). */
static void *php_xml_realloc_wrapper(void *ptr, size_t sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
236 
/* free-style callback for the parser's memory suite; a NULL pointer is ignored. */
static void php_xml_free_wrapper(void *ptr)
{
	if (ptr == NULL) {
		return;
	}
	efree(ptr);
}
243 
PHP_MINIT_FUNCTION(xml)244 PHP_MINIT_FUNCTION(xml)
245 {
246 	xml_parser_ce = register_class_XMLParser();
247 	xml_parser_ce->create_object = xml_parser_create_object;
248 	xml_parser_ce->default_object_handlers = &xml_parser_object_handlers;
249 
250 	memcpy(&xml_parser_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
251 	xml_parser_object_handlers.offset = XtOffsetOf(xml_parser, std);
252 	xml_parser_object_handlers.free_obj = xml_parser_free_obj;
253 	xml_parser_object_handlers.get_gc = xml_parser_get_gc;
254 	xml_parser_object_handlers.get_constructor = xml_parser_get_constructor;
255 	xml_parser_object_handlers.clone_obj = NULL;
256 	xml_parser_object_handlers.compare = zend_objects_not_comparable;
257 
258 	register_xml_symbols(module_number);
259 
260 	/* this object should not be pre-initialised at compile time,
261 	   as the order of members may vary */
262 
263 	php_xml_mem_hdlrs.malloc_fcn = php_xml_malloc_wrapper;
264 	php_xml_mem_hdlrs.realloc_fcn = php_xml_realloc_wrapper;
265 	php_xml_mem_hdlrs.free_fcn = php_xml_free_wrapper;
266 
267 	return SUCCESS;
268 }
269 
/* Prints the extension's info table: two fixed "active" rows followed by the
 * version of the XML backend selected at compile time. */
PHP_MINFO_FUNCTION(xml)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "XML Support", "active");
	php_info_print_table_row(2, "XML Namespace Support", "active");
#if defined(LIBXML_DOTTED_VERSION) && defined(LIBXML_EXPAT_COMPAT)
	/* libxml2 compatibility build */
	php_info_print_table_row(2, "libxml2 Version", LIBXML_DOTTED_VERSION);
#else
	/* otherwise report what XML_ExpatVersion() returns */
	php_info_print_table_row(2, "EXPAT Version", XML_ExpatVersion());
#endif
	php_info_print_table_end();
}
282 /* }}} */
283 
284 /* {{{ extension-internal functions */
285 
/* Stores `s` in `ret` as a string decoded to `encoding`, or FALSE when `s` is
 * NULL. A `len` of 0 means "measure the NUL-terminated string". */
static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
{
	if (s != NULL) {
		int nchars = (len != 0) ? len : _xml_xmlcharlen(s);

		ZVAL_STR(ret, xml_utf8_decode(s, nchars, encoding));
	} else {
		ZVAL_FALSE(ret);
	}
}
297 /* }}} */
298 
xml_parser_from_obj(zend_object * obj)299 static inline xml_parser *xml_parser_from_obj(zend_object *obj) {
300 	return (xml_parser *)((char *)(obj) - XtOffsetOf(xml_parser, std));
301 }
302 
303 #define Z_XMLPARSER_P(zv) xml_parser_from_obj(Z_OBJ_P(zv))
304 
xml_parser_create_object(zend_class_entry * class_type)305 static zend_object *xml_parser_create_object(zend_class_entry *class_type) {
306 	xml_parser *intern = zend_object_alloc(sizeof(xml_parser), class_type);
307 	memset(intern, 0, sizeof(xml_parser) - sizeof(zend_object));
308 
309 	zend_object_std_init(&intern->std, class_type);
310 	object_properties_init(&intern->std, class_type);
311 
312 	return &intern->std;
313 }
314 
/* Frees the stored open-tag names for the first min(level, XML_MAXLEVEL)
 * depths and then the ltags array itself. Does nothing when ltags is NULL. */
static void xml_parser_free_ltags(xml_parser *parser)
{
	if (parser->ltags == NULL) {
		return;
	}

	int depth = (parser->level < XML_MAXLEVEL) ? parser->level : XML_MAXLEVEL;

	for (int i = 0; i < depth; i++) {
		efree(parser->ltags[i]);
	}
	efree(parser->ltags);
}
324 
/* free_obj handler: releases the parser handle, the tag stack, every handler
 * that is set, the base URI, the `object` zval and finally the standard
 * object data. */
static void xml_parser_free_obj(zend_object *object)
{
	xml_parser *parser = xml_parser_from_obj(object);

	if (parser->parser) {
		XML_ParserFree(parser->parser);
	}
	xml_parser_free_ltags(parser);

	/* Handlers are released in declaration order; unset slots are skipped. */
	zval *handlers[] = {
		&parser->startElementHandler,
		&parser->endElementHandler,
		&parser->characterDataHandler,
		&parser->processingInstructionHandler,
		&parser->defaultHandler,
		&parser->unparsedEntityDeclHandler,
		&parser->notationDeclHandler,
		&parser->externalEntityRefHandler,
		&parser->unknownEncodingHandler,
		&parser->startNamespaceDeclHandler,
		&parser->endNamespaceDeclHandler,
	};
	for (size_t i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++) {
		if (!Z_ISUNDEF_P(handlers[i])) {
			zval_ptr_dtor(handlers[i]);
		}
	}

	if (parser->baseURI) {
		efree(parser->baseURI);
	}
	if (!Z_ISUNDEF(parser->object)) {
		zval_ptr_dtor(&parser->object);
	}

	zend_object_std_dtor(&parser->std);
}
375 
xml_parser_get_gc(zend_object * object,zval ** table,int * n)376 static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n)
377 {
378 	xml_parser *parser = xml_parser_from_obj(object);
379 	*table = &parser->object;
380 	*n = XML_PARSER_NUM_ZVALS;
381 	return zend_std_get_properties(object);
382 }
383 
xml_parser_get_constructor(zend_object * object)384 static zend_function *xml_parser_get_constructor(zend_object *object) {
385 	zend_throw_error(NULL, "Cannot directly construct XMLParser, use xml_parser_create() or xml_parser_create_ns() instead");
386 	return NULL;
387 }
388 
389 /* {{{ xml_set_handler() */
/* Replaces the callback stored in `handler` with `data`.
 *
 * handler: handler slot inside the xml_parser (must not be NULL).
 * data:    new callback. Arrays and objects are stored as-is; any other value
 *          is converted to a string in place, and an empty string clears the
 *          slot (leaves it UNDEF).
 *
 * The previous callback is released only after the slot has been updated.
 * Releasing it first would leave the slot pointing at a destroyed value while
 * destructors run, and a destructor that sets a handler on the same parser
 * would then release that value a second time.
 */
static void xml_set_handler(zval *handler, zval *data)
{
	zval previous;

	ZVAL_COPY_VALUE(&previous, handler);

	/* IS_ARRAY might indicate that we're using array($obj, 'method') syntax */
	if (Z_TYPE_P(data) != IS_ARRAY && Z_TYPE_P(data) != IS_OBJECT) {
		convert_to_string(data);
		if (Z_STRLEN_P(data) == 0) {
			ZVAL_UNDEF(handler);
			zval_ptr_dtor(&previous);
			return;
		}
	}

	ZVAL_COPY(handler, data);
	zval_ptr_dtor(&previous);
}
408 /* }}} */
409 
410 /* {{{ xml_call_handler() */
/* Invokes `handler` with `argc` arguments from `argv`, using parser->object as
 * the call's object, and stores the result in `retval` (left UNDEF when no
 * call is made). The call is skipped if parser or handler is NULL or an
 * exception is already pending. A failed call raises an E_WARNING naming the
 * handler. The arguments in `argv` are always destroyed before returning.
 * `function_ptr` is accepted but not used here.
 */
static void xml_call_handler(xml_parser *parser, zval *handler, zend_function *function_ptr, int argc, zval *argv, zval *retval)
{
	ZVAL_UNDEF(retval);

	if (parser != NULL && handler != NULL && !EG(exception)) {
		zend_fcall_info fci;

		fci.size = sizeof(fci);
		ZVAL_COPY_VALUE(&fci.function_name, handler);
		fci.object = Z_OBJ(parser->object);
		fci.retval = retval;
		fci.param_count = argc;
		fci.params = argv;
		fci.named_params = NULL;

		if (zend_call_function(&fci, NULL) == FAILURE) {
			if (Z_TYPE_P(handler) == IS_STRING) {
				php_error_docref(NULL, E_WARNING, "Unable to call handler %s()", Z_STRVAL_P(handler));
			} else {
				zval *target = NULL;
				zval *method = NULL;

				/* array($obj, 'method') callables get a Class::method() message */
				if (Z_TYPE_P(handler) == IS_ARRAY) {
					target = zend_hash_index_find(Z_ARRVAL_P(handler), 0);
					if (target != NULL) {
						method = zend_hash_index_find(Z_ARRVAL_P(handler), 1);
					}
				}

				if (target != NULL && method != NULL &&
						Z_TYPE_P(target) == IS_OBJECT &&
						Z_TYPE_P(method) == IS_STRING) {
					php_error_docref(NULL, E_WARNING, "Unable to call handler %s::%s()", ZSTR_VAL(Z_OBJCE_P(target)->name), Z_STRVAL_P(method));
				} else {
					php_error_docref(NULL, E_WARNING, "Unable to call handler");
				}
			}
		}
	}

	for (int arg = 0; arg < argc; arg++) {
		zval_ptr_dtor(&argv[arg]);
	}
}
449 /* }}} */
450 
451 /* {{{ xml_encode_iso_8859_1() */
/* ISO-8859-1 byte -> code point: the value is returned unchanged, widened. */
inline static unsigned short xml_encode_iso_8859_1(unsigned char c)
{
	unsigned short code_point = c;

	return code_point;
}
456 /* }}} */
457 
458 /* {{{ xml_decode_iso_8859_1() */
/* Code point -> ISO-8859-1 byte; anything above 0xff becomes '?'. */
inline static char xml_decode_iso_8859_1(unsigned short c)
{
	if (c > 0xff) {
		return '?';
	}
	return (char) c;
}
463 /* }}} */
464 
465 /* {{{ xml_encode_us_ascii() */
/* US-ASCII byte -> code point: the value is returned unchanged, widened. */
inline static unsigned short xml_encode_us_ascii(unsigned char c)
{
	unsigned short code_point = c;

	return code_point;
}
470 /* }}} */
471 
472 /* {{{ xml_decode_us_ascii() */
/* Code point -> US-ASCII byte; anything above 0x7f becomes '?'. */
inline static char xml_decode_us_ascii(unsigned short c)
{
	if (c > 0x7f) {
		return '?';
	}
	return (char) c;
}
477 /* }}} */
478 
479 /* {{{ xml_get_encoding() */
xml_get_encoding(const XML_Char * name)480 static const xml_encoding *xml_get_encoding(const XML_Char *name)
481 {
482 	const xml_encoding *enc = &xml_encodings[0];
483 
484 	while (enc && enc->name) {
485 		if (strcasecmp((char *)name, (char *)enc->name) == 0) {
486 			return enc;
487 		}
488 		enc++;
489 	}
490 	return NULL;
491 }
492 /* }}} */
493 
494 /* {{{ xml_utf8_decode() */
xml_utf8_decode(const XML_Char * s,size_t len,const XML_Char * encoding)495 static zend_string *xml_utf8_decode(const XML_Char *s, size_t len, const XML_Char *encoding)
496 {
497 	size_t pos = 0;
498 	unsigned int c;
499 	char (*decoder)(unsigned short) = NULL;
500 	const xml_encoding *enc = xml_get_encoding(encoding);
501 	zend_string *str;
502 
503 	if (enc) {
504 		decoder = enc->decoding_function;
505 	}
506 
507 	if (decoder == NULL) {
508 		/* If the target encoding was unknown, or no decoder function
509 		 * was specified, return the UTF-8-encoded data as-is.
510 		 */
511 		str = zend_string_init((char *)s, len, 0);
512 		return str;
513 	}
514 
515 	str = zend_string_alloc(len, 0);
516 	ZSTR_LEN(str) = 0;
517 	while (pos < len) {
518 		zend_result status = FAILURE;
519 		c = php_next_utf8_char((const unsigned char*)s, len, &pos, &status);
520 
521 		if (status == FAILURE || c > 0xFFU) {
522 			c = '?';
523 		}
524 
525 		ZSTR_VAL(str)[ZSTR_LEN(str)++] = (unsigned int)decoder(c);
526 	}
527 	ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
528 	if (ZSTR_LEN(str) < len) {
529 		str = zend_string_truncate(str, ZSTR_LEN(str), 0);
530 	}
531 
532 	return str;
533 }
534 /* }}} */
535 
536 /* {{{ _xml_xmlcharlen() */
/* Returns the number of XML_Char units before the terminating zero of `s`. */
static int _xml_xmlcharlen(const XML_Char *s)
{
	int count;

	for (count = 0; s[count]; count++) {
		/* just counting */
	}

	return count;
}
547 /* }}} */
548 
549 /* {{{ _xml_add_to_info() */
/* Appends the current record index (parser->curtag) to the list stored under
 * `name` in parser->info, creating that list on first use, then advances
 * curtag. Does nothing when parser->info is not set. */
static void _xml_add_to_info(xml_parser *parser, const char *name)
{
	if (Z_ISUNDEF(parser->info)) {
		return;
	}

	HashTable *info = Z_ARRVAL(parser->info);
	size_t name_len = strlen(name);
	zval *indexes = zend_hash_str_find(info, name, name_len);

	if (indexes == NULL) {
		zval fresh;

		array_init(&fresh);
		indexes = zend_hash_str_update(info, name, name_len, &fresh);
	}

	add_next_index_long(indexes, parser->curtag);
	parser->curtag++;
}
569 /* }}} */
570 
571 /* {{{ _xml_decode_tag() */
_xml_decode_tag(xml_parser * parser,const XML_Char * tag)572 static zend_string *_xml_decode_tag(xml_parser *parser, const XML_Char *tag)
573 {
574 	zend_string *str;
575 
576 	str = xml_utf8_decode(tag, _xml_xmlcharlen(tag), parser->target_encoding);
577 
578 	if (parser->case_folding) {
579 		zend_str_toupper(ZSTR_VAL(str), ZSTR_LEN(str));
580 	}
581 
582 	return str;
583 }
584 /* }}} */
585 
586 /* {{{ _xml_startElementHandler() */
_xml_startElementHandler(void * userData,const XML_Char * name,const XML_Char ** attributes)587 void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes)
588 {
589 	xml_parser *parser = (xml_parser *)userData;
590 	const char **attrs = (const char **) attributes;
591 	zend_string *att, *tag_name, *val;
592 	zval retval, args[3];
593 
594 	if (!parser) {
595 		return;
596 	}
597 
598 	parser->level++;
599 
600 	tag_name = _xml_decode_tag(parser, name);
601 
602 	if (!Z_ISUNDEF(parser->startElementHandler)) {
603 		ZVAL_COPY(&args[0], &parser->index);
604 		ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
605 		array_init(&args[2]);
606 
607 		while (attributes && *attributes) {
608 			zval tmp;
609 
610 			att = _xml_decode_tag(parser, attributes[0]);
611 			val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
612 
613 			ZVAL_STR(&tmp, val);
614 			zend_symtable_update(Z_ARRVAL(args[2]), att, &tmp);
615 
616 			attributes += 2;
617 
618 			zend_string_release_ex(att, 0);
619 		}
620 
621 		xml_call_handler(parser, &parser->startElementHandler, parser->startElementPtr, 3, args, &retval);
622 		zval_ptr_dtor(&retval);
623 	}
624 
625 	if (!Z_ISUNDEF(parser->data) && !EG(exception)) {
626 		if (parser->level <= XML_MAXLEVEL)  {
627 			zval tag, atr;
628 			int atcnt = 0;
629 
630 			array_init(&tag);
631 			array_init(&atr);
632 
633 			_xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
634 
635 			add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
636 			add_assoc_string(&tag, "type", "open");
637 			add_assoc_long(&tag, "level", parser->level);
638 
639 			parser->ltags[parser->level-1] = estrdup(ZSTR_VAL(tag_name));
640 			parser->lastwasopen = 1;
641 
642 			attributes = (const XML_Char **) attrs;
643 
644 			while (attributes && *attributes) {
645 				zval tmp;
646 
647 				att = _xml_decode_tag(parser, attributes[0]);
648 				val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
649 
650 				ZVAL_STR(&tmp, val);
651 				zend_symtable_update(Z_ARRVAL(atr), att, &tmp);
652 
653 				atcnt++;
654 				attributes += 2;
655 
656 				zend_string_release_ex(att, 0);
657 			}
658 
659 			if (atcnt) {
660 				zend_hash_str_add(Z_ARRVAL(tag), "attributes", sizeof("attributes") - 1, &atr);
661 			} else {
662 				zval_ptr_dtor(&atr);
663 			}
664 
665 			parser->ctag = zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
666 		} else if (parser->level == (XML_MAXLEVEL + 1)) {
667 						php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
668 		}
669 	}
670 
671 	zend_string_release_ex(tag_name, 0);
672 }
673 /* }}} */
674 
675 /* {{{ _xml_endElementHandler() */
_xml_endElementHandler(void * userData,const XML_Char * name)676 void _xml_endElementHandler(void *userData, const XML_Char *name)
677 {
678 	xml_parser *parser = (xml_parser *)userData;
679 
680 	if (!parser) {
681 		return;
682 	}
683 
684 	zval retval, args[2];
685 
686 	zend_string *tag_name = _xml_decode_tag(parser, name);
687 
688 	if (!Z_ISUNDEF(parser->endElementHandler)) {
689 		ZVAL_COPY(&args[0], &parser->index);
690 		ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
691 
692 		xml_call_handler(parser, &parser->endElementHandler, parser->endElementPtr, 2, args, &retval);
693 		zval_ptr_dtor(&retval);
694 	}
695 
696 	if (!Z_ISUNDEF(parser->data) && !EG(exception)) {
697 		zval tag;
698 
699 		if (parser->lastwasopen) {
700 			add_assoc_string(parser->ctag, "type", "complete");
701 		} else {
702 			array_init(&tag);
703 
704 			_xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
705 
706 			add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
707 			add_assoc_string(&tag, "type", "close");
708 			add_assoc_long(&tag, "level", parser->level);
709 
710 			zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
711 		}
712 
713 		parser->lastwasopen = 0;
714 	}
715 
716 	zend_string_release_ex(tag_name, 0);
717 
718 	if ((parser->ltags) && (parser->level <= XML_MAXLEVEL)) {
719 		efree(parser->ltags[parser->level-1]);
720 	}
721 
722 	parser->level--;
723 }
724 /* }}} */
725 
726 /* {{{ _xml_characterDataHandler() */
_xml_characterDataHandler(void * userData,const XML_Char * s,int len)727 void _xml_characterDataHandler(void *userData, const XML_Char *s, int len)
728 {
729 	xml_parser *parser = (xml_parser *)userData;
730 
731 	if (!parser) {
732 		return;
733 	}
734 
735 	zval retval, args[2];
736 
737 	if (!Z_ISUNDEF(parser->characterDataHandler)) {
738 		ZVAL_COPY(&args[0], &parser->index);
739 		_xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
740 		xml_call_handler(parser, &parser->characterDataHandler, parser->characterDataPtr, 2, args, &retval);
741 		zval_ptr_dtor(&retval);
742 	}
743 
744 	if (Z_ISUNDEF(parser->data) || EG(exception)) {
745 		return;
746 	}
747 
748 	bool doprint = 0;
749 	zend_string *decoded_value;
750 	decoded_value = xml_utf8_decode(s, len, parser->target_encoding);
751 	if (parser->skipwhite) {
752 		for (size_t i = 0; i < ZSTR_LEN(decoded_value); i++) {
753 			switch (ZSTR_VAL(decoded_value)[i]) {
754 				case ' ':
755 				case '\t':
756 				case '\n':
757 					continue;
758 				default:
759 					doprint = 1;
760 					break;
761 			}
762 			if (doprint) {
763 				break;
764 			}
765 		}
766 	}
767 	if (parser->lastwasopen) {
768 		zval *myval;
769 		/* check if the current tag already has a value - if yes append to that! */
770 		if ((myval = zend_hash_find(Z_ARRVAL_P(parser->ctag), ZSTR_KNOWN(ZEND_STR_VALUE)))) {
771 			size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
772 			Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
773 			strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
774 					ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
775 			zend_string_release_ex(decoded_value, 0);
776 		} else {
777 			if (doprint || (! parser->skipwhite)) {
778 				add_assoc_str(parser->ctag, "value", decoded_value);
779 			} else {
780 				zend_string_release_ex(decoded_value, 0);
781 			}
782 		}
783 	} else {
784 		zval tag;
785 		zval *curtag, *mytype, *myval;
786 		ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
787 			if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
788 				if (zend_string_equals_literal(Z_STR_P(mytype), "cdata")) {
789 					if ((myval = zend_hash_find(Z_ARRVAL_P(curtag), ZSTR_KNOWN(ZEND_STR_VALUE)))) {
790 						size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
791 						Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
792 						strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
793 								ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
794 						zend_string_release_ex(decoded_value, 0);
795 						return;
796 					}
797 				}
798 			}
799 			break;
800 		} ZEND_HASH_FOREACH_END();
801 		if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) {
802 			array_init(&tag);
803 			_xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
804 			add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
805 			add_assoc_str(&tag, "value", decoded_value);
806 			add_assoc_string(&tag, "type", "cdata");
807 			add_assoc_long(&tag, "level", parser->level);
808 			zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
809 		} else if (parser->level == (XML_MAXLEVEL + 1)) {
810 								php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
811 		} else {
812 			zend_string_release_ex(decoded_value, 0);
813 		}
814 	}
815 }
816 /* }}} */
817 
818 /* {{{ _xml_processingInstructionHandler() */
_xml_processingInstructionHandler(void * userData,const XML_Char * target,const XML_Char * data)819 void _xml_processingInstructionHandler(void *userData, const XML_Char *target, const XML_Char *data)
820 {
821 	xml_parser *parser = (xml_parser *)userData;
822 
823 	if (!parser || Z_ISUNDEF(parser->processingInstructionHandler)) {
824 		return;
825 	}
826 
827 	zval retval, args[3];
828 
829 	ZVAL_COPY(&args[0], &parser->index);
830 	_xml_xmlchar_zval(target, 0, parser->target_encoding, &args[1]);
831 	_xml_xmlchar_zval(data, 0, parser->target_encoding, &args[2]);
832 	xml_call_handler(parser, &parser->processingInstructionHandler, parser->processingInstructionPtr, 3, args, &retval);
833 	zval_ptr_dtor(&retval);
834 }
835 /* }}} */
836 
837 /* {{{ _xml_defaultHandler() */
_xml_defaultHandler(void * userData,const XML_Char * s,int len)838 void _xml_defaultHandler(void *userData, const XML_Char *s, int len)
839 {
840 	xml_parser *parser = (xml_parser *)userData;
841 
842 	if (!parser || Z_ISUNDEF(parser->defaultHandler)) {
843 		return;
844 	}
845 
846 	zval retval, args[2];
847 
848 	ZVAL_COPY(&args[0], &parser->index);
849 	_xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
850 	xml_call_handler(parser, &parser->defaultHandler, parser->defaultPtr, 2, args, &retval);
851 	zval_ptr_dtor(&retval);
852 }
853 /* }}} */
854 
855 /* {{{ _xml_unparsedEntityDeclHandler() */
_xml_unparsedEntityDeclHandler(void * userData,const XML_Char * entityName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)856 void _xml_unparsedEntityDeclHandler(void *userData,
857 	const XML_Char *entityName, const XML_Char *base, const XML_Char *systemId,
858 	const XML_Char *publicId, const XML_Char *notationName)
859 {
860 	xml_parser *parser = (xml_parser *)userData;
861 
862 	if (!parser || Z_ISUNDEF(parser->unparsedEntityDeclHandler)) {
863 		return;
864 	}
865 
866 	zval retval, args[6];
867 
868 	ZVAL_COPY(&args[0], &parser->index);
869 	_xml_xmlchar_zval(entityName, 0, parser->target_encoding, &args[1]);
870 	_xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
871 	_xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
872 	_xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
873 	_xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[5]);
874 	xml_call_handler(parser, &parser->unparsedEntityDeclHandler, parser->unparsedEntityDeclPtr, 6, args, &retval);
875 	zval_ptr_dtor(&retval);
876 }
877 /* }}} */
878 
879 /* {{{ _xml_notationDeclHandler() */
_xml_notationDeclHandler(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)880 void _xml_notationDeclHandler(void *userData, const XML_Char *notationName,
881 	const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
882 {
883 	xml_parser *parser = (xml_parser *)userData;
884 
885 	if (!parser || Z_ISUNDEF(parser->notationDeclHandler)) {
886 		return;
887 	}
888 
889 	zval retval, args[5];
890 
891 	ZVAL_COPY(&args[0], &parser->index);
892 	_xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[1]);
893 	_xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
894 	_xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
895 	_xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
896 	xml_call_handler(parser, &parser->notationDeclHandler, parser->notationDeclPtr, 5, args, &retval);
897 	zval_ptr_dtor(&retval);
898 }
899 /* }}} */
900 
901 /* {{{ _xml_externalEntityRefHandler() */
/* External-entity-reference callback: forwards (parser, openEntityNames,
 * base, systemId, publicId) to the user's handler and returns the handler's
 * result converted to an integer. Returns 0 when there is no parser, no
 * handler, or the call produced no result. */
int _xml_externalEntityRefHandler(XML_Parser parserPtr, const XML_Char *openEntityNames,
	const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
{
	xml_parser *parser = XML_GetUserData(parserPtr);

	if (parser == NULL || Z_ISUNDEF(parser->externalEntityRefHandler)) {
		return 0;
	}

	const XML_Char *fields[4] = { openEntityNames, base, systemId, publicId };
	zval call_args[5], call_result;

	ZVAL_COPY(&call_args[0], &parser->index);
	for (int i = 0; i < 4; i++) {
		_xml_xmlchar_zval(fields[i], 0, parser->target_encoding, &call_args[i + 1]);
	}

	xml_call_handler(parser, &parser->externalEntityRefHandler, parser->externalEntityRefPtr, 5, call_args, &call_result);

	if (Z_ISUNDEF(call_result)) {
		return 0;
	}

	convert_to_long(&call_result);
	return (int) Z_LVAL(call_result);
}
929 /* }}} */
930 
931 /* {{{ _xml_startNamespaceDeclHandler() */
_xml_startNamespaceDeclHandler(void * userData,const XML_Char * prefix,const XML_Char * uri)932 void _xml_startNamespaceDeclHandler(void *userData,const XML_Char *prefix, const XML_Char *uri)
933 {
934 	xml_parser *parser = (xml_parser *)userData;
935 
936 	if (!parser || Z_ISUNDEF(parser->startNamespaceDeclHandler)) {
937 		return;
938 	}
939 
940 	zval retval, args[3];
941 
942 	ZVAL_COPY(&args[0], &parser->index);
943 	_xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
944 	_xml_xmlchar_zval(uri, 0, parser->target_encoding, &args[2]);
945 	xml_call_handler(parser, &parser->startNamespaceDeclHandler, parser->startNamespaceDeclPtr, 3, args, &retval);
946 	zval_ptr_dtor(&retval);
947 }
948 /* }}} */
949 
950 /* {{{ _xml_endNamespaceDeclHandler() */
_xml_endNamespaceDeclHandler(void * userData,const XML_Char * prefix)951 void _xml_endNamespaceDeclHandler(void *userData, const XML_Char *prefix)
952 {
953 	xml_parser *parser = (xml_parser *)userData;
954 
955 	if (!parser || Z_ISUNDEF(parser->endNamespaceDeclHandler)) {
956 		return;
957 	}
958 
959 	zval retval, args[2];
960 
961 	ZVAL_COPY(&args[0], &parser->index);
962 	_xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
963 	xml_call_handler(parser, &parser->endNamespaceDeclHandler, parser->endNamespaceDeclPtr, 2, args, &retval);
964 	zval_ptr_dtor(&retval);
965 }
966 /* }}} */
967 
968 /************************* EXTENSION FUNCTIONS *************************/
969 
/* Shared implementation of xml_parser_create() and xml_parser_create_ns().
 *
 * Accepts an optional, nullable source encoding and, when ns_support is
 * non-zero, an optional namespace separator (":" when omitted).
 *
 * An empty encoding string creates the underlying parser with a NULL
 * encoding while still using the default encoding as target encoding.
 * Any encoding other than ISO-8859-1, UTF-8 or US-ASCII (compared
 * case-insensitively) raises a ValueError.
 *
 * The new parser object is placed in return_value; it starts with case
 * folding enabled and the isparsing flag cleared. */
static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_support) /* {{{ */
{
	zend_string *encoding_param = NULL;
	char *ns_param = NULL;
	size_t ns_param_len = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), (ns_support ? "|S!s": "|S!"), &encoding_param, &ns_param, &ns_param_len) == FAILURE) {
		RETURN_THROWS();
	}

	XML_Char *encoding;
	int auto_detect = 0;

	/* The supported encoding types are hardcoded here because
	 * we are limited to the encodings supported by expat/xmltok.
	 */
	if (encoding_param == NULL) {
		encoding = XML(default_encoding);
	} else if (ZSTR_LEN(encoding_param) == 0) {
		/* Empty string: no explicit source encoding is handed to the parser. */
		encoding = XML(default_encoding);
		auto_detect = 1;
	} else if (zend_string_equals_literal_ci(encoding_param, "ISO-8859-1")) {
		encoding = (XML_Char*)"ISO-8859-1";
	} else if (zend_string_equals_literal_ci(encoding_param, "UTF-8")) {
		encoding = (XML_Char*)"UTF-8";
	} else if (zend_string_equals_literal_ci(encoding_param, "US-ASCII")) {
		encoding = (XML_Char*)"US-ASCII";
	} else {
		zend_argument_value_error(1, "is not a supported source encoding");
		RETURN_THROWS();
	}

	/* Namespace-aware parsers fall back to ":" as separator. */
	if (ns_support && ns_param == NULL){
		ns_param = ":";
	}

	object_init_ex(return_value, xml_parser_ce);

	xml_parser *parser = Z_XMLPARSER_P(return_value);
	XML_Char *source_encoding = auto_detect ? NULL : encoding;

	parser->parser = XML_ParserCreate_MM(source_encoding, &php_xml_mem_hdlrs, (XML_Char*)ns_param);
	parser->target_encoding = encoding;
	parser->case_folding = 1;
	parser->isparsing = 0;

	/* The C-level callbacks get the xml_parser struct back as user data;
	 * index keeps a non-refcounted copy of the parser object's zval. */
	XML_SetUserData(parser->parser, parser);
	ZVAL_COPY_VALUE(&parser->index, return_value);
}
1023 /* }}} */
1024 
1025 /* {{{ Create an XML parser */
PHP_FUNCTION(xml_parser_create)1026 PHP_FUNCTION(xml_parser_create)
1027 {
1028 	php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1029 }
1030 /* }}} */
1031 
1032 /* {{{ Create an XML parser */
PHP_FUNCTION(xml_parser_create_ns)1033 PHP_FUNCTION(xml_parser_create_ns)
1034 {
1035 	php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1036 }
1037 /* }}} */
1038 
1039 /* {{{ Set up object which should be used for callbacks */
PHP_FUNCTION(xml_set_object)1040 PHP_FUNCTION(xml_set_object)
1041 {
1042 	xml_parser *parser;
1043 	zval *pind, *mythis;
1044 
1045 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oo", &pind, xml_parser_ce, &mythis) == FAILURE) {
1046 		RETURN_THROWS();
1047 	}
1048 
1049 	parser = Z_XMLPARSER_P(pind);
1050 
1051 	zval_ptr_dtor(&parser->object);
1052 	ZVAL_OBJ_COPY(&parser->object, Z_OBJ_P(mythis));
1053 
1054 	RETURN_TRUE;
1055 }
1056 /* }}} */
1057 
1058 /* {{{ Set up start and end element handlers */
PHP_FUNCTION(xml_set_element_handler)1059 PHP_FUNCTION(xml_set_element_handler)
1060 {
1061 	xml_parser *parser;
1062 	zval *pind, *shdl, *ehdl;
1063 
1064 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ozz", &pind, xml_parser_ce, &shdl, &ehdl) == FAILURE) {
1065 		RETURN_THROWS();
1066 	}
1067 
1068 	parser = Z_XMLPARSER_P(pind);
1069 	xml_set_handler(&parser->startElementHandler, shdl);
1070 	xml_set_handler(&parser->endElementHandler, ehdl);
1071 	XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
1072 
1073 	RETURN_TRUE;
1074 }
1075 /* }}} */
1076 
1077 /* {{{ Set up character data handler */
PHP_FUNCTION(xml_set_character_data_handler)1078 PHP_FUNCTION(xml_set_character_data_handler)
1079 {
1080 	xml_parser *parser;
1081 	zval *pind, *hdl;
1082 
1083 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1084 		RETURN_THROWS();
1085 	}
1086 
1087 	parser = Z_XMLPARSER_P(pind);
1088 	xml_set_handler(&parser->characterDataHandler, hdl);
1089 	XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
1090 
1091 	RETURN_TRUE;
1092 }
1093 /* }}} */
1094 
1095 /* {{{ Set up processing instruction (PI) handler */
PHP_FUNCTION(xml_set_processing_instruction_handler)1096 PHP_FUNCTION(xml_set_processing_instruction_handler)
1097 {
1098 	xml_parser *parser;
1099 	zval *pind, *hdl;
1100 
1101 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1102 		RETURN_THROWS();
1103 	}
1104 
1105 	parser = Z_XMLPARSER_P(pind);
1106 	xml_set_handler(&parser->processingInstructionHandler, hdl);
1107 	XML_SetProcessingInstructionHandler(parser->parser, _xml_processingInstructionHandler);
1108 
1109 	RETURN_TRUE;
1110 }
1111 /* }}} */
1112 
1113 /* {{{ Set up default handler */
PHP_FUNCTION(xml_set_default_handler)1114 PHP_FUNCTION(xml_set_default_handler)
1115 {
1116 	xml_parser *parser;
1117 	zval *pind, *hdl;
1118 
1119 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1120 		RETURN_THROWS();
1121 	}
1122 
1123 	parser = Z_XMLPARSER_P(pind);
1124 	xml_set_handler(&parser->defaultHandler, hdl);
1125 	XML_SetDefaultHandler(parser->parser, _xml_defaultHandler);
1126 
1127 	RETURN_TRUE;
1128 }
1129 /* }}} */
1130 
1131 /* {{{ Set up unparsed entity declaration handler */
PHP_FUNCTION(xml_set_unparsed_entity_decl_handler)1132 PHP_FUNCTION(xml_set_unparsed_entity_decl_handler)
1133 {
1134 	xml_parser *parser;
1135 	zval *pind, *hdl;
1136 
1137 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1138 		RETURN_THROWS();
1139 	}
1140 
1141 	parser = Z_XMLPARSER_P(pind);
1142 	xml_set_handler(&parser->unparsedEntityDeclHandler, hdl);
1143 	XML_SetUnparsedEntityDeclHandler(parser->parser, _xml_unparsedEntityDeclHandler);
1144 
1145 	RETURN_TRUE;
1146 }
1147 /* }}} */
1148 
1149 /* {{{ Set up notation declaration handler */
PHP_FUNCTION(xml_set_notation_decl_handler)1150 PHP_FUNCTION(xml_set_notation_decl_handler)
1151 {
1152 	xml_parser *parser;
1153 	zval *pind, *hdl;
1154 
1155 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1156 		RETURN_THROWS();
1157 	}
1158 
1159 	parser = Z_XMLPARSER_P(pind);
1160 	xml_set_handler(&parser->notationDeclHandler, hdl);
1161 	XML_SetNotationDeclHandler(parser->parser, _xml_notationDeclHandler);
1162 
1163 	RETURN_TRUE;
1164 }
1165 /* }}} */
1166 
1167 /* {{{ Set up external entity reference handler */
PHP_FUNCTION(xml_set_external_entity_ref_handler)1168 PHP_FUNCTION(xml_set_external_entity_ref_handler)
1169 {
1170 	xml_parser *parser;
1171 	zval *pind, *hdl;
1172 
1173 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1174 		RETURN_THROWS();
1175 	}
1176 
1177 	parser = Z_XMLPARSER_P(pind);
1178 	xml_set_handler(&parser->externalEntityRefHandler, hdl);
1179 	XML_SetExternalEntityRefHandler(parser->parser, (void *) _xml_externalEntityRefHandler);
1180 
1181 	RETURN_TRUE;
1182 }
1183 /* }}} */
1184 
1185 /* {{{ Set up character data handler */
PHP_FUNCTION(xml_set_start_namespace_decl_handler)1186 PHP_FUNCTION(xml_set_start_namespace_decl_handler)
1187 {
1188 	xml_parser *parser;
1189 	zval *pind, *hdl;
1190 
1191 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1192 		RETURN_THROWS();
1193 	}
1194 
1195 	parser = Z_XMLPARSER_P(pind);
1196 	xml_set_handler(&parser->startNamespaceDeclHandler, hdl);
1197 	XML_SetStartNamespaceDeclHandler(parser->parser, _xml_startNamespaceDeclHandler);
1198 
1199 	RETURN_TRUE;
1200 }
1201 /* }}} */
1202 
1203 /* {{{ Set up character data handler */
PHP_FUNCTION(xml_set_end_namespace_decl_handler)1204 PHP_FUNCTION(xml_set_end_namespace_decl_handler)
1205 {
1206 	xml_parser *parser;
1207 	zval *pind, *hdl;
1208 
1209 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1210 		RETURN_THROWS();
1211 	}
1212 
1213 	parser = Z_XMLPARSER_P(pind);
1214 	xml_set_handler(&parser->endNamespaceDeclHandler, hdl);
1215 	XML_SetEndNamespaceDeclHandler(parser->parser, _xml_endNamespaceDeclHandler);
1216 
1217 	RETURN_TRUE;
1218 }
1219 /* }}} */
1220 
1221 /* {{{ Start parsing an XML document */
PHP_FUNCTION(xml_parse)1222 PHP_FUNCTION(xml_parse)
1223 {
1224 	xml_parser *parser;
1225 	zval *pind;
1226 	char *data;
1227 	size_t data_len;
1228 	int ret;
1229 	bool isFinal = 0;
1230 
1231 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
1232 		RETURN_THROWS();
1233 	}
1234 
1235 	parser = Z_XMLPARSER_P(pind);
1236 	if (parser->isparsing) {
1237 		zend_throw_error(NULL, "Parser must not be called recursively");
1238 		RETURN_THROWS();
1239 	}
1240 	parser->isparsing = 1;
1241 	ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, isFinal);
1242 	parser->isparsing = 0;
1243 	RETVAL_LONG(ret);
1244 }
1245 
1246 /* }}} */
1247 
1248 /* {{{ Parsing a XML document */
PHP_FUNCTION(xml_parse_into_struct)1249 PHP_FUNCTION(xml_parse_into_struct)
1250 {
1251 	xml_parser *parser;
1252 	zval *pind, *xdata, *info = NULL;
1253 	char *data;
1254 	size_t data_len;
1255 	int ret;
1256 
1257 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
1258 		RETURN_THROWS();
1259 	}
1260 
1261 	parser = Z_XMLPARSER_P(pind);
1262 
1263 	if (parser->isparsing) {
1264 		php_error_docref(NULL, E_WARNING, "Parser must not be called recursively");
1265 		RETURN_FALSE;
1266 	}
1267 
1268 	if (info) {
1269 		info = zend_try_array_init(info);
1270 		if (!info) {
1271 			RETURN_THROWS();
1272 		}
1273 	}
1274 
1275 	xdata = zend_try_array_init(xdata);
1276 	if (!xdata) {
1277 		RETURN_THROWS();
1278 	}
1279 
1280 	ZVAL_COPY_VALUE(&parser->data, xdata);
1281 
1282 	if (info) {
1283 		ZVAL_COPY_VALUE(&parser->info, info);
1284 	}
1285 
1286 	parser->level = 0;
1287 	xml_parser_free_ltags(parser);
1288 	parser->ltags = safe_emalloc(XML_MAXLEVEL, sizeof(char *), 0);
1289 	memset(parser->ltags, 0, XML_MAXLEVEL * sizeof(char *));
1290 
1291 	XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
1292 	XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
1293 
1294 	parser->isparsing = 1;
1295 	ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, 1);
1296 	parser->isparsing = 0;
1297 
1298 	RETVAL_LONG(ret);
1299 }
1300 /* }}} */
1301 
1302 /* {{{ Get XML parser error code */
PHP_FUNCTION(xml_get_error_code)1303 PHP_FUNCTION(xml_get_error_code)
1304 {
1305 	xml_parser *parser;
1306 	zval *pind;
1307 
1308 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1309 		RETURN_THROWS();
1310 	}
1311 
1312 	parser = Z_XMLPARSER_P(pind);
1313 	RETURN_LONG((zend_long)XML_GetErrorCode(parser->parser));
1314 }
1315 /* }}} */
1316 
1317 /* {{{ Get XML parser error string */
PHP_FUNCTION(xml_error_string)1318 PHP_FUNCTION(xml_error_string)
1319 {
1320 	zend_long code;
1321 	char *str;
1322 
1323 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &code) == FAILURE) {
1324 		RETURN_THROWS();
1325 	}
1326 
1327 	str = (char *)XML_ErrorString((int)code);
1328 	if (str) {
1329 		RETVAL_STRING(str);
1330 	}
1331 }
1332 /* }}} */
1333 
1334 /* {{{ Get current line number for an XML parser */
PHP_FUNCTION(xml_get_current_line_number)1335 PHP_FUNCTION(xml_get_current_line_number)
1336 {
1337 	xml_parser *parser;
1338 	zval *pind;
1339 
1340 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1341 		RETURN_THROWS();
1342 	}
1343 
1344 	parser = Z_XMLPARSER_P(pind);
1345 	RETVAL_LONG(XML_GetCurrentLineNumber(parser->parser));
1346 }
1347 /* }}} */
1348 
1349 /* {{{ Get current column number for an XML parser */
PHP_FUNCTION(xml_get_current_column_number)1350 PHP_FUNCTION(xml_get_current_column_number)
1351 {
1352 	xml_parser *parser;
1353 	zval *pind;
1354 
1355 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1356 		RETURN_THROWS();
1357 	}
1358 
1359 	parser = Z_XMLPARSER_P(pind);
1360 	RETVAL_LONG(XML_GetCurrentColumnNumber(parser->parser));
1361 }
1362 /* }}} */
1363 
1364 /* {{{ Get current byte index for an XML parser */
PHP_FUNCTION(xml_get_current_byte_index)1365 PHP_FUNCTION(xml_get_current_byte_index)
1366 {
1367 	xml_parser *parser;
1368 	zval *pind;
1369 
1370 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1371 		RETURN_THROWS();
1372 	}
1373 
1374 	parser = Z_XMLPARSER_P(pind);
1375 	RETVAL_LONG(XML_GetCurrentByteIndex(parser->parser));
1376 }
1377 /* }}} */
1378 
1379 /* {{{ Free an XML parser */
PHP_FUNCTION(xml_parser_free)1380 PHP_FUNCTION(xml_parser_free)
1381 {
1382 	zval *pind;
1383 	xml_parser *parser;
1384 
1385 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1386 		RETURN_THROWS();
1387 	}
1388 
1389 	parser = Z_XMLPARSER_P(pind);
1390 	if (parser->isparsing == 1) {
1391 		php_error_docref(NULL, E_WARNING, "Parser cannot be freed while it is parsing");
1392 		RETURN_FALSE;
1393 	}
1394 
1395 	RETURN_TRUE;
1396 }
1397 /* }}} */
1398 
1399 /* {{{ Set options in an XML parser */
PHP_FUNCTION(xml_parser_set_option)1400 PHP_FUNCTION(xml_parser_set_option)
1401 {
1402 	xml_parser *parser;
1403 	zval *pind;
1404 	zend_long opt;
1405 	zval *value;
1406 
1407 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Olz", &pind, xml_parser_ce, &opt, &value) == FAILURE) {
1408 		RETURN_THROWS();
1409 	}
1410 
1411 	if (Z_TYPE_P(value) != IS_FALSE && Z_TYPE_P(value) != IS_TRUE &&
1412 		Z_TYPE_P(value) != IS_LONG && Z_TYPE_P(value) != IS_STRING) {
1413 		php_error_docref(NULL, E_WARNING,
1414 			"Argument #3 ($value) must be of type string|int|bool, %s given", zend_zval_type_name(value));
1415 	}
1416 
1417 	parser = Z_XMLPARSER_P(pind);
1418 	switch (opt) {
1419 		/* Boolean option */
1420 		case PHP_XML_OPTION_CASE_FOLDING:
1421 			parser->case_folding = zend_is_true(value);
1422 			break;
1423 		/* Boolean option */
1424 		case PHP_XML_OPTION_SKIP_WHITE:
1425 			parser->skipwhite = zend_is_true(value);
1426 			break;
1427 		/* Integer option */
1428 		case PHP_XML_OPTION_SKIP_TAGSTART:
1429 			/* The tag start offset is stored in an int */
1430 			/* TODO Improve handling of values? */
1431 			parser->toffset = zval_get_long(value);
1432 			if (parser->toffset < 0) {
1433 				/* TODO Promote to ValueError in PHP 9.0 */
1434 				php_error_docref(NULL, E_WARNING, "Argument #3 ($value) must be between 0 and %d"
1435 					" for option XML_OPTION_SKIP_TAGSTART", INT_MAX);
1436 				parser->toffset = 0;
1437 				RETURN_FALSE;
1438 			}
1439 			break;
1440 		/* String option */
1441 		case PHP_XML_OPTION_TARGET_ENCODING: {
1442 			const xml_encoding *enc;
1443 			if (!try_convert_to_string(value)) {
1444 				RETURN_THROWS();
1445 			}
1446 
1447 			enc = xml_get_encoding((XML_Char*)Z_STRVAL_P(value));
1448 			if (enc == NULL) {
1449 				zend_argument_value_error(3, "is not a supported target encoding");
1450 				RETURN_THROWS();
1451 			}
1452 
1453 			parser->target_encoding = enc->name;
1454 			break;
1455 		}
1456 		default:
1457 			zend_argument_value_error(2, "must be a XML_OPTION_* constant");
1458 			RETURN_THROWS();
1459 			break;
1460 	}
1461 
1462 	RETURN_TRUE;
1463 }
1464 /* }}} */
1465 
1466 /* {{{ Get options from an XML parser */
PHP_FUNCTION(xml_parser_get_option)1467 PHP_FUNCTION(xml_parser_get_option)
1468 {
1469 	xml_parser *parser;
1470 	zval *pind;
1471 	zend_long opt;
1472 
1473 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ol", &pind, xml_parser_ce, &opt) == FAILURE) {
1474 		RETURN_THROWS();
1475 	}
1476 
1477 	parser = Z_XMLPARSER_P(pind);
1478 	switch (opt) {
1479 		case PHP_XML_OPTION_CASE_FOLDING:
1480 			RETURN_BOOL(parser->case_folding);
1481 			break;
1482 		case PHP_XML_OPTION_SKIP_TAGSTART:
1483 			RETURN_LONG(parser->toffset);
1484 			break;
1485 		case PHP_XML_OPTION_SKIP_WHITE:
1486 			RETURN_BOOL(parser->skipwhite);
1487 			break;
1488 		case PHP_XML_OPTION_TARGET_ENCODING:
1489 			RETURN_STRING((char *)parser->target_encoding);
1490 			break;
1491 		default:
1492 			zend_argument_value_error(2, "must be a XML_OPTION_* constant");
1493 			RETURN_THROWS();
1494 	}
1495 }
1496 /* }}} */
1497 
1498 #endif
1499