xref: /PHP-8.1/ext/xml/xml.c (revision 30f26b58)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Stig Sæther Bakken <ssb@php.net>                            |
14    |          Thies C. Arntzen <thies@thieso.net>                         |
15    |          Sterling Hughes <sterling@php.net>                          |
16    +----------------------------------------------------------------------+
17  */
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include "php.h"
24 
25 #include "zend_variables.h"
26 #include "ext/standard/php_string.h"
27 #include "ext/standard/info.h"
28 #include "ext/standard/html.h"
29 
30 #ifdef HAVE_XML
31 
32 #include "php_xml.h"
33 # include "ext/standard/head.h"
34 #ifdef LIBXML_EXPAT_COMPAT
35 #include "ext/libxml/php_libxml.h"
36 #endif
37 
38 #include "xml_arginfo.h"
39 
40 /* Short-term TODO list:
41  * - Implement XML_ExternalEntityParserCreate()
42  * - XML_SetCommentHandler
43  * - XML_SetCdataSectionHandler
44  * - XML_SetParamEntityParsing
45  */
46 
47 /* Long-term TODO list:
48  * - Fix the expat library so you can install your own memory manager
49  *   functions
50  */
51 
52 /* Known bugs:
53  * - Weird things happen with <![CDATA[]]> sections.
54  */
55 
/* Module globals of the xml extension. */
ZEND_BEGIN_MODULE_GLOBALS(xml)56 ZEND_BEGIN_MODULE_GLOBALS(xml)
57 	XML_Char *default_encoding; /* initialised to "UTF-8" by PHP_GINIT_FUNCTION(xml) */
58 ZEND_END_MODULE_GLOBALS(xml)
59 
60 ZEND_DECLARE_MODULE_GLOBALS(xml)
61 
/* Shorthand for reaching one field of the xml module globals. */
62 #define XML(v) ZEND_MODULE_GLOBALS_ACCESSOR(xml, v)
63 
/* State of one XMLParser object.  The embedded zend_object (std) is the last
 * member; xml_parser_from_obj() steps back from it to reach this struct. */
64 typedef struct {
65 	int case_folding; /* non-zero: _xml_decode_tag() upper-cases decoded names */
66 	XML_Parser parser; /* underlying parser handle, released with XML_ParserFree() */
67 	XML_Char *target_encoding; /* encoding name passed to xml_utf8_decode() for output strings */
68 
69 	/* Reference to the object itself, for convenience.
70 	 * It is not owned, do not release it. */
71 	zval index;
72 
73 	/* We return a pointer to these zvals in get_gc(), so it's
74 	 * important that a) they are adjacent b) object is the first
75 	 * and c) the number of zvals is kept up to date. */
76 #define XML_PARSER_NUM_ZVALS 12
77 	zval object; /* used as fci.object when a handler is invoked */
	/* User callbacks; a slot left UNDEF means "no handler installed". */
78 	zval startElementHandler;
79 	zval endElementHandler;
80 	zval characterDataHandler;
81 	zval processingInstructionHandler;
82 	zval defaultHandler;
83 	zval unparsedEntityDeclHandler;
84 	zval notationDeclHandler;
85 	zval externalEntityRefHandler;
86 	zval unknownEncodingHandler;
87 	zval startNamespaceDeclHandler;
88 	zval endNamespaceDeclHandler;
89 
	/* Function pointers paired with the handlers above.  They are handed to
	 * xml_call_handler(), which does not read them in the code shown here. */
90 	zend_function *startElementPtr;
91 	zend_function *endElementPtr;
92 	zend_function *characterDataPtr;
93 	zend_function *processingInstructionPtr;
94 	zend_function *defaultPtr;
95 	zend_function *unparsedEntityDeclPtr;
96 	zend_function *notationDeclPtr;
97 	zend_function *externalEntityRefPtr;
98 	zend_function *unknownEncodingPtr;
99 	zend_function *startNamespaceDeclPtr;
100 	zend_function *endNamespaceDeclPtr;
101 
	/* Bookkeeping used while the element handlers fill the "data" array. */
102 	zval data; /* list of tag arrays (tag/type/level/value/attributes) */
103 	zval info; /* tag name => list of positions, filled by _xml_add_to_info() */
104 	int level; /* current nesting depth: ++ on element start, -- on element end */
105 	int toffset; /* leading characters SKIP_TAGSTART() drops from tag names */
106 	int curtag; /* running position counter recorded into info */
107 	zval *ctag; /* most recently inserted "open" tag array inside data */
108 	char **ltags; /* tag name per depth (index level-1), at most XML_MAXLEVEL entries */
109 	int lastwasopen; /* 1 right after an open tag was recorded, reset by the end handler */
110 	int skipwhite; /* non-zero: whitespace-only character data creates no new value */
111 	int isparsing; /* NOTE(review): not referenced in this part of the file - presumably a re-entrancy flag, confirm */
112 
113 	XML_Char *baseURI; /* efree()d in xml_parser_free_obj() */
114 
115 	zend_object std;
116 } xml_parser;
117 
118 
/* One entry of the xml_encodings table: an encoding name plus optional
 * per-character conversion callbacks (NULL when no conversion is needed). */
119 typedef struct {
120 	XML_Char *name;
121 	char (*decoding_function)(unsigned short); /* code point -> byte in this encoding */
122 	unsigned short (*encoding_function)(unsigned char); /* byte in this encoding -> code point */
123 } xml_encoding;
124 
125 
/* Parser option identifiers; PHP_MINIT_FUNCTION(xml) exposes them to
 * userland as the XML_OPTION_* constants. */
126 enum php_xml_option {
127 	PHP_XML_OPTION_CASE_FOLDING = 1,
128 	PHP_XML_OPTION_TARGET_ENCODING,
129 	PHP_XML_OPTION_SKIP_TAGSTART,
130 	PHP_XML_OPTION_SKIP_WHITE
131 };
132 
133 /* {{{ dynamically loadable module stuff */
134 #ifdef COMPILE_DL_XML
135 #ifdef ZTS
136 ZEND_TSRMLS_CACHE_DEFINE()
137 #endif
138 ZEND_GET_MODULE(xml)
139 #endif /* COMPILE_DL_XML */
140 /* }}} */
141 
/* Deepest element nesting tracked in parser->ltags; deeper levels are
 * reported with a "Maximum depth exceeded" warning by the handlers. */
142 #define XML_MAXLEVEL 255 /* XXX this should be dynamic */
143 
/* Yields str advanced by parser->toffset characters, clamped so the result
 * never points past the terminating NUL.  Requires a variable named `parser`
 * in scope and evaluates str more than once, so pass a side-effect-free
 * expression. */
144 #define SKIP_TAGSTART(str) ((str) + (parser->toffset > (int)strlen(str) ? strlen(str) : parser->toffset))
145 
/* Class entry and object handlers of XMLParser, set up in PHP_MINIT_FUNCTION(xml). */
146 static zend_class_entry *xml_parser_ce;
147 static zend_object_handlers xml_parser_object_handlers;
148 
149 /* {{{ function prototypes */
/* Module lifecycle hooks. */
150 PHP_MINIT_FUNCTION(xml);
151 PHP_MINFO_FUNCTION(xml);
152 static PHP_GINIT_FUNCTION(xml);
153 
/* XMLParser object handlers. */
154 static zend_object *xml_parser_create_object(zend_class_entry *class_type);
155 static void xml_parser_free_obj(zend_object *object);
156 static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n);
157 static zend_function *xml_parser_get_constructor(zend_object *object);
158 
/* Internal helpers: decoding, handler storage/invocation, struct-parsing bookkeeping. */
159 static zend_string *xml_utf8_decode(const XML_Char *, size_t, const XML_Char *);
160 static void xml_set_handler(zval *, zval *);
161 inline static unsigned short xml_encode_iso_8859_1(unsigned char);
162 inline static char xml_decode_iso_8859_1(unsigned short);
163 inline static unsigned short xml_encode_us_ascii(unsigned char);
164 inline static char xml_decode_us_ascii(unsigned short);
165 static void xml_call_handler(xml_parser *, zval *, zend_function *, int, zval *, zval *);
166 static void _xml_xmlchar_zval(const XML_Char *, int, const XML_Char *, zval *);
167 static int _xml_xmlcharlen(const XML_Char *);
168 static void _xml_add_to_info(xml_parser *parser,char *name);
169 inline static zend_string *_xml_decode_tag(xml_parser *parser, const char *tag);
170 
/* Parser callbacks; the first argument carries the xml_parser (userData). */
171 void _xml_startElementHandler(void *, const XML_Char *, const XML_Char **);
172 void _xml_endElementHandler(void *, const XML_Char *);
173 void _xml_characterDataHandler(void *, const XML_Char *, int);
174 void _xml_processingInstructionHandler(void *, const XML_Char *, const XML_Char *);
175 void _xml_defaultHandler(void *, const XML_Char *, int);
176 void _xml_unparsedEntityDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
177 void _xml_notationDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
178 int  _xml_externalEntityRefHandler(XML_Parser, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
179 
180 void _xml_startNamespaceDeclHandler(void *, const XML_Char *, const XML_Char *);
181 void _xml_endNamespaceDeclHandler(void *, const XML_Char *);
182 /* }}} */
183 
/* When built on the libxml compatibility layer, the libxml extension is a
 * required module dependency. */
184 #ifdef LIBXML_EXPAT_COMPAT
185 static const zend_module_dep xml_deps[] = {
186 	ZEND_MOD_REQUIRED("libxml")
187 	ZEND_MOD_END
188 };
189 #endif
190 
/* Module descriptor: only startup, info and globals-constructor hooks are set. */
191 zend_module_entry xml_module_entry = {
192 #ifdef LIBXML_EXPAT_COMPAT
193 	STANDARD_MODULE_HEADER_EX, NULL,
194 	xml_deps,
195 #else
196 	STANDARD_MODULE_HEADER,
197 #endif
198 	"xml",                /* extension name */
199 	ext_functions,        /* extension function list */
200 	PHP_MINIT(xml),       /* extension-wide startup function */
201 	NULL,                 /* extension-wide shutdown function */
202 	NULL,                 /* per-request startup function */
203 	NULL,                 /* per-request shutdown function */
204 	PHP_MINFO(xml),       /* information function */
205 	PHP_XML_VERSION,
206 	PHP_MODULE_GLOBALS(xml), /* globals descriptor */
207 	PHP_GINIT(xml),          /* globals ctor */
208 	NULL,                    /* globals dtor */
209 	NULL,                    /* post deactivate */
210 	STANDARD_MODULE_PROPERTIES_EX
211 };
212 
213 /* Supported target encodings, terminated by an entry whose name is NULL.
214  * ISO-8859-1 and US-ASCII carry per-character conversion callbacks; UTF-8
215  * has none, so xml_utf8_decode() returns such data unchanged.
215  */
216 const xml_encoding xml_encodings[] = {
217 	{ (XML_Char *)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
218 	{ (XML_Char *)"US-ASCII",   xml_decode_us_ascii,   xml_encode_us_ascii   },
219 	{ (XML_Char *)"UTF-8",      NULL,                  NULL                  },
220 	{ (XML_Char *)NULL,         NULL,                  NULL                  }
221 };
222 
/* Allocation callbacks for the XML parser; PHP_MINIT_FUNCTION(xml) points
 * them at the emalloc/erealloc/efree wrappers. */
223 static XML_Memory_Handling_Suite php_xml_mem_hdlrs;
224 
225 /* }}} */
226 
227 /* {{{ startup, shutdown and info functions */
PHP_GINIT_FUNCTION(xml)228 static PHP_GINIT_FUNCTION(xml)
229 {
230 #if defined(COMPILE_DL_XML) && defined(ZTS)
231 	ZEND_TSRMLS_CACHE_UPDATE();
232 #endif
233 	xml_globals->default_encoding = (XML_Char*)"UTF-8";
234 }
235 
/* Allocation hook for the XML parser: serves the request via emalloc(). */
static void *php_xml_malloc_wrapper(size_t sz)
{
	void *block = emalloc(sz);

	return block;
}
240 
/* Reallocation hook for the XML parser: resizes the block via erealloc(). */
static void *php_xml_realloc_wrapper(void *ptr, size_t sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
245 
/* Release hook for the XML parser: a NULL pointer is ignored, anything else
 * goes back through efree(). */
static void php_xml_free_wrapper(void *ptr)
{
	if (ptr == NULL) {
		return;
	}

	efree(ptr);
}
252 
PHP_MINIT_FUNCTION(xml)253 PHP_MINIT_FUNCTION(xml)
254 {
255 	xml_parser_ce = register_class_XMLParser();
256 	xml_parser_ce->create_object = xml_parser_create_object;
257 
258 	memcpy(&xml_parser_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
259 	xml_parser_object_handlers.offset = XtOffsetOf(xml_parser, std);
260 	xml_parser_object_handlers.free_obj = xml_parser_free_obj;
261 	xml_parser_object_handlers.get_gc = xml_parser_get_gc;
262 	xml_parser_object_handlers.get_constructor = xml_parser_get_constructor;
263 	xml_parser_object_handlers.clone_obj = NULL;
264 	xml_parser_object_handlers.compare = zend_objects_not_comparable;
265 
266 	REGISTER_LONG_CONSTANT("XML_ERROR_NONE", XML_ERROR_NONE, CONST_CS|CONST_PERSISTENT);
267 	REGISTER_LONG_CONSTANT("XML_ERROR_NO_MEMORY", XML_ERROR_NO_MEMORY, CONST_CS|CONST_PERSISTENT);
268 	REGISTER_LONG_CONSTANT("XML_ERROR_SYNTAX", XML_ERROR_SYNTAX, CONST_CS|CONST_PERSISTENT);
269 	REGISTER_LONG_CONSTANT("XML_ERROR_NO_ELEMENTS", XML_ERROR_NO_ELEMENTS, CONST_CS|CONST_PERSISTENT);
270 	REGISTER_LONG_CONSTANT("XML_ERROR_INVALID_TOKEN", XML_ERROR_INVALID_TOKEN, CONST_CS|CONST_PERSISTENT);
271 	REGISTER_LONG_CONSTANT("XML_ERROR_UNCLOSED_TOKEN", XML_ERROR_UNCLOSED_TOKEN, CONST_CS|CONST_PERSISTENT);
272 	REGISTER_LONG_CONSTANT("XML_ERROR_PARTIAL_CHAR", XML_ERROR_PARTIAL_CHAR, CONST_CS|CONST_PERSISTENT);
273 	REGISTER_LONG_CONSTANT("XML_ERROR_TAG_MISMATCH", XML_ERROR_TAG_MISMATCH, CONST_CS|CONST_PERSISTENT);
274 	REGISTER_LONG_CONSTANT("XML_ERROR_DUPLICATE_ATTRIBUTE", XML_ERROR_DUPLICATE_ATTRIBUTE, CONST_CS|CONST_PERSISTENT);
275 	REGISTER_LONG_CONSTANT("XML_ERROR_JUNK_AFTER_DOC_ELEMENT", XML_ERROR_JUNK_AFTER_DOC_ELEMENT, CONST_CS|CONST_PERSISTENT);
276 	REGISTER_LONG_CONSTANT("XML_ERROR_PARAM_ENTITY_REF", XML_ERROR_PARAM_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
277 	REGISTER_LONG_CONSTANT("XML_ERROR_UNDEFINED_ENTITY", XML_ERROR_UNDEFINED_ENTITY, CONST_CS|CONST_PERSISTENT);
278 	REGISTER_LONG_CONSTANT("XML_ERROR_RECURSIVE_ENTITY_REF", XML_ERROR_RECURSIVE_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
279 	REGISTER_LONG_CONSTANT("XML_ERROR_ASYNC_ENTITY", XML_ERROR_ASYNC_ENTITY, CONST_CS|CONST_PERSISTENT);
280 	REGISTER_LONG_CONSTANT("XML_ERROR_BAD_CHAR_REF", XML_ERROR_BAD_CHAR_REF, CONST_CS|CONST_PERSISTENT);
281 	REGISTER_LONG_CONSTANT("XML_ERROR_BINARY_ENTITY_REF", XML_ERROR_BINARY_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
282 	REGISTER_LONG_CONSTANT("XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
283 	REGISTER_LONG_CONSTANT("XML_ERROR_MISPLACED_XML_PI", XML_ERROR_MISPLACED_XML_PI, CONST_CS|CONST_PERSISTENT);
284 	REGISTER_LONG_CONSTANT("XML_ERROR_UNKNOWN_ENCODING", XML_ERROR_UNKNOWN_ENCODING, CONST_CS|CONST_PERSISTENT);
285 	REGISTER_LONG_CONSTANT("XML_ERROR_INCORRECT_ENCODING", XML_ERROR_INCORRECT_ENCODING, CONST_CS|CONST_PERSISTENT);
286 	REGISTER_LONG_CONSTANT("XML_ERROR_UNCLOSED_CDATA_SECTION", XML_ERROR_UNCLOSED_CDATA_SECTION, CONST_CS|CONST_PERSISTENT);
287 	REGISTER_LONG_CONSTANT("XML_ERROR_EXTERNAL_ENTITY_HANDLING", XML_ERROR_EXTERNAL_ENTITY_HANDLING, CONST_CS|CONST_PERSISTENT);
288 
289 	REGISTER_LONG_CONSTANT("XML_OPTION_CASE_FOLDING", PHP_XML_OPTION_CASE_FOLDING, CONST_CS|CONST_PERSISTENT);
290 	REGISTER_LONG_CONSTANT("XML_OPTION_TARGET_ENCODING", PHP_XML_OPTION_TARGET_ENCODING, CONST_CS|CONST_PERSISTENT);
291 	REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_TAGSTART", PHP_XML_OPTION_SKIP_TAGSTART, CONST_CS|CONST_PERSISTENT);
292 	REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_WHITE", PHP_XML_OPTION_SKIP_WHITE, CONST_CS|CONST_PERSISTENT);
293 
294 	/* this object should not be pre-initialised at compile time,
295 	   as the order of members may vary */
296 
297 	php_xml_mem_hdlrs.malloc_fcn = php_xml_malloc_wrapper;
298 	php_xml_mem_hdlrs.realloc_fcn = php_xml_realloc_wrapper;
299 	php_xml_mem_hdlrs.free_fcn = php_xml_free_wrapper;
300 
301 #ifdef LIBXML_EXPAT_COMPAT
302 	REGISTER_STRING_CONSTANT("XML_SAX_IMPL", "libxml", CONST_CS|CONST_PERSISTENT);
303 #else
304 	REGISTER_STRING_CONSTANT("XML_SAX_IMPL", "expat", CONST_CS|CONST_PERSISTENT);
305 #endif
306 
307 	return SUCCESS;
308 }
309 
/* phpinfo() section: reports XML and namespace support plus the version of
 * the backing library (libxml2 when built on the compat layer, expat otherwise). */
PHP_MINFO_FUNCTION(xml)
{
	php_info_print_table_start();

	php_info_print_table_row(2, "XML Support", "active");
	php_info_print_table_row(2, "XML Namespace Support", "active");
#if !defined(LIBXML_DOTTED_VERSION) || !defined(LIBXML_EXPAT_COMPAT)
	php_info_print_table_row(2, "EXPAT Version", XML_ExpatVersion());
#else
	php_info_print_table_row(2, "libxml2 Version", LIBXML_DOTTED_VERSION);
#endif

	php_info_print_table_end();
}
/* }}} */
322 /* }}} */
323 
324 /* {{{ extension-internal functions */
325 
/* Stores parser text in ret as a PHP string converted to the given encoding.
 *
 * s        text from the parser; NULL yields boolean false in ret
 * len      number of XML_Chars in s; 0 means "measure s up to its NUL"
 * encoding target encoding name, forwarded to xml_utf8_decode()
 *
 * NOTE(review): a genuinely empty, non-terminated buffer passed with len 0
 * would be measured instead of treated as empty - confirm callers never do that.
 */
static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
{
	if (s != NULL) {
		int effective_len = (len != 0) ? len : _xml_xmlcharlen(s);

		ZVAL_STR(ret, xml_utf8_decode(s, effective_len, encoding));
	} else {
		ZVAL_FALSE(ret);
	}
}
337 /* }}} */
338 
xml_parser_from_obj(zend_object * obj)339 static inline xml_parser *xml_parser_from_obj(zend_object *obj) {
340 	return (xml_parser *)((char *)(obj) - XtOffsetOf(xml_parser, std));
341 }
342 
343 #define Z_XMLPARSER_P(zv) xml_parser_from_obj(Z_OBJ_P(zv))
344 
xml_parser_create_object(zend_class_entry * class_type)345 static zend_object *xml_parser_create_object(zend_class_entry *class_type) {
346 	xml_parser *intern = zend_object_alloc(sizeof(xml_parser), class_type);
347 	memset(intern, 0, sizeof(xml_parser) - sizeof(zend_object));
348 
349 	zend_object_std_init(&intern->std, class_type);
350 	object_properties_init(&intern->std, class_type);
351 	intern->std.handlers = &xml_parser_object_handlers;
352 
353 	return &intern->std;
354 }
355 
/* Releases the per-level tag-name stack of a parser.
 *
 * Frees the names stored for levels 0 .. min(level, XML_MAXLEVEL) - 1 and
 * then the array itself.  The pointer is reset to NULL afterwards so that a
 * repeated call (or any later check of parser->ltags) cannot touch the freed
 * array again.
 */
static void xml_parser_free_ltags(xml_parser *parser)
{
	int inx;

	if (!parser->ltags) {
		return;
	}

	for (inx = 0; (inx < parser->level) && (inx < XML_MAXLEVEL); inx++) {
		efree(parser->ltags[inx]);
	}
	efree(parser->ltags);
	parser->ltags = NULL;
}
365 
/* free_obj handler of XMLParser.
 *
 * Tears down, in this order: the underlying parser, the tag-name stack,
 * every installed handler callable, the base URI, the bound object, and
 * finally the standard object part.
 */
static void xml_parser_free_obj(zend_object *object)
{
	xml_parser *parser = xml_parser_from_obj(object);
	/* handler slots, listed in the order they are released */
	zval *const handler_slots[] = {
		&parser->startElementHandler,
		&parser->endElementHandler,
		&parser->characterDataHandler,
		&parser->processingInstructionHandler,
		&parser->defaultHandler,
		&parser->unparsedEntityDeclHandler,
		&parser->notationDeclHandler,
		&parser->externalEntityRefHandler,
		&parser->unknownEncodingHandler,
		&parser->startNamespaceDeclHandler,
		&parser->endNamespaceDeclHandler,
	};
	size_t slot;

	if (parser->parser) {
		XML_ParserFree(parser->parser);
	}
	xml_parser_free_ltags(parser);

	for (slot = 0; slot < sizeof(handler_slots) / sizeof(handler_slots[0]); slot++) {
		if (!Z_ISUNDEF(*handler_slots[slot])) {
			zval_ptr_dtor(handler_slots[slot]);
		}
	}

	if (parser->baseURI) {
		efree(parser->baseURI);
	}
	if (!Z_ISUNDEF(parser->object)) {
		zval_ptr_dtor(&parser->object);
	}

	zend_object_std_dtor(&parser->std);
}
416 
xml_parser_get_gc(zend_object * object,zval ** table,int * n)417 static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n)
418 {
419 	xml_parser *parser = xml_parser_from_obj(object);
420 	*table = &parser->object;
421 	*n = XML_PARSER_NUM_ZVALS;
422 	return zend_std_get_properties(object);
423 }
424 
xml_parser_get_constructor(zend_object * object)425 static zend_function *xml_parser_get_constructor(zend_object *object) {
426 	zend_throw_error(NULL, "Cannot directly construct XMLParser, use xml_parser_create() or xml_parser_create_ns() instead");
427 	return NULL;
428 }
429 
430 /* {{{ xml_set_handler() */
/* Replaces the callable stored in a handler slot.
 *
 * handler  slot to overwrite; its previous value is released first
 * data     new callable.  Arrays and objects are stored as they are; any
 *          other value is converted to a string in place, and an empty
 *          string clears the slot (leaves it UNDEF).
 */
static void xml_set_handler(zval *handler, zval *data)
{
	/* drop whatever was installed before */
	if (handler) {
		zval_ptr_dtor(handler);
	}

	switch (Z_TYPE_P(data)) {
		case IS_ARRAY:  /* possibly array($obj, 'method') */
		case IS_OBJECT:
			break;

		default:
			convert_to_string(data);
			if (Z_STRLEN_P(data) == 0) {
				ZVAL_UNDEF(handler);
				return;
			}
			break;
	}

	ZVAL_COPY(handler, data);
}
449 /* }}} */
450 
451 /* {{{ xml_call_handler() */
/* Invokes a user handler and releases the argument zvals.
 *
 * parser        parser whose bound object becomes the call's object
 * handler       callable to invoke
 * function_ptr  accepted for the callers' benefit; not read here
 * argc, argv    arguments; every argv[i] is destroyed before returning,
 *               whether or not the call was attempted
 * retval        receives the result; left UNDEF when no call was made
 *
 * No call is made while an exception is pending.  A failed call is reported
 * as an E_WARNING naming the handler where possible.
 */
static void xml_call_handler(xml_parser *parser, zval *handler, zend_function *function_ptr, int argc, zval *argv, zval *retval)
{
	int arg;

	ZVAL_UNDEF(retval);

	if (parser && handler && !EG(exception)) {
		zend_fcall_info fci;

		fci.size = sizeof(fci);
		ZVAL_COPY_VALUE(&fci.function_name, handler);
		fci.object = Z_OBJ(parser->object);
		fci.retval = retval;
		fci.param_count = argc;
		fci.params = argv;
		fci.named_params = NULL;

		if (zend_call_function(&fci, NULL) == FAILURE) {
			if (Z_TYPE_P(handler) == IS_STRING) {
				php_error_docref(NULL, E_WARNING, "Unable to call handler %s()", Z_STRVAL_P(handler));
			} else {
				zval *obj = NULL;
				zval *method = NULL;
				/* array(object, 'method') lets us name class and method */
				int is_method_pair =
					Z_TYPE_P(handler) == IS_ARRAY
					&& (obj = zend_hash_index_find(Z_ARRVAL_P(handler), 0)) != NULL
					&& (method = zend_hash_index_find(Z_ARRVAL_P(handler), 1)) != NULL
					&& Z_TYPE_P(obj) == IS_OBJECT
					&& Z_TYPE_P(method) == IS_STRING;

				if (is_method_pair) {
					php_error_docref(NULL, E_WARNING, "Unable to call handler %s::%s()", ZSTR_VAL(Z_OBJCE_P(obj)->name), Z_STRVAL_P(method));
				} else {
					php_error_docref(NULL, E_WARNING, "Unable to call handler");
				}
			}
		}
	}

	for (arg = 0; arg < argc; arg++) {
		zval_ptr_dtor(&argv[arg]);
	}
}
490 /* }}} */
491 
492 /* {{{ xml_encode_iso_8859_1() */
/* Maps one ISO-8859-1 byte to its code point; the value is returned
 * unchanged, only widened to unsigned short. */
static inline unsigned short xml_encode_iso_8859_1(unsigned char c)
{
	return (unsigned short)c;
}
497 /* }}} */
498 
499 /* {{{ xml_decode_iso_8859_1() */
/* Maps a code point to an ISO-8859-1 byte; anything above 0xFF cannot be
 * represented and becomes '?'. */
static inline char xml_decode_iso_8859_1(unsigned short c)
{
	return (char)(c > 0xff ? '?' : c);
}
504 /* }}} */
505 
506 /* {{{ xml_encode_us_ascii() */
/* Maps one US-ASCII byte to its code point.  The byte is widened as is;
 * values above 0x7F are not filtered here. */
static inline unsigned short xml_encode_us_ascii(unsigned char c)
{
	return (unsigned short)c;
}
511 /* }}} */
512 
513 /* {{{ xml_decode_us_ascii() */
/* Maps a code point to a US-ASCII byte; anything above 0x7F cannot be
 * represented and becomes '?'. */
static inline char xml_decode_us_ascii(unsigned short c)
{
	return (char)(c > 0x7f ? '?' : c);
}
518 /* }}} */
519 
520 /* {{{ xml_get_encoding() */
xml_get_encoding(const XML_Char * name)521 static const xml_encoding *xml_get_encoding(const XML_Char *name)
522 {
523 	const xml_encoding *enc = &xml_encodings[0];
524 
525 	while (enc && enc->name) {
526 		if (strcasecmp((char *)name, (char *)enc->name) == 0) {
527 			return enc;
528 		}
529 		enc++;
530 	}
531 	return NULL;
532 }
533 /* }}} */
534 
535 /* {{{ xml_utf8_decode() */
xml_utf8_decode(const XML_Char * s,size_t len,const XML_Char * encoding)536 static zend_string *xml_utf8_decode(const XML_Char *s, size_t len, const XML_Char *encoding)
537 {
538 	size_t pos = 0;
539 	unsigned int c;
540 	char (*decoder)(unsigned short) = NULL;
541 	const xml_encoding *enc = xml_get_encoding(encoding);
542 	zend_string *str;
543 
544 	if (enc) {
545 		decoder = enc->decoding_function;
546 	}
547 
548 	if (decoder == NULL) {
549 		/* If the target encoding was unknown, or no decoder function
550 		 * was specified, return the UTF-8-encoded data as-is.
551 		 */
552 		str = zend_string_init((char *)s, len, 0);
553 		return str;
554 	}
555 
556 	str = zend_string_alloc(len, 0);
557 	ZSTR_LEN(str) = 0;
558 	while (pos < len) {
559 		int status = FAILURE;
560 		c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
561 
562 		if (status == FAILURE || c > 0xFFU) {
563 			c = '?';
564 		}
565 
566 		ZSTR_VAL(str)[ZSTR_LEN(str)++] = decoder ? (unsigned int)decoder(c) : c;
567 	}
568 	ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
569 	if (ZSTR_LEN(str) < len) {
570 		str = zend_string_truncate(str, ZSTR_LEN(str), 0);
571 	}
572 
573 	return str;
574 }
575 /* }}} */
576 
577 /* {{{ _xml_xmlcharlen() */
/* Counts the XML_Chars in s up to, but not including, the terminating zero. */
static int _xml_xmlcharlen(const XML_Char *s)
{
	const XML_Char *cursor;
	int count = 0;

	for (cursor = s; *cursor; cursor++) {
		count++;
	}

	return count;
}
588 /* }}} */
589 
590 /* {{{ _xml_add_to_info() */
/* Records the current tag position under `name` in the parser's info array.
 *
 * Does nothing when no info array is attached.  Otherwise appends
 * parser->curtag to the list stored under name (creating the list on first
 * use) and advances parser->curtag by one.
 */
static void _xml_add_to_info(xml_parser *parser,char *name)
{
	HashTable *info_table;
	size_t name_len;
	zval *positions;

	if (Z_ISUNDEF(parser->info)) {
		return;
	}

	info_table = Z_ARRVAL(parser->info);
	name_len = strlen(name);

	positions = zend_hash_str_find(info_table, name, name_len);
	if (positions == NULL) {
		zval fresh_list;

		array_init(&fresh_list);
		positions = zend_hash_str_update(info_table, name, name_len, &fresh_list);
	}

	add_next_index_long(positions, parser->curtag);
	parser->curtag++;
}
609 /* }}} */
610 
611 /* {{{ _xml_decode_tag() */
_xml_decode_tag(xml_parser * parser,const char * tag)612 static zend_string *_xml_decode_tag(xml_parser *parser, const char *tag)
613 {
614 	zend_string *str;
615 
616 	str = xml_utf8_decode((const XML_Char *)tag, strlen(tag), parser->target_encoding);
617 
618 	if (parser->case_folding) {
619 		php_strtoupper(ZSTR_VAL(str), ZSTR_LEN(str));
620 	}
621 
622 	return str;
623 }
624 /* }}} */
625 
626 /* {{{ _xml_startElementHandler() */
_xml_startElementHandler(void * userData,const XML_Char * name,const XML_Char ** attributes)627 void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes)
628 {
629 	xml_parser *parser = (xml_parser *)userData;
630 	const char **attrs = (const char **) attributes;
631 	zend_string *att, *tag_name, *val;
632 	zval retval, args[3];
633 
634 	if (parser) {
635 		parser->level++;
636 
637 		tag_name = _xml_decode_tag(parser, (const char *)name);
638 
639 		if (!Z_ISUNDEF(parser->startElementHandler)) {
640 			ZVAL_COPY(&args[0], &parser->index);
641 			ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
642 			array_init(&args[2]);
643 
644 			while (attributes && *attributes) {
645 				zval tmp;
646 
647 				att = _xml_decode_tag(parser, (const char *)attributes[0]);
648 				val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
649 
650 				ZVAL_STR(&tmp, val);
651 				zend_symtable_update(Z_ARRVAL(args[2]), att, &tmp);
652 
653 				attributes += 2;
654 
655 				zend_string_release_ex(att, 0);
656 			}
657 
658 			xml_call_handler(parser, &parser->startElementHandler, parser->startElementPtr, 3, args, &retval);
659 			zval_ptr_dtor(&retval);
660 		}
661 
662 		if (!Z_ISUNDEF(parser->data)) {
663 			if (parser->level <= XML_MAXLEVEL)  {
664 				zval tag, atr;
665 				int atcnt = 0;
666 
667 				array_init(&tag);
668 				array_init(&atr);
669 
670 				_xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
671 
672 				add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
673 				add_assoc_string(&tag, "type", "open");
674 				add_assoc_long(&tag, "level", parser->level);
675 
676 				parser->ltags[parser->level-1] = estrdup(ZSTR_VAL(tag_name));
677 				parser->lastwasopen = 1;
678 
679 				attributes = (const XML_Char **) attrs;
680 
681 				while (attributes && *attributes) {
682 					zval tmp;
683 
684 					att = _xml_decode_tag(parser, (const char *)attributes[0]);
685 					val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
686 
687 					ZVAL_STR(&tmp, val);
688 					zend_symtable_update(Z_ARRVAL(atr), att, &tmp);
689 
690 					atcnt++;
691 					attributes += 2;
692 
693 					zend_string_release_ex(att, 0);
694 				}
695 
696 				if (atcnt) {
697 					zend_hash_str_add(Z_ARRVAL(tag), "attributes", sizeof("attributes") - 1, &atr);
698 				} else {
699 					zval_ptr_dtor(&atr);
700 				}
701 
702 				parser->ctag = zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
703 			} else if (parser->level == (XML_MAXLEVEL + 1)) {
704 							php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
705 			}
706 		}
707 
708 		zend_string_release_ex(tag_name, 0);
709 	}
710 }
711 /* }}} */
712 
713 /* {{{ _xml_endElementHandler() */
_xml_endElementHandler(void * userData,const XML_Char * name)714 void _xml_endElementHandler(void *userData, const XML_Char *name)
715 {
716 	xml_parser *parser = (xml_parser *)userData;
717 
718 	if (parser) {
719 		zval retval, args[2];
720 
721 		zend_string *tag_name = _xml_decode_tag(parser, (const char *)name);
722 
723 		if (!Z_ISUNDEF(parser->endElementHandler)) {
724 			ZVAL_COPY(&args[0], &parser->index);
725 			ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
726 
727 			xml_call_handler(parser, &parser->endElementHandler, parser->endElementPtr, 2, args, &retval);
728 			zval_ptr_dtor(&retval);
729 		}
730 
731 		if (!Z_ISUNDEF(parser->data)) {
732 			zval tag;
733 
734 			if (parser->lastwasopen) {
735 				add_assoc_string(parser->ctag, "type", "complete");
736 			} else {
737 				array_init(&tag);
738 
739 				_xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
740 
741 				add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
742 				add_assoc_string(&tag, "type", "close");
743 				add_assoc_long(&tag, "level", parser->level);
744 
745 				zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
746 			}
747 
748 			parser->lastwasopen = 0;
749 		}
750 
751 		zend_string_release_ex(tag_name, 0);
752 
753 		if ((parser->ltags) && (parser->level <= XML_MAXLEVEL)) {
754 			efree(parser->ltags[parser->level-1]);
755 		}
756 
757 		parser->level--;
758 	}
759 }
760 /* }}} */
761 
762 /* {{{ _xml_characterDataHandler() */
_xml_characterDataHandler(void * userData,const XML_Char * s,int len)763 void _xml_characterDataHandler(void *userData, const XML_Char *s, int len)
764 {
765 	xml_parser *parser = (xml_parser *)userData;
766 
767 	if (parser) {
768 		zval retval, args[2];
769 
770 		if (!Z_ISUNDEF(parser->characterDataHandler)) {
771 			ZVAL_COPY(&args[0], &parser->index);
772 			_xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
773 			xml_call_handler(parser, &parser->characterDataHandler, parser->characterDataPtr, 2, args, &retval);
774 			zval_ptr_dtor(&retval);
775 		}
776 
777 		if (!Z_ISUNDEF(parser->data)) {
778 			size_t i;
779 			int doprint = 0;
780 			zend_string *decoded_value;
781 
782 			decoded_value = xml_utf8_decode(s, len, parser->target_encoding);
783 			if (parser->skipwhite) {
784 				for (i = 0; i < ZSTR_LEN(decoded_value); i++) {
785 					switch (ZSTR_VAL(decoded_value)[i]) {
786 						case ' ':
787 						case '\t':
788 						case '\n':
789 							continue;
790 						default:
791 							doprint = 1;
792 							break;
793 					}
794 					if (doprint) {
795 						break;
796 					}
797 				}
798 			}
799 
800 			if (parser->lastwasopen) {
801 				zval *myval;
802 
803 				/* check if the current tag already has a value - if yes append to that! */
804 				if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) {
805 					size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
806 					Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
807 					strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
808 							ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
809 					zend_string_release_ex(decoded_value, 0);
810 				} else {
811 					if (doprint || (! parser->skipwhite)) {
812 						add_assoc_str(parser->ctag, "value", decoded_value);
813 					} else {
814 						zend_string_release_ex(decoded_value, 0);
815 					}
816 				}
817 
818 			} else {
819 				zval tag;
820 				zval *curtag, *mytype, *myval;
821 
822 				ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
823 					if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
824 						if (zend_string_equals_literal(Z_STR_P(mytype), "cdata")) {
825 							if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) {
826 								size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
827 								Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
828 								strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
829 										ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
830 								zend_string_release_ex(decoded_value, 0);
831 								return;
832 							}
833 						}
834 					}
835 					break;
836 				} ZEND_HASH_FOREACH_END();
837 
838 				if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) {
839 					array_init(&tag);
840 
841 					_xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
842 
843 					add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
844 					add_assoc_str(&tag, "value", decoded_value);
845 					add_assoc_string(&tag, "type", "cdata");
846 					add_assoc_long(&tag, "level", parser->level);
847 
848 					zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
849 				} else if (parser->level == (XML_MAXLEVEL + 1)) {
850 										php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
851 				} else {
852 					zend_string_release_ex(decoded_value, 0);
853 				}
854 			}
855 		}
856 	}
857 }
858 /* }}} */
859 
860 /* {{{ _xml_processingInstructionHandler() */
_xml_processingInstructionHandler(void * userData,const XML_Char * target,const XML_Char * data)861 void _xml_processingInstructionHandler(void *userData, const XML_Char *target, const XML_Char *data)
862 {
863 	xml_parser *parser = (xml_parser *)userData;
864 
865 	if (parser && !Z_ISUNDEF(parser->processingInstructionHandler)) {
866 		zval retval, args[3];
867 
868 		ZVAL_COPY(&args[0], &parser->index);
869 		_xml_xmlchar_zval(target, 0, parser->target_encoding, &args[1]);
870 		_xml_xmlchar_zval(data, 0, parser->target_encoding, &args[2]);
871 		xml_call_handler(parser, &parser->processingInstructionHandler, parser->processingInstructionPtr, 3, args, &retval);
872 		zval_ptr_dtor(&retval);
873 	}
874 }
875 /* }}} */
876 
877 /* {{{ _xml_defaultHandler() */
_xml_defaultHandler(void * userData,const XML_Char * s,int len)878 void _xml_defaultHandler(void *userData, const XML_Char *s, int len)
879 {
880 	xml_parser *parser = (xml_parser *)userData;
881 
882 	if (parser && !Z_ISUNDEF(parser->defaultHandler)) {
883 		zval retval, args[2];
884 
885 		ZVAL_COPY(&args[0], &parser->index);
886 		_xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
887 		xml_call_handler(parser, &parser->defaultHandler, parser->defaultPtr, 2, args, &retval);
888 		zval_ptr_dtor(&retval);
889 	}
890 }
891 /* }}} */
892 
893 /* {{{ _xml_unparsedEntityDeclHandler() */
_xml_unparsedEntityDeclHandler(void * userData,const XML_Char * entityName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)894 void _xml_unparsedEntityDeclHandler(void *userData,
895 										 const XML_Char *entityName,
896 										 const XML_Char *base,
897 										 const XML_Char *systemId,
898 										 const XML_Char *publicId,
899 										 const XML_Char *notationName)
900 {
901 	xml_parser *parser = (xml_parser *)userData;
902 
903 	if (parser && !Z_ISUNDEF(parser->unparsedEntityDeclHandler)) {
904 		zval retval, args[6];
905 
906 		ZVAL_COPY(&args[0], &parser->index);
907 		_xml_xmlchar_zval(entityName, 0, parser->target_encoding, &args[1]);
908 		_xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
909 		_xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
910 		_xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
911 		_xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[5]);
912 		xml_call_handler(parser, &parser->unparsedEntityDeclHandler, parser->unparsedEntityDeclPtr, 6, args, &retval);
913 		zval_ptr_dtor(&retval);
914 	}
915 }
916 /* }}} */
917 
918 /* {{{ _xml_notationDeclHandler() */
_xml_notationDeclHandler(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)919 void _xml_notationDeclHandler(void *userData,
920 							  const XML_Char *notationName,
921 							  const XML_Char *base,
922 							  const XML_Char *systemId,
923 							  const XML_Char *publicId)
924 {
925 	xml_parser *parser = (xml_parser *)userData;
926 
927 	if (parser && !Z_ISUNDEF(parser->notationDeclHandler)) {
928 		zval retval, args[5];
929 
930 		ZVAL_COPY(&args[0], &parser->index);
931 		_xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[1]);
932 		_xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
933 		_xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
934 		_xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
935 		xml_call_handler(parser, &parser->notationDeclHandler, parser->notationDeclPtr, 5, args, &retval);
936 		zval_ptr_dtor(&retval);
937 	}
938 }
939 /* }}} */
940 
941 /* {{{ _xml_externalEntityRefHandler() */
_xml_externalEntityRefHandler(XML_Parser parserPtr,const XML_Char * openEntityNames,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)942 int _xml_externalEntityRefHandler(XML_Parser parserPtr,
943 								   const XML_Char *openEntityNames,
944 								   const XML_Char *base,
945 								   const XML_Char *systemId,
946 								   const XML_Char *publicId)
947 {
948 	xml_parser *parser = XML_GetUserData(parserPtr);
949 	int ret = 0; /* abort if no handler is set (should be configurable?) */
950 
951 	if (parser && !Z_ISUNDEF(parser->externalEntityRefHandler)) {
952 		zval retval, args[5];
953 
954 		ZVAL_COPY(&args[0], &parser->index);
955 		_xml_xmlchar_zval(openEntityNames, 0, parser->target_encoding, &args[1]);
956 		_xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
957 		_xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
958 		_xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
959 		xml_call_handler(parser, &parser->externalEntityRefHandler, parser->externalEntityRefPtr, 5, args, &retval);
960 		if (!Z_ISUNDEF(retval)) {
961 			convert_to_long(&retval);
962 			ret = Z_LVAL(retval);
963 		} else {
964 			ret = 0;
965 		}
966 	}
967 	return ret;
968 }
969 /* }}} */
970 
971 /* {{{ _xml_startNamespaceDeclHandler() */
_xml_startNamespaceDeclHandler(void * userData,const XML_Char * prefix,const XML_Char * uri)972 void _xml_startNamespaceDeclHandler(void *userData,const XML_Char *prefix, const XML_Char *uri)
973 {
974 	xml_parser *parser = (xml_parser *)userData;
975 
976 	if (parser && !Z_ISUNDEF(parser->startNamespaceDeclHandler)) {
977 		zval retval, args[3];
978 
979 		ZVAL_COPY(&args[0], &parser->index);
980 		_xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
981 		_xml_xmlchar_zval(uri, 0, parser->target_encoding, &args[2]);
982 		xml_call_handler(parser, &parser->startNamespaceDeclHandler, parser->startNamespaceDeclPtr, 3, args, &retval);
983 		zval_ptr_dtor(&retval);
984 	}
985 }
986 /* }}} */
987 
988 /* {{{ _xml_endNamespaceDeclHandler() */
_xml_endNamespaceDeclHandler(void * userData,const XML_Char * prefix)989 void _xml_endNamespaceDeclHandler(void *userData, const XML_Char *prefix)
990 {
991 	xml_parser *parser = (xml_parser *)userData;
992 
993 	if (parser && !Z_ISUNDEF(parser->endNamespaceDeclHandler)) {
994 		zval retval, args[2];
995 
996 		ZVAL_COPY(&args[0], &parser->index);
997 		_xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
998 		xml_call_handler(parser, &parser->endNamespaceDeclHandler, parser->endNamespaceDeclPtr, 2, args, &retval);
999 		zval_ptr_dtor(&retval);
1000 	}
1001 }
1002 /* }}} */
1003 
1004 /************************* EXTENSION FUNCTIONS *************************/
1005 
/* Shared implementation of xml_parser_create() and xml_parser_create_ns().
 *
 * ns_support selects the accepted PHP arguments: when non-zero a second,
 * optional string argument (the namespace separator) is parsed and, if it
 * was not supplied, ":" is used.
 *
 * On success return_value is initialised as an xml_parser_ce object whose
 * underlying parser has been created and configured.
 */
static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_support) /* {{{ */
{
	zend_string *encoding_param = NULL;
	char *ns_param = NULL;
	size_t ns_param_len = 0;
	XML_Char *encoding;
	xml_parser *parser;
	int auto_detect = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), (ns_support ? "|S!s": "|S!"), &encoding_param, &ns_param, &ns_param_len) == FAILURE) {
		RETURN_THROWS();
	}

	if (encoding_param == NULL) {
		/* No encoding given: fall back to the configured default. */
		encoding = XML(default_encoding);
	} else if (ZSTR_LEN(encoding_param) == 0) {
		/* Empty string: the parser is created without an explicit
		 * encoding (NULL is passed below), while the default is still
		 * recorded as the target encoding. */
		encoding = XML(default_encoding);
		auto_detect = 1;
	} else if (zend_string_equals_literal_ci(encoding_param, "ISO-8859-1")) {
		/* The supported encoding types are hardcoded here because
		 * we are limited to the encodings supported by expat/xmltok.
		 */
		encoding = (XML_Char*)"ISO-8859-1";
	} else if (zend_string_equals_literal_ci(encoding_param, "UTF-8")) {
		encoding = (XML_Char*)"UTF-8";
	} else if (zend_string_equals_literal_ci(encoding_param, "US-ASCII")) {
		encoding = (XML_Char*)"US-ASCII";
	} else {
		zend_argument_value_error(1, "is not a supported source encoding");
		RETURN_THROWS();
	}

	/* Default namespace separator for the namespace-aware variant. */
	if (ns_support && !ns_param) {
		ns_param = ":";
	}

	object_init_ex(return_value, xml_parser_ce);
	parser = Z_XMLPARSER_P(return_value);

	parser->parser = XML_ParserCreate_MM((auto_detect ? NULL : encoding),
	                                     &php_xml_mem_hdlrs, (XML_Char*)ns_param);
	parser->target_encoding = encoding;
	parser->case_folding = 1;
	parser->isparsing = 0;

	/* Callbacks receive the xml_parser struct as their user data. */
	XML_SetUserData(parser->parser, parser);

	/* Remember the parser object's zval; it is what the callbacks pass to
	 * user handlers as their first argument. */
	ZVAL_COPY_VALUE(&parser->index, return_value);
}
/* }}} */
1060 
1061 /* {{{ Create an XML parser */
PHP_FUNCTION(xml_parser_create)1062 PHP_FUNCTION(xml_parser_create)
1063 {
1064 	php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1065 }
1066 /* }}} */
1067 
1068 /* {{{ Create an XML parser */
PHP_FUNCTION(xml_parser_create_ns)1069 PHP_FUNCTION(xml_parser_create_ns)
1070 {
1071 	php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1072 }
1073 /* }}} */
1074 
1075 /* {{{ Set up object which should be used for callbacks */
PHP_FUNCTION(xml_set_object)1076 PHP_FUNCTION(xml_set_object)
1077 {
1078 	xml_parser *parser;
1079 	zval *pind, *mythis;
1080 
1081 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oo", &pind, xml_parser_ce, &mythis) == FAILURE) {
1082 		RETURN_THROWS();
1083 	}
1084 
1085 	parser = Z_XMLPARSER_P(pind);
1086 
1087 	zval_ptr_dtor(&parser->object);
1088 	ZVAL_OBJ_COPY(&parser->object, Z_OBJ_P(mythis));
1089 
1090 	RETVAL_TRUE;
1091 }
1092 /* }}} */
1093 
1094 /* {{{ Set up start and end element handlers */
PHP_FUNCTION(xml_set_element_handler)1095 PHP_FUNCTION(xml_set_element_handler)
1096 {
1097 	xml_parser *parser;
1098 	zval *pind, *shdl, *ehdl;
1099 
1100 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ozz", &pind, xml_parser_ce, &shdl, &ehdl) == FAILURE) {
1101 		RETURN_THROWS();
1102 	}
1103 
1104 	parser = Z_XMLPARSER_P(pind);
1105 	xml_set_handler(&parser->startElementHandler, shdl);
1106 	xml_set_handler(&parser->endElementHandler, ehdl);
1107 	XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
1108 	RETVAL_TRUE;
1109 }
1110 /* }}} */
1111 
1112 /* {{{ Set up character data handler */
PHP_FUNCTION(xml_set_character_data_handler)1113 PHP_FUNCTION(xml_set_character_data_handler)
1114 {
1115 	xml_parser *parser;
1116 	zval *pind, *hdl;
1117 
1118 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1119 		RETURN_THROWS();
1120 	}
1121 
1122 	parser = Z_XMLPARSER_P(pind);
1123 	xml_set_handler(&parser->characterDataHandler, hdl);
1124 	XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
1125 	RETVAL_TRUE;
1126 }
1127 /* }}} */
1128 
1129 /* {{{ Set up processing instruction (PI) handler */
PHP_FUNCTION(xml_set_processing_instruction_handler)1130 PHP_FUNCTION(xml_set_processing_instruction_handler)
1131 {
1132 	xml_parser *parser;
1133 	zval *pind, *hdl;
1134 
1135 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1136 		RETURN_THROWS();
1137 	}
1138 
1139 	parser = Z_XMLPARSER_P(pind);
1140 	xml_set_handler(&parser->processingInstructionHandler, hdl);
1141 	XML_SetProcessingInstructionHandler(parser->parser, _xml_processingInstructionHandler);
1142 	RETVAL_TRUE;
1143 }
1144 /* }}} */
1145 
1146 /* {{{ Set up default handler */
PHP_FUNCTION(xml_set_default_handler)1147 PHP_FUNCTION(xml_set_default_handler)
1148 {
1149 	xml_parser *parser;
1150 	zval *pind, *hdl;
1151 
1152 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1153 		RETURN_THROWS();
1154 	}
1155 
1156 	parser = Z_XMLPARSER_P(pind);
1157 	xml_set_handler(&parser->defaultHandler, hdl);
1158 	XML_SetDefaultHandler(parser->parser, _xml_defaultHandler);
1159 	RETVAL_TRUE;
1160 }
1161 /* }}} */
1162 
1163 /* {{{ Set up unparsed entity declaration handler */
PHP_FUNCTION(xml_set_unparsed_entity_decl_handler)1164 PHP_FUNCTION(xml_set_unparsed_entity_decl_handler)
1165 {
1166 	xml_parser *parser;
1167 	zval *pind, *hdl;
1168 
1169 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1170 		RETURN_THROWS();
1171 	}
1172 
1173 	parser = Z_XMLPARSER_P(pind);
1174 	xml_set_handler(&parser->unparsedEntityDeclHandler, hdl);
1175 	XML_SetUnparsedEntityDeclHandler(parser->parser, _xml_unparsedEntityDeclHandler);
1176 	RETVAL_TRUE;
1177 }
1178 /* }}} */
1179 
1180 /* {{{ Set up notation declaration handler */
PHP_FUNCTION(xml_set_notation_decl_handler)1181 PHP_FUNCTION(xml_set_notation_decl_handler)
1182 {
1183 	xml_parser *parser;
1184 	zval *pind, *hdl;
1185 
1186 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1187 		RETURN_THROWS();
1188 	}
1189 
1190 	parser = Z_XMLPARSER_P(pind);
1191 	xml_set_handler(&parser->notationDeclHandler, hdl);
1192 	XML_SetNotationDeclHandler(parser->parser, _xml_notationDeclHandler);
1193 	RETVAL_TRUE;
1194 }
1195 /* }}} */
1196 
1197 /* {{{ Set up external entity reference handler */
PHP_FUNCTION(xml_set_external_entity_ref_handler)1198 PHP_FUNCTION(xml_set_external_entity_ref_handler)
1199 {
1200 	xml_parser *parser;
1201 	zval *pind, *hdl;
1202 
1203 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1204 		RETURN_THROWS();
1205 	}
1206 
1207 	parser = Z_XMLPARSER_P(pind);
1208 	xml_set_handler(&parser->externalEntityRefHandler, hdl);
1209 	XML_SetExternalEntityRefHandler(parser->parser, (void *) _xml_externalEntityRefHandler);
1210 	RETVAL_TRUE;
1211 }
1212 /* }}} */
1213 
1214 /* {{{ Set up character data handler */
PHP_FUNCTION(xml_set_start_namespace_decl_handler)1215 PHP_FUNCTION(xml_set_start_namespace_decl_handler)
1216 {
1217 	xml_parser *parser;
1218 	zval *pind, *hdl;
1219 
1220 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1221 		RETURN_THROWS();
1222 	}
1223 
1224 	parser = Z_XMLPARSER_P(pind);
1225 	xml_set_handler(&parser->startNamespaceDeclHandler, hdl);
1226 	XML_SetStartNamespaceDeclHandler(parser->parser, _xml_startNamespaceDeclHandler);
1227 	RETVAL_TRUE;
1228 }
1229 /* }}} */
1230 
1231 /* {{{ Set up character data handler */
PHP_FUNCTION(xml_set_end_namespace_decl_handler)1232 PHP_FUNCTION(xml_set_end_namespace_decl_handler)
1233 {
1234 	xml_parser *parser;
1235 	zval *pind, *hdl;
1236 
1237 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1238 		RETURN_THROWS();
1239 	}
1240 
1241 	parser = Z_XMLPARSER_P(pind);
1242 	xml_set_handler(&parser->endNamespaceDeclHandler, hdl);
1243 	XML_SetEndNamespaceDeclHandler(parser->parser, _xml_endNamespaceDeclHandler);
1244 	RETVAL_TRUE;
1245 }
1246 /* }}} */
1247 
1248 /* {{{ Start parsing an XML document */
PHP_FUNCTION(xml_parse)1249 PHP_FUNCTION(xml_parse)
1250 {
1251 	xml_parser *parser;
1252 	zval *pind;
1253 	char *data;
1254 	size_t data_len;
1255 	int ret;
1256 	bool isFinal = 0;
1257 
1258 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
1259 		RETURN_THROWS();
1260 	}
1261 
1262 	parser = Z_XMLPARSER_P(pind);
1263 	if (parser->isparsing) {
1264 		zend_throw_error(NULL, "Parser must not be called recursively");
1265 		RETURN_THROWS();
1266 	}
1267 	parser->isparsing = 1;
1268 	ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, isFinal);
1269 	parser->isparsing = 0;
1270 	RETVAL_LONG(ret);
1271 }
1272 
1273 /* }}} */
1274 
1275 /* {{{ Parsing a XML document */
PHP_FUNCTION(xml_parse_into_struct)1276 PHP_FUNCTION(xml_parse_into_struct)
1277 {
1278 	xml_parser *parser;
1279 	zval *pind, *xdata, *info = NULL;
1280 	char *data;
1281 	size_t data_len;
1282 	int ret;
1283 
1284 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
1285 		RETURN_THROWS();
1286 	}
1287 
1288 	parser = Z_XMLPARSER_P(pind);
1289 
1290 	if (parser->isparsing) {
1291 		php_error_docref(NULL, E_WARNING, "Parser must not be called recursively");
1292 		RETURN_FALSE;
1293 	}
1294 
1295 	if (info) {
1296 		info = zend_try_array_init(info);
1297 		if (!info) {
1298 			RETURN_THROWS();
1299 		}
1300 	}
1301 
1302 	xdata = zend_try_array_init(xdata);
1303 	if (!xdata) {
1304 		RETURN_THROWS();
1305 	}
1306 
1307 	ZVAL_COPY_VALUE(&parser->data, xdata);
1308 
1309 	if (info) {
1310 		ZVAL_COPY_VALUE(&parser->info, info);
1311 	}
1312 
1313 	parser->level = 0;
1314 	xml_parser_free_ltags(parser);
1315 	parser->ltags = safe_emalloc(XML_MAXLEVEL, sizeof(char *), 0);
1316 
1317 	XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
1318 	XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
1319 
1320 	parser->isparsing = 1;
1321 	ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, 1);
1322 	parser->isparsing = 0;
1323 
1324 	RETVAL_LONG(ret);
1325 }
1326 /* }}} */
1327 
1328 /* {{{ Get XML parser error code */
PHP_FUNCTION(xml_get_error_code)1329 PHP_FUNCTION(xml_get_error_code)
1330 {
1331 	xml_parser *parser;
1332 	zval *pind;
1333 
1334 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1335 		RETURN_THROWS();
1336 	}
1337 
1338 	parser = Z_XMLPARSER_P(pind);
1339 	RETURN_LONG((zend_long)XML_GetErrorCode(parser->parser));
1340 }
1341 /* }}} */
1342 
1343 /* {{{ Get XML parser error string */
PHP_FUNCTION(xml_error_string)1344 PHP_FUNCTION(xml_error_string)
1345 {
1346 	zend_long code;
1347 	char *str;
1348 
1349 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &code) == FAILURE) {
1350 		RETURN_THROWS();
1351 	}
1352 
1353 	str = (char *)XML_ErrorString((int)code);
1354 	if (str) {
1355 		RETVAL_STRING(str);
1356 	}
1357 }
1358 /* }}} */
1359 
1360 /* {{{ Get current line number for an XML parser */
PHP_FUNCTION(xml_get_current_line_number)1361 PHP_FUNCTION(xml_get_current_line_number)
1362 {
1363 	xml_parser *parser;
1364 	zval *pind;
1365 
1366 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1367 		RETURN_THROWS();
1368 	}
1369 
1370 	parser = Z_XMLPARSER_P(pind);
1371 	RETVAL_LONG(XML_GetCurrentLineNumber(parser->parser));
1372 }
1373 /* }}} */
1374 
1375 /* {{{ Get current column number for an XML parser */
PHP_FUNCTION(xml_get_current_column_number)1376 PHP_FUNCTION(xml_get_current_column_number)
1377 {
1378 	xml_parser *parser;
1379 	zval *pind;
1380 
1381 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1382 		RETURN_THROWS();
1383 	}
1384 
1385 	parser = Z_XMLPARSER_P(pind);
1386 	RETVAL_LONG(XML_GetCurrentColumnNumber(parser->parser));
1387 }
1388 /* }}} */
1389 
1390 /* {{{ Get current byte index for an XML parser */
PHP_FUNCTION(xml_get_current_byte_index)1391 PHP_FUNCTION(xml_get_current_byte_index)
1392 {
1393 	xml_parser *parser;
1394 	zval *pind;
1395 
1396 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1397 		RETURN_THROWS();
1398 	}
1399 
1400 	parser = Z_XMLPARSER_P(pind);
1401 	RETVAL_LONG(XML_GetCurrentByteIndex(parser->parser));
1402 }
1403 /* }}} */
1404 
1405 /* {{{ Free an XML parser */
PHP_FUNCTION(xml_parser_free)1406 PHP_FUNCTION(xml_parser_free)
1407 {
1408 	zval *pind;
1409 	xml_parser *parser;
1410 
1411 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1412 		RETURN_THROWS();
1413 	}
1414 
1415 	parser = Z_XMLPARSER_P(pind);
1416 	if (parser->isparsing == 1) {
1417 		php_error_docref(NULL, E_WARNING, "Parser cannot be freed while it is parsing");
1418 		RETURN_FALSE;
1419 	}
1420 
1421 	RETURN_TRUE;
1422 }
1423 /* }}} */
1424 
1425 /* {{{ Set options in an XML parser */
PHP_FUNCTION(xml_parser_set_option)1426 PHP_FUNCTION(xml_parser_set_option)
1427 {
1428 	xml_parser *parser;
1429 	zval *pind, *val;
1430 	zend_long opt;
1431 
1432 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Olz", &pind, xml_parser_ce, &opt, &val) == FAILURE) {
1433 		RETURN_THROWS();
1434 	}
1435 
1436 	parser = Z_XMLPARSER_P(pind);
1437 	switch (opt) {
1438 		case PHP_XML_OPTION_CASE_FOLDING:
1439 			parser->case_folding = zval_get_long(val);
1440 			break;
1441 		case PHP_XML_OPTION_SKIP_TAGSTART:
1442 			parser->toffset = zval_get_long(val);
1443 			if (parser->toffset < 0) {
1444 				php_error_docref(NULL, E_WARNING, "tagstart ignored, because it is out of range");
1445 				parser->toffset = 0;
1446 			}
1447 			break;
1448 		case PHP_XML_OPTION_SKIP_WHITE:
1449 			parser->skipwhite = zval_get_long(val);
1450 			break;
1451 		case PHP_XML_OPTION_TARGET_ENCODING: {
1452 			const xml_encoding *enc;
1453 			if (!try_convert_to_string(val)) {
1454 				RETURN_THROWS();
1455 			}
1456 
1457 			enc = xml_get_encoding((XML_Char*)Z_STRVAL_P(val));
1458 			if (enc == NULL) {
1459 				zend_argument_value_error(3, "is not a supported target encoding");
1460 				RETURN_THROWS();
1461 			}
1462 
1463 			parser->target_encoding = enc->name;
1464 			break;
1465 		}
1466 		default:
1467 			zend_argument_value_error(2, "must be a XML_OPTION_* constant");
1468 			RETURN_THROWS();
1469 			break;
1470 	}
1471 	RETVAL_TRUE;
1472 }
1473 /* }}} */
1474 
1475 /* {{{ Get options from an XML parser */
PHP_FUNCTION(xml_parser_get_option)1476 PHP_FUNCTION(xml_parser_get_option)
1477 {
1478 	xml_parser *parser;
1479 	zval *pind;
1480 	zend_long opt;
1481 
1482 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ol", &pind, xml_parser_ce, &opt) == FAILURE) {
1483 		RETURN_THROWS();
1484 	}
1485 
1486 	parser = Z_XMLPARSER_P(pind);
1487 	switch (opt) {
1488 		case PHP_XML_OPTION_CASE_FOLDING:
1489 			RETURN_LONG(parser->case_folding);
1490 			break;
1491 		case PHP_XML_OPTION_SKIP_TAGSTART:
1492 			RETURN_LONG(parser->toffset);
1493 			break;
1494 		case PHP_XML_OPTION_SKIP_WHITE:
1495 			RETURN_LONG(parser->skipwhite);
1496 			break;
1497 		case PHP_XML_OPTION_TARGET_ENCODING:
1498 			RETURN_STRING((char *)parser->target_encoding);
1499 			break;
1500 		default:
1501 			zend_argument_value_error(2, "must be a XML_OPTION_* constant");
1502 			RETURN_THROWS();
1503 	}
1504 }
1505 /* }}} */
1506 
1507 #endif
1508