xref: /PHP-8.0/ext/xml/xml.c (revision a9661a52)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Stig Sæther Bakken <ssb@php.net>                            |
14    |          Thies C. Arntzen <thies@thieso.net>                         |
15    |          Sterling Hughes <sterling@php.net>                          |
16    +----------------------------------------------------------------------+
17  */
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include "php.h"
24 
25 #include "zend_variables.h"
26 #include "ext/standard/php_string.h"
27 #include "ext/standard/info.h"
28 #include "ext/standard/html.h"
29 #include "zend_interfaces.h"
30 
31 #ifdef HAVE_XML
32 
33 #include "php_xml.h"
34 # include "ext/standard/head.h"
35 #ifdef LIBXML_EXPAT_COMPAT
36 #include "ext/libxml/php_libxml.h"
37 #endif
38 
39 #include "xml_arginfo.h"
40 
41 /* Short-term TODO list:
42  * - Implement XML_ExternalEntityParserCreate()
43  * - XML_SetCommentHandler
44  * - XML_SetCdataSectionHandler
45  * - XML_SetParamEntityParsing
46  */
47 
48 /* Long-term TODO list:
49  * - Fix the expat library so you can install your own memory manager
50  *   functions
51  */
52 
53 /* Known bugs:
54  * - Weird things happen with <![CDATA[]]> sections.
55  */
56 
/* Module globals of the xml extension. */
ZEND_BEGIN_MODULE_GLOBALS(xml)57 ZEND_BEGIN_MODULE_GLOBALS(xml)
58 	XML_Char *default_encoding; /* initialised to "UTF-8" by PHP_GINIT_FUNCTION(xml) */
59 ZEND_END_MODULE_GLOBALS(xml)
60 
61 ZEND_DECLARE_MODULE_GLOBALS(xml)
62 
/* Shorthand for accessing field `v` of the xml module globals. */
63 #define XML(v) ZEND_MODULE_GLOBALS_ACCESSOR(xml, v)
64 
/* State of one XMLParser object. The embedded zend_object is the `std`
 * member; xml_parser_from_obj() maps a zend_object pointer back to this
 * struct via XtOffsetOf(xml_parser, std). */
65 typedef struct {
66 	int case_folding; /* non-zero: _xml_decode_tag() upper-cases decoded names */
67 	XML_Parser parser; /* underlying parser handle, released with XML_ParserFree() */
68 	XML_Char *target_encoding; /* encoding name handed to xml_utf8_decode() */
69 
70 	/* Reference to the object itself, for convenience.
71 	 * It is not owned, do not release it. */
72 	zval index;
73 
74 	/* We return a pointer to these zvals in get_gc(), so it's
75 	 * important that a) they are adjacent b) object is the first
76 	 * and c) the number of zvals is kept up to date. */
77 #define XML_PARSER_NUM_ZVALS 12
78 	zval object;
79 	zval startElementHandler;
80 	zval endElementHandler;
81 	zval characterDataHandler;
82 	zval processingInstructionHandler;
83 	zval defaultHandler;
84 	zval unparsedEntityDeclHandler;
85 	zval notationDeclHandler;
86 	zval externalEntityRefHandler;
87 	zval unknownEncodingHandler;
88 	zval startNamespaceDeclHandler;
89 	zval endNamespaceDeclHandler;
90 
	/* One slot per handler above; each is passed to xml_call_handler() as
	 * its function_ptr argument. */
91 	zend_function *startElementPtr;
92 	zend_function *endElementPtr;
93 	zend_function *characterDataPtr;
94 	zend_function *processingInstructionPtr;
95 	zend_function *defaultPtr;
96 	zend_function *unparsedEntityDeclPtr;
97 	zend_function *notationDeclPtr;
98 	zend_function *externalEntityRefPtr;
99 	zend_function *unknownEncodingPtr;
100 	zend_function *startNamespaceDeclPtr;
101 	zend_function *endNamespaceDeclPtr;
102 
103 	zval data; /* when defined, the element/character handlers append tag arrays here */
104 	zval info; /* when defined, _xml_add_to_info() maps tag names to lists of indices */
105 	int level; /* element nesting depth: ++ in start handler, -- in end handler */
106 	int toffset; /* number of leading tag-name characters skipped by SKIP_TAGSTART() */
107 	int curtag; /* running counter recorded and incremented by _xml_add_to_info() */
108 	zval *ctag; /* entry most recently inserted into `data` by the start handler */
109 	char **ltags; /* estrdup'ed tag names, indexed by level - 1 */
110 	int lastwasopen; /* set after an "open" tag is recorded, cleared by the end handler */
111 	int skipwhite; /* non-zero: whitespace-only character data does not start a new value */
112 	int isparsing;
113 
114 	XML_Char *baseURI; /* efree'd in xml_parser_free_obj() */
115 
116 	zend_object std;
117 } xml_parser;
118 
119 
/* One entry of the xml_encodings table: an encoding name plus optional
 * per-character conversion functions. A NULL decoding_function makes
 * xml_utf8_decode() return its input unchanged. */
120 typedef struct {
121 	XML_Char *name;
122 	char (*decoding_function)(unsigned short);
123 	unsigned short (*encoding_function)(unsigned char);
124 } xml_encoding;
125 
126 
/* Parser option identifiers; PHP_MINIT_FUNCTION(xml) exports them to
 * userland as the XML_OPTION_* constants. */
enum php_xml_option {
	PHP_XML_OPTION_CASE_FOLDING    = 1,
	PHP_XML_OPTION_TARGET_ENCODING = 2,
	PHP_XML_OPTION_SKIP_TAGSTART   = 3,
	PHP_XML_OPTION_SKIP_WHITE      = 4
};
133 
134 /* {{{ dynamically loadable module stuff */
/* Everything in this section is compiled only when COMPILE_DL_XML is
 * defined; the TSRMLS cache definition additionally requires ZTS. */
135 #ifdef COMPILE_DL_XML
136 #ifdef ZTS
137 ZEND_TSRMLS_CACHE_DEFINE()
138 #endif
139 ZEND_GET_MODULE(xml)
140 #endif /* COMPILE_DL_XML */
141 /* }}} */
142 
/* Deepest element nesting level for which tags are recorded in
 * parser->data / parser->ltags. */
143 #define XML_MAXLEVEL 255 /* XXX this should be dynamic */
144 
/* Yields `str` advanced by parser->toffset characters, clamped to the end
 * of the string. Requires a variable named `parser` in scope and evaluates
 * `str` more than once. */
145 #define SKIP_TAGSTART(str) ((str) + (parser->toffset > (int)strlen(str) ? strlen(str) : parser->toffset))
146 
/* Class entry and object handlers of XMLParser, filled in by
 * PHP_MINIT_FUNCTION(xml). */
147 static zend_class_entry *xml_parser_ce;
148 static zend_object_handlers xml_parser_object_handlers;
149 
150 /* {{{ function prototypes */
/* Forward declarations for the module hooks, the XMLParser object
 * handlers, internal helpers and the parser callbacks defined below.
 * Note: _xml_decode_tag is declared `inline static` here but defined as
 * plain `static`. */
151 PHP_MINIT_FUNCTION(xml);
152 PHP_MINFO_FUNCTION(xml);
153 static PHP_GINIT_FUNCTION(xml);
154 
155 static zend_object *xml_parser_create_object(zend_class_entry *class_type);
156 static void xml_parser_free_obj(zend_object *object);
157 static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n);
158 static zend_function *xml_parser_get_constructor(zend_object *object);
159 
160 static zend_string *xml_utf8_decode(const XML_Char *, size_t, const XML_Char *);
161 static void xml_set_handler(zval *, zval *);
162 inline static unsigned short xml_encode_iso_8859_1(unsigned char);
163 inline static char xml_decode_iso_8859_1(unsigned short);
164 inline static unsigned short xml_encode_us_ascii(unsigned char);
165 inline static char xml_decode_us_ascii(unsigned short);
166 static void xml_call_handler(xml_parser *, zval *, zend_function *, int, zval *, zval *);
167 static void _xml_xmlchar_zval(const XML_Char *, int, const XML_Char *, zval *);
168 static int _xml_xmlcharlen(const XML_Char *);
169 static void _xml_add_to_info(xml_parser *parser,char *name);
170 inline static zend_string *_xml_decode_tag(xml_parser *parser, const char *tag);
171 
/* Parser callbacks; the first argument is the xml_parser passed as user data
 * (except for the external entity handler, which receives the XML_Parser). */
172 void _xml_startElementHandler(void *, const XML_Char *, const XML_Char **);
173 void _xml_endElementHandler(void *, const XML_Char *);
174 void _xml_characterDataHandler(void *, const XML_Char *, int);
175 void _xml_processingInstructionHandler(void *, const XML_Char *, const XML_Char *);
176 void _xml_defaultHandler(void *, const XML_Char *, int);
177 void _xml_unparsedEntityDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
178 void _xml_notationDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
179 int  _xml_externalEntityRefHandler(XML_Parser, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
180 
181 void _xml_startNamespaceDeclHandler(void *, const XML_Char *, const XML_Char *);
182 void _xml_endNamespaceDeclHandler(void *, const XML_Char *);
183 /* }}} */
184 
/* Module dependency list, present only in LIBXML_EXPAT_COMPAT builds:
 * the xml extension then requires the "libxml" extension. */
185 #ifdef LIBXML_EXPAT_COMPAT
186 static const zend_module_dep xml_deps[] = {
187 	ZEND_MOD_REQUIRED("libxml")
188 	ZEND_MOD_END
189 };
190 #endif
191 
/* Module descriptor for the xml extension. The initializer is positional,
 * so the order of the entries must not change. Only MINIT, MINFO and the
 * globals constructor are provided; every other hook is NULL. */
192 zend_module_entry xml_module_entry = {
193 #ifdef LIBXML_EXPAT_COMPAT
194     STANDARD_MODULE_HEADER_EX, NULL,
195 	xml_deps,
196 #else
197     STANDARD_MODULE_HEADER,
198 #endif
199 	"xml",                /* extension name */
200 	ext_functions,        /* extension function list */
201 	PHP_MINIT(xml),       /* extension-wide startup function */
202 	NULL,                 /* extension-wide shutdown function */
203 	NULL,                 /* per-request startup function */
204 	NULL,                 /* per-request shutdown function */
205 	PHP_MINFO(xml),       /* information function */
206     PHP_XML_VERSION,
207     PHP_MODULE_GLOBALS(xml), /* globals descriptor */
208     PHP_GINIT(xml),          /* globals ctor */
209     NULL,                    /* globals dtor */
210     NULL,                    /* post deactivate */
211 	STANDARD_MODULE_PROPERTIES_EX
212 };
213 
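/* Table of supported target encodings, terminated by an entry whose name
 * is NULL. ISO-8859-1 and US-ASCII supply per-character conversion
 * functions; UTF-8 leaves them NULL, in which case xml_utf8_decode()
 * returns the parser's UTF-8 data unchanged.
 */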
/* Searched linearly (case-insensitively) by xml_get_encoding(); the final
 * all-NULL row is the end-of-table sentinel. */
217 const xml_encoding xml_encodings[] = {
218 	{ (XML_Char *)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
219 	{ (XML_Char *)"US-ASCII",   xml_decode_us_ascii,   xml_encode_us_ascii   },
220 	{ (XML_Char *)"UTF-8",      NULL,                  NULL                  },
221 	{ (XML_Char *)NULL,         NULL,                  NULL                  }
222 };
223 
/* Allocator callbacks for the parser; the members are assigned in
 * PHP_MINIT_FUNCTION(xml) to the php_xml_*_wrapper functions. */
224 static XML_Memory_Handling_Suite php_xml_mem_hdlrs;
225 
226 /* }}} */
227 
228 /* {{{ startup, shutdown and info functions */
PHP_GINIT_FUNCTION(xml)229 static PHP_GINIT_FUNCTION(xml)
230 {
231 #if defined(COMPILE_DL_XML) && defined(ZTS)
232 	ZEND_TSRMLS_CACHE_UPDATE();
233 #endif
234 	xml_globals->default_encoding = (XML_Char*)"UTF-8";
235 }
236 
/* Allocation callback: forwards to emalloc(). */
static void *php_xml_malloc_wrapper(size_t sz)
{
	void *block = emalloc(sz);

	return block;
}
241 
/* Reallocation callback: forwards to erealloc(). */
static void *php_xml_realloc_wrapper(void *ptr, size_t sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
246 
/* Release callback: forwards non-NULL pointers to efree(); NULL is ignored. */
static void php_xml_free_wrapper(void *ptr)
{
	if (ptr == NULL) {
		return;
	}

	efree(ptr);
}
253 
PHP_MINIT_FUNCTION(xml)254 PHP_MINIT_FUNCTION(xml)
255 {
256 	zend_class_entry ce;
257 	INIT_CLASS_ENTRY(ce, "XMLParser", class_XMLParser_methods);
258 	xml_parser_ce = zend_register_internal_class(&ce);
259 	xml_parser_ce->create_object = xml_parser_create_object;
260 	xml_parser_ce->ce_flags |= ZEND_ACC_FINAL | ZEND_ACC_NO_DYNAMIC_PROPERTIES;
261 	xml_parser_ce->serialize = zend_class_serialize_deny;
262 	xml_parser_ce->unserialize = zend_class_unserialize_deny;
263 
264 	memcpy(&xml_parser_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
265 	xml_parser_object_handlers.offset = XtOffsetOf(xml_parser, std);
266 	xml_parser_object_handlers.free_obj = xml_parser_free_obj;
267 	xml_parser_object_handlers.get_gc = xml_parser_get_gc;
268 	xml_parser_object_handlers.get_constructor = xml_parser_get_constructor;
269 	xml_parser_object_handlers.clone_obj = NULL;
270 	xml_parser_object_handlers.compare = zend_objects_not_comparable;
271 
272 	REGISTER_LONG_CONSTANT("XML_ERROR_NONE", XML_ERROR_NONE, CONST_CS|CONST_PERSISTENT);
273 	REGISTER_LONG_CONSTANT("XML_ERROR_NO_MEMORY", XML_ERROR_NO_MEMORY, CONST_CS|CONST_PERSISTENT);
274 	REGISTER_LONG_CONSTANT("XML_ERROR_SYNTAX", XML_ERROR_SYNTAX, CONST_CS|CONST_PERSISTENT);
275 	REGISTER_LONG_CONSTANT("XML_ERROR_NO_ELEMENTS", XML_ERROR_NO_ELEMENTS, CONST_CS|CONST_PERSISTENT);
276 	REGISTER_LONG_CONSTANT("XML_ERROR_INVALID_TOKEN", XML_ERROR_INVALID_TOKEN, CONST_CS|CONST_PERSISTENT);
277 	REGISTER_LONG_CONSTANT("XML_ERROR_UNCLOSED_TOKEN", XML_ERROR_UNCLOSED_TOKEN, CONST_CS|CONST_PERSISTENT);
278 	REGISTER_LONG_CONSTANT("XML_ERROR_PARTIAL_CHAR", XML_ERROR_PARTIAL_CHAR, CONST_CS|CONST_PERSISTENT);
279 	REGISTER_LONG_CONSTANT("XML_ERROR_TAG_MISMATCH", XML_ERROR_TAG_MISMATCH, CONST_CS|CONST_PERSISTENT);
280 	REGISTER_LONG_CONSTANT("XML_ERROR_DUPLICATE_ATTRIBUTE", XML_ERROR_DUPLICATE_ATTRIBUTE, CONST_CS|CONST_PERSISTENT);
281 	REGISTER_LONG_CONSTANT("XML_ERROR_JUNK_AFTER_DOC_ELEMENT", XML_ERROR_JUNK_AFTER_DOC_ELEMENT, CONST_CS|CONST_PERSISTENT);
282 	REGISTER_LONG_CONSTANT("XML_ERROR_PARAM_ENTITY_REF", XML_ERROR_PARAM_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
283 	REGISTER_LONG_CONSTANT("XML_ERROR_UNDEFINED_ENTITY", XML_ERROR_UNDEFINED_ENTITY, CONST_CS|CONST_PERSISTENT);
284 	REGISTER_LONG_CONSTANT("XML_ERROR_RECURSIVE_ENTITY_REF", XML_ERROR_RECURSIVE_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
285 	REGISTER_LONG_CONSTANT("XML_ERROR_ASYNC_ENTITY", XML_ERROR_ASYNC_ENTITY, CONST_CS|CONST_PERSISTENT);
286 	REGISTER_LONG_CONSTANT("XML_ERROR_BAD_CHAR_REF", XML_ERROR_BAD_CHAR_REF, CONST_CS|CONST_PERSISTENT);
287 	REGISTER_LONG_CONSTANT("XML_ERROR_BINARY_ENTITY_REF", XML_ERROR_BINARY_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
288 	REGISTER_LONG_CONSTANT("XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
289 	REGISTER_LONG_CONSTANT("XML_ERROR_MISPLACED_XML_PI", XML_ERROR_MISPLACED_XML_PI, CONST_CS|CONST_PERSISTENT);
290 	REGISTER_LONG_CONSTANT("XML_ERROR_UNKNOWN_ENCODING", XML_ERROR_UNKNOWN_ENCODING, CONST_CS|CONST_PERSISTENT);
291 	REGISTER_LONG_CONSTANT("XML_ERROR_INCORRECT_ENCODING", XML_ERROR_INCORRECT_ENCODING, CONST_CS|CONST_PERSISTENT);
292 	REGISTER_LONG_CONSTANT("XML_ERROR_UNCLOSED_CDATA_SECTION", XML_ERROR_UNCLOSED_CDATA_SECTION, CONST_CS|CONST_PERSISTENT);
293 	REGISTER_LONG_CONSTANT("XML_ERROR_EXTERNAL_ENTITY_HANDLING", XML_ERROR_EXTERNAL_ENTITY_HANDLING, CONST_CS|CONST_PERSISTENT);
294 
295 	REGISTER_LONG_CONSTANT("XML_OPTION_CASE_FOLDING", PHP_XML_OPTION_CASE_FOLDING, CONST_CS|CONST_PERSISTENT);
296 	REGISTER_LONG_CONSTANT("XML_OPTION_TARGET_ENCODING", PHP_XML_OPTION_TARGET_ENCODING, CONST_CS|CONST_PERSISTENT);
297 	REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_TAGSTART", PHP_XML_OPTION_SKIP_TAGSTART, CONST_CS|CONST_PERSISTENT);
298 	REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_WHITE", PHP_XML_OPTION_SKIP_WHITE, CONST_CS|CONST_PERSISTENT);
299 
300 	/* this object should not be pre-initialised at compile time,
301 	   as the order of members may vary */
302 
303 	php_xml_mem_hdlrs.malloc_fcn = php_xml_malloc_wrapper;
304 	php_xml_mem_hdlrs.realloc_fcn = php_xml_realloc_wrapper;
305 	php_xml_mem_hdlrs.free_fcn = php_xml_free_wrapper;
306 
307 #ifdef LIBXML_EXPAT_COMPAT
308 	REGISTER_STRING_CONSTANT("XML_SAX_IMPL", "libxml", CONST_CS|CONST_PERSISTENT);
309 #else
310 	REGISTER_STRING_CONSTANT("XML_SAX_IMPL", "expat", CONST_CS|CONST_PERSISTENT);
311 #endif
312 
313 	return SUCCESS;
314 }
315 
/* Prints the xml section of the module information table, including the
 * version of the backing parser library. */
PHP_MINFO_FUNCTION(xml)
{
#if defined(LIBXML_DOTTED_VERSION) && defined(LIBXML_EXPAT_COMPAT)
	const char *backend_label = "libxml2 Version";
	const char *backend_version = LIBXML_DOTTED_VERSION;
#else
	const char *backend_label = "EXPAT Version";
	const char *backend_version = XML_ExpatVersion();
#endif

	php_info_print_table_start();
	php_info_print_table_row(2, "XML Support", "active");
	php_info_print_table_row(2, "XML Namespace Support", "active");
	php_info_print_table_row(2, backend_label, backend_version);
	php_info_print_table_end();
}
328 /* }}} */
329 
330 /* {{{ extension-internal functions */
331 
/* Stores the decoded form of `s` in `ret`.
 *
 * A NULL `s` yields boolean false. A `len` of 0 means "measure the
 * NUL-terminated string"; otherwise exactly `len` characters are decoded
 * into `encoding` via xml_utf8_decode(). */
static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
{
	if (s != NULL) {
		int effective_len = (len != 0) ? len : _xml_xmlcharlen(s);

		ZVAL_STR(ret, xml_utf8_decode(s, effective_len, encoding));
	} else {
		ZVAL_FALSE(ret);
	}
}
343 /* }}} */
344 
xml_parser_from_obj(zend_object * obj)345 static inline xml_parser *xml_parser_from_obj(zend_object *obj) {
346 	return (xml_parser *)((char *)(obj) - XtOffsetOf(xml_parser, std));
347 }
348 
/* Fetches the xml_parser behind an object zval pointer. */
349 #define Z_XMLPARSER_P(zv) xml_parser_from_obj(Z_OBJ_P(zv))
350 
xml_parser_create_object(zend_class_entry * class_type)351 static zend_object *xml_parser_create_object(zend_class_entry *class_type) {
352 	xml_parser *intern = zend_object_alloc(sizeof(xml_parser), class_type);
353 	memset(intern, 0, sizeof(xml_parser) - sizeof(zend_object));
354 
355 	zend_object_std_init(&intern->std, class_type);
356 	object_properties_init(&intern->std, class_type);
357 	intern->std.handlers = &xml_parser_object_handlers;
358 
359 	return &intern->std;
360 }
361 
/* free_obj handler: releases the native parser, the per-level tag name
 * stack, every registered handler, the base URI and the handler object,
 * then destroys the standard object part. `index` is not released because
 * it is not owned. */
static void xml_parser_free_obj(zend_object *object)
{
	xml_parser *parser = xml_parser_from_obj(object);
	/* Handlers in the order in which they are released. */
	zval *owned_handlers[] = {
		&parser->startElementHandler,
		&parser->endElementHandler,
		&parser->characterDataHandler,
		&parser->processingInstructionHandler,
		&parser->defaultHandler,
		&parser->unparsedEntityDeclHandler,
		&parser->notationDeclHandler,
		&parser->externalEntityRefHandler,
		&parser->unknownEncodingHandler,
		&parser->startNamespaceDeclHandler,
		&parser->endNamespaceDeclHandler
	};
	size_t h;

	if (parser->parser) {
		XML_ParserFree(parser->parser);
	}

	if (parser->ltags) {
		/* Only the slots of still-open levels hold live strings. */
		int live = (parser->level < XML_MAXLEVEL) ? parser->level : XML_MAXLEVEL;
		int depth;

		for (depth = 0; depth < live; depth++) {
			efree(parser->ltags[depth]);
		}
		efree(parser->ltags);
	}

	for (h = 0; h < sizeof(owned_handlers) / sizeof(owned_handlers[0]); h++) {
		if (!Z_ISUNDEF_P(owned_handlers[h])) {
			zval_ptr_dtor(owned_handlers[h]);
		}
	}

	if (parser->baseURI) {
		efree(parser->baseURI);
	}

	if (!Z_ISUNDEF(parser->object)) {
		zval_ptr_dtor(&parser->object);
	}

	zend_object_std_dtor(&parser->std);
}
417 
xml_parser_get_gc(zend_object * object,zval ** table,int * n)418 static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n)
419 {
420 	xml_parser *parser = xml_parser_from_obj(object);
421 	*table = &parser->object;
422 	*n = XML_PARSER_NUM_ZVALS;
423 	return zend_std_get_properties(object);
424 }
425 
xml_parser_get_constructor(zend_object * object)426 static zend_function *xml_parser_get_constructor(zend_object *object) {
427 	zend_throw_error(NULL, "Cannot directly construct XMLParser, use xml_parser_create() or xml_parser_create_ns() instead");
428 	return NULL;
429 }
430 
431 /* {{{ xml_set_handler() */
/* Replaces the callback stored in `handler` with `data`.
 *
 * The previous value is released first. Arrays and objects are stored as
 * they are; anything else is converted to a string, and an empty string
 * clears the handler (leaves it undefined). */
static void xml_set_handler(zval *handler, zval *data)
{
	int is_array_or_object;

	/* Drop whatever handler was installed before. */
	if (handler != NULL) {
		zval_ptr_dtor(handler);
	}

	/* An array may be the array($obj, 'method') callback syntax. */
	is_array_or_object = (Z_TYPE_P(data) == IS_ARRAY || Z_TYPE_P(data) == IS_OBJECT);

	if (!is_array_or_object) {
		convert_to_string_ex(data);
		if (Z_STRLEN_P(data) == 0) {
			ZVAL_UNDEF(handler);
			return;
		}
	}

	ZVAL_COPY(handler, data);
}
450 /* }}} */
451 
452 /* {{{ xml_call_handler() */
/* Invokes the userland callback `handler` with `argc` arguments.
 *
 * `retval` is always reset to undefined first. The call is skipped when
 * `parser` or `handler` is NULL or an exception is already pending. A
 * failed call emits an "Unable to call handler" warning. Every element of
 * `argv` is released before returning, whether or not the call happened.
 * `function_ptr` is not used. */
static void xml_call_handler(xml_parser *parser, zval *handler, zend_function *function_ptr, int argc, zval *argv, zval *retval)
{
	int arg;

	ZVAL_UNDEF(retval);

	if (parser != NULL && handler != NULL && !EG(exception)) {
		zend_fcall_info fci;

		fci.size = sizeof(fci);
		fci.object = Z_OBJ(parser->object);
		fci.retval = retval;
		fci.params = argv;
		fci.param_count = argc;
		fci.named_params = NULL;
		ZVAL_COPY_VALUE(&fci.function_name, handler);

		if (zend_call_function(&fci, NULL) == FAILURE) {
			if (Z_TYPE_P(handler) == IS_STRING) {
				php_error_docref(NULL, E_WARNING, "Unable to call handler %s()", Z_STRVAL_P(handler));
			} else {
				zval *obj = NULL;
				zval *method = NULL;

				/* Look for the array(object, "method") callback form. */
				if (Z_TYPE_P(handler) == IS_ARRAY) {
					obj = zend_hash_index_find(Z_ARRVAL_P(handler), 0);
					if (obj != NULL) {
						method = zend_hash_index_find(Z_ARRVAL_P(handler), 1);
					}
				}

				if (obj != NULL && method != NULL &&
					Z_TYPE_P(obj) == IS_OBJECT &&
					Z_TYPE_P(method) == IS_STRING) {
					php_error_docref(NULL, E_WARNING, "Unable to call handler %s::%s()", ZSTR_VAL(Z_OBJCE_P(obj)->name), Z_STRVAL_P(method));
				} else {
					php_error_docref(NULL, E_WARNING, "Unable to call handler");
				}
			}
		}
	}

	for (arg = 0; arg < argc; arg++) {
		zval_ptr_dtor(&argv[arg]);
	}
}
491 /* }}} */
492 
493 /* {{{ xml_encode_iso_8859_1() */
/* Returns the byte `c` widened to unsigned short, value unchanged. */
inline static unsigned short xml_encode_iso_8859_1(unsigned char c)
{
	unsigned short code_point = c;

	return code_point;
}
498 /* }}} */
499 
500 /* {{{ xml_decode_iso_8859_1() */
/* Narrows `c` to a char; values above 0xff become '?'. */
inline static char xml_decode_iso_8859_1(unsigned short c)
{
	if (c > 0xff) {
		return '?';
	}

	return (char)c;
}
505 /* }}} */
506 
507 /* {{{ xml_encode_us_ascii() */
/* Returns the byte `c` widened to unsigned short, value unchanged. */
inline static unsigned short xml_encode_us_ascii(unsigned char c)
{
	unsigned short code_point = c;

	return code_point;
}
512 /* }}} */
513 
514 /* {{{ xml_decode_us_ascii() */
/* Narrows `c` to a char; values above 0x7f become '?'. */
inline static char xml_decode_us_ascii(unsigned short c)
{
	if (c > 0x7f) {
		return '?';
	}

	return (char)c;
}
519 /* }}} */
520 
521 /* {{{ xml_get_encoding() */
xml_get_encoding(const XML_Char * name)522 static const xml_encoding *xml_get_encoding(const XML_Char *name)
523 {
524 	const xml_encoding *enc = &xml_encodings[0];
525 
526 	while (enc && enc->name) {
527 		if (strcasecmp((char *)name, (char *)enc->name) == 0) {
528 			return enc;
529 		}
530 		enc++;
531 	}
532 	return NULL;
533 }
534 /* }}} */
535 
536 /* {{{ xml_utf8_decode() */
xml_utf8_decode(const XML_Char * s,size_t len,const XML_Char * encoding)537 static zend_string *xml_utf8_decode(const XML_Char *s, size_t len, const XML_Char *encoding)
538 {
539 	size_t pos = 0;
540 	unsigned int c;
541 	char (*decoder)(unsigned short) = NULL;
542 	const xml_encoding *enc = xml_get_encoding(encoding);
543 	zend_string *str;
544 
545 	if (enc) {
546 		decoder = enc->decoding_function;
547 	}
548 
549 	if (decoder == NULL) {
550 		/* If the target encoding was unknown, or no decoder function
551 		 * was specified, return the UTF-8-encoded data as-is.
552 		 */
553 		str = zend_string_init((char *)s, len, 0);
554 		return str;
555 	}
556 
557 	str = zend_string_alloc(len, 0);
558 	ZSTR_LEN(str) = 0;
559 	while (pos < len) {
560 		int status = FAILURE;
561 		c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
562 
563 		if (status == FAILURE || c > 0xFFU) {
564 			c = '?';
565 		}
566 
567 		ZSTR_VAL(str)[ZSTR_LEN(str)++] = decoder ? (unsigned int)decoder(c) : c;
568 	}
569 	ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
570 	if (ZSTR_LEN(str) < len) {
571 		str = zend_string_truncate(str, ZSTR_LEN(str), 0);
572 	}
573 
574 	return str;
575 }
576 /* }}} */
577 
578 /* {{{ _xml_xmlcharlen() */
/* Returns the number of XML_Char units before the terminating zero. */
static int _xml_xmlcharlen(const XML_Char *s)
{
	int count;

	for (count = 0; *s; s++) {
		count++;
	}

	return count;
}
589 /* }}} */
590 
591 /* {{{ _xml_add_to_info() */
/* Records the current tag index under `name` in parser->info and advances
 * parser->curtag. Does nothing when parser->info is undefined. */
static void _xml_add_to_info(xml_parser *parser,char *name)
{
	HashTable *info_table;
	zval *index_list;

	if (Z_ISUNDEF(parser->info)) {
		return;
	}

	info_table = Z_ARRVAL(parser->info);
	index_list = zend_hash_str_find(info_table, name, strlen(name));

	if (index_list == NULL) {
		/* First occurrence of this name: start a fresh list for it. */
		zval fresh_list;

		array_init(&fresh_list);
		index_list = zend_hash_str_update(info_table, name, strlen(name), &fresh_list);
	}

	add_next_index_long(index_list, parser->curtag);

	parser->curtag++;
}
610 /* }}} */
611 
612 /* {{{ _xml_decode_tag() */
_xml_decode_tag(xml_parser * parser,const char * tag)613 static zend_string *_xml_decode_tag(xml_parser *parser, const char *tag)
614 {
615 	zend_string *str;
616 
617 	str = xml_utf8_decode((const XML_Char *)tag, strlen(tag), parser->target_encoding);
618 
619 	if (parser->case_folding) {
620 		php_strtoupper(ZSTR_VAL(str), ZSTR_LEN(str));
621 	}
622 
623 	return str;
624 }
625 /* }}} */
626 
627 /* {{{ _xml_startElementHandler() */
_xml_startElementHandler(void * userData,const XML_Char * name,const XML_Char ** attributes)628 void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes)
629 {
630 	xml_parser *parser = (xml_parser *)userData;
631 	const char **attrs = (const char **) attributes;
632 	zend_string *att, *tag_name, *val;
633 	zval retval, args[3];
634 
635 	if (parser) {
636 		parser->level++;
637 
638 		tag_name = _xml_decode_tag(parser, (const char *)name);
639 
640 		if (!Z_ISUNDEF(parser->startElementHandler)) {
641 			ZVAL_COPY(&args[0], &parser->index);
642 			ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
643 			array_init(&args[2]);
644 
645 			while (attributes && *attributes) {
646 				zval tmp;
647 
648 				att = _xml_decode_tag(parser, (const char *)attributes[0]);
649 				val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
650 
651 				ZVAL_STR(&tmp, val);
652 				zend_symtable_update(Z_ARRVAL(args[2]), att, &tmp);
653 
654 				attributes += 2;
655 
656 				zend_string_release_ex(att, 0);
657 			}
658 
659 			xml_call_handler(parser, &parser->startElementHandler, parser->startElementPtr, 3, args, &retval);
660 			zval_ptr_dtor(&retval);
661 		}
662 
663 		if (!Z_ISUNDEF(parser->data)) {
664 			if (parser->level <= XML_MAXLEVEL)  {
665 				zval tag, atr;
666 				int atcnt = 0;
667 
668 				array_init(&tag);
669 				array_init(&atr);
670 
671 				_xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
672 
673 				add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
674 				add_assoc_string(&tag, "type", "open");
675 				add_assoc_long(&tag, "level", parser->level);
676 
677 				parser->ltags[parser->level-1] = estrdup(ZSTR_VAL(tag_name));
678 				parser->lastwasopen = 1;
679 
680 				attributes = (const XML_Char **) attrs;
681 
682 				while (attributes && *attributes) {
683 					zval tmp;
684 
685 					att = _xml_decode_tag(parser, (const char *)attributes[0]);
686 					val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
687 
688 					ZVAL_STR(&tmp, val);
689 					zend_symtable_update(Z_ARRVAL(atr), att, &tmp);
690 
691 					atcnt++;
692 					attributes += 2;
693 
694 					zend_string_release_ex(att, 0);
695 				}
696 
697 				if (atcnt) {
698 					zend_hash_str_add(Z_ARRVAL(tag), "attributes", sizeof("attributes") - 1, &atr);
699 				} else {
700 					zval_ptr_dtor(&atr);
701 				}
702 
703 				parser->ctag = zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
704 			} else if (parser->level == (XML_MAXLEVEL + 1)) {
705 							php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
706 			}
707 		}
708 
709 		zend_string_release_ex(tag_name, 0);
710 	}
711 }
712 /* }}} */
713 
714 /* {{{ _xml_endElementHandler() */
_xml_endElementHandler(void * userData,const XML_Char * name)715 void _xml_endElementHandler(void *userData, const XML_Char *name)
716 {
717 	xml_parser *parser = (xml_parser *)userData;
718 
719 	if (parser) {
720 		zval retval, args[2];
721 
722 		zend_string *tag_name = _xml_decode_tag(parser, (const char *)name);
723 
724 		if (!Z_ISUNDEF(parser->endElementHandler)) {
725 			ZVAL_COPY(&args[0], &parser->index);
726 			ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
727 
728 			xml_call_handler(parser, &parser->endElementHandler, parser->endElementPtr, 2, args, &retval);
729 			zval_ptr_dtor(&retval);
730 		}
731 
732 		if (!Z_ISUNDEF(parser->data)) {
733 			zval tag;
734 
735 			if (parser->lastwasopen) {
736 				add_assoc_string(parser->ctag, "type", "complete");
737 			} else {
738 				array_init(&tag);
739 
740 				_xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
741 
742 				add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
743 				add_assoc_string(&tag, "type", "close");
744 				add_assoc_long(&tag, "level", parser->level);
745 
746 				zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
747 			}
748 
749 			parser->lastwasopen = 0;
750 		}
751 
752 		zend_string_release_ex(tag_name, 0);
753 
754 		if ((parser->ltags) && (parser->level <= XML_MAXLEVEL)) {
755 			efree(parser->ltags[parser->level-1]);
756 		}
757 
758 		parser->level--;
759 	}
760 }
761 /* }}} */
762 
763 /* {{{ _xml_characterDataHandler() */
_xml_characterDataHandler(void * userData,const XML_Char * s,int len)764 void _xml_characterDataHandler(void *userData, const XML_Char *s, int len)
765 {
766 	xml_parser *parser = (xml_parser *)userData;
767 
768 	if (parser) {
769 		zval retval, args[2];
770 
771 		if (!Z_ISUNDEF(parser->characterDataHandler)) {
772 			ZVAL_COPY(&args[0], &parser->index);
773 			_xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
774 			xml_call_handler(parser, &parser->characterDataHandler, parser->characterDataPtr, 2, args, &retval);
775 			zval_ptr_dtor(&retval);
776 		}
777 
778 		if (!Z_ISUNDEF(parser->data)) {
779 			size_t i;
780 			int doprint = 0;
781 			zend_string *decoded_value;
782 
783 			decoded_value = xml_utf8_decode(s, len, parser->target_encoding);
784 			if (parser->skipwhite) {
785 				for (i = 0; i < ZSTR_LEN(decoded_value); i++) {
786 					switch (ZSTR_VAL(decoded_value)[i]) {
787 						case ' ':
788 						case '\t':
789 						case '\n':
790 							continue;
791 						default:
792 							doprint = 1;
793 							break;
794 					}
795 					if (doprint) {
796 						break;
797 					}
798 				}
799 			}
800 
801 			if (parser->lastwasopen) {
802 				zval *myval;
803 
804 				/* check if the current tag already has a value - if yes append to that! */
805 				if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) {
806 					size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
807 					Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
808 					strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
809 							ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
810 					zend_string_release_ex(decoded_value, 0);
811 				} else {
812 					if (doprint || (! parser->skipwhite)) {
813 						add_assoc_str(parser->ctag, "value", decoded_value);
814 					} else {
815 						zend_string_release_ex(decoded_value, 0);
816 					}
817 				}
818 
819 			} else {
820 				zval tag;
821 				zval *curtag, *mytype, *myval;
822 
823 				ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
824 					if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
825 						if (!strcmp(Z_STRVAL_P(mytype), "cdata")) {
826 							if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) {
827 								size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
828 								Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
829 								strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
830 										ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
831 								zend_string_release_ex(decoded_value, 0);
832 								return;
833 							}
834 						}
835 					}
836 					break;
837 				} ZEND_HASH_FOREACH_END();
838 
839 				if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) {
840 					array_init(&tag);
841 
842 					_xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
843 
844 					add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
845 					add_assoc_str(&tag, "value", decoded_value);
846 					add_assoc_string(&tag, "type", "cdata");
847 					add_assoc_long(&tag, "level", parser->level);
848 
849 					zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
850 				} else if (parser->level == (XML_MAXLEVEL + 1)) {
851 					php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
852 				} else {
853 					zend_string_release_ex(decoded_value, 0);
854 				}
855 			}
856 		}
857 	}
858 }
859 /* }}} */
860 
861 /* {{{ _xml_processingInstructionHandler() */
_xml_processingInstructionHandler(void * userData,const XML_Char * target,const XML_Char * data)862 void _xml_processingInstructionHandler(void *userData, const XML_Char *target, const XML_Char *data)
863 {
864 	xml_parser *parser = (xml_parser *)userData;
865 
866 	if (parser && !Z_ISUNDEF(parser->processingInstructionHandler)) {
867 		zval retval, args[3];
868 
869 		ZVAL_COPY(&args[0], &parser->index);
870 		_xml_xmlchar_zval(target, 0, parser->target_encoding, &args[1]);
871 		_xml_xmlchar_zval(data, 0, parser->target_encoding, &args[2]);
872 		xml_call_handler(parser, &parser->processingInstructionHandler, parser->processingInstructionPtr, 3, args, &retval);
873 		zval_ptr_dtor(&retval);
874 	}
875 }
876 /* }}} */
877 
878 /* {{{ _xml_defaultHandler() */
_xml_defaultHandler(void * userData,const XML_Char * s,int len)879 void _xml_defaultHandler(void *userData, const XML_Char *s, int len)
880 {
881 	xml_parser *parser = (xml_parser *)userData;
882 
883 	if (parser && !Z_ISUNDEF(parser->defaultHandler)) {
884 		zval retval, args[2];
885 
886 		ZVAL_COPY(&args[0], &parser->index);
887 		_xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
888 		xml_call_handler(parser, &parser->defaultHandler, parser->defaultPtr, 2, args, &retval);
889 		zval_ptr_dtor(&retval);
890 	}
891 }
892 /* }}} */
893 
894 /* {{{ _xml_unparsedEntityDeclHandler() */
_xml_unparsedEntityDeclHandler(void * userData,const XML_Char * entityName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)895 void _xml_unparsedEntityDeclHandler(void *userData,
896 										 const XML_Char *entityName,
897 										 const XML_Char *base,
898 										 const XML_Char *systemId,
899 										 const XML_Char *publicId,
900 										 const XML_Char *notationName)
901 {
902 	xml_parser *parser = (xml_parser *)userData;
903 
904 	if (parser && !Z_ISUNDEF(parser->unparsedEntityDeclHandler)) {
905 		zval retval, args[6];
906 
907 		ZVAL_COPY(&args[0], &parser->index);
908 		_xml_xmlchar_zval(entityName, 0, parser->target_encoding, &args[1]);
909 		_xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
910 		_xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
911 		_xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
912 		_xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[5]);
913 		xml_call_handler(parser, &parser->unparsedEntityDeclHandler, parser->unparsedEntityDeclPtr, 6, args, &retval);
914 		zval_ptr_dtor(&retval);
915 	}
916 }
917 /* }}} */
918 
919 /* {{{ _xml_notationDeclHandler() */
_xml_notationDeclHandler(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)920 void _xml_notationDeclHandler(void *userData,
921 							  const XML_Char *notationName,
922 							  const XML_Char *base,
923 							  const XML_Char *systemId,
924 							  const XML_Char *publicId)
925 {
926 	xml_parser *parser = (xml_parser *)userData;
927 
928 	if (parser && !Z_ISUNDEF(parser->notationDeclHandler)) {
929 		zval retval, args[5];
930 
931 		ZVAL_COPY(&args[0], &parser->index);
932 		_xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[1]);
933 		_xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
934 		_xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
935 		_xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
936 		xml_call_handler(parser, &parser->notationDeclHandler, parser->notationDeclPtr, 5, args, &retval);
937 		zval_ptr_dtor(&retval);
938 	}
939 }
940 /* }}} */
941 
942 /* {{{ _xml_externalEntityRefHandler() */
_xml_externalEntityRefHandler(XML_Parser parserPtr,const XML_Char * openEntityNames,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)943 int _xml_externalEntityRefHandler(XML_Parser parserPtr,
944 								   const XML_Char *openEntityNames,
945 								   const XML_Char *base,
946 								   const XML_Char *systemId,
947 								   const XML_Char *publicId)
948 {
949 	xml_parser *parser = XML_GetUserData(parserPtr);
950 	int ret = 0; /* abort if no handler is set (should be configurable?) */
951 
952 	if (parser && !Z_ISUNDEF(parser->externalEntityRefHandler)) {
953 		zval retval, args[5];
954 
955 		ZVAL_COPY(&args[0], &parser->index);
956 		_xml_xmlchar_zval(openEntityNames, 0, parser->target_encoding, &args[1]);
957 		_xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
958 		_xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
959 		_xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
960 		xml_call_handler(parser, &parser->externalEntityRefHandler, parser->externalEntityRefPtr, 5, args, &retval);
961 		if (!Z_ISUNDEF(retval)) {
962 			convert_to_long(&retval);
963 			ret = Z_LVAL(retval);
964 		} else {
965 			ret = 0;
966 		}
967 	}
968 	return ret;
969 }
970 /* }}} */
971 
972 /* {{{ _xml_startNamespaceDeclHandler() */
_xml_startNamespaceDeclHandler(void * userData,const XML_Char * prefix,const XML_Char * uri)973 void _xml_startNamespaceDeclHandler(void *userData,const XML_Char *prefix, const XML_Char *uri)
974 {
975 	xml_parser *parser = (xml_parser *)userData;
976 
977 	if (parser && !Z_ISUNDEF(parser->startNamespaceDeclHandler)) {
978 		zval retval, args[3];
979 
980 		ZVAL_COPY(&args[0], &parser->index);
981 		_xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
982 		_xml_xmlchar_zval(uri, 0, parser->target_encoding, &args[2]);
983 		xml_call_handler(parser, &parser->startNamespaceDeclHandler, parser->startNamespaceDeclPtr, 3, args, &retval);
984 		zval_ptr_dtor(&retval);
985 	}
986 }
987 /* }}} */
988 
989 /* {{{ _xml_endNamespaceDeclHandler() */
_xml_endNamespaceDeclHandler(void * userData,const XML_Char * prefix)990 void _xml_endNamespaceDeclHandler(void *userData, const XML_Char *prefix)
991 {
992 	xml_parser *parser = (xml_parser *)userData;
993 
994 	if (parser && !Z_ISUNDEF(parser->endNamespaceDeclHandler)) {
995 		zval retval, args[2];
996 
997 		ZVAL_COPY(&args[0], &parser->index);
998 		_xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
999 		xml_call_handler(parser, &parser->endNamespaceDeclHandler, parser->endNamespaceDeclPtr, 2, args, &retval);
1000 		zval_ptr_dtor(&retval);
1001 	}
1002 }
1003 /* }}} */
1004 
1005 /************************* EXTENSION FUNCTIONS *************************/
1006 
/* Shared implementation of xml_parser_create() and xml_parser_create_ns().
 *
 * ns_support selects the accepted arguments: with it, an optional encoding
 * and an optional namespace separator; without it, only the optional
 * encoding. On success return_value holds a new parser object. An
 * unsupported encoding name raises a value error for argument 1. */
static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_support) /* {{{ */
{
	/* The supported encoding types are hardcoded here because
	 * we are limited to the encodings supported by expat/xmltok.
	 */
	static const char *const known_encodings[] = { "ISO-8859-1", "UTF-8", "US-ASCII" };

	char *encoding_param = NULL;
	size_t encoding_param_len = 0;
	char *ns_param = NULL;
	size_t ns_param_len = 0;

	XML_Char *source_encoding = NULL;
	int sniff_encoding = 0;
	xml_parser *created;
	size_t i;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), (ns_support ? "|s!s": "|s!"), &encoding_param, &encoding_param_len, &ns_param, &ns_param_len) == FAILURE) {
		RETURN_THROWS();
	}

	if (encoding_param == NULL) {
		/* No encoding given: use the configured default. */
		source_encoding = XML(default_encoding);
	} else if (encoding_param_len == 0) {
		/* Empty string: the default is kept as target encoding, but NULL
		 * is handed to XML_ParserCreate_MM() further down. */
		source_encoding = XML(default_encoding);
		sniff_encoding = 1;
	} else {
		/* Case-insensitive match against the fixed list of names. */
		for (i = 0; i < sizeof(known_encodings) / sizeof(known_encodings[0]); i++) {
			if (strcasecmp(encoding_param, known_encodings[i]) == 0) {
				source_encoding = (XML_Char *) known_encodings[i];
				break;
			}
		}

		if (source_encoding == NULL) {
			zend_argument_value_error(1, "is not a supported source encoding");
			RETURN_THROWS();
		}
	}

	/* Namespace-aware parsers fall back to ":" as separator. */
	if (ns_support && ns_param == NULL) {
		ns_param = ":";
	}

	object_init_ex(return_value, xml_parser_ce);
	created = Z_XMLPARSER_P(return_value);

	created->parser = XML_ParserCreate_MM(
		sniff_encoding ? NULL : source_encoding,
		&php_xml_mem_hdlrs,
		(XML_Char *) ns_param);

	created->target_encoding = source_encoding;
	created->case_folding = 1;
	created->isparsing = 0;

	/* The underlying parser's user data points back at our parser struct. */
	XML_SetUserData(created->parser, created);

	/* Remember the parser object itself; handlers pass it as first argument. */
	ZVAL_COPY_VALUE(&created->index, return_value);
}
/* }}} */
1062 
1063 /* {{{ Create an XML parser */
PHP_FUNCTION(xml_parser_create)1064 PHP_FUNCTION(xml_parser_create)
1065 {
1066 	php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1067 }
1068 /* }}} */
1069 
1070 /* {{{ Create an XML parser */
PHP_FUNCTION(xml_parser_create_ns)1071 PHP_FUNCTION(xml_parser_create_ns)
1072 {
1073 	php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1074 }
1075 /* }}} */
1076 
1077 /* {{{ Set up object which should be used for callbacks */
PHP_FUNCTION(xml_set_object)1078 PHP_FUNCTION(xml_set_object)
1079 {
1080 	xml_parser *parser;
1081 	zval *pind, *mythis;
1082 
1083 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oo", &pind, xml_parser_ce, &mythis) == FAILURE) {
1084 		RETURN_THROWS();
1085 	}
1086 
1087 	parser = Z_XMLPARSER_P(pind);
1088 
1089 	zval_ptr_dtor(&parser->object);
1090 	ZVAL_OBJ_COPY(&parser->object, Z_OBJ_P(mythis));
1091 
1092 	RETVAL_TRUE;
1093 }
1094 /* }}} */
1095 
1096 /* {{{ Set up start and end element handlers */
PHP_FUNCTION(xml_set_element_handler)1097 PHP_FUNCTION(xml_set_element_handler)
1098 {
1099 	xml_parser *parser;
1100 	zval *pind, *shdl, *ehdl;
1101 
1102 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ozz", &pind, xml_parser_ce, &shdl, &ehdl) == FAILURE) {
1103 		RETURN_THROWS();
1104 	}
1105 
1106 	parser = Z_XMLPARSER_P(pind);
1107 	xml_set_handler(&parser->startElementHandler, shdl);
1108 	xml_set_handler(&parser->endElementHandler, ehdl);
1109 	XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
1110 	RETVAL_TRUE;
1111 }
1112 /* }}} */
1113 
1114 /* {{{ Set up character data handler */
PHP_FUNCTION(xml_set_character_data_handler)1115 PHP_FUNCTION(xml_set_character_data_handler)
1116 {
1117 	xml_parser *parser;
1118 	zval *pind, *hdl;
1119 
1120 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1121 		RETURN_THROWS();
1122 	}
1123 
1124 	parser = Z_XMLPARSER_P(pind);
1125 	xml_set_handler(&parser->characterDataHandler, hdl);
1126 	XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
1127 	RETVAL_TRUE;
1128 }
1129 /* }}} */
1130 
1131 /* {{{ Set up processing instruction (PI) handler */
PHP_FUNCTION(xml_set_processing_instruction_handler)1132 PHP_FUNCTION(xml_set_processing_instruction_handler)
1133 {
1134 	xml_parser *parser;
1135 	zval *pind, *hdl;
1136 
1137 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1138 		RETURN_THROWS();
1139 	}
1140 
1141 	parser = Z_XMLPARSER_P(pind);
1142 	xml_set_handler(&parser->processingInstructionHandler, hdl);
1143 	XML_SetProcessingInstructionHandler(parser->parser, _xml_processingInstructionHandler);
1144 	RETVAL_TRUE;
1145 }
1146 /* }}} */
1147 
1148 /* {{{ Set up default handler */
PHP_FUNCTION(xml_set_default_handler)1149 PHP_FUNCTION(xml_set_default_handler)
1150 {
1151 	xml_parser *parser;
1152 	zval *pind, *hdl;
1153 
1154 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1155 		RETURN_THROWS();
1156 	}
1157 
1158 	parser = Z_XMLPARSER_P(pind);
1159 	xml_set_handler(&parser->defaultHandler, hdl);
1160 	XML_SetDefaultHandler(parser->parser, _xml_defaultHandler);
1161 	RETVAL_TRUE;
1162 }
1163 /* }}} */
1164 
1165 /* {{{ Set up unparsed entity declaration handler */
PHP_FUNCTION(xml_set_unparsed_entity_decl_handler)1166 PHP_FUNCTION(xml_set_unparsed_entity_decl_handler)
1167 {
1168 	xml_parser *parser;
1169 	zval *pind, *hdl;
1170 
1171 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1172 		RETURN_THROWS();
1173 	}
1174 
1175 	parser = Z_XMLPARSER_P(pind);
1176 	xml_set_handler(&parser->unparsedEntityDeclHandler, hdl);
1177 	XML_SetUnparsedEntityDeclHandler(parser->parser, _xml_unparsedEntityDeclHandler);
1178 	RETVAL_TRUE;
1179 }
1180 /* }}} */
1181 
1182 /* {{{ Set up notation declaration handler */
PHP_FUNCTION(xml_set_notation_decl_handler)1183 PHP_FUNCTION(xml_set_notation_decl_handler)
1184 {
1185 	xml_parser *parser;
1186 	zval *pind, *hdl;
1187 
1188 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1189 		RETURN_THROWS();
1190 	}
1191 
1192 	parser = Z_XMLPARSER_P(pind);
1193 	xml_set_handler(&parser->notationDeclHandler, hdl);
1194 	XML_SetNotationDeclHandler(parser->parser, _xml_notationDeclHandler);
1195 	RETVAL_TRUE;
1196 }
1197 /* }}} */
1198 
1199 /* {{{ Set up external entity reference handler */
PHP_FUNCTION(xml_set_external_entity_ref_handler)1200 PHP_FUNCTION(xml_set_external_entity_ref_handler)
1201 {
1202 	xml_parser *parser;
1203 	zval *pind, *hdl;
1204 
1205 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1206 		RETURN_THROWS();
1207 	}
1208 
1209 	parser = Z_XMLPARSER_P(pind);
1210 	xml_set_handler(&parser->externalEntityRefHandler, hdl);
1211 	XML_SetExternalEntityRefHandler(parser->parser, (void *) _xml_externalEntityRefHandler);
1212 	RETVAL_TRUE;
1213 }
1214 /* }}} */
1215 
1216 /* {{{ Set up character data handler */
PHP_FUNCTION(xml_set_start_namespace_decl_handler)1217 PHP_FUNCTION(xml_set_start_namespace_decl_handler)
1218 {
1219 	xml_parser *parser;
1220 	zval *pind, *hdl;
1221 
1222 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1223 		RETURN_THROWS();
1224 	}
1225 
1226 	parser = Z_XMLPARSER_P(pind);
1227 	xml_set_handler(&parser->startNamespaceDeclHandler, hdl);
1228 	XML_SetStartNamespaceDeclHandler(parser->parser, _xml_startNamespaceDeclHandler);
1229 	RETVAL_TRUE;
1230 }
1231 /* }}} */
1232 
1233 /* {{{ Set up character data handler */
PHP_FUNCTION(xml_set_end_namespace_decl_handler)1234 PHP_FUNCTION(xml_set_end_namespace_decl_handler)
1235 {
1236 	xml_parser *parser;
1237 	zval *pind, *hdl;
1238 
1239 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1240 		RETURN_THROWS();
1241 	}
1242 
1243 	parser = Z_XMLPARSER_P(pind);
1244 	xml_set_handler(&parser->endNamespaceDeclHandler, hdl);
1245 	XML_SetEndNamespaceDeclHandler(parser->parser, _xml_endNamespaceDeclHandler);
1246 	RETVAL_TRUE;
1247 }
1248 /* }}} */
1249 
1250 /* {{{ Start parsing an XML document */
PHP_FUNCTION(xml_parse)1251 PHP_FUNCTION(xml_parse)
1252 {
1253 	xml_parser *parser;
1254 	zval *pind;
1255 	char *data;
1256 	size_t data_len;
1257 	int ret;
1258 	zend_bool isFinal = 0;
1259 
1260 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
1261 		RETURN_THROWS();
1262 	}
1263 
1264 	parser = Z_XMLPARSER_P(pind);
1265 	if (parser->isparsing) {
1266 		zend_throw_error(NULL, "Parser must not be called recursively");
1267 		RETURN_THROWS();
1268 	}
1269 	parser->isparsing = 1;
1270 	ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, isFinal);
1271 	parser->isparsing = 0;
1272 	RETVAL_LONG(ret);
1273 }
1274 
1275 /* }}} */
1276 
1277 /* {{{ Parsing a XML document */
PHP_FUNCTION(xml_parse_into_struct)1278 PHP_FUNCTION(xml_parse_into_struct)
1279 {
1280 	xml_parser *parser;
1281 	zval *pind, *xdata, *info = NULL;
1282 	char *data;
1283 	size_t data_len;
1284 	int ret;
1285 
1286 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
1287 		RETURN_THROWS();
1288 	}
1289 
1290 	parser = Z_XMLPARSER_P(pind);
1291 
1292 	if (info) {
1293 		info = zend_try_array_init(info);
1294 		if (!info) {
1295 			RETURN_THROWS();
1296 		}
1297 	}
1298 
1299 	xdata = zend_try_array_init(xdata);
1300 	if (!xdata) {
1301 		RETURN_THROWS();
1302 	}
1303 
1304 	ZVAL_COPY_VALUE(&parser->data, xdata);
1305 
1306 	if (info) {
1307 		ZVAL_COPY_VALUE(&parser->info, info);
1308 	}
1309 
1310 	parser->level = 0;
1311 	parser->ltags = safe_emalloc(XML_MAXLEVEL, sizeof(char *), 0);
1312 
1313 	XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
1314 	XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
1315 
1316 	if (parser->isparsing) {
1317 		php_error_docref(NULL, E_WARNING, "Parser must not be called recursively");
1318 		RETURN_FALSE;
1319 	}
1320 	parser->isparsing = 1;
1321 	ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, 1);
1322 	parser->isparsing = 0;
1323 
1324 	RETVAL_LONG(ret);
1325 }
1326 /* }}} */
1327 
1328 /* {{{ Get XML parser error code */
PHP_FUNCTION(xml_get_error_code)1329 PHP_FUNCTION(xml_get_error_code)
1330 {
1331 	xml_parser *parser;
1332 	zval *pind;
1333 
1334 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1335 		RETURN_THROWS();
1336 	}
1337 
1338 	parser = Z_XMLPARSER_P(pind);
1339 	RETURN_LONG((zend_long)XML_GetErrorCode(parser->parser));
1340 }
1341 /* }}} */
1342 
1343 /* {{{ Get XML parser error string */
PHP_FUNCTION(xml_error_string)1344 PHP_FUNCTION(xml_error_string)
1345 {
1346 	zend_long code;
1347 	char *str;
1348 
1349 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &code) == FAILURE) {
1350 		RETURN_THROWS();
1351 	}
1352 
1353 	str = (char *)XML_ErrorString((int)code);
1354 	if (str) {
1355 		RETVAL_STRING(str);
1356 	}
1357 }
1358 /* }}} */
1359 
1360 /* {{{ Get current line number for an XML parser */
PHP_FUNCTION(xml_get_current_line_number)1361 PHP_FUNCTION(xml_get_current_line_number)
1362 {
1363 	xml_parser *parser;
1364 	zval *pind;
1365 
1366 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1367 		RETURN_THROWS();
1368 	}
1369 
1370 	parser = Z_XMLPARSER_P(pind);
1371 	RETVAL_LONG(XML_GetCurrentLineNumber(parser->parser));
1372 }
1373 /* }}} */
1374 
1375 /* {{{ Get current column number for an XML parser */
PHP_FUNCTION(xml_get_current_column_number)1376 PHP_FUNCTION(xml_get_current_column_number)
1377 {
1378 	xml_parser *parser;
1379 	zval *pind;
1380 
1381 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1382 		RETURN_THROWS();
1383 	}
1384 
1385 	parser = Z_XMLPARSER_P(pind);
1386 	RETVAL_LONG(XML_GetCurrentColumnNumber(parser->parser));
1387 }
1388 /* }}} */
1389 
1390 /* {{{ Get current byte index for an XML parser */
PHP_FUNCTION(xml_get_current_byte_index)1391 PHP_FUNCTION(xml_get_current_byte_index)
1392 {
1393 	xml_parser *parser;
1394 	zval *pind;
1395 
1396 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1397 		RETURN_THROWS();
1398 	}
1399 
1400 	parser = Z_XMLPARSER_P(pind);
1401 	RETVAL_LONG(XML_GetCurrentByteIndex(parser->parser));
1402 }
1403 /* }}} */
1404 
1405 /* {{{ Free an XML parser */
PHP_FUNCTION(xml_parser_free)1406 PHP_FUNCTION(xml_parser_free)
1407 {
1408 	zval *pind;
1409 	xml_parser *parser;
1410 
1411 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1412 		RETURN_THROWS();
1413 	}
1414 
1415 	parser = Z_XMLPARSER_P(pind);
1416 	if (parser->isparsing == 1) {
1417 		php_error_docref(NULL, E_WARNING, "Parser cannot be freed while it is parsing.");
1418 		RETURN_FALSE;
1419 	}
1420 
1421 	RETURN_TRUE;
1422 }
1423 /* }}} */
1424 
1425 /* {{{ Set options in an XML parser */
PHP_FUNCTION(xml_parser_set_option)1426 PHP_FUNCTION(xml_parser_set_option)
1427 {
1428 	xml_parser *parser;
1429 	zval *pind, *val;
1430 	zend_long opt;
1431 
1432 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Olz", &pind, xml_parser_ce, &opt, &val) == FAILURE) {
1433 		RETURN_THROWS();
1434 	}
1435 
1436 	parser = Z_XMLPARSER_P(pind);
1437 	switch (opt) {
1438 		case PHP_XML_OPTION_CASE_FOLDING:
1439 			parser->case_folding = zval_get_long(val);
1440 			break;
1441 		case PHP_XML_OPTION_SKIP_TAGSTART:
1442 			parser->toffset = zval_get_long(val);
1443 			if (parser->toffset < 0) {
1444 				php_error_docref(NULL, E_WARNING, "tagstart ignored, because it is out of range");
1445 				parser->toffset = 0;
1446 			}
1447 			break;
1448 		case PHP_XML_OPTION_SKIP_WHITE:
1449 			parser->skipwhite = zval_get_long(val);
1450 			break;
1451 		case PHP_XML_OPTION_TARGET_ENCODING: {
1452 			const xml_encoding *enc;
1453 			if (!try_convert_to_string(val)) {
1454 				RETURN_THROWS();
1455 			}
1456 
1457 			enc = xml_get_encoding((XML_Char*)Z_STRVAL_P(val));
1458 			if (enc == NULL) {
1459 				zend_argument_value_error(3, "is not a supported target encoding");
1460 				RETURN_THROWS();
1461 			}
1462 
1463 			parser->target_encoding = enc->name;
1464 			break;
1465 		}
1466 		default:
1467 			zend_argument_value_error(2, "must be a PHP_XML_OPTION_* constant");
1468 			RETURN_THROWS();
1469 			break;
1470 	}
1471 	RETVAL_TRUE;
1472 }
1473 /* }}} */
1474 
1475 /* {{{ Get options from an XML parser */
PHP_FUNCTION(xml_parser_get_option)1476 PHP_FUNCTION(xml_parser_get_option)
1477 {
1478 	xml_parser *parser;
1479 	zval *pind;
1480 	zend_long opt;
1481 
1482 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ol", &pind, xml_parser_ce, &opt) == FAILURE) {
1483 		RETURN_THROWS();
1484 	}
1485 
1486 	parser = Z_XMLPARSER_P(pind);
1487 	switch (opt) {
1488 		case PHP_XML_OPTION_CASE_FOLDING:
1489 			RETURN_LONG(parser->case_folding);
1490 			break;
1491 		case PHP_XML_OPTION_SKIP_TAGSTART:
1492 			RETURN_LONG(parser->toffset);
1493 			break;
1494 		case PHP_XML_OPTION_SKIP_WHITE:
1495 			RETURN_LONG(parser->skipwhite);
1496 			break;
1497 		case PHP_XML_OPTION_TARGET_ENCODING:
1498 			RETURN_STRING((char *)parser->target_encoding);
1499 			break;
1500 		default:
1501 			zend_argument_value_error(2, "must be a PHP_XML_OPTION_* constant");
1502 			RETURN_THROWS();
1503 	}
1504 }
1505 /* }}} */
1506 
1507 #endif
1508