1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Stig Sæther Bakken <ssb@php.net> |
14 | Thies C. Arntzen <thies@thieso.net> |
15 | Sterling Hughes <sterling@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22
23 #include "php.h"
24
25 #include "zend_variables.h"
26 #include "ext/standard/info.h"
27 #include "ext/standard/html.h"
28
29 #ifdef HAVE_XML
30
31 #include "php_xml.h"
32 # include "ext/standard/head.h"
33 #ifdef LIBXML_EXPAT_COMPAT
34 #include "ext/libxml/php_libxml.h"
35 #endif
36
37 #include "xml_arginfo.h"
38
39 /* Short-term TODO list:
40 * - Implement XML_ExternalEntityParserCreate()
41 * - XML_SetCommentHandler
42 * - XML_SetCdataSectionHandler
43 * - XML_SetParamEntityParsing
44 */
45
46 /* Long-term TODO list:
47 * - Fix the expat library so you can install your own memory manager
48 * functions
49 */
50
51 /* Known bugs:
52 * - Weird things happen with <![CDATA[]]> sections.
53 */
54
ZEND_BEGIN_MODULE_GLOBALS(xml)55 ZEND_BEGIN_MODULE_GLOBALS(xml)
56 XML_Char *default_encoding;
57 ZEND_END_MODULE_GLOBALS(xml)
58
59 ZEND_DECLARE_MODULE_GLOBALS(xml)
60
61 #define XML(v) ZEND_MODULE_GLOBALS_ACCESSOR(xml, v)
62
63 typedef struct {
64 XML_Parser parser;
65 XML_Char *target_encoding;
66
67 /* Reference to the object itself, for convenience.
68 * It is not owned, do not release it. */
69 zval index;
70
71 zend_object *object;
72 zend_fcall_info_cache startElementHandler;
73 zend_fcall_info_cache endElementHandler;
74 zend_fcall_info_cache characterDataHandler;
75 zend_fcall_info_cache processingInstructionHandler;
76 zend_fcall_info_cache defaultHandler;
77 zend_fcall_info_cache unparsedEntityDeclHandler;
78 zend_fcall_info_cache notationDeclHandler;
79 zend_fcall_info_cache externalEntityRefHandler;
80 zend_fcall_info_cache startNamespaceDeclHandler;
81 zend_fcall_info_cache endNamespaceDeclHandler;
82
83 zval data;
84 zval info;
85 int level;
86 int toffset;
87 int curtag;
88 zval *ctag;
89 char **ltags;
90 bool lastwasopen;
91 bool skipwhite;
92 bool isparsing;
93 bool parsehuge;
94 bool case_folding;
95
96 XML_Char *baseURI;
97
98 zend_object std;
99 } xml_parser;
100
101
102 typedef struct {
103 XML_Char *name;
104 char (*decoding_function)(unsigned short);
105 unsigned short (*encoding_function)(unsigned char);
106 } xml_encoding;
107
/* {{{ dynamically loadable module stuff */
#if defined(COMPILE_DL_XML)
# if defined(ZTS)
ZEND_TSRMLS_CACHE_DEFINE()
# endif
ZEND_GET_MODULE(xml)
#endif /* COMPILE_DL_XML */
/* }}} */
116
117 #define XML_MAXLEVEL 255 /* XXX this should be dynamic */
118
119 #define SKIP_TAGSTART(str) ((str) + (parser->toffset > strlen(str) ? strlen(str) : parser->toffset))
120
121 static zend_class_entry *xml_parser_ce;
122 static zend_object_handlers xml_parser_object_handlers;
123
124 /* {{{ function prototypes */
125 PHP_MINIT_FUNCTION(xml);
126 PHP_MINFO_FUNCTION(xml);
127 static PHP_GINIT_FUNCTION(xml);
128
129 static zend_object *xml_parser_create_object(zend_class_entry *class_type);
130 static void xml_parser_free_obj(zend_object *object);
131 static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n);
132 static zend_function *xml_parser_get_constructor(zend_object *object);
133
134 static zend_string *xml_utf8_decode(const XML_Char *, size_t, const XML_Char *);
135 inline static unsigned short xml_encode_iso_8859_1(unsigned char);
136 inline static char xml_decode_iso_8859_1(unsigned short);
137 inline static unsigned short xml_encode_us_ascii(unsigned char);
138 inline static char xml_decode_us_ascii(unsigned short);
139 static void _xml_xmlchar_zval(const XML_Char *, int, const XML_Char *, zval *);
140 static int _xml_xmlcharlen(const XML_Char *);
141 static void _xml_add_to_info(xml_parser *parser, const char *name);
142 inline static zend_string *_xml_decode_tag(xml_parser *parser, const XML_Char *tag);
143
144 void _xml_startElementHandler(void *, const XML_Char *, const XML_Char **);
145 void _xml_endElementHandler(void *, const XML_Char *);
146 void _xml_characterDataHandler(void *, const XML_Char *, int);
147 void _xml_processingInstructionHandler(void *, const XML_Char *, const XML_Char *);
148 void _xml_defaultHandler(void *, const XML_Char *, int);
149 void _xml_unparsedEntityDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
150 void _xml_notationDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
151 int _xml_externalEntityRefHandler(XML_Parser, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
152
153 void _xml_startNamespaceDeclHandler(void *, const XML_Char *, const XML_Char *);
154 void _xml_endNamespaceDeclHandler(void *, const XML_Char *);
155 /* }}} */
156
157 #ifdef LIBXML_EXPAT_COMPAT
158 static const zend_module_dep xml_deps[] = {
159 ZEND_MOD_REQUIRED("libxml")
160 ZEND_MOD_END
161 };
162 #endif
163
164 zend_module_entry xml_module_entry = {
165 #ifdef LIBXML_EXPAT_COMPAT
166 STANDARD_MODULE_HEADER_EX, NULL,
167 xml_deps,
168 #else
169 STANDARD_MODULE_HEADER,
170 #endif
171 "xml", /* extension name */
172 ext_functions, /* extension function list */
173 PHP_MINIT(xml), /* extension-wide startup function */
174 NULL, /* extension-wide shutdown function */
175 NULL, /* per-request startup function */
176 NULL, /* per-request shutdown function */
177 PHP_MINFO(xml), /* information function */
178 PHP_XML_VERSION,
179 PHP_MODULE_GLOBALS(xml), /* globals descriptor */
180 PHP_GINIT(xml), /* globals ctor */
181 NULL, /* globals dtor */
182 NULL, /* post deactivate */
183 STANDARD_MODULE_PROPERTIES_EX
184 };
185
/* Supported target encodings. An entry whose decoding function is NULL
 * (UTF-8) is not converted by this extension: xml_utf8_decode() returns the
 * UTF-8 data as-is for it.
 */
189 static const xml_encoding xml_encodings[] = {
190 { (XML_Char *)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
191 { (XML_Char *)"US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii },
192 { (XML_Char *)"UTF-8", NULL, NULL },
193 { (XML_Char *)NULL, NULL, NULL }
194 };
195
196 static XML_Memory_Handling_Suite php_xml_mem_hdlrs;
197
198 /* }}} */
199
200 /* {{{ startup, shutdown and info functions */
PHP_GINIT_FUNCTION(xml)201 static PHP_GINIT_FUNCTION(xml)
202 {
203 #if defined(COMPILE_DL_XML) && defined(ZTS)
204 ZEND_TSRMLS_CACHE_UPDATE();
205 #endif
206 xml_globals->default_encoding = (XML_Char*)"UTF-8";
207 }
208
/* malloc-style handler for the parser's memory suite, backed by emalloc(). */
static void *php_xml_malloc_wrapper(size_t sz)
{
	void *block = emalloc(sz);

	return block;
}
213
/* realloc-style handler for the parser's memory suite, backed by erealloc(). */
static void *php_xml_realloc_wrapper(void *ptr, size_t sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
218
/* free-style handler for the parser's memory suite; a NULL pointer is ignored,
 * anything else is handed to efree(). */
static void php_xml_free_wrapper(void *ptr)
{
	if (ptr == NULL) {
		return;
	}

	efree(ptr);
}
225
PHP_MINIT_FUNCTION(xml)226 PHP_MINIT_FUNCTION(xml)
227 {
228 xml_parser_ce = register_class_XMLParser();
229 xml_parser_ce->create_object = xml_parser_create_object;
230 xml_parser_ce->default_object_handlers = &xml_parser_object_handlers;
231
232 memcpy(&xml_parser_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
233 xml_parser_object_handlers.offset = XtOffsetOf(xml_parser, std);
234 xml_parser_object_handlers.free_obj = xml_parser_free_obj;
235 xml_parser_object_handlers.get_gc = xml_parser_get_gc;
236 xml_parser_object_handlers.get_constructor = xml_parser_get_constructor;
237 xml_parser_object_handlers.clone_obj = NULL;
238 xml_parser_object_handlers.compare = zend_objects_not_comparable;
239
240 register_xml_symbols(module_number);
241
242 /* this object should not be pre-initialised at compile time,
243 as the order of members may vary */
244
245 php_xml_mem_hdlrs.malloc_fcn = php_xml_malloc_wrapper;
246 php_xml_mem_hdlrs.realloc_fcn = php_xml_realloc_wrapper;
247 php_xml_mem_hdlrs.free_fcn = php_xml_free_wrapper;
248
249 return SUCCESS;
250 }
251
/* Module information function: prints a table with the support flags and the
 * version of the underlying XML library (libxml2 or expat). */
PHP_MINFO_FUNCTION(xml)
{
	php_info_print_table_start();

	php_info_print_table_row(2, "XML Support", "active");
	php_info_print_table_row(2, "XML Namespace Support", "active");
#if defined(LIBXML_DOTTED_VERSION) && defined(LIBXML_EXPAT_COMPAT)
	php_info_print_table_row(2, "libxml2 Version", LIBXML_DOTTED_VERSION);
#else
	php_info_print_table_row(2, "EXPAT Version", XML_ExpatVersion());
#endif

	php_info_print_table_end();
}
264 /* }}} */
265
266 /* {{{ extension-internal functions */
267
/* Feeds data_len bytes of data to the underlying parser and returns the
 * result of XML_Parse(). parser->isparsing is set for the duration of the
 * call; the function must not be entered while it is already set. */
static int xml_parse_helper(xml_parser *parser, const char *data, size_t data_len, bool is_final)
{
	ZEND_ASSERT(!parser->isparsing);

	/* libxml2 specific options */
#if LIBXML_EXPAT_COMPAT
	/* See xmlInitSAXParserCtxt() and xmlCtxtUseOptions() */
	if (!parser->parsehuge) {
		parser->parser->parser->options &= ~XML_PARSE_HUGE;
		xmlDictSetLimit(parser->parser->parser->dict, XML_MAX_DICTIONARY_LIMIT);
	} else {
		parser->parser->parser->options |= XML_PARSE_HUGE;
		xmlDictSetLimit(parser->parser->parser->dict, 0);
	}
#endif

	parser->isparsing = true;
	const int status = XML_Parse(parser->parser, (const XML_Char *) data, data_len, is_final);
	parser->isparsing = false;

	return status;
}
289
/* Stores s, decoded to the given encoding, into ret as a string. A NULL s
 * yields false; a len of 0 means "measure s up to its terminating NUL". */
static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
{
	if (!s) {
		ZVAL_FALSE(ret);
		return;
	}

	const int effective_len = (len != 0) ? len : _xml_xmlcharlen(s);

	ZVAL_STR(ret, xml_utf8_decode(s, effective_len, encoding));
}
301 /* }}} */
302
xml_parser_from_obj(zend_object * obj)303 static inline xml_parser *xml_parser_from_obj(zend_object *obj) {
304 return (xml_parser *)((char *)(obj) - XtOffsetOf(xml_parser, std));
305 }
306
307 #define Z_XMLPARSER_P(zv) xml_parser_from_obj(Z_OBJ_P(zv))
308
xml_parser_create_object(zend_class_entry * class_type)309 static zend_object *xml_parser_create_object(zend_class_entry *class_type) {
310 xml_parser *intern = zend_object_alloc(sizeof(xml_parser), class_type);
311 memset(intern, 0, sizeof(xml_parser) - sizeof(zend_object));
312
313 zend_object_std_init(&intern->std, class_type);
314 object_properties_init(&intern->std, class_type);
315
316 return &intern->std;
317 }
318
/* Frees the per-level tag names that are still recorded (at most
 * XML_MAXLEVEL of them) and then the ltags array itself. Does nothing when no
 * array was allocated. */
static void xml_parser_free_ltags(xml_parser *parser)
{
	if (!parser->ltags) {
		return;
	}

	for (int depth = 0; depth < parser->level && depth < XML_MAXLEVEL; depth++) {
		efree(parser->ltags[depth]);
	}

	efree(parser->ltags);
}
328
/* free_obj handler of XMLParser: releases the underlying parser, the recorded
 * tag names, every initialised callback, the base URI and the referenced
 * object, then destroys the standard object part. */
static void xml_parser_free_obj(zend_object *object)
{
	xml_parser *parser = xml_parser_from_obj(object);

	if (parser->parser) {
		XML_ParserFree(parser->parser);
	}

	xml_parser_free_ltags(parser);

	/* Callbacks are released in their declaration order. */
	zend_fcall_info_cache *const handlers[] = {
		&parser->startElementHandler,
		&parser->endElementHandler,
		&parser->characterDataHandler,
		&parser->processingInstructionHandler,
		&parser->defaultHandler,
		&parser->unparsedEntityDeclHandler,
		&parser->notationDeclHandler,
		&parser->externalEntityRefHandler,
		&parser->startNamespaceDeclHandler,
		&parser->endNamespaceDeclHandler,
	};

	for (size_t i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++) {
		zend_fcall_info_cache *handler = handlers[i];

		if (ZEND_FCC_INITIALIZED(*handler)) {
			zend_fcc_dtor(handler);
			/* Mark the slot as uninitialised again. */
			handler->function_handler = NULL;
		}
	}

	if (parser->baseURI) {
		efree(parser->baseURI);
	}

	if (parser->object) {
		OBJ_RELEASE(parser->object);
	}

	zend_object_std_dtor(&parser->std);
}
386
xml_parser_get_gc(zend_object * object,zval ** table,int * n)387 static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n)
388 {
389 xml_parser *parser = xml_parser_from_obj(object);
390
391 zend_get_gc_buffer *gc_buffer = zend_get_gc_buffer_create();
392 if (parser->object) {
393 zend_get_gc_buffer_add_obj(gc_buffer, parser->object);
394 }
395 if (ZEND_FCC_INITIALIZED(parser->startElementHandler)) {
396 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->startElementHandler);
397 }
398 if (ZEND_FCC_INITIALIZED(parser->endElementHandler)) {
399 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->endElementHandler);
400 }
401 if (ZEND_FCC_INITIALIZED(parser->characterDataHandler)) {
402 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->characterDataHandler);
403 }
404 if (ZEND_FCC_INITIALIZED(parser->processingInstructionHandler)) {
405 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->processingInstructionHandler);
406 }
407 if (ZEND_FCC_INITIALIZED(parser->defaultHandler)) {
408 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->defaultHandler);
409 }
410 if (ZEND_FCC_INITIALIZED(parser->unparsedEntityDeclHandler)) {
411 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->unparsedEntityDeclHandler);
412 }
413 if (ZEND_FCC_INITIALIZED(parser->notationDeclHandler)) {
414 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->notationDeclHandler);
415 }
416 if (ZEND_FCC_INITIALIZED(parser->externalEntityRefHandler)) {
417 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->externalEntityRefHandler);
418 }
419 if (ZEND_FCC_INITIALIZED(parser->startNamespaceDeclHandler)) {
420 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->startNamespaceDeclHandler);
421 }
422 if (ZEND_FCC_INITIALIZED(parser->endNamespaceDeclHandler)) {
423 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->endNamespaceDeclHandler);
424 }
425
426 zend_get_gc_buffer_use(gc_buffer, table, n);
427
428 return zend_std_get_properties(object);
429 }
430
xml_parser_get_constructor(zend_object * object)431 static zend_function *xml_parser_get_constructor(zend_object *object) {
432 zend_throw_error(NULL, "Cannot directly construct XMLParser, use xml_parser_create() or xml_parser_create_ns() instead");
433 return NULL;
434 }
435
436 /* This is always called to simplify the mess to deal with BC breaks, but only set a new handler if it is initialized */
xml_set_handler(zend_fcall_info_cache * const parser_handler,const zend_fcall_info_cache * const fn)437 static void xml_set_handler(zend_fcall_info_cache *const parser_handler, const zend_fcall_info_cache *const fn)
438 {
439 /* If we have already a handler, release it */
440 if (ZEND_FCC_INITIALIZED(*parser_handler)) {
441 zend_fcc_dtor(parser_handler);
442 parser_handler->function_handler = NULL;
443 }
444
445 if (ZEND_FCC_INITIALIZED(*fn)) {
446 zend_fcc_dup(parser_handler, fn);
447 }
448 }
449
450 /* {{{ xml_encode_iso_8859_1() */
/* Maps an ISO-8859-1 byte to its code point, which has the same value. */
inline static unsigned short xml_encode_iso_8859_1(unsigned char c)
{
	const unsigned short code_point = c;

	return code_point;
}
455 /* }}} */
456
457 /* {{{ xml_decode_iso_8859_1() */
/* Maps a code point to an ISO-8859-1 byte; anything above 0xff becomes '?'. */
inline static char xml_decode_iso_8859_1(unsigned short c)
{
	if (c > 0xff) {
		return '?';
	}

	return (char) c;
}
462 /* }}} */
463
464 /* {{{ xml_encode_us_ascii() */
/* Maps a byte to its code point without changing the value. */
inline static unsigned short xml_encode_us_ascii(unsigned char c)
{
	const unsigned short code_point = c;

	return code_point;
}
469 /* }}} */
470
471 /* {{{ xml_decode_us_ascii() */
/* Maps a code point to a US-ASCII byte; anything above 0x7f becomes '?'. */
inline static char xml_decode_us_ascii(unsigned short c)
{
	if (c > 0x7f) {
		return '?';
	}

	return (char) c;
}
476 /* }}} */
477
478 /* {{{ xml_get_encoding() */
xml_get_encoding(const XML_Char * name)479 static const xml_encoding *xml_get_encoding(const XML_Char *name)
480 {
481 const xml_encoding *enc = &xml_encodings[0];
482
483 while (enc && enc->name) {
484 if (strcasecmp((char *)name, (char *)enc->name) == 0) {
485 return enc;
486 }
487 enc++;
488 }
489 return NULL;
490 }
491 /* }}} */
492
493 /* {{{ xml_utf8_decode() */
xml_utf8_decode(const XML_Char * s,size_t len,const XML_Char * encoding)494 static zend_string *xml_utf8_decode(const XML_Char *s, size_t len, const XML_Char *encoding)
495 {
496 size_t pos = 0;
497 unsigned int c;
498 char (*decoder)(unsigned short) = NULL;
499 const xml_encoding *enc = xml_get_encoding(encoding);
500 zend_string *str;
501
502 if (enc) {
503 decoder = enc->decoding_function;
504 }
505
506 if (decoder == NULL) {
507 /* If the target encoding was unknown, or no decoder function
508 * was specified, return the UTF-8-encoded data as-is.
509 */
510 str = zend_string_init((char *)s, len, 0);
511 return str;
512 }
513
514 str = zend_string_alloc(len, 0);
515 ZSTR_LEN(str) = 0;
516 while (pos < len) {
517 zend_result status = FAILURE;
518 c = php_next_utf8_char((const unsigned char*)s, len, &pos, &status);
519
520 if (status == FAILURE || c > 0xFFU) {
521 c = '?';
522 }
523
524 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (unsigned int)decoder(c);
525 }
526 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
527 if (ZSTR_LEN(str) < len) {
528 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
529 }
530
531 return str;
532 }
533 /* }}} */
534
535 /* {{{ _xml_xmlcharlen() */
/* Returns the number of XML_Char units in s before the terminating zero. */
static int _xml_xmlcharlen(const XML_Char *s)
{
	const XML_Char *cursor = s;

	while (*cursor) {
		cursor++;
	}

	return (int)(cursor - s);
}
546 /* }}} */
547
548 /* {{{ _xml_add_to_info() */
/* Appends the current tag index (parser->curtag) to the list stored under
 * name in parser->info, creating the list on first use, and then advances
 * parser->curtag. Does nothing when no info array is attached. */
static void _xml_add_to_info(xml_parser *parser, const char *name)
{
	if (Z_ISUNDEF(parser->info)) {
		return;
	}

	HashTable *info = Z_ARRVAL(parser->info);
	const size_t name_len = strlen(name);
	zval *indexes = zend_hash_str_find(info, name, name_len);

	if (indexes == NULL) {
		zval fresh;
		array_init(&fresh);
		indexes = zend_hash_str_update(info, name, name_len, &fresh);
	}

	add_next_index_long(indexes, parser->curtag);
	parser->curtag++;
}
568 /* }}} */
569
570 /* {{{ _xml_decode_tag() */
_xml_decode_tag(xml_parser * parser,const XML_Char * tag)571 static zend_string *_xml_decode_tag(xml_parser *parser, const XML_Char *tag)
572 {
573 zend_string *str;
574
575 str = xml_utf8_decode(tag, _xml_xmlcharlen(tag), parser->target_encoding);
576
577 if (parser->case_folding) {
578 zend_str_toupper(ZSTR_VAL(str), ZSTR_LEN(str));
579 }
580
581 return str;
582 }
583 /* }}} */
584
585 /* {{{ _xml_startElementHandler() */
_xml_startElementHandler(void * userData,const XML_Char * name,const XML_Char ** attributes)586 void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes)
587 {
588 xml_parser *parser = (xml_parser *)userData;
589 const char **attrs = (const char **) attributes;
590 zend_string *att, *tag_name, *val;
591
592 if (!parser) {
593 return;
594 }
595
596 parser->level++;
597
598 tag_name = _xml_decode_tag(parser, name);
599
600 if (ZEND_FCC_INITIALIZED(parser->startElementHandler)) {
601 zval args[3];
602 ZVAL_COPY(&args[0], &parser->index);
603 ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
604 array_init(&args[2]);
605
606 while (attributes && *attributes) {
607 zval tmp;
608
609 att = _xml_decode_tag(parser, attributes[0]);
610 val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
611
612 ZVAL_STR(&tmp, val);
613 zend_symtable_update(Z_ARRVAL(args[2]), att, &tmp);
614
615 attributes += 2;
616
617 zend_string_release_ex(att, 0);
618 }
619
620 zend_call_known_fcc(&parser->startElementHandler, /* retval */ NULL, /* param_count */ 3, args, /* named_params */ NULL);
621 zval_ptr_dtor(&args[0]);
622 zval_ptr_dtor(&args[1]);
623 zval_ptr_dtor(&args[2]);
624 }
625
626 if (!Z_ISUNDEF(parser->data)) {
627 if (parser->level <= XML_MAXLEVEL) {
628 zval tag, atr;
629 int atcnt = 0;
630
631 array_init(&tag);
632 array_init(&atr);
633
634 _xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
635
636 add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
637 add_assoc_string(&tag, "type", "open");
638 add_assoc_long(&tag, "level", parser->level);
639
640 parser->ltags[parser->level-1] = estrdup(ZSTR_VAL(tag_name));
641 parser->lastwasopen = 1;
642
643 attributes = (const XML_Char **) attrs;
644
645 while (attributes && *attributes) {
646 zval tmp;
647
648 att = _xml_decode_tag(parser, attributes[0]);
649 val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
650
651 ZVAL_STR(&tmp, val);
652 zend_symtable_update(Z_ARRVAL(atr), att, &tmp);
653
654 atcnt++;
655 attributes += 2;
656
657 zend_string_release_ex(att, 0);
658 }
659
660 if (atcnt) {
661 zend_hash_str_add(Z_ARRVAL(tag), "attributes", sizeof("attributes") - 1, &atr);
662 } else {
663 zval_ptr_dtor(&atr);
664 }
665
666 parser->ctag = zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
667 } else if (parser->level == (XML_MAXLEVEL + 1)) {
668 php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
669 }
670 }
671
672 zend_string_release_ex(tag_name, 0);
673 }
674 /* }}} */
675
676 /* {{{ _xml_endElementHandler() */
_xml_endElementHandler(void * userData,const XML_Char * name)677 void _xml_endElementHandler(void *userData, const XML_Char *name)
678 {
679 xml_parser *parser = (xml_parser *)userData;
680
681 if (!parser) {
682 return;
683 }
684
685 zend_string *tag_name = _xml_decode_tag(parser, name);
686
687 if (ZEND_FCC_INITIALIZED(parser->endElementHandler)) {
688 zval args[2];
689 ZVAL_COPY(&args[0], &parser->index);
690 ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
691
692 zend_call_known_fcc(&parser->endElementHandler, /* retval */ NULL, /* param_count */ 2, args, /* named_params */ NULL);
693 zval_ptr_dtor(&args[0]);
694 zval_ptr_dtor(&args[1]);
695 }
696
697 if (!Z_ISUNDEF(parser->data)) {
698 zval tag;
699
700 if (parser->lastwasopen) {
701 add_assoc_string(parser->ctag, "type", "complete");
702 } else {
703 array_init(&tag);
704
705 _xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
706
707 add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
708 add_assoc_string(&tag, "type", "close");
709 add_assoc_long(&tag, "level", parser->level);
710
711 zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
712 }
713
714 parser->lastwasopen = 0;
715 }
716
717 zend_string_release_ex(tag_name, 0);
718
719 if ((parser->ltags) && (parser->level <= XML_MAXLEVEL)) {
720 efree(parser->ltags[parser->level-1]);
721 }
722
723 parser->level--;
724 }
725 /* }}} */
726
727 /* {{{ _xml_characterDataHandler() */
_xml_characterDataHandler(void * userData,const XML_Char * s,int len)728 void _xml_characterDataHandler(void *userData, const XML_Char *s, int len)
729 {
730 xml_parser *parser = (xml_parser *)userData;
731
732 if (!parser) {
733 return;
734 }
735
736 if (ZEND_FCC_INITIALIZED(parser->characterDataHandler)) {
737 zval args[2];
738 ZVAL_COPY(&args[0], &parser->index);
739 _xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
740
741 zend_call_known_fcc(&parser->characterDataHandler, /* retval */ NULL, /* param_count */ 2, args, /* named_params */ NULL);
742 zval_ptr_dtor(&args[0]);
743 zval_ptr_dtor(&args[1]);
744 }
745
746 if (Z_ISUNDEF(parser->data)) {
747 return;
748 }
749
750 bool doprint = 0;
751 zend_string *decoded_value;
752 decoded_value = xml_utf8_decode(s, len, parser->target_encoding);
753 if (parser->skipwhite) {
754 for (size_t i = 0; i < ZSTR_LEN(decoded_value); i++) {
755 switch (ZSTR_VAL(decoded_value)[i]) {
756 case ' ':
757 case '\t':
758 case '\n':
759 continue;
760 default:
761 doprint = 1;
762 break;
763 }
764 if (doprint) {
765 break;
766 }
767 }
768 }
769 if (parser->lastwasopen) {
770 zval *myval;
771 /* check if the current tag already has a value - if yes append to that! */
772 if ((myval = zend_hash_find(Z_ARRVAL_P(parser->ctag), ZSTR_KNOWN(ZEND_STR_VALUE)))) {
773 size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
774 Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
775 strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
776 ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
777 zend_string_release_ex(decoded_value, 0);
778 } else {
779 if (doprint || (! parser->skipwhite)) {
780 add_assoc_str(parser->ctag, "value", decoded_value);
781 } else {
782 zend_string_release_ex(decoded_value, 0);
783 }
784 }
785 } else {
786 zval tag;
787 zval *curtag, *mytype, *myval;
788 ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
789 if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
790 if (zend_string_equals_literal(Z_STR_P(mytype), "cdata")) {
791 if ((myval = zend_hash_find(Z_ARRVAL_P(curtag), ZSTR_KNOWN(ZEND_STR_VALUE)))) {
792 size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
793 Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
794 strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
795 ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
796 zend_string_release_ex(decoded_value, 0);
797 return;
798 }
799 }
800 }
801 break;
802 } ZEND_HASH_FOREACH_END();
803 if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) {
804 array_init(&tag);
805 _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
806 add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
807 add_assoc_str(&tag, "value", decoded_value);
808 add_assoc_string(&tag, "type", "cdata");
809 add_assoc_long(&tag, "level", parser->level);
810 zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
811 } else if (parser->level == (XML_MAXLEVEL + 1)) {
812 php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
813 } else {
814 zend_string_release_ex(decoded_value, 0);
815 }
816 }
817 }
818 /* }}} */
819
820 /* {{{ _xml_processingInstructionHandler() */
_xml_processingInstructionHandler(void * userData,const XML_Char * target,const XML_Char * data)821 void _xml_processingInstructionHandler(void *userData, const XML_Char *target, const XML_Char *data)
822 {
823 xml_parser *parser = (xml_parser *)userData;
824
825 if (!parser || !ZEND_FCC_INITIALIZED(parser->processingInstructionHandler)) {
826 return;
827 }
828
829 zval args[3];
830
831 ZVAL_COPY(&args[0], &parser->index);
832 _xml_xmlchar_zval(target, 0, parser->target_encoding, &args[1]);
833 _xml_xmlchar_zval(data, 0, parser->target_encoding, &args[2]);
834
835 zend_call_known_fcc(&parser->processingInstructionHandler, /* retval */ NULL, /* param_count */ 3, args, /* named_params */ NULL);
836 zval_ptr_dtor(&args[0]);
837 zval_ptr_dtor(&args[1]);
838 zval_ptr_dtor(&args[2]);
839 }
840 /* }}} */
841
842 /* {{{ _xml_defaultHandler() */
_xml_defaultHandler(void * userData,const XML_Char * s,int len)843 void _xml_defaultHandler(void *userData, const XML_Char *s, int len)
844 {
845 xml_parser *parser = (xml_parser *)userData;
846
847 if (!parser || !ZEND_FCC_INITIALIZED(parser->defaultHandler)) {
848 return;
849 }
850
851 zval args[2];
852
853 ZVAL_COPY(&args[0], &parser->index);
854 _xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
855
856 zend_call_known_fcc(&parser->defaultHandler, /* retval */ NULL, /* param_count */ 2, args, /* named_params */ NULL);
857 zval_ptr_dtor(&args[0]);
858 zval_ptr_dtor(&args[1]);
859 }
860 /* }}} */
861
862 /* {{{ _xml_unparsedEntityDeclHandler() */
_xml_unparsedEntityDeclHandler(void * userData,const XML_Char * entityName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)863 void _xml_unparsedEntityDeclHandler(void *userData,
864 const XML_Char *entityName, const XML_Char *base, const XML_Char *systemId,
865 const XML_Char *publicId, const XML_Char *notationName)
866 {
867 xml_parser *parser = (xml_parser *)userData;
868
869 if (!parser || !ZEND_FCC_INITIALIZED(parser->unparsedEntityDeclHandler)) {
870 return;
871 }
872
873 zval args[6];
874
875 ZVAL_COPY(&args[0], &parser->index);
876 _xml_xmlchar_zval(entityName, 0, parser->target_encoding, &args[1]);
877 _xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
878 _xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
879 _xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
880 _xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[5]);
881
882 zend_call_known_fcc(&parser->unparsedEntityDeclHandler, /* retval */ NULL, /* param_count */ 6, args, /* named_params */ NULL);
883 zval_ptr_dtor(&args[0]);
884 zval_ptr_dtor(&args[1]);
885 zval_ptr_dtor(&args[2]);
886 zval_ptr_dtor(&args[3]);
887 zval_ptr_dtor(&args[4]);
888 zval_ptr_dtor(&args[5]);
889 }
890 /* }}} */
891
892 /* {{{ _xml_notationDeclHandler() */
_xml_notationDeclHandler(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)893 void _xml_notationDeclHandler(void *userData, const XML_Char *notationName,
894 const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
895 {
896 xml_parser *parser = (xml_parser *)userData;
897
898 if (!parser || !ZEND_FCC_INITIALIZED(parser->notationDeclHandler)) {
899 return;
900 }
901
902 zval args[5];
903
904 ZVAL_COPY(&args[0], &parser->index);
905 _xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[1]);
906 _xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
907 _xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
908 _xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
909
910 zend_call_known_fcc(&parser->notationDeclHandler, /* retval */ NULL, /* param_count */ 5, args, /* named_params */ NULL);
911 zval_ptr_dtor(&args[0]);
912 zval_ptr_dtor(&args[1]);
913 zval_ptr_dtor(&args[2]);
914 zval_ptr_dtor(&args[3]);
915 zval_ptr_dtor(&args[4]);
916 }
917 /* }}} */
918
919 /* {{{ _xml_externalEntityRefHandler() */
/* External-entity-reference callback: forwards the parser followed by the
 * decoded open entity names, base, system id and public id to the user's
 * handler and returns the handler's result converted to an integer.
 * Returns 0 when no handler is set or the call produced no return value. */
int _xml_externalEntityRefHandler(XML_Parser parserPtr, const XML_Char *openEntityNames,
	const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
{
	xml_parser *parser = XML_GetUserData(parserPtr);

	/* abort if no handler is set (should be configurable?) */
	if (!parser || !ZEND_FCC_INITIALIZED(parser->externalEntityRefHandler)) {
		return 0;
	}

	/* String arguments in the order the handler receives them. */
	const XML_Char *const strings[] = { openEntityNames, base, systemId, publicId };
	zval args[5];
	zval retval;

	ZVAL_COPY(&args[0], &parser->index);
	for (size_t i = 0; i < sizeof(strings) / sizeof(strings[0]); i++) {
		_xml_xmlchar_zval(strings[i], 0, parser->target_encoding, &args[i + 1]);
	}

	zend_call_known_fcc(&parser->externalEntityRefHandler, /* retval */ &retval, /* param_count */ 5, args, /* named_params */ NULL);

	for (size_t i = 0; i < sizeof(args) / sizeof(args[0]); i++) {
		zval_ptr_dtor(&args[i]);
	}

	/* TODO Better handling from callable return value */
	if (Z_ISUNDEF(retval)) {
		return 0;
	}

	convert_to_long(&retval);
	int ret = Z_LVAL(retval);

	return ret;
}
955 /* }}} */
956
957 /* {{{ _xml_startNamespaceDeclHandler() */
_xml_startNamespaceDeclHandler(void * userData,const XML_Char * prefix,const XML_Char * uri)958 void _xml_startNamespaceDeclHandler(void *userData,const XML_Char *prefix, const XML_Char *uri)
959 {
960 xml_parser *parser = (xml_parser *)userData;
961
962 if (!parser || !ZEND_FCC_INITIALIZED(parser->startNamespaceDeclHandler)) {
963 return;
964 }
965
966 zval args[3];
967
968 ZVAL_COPY(&args[0], &parser->index);
969 _xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
970 _xml_xmlchar_zval(uri, 0, parser->target_encoding, &args[2]);
971
972 zend_call_known_fcc(&parser->startNamespaceDeclHandler, /* retval */ NULL, /* param_count */ 3, args, /* named_params */ NULL);
973 zval_ptr_dtor(&args[0]);
974 zval_ptr_dtor(&args[1]);
975 zval_ptr_dtor(&args[2]);
976 }
977 /* }}} */
978
979 /* {{{ _xml_endNamespaceDeclHandler() */
_xml_endNamespaceDeclHandler(void * userData,const XML_Char * prefix)980 void _xml_endNamespaceDeclHandler(void *userData, const XML_Char *prefix)
981 {
982 xml_parser *parser = (xml_parser *)userData;
983
984 if (!parser || !ZEND_FCC_INITIALIZED(parser->endNamespaceDeclHandler)) {
985 return;
986 }
987
988 zval args[2];
989
990 ZVAL_COPY(&args[0], &parser->index);
991 _xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
992
993 zend_call_known_fcc(&parser->endNamespaceDeclHandler, /* retval */ NULL, /* param_count */ 2, args, /* named_params */ NULL);
994 zval_ptr_dtor(&args[0]);
995 zval_ptr_dtor(&args[1]);
996 }
997 /* }}} */
998
999 /************************* EXTENSION FUNCTIONS *************************/
1000
/*
 * Shared implementation of xml_parser_create() and xml_parser_create_ns().
 *
 * Accepts an optional, nullable encoding string and, when ns_support is
 * non-zero, an optional namespace separator (":" if omitted). An empty
 * encoding string asks the underlying parser to detect the source encoding
 * (NULL is passed to XML_ParserCreate_MM()) while the default encoding is kept
 * as the target encoding. Any name other than ISO-8859-1, UTF-8 or US-ASCII
 * (case-insensitive) raises an argument value error. On success return_value
 * is initialised as an xml_parser_ce object.
 */
static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_support) /* {{{ */
{
	zend_string *encoding_param = NULL;
	char *ns_param = NULL;
	size_t ns_param_len = 0;
	const char *type_spec = ns_support ? "|S!s" : "|S!";

	if (zend_parse_parameters(ZEND_NUM_ARGS(), type_spec, &encoding_param, &ns_param, &ns_param_len) == FAILURE) {
		RETURN_THROWS();
	}

	/* Start from the configured default; an explicit name overrides it below. */
	XML_Char *encoding = XML(default_encoding);
	bool auto_detect = false;

	if (encoding_param != NULL) {
		/* The supported encoding types are hardcoded here because
		 * we are limited to the encodings supported by expat/xmltok.
		 */
		if (ZSTR_LEN(encoding_param) == 0) {
			auto_detect = true;
		} else if (zend_string_equals_literal_ci(encoding_param, "ISO-8859-1")) {
			encoding = (XML_Char*)"ISO-8859-1";
		} else if (zend_string_equals_literal_ci(encoding_param, "UTF-8")) {
			encoding = (XML_Char*)"UTF-8";
		} else if (zend_string_equals_literal_ci(encoding_param, "US-ASCII")) {
			encoding = (XML_Char*)"US-ASCII";
		} else {
			zend_argument_value_error(1, "is not a supported source encoding");
			RETURN_THROWS();
		}
	}

	if (ns_support && ns_param == NULL) {
		ns_param = ":";
	}

	object_init_ex(return_value, xml_parser_ce);

	xml_parser *parser = Z_XMLPARSER_P(return_value);
	const XML_Char *source_encoding = auto_detect ? NULL : encoding;

	parser->parser = XML_ParserCreate_MM(source_encoding, &php_xml_mem_hdlrs, (XML_Char*)ns_param);
	parser->target_encoding = encoding;
	parser->case_folding = 1;
	parser->isparsing = 0;
	parser->parsehuge = false; /* It's the default for BC & DoS protection */

	XML_SetUserData(parser->parser, parser);
	ZVAL_COPY_VALUE(&parser->index, return_value);
}
1055 /* }}} */
1056
1057 /* {{{ Create an XML parser */
PHP_FUNCTION(xml_parser_create)1058 PHP_FUNCTION(xml_parser_create)
1059 {
1060 php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1061 }
1062 /* }}} */
1063
/* {{{ Create an XML parser with namespace support */
PHP_FUNCTION(xml_parser_create_ns)1065 PHP_FUNCTION(xml_parser_create_ns)
1066 {
1067 php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1068 }
1069 /* }}} */
1070
/*
 * Resolves a handler given as a method name against an object.
 *
 * arg_num             Argument position used in error messages. 0 suppresses
 *                     the "method does not exist" error so the caller can
 *                     report its own.
 * object              Object whose class is searched; may be NULL.
 * method_name         Name to look up (lowercased before the lookup).
 * parser_handler_fcc  Filled in on a successful lookup.
 *
 * Returns true when the name is empty (meaning "unset the handler";
 * parser_handler_fcc is left untouched) or when the method was found.
 * Returns false when no object is available or the class has no such method.
 */
static bool php_xml_check_string_method_arg(
	unsigned int arg_num,
	zend_object *object,
	zend_string *method_name,
	zend_fcall_info_cache *const parser_handler_fcc
) {
	if (ZSTR_LEN(method_name) == 0) {
		/* Unset handler */
		ZEND_ASSERT(arg_num != 0);
		return true;
	}

	if (object == NULL) {
		ZEND_ASSERT(arg_num != 0);
		zend_argument_value_error(arg_num, "an object must be set via xml_set_object() to be able to lookup method");
		return false;
	}

	zend_class_entry *ce = object->ce;
	zend_string *lowercased = zend_string_tolower(method_name);
	zend_function *method = zend_hash_find_ptr(&ce->function_table, lowercased);
	zend_string_release_ex(lowercased, 0);

	if (method != NULL) {
		parser_handler_fcc->function_handler = method;
		/* We set the calling scope to NULL to be able to differentiate a "method" set from a proper callable */
		parser_handler_fcc->calling_scope = NULL;
		parser_handler_fcc->called_scope = ce;
		parser_handler_fcc->object = object;
		return true;
	}

	if (arg_num != 0) {
		zend_argument_value_error(arg_num, "method %s::%s() does not exist", ZSTR_VAL(ce->name), ZSTR_VAL(method_name));
	}
	return false;
}
1108
/*
 * Re-binds one handler to a new callback object.
 *
 * Applies only when the handler in fcc_field is initialised, is bound to the
 * parser's current object and has a NULL calling scope (the marker used for
 * handlers that were registered by method name). The method name is copied,
 * the old FCC is destroyed, and the same name is looked up on new_this_obj
 * with arg_num 0 so that the lookup helper does not raise its own "method
 * does not exist" error. If the lookup fails, an argument value error for
 * argument 2 is raised, naming handler_set_method (a string literal that is
 * concatenated into the message), and the enclosing function returns. On
 * success a reference is added to the freshly filled FCC.
 *
 * The expansion is a bare `if` statement containing RETURN_THROWS(), so it
 * can only be used as a statement inside a PHP_FUNCTION body.
 */
#define PHP_XML_CHECK_NEW_THIS_METHODS(parser_to_check, new_this_obj, fcc_field, handler_set_method) \
	if ( \
		ZEND_FCC_INITIALIZED(parser_to_check->fcc_field) \
		&& parser_to_check->fcc_field.object == parser_to_check->object \
		&& parser_to_check->fcc_field.calling_scope == NULL \
	) { \
		zend_string *method_name = zend_string_copy(parser_to_check->fcc_field.function_handler->common.function_name); \
		zend_fcc_dtor(&parser_to_check->fcc_field); \
		bool status = php_xml_check_string_method_arg(0, new_this_obj, method_name, &parser_to_check->fcc_field); \
		if (status == false) { \
			zend_argument_value_error(2, "cannot safely swap to object of class %s as method \"%s\" does not exist, which was set via " handler_set_method, \
				ZSTR_VAL(new_this_obj->ce->name), ZSTR_VAL(method_name)); \
			zend_string_release(method_name); \
			RETURN_THROWS(); \
		} \
		zend_string_release(method_name); \
		zend_fcc_addref(&parser_to_check->fcc_field); \
	}
1127
1128
1129 /* {{{ Set up object which should be used for callbacks */
PHP_FUNCTION(xml_set_object)1130 PHP_FUNCTION(xml_set_object)
1131 {
1132 xml_parser *parser;
1133 zval *pind, *mythis;
1134 zend_object *new_this;
1135
1136 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oo", &pind, xml_parser_ce, &mythis) == FAILURE) {
1137 RETURN_THROWS();
1138 }
1139
1140 parser = Z_XMLPARSER_P(pind);
1141 new_this = Z_OBJ_P(mythis);
1142
1143 if (parser->object) {
1144 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, startElementHandler, "xml_set_element_handler()");
1145 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, endElementHandler, "xml_set_element_handler()");
1146 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, characterDataHandler, "xml_set_character_data_handler()");
1147 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, processingInstructionHandler, "xml_set_processing_instruction_handler()");
1148 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, defaultHandler, "xml_set_default_handler()");
1149 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, unparsedEntityDeclHandler, "xml_set_unparsed_entity_decl_handler()");
1150 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, notationDeclHandler, "xml_set_notation_decl_handler()");
1151 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, externalEntityRefHandler, "xml_set_external_entity_ref_handler()");
1152 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, startNamespaceDeclHandler, "xml_set_start_namespace_decl_handler()");
1153 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, endNamespaceDeclHandler, "xml_set_end_namespace_decl_handler()");
1154
1155 OBJ_RELEASE(parser->object);
1156 }
1157
1158 parser->object = new_this;
1159 GC_ADDREF(parser->object);
1160
1161 RETURN_TRUE;
1162 }
1163 /* }}} */
1164
1165 /* {{{ Set up start and end element handlers */
PHP_FUNCTION(xml_set_element_handler)1166 PHP_FUNCTION(xml_set_element_handler)
1167 {
1168 xml_parser *parser;
1169 zval *pind;
1170 zend_fcall_info start_fci = {0};
1171 zend_fcall_info_cache start_fcc = {0};
1172 zend_fcall_info end_fci = {0};
1173 zend_fcall_info_cache end_fcc = {0};
1174 zend_string *start_method_name = NULL;
1175 zend_string *end_method_name = NULL;
1176
1177 if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OF!F!", &pind, xml_parser_ce, &start_fci, &start_fcc, &end_fci, &end_fcc) == SUCCESS) {
1178 parser = Z_XMLPARSER_P(pind);
1179 goto set_handlers;
1180 }
1181 zend_release_fcall_info_cache(&start_fcc);
1182 zend_release_fcall_info_cache(&end_fcc);
1183
1184 if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OF!S", &pind, xml_parser_ce, &start_fci, &start_fcc, &end_method_name) == SUCCESS) {
1185 parser = Z_XMLPARSER_P(pind);
1186
1187 bool status = php_xml_check_string_method_arg(3, parser->object, end_method_name, &end_fcc);
1188 if (status == false) {
1189 zend_release_fcall_info_cache(&start_fcc);
1190 zend_release_fcall_info_cache(&end_fcc);
1191 RETURN_THROWS();
1192 }
1193 } else if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OSF!", &pind, xml_parser_ce, &start_method_name, &end_fci, &end_fcc) == SUCCESS) {
1194 parser = Z_XMLPARSER_P(pind);
1195
1196 bool status = php_xml_check_string_method_arg(2, parser->object, start_method_name, &start_fcc);
1197 if (status == false) {
1198 zend_release_fcall_info_cache(&start_fcc);
1199 zend_release_fcall_info_cache(&end_fcc);
1200 RETURN_THROWS();
1201 }
1202 } else if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OSS", &pind, xml_parser_ce, &start_method_name, &end_method_name) == SUCCESS) {
1203 zend_release_fcall_info_cache(&start_fcc);
1204 zend_release_fcall_info_cache(&end_fcc);
1205
1206 parser = Z_XMLPARSER_P(pind);
1207
1208 bool status = php_xml_check_string_method_arg(2, parser->object, start_method_name, &start_fcc);
1209 if (status == false) {
1210 RETURN_THROWS();
1211 }
1212 status = php_xml_check_string_method_arg(3, parser->object, end_method_name, &end_fcc);
1213 if (status == false) {
1214 RETURN_THROWS();
1215 }
1216 } else {
1217 zval *dummy_start;
1218 zval *dummy_end;
1219
1220 zend_release_fcall_info_cache(&start_fcc);
1221 zend_release_fcall_info_cache(&end_fcc);
1222 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ozz", &pind, xml_parser_ce, &dummy_start, &dummy_end) == FAILURE) {
1223 RETURN_THROWS();
1224 } else {
1225 switch (Z_TYPE_P(dummy_start)) {
1226 case IS_NULL:
1227 case IS_STRING:
1228 break;
1229 default:
1230 zend_argument_type_error(2, "must be of type callable|string|null");
1231 RETURN_THROWS();
1232 }
1233 zend_argument_type_error(3, "must be of type callable|string|null");
1234 RETURN_THROWS();
1235 }
1236 }
1237
1238 set_handlers:
1239 xml_set_handler(&parser->startElementHandler, &start_fcc);
1240 xml_set_handler(&parser->endElementHandler, &end_fcc);
1241 XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
1242
1243 RETURN_TRUE;
1244 }
1245 /* }}} */
1246
/*
 * Parses the (parser, handler) argument pair shared by the single-handler
 * xml_set_*_handler() functions.
 *
 * parser              Receives the xml_parser taken from the first argument.
 * parser_handler_fcc  Receives the resolved handler. It is left untouched when
 *                     the handler is null or an empty method name, which the
 *                     caller treats as "remove the handler".
 *
 * The handler may be a callable, null, or a string naming a method of the
 * object set via xml_set_object(). On any failure an exception is pending when
 * this function returns; callers must check for it.
 */
static void php_xml_set_handler_parse_callable(
	INTERNAL_FUNCTION_PARAMETERS,
	xml_parser **const parser,
	zend_fcall_info_cache *const parser_handler_fcc
) {
	zval *pind;
	zend_fcall_info handler_fci = {0};
	zend_fcall_info_cache handler_fcc = {0};
	zend_string *method_name = NULL;

	/* First choice: a real callable, or null. */
	if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OF!", &pind, xml_parser_ce, &handler_fci, &handler_fcc) == SUCCESS) {
		*parser = Z_XMLPARSER_P(pind);
		/* A null handler frees the current one: returning with an
		 * uninitialized FCC communicates this to the caller. */
		if (ZEND_FCI_INITIALIZED(handler_fci)) {
			*parser_handler_fcc = handler_fcc;
		}
		return;
	}

	/* Second choice: a method name resolved against the parser's object. */
	if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OS", &pind, xml_parser_ce, &method_name) == SUCCESS) {
		*parser = Z_XMLPARSER_P(pind);

		if (!php_xml_check_string_method_arg(2, (*parser)->object, method_name, parser_handler_fcc)) {
			RETURN_THROWS();
		}
		return;
	}

	/* Neither matched: re-parse loudly so parser/arity errors surface as
	 * usual, otherwise blame the handler argument. */
	zval *dummy;
	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &dummy) == FAILURE) {
		RETURN_THROWS();
	}
	zend_argument_type_error(2, "must be of type callable|string|null");
	RETURN_THROWS();
}
1280
/*
 * Generates the PHP function `function_name` for a single-handler setter.
 *
 * function_name        Name of the PHP function being defined.
 * parser_handler_name  xml_parser field that stores the handler FCC.
 * parse_function       Parser API function that installs the C-level callback.
 * c_function           C callback that forwards events to the stored handler.
 *
 * The generated function parses its arguments with
 * php_xml_set_handler_parse_callable(), returns early if that left an
 * exception pending, stores the handler with xml_set_handler(), installs the
 * C callback on the underlying parser and returns true.
 */
#define XML_SET_HANDLER_PHP_FUNCTION(function_name, parser_handler_name, parse_function, c_function) \
	PHP_FUNCTION(function_name) \
	{ \
		xml_parser *parser = NULL; \
		zend_fcall_info_cache handler_fcc = {0}; \
		php_xml_set_handler_parse_callable(INTERNAL_FUNCTION_PARAM_PASSTHRU, &parser, &handler_fcc); \
		if (EG(exception)) { return; } \
		ZEND_ASSERT(parser); \
		xml_set_handler(&parser->parser_handler_name, &handler_fcc); \
		parse_function(parser->parser, c_function); \
		RETURN_TRUE; \
	}

/* One setter per handler kind; each line defines a complete PHP function. */
XML_SET_HANDLER_PHP_FUNCTION(xml_set_character_data_handler, characterDataHandler, XML_SetCharacterDataHandler, _xml_characterDataHandler);
XML_SET_HANDLER_PHP_FUNCTION(xml_set_processing_instruction_handler, processingInstructionHandler, XML_SetProcessingInstructionHandler, _xml_processingInstructionHandler);
XML_SET_HANDLER_PHP_FUNCTION(xml_set_default_handler, defaultHandler, XML_SetDefaultHandler, _xml_defaultHandler);
XML_SET_HANDLER_PHP_FUNCTION(xml_set_unparsed_entity_decl_handler, unparsedEntityDeclHandler, XML_SetUnparsedEntityDeclHandler, _xml_unparsedEntityDeclHandler);
XML_SET_HANDLER_PHP_FUNCTION(xml_set_notation_decl_handler, notationDeclHandler, XML_SetNotationDeclHandler, _xml_notationDeclHandler);
XML_SET_HANDLER_PHP_FUNCTION(xml_set_external_entity_ref_handler, externalEntityRefHandler, XML_SetExternalEntityRefHandler, (void *) _xml_externalEntityRefHandler);
XML_SET_HANDLER_PHP_FUNCTION(xml_set_start_namespace_decl_handler, startNamespaceDeclHandler, XML_SetStartNamespaceDeclHandler, _xml_startNamespaceDeclHandler);
XML_SET_HANDLER_PHP_FUNCTION(xml_set_end_namespace_decl_handler, endNamespaceDeclHandler, XML_SetEndNamespaceDeclHandler, _xml_endNamespaceDeclHandler);
1302
1303 /* {{{ Start parsing an XML document */
PHP_FUNCTION(xml_parse)1304 PHP_FUNCTION(xml_parse)
1305 {
1306 xml_parser *parser;
1307 zval *pind;
1308 char *data;
1309 size_t data_len;
1310 bool isFinal = 0;
1311
1312 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
1313 RETURN_THROWS();
1314 }
1315
1316 parser = Z_XMLPARSER_P(pind);
1317 if (parser->isparsing) {
1318 zend_throw_error(NULL, "Parser must not be called recursively");
1319 RETURN_THROWS();
1320 }
1321 RETURN_LONG(xml_parse_helper(parser, data, data_len, isFinal));
1322 }
1323
1324 /* }}} */
1325
/* {{{ Parse an XML document into array structures */
PHP_FUNCTION(xml_parse_into_struct)1327 PHP_FUNCTION(xml_parse_into_struct)
1328 {
1329 xml_parser *parser;
1330 zval *pind, *xdata, *info = NULL;
1331 char *data;
1332 size_t data_len;
1333
1334 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
1335 RETURN_THROWS();
1336 }
1337
1338 parser = Z_XMLPARSER_P(pind);
1339
1340 if (parser->isparsing) {
1341 php_error_docref(NULL, E_WARNING, "Parser must not be called recursively");
1342 RETURN_FALSE;
1343 }
1344
1345 if (info) {
1346 info = zend_try_array_init(info);
1347 if (!info) {
1348 RETURN_THROWS();
1349 }
1350 }
1351
1352 xdata = zend_try_array_init(xdata);
1353 if (!xdata) {
1354 RETURN_THROWS();
1355 }
1356
1357 ZVAL_COPY_VALUE(&parser->data, xdata);
1358
1359 if (info) {
1360 ZVAL_COPY_VALUE(&parser->info, info);
1361 }
1362
1363 parser->level = 0;
1364 xml_parser_free_ltags(parser);
1365 parser->ltags = safe_emalloc(XML_MAXLEVEL, sizeof(char *), 0);
1366 memset(parser->ltags, 0, XML_MAXLEVEL * sizeof(char *));
1367
1368 XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
1369 XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
1370
1371 RETURN_LONG(xml_parse_helper(parser, data, data_len, true));
1372 }
1373 /* }}} */
1374
1375 /* {{{ Get XML parser error code */
PHP_FUNCTION(xml_get_error_code)1376 PHP_FUNCTION(xml_get_error_code)
1377 {
1378 xml_parser *parser;
1379 zval *pind;
1380
1381 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1382 RETURN_THROWS();
1383 }
1384
1385 parser = Z_XMLPARSER_P(pind);
1386 RETURN_LONG((zend_long)XML_GetErrorCode(parser->parser));
1387 }
1388 /* }}} */
1389
1390 /* {{{ Get XML parser error string */
PHP_FUNCTION(xml_error_string)1391 PHP_FUNCTION(xml_error_string)
1392 {
1393 zend_long code;
1394 char *str;
1395
1396 if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &code) == FAILURE) {
1397 RETURN_THROWS();
1398 }
1399
1400 str = (char *)XML_ErrorString((int)code);
1401 if (str) {
1402 RETVAL_STRING(str);
1403 }
1404 }
1405 /* }}} */
1406
1407 /* {{{ Get current line number for an XML parser */
PHP_FUNCTION(xml_get_current_line_number)1408 PHP_FUNCTION(xml_get_current_line_number)
1409 {
1410 xml_parser *parser;
1411 zval *pind;
1412
1413 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1414 RETURN_THROWS();
1415 }
1416
1417 parser = Z_XMLPARSER_P(pind);
1418 RETVAL_LONG(XML_GetCurrentLineNumber(parser->parser));
1419 }
1420 /* }}} */
1421
1422 /* {{{ Get current column number for an XML parser */
PHP_FUNCTION(xml_get_current_column_number)1423 PHP_FUNCTION(xml_get_current_column_number)
1424 {
1425 xml_parser *parser;
1426 zval *pind;
1427
1428 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1429 RETURN_THROWS();
1430 }
1431
1432 parser = Z_XMLPARSER_P(pind);
1433 RETVAL_LONG(XML_GetCurrentColumnNumber(parser->parser));
1434 }
1435 /* }}} */
1436
1437 /* {{{ Get current byte index for an XML parser */
PHP_FUNCTION(xml_get_current_byte_index)1438 PHP_FUNCTION(xml_get_current_byte_index)
1439 {
1440 xml_parser *parser;
1441 zval *pind;
1442
1443 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1444 RETURN_THROWS();
1445 }
1446
1447 parser = Z_XMLPARSER_P(pind);
1448 RETVAL_LONG(XML_GetCurrentByteIndex(parser->parser));
1449 }
1450 /* }}} */
1451
1452 /* {{{ Free an XML parser */
PHP_FUNCTION(xml_parser_free)1453 PHP_FUNCTION(xml_parser_free)
1454 {
1455 zval *pind;
1456 xml_parser *parser;
1457
1458 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1459 RETURN_THROWS();
1460 }
1461
1462 parser = Z_XMLPARSER_P(pind);
1463 if (parser->isparsing == 1) {
1464 php_error_docref(NULL, E_WARNING, "Parser cannot be freed while it is parsing");
1465 RETURN_FALSE;
1466 }
1467
1468 RETURN_TRUE;
1469 }
1470 /* }}} */
1471
1472 /* {{{ Set options in an XML parser */
PHP_FUNCTION(xml_parser_set_option)1473 PHP_FUNCTION(xml_parser_set_option)
1474 {
1475 xml_parser *parser;
1476 zval *pind;
1477 zend_long opt;
1478 zval *value;
1479
1480 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Olz", &pind, xml_parser_ce, &opt, &value) == FAILURE) {
1481 RETURN_THROWS();
1482 }
1483
1484 if (Z_TYPE_P(value) != IS_FALSE && Z_TYPE_P(value) != IS_TRUE &&
1485 Z_TYPE_P(value) != IS_LONG && Z_TYPE_P(value) != IS_STRING) {
1486 php_error_docref(NULL, E_WARNING,
1487 "Argument #3 ($value) must be of type string|int|bool, %s given", zend_zval_type_name(value));
1488 }
1489
1490 parser = Z_XMLPARSER_P(pind);
1491 switch (opt) {
1492 /* Boolean option */
1493 case PHP_XML_OPTION_CASE_FOLDING:
1494 parser->case_folding = zend_is_true(value);
1495 break;
1496 /* Boolean option */
1497 case PHP_XML_OPTION_SKIP_WHITE:
1498 parser->skipwhite = zend_is_true(value);
1499 break;
1500 /* Boolean option */
1501 case PHP_XML_OPTION_PARSE_HUGE:
1502 /* Prevent wreaking havock to the parser internals during parsing */
1503 if (UNEXPECTED(parser->isparsing)) {
1504 zend_throw_error(NULL, "Cannot change option XML_OPTION_PARSE_HUGE while parsing");
1505 RETURN_THROWS();
1506 }
1507 parser->parsehuge = zend_is_true(value);
1508 break;
1509 /* Integer option */
1510 case PHP_XML_OPTION_SKIP_TAGSTART:
1511 /* The tag start offset is stored in an int */
1512 /* TODO Improve handling of values? */
1513 parser->toffset = zval_get_long(value);
1514 if (parser->toffset < 0) {
1515 /* TODO Promote to ValueError in PHP 9.0 */
1516 php_error_docref(NULL, E_WARNING, "Argument #3 ($value) must be between 0 and %d"
1517 " for option XML_OPTION_SKIP_TAGSTART", INT_MAX);
1518 parser->toffset = 0;
1519 RETURN_FALSE;
1520 }
1521 break;
1522 /* String option */
1523 case PHP_XML_OPTION_TARGET_ENCODING: {
1524 const xml_encoding *enc;
1525 if (!try_convert_to_string(value)) {
1526 RETURN_THROWS();
1527 }
1528
1529 enc = xml_get_encoding((XML_Char*)Z_STRVAL_P(value));
1530 if (enc == NULL) {
1531 zend_argument_value_error(3, "is not a supported target encoding");
1532 RETURN_THROWS();
1533 }
1534
1535 parser->target_encoding = enc->name;
1536 break;
1537 }
1538 default:
1539 zend_argument_value_error(2, "must be a XML_OPTION_* constant");
1540 RETURN_THROWS();
1541 break;
1542 }
1543
1544 RETURN_TRUE;
1545 }
1546 /* }}} */
1547
1548 /* {{{ Get options from an XML parser */
PHP_FUNCTION(xml_parser_get_option)1549 PHP_FUNCTION(xml_parser_get_option)
1550 {
1551 xml_parser *parser;
1552 zval *pind;
1553 zend_long opt;
1554
1555 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ol", &pind, xml_parser_ce, &opt) == FAILURE) {
1556 RETURN_THROWS();
1557 }
1558
1559 parser = Z_XMLPARSER_P(pind);
1560 switch (opt) {
1561 case PHP_XML_OPTION_CASE_FOLDING:
1562 RETURN_BOOL(parser->case_folding);
1563 break;
1564 case PHP_XML_OPTION_SKIP_TAGSTART:
1565 RETURN_LONG(parser->toffset);
1566 break;
1567 case PHP_XML_OPTION_SKIP_WHITE:
1568 RETURN_BOOL(parser->skipwhite);
1569 break;
1570 case PHP_XML_OPTION_PARSE_HUGE:
1571 RETURN_BOOL(parser->parsehuge);
1572 break;
1573 case PHP_XML_OPTION_TARGET_ENCODING:
1574 RETURN_STRING((char *)parser->target_encoding);
1575 break;
1576 default:
1577 zend_argument_value_error(2, "must be a XML_OPTION_* constant");
1578 RETURN_THROWS();
1579 }
1580 }
1581 /* }}} */
1582
1583 #endif
1584