1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Stig Sæther Bakken <ssb@php.net> |
14 | Thies C. Arntzen <thies@thieso.net> |
15 | Sterling Hughes <sterling@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #ifdef HAVE_CONFIG_H
20 #include <config.h>
21 #endif
22
23 #include "php.h"
24
25 #include "zend_variables.h"
26 #include "zend_attributes.h"
27 #include "ext/standard/info.h"
28 #include "ext/standard/html.h" /* For php_next_utf8_char() */
29
30 #ifdef HAVE_XML
31
32 #include "php_xml.h"
33 #ifdef LIBXML_EXPAT_COMPAT
34 #include "ext/libxml/php_libxml.h"
35 #endif
36
37 #include "xml_arginfo.h"
38
39 /* Short-term TODO list:
40 * - Implement XML_ExternalEntityParserCreate()
41 * - XML_SetCommentHandler
42 * - XML_SetCdataSectionHandler
43 * - XML_SetParamEntityParsing
44 */
45
46 /* Long-term TODO list:
47 * - Fix the expat library so you can install your own memory manager
48 * functions
49 */
50
51 /* Known bugs:
52 * - Weird things happen with <![CDATA[]]> sections.
53 */
54
/* Module globals for ext/xml. */
ZEND_BEGIN_MODULE_GLOBALS(xml)
	/* Default encoding name; PHP_GINIT(xml) sets it to "UTF-8". */
	XML_Char *default_encoding;
ZEND_END_MODULE_GLOBALS(xml)

ZEND_DECLARE_MODULE_GLOBALS(xml)

/* Shorthand for reading or writing one field of the xml module globals. */
#define XML(v) ZEND_MODULE_GLOBALS_ACCESSOR(xml, v)
62
/* State of one XMLParser object. The embedded zend_object must remain the
 * last member: the object handlers locate this struct by subtracting the
 * offset of `std`, and object creation zeroes everything in front of it. */
typedef struct {
	/* Underlying parser handle; released with XML_ParserFree(). */
	XML_Parser parser;
	/* Encoding name passed to xml_utf8_decode() for everything handed to PHP. */
	XML_Char *target_encoding;

	/* Reference to the object itself, for convenience.
	 * It is not owned, do not release it. */
	zval index;

	/* Object released (OBJ_RELEASE) when the parser is freed and reported to the GC.
	 * NOTE(review): presumably the object that method-style handlers are bound to — confirm. */
	zend_object *object;
	/* User callbacks; each one is invoked only when ZEND_FCC_INITIALIZED(). */
	zend_fcall_info_cache startElementHandler;
	zend_fcall_info_cache endElementHandler;
	zend_fcall_info_cache characterDataHandler;
	zend_fcall_info_cache processingInstructionHandler;
	zend_fcall_info_cache defaultHandler;
	zend_fcall_info_cache unparsedEntityDeclHandler;
	zend_fcall_info_cache notationDeclHandler;
	zend_fcall_info_cache externalEntityRefHandler;
	zend_fcall_info_cache startNamespaceDeclHandler;
	zend_fcall_info_cache endNamespaceDeclHandler;

	/* When not UNDEF: array that the element/cdata handlers append tag entries to. */
	zval data;
	/* When not UNDEF: array mapping a tag name to the list of `curtag` values recorded for it. */
	zval info;
	/* Current element nesting depth (incremented on start, decremented on end). */
	int level;
	/* Number of leading characters SKIP_TAGSTART() strips from tag names. */
	int toffset;
	/* Running counter stored into `info` by xml_add_to_info(). */
	int curtag;
	/* Entry in `data` created by the most recent start-element event. */
	zval *ctag;
	/* Per-depth copies (estrdup) of the decoded tag names, indexed by level - 1. */
	char **ltags;
	/* Set by a start-element event, cleared by the following end-element event. */
	bool lastwasopen;
	/* When set, character data made only of ' ', '\t' and '\n' is not recorded. */
	bool skipwhite;
	/* Set for the duration of XML_Parse() in xml_parse_helper(). */
	bool isparsing;
	/* Under libxml2: toggles XML_PARSE_HUGE and the dictionary limit. */
	bool parsehuge;
	/* When set, decoded tag names are upper-cased. */
	bool case_folding;

	/* Released with efree() when the parser object is freed. */
	XML_Char *baseURI;

	zend_object std;
} xml_parser;
100
101
/* One entry of the supported-encodings table. */
typedef struct {
	/* Encoding name, matched case-insensitively by xml_get_encoding(). */
	XML_Char *name;
	/* Maps a code point to one byte of this encoding; NULL means "leave the data as UTF-8". */
	char (*decoding_function)(unsigned short);
	/* Maps one byte of this encoding to a code point; may be NULL. */
	unsigned short (*encoding_function)(unsigned char);
} xml_encoding;
107
/* {{{ dynamically loadable module stuff */
/* Only compiled when the extension is built with COMPILE_DL_XML defined. */
#ifdef COMPILE_DL_XML
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(xml)
#endif /* COMPILE_DL_XML */
/* }}} */
116
/* Deepest element nesting level for which tag entries and ltags slots are recorded. */
#define XML_MAXLEVEL 255 /* XXX this should be dynamic */

/* Yields `str` advanced by parser->toffset characters, clamped to the end of
 * the string. Requires a variable named `parser` in scope and evaluates
 * `str` more than once, so the argument must be free of side effects. */
#define SKIP_TAGSTART(str) ((str) + (parser->toffset > strlen(str) ? strlen(str) : parser->toffset))

/* Class entry and object handlers of XMLParser; both are filled in by PHP_MINIT(xml). */
static zend_class_entry *xml_parser_ce;
static zend_object_handlers xml_parser_object_handlers;
123
/* {{{ function prototypes */
/* Module lifecycle hooks. */
PHP_MINIT_FUNCTION(xml);
PHP_MINFO_FUNCTION(xml);
static PHP_GINIT_FUNCTION(xml);

/* XMLParser object handlers. */
static zend_object *xml_parser_create_object(zend_class_entry *class_type);
static void xml_parser_free_obj(zend_object *object);
static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n);
static zend_function *xml_parser_get_constructor(zend_object *object);

/* Encoding and string helpers. */
static zend_string *xml_utf8_decode(const XML_Char *, size_t, const XML_Char *);
inline static unsigned short xml_encode_iso_8859_1(unsigned char);
inline static char xml_decode_iso_8859_1(unsigned short);
inline static unsigned short xml_encode_us_ascii(unsigned char);
inline static char xml_decode_us_ascii(unsigned short);
static void xml_xmlchar_zval(const XML_Char *, int, const XML_Char *, zval *);
static int xml_xmlcharlen(const XML_Char *);
static void xml_add_to_info(xml_parser *parser, const char *name);
inline static zend_string *xml_decode_tag(xml_parser *parser, const XML_Char *tag);

/* Parser event callbacks; the first argument carries the xml_parser state
 * (or, for the external entity handler, the XML_Parser it is attached to). */
void xml_startElementHandler(void *, const XML_Char *, const XML_Char **);
void xml_endElementHandler(void *, const XML_Char *);
void xml_characterDataHandler(void *, const XML_Char *, int);
void xml_processingInstructionHandler(void *, const XML_Char *, const XML_Char *);
void xml_defaultHandler(void *, const XML_Char *, int);
void xml_unparsedEntityDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
void xml_notationDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
int xml_externalEntityRefHandler(XML_Parser, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);

void xml_startNamespaceDeclHandler(void *, const XML_Char *, const XML_Char *);
void xml_endNamespaceDeclHandler(void *, const XML_Char *);
/* }}} */
156
/* Module dependencies: the libxml extension is required only when this
 * extension is built with LIBXML_EXPAT_COMPAT defined. */
#ifdef LIBXML_EXPAT_COMPAT
static const zend_module_dep xml_deps[] = {
	ZEND_MOD_REQUIRED("libxml")
	ZEND_MOD_END
};
#endif
163
/* Module descriptor for ext/xml. The header variant carrying xml_deps is
 * used only when LIBXML_EXPAT_COMPAT is defined. */
zend_module_entry xml_module_entry = {
#ifdef LIBXML_EXPAT_COMPAT
	STANDARD_MODULE_HEADER_EX, NULL,
	xml_deps,
#else
	STANDARD_MODULE_HEADER,
#endif
	"xml",                /* extension name */
	ext_functions,        /* extension function list */
	PHP_MINIT(xml),       /* extension-wide startup function */
	NULL,                 /* extension-wide shutdown function */
	NULL,                 /* per-request startup function */
	NULL,                 /* per-request shutdown function */
	PHP_MINFO(xml),       /* information function */
	PHP_XML_VERSION,
	PHP_MODULE_GLOBALS(xml), /* globals descriptor */
	PHP_GINIT(xml),          /* globals ctor */
	NULL,                    /* globals dtor */
	NULL,                    /* post deactivate */
	STANDARD_MODULE_PROPERTIES_EX
};
185
/* Table of supported target encodings, terminated by an entry whose name is
 * NULL. ISO-8859-1 and US-ASCII carry conversion functions; UTF-8 has none,
 * so xml_utf8_decode() returns UTF-8 data unchanged for it.
 */
static const xml_encoding xml_encodings[] = {
	{ (XML_Char *)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
	{ (XML_Char *)"US-ASCII",   xml_decode_us_ascii,   xml_encode_us_ascii   },
	{ (XML_Char *)"UTF-8",      NULL,                  NULL                  },
	{ (XML_Char *)NULL,         NULL,                  NULL                  }
};
195
/* Memory handling suite; its callbacks are assigned at runtime in PHP_MINIT(xml). */
static XML_Memory_Handling_Suite php_xml_mem_hdlrs;
197
198 /* }}} */
199
200 /* {{{ startup, shutdown and info functions */
/* Globals constructor: sets the default encoding of the module globals to "UTF-8". */
static PHP_GINIT_FUNCTION(xml)
{
#if defined(COMPILE_DL_XML) && defined(ZTS)
	ZEND_TSRMLS_CACHE_UPDATE();
#endif
	xml_globals->default_encoding = (XML_Char*)"UTF-8";
}
208
/* Allocation callback for the XML memory suite: forwards to emalloc(). */
static void *php_xml_malloc_wrapper(size_t sz)
{
	void *block = emalloc(sz);

	return block;
}
213
/* Reallocation callback for the XML memory suite: forwards to erealloc(). */
static void *php_xml_realloc_wrapper(void *ptr, size_t sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
218
php_xml_free_wrapper(void * ptr)219 static void php_xml_free_wrapper(void *ptr)
220 {
221 if (ptr != NULL) {
222 efree(ptr);
223 }
224 }
225
PHP_MINIT_FUNCTION(xml)226 PHP_MINIT_FUNCTION(xml)
227 {
228 xml_parser_ce = register_class_XMLParser();
229 xml_parser_ce->create_object = xml_parser_create_object;
230 xml_parser_ce->default_object_handlers = &xml_parser_object_handlers;
231
232 memcpy(&xml_parser_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
233 xml_parser_object_handlers.offset = XtOffsetOf(xml_parser, std);
234 xml_parser_object_handlers.free_obj = xml_parser_free_obj;
235 xml_parser_object_handlers.get_gc = xml_parser_get_gc;
236 xml_parser_object_handlers.get_constructor = xml_parser_get_constructor;
237 xml_parser_object_handlers.clone_obj = NULL;
238 xml_parser_object_handlers.compare = zend_objects_not_comparable;
239
240 register_xml_symbols(module_number);
241
242 /* this object should not be pre-initialised at compile time,
243 as the order of members may vary */
244
245 php_xml_mem_hdlrs.malloc_fcn = php_xml_malloc_wrapper;
246 php_xml_mem_hdlrs.realloc_fcn = php_xml_realloc_wrapper;
247 php_xml_mem_hdlrs.free_fcn = php_xml_free_wrapper;
248
249 return SUCCESS;
250 }
251
/* Prints the extension's info table: XML support flags and the version of
 * the backing parser library. */
PHP_MINFO_FUNCTION(xml)
{
#if defined(LIBXML_DOTTED_VERSION) && defined(LIBXML_EXPAT_COMPAT)
	const char *backend_label = "libxml2 Version";
	const char *backend_version = LIBXML_DOTTED_VERSION;
#else
	const char *backend_label = "EXPAT Version";
	const char *backend_version = (const char *) XML_ExpatVersion();
#endif

	php_info_print_table_start();
	php_info_print_table_row(2, "XML Support", "active");
	php_info_print_table_row(2, "XML Namespace Support", "active");
	php_info_print_table_row(2, backend_label, backend_version);
	php_info_print_table_end();
}
264 /* }}} */
265
266 /* {{{ extension-internal functions */
267
/* Feed one buffer to XML_Parse() while marking the parser as busy.
 *
 * parser   - parser state; must not already be parsing (asserted)
 * data     - input bytes handed to XML_Parse()
 * data_len - number of bytes in data
 * is_final - forwarded to XML_Parse() as its last argument
 *
 * Returns whatever XML_Parse() returned.
 *
 * NOTE(review): data_len is a size_t; confirm that the length parameter of
 * XML_Parse() can represent every value callers may pass here.
 */
static int xml_parse_helper(xml_parser *parser, const char *data, size_t data_len, bool is_final)
{
	ZEND_ASSERT(!parser->isparsing);

	/* libxml2 specific options */
#ifdef LIBXML_EXPAT_COMPAT
	/* TODO: In libxml2 2.14.0 change this to the new options API so we don't rely on deprecated APIs. */
	ZEND_DIAGNOSTIC_IGNORED_START("-Wdeprecated-declarations")
	/* See xmlInitSAXParserCtxt() and xmlCtxtUseOptions() */
	if (parser->parsehuge) {
		/* Huge mode: set the option bit and remove the dictionary limit. */
		parser->parser->parser->options |= XML_PARSE_HUGE;
		xmlDictSetLimit(parser->parser->parser->dict, 0);
	} else {
		/* Normal mode: clear the option bit and apply XML_MAX_DICTIONARY_LIMIT. */
		parser->parser->parser->options &= ~XML_PARSE_HUGE;
		xmlDictSetLimit(parser->parser->parser->dict, XML_MAX_DICTIONARY_LIMIT);
	}
	ZEND_DIAGNOSTIC_IGNORED_END
#endif

	/* isparsing brackets the XML_Parse() call; it backs the assertion at the top. */
	parser->isparsing = 1;
	int ret = XML_Parse(parser->parser, (const XML_Char *) data, data_len, is_final);
	parser->isparsing = 0;
	return ret;
}
292
/* Store the decoded form of `s` into `ret`.
 *
 * s        - parser string, or NULL (in which case `ret` becomes false)
 * len      - length of `s`; 0 means "measure it with xml_xmlcharlen()"
 * encoding - target encoding name handed to xml_utf8_decode()
 * ret      - zval receiving the resulting string (or false)
 */
static void xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
{
	if (s != NULL) {
		const int effective_len = (len == 0) ? xml_xmlcharlen(s) : len;

		ZVAL_STR(ret, xml_utf8_decode(s, effective_len, encoding));
	} else {
		ZVAL_FALSE(ret);
	}
}
304 /* }}} */
305
xml_parser_from_obj(zend_object * obj)306 static inline xml_parser *xml_parser_from_obj(zend_object *obj) {
307 return (xml_parser *)((char *)(obj) - XtOffsetOf(xml_parser, std));
308 }
309
310 #define Z_XMLPARSER_P(zv) xml_parser_from_obj(Z_OBJ_P(zv))
311
xml_parser_create_object(zend_class_entry * class_type)312 static zend_object *xml_parser_create_object(zend_class_entry *class_type) {
313 xml_parser *intern = zend_object_alloc(sizeof(xml_parser), class_type);
314 memset(intern, 0, sizeof(xml_parser) - sizeof(zend_object));
315
316 zend_object_std_init(&intern->std, class_type);
317 object_properties_init(&intern->std, class_type);
318
319 return &intern->std;
320 }
321
/* Release the per-level tag name stack of `parser`.
 *
 * Frees the names stored for the levels that are currently open (bounded by
 * XML_MAXLEVEL), then the array itself. The pointer is reset to NULL so that
 * a second call, or any later check of parser->ltags, does not touch freed
 * memory. Does nothing when no stack is allocated.
 */
static void xml_parser_free_ltags(xml_parser *parser)
{
	if (!parser->ltags) {
		return;
	}

	for (int inx = 0; (inx < parser->level) && (inx < XML_MAXLEVEL); inx++) {
		efree(parser->ltags[inx]);
	}

	efree(parser->ltags);
	parser->ltags = NULL;
}
331
/* free_obj handler of XMLParser: releases the parser handle, the tag stack,
 * every registered callback, the base URI, the associated object and finally
 * the standard object data. */
static void xml_parser_free_obj(zend_object *object)
{
	xml_parser *parser = xml_parser_from_obj(object);
	/* Callbacks in the order they are torn down. */
	zend_fcall_info_cache *const callbacks[] = {
		&parser->startElementHandler,
		&parser->endElementHandler,
		&parser->characterDataHandler,
		&parser->processingInstructionHandler,
		&parser->defaultHandler,
		&parser->unparsedEntityDeclHandler,
		&parser->notationDeclHandler,
		&parser->externalEntityRefHandler,
		&parser->startNamespaceDeclHandler,
		&parser->endNamespaceDeclHandler,
	};

	if (parser->parser) {
		XML_ParserFree(parser->parser);
	}
	xml_parser_free_ltags(parser);

	for (size_t i = 0; i < sizeof(callbacks) / sizeof(callbacks[0]); i++) {
		zend_fcall_info_cache *fcc = callbacks[i];

		if (ZEND_FCC_INITIALIZED(*fcc)) {
			zend_fcc_dtor(fcc);
			fcc->function_handler = NULL;
		}
	}

	if (parser->baseURI) {
		efree(parser->baseURI);
	}
	if (parser->object) {
		OBJ_RELEASE(parser->object);
	}

	zend_object_std_dtor(&parser->std);
}
389
xml_parser_get_gc(zend_object * object,zval ** table,int * n)390 static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n)
391 {
392 xml_parser *parser = xml_parser_from_obj(object);
393
394 zend_get_gc_buffer *gc_buffer = zend_get_gc_buffer_create();
395 if (parser->object) {
396 zend_get_gc_buffer_add_obj(gc_buffer, parser->object);
397 }
398 if (ZEND_FCC_INITIALIZED(parser->startElementHandler)) {
399 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->startElementHandler);
400 }
401 if (ZEND_FCC_INITIALIZED(parser->endElementHandler)) {
402 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->endElementHandler);
403 }
404 if (ZEND_FCC_INITIALIZED(parser->characterDataHandler)) {
405 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->characterDataHandler);
406 }
407 if (ZEND_FCC_INITIALIZED(parser->processingInstructionHandler)) {
408 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->processingInstructionHandler);
409 }
410 if (ZEND_FCC_INITIALIZED(parser->defaultHandler)) {
411 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->defaultHandler);
412 }
413 if (ZEND_FCC_INITIALIZED(parser->unparsedEntityDeclHandler)) {
414 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->unparsedEntityDeclHandler);
415 }
416 if (ZEND_FCC_INITIALIZED(parser->notationDeclHandler)) {
417 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->notationDeclHandler);
418 }
419 if (ZEND_FCC_INITIALIZED(parser->externalEntityRefHandler)) {
420 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->externalEntityRefHandler);
421 }
422 if (ZEND_FCC_INITIALIZED(parser->startNamespaceDeclHandler)) {
423 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->startNamespaceDeclHandler);
424 }
425 if (ZEND_FCC_INITIALIZED(parser->endNamespaceDeclHandler)) {
426 zend_get_gc_buffer_add_fcc(gc_buffer, &parser->endNamespaceDeclHandler);
427 }
428
429 zend_get_gc_buffer_use(gc_buffer, table, n);
430
431 return zend_std_get_properties(object);
432 }
433
/* get_constructor handler of XMLParser: always throws an Error telling the
 * caller to use xml_parser_create() / xml_parser_create_ns(), and returns
 * NULL. The `object` argument is not used. */
static zend_function *xml_parser_get_constructor(zend_object *object) {
	zend_throw_error(NULL, "Cannot directly construct XMLParser, use xml_parser_create() or xml_parser_create_ns() instead");
	return NULL;
}
438
439 /* This is always called to simplify the mess to deal with BC breaks, but only set a new handler if it is initialized */
xml_set_handler(zend_fcall_info_cache * const parser_handler,const zend_fcall_info_cache * const fn)440 static void xml_set_handler(zend_fcall_info_cache *const parser_handler, const zend_fcall_info_cache *const fn)
441 {
442 /* If we have already a handler, release it */
443 if (ZEND_FCC_INITIALIZED(*parser_handler)) {
444 zend_fcc_dtor(parser_handler);
445 parser_handler->function_handler = NULL;
446 }
447
448 if (ZEND_FCC_INITIALIZED(*fn)) {
449 zend_fcc_dup(parser_handler, fn);
450 }
451 }
452
453 /* {{{ xml_encode_iso_8859_1() */
/* Widen one ISO-8859-1 byte to its code point; the values are identical. */
inline static unsigned short xml_encode_iso_8859_1(unsigned char c)
{
	const unsigned short code_point = c;

	return code_point;
}
458 /* }}} */
459
460 /* {{{ xml_decode_iso_8859_1() */
/* Narrow a code point to one ISO-8859-1 byte; anything above 0xff becomes '?'. */
inline static char xml_decode_iso_8859_1(unsigned short c)
{
	if (c > 0xff) {
		return '?';
	}

	return (char)c;
}
465 /* }}} */
466
467 /* {{{ xml_encode_us_ascii() */
/* Widen one byte to a code point; the value is passed through unchanged. */
inline static unsigned short xml_encode_us_ascii(unsigned char c)
{
	const unsigned short code_point = c;

	return code_point;
}
472 /* }}} */
473
474 /* {{{ xml_decode_us_ascii() */
/* Narrow a code point to one US-ASCII byte; anything above 0x7f becomes '?'. */
inline static char xml_decode_us_ascii(unsigned short c)
{
	if (c > 0x7f) {
		return '?';
	}

	return (char)c;
}
479 /* }}} */
480
481 /* {{{ xml_get_encoding() */
xml_get_encoding(const XML_Char * name)482 static const xml_encoding *xml_get_encoding(const XML_Char *name)
483 {
484 const xml_encoding *enc = &xml_encodings[0];
485
486 while (enc && enc->name) {
487 if (strcasecmp((char *)name, (char *)enc->name) == 0) {
488 return enc;
489 }
490 enc++;
491 }
492 return NULL;
493 }
494 /* }}} */
495
496 /* {{{ xml_utf8_decode() */
xml_utf8_decode(const XML_Char * s,size_t len,const XML_Char * encoding)497 static zend_string *xml_utf8_decode(const XML_Char *s, size_t len, const XML_Char *encoding)
498 {
499 size_t pos = 0;
500 unsigned int c;
501 char (*decoder)(unsigned short) = NULL;
502 const xml_encoding *enc = xml_get_encoding(encoding);
503 zend_string *str;
504
505 if (enc) {
506 decoder = enc->decoding_function;
507 }
508
509 if (decoder == NULL) {
510 /* If the target encoding was unknown, or no decoder function
511 * was specified, return the UTF-8-encoded data as-is.
512 */
513 str = zend_string_init((char *)s, len, 0);
514 return str;
515 }
516
517 str = zend_string_alloc(len, 0);
518 ZSTR_LEN(str) = 0;
519 while (pos < len) {
520 zend_result status = FAILURE;
521 c = php_next_utf8_char((const unsigned char*)s, len, &pos, &status);
522
523 if (status == FAILURE || c > 0xFFU) {
524 c = '?';
525 }
526
527 ZSTR_VAL(str)[ZSTR_LEN(str)++] = decoder(c);
528 }
529 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
530 if (ZSTR_LEN(str) < len) {
531 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
532 }
533
534 return str;
535 }
536 /* }}} */
537
538 /* {{{ xml_xmlcharlen() */
/* Count the XML_Char units in `s` before its zero terminator. */
static int xml_xmlcharlen(const XML_Char *s)
{
	int count;

	for (count = 0; *s; s++) {
		count++;
	}

	return count;
}
549 /* }}} */
550
551 /* {{{ xml_add_to_info() */
/* Record the current tag counter under `name` in the parser's info array and
 * advance the counter. Does nothing (counter included) when no info array is set. */
static void xml_add_to_info(xml_parser *parser, const char *name)
{
	if (Z_ISUNDEF(parser->info)) {
		return;
	}

	HashTable *info_table = Z_ARRVAL(parser->info);
	const size_t name_len = strlen(name);
	zval *positions = zend_hash_str_find(info_table, name, name_len);

	/* First occurrence of this name: start a new list for it. */
	if (positions == NULL) {
		zval fresh_list;

		array_init(&fresh_list);
		positions = zend_hash_str_update(info_table, name, name_len, &fresh_list);
	}

	add_next_index_long(positions, parser->curtag);

	parser->curtag++;
}
571 /* }}} */
572
573 /* {{{ xml_decode_tag() */
xml_decode_tag(xml_parser * parser,const XML_Char * tag)574 static zend_string *xml_decode_tag(xml_parser *parser, const XML_Char *tag)
575 {
576 zend_string *str;
577
578 str = xml_utf8_decode(tag, xml_xmlcharlen(tag), parser->target_encoding);
579
580 if (parser->case_folding) {
581 zend_str_toupper(ZSTR_VAL(str), ZSTR_LEN(str));
582 }
583
584 return str;
585 }
586 /* }}} */
587
588 /* {{{ xml_startElementHandler() */
xml_startElementHandler(void * userData,const XML_Char * name,const XML_Char ** attributes)589 void xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes)
590 {
591 xml_parser *parser = (xml_parser *)userData;
592 const char **attrs = (const char **) attributes;
593 zend_string *att, *tag_name, *val;
594
595 if (!parser) {
596 return;
597 }
598
599 parser->level++;
600
601 tag_name = xml_decode_tag(parser, name);
602
603 if (ZEND_FCC_INITIALIZED(parser->startElementHandler)) {
604 zval args[3];
605 ZVAL_COPY(&args[0], &parser->index);
606 ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
607 array_init(&args[2]);
608
609 while (attributes && *attributes) {
610 zval tmp;
611
612 att = xml_decode_tag(parser, attributes[0]);
613 val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
614
615 ZVAL_STR(&tmp, val);
616 zend_symtable_update(Z_ARRVAL(args[2]), att, &tmp);
617
618 attributes += 2;
619
620 zend_string_release_ex(att, 0);
621 }
622
623 zend_call_known_fcc(&parser->startElementHandler, /* retval */ NULL, /* param_count */ 3, args, /* named_params */ NULL);
624 zval_ptr_dtor(&args[0]);
625 zval_ptr_dtor(&args[1]);
626 zval_ptr_dtor(&args[2]);
627 }
628
629 if (!Z_ISUNDEF(parser->data) && !EG(exception)) {
630 if (parser->level <= XML_MAXLEVEL) {
631 zval tag, atr;
632 int atcnt = 0;
633
634 array_init(&tag);
635 array_init(&atr);
636
637 xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
638
639 add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
640 add_assoc_string(&tag, "type", "open");
641 add_assoc_long(&tag, "level", parser->level);
642
643 parser->ltags[parser->level-1] = estrdup(ZSTR_VAL(tag_name));
644 parser->lastwasopen = 1;
645
646 attributes = (const XML_Char **) attrs;
647
648 while (attributes && *attributes) {
649 zval tmp;
650
651 att = xml_decode_tag(parser, attributes[0]);
652 val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
653
654 ZVAL_STR(&tmp, val);
655 zend_symtable_update(Z_ARRVAL(atr), att, &tmp);
656
657 atcnt++;
658 attributes += 2;
659
660 zend_string_release_ex(att, 0);
661 }
662
663 if (atcnt) {
664 zend_hash_str_add(Z_ARRVAL(tag), "attributes", sizeof("attributes") - 1, &atr);
665 } else {
666 zval_ptr_dtor(&atr);
667 }
668
669 parser->ctag = zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
670 } else if (parser->level == (XML_MAXLEVEL + 1)) {
671 php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
672 }
673 }
674
675 zend_string_release_ex(tag_name, 0);
676 }
677 /* }}} */
678
679 /* {{{ xml_endElementHandler() */
xml_endElementHandler(void * userData,const XML_Char * name)680 void xml_endElementHandler(void *userData, const XML_Char *name)
681 {
682 xml_parser *parser = (xml_parser *)userData;
683
684 if (!parser) {
685 return;
686 }
687
688 zend_string *tag_name = xml_decode_tag(parser, name);
689
690 if (ZEND_FCC_INITIALIZED(parser->endElementHandler)) {
691 zval args[2];
692 ZVAL_COPY(&args[0], &parser->index);
693 ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
694
695 zend_call_known_fcc(&parser->endElementHandler, /* retval */ NULL, /* param_count */ 2, args, /* named_params */ NULL);
696 zval_ptr_dtor(&args[0]);
697 zval_ptr_dtor(&args[1]);
698 }
699
700 if (!Z_ISUNDEF(parser->data) && !EG(exception)) {
701 zval tag;
702
703 if (parser->lastwasopen) {
704 add_assoc_string(parser->ctag, "type", "complete");
705 } else {
706 array_init(&tag);
707
708 xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
709
710 add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
711 add_assoc_string(&tag, "type", "close");
712 add_assoc_long(&tag, "level", parser->level);
713
714 zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
715 }
716
717 parser->lastwasopen = 0;
718 }
719
720 zend_string_release_ex(tag_name, 0);
721
722 if ((parser->ltags) && (parser->level <= XML_MAXLEVEL)) {
723 efree(parser->ltags[parser->level-1]);
724 }
725
726 parser->level--;
727 }
728 /* }}} */
729
730 /* {{{ xml_characterDataHandler() */
xml_characterDataHandler(void * userData,const XML_Char * s,int len)731 void xml_characterDataHandler(void *userData, const XML_Char *s, int len)
732 {
733 xml_parser *parser = (xml_parser *)userData;
734
735 if (!parser) {
736 return;
737 }
738
739 if (ZEND_FCC_INITIALIZED(parser->characterDataHandler)) {
740 zval args[2];
741 ZVAL_COPY(&args[0], &parser->index);
742 xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
743
744 zend_call_known_fcc(&parser->characterDataHandler, /* retval */ NULL, /* param_count */ 2, args, /* named_params */ NULL);
745 zval_ptr_dtor(&args[0]);
746 zval_ptr_dtor(&args[1]);
747 }
748
749 if (Z_ISUNDEF(parser->data) || EG(exception)) {
750 return;
751 }
752
753 bool doprint = 0;
754 zend_string *decoded_value;
755 decoded_value = xml_utf8_decode(s, len, parser->target_encoding);
756 if (parser->skipwhite) {
757 for (size_t i = 0; i < ZSTR_LEN(decoded_value); i++) {
758 switch (ZSTR_VAL(decoded_value)[i]) {
759 case ' ':
760 case '\t':
761 case '\n':
762 continue;
763 default:
764 doprint = 1;
765 break;
766 }
767 if (doprint) {
768 break;
769 }
770 }
771 }
772 if (parser->lastwasopen) {
773 zval *myval;
774 /* check if the current tag already has a value - if yes append to that! */
775 if ((myval = zend_hash_find(Z_ARRVAL_P(parser->ctag), ZSTR_KNOWN(ZEND_STR_VALUE)))) {
776 size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
777 Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
778 strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
779 ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
780 zend_string_release_ex(decoded_value, 0);
781 } else {
782 if (doprint || (! parser->skipwhite)) {
783 add_assoc_str(parser->ctag, "value", decoded_value);
784 } else {
785 zend_string_release_ex(decoded_value, 0);
786 }
787 }
788 } else {
789 zval tag;
790 zval *curtag, *mytype, *myval;
791 ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
792 if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
793 if (zend_string_equals_literal(Z_STR_P(mytype), "cdata")) {
794 if ((myval = zend_hash_find(Z_ARRVAL_P(curtag), ZSTR_KNOWN(ZEND_STR_VALUE)))) {
795 size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
796 Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
797 strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
798 ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
799 zend_string_release_ex(decoded_value, 0);
800 return;
801 }
802 }
803 }
804 break;
805 } ZEND_HASH_FOREACH_END();
806 if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) {
807 array_init(&tag);
808 xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
809 add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
810 add_assoc_str(&tag, "value", decoded_value);
811 add_assoc_string(&tag, "type", "cdata");
812 add_assoc_long(&tag, "level", parser->level);
813 zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
814 } else if (parser->level == (XML_MAXLEVEL + 1)) {
815 php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
816 } else {
817 zend_string_release_ex(decoded_value, 0);
818 }
819 }
820 }
821 /* }}} */
822
823 /* {{{ xml_processingInstructionHandler() */
xml_processingInstructionHandler(void * userData,const XML_Char * target,const XML_Char * data)824 void xml_processingInstructionHandler(void *userData, const XML_Char *target, const XML_Char *data)
825 {
826 xml_parser *parser = (xml_parser *)userData;
827
828 if (!parser || !ZEND_FCC_INITIALIZED(parser->processingInstructionHandler)) {
829 return;
830 }
831
832 zval args[3];
833
834 ZVAL_COPY(&args[0], &parser->index);
835 xml_xmlchar_zval(target, 0, parser->target_encoding, &args[1]);
836 xml_xmlchar_zval(data, 0, parser->target_encoding, &args[2]);
837
838 zend_call_known_fcc(&parser->processingInstructionHandler, /* retval */ NULL, /* param_count */ 3, args, /* named_params */ NULL);
839 zval_ptr_dtor(&args[0]);
840 zval_ptr_dtor(&args[1]);
841 zval_ptr_dtor(&args[2]);
842 }
843 /* }}} */
844
845 /* {{{ xml_defaultHandler() */
xml_defaultHandler(void * userData,const XML_Char * s,int len)846 void xml_defaultHandler(void *userData, const XML_Char *s, int len)
847 {
848 xml_parser *parser = (xml_parser *)userData;
849
850 if (!parser || !ZEND_FCC_INITIALIZED(parser->defaultHandler)) {
851 return;
852 }
853
854 zval args[2];
855
856 ZVAL_COPY(&args[0], &parser->index);
857 xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
858
859 zend_call_known_fcc(&parser->defaultHandler, /* retval */ NULL, /* param_count */ 2, args, /* named_params */ NULL);
860 zval_ptr_dtor(&args[0]);
861 zval_ptr_dtor(&args[1]);
862 }
863 /* }}} */
864
865 /* {{{ xml_unparsedEntityDeclHandler() */
xml_unparsedEntityDeclHandler(void * userData,const XML_Char * entityName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)866 void xml_unparsedEntityDeclHandler(void *userData,
867 const XML_Char *entityName, const XML_Char *base, const XML_Char *systemId,
868 const XML_Char *publicId, const XML_Char *notationName)
869 {
870 xml_parser *parser = (xml_parser *)userData;
871
872 if (!parser || !ZEND_FCC_INITIALIZED(parser->unparsedEntityDeclHandler)) {
873 return;
874 }
875
876 zval args[6];
877
878 ZVAL_COPY(&args[0], &parser->index);
879 xml_xmlchar_zval(entityName, 0, parser->target_encoding, &args[1]);
880 xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
881 xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
882 xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
883 xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[5]);
884
885 zend_call_known_fcc(&parser->unparsedEntityDeclHandler, /* retval */ NULL, /* param_count */ 6, args, /* named_params */ NULL);
886 zval_ptr_dtor(&args[0]);
887 zval_ptr_dtor(&args[1]);
888 zval_ptr_dtor(&args[2]);
889 zval_ptr_dtor(&args[3]);
890 zval_ptr_dtor(&args[4]);
891 zval_ptr_dtor(&args[5]);
892 }
893 /* }}} */
894
895 /* {{{ xml_notationDeclHandler() */
xml_notationDeclHandler(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)896 void xml_notationDeclHandler(void *userData, const XML_Char *notationName,
897 const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
898 {
899 xml_parser *parser = (xml_parser *)userData;
900
901 if (!parser || !ZEND_FCC_INITIALIZED(parser->notationDeclHandler)) {
902 return;
903 }
904
905 zval args[5];
906
907 ZVAL_COPY(&args[0], &parser->index);
908 xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[1]);
909 xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
910 xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
911 xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
912
913 zend_call_known_fcc(&parser->notationDeclHandler, /* retval */ NULL, /* param_count */ 5, args, /* named_params */ NULL);
914 zval_ptr_dtor(&args[0]);
915 zval_ptr_dtor(&args[1]);
916 zval_ptr_dtor(&args[2]);
917 zval_ptr_dtor(&args[3]);
918 zval_ptr_dtor(&args[4]);
919 }
920 /* }}} */
921
922 /* {{{ xml_externalEntityRefHandler() */
/* External-entity-reference callback.
 *
 * Fetches the xml_parser state from the XML_Parser's user data and forwards
 * (parser, openEntityNames, base, systemId, publicId) to the user's
 * callback; NULL strings are passed as false.
 *
 * Returns the callback's return value converted to an integer, or 0 when
 * there is no parser state, no registered callback, or no return value.
 */
int xml_externalEntityRefHandler(XML_Parser userData, const XML_Char *openEntityNames,
	const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
{
	xml_parser *parser = XML_GetUserData(userData);

	if (!parser || !ZEND_FCC_INITIALIZED(parser->externalEntityRefHandler)) {
		/* abort if no handler is set (should be configurable?) */
		return 0;
	}

	const XML_Char *const fields[] = { openEntityNames, base, systemId, publicId };
	zval args[5];
	zval retval;

	ZVAL_COPY(&args[0], &parser->index);
	for (size_t i = 0; i < 4; i++) {
		xml_xmlchar_zval(fields[i], 0, parser->target_encoding, &args[i + 1]);
	}

	zend_call_known_fcc(&parser->externalEntityRefHandler, /* retval */ &retval, /* param_count */ 5, args, /* named_params */ NULL);

	for (size_t i = 0; i < 5; i++) {
		zval_ptr_dtor(&args[i]);
	}

	/* TODO Better handling from callable return value */
	if (Z_ISUNDEF(retval)) {
		return 0;
	}

	convert_to_long(&retval);

	int ret = Z_LVAL(retval);
	return ret;
}
958 /* }}} */
959
960 /* {{{ xml_startNamespaceDeclHandler() */
xml_startNamespaceDeclHandler(void * userData,const XML_Char * prefix,const XML_Char * uri)961 void xml_startNamespaceDeclHandler(void *userData,const XML_Char *prefix, const XML_Char *uri)
962 {
963 xml_parser *parser = (xml_parser *)userData;
964
965 if (!parser || !ZEND_FCC_INITIALIZED(parser->startNamespaceDeclHandler)) {
966 return;
967 }
968
969 zval args[3];
970
971 ZVAL_COPY(&args[0], &parser->index);
972 xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
973 xml_xmlchar_zval(uri, 0, parser->target_encoding, &args[2]);
974
975 zend_call_known_fcc(&parser->startNamespaceDeclHandler, /* retval */ NULL, /* param_count */ 3, args, /* named_params */ NULL);
976 zval_ptr_dtor(&args[0]);
977 zval_ptr_dtor(&args[1]);
978 zval_ptr_dtor(&args[2]);
979 }
980 /* }}} */
981
982 /* {{{ xml_endNamespaceDeclHandler() */
xml_endNamespaceDeclHandler(void * userData,const XML_Char * prefix)983 void xml_endNamespaceDeclHandler(void *userData, const XML_Char *prefix)
984 {
985 xml_parser *parser = (xml_parser *)userData;
986
987 if (!parser || !ZEND_FCC_INITIALIZED(parser->endNamespaceDeclHandler)) {
988 return;
989 }
990
991 zval args[2];
992
993 ZVAL_COPY(&args[0], &parser->index);
994 xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
995
996 zend_call_known_fcc(&parser->endNamespaceDeclHandler, /* retval */ NULL, /* param_count */ 2, args, /* named_params */ NULL);
997 zval_ptr_dtor(&args[0]);
998 zval_ptr_dtor(&args[1]);
999 }
1000 /* }}} */
1001
1002 /************************* EXTENSION FUNCTIONS *************************/
1003
/* Shared implementation of xml_parser_create() and xml_parser_create_ns().
 *
 * Accepts an optional, nullable encoding string and, when ns_support is
 * non-zero, an optional namespace separator string (":" when omitted).
 *
 * Encoding handling:
 *  - not given / null: the module's default encoding is used;
 *  - empty string: no encoding is passed to XML_ParserCreate_MM() and the
 *    default encoding becomes the target encoding;
 *  - "ISO-8859-1", "UTF-8" or "US-ASCII" (case-insensitive): that encoding;
 *  - anything else: a ValueError for argument 1 is thrown.
 *
 * On success return_value is initialised as an xml_parser_ce object whose
 * parser state is set up with case folding enabled and huge parsing disabled. */
static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_support) /* {{{ */
{
	zend_string *encoding_param = NULL;
	char *ns_param = NULL;
	size_t ns_param_len = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), (ns_support ? "|S!s": "|S!"), &encoding_param, &ns_param, &ns_param_len) == FAILURE) {
		RETURN_THROWS();
	}

	/* Start from the default and override only for an explicit, known name. */
	XML_Char *encoding = XML(default_encoding);
	bool auto_detect = false;

	if (encoding_param != NULL) {
		/* The supported encoding types are hardcoded here because
		 * we are limited to the encodings supported by expat/xmltok.
		 */
		if (ZSTR_LEN(encoding_param) == 0) {
			auto_detect = true;
		} else if (zend_string_equals_literal_ci(encoding_param, "ISO-8859-1")) {
			encoding = (XML_Char*)"ISO-8859-1";
		} else if (zend_string_equals_literal_ci(encoding_param, "UTF-8")) {
			encoding = (XML_Char*)"UTF-8";
		} else if (zend_string_equals_literal_ci(encoding_param, "US-ASCII")) {
			encoding = (XML_Char*)"US-ASCII";
		} else {
			zend_argument_value_error(1, "is not a supported source encoding");
			RETURN_THROWS();
		}
	}

	if (ns_support && ns_param == NULL) {
		ns_param = ":";
	}

	object_init_ex(return_value, xml_parser_ce);

	xml_parser *parser = Z_XMLPARSER_P(return_value);

	parser->parser = XML_ParserCreate_MM(auto_detect ? NULL : encoding,
		&php_xml_mem_hdlrs, (XML_Char*)ns_param);
	parser->target_encoding = encoding;
	parser->case_folding = 1;
	parser->isparsing = 0;
	parser->parsehuge = false; /* It's the default for BC & DoS protection */

	XML_SetUserData(parser->parser, parser);
	ZVAL_COPY_VALUE(&parser->index, return_value);
}
1058 /* }}} */
1059
1060 /* {{{ Create an XML parser */
PHP_FUNCTION(xml_parser_create)1061 PHP_FUNCTION(xml_parser_create)
1062 {
1063 php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1064 }
1065 /* }}} */
1066
/* {{{ Create an XML parser with namespace support */
PHP_FUNCTION(xml_parser_create_ns)1068 PHP_FUNCTION(xml_parser_create_ns)
1069 {
1070 php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1071 }
1072 /* }}} */
1073
/* Resolves a handler given as a method-name string against `object`.
 *
 * arg_num            Argument position used for error messages; 0 suppresses
 *                    the "method does not exist" error (the empty-name and
 *                    missing-object paths assert that it is non-zero).
 * object             Object whose class is searched for the method; may be NULL.
 * method_name        Method name; the empty string means "unset the handler".
 * parser_handler_fcc FCC that is filled in when the method is found.
 *
 * Returns true when the name is empty (the FCC is left untouched) or when the
 * method was found (the FCC is populated). Returns false when no object is
 * set or the method does not exist; a ValueError is thrown in those cases
 * except for a missing method with arg_num == 0. */
static bool php_xml_check_string_method_arg(
	unsigned int arg_num,
	zend_object *object,
	zend_string *method_name,
	zend_fcall_info_cache *const parser_handler_fcc
) {
	if (ZSTR_LEN(method_name) == 0) {
		ZEND_ASSERT(arg_num != 0);
		/* Unset handler */
		return true;
	}

	if (object == NULL) {
		ZEND_ASSERT(arg_num != 0);
		zend_argument_value_error(arg_num, "an object must be set via xml_set_object() to be able to lookup method");
		return false;
	}

	zend_class_entry *ce = object->ce;
	zend_function *method_ptr = zend_hash_find_ptr_lc(&ce->function_table, method_name);

	if (method_ptr != NULL) {
		parser_handler_fcc->function_handler = method_ptr;
		/* We set the calling scope to NULL to be able to differentiate a "method" set from a proper callable */
		parser_handler_fcc->calling_scope = NULL;
		parser_handler_fcc->called_scope = ce;
		parser_handler_fcc->object = object;
		return true;
	}

	if (arg_num != 0) {
		zend_argument_value_error(arg_num, "method %s::%s() does not exist", ZSTR_VAL(ce->name), ZSTR_VAL(method_name));
	}
	return false;
}
1109
/*
 * Re-targets one handler FCC of `parser_to_check` at `new_this_obj`.
 *
 * Only acts on a handler that is initialized, bound to the parser's current
 * object and has a NULL calling scope (the marker used for handlers that were
 * set as a method-name string). The method name is looked up again on
 * `new_this_obj` via php_xml_check_string_method_arg(); when it is missing, a
 * ValueError for argument 2 is thrown and the enclosing PHP function returns.
 * `handler_set_method` must be a string literal: it is concatenated onto the
 * error message at compile time.
 *
 * NOTE(review): on failure the old FCC has already been passed to
 * zend_fcc_dtor() before the enclosing function returns.
 *
 * Wrapped in do { } while (0) with parenthesized arguments so the macro
 * behaves as a single statement (safe inside unbraced if/else).
 */
#define PHP_XML_CHECK_NEW_THIS_METHODS(parser_to_check, new_this_obj, fcc_field, handler_set_method) \
	do { \
		if ( \
			ZEND_FCC_INITIALIZED((parser_to_check)->fcc_field) \
			&& (parser_to_check)->fcc_field.object == (parser_to_check)->object \
			&& (parser_to_check)->fcc_field.calling_scope == NULL \
		) { \
			zend_string *method_name = zend_string_copy((parser_to_check)->fcc_field.function_handler->common.function_name); \
			zend_fcc_dtor(&(parser_to_check)->fcc_field); \
			bool status = php_xml_check_string_method_arg(0, (new_this_obj), method_name, &(parser_to_check)->fcc_field); \
			if (status == false) { \
				zend_argument_value_error(2, "cannot safely swap to object of class %s as method \"%s\" does not exist, which was set via " handler_set_method, \
					ZSTR_VAL((new_this_obj)->ce->name), ZSTR_VAL(method_name)); \
				zend_string_release(method_name); \
				RETURN_THROWS(); \
			} \
			zend_string_release(method_name); \
			zend_fcc_addref(&(parser_to_check)->fcc_field); \
		} \
	} while (0)
1128
1129
1130 /* {{{ Set up object which should be used for callbacks */
PHP_FUNCTION(xml_set_object)1131 PHP_FUNCTION(xml_set_object)
1132 {
1133 xml_parser *parser;
1134 zval *pind, *mythis;
1135 zend_object *new_this;
1136
1137 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oo", &pind, xml_parser_ce, &mythis) == FAILURE) {
1138 RETURN_THROWS();
1139 }
1140
1141 parser = Z_XMLPARSER_P(pind);
1142 new_this = Z_OBJ_P(mythis);
1143
1144 if (parser->object) {
1145 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, startElementHandler, "xml_set_element_handler()");
1146 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, endElementHandler, "xml_set_element_handler()");
1147 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, characterDataHandler, "xml_set_character_data_handler()");
1148 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, processingInstructionHandler, "xml_set_processing_instruction_handler()");
1149 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, defaultHandler, "xml_set_default_handler()");
1150 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, unparsedEntityDeclHandler, "xml_set_unparsed_entity_decl_handler()");
1151 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, notationDeclHandler, "xml_set_notation_decl_handler()");
1152 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, externalEntityRefHandler, "xml_set_external_entity_ref_handler()");
1153 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, startNamespaceDeclHandler, "xml_set_start_namespace_decl_handler()");
1154 PHP_XML_CHECK_NEW_THIS_METHODS(parser, new_this, endNamespaceDeclHandler, "xml_set_end_namespace_decl_handler()");
1155
1156 OBJ_RELEASE(parser->object);
1157 }
1158
1159 parser->object = new_this;
1160 GC_ADDREF(parser->object);
1161
1162 RETURN_TRUE;
1163 }
1164 /* }}} */
1165
1166 /* {{{ Set up start and end element handlers */
PHP_FUNCTION(xml_set_element_handler)1167 PHP_FUNCTION(xml_set_element_handler)
1168 {
1169 xml_parser *parser;
1170 zval *pind;
1171 zend_fcall_info start_fci = {0};
1172 zend_fcall_info_cache start_fcc = {0};
1173 zend_fcall_info end_fci = {0};
1174 zend_fcall_info_cache end_fcc = {0};
1175 zend_string *start_method_name = NULL;
1176 zend_string *end_method_name = NULL;
1177
1178 if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OF!F!", &pind, xml_parser_ce, &start_fci, &start_fcc, &end_fci, &end_fcc) == SUCCESS) {
1179 parser = Z_XMLPARSER_P(pind);
1180 goto set_handlers;
1181 }
1182 zend_release_fcall_info_cache(&start_fcc);
1183 zend_release_fcall_info_cache(&end_fcc);
1184
1185 if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OF!S", &pind, xml_parser_ce, &start_fci, &start_fcc, &end_method_name) == SUCCESS) {
1186 parser = Z_XMLPARSER_P(pind);
1187
1188 php_error_docref(NULL, E_DEPRECATED, "Passing non-callable strings is deprecated since 8.4");
1189 if (UNEXPECTED(EG(exception))) {
1190 zend_release_fcall_info_cache(&start_fcc);
1191 zend_release_fcall_info_cache(&end_fcc);
1192 RETURN_THROWS();
1193 }
1194
1195 bool status = php_xml_check_string_method_arg(3, parser->object, end_method_name, &end_fcc);
1196 if (status == false) {
1197 zend_release_fcall_info_cache(&start_fcc);
1198 zend_release_fcall_info_cache(&end_fcc);
1199 RETURN_THROWS();
1200 }
1201 } else if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OSF!", &pind, xml_parser_ce, &start_method_name, &end_fci, &end_fcc) == SUCCESS) {
1202 parser = Z_XMLPARSER_P(pind);
1203
1204 php_error_docref(NULL, E_DEPRECATED, "Passing non-callable strings is deprecated since 8.4");
1205 if (UNEXPECTED(EG(exception))) {
1206 zend_release_fcall_info_cache(&start_fcc);
1207 zend_release_fcall_info_cache(&end_fcc);
1208 RETURN_THROWS();
1209 }
1210
1211 bool status = php_xml_check_string_method_arg(2, parser->object, start_method_name, &start_fcc);
1212 if (status == false) {
1213 zend_release_fcall_info_cache(&start_fcc);
1214 zend_release_fcall_info_cache(&end_fcc);
1215 RETURN_THROWS();
1216 }
1217 } else if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OSS", &pind, xml_parser_ce, &start_method_name, &end_method_name) == SUCCESS) {
1218 zend_release_fcall_info_cache(&start_fcc);
1219 zend_release_fcall_info_cache(&end_fcc);
1220
1221 php_error_docref(NULL, E_DEPRECATED, "Passing non-callable strings is deprecated since 8.4");
1222 if (UNEXPECTED(EG(exception))) {
1223 RETURN_THROWS();
1224 }
1225
1226 parser = Z_XMLPARSER_P(pind);
1227
1228 bool status = php_xml_check_string_method_arg(2, parser->object, start_method_name, &start_fcc);
1229 if (status == false) {
1230 RETURN_THROWS();
1231 }
1232 status = php_xml_check_string_method_arg(3, parser->object, end_method_name, &end_fcc);
1233 if (status == false) {
1234 RETURN_THROWS();
1235 }
1236 } else {
1237 zval *dummy_start;
1238 zval *dummy_end;
1239
1240 zend_release_fcall_info_cache(&start_fcc);
1241 zend_release_fcall_info_cache(&end_fcc);
1242 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ozz", &pind, xml_parser_ce, &dummy_start, &dummy_end) == FAILURE) {
1243 RETURN_THROWS();
1244 } else {
1245 switch (Z_TYPE_P(dummy_start)) {
1246 case IS_NULL:
1247 case IS_STRING:
1248 break;
1249 default:
1250 zend_argument_type_error(2, "must be of type callable|string|null");
1251 RETURN_THROWS();
1252 }
1253 zend_argument_type_error(3, "must be of type callable|string|null");
1254 RETURN_THROWS();
1255 }
1256 }
1257
1258 set_handlers:
1259 xml_set_handler(&parser->startElementHandler, &start_fcc);
1260 xml_set_handler(&parser->endElementHandler, &end_fcc);
1261 XML_SetElementHandler(parser->parser, xml_startElementHandler, xml_endElementHandler);
1262
1263 RETURN_TRUE;
1264 }
1265 /* }}} */
1266
/* Parses the (parser, handler) arguments shared by the single-handler
 * xml_set_*_handler() functions.
 *
 * parser             Out: set to the xml_parser of argument 1 whenever the
 *                    callable or string form parses successfully.
 * parser_handler_fcc Out: receives the resolved handler. It is left untouched
 *                    when the handler is null or an empty method name, which
 *                    the caller treats as "unset the handler".
 *
 * Accepted forms, tried in order: callable|null, then (deprecated, with an
 * E_DEPRECATED notice) a method-name string resolved against the parser's
 * object. Anything else ends with an exception pending: either the argument
 * parsing error or a TypeError for argument 2. Callers detect failure via
 * EG(exception). */
static void php_xml_set_handler_parse_callable(
	INTERNAL_FUNCTION_PARAMETERS,
	xml_parser **const parser,
	zend_fcall_info_cache *const parser_handler_fcc
) {
	zval *parser_zv;
	zend_fcall_info handler_fci = {0};
	zend_fcall_info_cache handler_fcc = {0};
	zend_string *method_name = NULL;

	if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OF!", &parser_zv, xml_parser_ce, &handler_fci, &handler_fcc) == SUCCESS) {
		*parser = Z_XMLPARSER_P(parser_zv);
		/* A null handler frees the current one: an uninitialized FCC communicates this */
		if (ZEND_FCI_INITIALIZED(handler_fci)) {
			*parser_handler_fcc = handler_fcc;
		}
		return;
	}

	if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "OS", &parser_zv, xml_parser_ce, &method_name) == SUCCESS) {
		*parser = Z_XMLPARSER_P(parser_zv);

		php_error_docref(NULL, E_DEPRECATED, "Passing non-callable strings is deprecated since 8.4");
		if (UNEXPECTED(EG(exception))) {
			RETURN_THROWS();
		}

		if (!php_xml_check_string_method_arg(2, (*parser)->object, method_name, parser_handler_fcc)) {
			RETURN_THROWS();
		}
		return;
	}

	/* Neither form matched: report the most specific error. */
	zval *raw_handler;
	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &parser_zv, xml_parser_ce, &raw_handler) == FAILURE) {
		RETURN_THROWS();
	}
	zend_argument_type_error(2, "must be of type callable|string|null");
	RETURN_THROWS();
}
1303
/*
 * Generates the PHP function `function_name`, a single-handler setter.
 *
 * The generated function parses its arguments with
 * php_xml_set_handler_parse_callable(), returns early if that left an
 * exception pending, stores the handler in parser->parser_handler_name via
 * xml_set_handler(), registers `c_function` on the underlying parser through
 * `parse_function`, and returns true.
 */
#define XML_SET_HANDLER_PHP_FUNCTION(function_name, parser_handler_name, parse_function, c_function) \
	PHP_FUNCTION(function_name) \
	{ \
		zend_fcall_info_cache new_handler_fcc = {0}; \
		xml_parser *target = NULL; \
		\
		php_xml_set_handler_parse_callable(INTERNAL_FUNCTION_PARAM_PASSTHRU, &target, &new_handler_fcc); \
		if (EG(exception)) { \
			return; \
		} \
		\
		ZEND_ASSERT(target); \
		xml_set_handler(&target->parser_handler_name, &new_handler_fcc); \
		parse_function(target->parser, c_function); \
		RETURN_TRUE; \
	}
1316
1317 XML_SET_HANDLER_PHP_FUNCTION(xml_set_character_data_handler, characterDataHandler, XML_SetCharacterDataHandler, xml_characterDataHandler);
1318 XML_SET_HANDLER_PHP_FUNCTION(xml_set_processing_instruction_handler, processingInstructionHandler, XML_SetProcessingInstructionHandler, xml_processingInstructionHandler);
1319 XML_SET_HANDLER_PHP_FUNCTION(xml_set_default_handler, defaultHandler, XML_SetDefaultHandler, xml_defaultHandler);
1320 XML_SET_HANDLER_PHP_FUNCTION(xml_set_unparsed_entity_decl_handler, unparsedEntityDeclHandler, XML_SetUnparsedEntityDeclHandler, xml_unparsedEntityDeclHandler);
1321 XML_SET_HANDLER_PHP_FUNCTION(xml_set_notation_decl_handler, notationDeclHandler, XML_SetNotationDeclHandler, xml_notationDeclHandler);
1322 XML_SET_HANDLER_PHP_FUNCTION(xml_set_external_entity_ref_handler, externalEntityRefHandler, XML_SetExternalEntityRefHandler, xml_externalEntityRefHandler);
1323 XML_SET_HANDLER_PHP_FUNCTION(xml_set_start_namespace_decl_handler, startNamespaceDeclHandler, XML_SetStartNamespaceDeclHandler, xml_startNamespaceDeclHandler);
1324 XML_SET_HANDLER_PHP_FUNCTION(xml_set_end_namespace_decl_handler, endNamespaceDeclHandler, XML_SetEndNamespaceDeclHandler, xml_endNamespaceDeclHandler);
1325
1326 /* {{{ Start parsing an XML document */
PHP_FUNCTION(xml_parse)1327 PHP_FUNCTION(xml_parse)
1328 {
1329 xml_parser *parser;
1330 zval *pind;
1331 char *data;
1332 size_t data_len;
1333 bool isFinal = 0;
1334
1335 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
1336 RETURN_THROWS();
1337 }
1338
1339 parser = Z_XMLPARSER_P(pind);
1340 if (parser->isparsing) {
1341 zend_throw_error(NULL, "Parser must not be called recursively");
1342 RETURN_THROWS();
1343 }
1344 RETURN_LONG(xml_parse_helper(parser, data, data_len, isFinal));
1345 }
1346
1347 /* }}} */
1348
/* {{{ Parse an XML document into array structures */
PHP_FUNCTION(xml_parse_into_struct)1350 PHP_FUNCTION(xml_parse_into_struct)
1351 {
1352 xml_parser *parser;
1353 zval *pind, *xdata, *info = NULL;
1354 char *data;
1355 size_t data_len;
1356
1357 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
1358 RETURN_THROWS();
1359 }
1360
1361 parser = Z_XMLPARSER_P(pind);
1362
1363 if (parser->isparsing) {
1364 php_error_docref(NULL, E_WARNING, "Parser must not be called recursively");
1365 RETURN_FALSE;
1366 }
1367
1368 if (info) {
1369 info = zend_try_array_init(info);
1370 if (!info) {
1371 RETURN_THROWS();
1372 }
1373 }
1374
1375 xdata = zend_try_array_init(xdata);
1376 if (!xdata) {
1377 RETURN_THROWS();
1378 }
1379
1380 ZVAL_COPY_VALUE(&parser->data, xdata);
1381
1382 if (info) {
1383 ZVAL_COPY_VALUE(&parser->info, info);
1384 }
1385
1386 parser->level = 0;
1387 xml_parser_free_ltags(parser);
1388 parser->ltags = safe_emalloc(XML_MAXLEVEL, sizeof(char *), 0);
1389 memset(parser->ltags, 0, XML_MAXLEVEL * sizeof(char *));
1390
1391 XML_SetElementHandler(parser->parser, xml_startElementHandler, xml_endElementHandler);
1392 XML_SetCharacterDataHandler(parser->parser, xml_characterDataHandler);
1393
1394 RETURN_LONG(xml_parse_helper(parser, data, data_len, true));
1395 }
1396 /* }}} */
1397
1398 /* {{{ Get XML parser error code */
PHP_FUNCTION(xml_get_error_code)1399 PHP_FUNCTION(xml_get_error_code)
1400 {
1401 xml_parser *parser;
1402 zval *pind;
1403
1404 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1405 RETURN_THROWS();
1406 }
1407
1408 parser = Z_XMLPARSER_P(pind);
1409 RETURN_LONG((zend_long)XML_GetErrorCode(parser->parser));
1410 }
1411 /* }}} */
1412
1413 /* {{{ Get XML parser error string */
PHP_FUNCTION(xml_error_string)1414 PHP_FUNCTION(xml_error_string)
1415 {
1416 zend_long code;
1417 char *str;
1418
1419 if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &code) == FAILURE) {
1420 RETURN_THROWS();
1421 }
1422
1423 str = (char *)XML_ErrorString((int)code);
1424 if (str) {
1425 RETVAL_STRING(str);
1426 }
1427 }
1428 /* }}} */
1429
1430 /* {{{ Get current line number for an XML parser */
PHP_FUNCTION(xml_get_current_line_number)1431 PHP_FUNCTION(xml_get_current_line_number)
1432 {
1433 xml_parser *parser;
1434 zval *pind;
1435
1436 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1437 RETURN_THROWS();
1438 }
1439
1440 parser = Z_XMLPARSER_P(pind);
1441 RETVAL_LONG(XML_GetCurrentLineNumber(parser->parser));
1442 }
1443 /* }}} */
1444
1445 /* {{{ Get current column number for an XML parser */
PHP_FUNCTION(xml_get_current_column_number)1446 PHP_FUNCTION(xml_get_current_column_number)
1447 {
1448 xml_parser *parser;
1449 zval *pind;
1450
1451 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1452 RETURN_THROWS();
1453 }
1454
1455 parser = Z_XMLPARSER_P(pind);
1456 RETVAL_LONG(XML_GetCurrentColumnNumber(parser->parser));
1457 }
1458 /* }}} */
1459
1460 /* {{{ Get current byte index for an XML parser */
PHP_FUNCTION(xml_get_current_byte_index)1461 PHP_FUNCTION(xml_get_current_byte_index)
1462 {
1463 xml_parser *parser;
1464 zval *pind;
1465
1466 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1467 RETURN_THROWS();
1468 }
1469
1470 parser = Z_XMLPARSER_P(pind);
1471 RETVAL_LONG(XML_GetCurrentByteIndex(parser->parser));
1472 }
1473 /* }}} */
1474
1475 /* {{{ Free an XML parser */
PHP_FUNCTION(xml_parser_free)1476 PHP_FUNCTION(xml_parser_free)
1477 {
1478 zval *pind;
1479 xml_parser *parser;
1480
1481 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1482 RETURN_THROWS();
1483 }
1484
1485 parser = Z_XMLPARSER_P(pind);
1486 if (parser->isparsing == 1) {
1487 php_error_docref(NULL, E_WARNING, "Parser cannot be freed while it is parsing");
1488 RETURN_FALSE;
1489 }
1490
1491 RETURN_TRUE;
1492 }
1493 /* }}} */
1494
1495 /* {{{ Set options in an XML parser */
PHP_FUNCTION(xml_parser_set_option)1496 PHP_FUNCTION(xml_parser_set_option)
1497 {
1498 xml_parser *parser;
1499 zval *pind;
1500 zend_long opt;
1501 zval *value;
1502
1503 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Olz", &pind, xml_parser_ce, &opt, &value) == FAILURE) {
1504 RETURN_THROWS();
1505 }
1506
1507 if (Z_TYPE_P(value) != IS_FALSE && Z_TYPE_P(value) != IS_TRUE &&
1508 Z_TYPE_P(value) != IS_LONG && Z_TYPE_P(value) != IS_STRING) {
1509 php_error_docref(NULL, E_WARNING,
1510 "Argument #3 ($value) must be of type string|int|bool, %s given", zend_zval_type_name(value));
1511 }
1512
1513 parser = Z_XMLPARSER_P(pind);
1514 switch (opt) {
1515 /* Boolean option */
1516 case PHP_XML_OPTION_CASE_FOLDING:
1517 parser->case_folding = zend_is_true(value);
1518 break;
1519 /* Boolean option */
1520 case PHP_XML_OPTION_SKIP_WHITE:
1521 parser->skipwhite = zend_is_true(value);
1522 break;
1523 /* Boolean option */
1524 case PHP_XML_OPTION_PARSE_HUGE:
1525 /* Prevent wreaking havock to the parser internals during parsing */
1526 if (UNEXPECTED(parser->isparsing)) {
1527 zend_throw_error(NULL, "Cannot change option XML_OPTION_PARSE_HUGE while parsing");
1528 RETURN_THROWS();
1529 }
1530 parser->parsehuge = zend_is_true(value);
1531 break;
1532 /* Integer option */
1533 case PHP_XML_OPTION_SKIP_TAGSTART: {
1534 /* The tag start offset is stored in an int */
1535 /* TODO Improve handling of values? */
1536 zend_long value_long = zval_get_long(value);
1537 if (value_long < 0 || value_long > INT_MAX) {
1538 /* TODO Promote to ValueError in PHP 9.0 */
1539 php_error_docref(NULL, E_WARNING, "Argument #3 ($value) must be between 0 and %d"
1540 " for option XML_OPTION_SKIP_TAGSTART", INT_MAX);
1541 RETURN_FALSE;
1542 }
1543 parser->toffset = (int) value_long;
1544 break;
1545 }
1546 /* String option */
1547 case PHP_XML_OPTION_TARGET_ENCODING: {
1548 const xml_encoding *enc;
1549 if (!try_convert_to_string(value)) {
1550 RETURN_THROWS();
1551 }
1552
1553 enc = xml_get_encoding((XML_Char*)Z_STRVAL_P(value));
1554 if (enc == NULL) {
1555 zend_argument_value_error(3, "is not a supported target encoding");
1556 RETURN_THROWS();
1557 }
1558
1559 parser->target_encoding = enc->name;
1560 break;
1561 }
1562 default:
1563 zend_argument_value_error(2, "must be a XML_OPTION_* constant");
1564 RETURN_THROWS();
1565 break;
1566 }
1567
1568 RETURN_TRUE;
1569 }
1570 /* }}} */
1571
1572 /* {{{ Get options from an XML parser */
PHP_FUNCTION(xml_parser_get_option)1573 PHP_FUNCTION(xml_parser_get_option)
1574 {
1575 xml_parser *parser;
1576 zval *pind;
1577 zend_long opt;
1578
1579 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ol", &pind, xml_parser_ce, &opt) == FAILURE) {
1580 RETURN_THROWS();
1581 }
1582
1583 parser = Z_XMLPARSER_P(pind);
1584 switch (opt) {
1585 case PHP_XML_OPTION_CASE_FOLDING:
1586 RETURN_BOOL(parser->case_folding);
1587 break;
1588 case PHP_XML_OPTION_SKIP_TAGSTART:
1589 RETURN_LONG(parser->toffset);
1590 break;
1591 case PHP_XML_OPTION_SKIP_WHITE:
1592 RETURN_BOOL(parser->skipwhite);
1593 break;
1594 case PHP_XML_OPTION_PARSE_HUGE:
1595 RETURN_BOOL(parser->parsehuge);
1596 break;
1597 case PHP_XML_OPTION_TARGET_ENCODING:
1598 RETURN_STRING((char *)parser->target_encoding);
1599 break;
1600 default:
1601 zend_argument_value_error(2, "must be a XML_OPTION_* constant");
1602 RETURN_THROWS();
1603 }
1604 }
1605 /* }}} */
1606
1607 #endif
1608