1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Stig Sæther Bakken <ssb@php.net> |
14 | Thies C. Arntzen <thies@thieso.net> |
15 | Sterling Hughes <sterling@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22
23 #include "php.h"
24
25 #include "zend_variables.h"
26 #include "ext/standard/info.h"
27 #include "ext/standard/html.h"
28
29 #ifdef HAVE_XML
30
31 #include "php_xml.h"
32 # include "ext/standard/head.h"
33 #ifdef LIBXML_EXPAT_COMPAT
34 #include "ext/libxml/php_libxml.h"
35 #endif
36
37 #include "xml_arginfo.h"
38
39 /* Short-term TODO list:
40 * - Implement XML_ExternalEntityParserCreate()
41 * - XML_SetCommentHandler
42 * - XML_SetCdataSectionHandler
43 * - XML_SetParamEntityParsing
44 */
45
46 /* Long-term TODO list:
47 * - Fix the expat library so you can install your own memory manager
48 * functions
49 */
50
51 /* Known bugs:
52 * - Weird things happen with <![CDATA[]]> sections.
53 */
54
/* Module globals of the xml extension. */
ZEND_BEGIN_MODULE_GLOBALS(xml)
	/* Encoding name used when an empty encoding string is passed to the
	 * parser-creation code; set to "UTF-8" in PHP_GINIT. */
	XML_Char *default_encoding;
ZEND_END_MODULE_GLOBALS(xml)

ZEND_DECLARE_MODULE_GLOBALS(xml)

/* Accessor for one field of the xml module globals, e.g. XML(default_encoding). */
#define XML(v) ZEND_MODULE_GLOBALS_ACCESSOR(xml, v)
62
/* Internal state behind an XMLParser object. The embedded zend_object
 * (`std`) is the last member; xml_parser_from_obj() recovers this struct
 * from a zend_object pointer and xml_parser_create_object() zeroes
 * everything that precedes it. */
typedef struct {
	/* Non-zero: decoded tag/attribute names are upper-cased (_xml_decode_tag). */
	int case_folding;
	/* Underlying parser handle; released with XML_ParserFree() on free. */
	XML_Parser parser;
	/* Encoding name handed to xml_utf8_decode() for all data passed to PHP. */
	XML_Char *target_encoding;

	/* Reference to the object itself, for convenience.
	 * It is not owned, do not release it.
	 * Copied into args[0] of every user handler call. */
	zval index;

	/* We return a pointer to these zvals in get_gc(), so it's
	 * important that a) they are adjacent b) object is the first
	 * and c) the number of zvals is kept up to date. */
#define XML_PARSER_NUM_ZVALS 12
	/* Used as the call object (fci.object) when invoking handlers. */
	zval object;
	/* User callbacks; IS_UNDEF means "no handler installed". */
	zval startElementHandler;
	zval endElementHandler;
	zval characterDataHandler;
	zval processingInstructionHandler;
	zval defaultHandler;
	zval unparsedEntityDeclHandler;
	zval notationDeclHandler;
	zval externalEntityRefHandler;
	zval unknownEncodingHandler;
	zval startNamespaceDeclHandler;
	zval endNamespaceDeclHandler;

	/* Passed to xml_call_handler() alongside the matching handler zval.
	 * NOTE(review): xml_call_handler() does not read them in the code
	 * visible here - presumably a resolved-function cache; confirm. */
	zend_function *startElementPtr;
	zend_function *endElementPtr;
	zend_function *characterDataPtr;
	zend_function *processingInstructionPtr;
	zend_function *defaultPtr;
	zend_function *unparsedEntityDeclPtr;
	zend_function *notationDeclPtr;
	zend_function *externalEntityRefPtr;
	zend_function *unknownEncodingPtr;
	zend_function *startNamespaceDeclPtr;
	zend_function *endNamespaceDeclPtr;

	/* When set, the element/cdata callbacks append one array per
	 * open/close/cdata event to this array. */
	zval data;
	/* When set, maps a tag name to the list of indices recorded for it
	 * (see _xml_add_to_info). */
	zval info;
	/* Current element nesting depth (incremented on start, decremented on end). */
	int level;
	/* Number of leading characters dropped from tag names (SKIP_TAGSTART). */
	int toffset;
	/* Running index stored into `info`; bumped by _xml_add_to_info(). */
	int curtag;
	/* Entry in `data` for the most recently recorded opening tag. */
	zval *ctag;
	/* Per-level copies of the open tag names, indexed by level - 1. */
	char **ltags;
	/* 1 right after an opening tag was recorded, reset by the end handler. */
	int lastwasopen;
	/* Non-zero: character data made only of ' ', '\t', '\n' is not recorded. */
	int skipwhite;
	/* NOTE(review): not used in the portion of the file visible here. */
	int isparsing;

	/* Freed with efree() when the object is destroyed. */
	XML_Char *baseURI;

	zend_object std;
} xml_parser;
116
117
/* One entry of the target-encoding table: the encoding name plus optional
 * single-character conversion callbacks (either may be NULL). */
typedef struct {
	XML_Char *name;
	/* Converts a code point into one byte of this encoding. */
	char (*decoding_function)(unsigned short);
	/* Converts one byte of this encoding into a code point. */
	unsigned short (*encoding_function)(unsigned char);
} xml_encoding;
123
/* {{{ dynamically loadable module stuff */
/* Only compiled when COMPILE_DL_XML is defined: emits ZEND_GET_MODULE for
 * this extension and, when ZTS is also defined, the TSRMLS cache definition. */
#ifdef COMPILE_DL_XML
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(xml)
#endif /* COMPILE_DL_XML */
/* }}} */
132
/* Deepest element nesting level recorded in the struct output; the level
 * just beyond it triggers a "Maximum depth exceeded" warning. */
#define XML_MAXLEVEL 255 /* XXX this should be dynamic */

/* Skips the first parser->toffset characters of `str`, clamped to the
 * string length so the result never points past the terminating NUL.
 * Needs a variable named `parser` in scope and evaluates `str` several times. */
#define SKIP_TAGSTART(str) ((str) + (parser->toffset > strlen(str) ? strlen(str) : parser->toffset))

/* Class entry and object handlers of XMLParser; both are set up in PHP_MINIT. */
static zend_class_entry *xml_parser_ce;
static zend_object_handlers xml_parser_object_handlers;
139
/* {{{ function prototypes */
/* Module lifecycle hooks. */
PHP_MINIT_FUNCTION(xml);
PHP_MINFO_FUNCTION(xml);
static PHP_GINIT_FUNCTION(xml);

/* XMLParser object handlers. */
static zend_object *xml_parser_create_object(zend_class_entry *class_type);
static void xml_parser_free_obj(zend_object *object);
static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n);
static zend_function *xml_parser_get_constructor(zend_object *object);

/* Encoding conversion, handler bookkeeping and small string helpers. */
static zend_string *xml_utf8_decode(const XML_Char *, size_t, const XML_Char *);
static void xml_set_handler(zval *, zval *);
inline static unsigned short xml_encode_iso_8859_1(unsigned char);
inline static char xml_decode_iso_8859_1(unsigned short);
inline static unsigned short xml_encode_us_ascii(unsigned char);
inline static char xml_decode_us_ascii(unsigned short);
static void xml_call_handler(xml_parser *, zval *, zend_function *, int, zval *, zval *);
static void _xml_xmlchar_zval(const XML_Char *, int, const XML_Char *, zval *);
static int _xml_xmlcharlen(const XML_Char *);
static void _xml_add_to_info(xml_parser *parser, const char *name);
inline static zend_string *_xml_decode_tag(xml_parser *parser, const XML_Char *tag);

/* Parser event callbacks; each one forwards the event to the matching
 * user handler stored in xml_parser.
 * NOTE(review): presumably registered with the XML_Parser elsewhere in
 * this file - the registration is not visible here. */
void _xml_startElementHandler(void *, const XML_Char *, const XML_Char **);
void _xml_endElementHandler(void *, const XML_Char *);
void _xml_characterDataHandler(void *, const XML_Char *, int);
void _xml_processingInstructionHandler(void *, const XML_Char *, const XML_Char *);
void _xml_defaultHandler(void *, const XML_Char *, int);
void _xml_unparsedEntityDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
void _xml_notationDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
int _xml_externalEntityRefHandler(XML_Parser, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);

void _xml_startNamespaceDeclHandler(void *, const XML_Char *, const XML_Char *);
void _xml_endNamespaceDeclHandler(void *, const XML_Char *);
/* }}} */
174
/* With LIBXML_EXPAT_COMPAT defined, the module declares a required
 * dependency on the "libxml" module. */
#ifdef LIBXML_EXPAT_COMPAT
static const zend_module_dep xml_deps[] = {
	ZEND_MOD_REQUIRED("libxml")
	ZEND_MOD_END
};
#endif
181
/* Module descriptor of the xml extension. Only module startup, module
 * info and the globals constructor are provided; every other hook is NULL.
 * The extended header (with xml_deps) is used under LIBXML_EXPAT_COMPAT. */
zend_module_entry xml_module_entry = {
#ifdef LIBXML_EXPAT_COMPAT
	STANDARD_MODULE_HEADER_EX, NULL,
	xml_deps,
#else
	STANDARD_MODULE_HEADER,
#endif
	"xml",                /* extension name */
	ext_functions,        /* extension function list */
	PHP_MINIT(xml),       /* extension-wide startup function */
	NULL,                 /* extension-wide shutdown function */
	NULL,                 /* per-request startup function */
	NULL,                 /* per-request shutdown function */
	PHP_MINFO(xml),       /* information function */
	PHP_XML_VERSION,
	PHP_MODULE_GLOBALS(xml), /* globals descriptor */
	PHP_GINIT(xml),          /* globals ctor */
	NULL,                    /* globals dtor */
	NULL,                    /* post deactivate */
	STANDARD_MODULE_PROPERTIES_EX
};
203
/* Table of supported target encodings, terminated by an entry whose name
 * is NULL. ISO-8859-1 and US-ASCII carry per-character conversion
 * functions; UTF-8 has none, so xml_utf8_decode() returns its input
 * unchanged for that encoding.
 */
const xml_encoding xml_encodings[] = {
	{ (XML_Char *)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
	{ (XML_Char *)"US-ASCII",   xml_decode_us_ascii,   xml_encode_us_ascii   },
	{ (XML_Char *)"UTF-8",      NULL,                  NULL                  },
	{ (XML_Char *)NULL,         NULL,                  NULL                  }
};
213
/* Memory handling suite whose members are pointed at the php_xml_*_wrapper
 * functions in PHP_MINIT (filled in at runtime, not statically). */
static XML_Memory_Handling_Suite php_xml_mem_hdlrs;
215
216 /* }}} */
217
218 /* {{{ startup, shutdown and info functions */
PHP_GINIT_FUNCTION(xml)219 static PHP_GINIT_FUNCTION(xml)
220 {
221 #if defined(COMPILE_DL_XML) && defined(ZTS)
222 ZEND_TSRMLS_CACHE_UPDATE();
223 #endif
224 xml_globals->default_encoding = (XML_Char*)"UTF-8";
225 }
226
/* Allocation callback for the memory handling suite: forwards to emalloc(). */
static void *php_xml_malloc_wrapper(size_t sz)
{
	void *block = emalloc(sz);

	return block;
}
231
/* Reallocation callback for the memory handling suite: forwards to erealloc(). */
static void *php_xml_realloc_wrapper(void *ptr, size_t sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
236
/* Release callback for the memory handling suite: a NULL pointer is
 * ignored, anything else goes to efree(). */
static void php_xml_free_wrapper(void *ptr)
{
	if (ptr == NULL) {
		return;
	}
	efree(ptr);
}
243
PHP_MINIT_FUNCTION(xml)244 PHP_MINIT_FUNCTION(xml)
245 {
246 xml_parser_ce = register_class_XMLParser();
247 xml_parser_ce->create_object = xml_parser_create_object;
248 xml_parser_ce->default_object_handlers = &xml_parser_object_handlers;
249
250 memcpy(&xml_parser_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
251 xml_parser_object_handlers.offset = XtOffsetOf(xml_parser, std);
252 xml_parser_object_handlers.free_obj = xml_parser_free_obj;
253 xml_parser_object_handlers.get_gc = xml_parser_get_gc;
254 xml_parser_object_handlers.get_constructor = xml_parser_get_constructor;
255 xml_parser_object_handlers.clone_obj = NULL;
256 xml_parser_object_handlers.compare = zend_objects_not_comparable;
257
258 register_xml_symbols(module_number);
259
260 /* this object should not be pre-initialised at compile time,
261 as the order of members may vary */
262
263 php_xml_mem_hdlrs.malloc_fcn = php_xml_malloc_wrapper;
264 php_xml_mem_hdlrs.realloc_fcn = php_xml_realloc_wrapper;
265 php_xml_mem_hdlrs.free_fcn = php_xml_free_wrapper;
266
267 return SUCCESS;
268 }
269
/* Module info: prints the XML support rows plus the version of whichever
 * parser library the extension was compiled against. */
PHP_MINFO_FUNCTION(xml)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "XML Support", "active");
	php_info_print_table_row(2, "XML Namespace Support", "active");
	{
#if defined(LIBXML_DOTTED_VERSION) && defined(LIBXML_EXPAT_COMPAT)
		const char *lib_label = "libxml2 Version";
		const char *lib_version = LIBXML_DOTTED_VERSION;
#else
		const char *lib_label = "EXPAT Version";
		const char *lib_version = (const char *) XML_ExpatVersion();
#endif
		php_info_print_table_row(2, lib_label, lib_version);
	}
	php_info_print_table_end();
}
282 /* }}} */
283
284 /* {{{ extension-internal functions */
285
/* Stores `s`, converted to `encoding`, into `ret` as a PHP string.
 * A NULL `s` yields false; a `len` of 0 means "measure the string". */
static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
{
	if (!s) {
		ZVAL_FALSE(ret);
		return;
	}

	int effective_len = (len != 0) ? len : _xml_xmlcharlen(s);

	ZVAL_STR(ret, xml_utf8_decode(s, effective_len, encoding));
}
297 /* }}} */
298
xml_parser_from_obj(zend_object * obj)299 static inline xml_parser *xml_parser_from_obj(zend_object *obj) {
300 return (xml_parser *)((char *)(obj) - XtOffsetOf(xml_parser, std));
301 }
302
303 #define Z_XMLPARSER_P(zv) xml_parser_from_obj(Z_OBJ_P(zv))
304
xml_parser_create_object(zend_class_entry * class_type)305 static zend_object *xml_parser_create_object(zend_class_entry *class_type) {
306 xml_parser *intern = zend_object_alloc(sizeof(xml_parser), class_type);
307 memset(intern, 0, sizeof(xml_parser) - sizeof(zend_object));
308
309 zend_object_std_init(&intern->std, class_type);
310 object_properties_init(&intern->std, class_type);
311
312 return &intern->std;
313 }
314
/* Frees the saved tag names for every currently open level (capped at
 * XML_MAXLEVEL) and then the ltags array itself. No-op without ltags. */
static void xml_parser_free_ltags(xml_parser *parser)
{
	if (!parser->ltags) {
		return;
	}

	int used = (parser->level < XML_MAXLEVEL) ? parser->level : XML_MAXLEVEL;

	for (int depth = 0; depth < used; depth++) {
		efree(parser->ltags[depth]);
	}
	efree(parser->ltags);
}
324
/* free_obj handler: releases the underlying parser, the saved tag names,
 * every installed handler, the base URI and the call object, then runs the
 * standard object destructor. */
static void xml_parser_free_obj(zend_object *object)
{
	xml_parser *parser = xml_parser_from_obj(object);

	if (parser->parser) {
		XML_ParserFree(parser->parser);
	}
	xml_parser_free_ltags(parser);

	/* Handlers are released in declaration order; unset ones are skipped. */
	zval *handlers[] = {
		&parser->startElementHandler,
		&parser->endElementHandler,
		&parser->characterDataHandler,
		&parser->processingInstructionHandler,
		&parser->defaultHandler,
		&parser->unparsedEntityDeclHandler,
		&parser->notationDeclHandler,
		&parser->externalEntityRefHandler,
		&parser->unknownEncodingHandler,
		&parser->startNamespaceDeclHandler,
		&parser->endNamespaceDeclHandler,
	};
	for (size_t i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++) {
		if (!Z_ISUNDEF(*handlers[i])) {
			zval_ptr_dtor(handlers[i]);
		}
	}

	if (parser->baseURI) {
		efree(parser->baseURI);
	}
	if (!Z_ISUNDEF(parser->object)) {
		zval_ptr_dtor(&parser->object);
	}

	zend_object_std_dtor(&parser->std);
}
375
xml_parser_get_gc(zend_object * object,zval ** table,int * n)376 static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n)
377 {
378 xml_parser *parser = xml_parser_from_obj(object);
379 *table = &parser->object;
380 *n = XML_PARSER_NUM_ZVALS;
381 return zend_std_get_properties(object);
382 }
383
xml_parser_get_constructor(zend_object * object)384 static zend_function *xml_parser_get_constructor(zend_object *object) {
385 zend_throw_error(NULL, "Cannot directly construct XMLParser, use xml_parser_create() or xml_parser_create_ns() instead");
386 return NULL;
387 }
388
389 /* {{{ xml_set_handler() */
/* Replaces the handler stored in `handler` with `data`.
 * The previous value is released first. Arrays and objects are stored
 * as-is; anything else is converted to a string in place, and an empty
 * string clears the handler (leaves it IS_UNDEF). */
static void xml_set_handler(zval *handler, zval *data)
{
	/* Drop whatever was installed before. */
	if (handler != NULL) {
		zval_ptr_dtor(handler);
	}

	/* IS_ARRAY might indicate array($obj, 'method') syntax. */
	bool is_array_or_object = Z_TYPE_P(data) == IS_ARRAY || Z_TYPE_P(data) == IS_OBJECT;

	if (!is_array_or_object) {
		convert_to_string(data);
		if (Z_STRLEN_P(data) == 0) {
			ZVAL_UNDEF(handler);
			return;
		}
	}

	ZVAL_COPY(handler, data);
}
408 /* }}} */
409
410 /* {{{ xml_call_handler() */
/* Invokes the user callback `handler` with `argc` arguments from `argv`,
 * storing the result in `retval` (left IS_UNDEF when nothing is called).
 * The call is skipped when parser or handler is NULL or an exception is
 * already pending. A failed call raises an E_WARNING naming the handler.
 * Every argument in `argv` is released before returning, in all cases. */
static void xml_call_handler(xml_parser *parser, zval *handler, zend_function *function_ptr, int argc, zval *argv, zval *retval)
{
	ZVAL_UNDEF(retval);

	if (parser != NULL && handler != NULL && EG(exception) == NULL) {
		zend_fcall_info fci;

		fci.size = sizeof(fci);
		fci.object = Z_OBJ(parser->object);
		fci.retval = retval;
		fci.params = argv;
		fci.param_count = argc;
		fci.named_params = NULL;
		ZVAL_COPY_VALUE(&fci.function_name, handler);

		if (zend_call_function(&fci, NULL) == FAILURE) {
			if (Z_TYPE_P(handler) == IS_STRING) {
				php_error_docref(NULL, E_WARNING, "Unable to call handler %s()", Z_STRVAL_P(handler));
			} else {
				zval *obj = NULL;
				zval *method = NULL;

				/* Recognise the array($obj, 'method') form for a nicer message. */
				if (Z_TYPE_P(handler) == IS_ARRAY) {
					obj = zend_hash_index_find(Z_ARRVAL_P(handler), 0);
					if (obj != NULL) {
						method = zend_hash_index_find(Z_ARRVAL_P(handler), 1);
					}
				}

				if (obj != NULL && method != NULL
						&& Z_TYPE_P(obj) == IS_OBJECT
						&& Z_TYPE_P(method) == IS_STRING) {
					php_error_docref(NULL, E_WARNING, "Unable to call handler %s::%s()", ZSTR_VAL(Z_OBJCE_P(obj)->name), Z_STRVAL_P(method));
				} else {
					php_error_docref(NULL, E_WARNING, "Unable to call handler");
				}
			}
		}
	}

	for (int arg = 0; arg < argc; arg++) {
		zval_ptr_dtor(&argv[arg]);
	}
}
449 /* }}} */
450
451 /* {{{ xml_encode_iso_8859_1() */
/* ISO-8859-1 byte -> code point: the value is simply widened. */
inline static unsigned short xml_encode_iso_8859_1(unsigned char c)
{
	unsigned short code_point = c;

	return code_point;
}
456 /* }}} */
457
458 /* {{{ xml_decode_iso_8859_1() */
/* Code point -> ISO-8859-1 byte; anything above 0xff becomes '?'. */
inline static char xml_decode_iso_8859_1(unsigned short c)
{
	if (c > 0xff) {
		return '?';
	}
	return (char) c;
}
463 /* }}} */
464
465 /* {{{ xml_encode_us_ascii() */
/* US-ASCII byte -> code point: the value is widened without range checks. */
inline static unsigned short xml_encode_us_ascii(unsigned char c)
{
	unsigned short code_point = c;

	return code_point;
}
470 /* }}} */
471
472 /* {{{ xml_decode_us_ascii() */
/* Code point -> US-ASCII byte; anything above 0x7f becomes '?'. */
inline static char xml_decode_us_ascii(unsigned short c)
{
	if (c > 0x7f) {
		return '?';
	}
	return (char) c;
}
477 /* }}} */
478
479 /* {{{ xml_get_encoding() */
xml_get_encoding(const XML_Char * name)480 static const xml_encoding *xml_get_encoding(const XML_Char *name)
481 {
482 const xml_encoding *enc = &xml_encodings[0];
483
484 while (enc && enc->name) {
485 if (strcasecmp((char *)name, (char *)enc->name) == 0) {
486 return enc;
487 }
488 enc++;
489 }
490 return NULL;
491 }
492 /* }}} */
493
494 /* {{{ xml_utf8_decode() */
xml_utf8_decode(const XML_Char * s,size_t len,const XML_Char * encoding)495 static zend_string *xml_utf8_decode(const XML_Char *s, size_t len, const XML_Char *encoding)
496 {
497 size_t pos = 0;
498 unsigned int c;
499 char (*decoder)(unsigned short) = NULL;
500 const xml_encoding *enc = xml_get_encoding(encoding);
501 zend_string *str;
502
503 if (enc) {
504 decoder = enc->decoding_function;
505 }
506
507 if (decoder == NULL) {
508 /* If the target encoding was unknown, or no decoder function
509 * was specified, return the UTF-8-encoded data as-is.
510 */
511 str = zend_string_init((char *)s, len, 0);
512 return str;
513 }
514
515 str = zend_string_alloc(len, 0);
516 ZSTR_LEN(str) = 0;
517 while (pos < len) {
518 zend_result status = FAILURE;
519 c = php_next_utf8_char((const unsigned char*)s, len, &pos, &status);
520
521 if (status == FAILURE || c > 0xFFU) {
522 c = '?';
523 }
524
525 ZSTR_VAL(str)[ZSTR_LEN(str)++] = (unsigned int)decoder(c);
526 }
527 ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
528 if (ZSTR_LEN(str) < len) {
529 str = zend_string_truncate(str, ZSTR_LEN(str), 0);
530 }
531
532 return str;
533 }
534 /* }}} */
535
536 /* {{{ _xml_xmlcharlen() */
/* Returns the number of XML_Char units in `s` before the terminating zero. */
static int _xml_xmlcharlen(const XML_Char *s)
{
	const XML_Char *end = s;

	while (*end) {
		end++;
	}
	return (int)(end - s);
}
547 /* }}} */
548
549 /* {{{ _xml_add_to_info() */
/* Records the current tag index under `name` in the parser's info array
 * (creating the per-name list on first use) and advances the index.
 * Does nothing when no info array is attached. */
static void _xml_add_to_info(xml_parser *parser, const char *name)
{
	if (Z_ISUNDEF(parser->info)) {
		return;
	}

	HashTable *info_table = Z_ARRVAL(parser->info);
	size_t name_len = strlen(name);
	zval *slot = zend_hash_str_find(info_table, name, name_len);

	if (slot == NULL) {
		zval fresh_list;

		array_init(&fresh_list);
		slot = zend_hash_str_update(info_table, name, name_len, &fresh_list);
	}

	add_next_index_long(slot, parser->curtag);
	parser->curtag++;
}
569 /* }}} */
570
571 /* {{{ _xml_decode_tag() */
_xml_decode_tag(xml_parser * parser,const XML_Char * tag)572 static zend_string *_xml_decode_tag(xml_parser *parser, const XML_Char *tag)
573 {
574 zend_string *str;
575
576 str = xml_utf8_decode(tag, _xml_xmlcharlen(tag), parser->target_encoding);
577
578 if (parser->case_folding) {
579 zend_str_toupper(ZSTR_VAL(str), ZSTR_LEN(str));
580 }
581
582 return str;
583 }
584 /* }}} */
585
586 /* {{{ _xml_startElementHandler() */
_xml_startElementHandler(void * userData,const XML_Char * name,const XML_Char ** attributes)587 void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes)
588 {
589 xml_parser *parser = (xml_parser *)userData;
590 const char **attrs = (const char **) attributes;
591 zend_string *att, *tag_name, *val;
592 zval retval, args[3];
593
594 if (!parser) {
595 return;
596 }
597
598 parser->level++;
599
600 tag_name = _xml_decode_tag(parser, name);
601
602 if (!Z_ISUNDEF(parser->startElementHandler)) {
603 ZVAL_COPY(&args[0], &parser->index);
604 ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
605 array_init(&args[2]);
606
607 while (attributes && *attributes) {
608 zval tmp;
609
610 att = _xml_decode_tag(parser, attributes[0]);
611 val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
612
613 ZVAL_STR(&tmp, val);
614 zend_symtable_update(Z_ARRVAL(args[2]), att, &tmp);
615
616 attributes += 2;
617
618 zend_string_release_ex(att, 0);
619 }
620
621 xml_call_handler(parser, &parser->startElementHandler, parser->startElementPtr, 3, args, &retval);
622 zval_ptr_dtor(&retval);
623 }
624
625 if (!Z_ISUNDEF(parser->data) && !EG(exception)) {
626 if (parser->level <= XML_MAXLEVEL) {
627 zval tag, atr;
628 int atcnt = 0;
629
630 array_init(&tag);
631 array_init(&atr);
632
633 _xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
634
635 add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
636 add_assoc_string(&tag, "type", "open");
637 add_assoc_long(&tag, "level", parser->level);
638
639 parser->ltags[parser->level-1] = estrdup(ZSTR_VAL(tag_name));
640 parser->lastwasopen = 1;
641
642 attributes = (const XML_Char **) attrs;
643
644 while (attributes && *attributes) {
645 zval tmp;
646
647 att = _xml_decode_tag(parser, attributes[0]);
648 val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
649
650 ZVAL_STR(&tmp, val);
651 zend_symtable_update(Z_ARRVAL(atr), att, &tmp);
652
653 atcnt++;
654 attributes += 2;
655
656 zend_string_release_ex(att, 0);
657 }
658
659 if (atcnt) {
660 zend_hash_str_add(Z_ARRVAL(tag), "attributes", sizeof("attributes") - 1, &atr);
661 } else {
662 zval_ptr_dtor(&atr);
663 }
664
665 parser->ctag = zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
666 } else if (parser->level == (XML_MAXLEVEL + 1)) {
667 php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
668 }
669 }
670
671 zend_string_release_ex(tag_name, 0);
672 }
673 /* }}} */
674
675 /* {{{ _xml_endElementHandler() */
_xml_endElementHandler(void * userData,const XML_Char * name)676 void _xml_endElementHandler(void *userData, const XML_Char *name)
677 {
678 xml_parser *parser = (xml_parser *)userData;
679
680 if (!parser) {
681 return;
682 }
683
684 zval retval, args[2];
685
686 zend_string *tag_name = _xml_decode_tag(parser, name);
687
688 if (!Z_ISUNDEF(parser->endElementHandler)) {
689 ZVAL_COPY(&args[0], &parser->index);
690 ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
691
692 xml_call_handler(parser, &parser->endElementHandler, parser->endElementPtr, 2, args, &retval);
693 zval_ptr_dtor(&retval);
694 }
695
696 if (!Z_ISUNDEF(parser->data) && !EG(exception)) {
697 zval tag;
698
699 if (parser->lastwasopen) {
700 add_assoc_string(parser->ctag, "type", "complete");
701 } else {
702 array_init(&tag);
703
704 _xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
705
706 add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
707 add_assoc_string(&tag, "type", "close");
708 add_assoc_long(&tag, "level", parser->level);
709
710 zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
711 }
712
713 parser->lastwasopen = 0;
714 }
715
716 zend_string_release_ex(tag_name, 0);
717
718 if ((parser->ltags) && (parser->level <= XML_MAXLEVEL)) {
719 efree(parser->ltags[parser->level-1]);
720 }
721
722 parser->level--;
723 }
724 /* }}} */
725
726 /* {{{ _xml_characterDataHandler() */
_xml_characterDataHandler(void * userData,const XML_Char * s,int len)727 void _xml_characterDataHandler(void *userData, const XML_Char *s, int len)
728 {
729 xml_parser *parser = (xml_parser *)userData;
730
731 if (!parser) {
732 return;
733 }
734
735 zval retval, args[2];
736
737 if (!Z_ISUNDEF(parser->characterDataHandler)) {
738 ZVAL_COPY(&args[0], &parser->index);
739 _xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
740 xml_call_handler(parser, &parser->characterDataHandler, parser->characterDataPtr, 2, args, &retval);
741 zval_ptr_dtor(&retval);
742 }
743
744 if (Z_ISUNDEF(parser->data) || EG(exception)) {
745 return;
746 }
747
748 bool doprint = 0;
749 zend_string *decoded_value;
750 decoded_value = xml_utf8_decode(s, len, parser->target_encoding);
751 if (parser->skipwhite) {
752 for (size_t i = 0; i < ZSTR_LEN(decoded_value); i++) {
753 switch (ZSTR_VAL(decoded_value)[i]) {
754 case ' ':
755 case '\t':
756 case '\n':
757 continue;
758 default:
759 doprint = 1;
760 break;
761 }
762 if (doprint) {
763 break;
764 }
765 }
766 }
767 if (parser->lastwasopen) {
768 zval *myval;
769 /* check if the current tag already has a value - if yes append to that! */
770 if ((myval = zend_hash_find(Z_ARRVAL_P(parser->ctag), ZSTR_KNOWN(ZEND_STR_VALUE)))) {
771 size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
772 Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
773 strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
774 ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
775 zend_string_release_ex(decoded_value, 0);
776 } else {
777 if (doprint || (! parser->skipwhite)) {
778 add_assoc_str(parser->ctag, "value", decoded_value);
779 } else {
780 zend_string_release_ex(decoded_value, 0);
781 }
782 }
783 } else {
784 zval tag;
785 zval *curtag, *mytype, *myval;
786 ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
787 if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
788 if (zend_string_equals_literal(Z_STR_P(mytype), "cdata")) {
789 if ((myval = zend_hash_find(Z_ARRVAL_P(curtag), ZSTR_KNOWN(ZEND_STR_VALUE)))) {
790 size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
791 Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
792 strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
793 ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
794 zend_string_release_ex(decoded_value, 0);
795 return;
796 }
797 }
798 }
799 break;
800 } ZEND_HASH_FOREACH_END();
801 if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) {
802 array_init(&tag);
803 _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
804 add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
805 add_assoc_str(&tag, "value", decoded_value);
806 add_assoc_string(&tag, "type", "cdata");
807 add_assoc_long(&tag, "level", parser->level);
808 zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
809 } else if (parser->level == (XML_MAXLEVEL + 1)) {
810 php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
811 } else {
812 zend_string_release_ex(decoded_value, 0);
813 }
814 }
815 }
816 /* }}} */
817
818 /* {{{ _xml_processingInstructionHandler() */
_xml_processingInstructionHandler(void * userData,const XML_Char * target,const XML_Char * data)819 void _xml_processingInstructionHandler(void *userData, const XML_Char *target, const XML_Char *data)
820 {
821 xml_parser *parser = (xml_parser *)userData;
822
823 if (!parser || Z_ISUNDEF(parser->processingInstructionHandler)) {
824 return;
825 }
826
827 zval retval, args[3];
828
829 ZVAL_COPY(&args[0], &parser->index);
830 _xml_xmlchar_zval(target, 0, parser->target_encoding, &args[1]);
831 _xml_xmlchar_zval(data, 0, parser->target_encoding, &args[2]);
832 xml_call_handler(parser, &parser->processingInstructionHandler, parser->processingInstructionPtr, 3, args, &retval);
833 zval_ptr_dtor(&retval);
834 }
835 /* }}} */
836
837 /* {{{ _xml_defaultHandler() */
_xml_defaultHandler(void * userData,const XML_Char * s,int len)838 void _xml_defaultHandler(void *userData, const XML_Char *s, int len)
839 {
840 xml_parser *parser = (xml_parser *)userData;
841
842 if (!parser || Z_ISUNDEF(parser->defaultHandler)) {
843 return;
844 }
845
846 zval retval, args[2];
847
848 ZVAL_COPY(&args[0], &parser->index);
849 _xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
850 xml_call_handler(parser, &parser->defaultHandler, parser->defaultPtr, 2, args, &retval);
851 zval_ptr_dtor(&retval);
852 }
853 /* }}} */
854
855 /* {{{ _xml_unparsedEntityDeclHandler() */
_xml_unparsedEntityDeclHandler(void * userData,const XML_Char * entityName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)856 void _xml_unparsedEntityDeclHandler(void *userData,
857 const XML_Char *entityName, const XML_Char *base, const XML_Char *systemId,
858 const XML_Char *publicId, const XML_Char *notationName)
859 {
860 xml_parser *parser = (xml_parser *)userData;
861
862 if (!parser || Z_ISUNDEF(parser->unparsedEntityDeclHandler)) {
863 return;
864 }
865
866 zval retval, args[6];
867
868 ZVAL_COPY(&args[0], &parser->index);
869 _xml_xmlchar_zval(entityName, 0, parser->target_encoding, &args[1]);
870 _xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
871 _xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
872 _xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
873 _xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[5]);
874 xml_call_handler(parser, &parser->unparsedEntityDeclHandler, parser->unparsedEntityDeclPtr, 6, args, &retval);
875 zval_ptr_dtor(&retval);
876 }
877 /* }}} */
878
879 /* {{{ _xml_notationDeclHandler() */
_xml_notationDeclHandler(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)880 void _xml_notationDeclHandler(void *userData, const XML_Char *notationName,
881 const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
882 {
883 xml_parser *parser = (xml_parser *)userData;
884
885 if (!parser || Z_ISUNDEF(parser->notationDeclHandler)) {
886 return;
887 }
888
889 zval retval, args[5];
890
891 ZVAL_COPY(&args[0], &parser->index);
892 _xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[1]);
893 _xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
894 _xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
895 _xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
896 xml_call_handler(parser, &parser->notationDeclHandler, parser->notationDeclPtr, 5, args, &retval);
897 zval_ptr_dtor(&retval);
898 }
899 /* }}} */
900
901 /* {{{ _xml_externalEntityRefHandler() */
/* External-entity-reference callback: forwards (parser, open entity names,
 * base, system id, public id) to the user handler and returns its result
 * converted to an integer. Returns 0 when there is no parser, no handler,
 * or the handler produced no return value. */
int _xml_externalEntityRefHandler(XML_Parser parserPtr, const XML_Char *openEntityNames,
	const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
{
	xml_parser *parser = XML_GetUserData(parserPtr);

	/* abort if no handler is set (should be configurable?) */
	if (parser == NULL || Z_ISUNDEF(parser->externalEntityRefHandler)) {
		return 0;
	}

	const XML_Char *fields[] = { openEntityNames, base, systemId, publicId };
	zval args[5];
	zval retval;

	ZVAL_COPY(&args[0], &parser->index);
	for (size_t i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
		_xml_xmlchar_zval(fields[i], 0, parser->target_encoding, &args[i + 1]);
	}

	xml_call_handler(parser, &parser->externalEntityRefHandler, parser->externalEntityRefPtr, 5, args, &retval);

	if (Z_ISUNDEF(retval)) {
		return 0;
	}

	convert_to_long(&retval);
	int ret = Z_LVAL(retval);

	return ret;
}
929 /* }}} */
930
931 /* {{{ _xml_startNamespaceDeclHandler() */
_xml_startNamespaceDeclHandler(void * userData,const XML_Char * prefix,const XML_Char * uri)932 void _xml_startNamespaceDeclHandler(void *userData,const XML_Char *prefix, const XML_Char *uri)
933 {
934 xml_parser *parser = (xml_parser *)userData;
935
936 if (!parser || Z_ISUNDEF(parser->startNamespaceDeclHandler)) {
937 return;
938 }
939
940 zval retval, args[3];
941
942 ZVAL_COPY(&args[0], &parser->index);
943 _xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
944 _xml_xmlchar_zval(uri, 0, parser->target_encoding, &args[2]);
945 xml_call_handler(parser, &parser->startNamespaceDeclHandler, parser->startNamespaceDeclPtr, 3, args, &retval);
946 zval_ptr_dtor(&retval);
947 }
948 /* }}} */
949
950 /* {{{ _xml_endNamespaceDeclHandler() */
_xml_endNamespaceDeclHandler(void * userData,const XML_Char * prefix)951 void _xml_endNamespaceDeclHandler(void *userData, const XML_Char *prefix)
952 {
953 xml_parser *parser = (xml_parser *)userData;
954
955 if (!parser || Z_ISUNDEF(parser->endNamespaceDeclHandler)) {
956 return;
957 }
958
959 zval retval, args[2];
960
961 ZVAL_COPY(&args[0], &parser->index);
962 _xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
963 xml_call_handler(parser, &parser->endNamespaceDeclHandler, parser->endNamespaceDeclPtr, 2, args, &retval);
964 zval_ptr_dtor(&retval);
965 }
966 /* }}} */
967
968 /************************* EXTENSION FUNCTIONS *************************/
969
static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_support) /* {{{ */
{
	/*
	 * Shared implementation behind xml_parser_create() and
	 * xml_parser_create_ns().
	 *
	 * Accepts an optional, nullable encoding string and, when ns_support is
	 * non-zero, an optional namespace separator string. On success
	 * return_value is initialised as an xml_parser_ce object whose
	 * underlying parser has been created and wired to the xml_parser
	 * context. An unsupported encoding name raises an argument value error.
	 */
	zend_string *encoding_param = NULL;
	char *ns_param = NULL;
	size_t ns_param_len = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), (ns_support ? "|S!s": "|S!"), &encoding_param, &ns_param, &ns_param_len) == FAILURE) {
		RETURN_THROWS();
	}

	/* Start from the module default; an explicit, non-empty name overrides it. */
	XML_Char *encoding = XML(default_encoding);
	int auto_detect = 0;

	if (encoding_param != NULL) {
		/* The supported encoding types are hardcoded here because
		 * we are limited to the encodings supported by expat/xmltok.
		 */
		if (ZSTR_LEN(encoding_param) == 0) {
			/* Empty string: keep the default as target encoding, but pass
			 * NULL as the source encoding when creating the parser. */
			auto_detect = 1;
		} else if (zend_string_equals_literal_ci(encoding_param, "ISO-8859-1")) {
			encoding = (XML_Char*)"ISO-8859-1";
		} else if (zend_string_equals_literal_ci(encoding_param, "UTF-8")) {
			encoding = (XML_Char*)"UTF-8";
		} else if (zend_string_equals_literal_ci(encoding_param, "US-ASCII")) {
			encoding = (XML_Char*)"US-ASCII";
		} else {
			zend_argument_value_error(1, "is not a supported source encoding");
			RETURN_THROWS();
		}
	}

	/* Namespace-aware parsers fall back to ":" when no separator was given. */
	if (ns_support && !ns_param) {
		ns_param = ":";
	}

	object_init_ex(return_value, xml_parser_ce);

	xml_parser *parser = Z_XMLPARSER_P(return_value);

	parser->target_encoding = encoding;
	parser->case_folding = 1;
	parser->isparsing = 0;
	parser->parser = XML_ParserCreate_MM(auto_detect ? NULL : encoding,
		&php_xml_mem_hdlrs, (XML_Char*)ns_param);

	/* Let the C-level callbacks find the context, and keep a by-value copy
	 * of the parser object to hand to user handlers. */
	XML_SetUserData(parser->parser, parser);
	ZVAL_COPY_VALUE(&parser->index, return_value);
}
1023 /* }}} */
1024
1025 /* {{{ Create an XML parser */
PHP_FUNCTION(xml_parser_create)1026 PHP_FUNCTION(xml_parser_create)
1027 {
1028 php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1029 }
1030 /* }}} */
1031
/* {{{ Create an XML parser with namespace support */
PHP_FUNCTION(xml_parser_create_ns)1033 PHP_FUNCTION(xml_parser_create_ns)
1034 {
1035 php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1036 }
1037 /* }}} */
1038
1039 /* {{{ Set up object which should be used for callbacks */
PHP_FUNCTION(xml_set_object)1040 PHP_FUNCTION(xml_set_object)
1041 {
1042 xml_parser *parser;
1043 zval *pind, *mythis;
1044
1045 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oo", &pind, xml_parser_ce, &mythis) == FAILURE) {
1046 RETURN_THROWS();
1047 }
1048
1049 parser = Z_XMLPARSER_P(pind);
1050
1051 zval_ptr_dtor(&parser->object);
1052 ZVAL_OBJ_COPY(&parser->object, Z_OBJ_P(mythis));
1053
1054 RETURN_TRUE;
1055 }
1056 /* }}} */
1057
1058 /* {{{ Set up start and end element handlers */
PHP_FUNCTION(xml_set_element_handler)1059 PHP_FUNCTION(xml_set_element_handler)
1060 {
1061 xml_parser *parser;
1062 zval *pind, *shdl, *ehdl;
1063
1064 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ozz", &pind, xml_parser_ce, &shdl, &ehdl) == FAILURE) {
1065 RETURN_THROWS();
1066 }
1067
1068 parser = Z_XMLPARSER_P(pind);
1069 xml_set_handler(&parser->startElementHandler, shdl);
1070 xml_set_handler(&parser->endElementHandler, ehdl);
1071 XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
1072
1073 RETURN_TRUE;
1074 }
1075 /* }}} */
1076
1077 /* {{{ Set up character data handler */
PHP_FUNCTION(xml_set_character_data_handler)1078 PHP_FUNCTION(xml_set_character_data_handler)
1079 {
1080 xml_parser *parser;
1081 zval *pind, *hdl;
1082
1083 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1084 RETURN_THROWS();
1085 }
1086
1087 parser = Z_XMLPARSER_P(pind);
1088 xml_set_handler(&parser->characterDataHandler, hdl);
1089 XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
1090
1091 RETURN_TRUE;
1092 }
1093 /* }}} */
1094
1095 /* {{{ Set up processing instruction (PI) handler */
PHP_FUNCTION(xml_set_processing_instruction_handler)1096 PHP_FUNCTION(xml_set_processing_instruction_handler)
1097 {
1098 xml_parser *parser;
1099 zval *pind, *hdl;
1100
1101 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1102 RETURN_THROWS();
1103 }
1104
1105 parser = Z_XMLPARSER_P(pind);
1106 xml_set_handler(&parser->processingInstructionHandler, hdl);
1107 XML_SetProcessingInstructionHandler(parser->parser, _xml_processingInstructionHandler);
1108
1109 RETURN_TRUE;
1110 }
1111 /* }}} */
1112
1113 /* {{{ Set up default handler */
PHP_FUNCTION(xml_set_default_handler)1114 PHP_FUNCTION(xml_set_default_handler)
1115 {
1116 xml_parser *parser;
1117 zval *pind, *hdl;
1118
1119 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1120 RETURN_THROWS();
1121 }
1122
1123 parser = Z_XMLPARSER_P(pind);
1124 xml_set_handler(&parser->defaultHandler, hdl);
1125 XML_SetDefaultHandler(parser->parser, _xml_defaultHandler);
1126
1127 RETURN_TRUE;
1128 }
1129 /* }}} */
1130
1131 /* {{{ Set up unparsed entity declaration handler */
PHP_FUNCTION(xml_set_unparsed_entity_decl_handler)1132 PHP_FUNCTION(xml_set_unparsed_entity_decl_handler)
1133 {
1134 xml_parser *parser;
1135 zval *pind, *hdl;
1136
1137 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1138 RETURN_THROWS();
1139 }
1140
1141 parser = Z_XMLPARSER_P(pind);
1142 xml_set_handler(&parser->unparsedEntityDeclHandler, hdl);
1143 XML_SetUnparsedEntityDeclHandler(parser->parser, _xml_unparsedEntityDeclHandler);
1144
1145 RETURN_TRUE;
1146 }
1147 /* }}} */
1148
1149 /* {{{ Set up notation declaration handler */
PHP_FUNCTION(xml_set_notation_decl_handler)1150 PHP_FUNCTION(xml_set_notation_decl_handler)
1151 {
1152 xml_parser *parser;
1153 zval *pind, *hdl;
1154
1155 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1156 RETURN_THROWS();
1157 }
1158
1159 parser = Z_XMLPARSER_P(pind);
1160 xml_set_handler(&parser->notationDeclHandler, hdl);
1161 XML_SetNotationDeclHandler(parser->parser, _xml_notationDeclHandler);
1162
1163 RETURN_TRUE;
1164 }
1165 /* }}} */
1166
1167 /* {{{ Set up external entity reference handler */
PHP_FUNCTION(xml_set_external_entity_ref_handler)1168 PHP_FUNCTION(xml_set_external_entity_ref_handler)
1169 {
1170 xml_parser *parser;
1171 zval *pind, *hdl;
1172
1173 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1174 RETURN_THROWS();
1175 }
1176
1177 parser = Z_XMLPARSER_P(pind);
1178 xml_set_handler(&parser->externalEntityRefHandler, hdl);
1179 XML_SetExternalEntityRefHandler(parser->parser, (void *) _xml_externalEntityRefHandler);
1180
1181 RETURN_TRUE;
1182 }
1183 /* }}} */
1184
/* {{{ Set up start namespace declaration handler */
PHP_FUNCTION(xml_set_start_namespace_decl_handler)1186 PHP_FUNCTION(xml_set_start_namespace_decl_handler)
1187 {
1188 xml_parser *parser;
1189 zval *pind, *hdl;
1190
1191 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1192 RETURN_THROWS();
1193 }
1194
1195 parser = Z_XMLPARSER_P(pind);
1196 xml_set_handler(&parser->startNamespaceDeclHandler, hdl);
1197 XML_SetStartNamespaceDeclHandler(parser->parser, _xml_startNamespaceDeclHandler);
1198
1199 RETURN_TRUE;
1200 }
1201 /* }}} */
1202
/* {{{ Set up end namespace declaration handler */
PHP_FUNCTION(xml_set_end_namespace_decl_handler)1204 PHP_FUNCTION(xml_set_end_namespace_decl_handler)
1205 {
1206 xml_parser *parser;
1207 zval *pind, *hdl;
1208
1209 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
1210 RETURN_THROWS();
1211 }
1212
1213 parser = Z_XMLPARSER_P(pind);
1214 xml_set_handler(&parser->endNamespaceDeclHandler, hdl);
1215 XML_SetEndNamespaceDeclHandler(parser->parser, _xml_endNamespaceDeclHandler);
1216
1217 RETURN_TRUE;
1218 }
1219 /* }}} */
1220
1221 /* {{{ Start parsing an XML document */
PHP_FUNCTION(xml_parse)1222 PHP_FUNCTION(xml_parse)
1223 {
1224 xml_parser *parser;
1225 zval *pind;
1226 char *data;
1227 size_t data_len;
1228 int ret;
1229 bool isFinal = 0;
1230
1231 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
1232 RETURN_THROWS();
1233 }
1234
1235 parser = Z_XMLPARSER_P(pind);
1236 if (parser->isparsing) {
1237 zend_throw_error(NULL, "Parser must not be called recursively");
1238 RETURN_THROWS();
1239 }
1240 parser->isparsing = 1;
1241 ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, isFinal);
1242 parser->isparsing = 0;
1243 RETVAL_LONG(ret);
1244 }
1245
1246 /* }}} */
1247
/* {{{ Parse an XML document into array structures */
PHP_FUNCTION(xml_parse_into_struct)1249 PHP_FUNCTION(xml_parse_into_struct)
1250 {
1251 xml_parser *parser;
1252 zval *pind, *xdata, *info = NULL;
1253 char *data;
1254 size_t data_len;
1255 int ret;
1256
1257 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
1258 RETURN_THROWS();
1259 }
1260
1261 parser = Z_XMLPARSER_P(pind);
1262
1263 if (parser->isparsing) {
1264 php_error_docref(NULL, E_WARNING, "Parser must not be called recursively");
1265 RETURN_FALSE;
1266 }
1267
1268 if (info) {
1269 info = zend_try_array_init(info);
1270 if (!info) {
1271 RETURN_THROWS();
1272 }
1273 }
1274
1275 xdata = zend_try_array_init(xdata);
1276 if (!xdata) {
1277 RETURN_THROWS();
1278 }
1279
1280 ZVAL_COPY_VALUE(&parser->data, xdata);
1281
1282 if (info) {
1283 ZVAL_COPY_VALUE(&parser->info, info);
1284 }
1285
1286 parser->level = 0;
1287 xml_parser_free_ltags(parser);
1288 parser->ltags = safe_emalloc(XML_MAXLEVEL, sizeof(char *), 0);
1289 memset(parser->ltags, 0, XML_MAXLEVEL * sizeof(char *));
1290
1291 XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
1292 XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
1293
1294 parser->isparsing = 1;
1295 ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, 1);
1296 parser->isparsing = 0;
1297
1298 RETVAL_LONG(ret);
1299 }
1300 /* }}} */
1301
1302 /* {{{ Get XML parser error code */
PHP_FUNCTION(xml_get_error_code)1303 PHP_FUNCTION(xml_get_error_code)
1304 {
1305 xml_parser *parser;
1306 zval *pind;
1307
1308 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1309 RETURN_THROWS();
1310 }
1311
1312 parser = Z_XMLPARSER_P(pind);
1313 RETURN_LONG((zend_long)XML_GetErrorCode(parser->parser));
1314 }
1315 /* }}} */
1316
1317 /* {{{ Get XML parser error string */
PHP_FUNCTION(xml_error_string)1318 PHP_FUNCTION(xml_error_string)
1319 {
1320 zend_long code;
1321 char *str;
1322
1323 if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &code) == FAILURE) {
1324 RETURN_THROWS();
1325 }
1326
1327 str = (char *)XML_ErrorString((int)code);
1328 if (str) {
1329 RETVAL_STRING(str);
1330 }
1331 }
1332 /* }}} */
1333
1334 /* {{{ Get current line number for an XML parser */
PHP_FUNCTION(xml_get_current_line_number)1335 PHP_FUNCTION(xml_get_current_line_number)
1336 {
1337 xml_parser *parser;
1338 zval *pind;
1339
1340 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1341 RETURN_THROWS();
1342 }
1343
1344 parser = Z_XMLPARSER_P(pind);
1345 RETVAL_LONG(XML_GetCurrentLineNumber(parser->parser));
1346 }
1347 /* }}} */
1348
1349 /* {{{ Get current column number for an XML parser */
PHP_FUNCTION(xml_get_current_column_number)1350 PHP_FUNCTION(xml_get_current_column_number)
1351 {
1352 xml_parser *parser;
1353 zval *pind;
1354
1355 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1356 RETURN_THROWS();
1357 }
1358
1359 parser = Z_XMLPARSER_P(pind);
1360 RETVAL_LONG(XML_GetCurrentColumnNumber(parser->parser));
1361 }
1362 /* }}} */
1363
1364 /* {{{ Get current byte index for an XML parser */
PHP_FUNCTION(xml_get_current_byte_index)1365 PHP_FUNCTION(xml_get_current_byte_index)
1366 {
1367 xml_parser *parser;
1368 zval *pind;
1369
1370 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1371 RETURN_THROWS();
1372 }
1373
1374 parser = Z_XMLPARSER_P(pind);
1375 RETVAL_LONG(XML_GetCurrentByteIndex(parser->parser));
1376 }
1377 /* }}} */
1378
1379 /* {{{ Free an XML parser */
PHP_FUNCTION(xml_parser_free)1380 PHP_FUNCTION(xml_parser_free)
1381 {
1382 zval *pind;
1383 xml_parser *parser;
1384
1385 if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
1386 RETURN_THROWS();
1387 }
1388
1389 parser = Z_XMLPARSER_P(pind);
1390 if (parser->isparsing == 1) {
1391 php_error_docref(NULL, E_WARNING, "Parser cannot be freed while it is parsing");
1392 RETURN_FALSE;
1393 }
1394
1395 RETURN_TRUE;
1396 }
1397 /* }}} */
1398
1399 /* {{{ Set options in an XML parser */
PHP_FUNCTION(xml_parser_set_option)1400 PHP_FUNCTION(xml_parser_set_option)
1401 {
1402 xml_parser *parser;
1403 zval *pind;
1404 zend_long opt;
1405 zval *value;
1406
1407 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Olz", &pind, xml_parser_ce, &opt, &value) == FAILURE) {
1408 RETURN_THROWS();
1409 }
1410
1411 if (Z_TYPE_P(value) != IS_FALSE && Z_TYPE_P(value) != IS_TRUE &&
1412 Z_TYPE_P(value) != IS_LONG && Z_TYPE_P(value) != IS_STRING) {
1413 php_error_docref(NULL, E_WARNING,
1414 "Argument #3 ($value) must be of type string|int|bool, %s given", zend_zval_type_name(value));
1415 }
1416
1417 parser = Z_XMLPARSER_P(pind);
1418 switch (opt) {
1419 /* Boolean option */
1420 case PHP_XML_OPTION_CASE_FOLDING:
1421 parser->case_folding = zend_is_true(value);
1422 break;
1423 /* Boolean option */
1424 case PHP_XML_OPTION_SKIP_WHITE:
1425 parser->skipwhite = zend_is_true(value);
1426 break;
1427 /* Integer option */
1428 case PHP_XML_OPTION_SKIP_TAGSTART:
1429 /* The tag start offset is stored in an int */
1430 /* TODO Improve handling of values? */
1431 parser->toffset = zval_get_long(value);
1432 if (parser->toffset < 0) {
1433 /* TODO Promote to ValueError in PHP 9.0 */
1434 php_error_docref(NULL, E_WARNING, "Argument #3 ($value) must be between 0 and %d"
1435 " for option XML_OPTION_SKIP_TAGSTART", INT_MAX);
1436 parser->toffset = 0;
1437 RETURN_FALSE;
1438 }
1439 break;
1440 /* String option */
1441 case PHP_XML_OPTION_TARGET_ENCODING: {
1442 const xml_encoding *enc;
1443 if (!try_convert_to_string(value)) {
1444 RETURN_THROWS();
1445 }
1446
1447 enc = xml_get_encoding((XML_Char*)Z_STRVAL_P(value));
1448 if (enc == NULL) {
1449 zend_argument_value_error(3, "is not a supported target encoding");
1450 RETURN_THROWS();
1451 }
1452
1453 parser->target_encoding = enc->name;
1454 break;
1455 }
1456 default:
1457 zend_argument_value_error(2, "must be a XML_OPTION_* constant");
1458 RETURN_THROWS();
1459 break;
1460 }
1461
1462 RETURN_TRUE;
1463 }
1464 /* }}} */
1465
1466 /* {{{ Get options from an XML parser */
PHP_FUNCTION(xml_parser_get_option)1467 PHP_FUNCTION(xml_parser_get_option)
1468 {
1469 xml_parser *parser;
1470 zval *pind;
1471 zend_long opt;
1472
1473 if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ol", &pind, xml_parser_ce, &opt) == FAILURE) {
1474 RETURN_THROWS();
1475 }
1476
1477 parser = Z_XMLPARSER_P(pind);
1478 switch (opt) {
1479 case PHP_XML_OPTION_CASE_FOLDING:
1480 RETURN_BOOL(parser->case_folding);
1481 break;
1482 case PHP_XML_OPTION_SKIP_TAGSTART:
1483 RETURN_LONG(parser->toffset);
1484 break;
1485 case PHP_XML_OPTION_SKIP_WHITE:
1486 RETURN_BOOL(parser->skipwhite);
1487 break;
1488 case PHP_XML_OPTION_TARGET_ENCODING:
1489 RETURN_STRING((char *)parser->target_encoding);
1490 break;
1491 default:
1492 zend_argument_value_error(2, "must be a XML_OPTION_* constant");
1493 RETURN_THROWS();
1494 }
1495 }
1496 /* }}} */
1497
1498 #endif
1499