xref: /PHP-7.4/ext/xmlrpc/libxmlrpc/xml_element.c (revision 29eb3561)
1 /*
2   This file is part of libXMLRPC - a C library for xml-encoded function calls.
3 
4   Author: Dan Libby (dan@libby.com)
5   Epinions.com may be contacted at feedback@epinions-inc.com
6 */
7 
8 /*
9   Copyright 2000 Epinions, Inc.
10 
11   Subject to the following 3 conditions, Epinions, Inc.  permits you, free
12   of charge, to (a) use, copy, distribute, modify, perform and display this
13   software and associated documentation files (the "Software"), and (b)
14   permit others to whom the Software is furnished to do so as well.
15 
16   1) The above copyright notice and this permission notice shall be included
17   without modification in all copies or substantial portions of the
18   Software.
19 
20   2) THE SOFTWARE IS PROVIDED "AS IS", WITHOUT ANY WARRANTY OR CONDITION OF
21   ANY KIND, EXPRESS, IMPLIED OR STATUTORY, INCLUDING WITHOUT LIMITATION ANY
22   IMPLIED WARRANTIES OF ACCURACY, MERCHANTABILITY, FITNESS FOR A PARTICULAR
23   PURPOSE OR NONINFRINGEMENT.
24 
25   3) IN NO EVENT SHALL EPINIONS, INC. BE LIABLE FOR ANY DIRECT, INDIRECT,
26   SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES OR LOST PROFITS ARISING OUT
27   OF OR IN CONNECTION WITH THE SOFTWARE (HOWEVER ARISING, INCLUDING
28   NEGLIGENCE), EVEN IF EPINIONS, INC.  IS AWARE OF THE POSSIBILITY OF SUCH
29   DAMAGES.
30 
31 */
32 
33 
34 /****h* ABOUT/xml_element
35  * NAME
36  *   xml_element
37  * AUTHOR
38  *   Dan Libby, aka danda  (dan@libby.com)
39  * CREATION DATE
40  *   06/2000
41  * HISTORY
42  *   $Log$
43  *   Revision 1.9.4.1.2.1  2008/12/09 17:22:12  iliaa
44  *
45  *   MFH: Fixed bug #46746 (xmlrpc_decode_request outputs non-suppressable error
46  *   when given bad data).
47  *
48  *   Revision 1.9.4.1  2006/07/30 11:34:02  tony2001
49  *   MFH: fix compile warnings (#38257)
50  *
51  *   Revision 1.9  2005/04/22 11:06:53  jorton
52  *   Fixed bug #32797 (invalid C code in xmlrpc extension).
53  *
54  *   Revision 1.8  2005/03/28 00:07:24  edink
55  *   Reshufle includes to make it compile on windows
56  *
57  *   Revision 1.7  2005/03/26 03:13:58  sniper
58  *   - Made it possible to build ext/xmlrpc with libxml2
59  *
60  *   Revision 1.6  2004/06/01 20:16:06  iliaa
61  *   Fixed bug #28597 (xmlrpc_encode_request() incorrectly encodes chars in
62  *   200-210 range).
63  *   Patch by: fernando dot nemec at folha dot com dot br
64  *
65  *   Revision 1.5  2003/12/16 21:00:21  sniper
66  *   Fix some compile warnings (patch by Joe Orton)
67  *
68  *   Revision 1.4  2002/11/26 23:01:16  fmk
69  *   removing unused variables
70  *
71  *   Revision 1.3  2002/07/05 04:43:53  danda
72  *   merged in updates from SF project.  bring php repository up to date with xmlrpc-epi version 0.51
73  *
74  *   Revision 1.9  2002/07/03 20:54:30  danda
75  *   root element should not have a parent. patch from anon SF user
76  *
77  *   Revision 1.8  2002/05/23 17:46:51  danda
78  *   patch from mukund - fix non utf-8 encoding conversions
79  *
80  *   Revision 1.7  2002/02/13 20:58:50  danda
81  *   patch to make source more windows friendly, contributed by Jeff Lawson
82  *
83  *   Revision 1.6  2002/01/08 01:06:55  danda
84  *   enable <?xml version="1.0"?> format for parsers that are very picky.
85  *
86  *   Revision 1.5  2001/09/29 21:58:05  danda
87  *   adding cvs log to history section
88  *
89  *   10/15/2000 -- danda -- adding robodoc documentation
90  * TODO
91  *   Nicer external API. Get rid of macros.  Make opaque types, etc.
92  * PORTABILITY
93  *   Coded on RedHat Linux 6.2.  Builds on Solaris x86.  Should build on just
94  *   about anything with minor mods.
95  * NOTES
96  *   This code incorporates ideas from expat-ensor from http://xml.ensor.org.
97  *
98  *   It was coded primarily to act as a go-between for expat and xmlrpc. To this
99  *   end, it stores xml elements, their sub-elements, and their attributes in an
100  *   in-memory tree.  When expat is done parsing, the tree can be walked, thus
101  *   retrieving the values.  The code can also be used to build a tree via API then
102  *   write out the tree to a buffer, thus "serializing" the xml.
103  *
104  *   It turns out this is useful for other purposes, such as parsing config files.
105  *   YMMV.
106  *
107  *   Some Features:
108  *     - output option for xml escaping data.  Choices include no escaping, entity escaping,
109  *       or CDATA sections.
110  *     - output option for character encoding.  Defaults to (none) utf-8.
111  *     - output option for verbosity/readability.  ultra-compact, newlines, pretty/level indented.
112  *
113  * BUGS
114  *   there must be some.
115  ******/
116 
117 #include "ext/xml/expat_compat.h"
118 #include <stdlib.h>
119 #include <string.h>
120 #include <ctype.h>
121 
122 #include "xml_element.h"
123 #include "queue.h"
124 #include "encodings.h"
125 
/* Release an efree()-able pointer and reset it to NULL.  Wrapped in
 * do/while(0) so the expansion is a single statement and stays correct
 * inside an unbraced if/else. */
#define my_free(thing)  do { if(thing) { efree(thing); (thing) = NULL; } } while(0)

/* Literal fragments emitted by the serializer.  Every *_LEN macro is the
 * length of the matching literal without its terminating NUL; the
 * expansions are parenthesized so they can be used inside larger
 * expressions (e.g. 2 * FOO_LEN). */
#define XML_DECL_START                 "<?xml"
#define XML_DECL_START_LEN             (sizeof(XML_DECL_START) - 1)
#define XML_DECL_VERSION               "version=\"1.0\""
#define XML_DECL_VERSION_LEN           (sizeof(XML_DECL_VERSION) - 1)
#define XML_DECL_ENCODING_ATTR         "encoding"
#define XML_DECL_ENCODING_ATTR_LEN     (sizeof(XML_DECL_ENCODING_ATTR) - 1)
#define XML_DECL_ENCODING_DEFAULT      "utf-8"
#define XML_DECL_ENCODING_DEFAULT_LEN  (sizeof(XML_DECL_ENCODING_DEFAULT) - 1)
#define XML_DECL_END                   "?>"
#define XML_DECL_END_LEN               (sizeof(XML_DECL_END) - 1)
#define START_TOKEN_BEGIN              "<"
#define START_TOKEN_BEGIN_LEN          (sizeof(START_TOKEN_BEGIN) - 1)
#define START_TOKEN_END                ">"
#define START_TOKEN_END_LEN            (sizeof(START_TOKEN_END) - 1)
#define EMPTY_START_TOKEN_END          "/>"
#define EMPTY_START_TOKEN_END_LEN      (sizeof(EMPTY_START_TOKEN_END) - 1)
#define END_TOKEN_BEGIN                "</"
#define END_TOKEN_BEGIN_LEN            (sizeof(END_TOKEN_BEGIN) - 1)
#define END_TOKEN_END                  ">"
#define END_TOKEN_END_LEN              (sizeof(END_TOKEN_END) - 1)
#define ATTR_DELIMITER                 "\""
#define ATTR_DELIMITER_LEN             (sizeof(ATTR_DELIMITER) - 1)
#define CDATA_BEGIN                    "<![CDATA["
#define CDATA_BEGIN_LEN                (sizeof(CDATA_BEGIN) - 1)
#define CDATA_END                      "]]>"
#define CDATA_END_LEN                  (sizeof(CDATA_END) - 1)
#define EQUALS                         "="
#define EQUALS_LEN                     (sizeof(EQUALS) - 1)
#define WHITESPACE                     " "
#define WHITESPACE_LEN                 (sizeof(WHITESPACE) - 1)
#define NEWLINE                        "\n"
#define NEWLINE_LEN                    (sizeof(NEWLINE) - 1)
#define MAX_VAL_BUF                    144
#define SCALAR_STR                     "SCALAR"
#define SCALAR_STR_LEN                 (sizeof(SCALAR_STR) - 1)
#define VECTOR_STR                     "VECTOR"
#define VECTOR_STR_LEN                 (sizeof(VECTOR_STR) - 1)
#define RESPONSE_STR                   "RESPONSE"
#define RESPONSE_STR_LEN               (sizeof(RESPONSE_STR) - 1)
167 
168 
169 /*-----------------------------
170 - Begin xml_element Functions -
171 -----------------------------*/
172 
173 /****f* xml_element/xml_elem_free_non_recurse
174  * NAME
175  *   xml_elem_free_non_recurse
176  * SYNOPSIS
177  *   void xml_elem_free_non_recurse(xml_element* root)
178  * FUNCTION
179  *   free a single xml element.  child elements will not be freed.
180  * INPUTS
181  *   root - the element to free
182  * RESULT
183  *   void
184  * NOTES
185  * SEE ALSO
186  *   xml_elem_free ()
187  *   xml_elem_new ()
188  * SOURCE
189  */
xml_elem_free_non_recurse(xml_element * root)190 void xml_elem_free_non_recurse(xml_element* root) {
191    if(root) {
192       xml_element_attr* attrs = Q_Head(&root->attrs);
193       while(attrs) {
194          my_free(attrs->key);
195          my_free(attrs->val);
196          my_free(attrs);
197          attrs = Q_Next(&root->attrs);
198       }
199 
200       Q_Destroy(&root->children);
201       Q_Destroy(&root->attrs);
202       if(root->name) {
203           efree((char *)root->name);
204           root->name = NULL;
205       }
206       simplestring_free(&root->text);
207       my_free(root);
208    }
209 }
210 /******/
211 
212 /****f* xml_element/xml_elem_free
213  * NAME
214  *   xml_elem_free
215  * SYNOPSIS
216  *   void xml_elem_free(xml_element* root)
217  * FUNCTION
218  *   free an xml element and all of its child elements
219  * INPUTS
220  *   root - the root of an xml tree you would like to free
221  * RESULT
222  *   void
223  * NOTES
224  * SEE ALSO
225  *   xml_elem_free_non_recurse ()
226  *   xml_elem_new ()
227  * SOURCE
228  */
xml_elem_free(xml_element * root)229 void xml_elem_free(xml_element* root) {
230    if(root) {
231       xml_element* kids = Q_Head(&root->children);
232       while(kids) {
233          xml_elem_free(kids);
234          kids = Q_Next(&root->children);
235       }
236       xml_elem_free_non_recurse(root);
237    }
238 }
239 /******/
240 
241 /****f* xml_element/xml_elem_new
242  * NAME
243  *   xml_elem_new
244  * SYNOPSIS
245  *   xml_element* xml_elem_new()
246  * FUNCTION
247  *   allocates and initializes a new xml_element
248  * INPUTS
249  *   none
250  * RESULT
251  *   xml_element* or NULL.  NULL indicates an out-of-memory condition.
252  * NOTES
253  * SEE ALSO
254  *   xml_elem_free ()
255  *   xml_elem_free_non_recurse ()
256  * SOURCE
257  */
xml_elem_new()258 xml_element* xml_elem_new() {
259    xml_element* elem = ecalloc(1, sizeof(xml_element));
260    if(elem) {
261       Q_Init(&elem->children);
262       Q_Init(&elem->attrs);
263       simplestring_init(&elem->text);
264 
265       /* init empty string in case we don't find any char data */
266       simplestring_addn(&elem->text, "", 0);
267    }
268    return elem;
269 }
270 /******/
271 
/*
 * xml_elem_writefunc
 *
 * Forwards `text` to the output callback `fptr`, passing `data` through
 * unchanged.  A `len` of 0 means "measure it": strlen(text) is used instead.
 * Returns whatever the callback returns, or 0 without calling it when
 * either `fptr` or `text` is NULL.
 */
static int xml_elem_writefunc(int (*fptr)(void *data, const char *text, int size), const char *text, void *data, int len)
{
   int size = len;

   if(!fptr || !text) {
      return 0;
   }
   if(!size) {
      size = strlen(text);
   }
   return fptr(data, text, size);
}
276 
277 
278 
/*
 * create_xml_escape
 *
 * Writes the decimal character reference for `c` into pString:
 * "&#", the value as two digits (three when c >= 100), then ";".
 * Values below 10 are zero-padded, e.g. 9 becomes "&#09;".
 * The output is NOT NUL-terminated.  Returns the number of bytes written
 * (5 or 6); pString must have room for that many.
 */
static int create_xml_escape(char *pString, unsigned char c)
{
  char *out = pString;
  const unsigned hundreds = c / 100u;
  const unsigned tens = (c % 100u) / 10u;
  const unsigned ones = c % 10u;

  *out++ = '&';
  *out++ = '#';
  if(hundreds) {
    *out++ = (char)('0' + hundreds);
  }
  *out++ = (char)('0' + tens);
  *out++ = (char)('0' + ones);
  *out++ = ';';

  return (int)(out - pString);
}
296 
297 #define non_ascii(c) (c > 127)
298 #define non_print(c) (!isprint(c))
299 #define markup(c) (c == '&' || c == '\"' || c == '>' || c == '<')
300 #define entity_length(c) ( (c >= 100) ? 3 : ((c >= 10) ? 2 : 1) ) + 3; /* "&#" + c + ";" */
301 
302 /*
303  * xml_elem_entity_escape
304  *
305  * Purpose:
306  *   escape reserved xml chars and non utf-8 chars as xml entities
307  * Comments:
308  *   The return value may be a new string, or null if no
309  *     conversion was performed. In the latter case, *newlen will
310  *     be 0.
311  * Flags (to escape)
312  *  xml_elem_no_escaping             = 0x000,
313  *  xml_elem_entity_escaping         = 0x002,   // escape xml special chars as entities
314  *  xml_elem_non_ascii_escaping      = 0x008,   // escape chars above 127
315  *  xml_elem_cdata_escaping          = 0x010,   // wrap in cdata
316  */
xml_elem_entity_escape(const char * buf,int old_len,int * newlen,XML_ELEM_ESCAPING flags)317 static char* xml_elem_entity_escape(const char* buf, int old_len, int *newlen, XML_ELEM_ESCAPING flags) {
318   char *pRetval = 0;
319   int iNewBufLen=0;
320 
321 #define should_escape(c, flag) ( ((flag & xml_elem_markup_escaping) && markup(c)) || \
322                                  ((flag & xml_elem_non_ascii_escaping) && non_ascii(c)) || \
323                                  ((flag & xml_elem_non_print_escaping) && non_print(c)) )
324 
325   if(buf && *buf) {
326     const unsigned char *bufcopy;
327     char *NewBuffer;
328     int ToBeXmlEscaped=0;
329     int iLength;
330     bufcopy = (const unsigned char *) buf;
331     iLength= old_len ? old_len : strlen(buf);
332     while(*bufcopy) {
333       if( should_escape(*bufcopy, flags) ) {
334 	/* the length will increase by length of xml escape - the character length */
335 	iLength += entity_length(*bufcopy);
336 	ToBeXmlEscaped=1;
337       }
338       bufcopy++;
339     }
340 
341     if(ToBeXmlEscaped) {
342 
343       NewBuffer= emalloc(iLength+1);
344       if(NewBuffer) {
345 	bufcopy = (const unsigned char *) buf;
346 	while(*bufcopy) {
347 	  if(should_escape(*bufcopy, flags)) {
348 	    iNewBufLen += create_xml_escape(NewBuffer+iNewBufLen,*bufcopy);
349 	  }
350 	  else {
351 	    NewBuffer[iNewBufLen++]=*bufcopy;
352 	  }
353 	  bufcopy++;
354 	}
355 	NewBuffer[iNewBufLen] = 0;
356 	pRetval = NewBuffer;
357       }
358     }
359   }
360 
361   if(newlen) {
362      *newlen = iNewBufLen;
363   }
364 
365   return pRetval;
366 }
367 
368 
/* Write the indentation for an element at `depth`: depth - 2 spaces.
 * The spaces are emitted in bounded chunks so that arbitrarily deep trees
 * never read past the end of the static buffer. */
static void xml_elem_write_indent(int (*fptr)(void *data, const char *text, int size), void *data, int depth)
{
   static const char spaces[] = "                                                                ";
   const int chunk_max = (int)(sizeof(spaces) - 1);
   int remaining = depth - 2;

   while(remaining > 0) {
      const int chunk = remaining < chunk_max ? remaining : chunk_max;
      xml_elem_writefunc(fptr, spaces, data, chunk);
      remaining -= chunk;
   }
}

/* Write the <?xml version="1.0" [encoding="..."]?> declaration, followed by
 * a newline unless white space is disabled in `options`. */
static void xml_elem_write_decl(int (*fptr)(void *data, const char *text, int size), void *data, XML_ELEM_OUTPUT_OPTIONS options)
{
   xml_elem_writefunc(fptr, XML_DECL_START, data, XML_DECL_START_LEN);
   xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
   xml_elem_writefunc(fptr, XML_DECL_VERSION, data, XML_DECL_VERSION_LEN);
   if(options->encoding && *options->encoding) {
      xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
      xml_elem_writefunc(fptr, XML_DECL_ENCODING_ATTR, data, XML_DECL_ENCODING_ATTR_LEN);
      xml_elem_writefunc(fptr, EQUALS, data, EQUALS_LEN);
      xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
      xml_elem_writefunc(fptr, options->encoding, data, 0);
      xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
   }
   xml_elem_writefunc(fptr, XML_DECL_END, data, XML_DECL_END_LEN);
   if(options->verbosity != xml_elem_no_white_space) {
      xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
   }
}

/*
 * xml_element_serialize
 *
 * Recursively writes `el` and its subtree as XML through the callback fptr.
 *
 *   el      - element to write; NULL writes nothing.
 *   fptr    - output callback, invoked as fptr(data, text, size).
 *   data    - opaque pointer handed to fptr.
 *   options - output options; NULL selects pretty printing, markup and
 *             non-printable escaping, and the default encoding.
 *   depth   - nesting level of the caller; pass 0 for the tree root.  The
 *             XML declaration is written only at the root.
 *
 * An element with neither text nor children is written as <name/>.  An
 * element that has text is written with that text only; its children, if
 * any, are not visited.  Attribute values are written as stored (they are
 * not escaped).  An element without a name is written as "None" and its
 * attributes are skipped.
 */
static void xml_element_serialize(xml_element *el, int (*fptr)(void *data, const char *text, int size), void *data, XML_ELEM_OUTPUT_OPTIONS options, int depth)
{
   static STRUCT_XML_ELEM_OUTPUT_OPTIONS default_opts = {xml_elem_pretty, xml_elem_markup_escaping | xml_elem_non_print_escaping, XML_DECL_ENCODING_DEFAULT};

   if(!el) {
      /* nothing to write */
      return;
   }
   if(!options) {
      options = &default_opts;
   }
   depth++;

   /* print xml declaration if at root level */
   if(depth == 1) {
      xml_elem_write_decl(fptr, data, options);
   }

   if(options->verbosity == xml_elem_pretty && depth > 2) {
      xml_elem_write_indent(fptr, data, depth);
   }

   /* begin element */
   xml_elem_writefunc(fptr, START_TOKEN_BEGIN, data, START_TOKEN_BEGIN_LEN);
   if(el->name) {
      xml_elem_writefunc(fptr, el->name, data, 0);

      /* write attrs, if any */
      if(Q_Size(&el->attrs)) {
         xml_element_attr* iter = Q_Head(&el->attrs);
         while(iter) {
            xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
            xml_elem_writefunc(fptr, iter->key, data, 0);
            xml_elem_writefunc(fptr, EQUALS, data, EQUALS_LEN);
            xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
            xml_elem_writefunc(fptr, iter->val, data, 0);
            xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);

            iter = Q_Next(&el->attrs);
         }
      }
   }
   else {
      xml_elem_writefunc(fptr, "None", data, 0);
   }

   /* if no text and no children, use abbreviated form, eg: <foo/> */
   if(!el->text.len && !Q_Size(&el->children)) {
      xml_elem_writefunc(fptr, EMPTY_START_TOKEN_END, data, EMPTY_START_TOKEN_END_LEN);
   }
   /* otherwise, print element contents */
   else {
      xml_elem_writefunc(fptr, START_TOKEN_END, data, START_TOKEN_END_LEN);

      /* print text, if any */
      if(el->text.len) {
         char* escaped_str = el->text.str;
         int buflen = el->text.len;

         if(options->escaping && options->escaping != xml_elem_cdata_escaping) {
            escaped_str = xml_elem_entity_escape(el->text.str, buflen, &buflen, options->escaping);
            if(!escaped_str) {
               /* nothing needed escaping: buflen is now 0, which makes
                * xml_elem_writefunc() fall back to strlen() below */
               escaped_str = el->text.str;
            }
         }

         if(options->escaping & xml_elem_cdata_escaping) {
            xml_elem_writefunc(fptr, CDATA_BEGIN, data, CDATA_BEGIN_LEN);
         }

         xml_elem_writefunc(fptr, escaped_str, data, buflen);

         /* only free what xml_elem_entity_escape() allocated */
         if(escaped_str != el->text.str) {
            my_free(escaped_str);
         }

         if(options->escaping & xml_elem_cdata_escaping) {
            xml_elem_writefunc(fptr, CDATA_END, data, CDATA_END_LEN);
         }
      }
      /* no text, so print child elems */
      else {
         xml_element *kids = Q_Head(&el->children);
         int wrote_child = 0;

         while(kids) {
            /* break the line once, before the first child */
            if(!wrote_child) {
               wrote_child = 1;
               if(options->verbosity != xml_elem_no_white_space) {
                  xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
               }
            }
            xml_element_serialize(kids, fptr, data, options, depth);
            kids = Q_Next(&el->children);
         }
         /* re-indent so the closing tag lines up with the opening tag */
         if(wrote_child && options->verbosity == xml_elem_pretty && depth > 2) {
            xml_elem_write_indent(fptr, data, depth);
         }
      }

      xml_elem_writefunc(fptr, END_TOKEN_BEGIN, data, END_TOKEN_BEGIN_LEN);
      xml_elem_writefunc(fptr, el->name ? el->name : "None", data, 0);
      xml_elem_writefunc(fptr, END_TOKEN_END, data, END_TOKEN_END_LEN);
   }

   if(options->verbosity != xml_elem_no_white_space) {
      xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
   }
}
493 
/*
 * file_out_fptr
 *
 * Output callback that writes to a stdio stream.
 *   f    - the destination, a FILE* passed as void*.
 *   text - bytes to write.
 *   size - number of bytes of text to write.
 * Exactly `size` bytes are written; text is not required to be
 * NUL-terminated at that position (the serializer passes length-limited
 * slices such as indentation).  A NULL stream, NULL text or a size <= 0
 * writes nothing.  Always returns 0; write errors are not reported.
 */
static int file_out_fptr(void *f, const char *text, int size)
{
   FILE *stream = (FILE *)f;

   if(stream && text && size > 0) {
      fwrite(text, 1, (size_t)size, stream);
   }
   return 0;
}
500 
501 /* print buf to simplestring */
simplestring_out_fptr(void * f,const char * text,int size)502 static int simplestring_out_fptr(void *f, const char *text, int size)
503 {
504    simplestring* buf = (simplestring*)f;
505    if(buf) {
506       simplestring_addn(buf, text, size);
507    }
508    return 0;
509 }
510 
511 /****f* xml_element/xml_elem_serialize_to_string
512  * NAME
513  *   xml_elem_serialize_to_string
514  * SYNOPSIS
515  *   void xml_element_serialize_to_string(xml_element *el, XML_ELEM_OUTPUT_OPTIONS options, int *buf_len)
516  * FUNCTION
517  *   writes element tree as XML into a newly allocated buffer
518  * INPUTS
519  *   el      - root element of tree
520  *   options - options determining how output is written.  see XML_ELEM_OUTPUT_OPTIONS
521  *   buf_len - length of returned buffer, if not null.
522  * RESULT
523  *   char* or NULL. Must be free'd by caller.
524  * NOTES
525  * SEE ALSO
526  *   xml_elem_serialize_to_stream ()
527  *   xml_elem_parse_buf ()
528  * SOURCE
529  */
xml_elem_serialize_to_string(xml_element * el,XML_ELEM_OUTPUT_OPTIONS options,int * buf_len)530 char* xml_elem_serialize_to_string(xml_element *el, XML_ELEM_OUTPUT_OPTIONS options, int *buf_len)
531 {
532    simplestring buf;
533    simplestring_init(&buf);
534 
535    xml_element_serialize(el, simplestring_out_fptr, (void *)&buf, options, 0);
536 
537    if(buf_len) {
538       *buf_len = buf.len;
539    }
540 
541    return buf.str;
542 }
543 /******/
544 
545 /****f* xml_element/xml_elem_serialize_to_stream
546  * NAME
547  *   xml_elem_serialize_to_stream
548  * SYNOPSIS
549  *   void xml_elem_serialize_to_stream(xml_element *el, FILE *output, XML_ELEM_OUTPUT_OPTIONS options)
550  * FUNCTION
551  *   writes element tree as XML into a stream (typically an opened file)
552  * INPUTS
553  *   el      - root element of tree
554  *   output  - stream handle
555  *   options - options determining how output is written.  see XML_ELEM_OUTPUT_OPTIONS
556  * RESULT
557  *   void
558  * NOTES
559  * SEE ALSO
560  *   xml_elem_serialize_to_string ()
561  *   xml_elem_parse_buf ()
562  * SOURCE
563  */
xml_elem_serialize_to_stream(xml_element * el,FILE * output,XML_ELEM_OUTPUT_OPTIONS options)564 void xml_elem_serialize_to_stream(xml_element *el, FILE *output, XML_ELEM_OUTPUT_OPTIONS options)
565 {
566    xml_element_serialize(el, file_out_fptr, (void *)output, options, 0);
567 }
568 /******/
569 
570 /*--------------------------*
571 * End xml_element Functions *
572 *--------------------------*/
573 
574 
575 /*----------------------
576 * Begin Expat Handlers *
577 *---------------------*/
578 
579 typedef struct _xml_elem_data {
580    xml_element*           root;
581    xml_element*           current;
582    XML_ELEM_INPUT_OPTIONS input_options;
583    int                    needs_enc_conversion;
584 } xml_elem_data;
585 
586 
587 /* expat start of element handler */
_xmlrpc_startElement(void * userData,const char * name,const char ** attrs)588 static void _xmlrpc_startElement(void *userData, const char *name, const char **attrs)
589 {
590    xml_element *c;
591    xml_elem_data* mydata = (xml_elem_data*)userData;
592    const char** p = attrs;
593 
594    if(mydata) {
595       c = mydata->current;
596 
597       mydata->current = xml_elem_new();
598       mydata->current->name = (char*)estrdup(name);
599       mydata->current->parent = c;
600 
601       /* init attrs */
602       while(p && *p) {
603          xml_element_attr* attr = emalloc(sizeof(xml_element_attr));
604          if(attr) {
605             attr->key = estrdup(*p);
606             attr->val = estrdup(*(p+1));
607             Q_PushTail(&mydata->current->attrs, attr);
608 
609             p += 2;
610          }
611       }
612    }
613 }
614 
615 /* expat end of element handler */
_xmlrpc_endElement(void * userData,const char * name)616 static void _xmlrpc_endElement(void *userData, const char *name)
617 {
618    xml_elem_data* mydata = (xml_elem_data*)userData;
619 
620    if(mydata && mydata->current && mydata->current->parent) {
621       Q_PushTail(&mydata->current->parent->children, mydata->current);
622 
623       mydata->current = mydata->current->parent;
624    }
625 }
626 
627 /* expat char data handler */
_xmlrpc_charHandler(void * userData,const char * s,int len)628 static void _xmlrpc_charHandler(void *userData,
629                         const char *s,
630                         int len)
631 {
632    xml_elem_data* mydata = (xml_elem_data*)userData;
633    if(mydata && mydata->current) {
634 
635       /* Check if we need to decode utf-8 parser output to another encoding */
636       if(mydata->needs_enc_conversion && mydata->input_options->encoding) {
637          int new_len = 0;
638          char* add_text = utf8_decode(s, len, &new_len, mydata->input_options->encoding);
639          if(add_text) {
640             len = new_len;
641             simplestring_addn(&mydata->current->text, add_text, len);
642             efree(add_text);
643             return;
644          }
645       }
646       simplestring_addn(&mydata->current->text, s, len);
647    }
648 }
649 /******/
650 
651 /*-------------------*
652 * End Expat Handlers *
653 *-------------------*/
654 
655 /*-------------------*
656 * xml_elem_parse_buf *
657 *-------------------*/
658 
659 /****f* xml_element/xml_elem_parse_buf
660  * NAME
661  *   xml_elem_parse_buf
662  * SYNOPSIS
663  *   xml_element* xml_elem_parse_buf(const char* in_buf, int len, XML_ELEM_INPUT_OPTIONS options, XML_ELEM_ERROR error)
664  * FUNCTION
665  *   parse a buffer containing XML into an xml_element in-memory tree
666  * INPUTS
667  *   in_buf   - buffer containing XML document
668  *   len      - length of buffer
669  *   options  - input options. optional
670  *   error    - error result data. optional. check if result is null.
671  * RESULT
672  *   void
673  * NOTES
674  *   The returned data must be free'd by caller
675  * SEE ALSO
676  *   xml_elem_serialize_to_string ()
677  *   xml_elem_free ()
678  * SOURCE
679  */
xml_elem_parse_buf(const char * in_buf,int len,XML_ELEM_INPUT_OPTIONS options,XML_ELEM_ERROR error)680 xml_element* xml_elem_parse_buf(const char* in_buf, int len, XML_ELEM_INPUT_OPTIONS options, XML_ELEM_ERROR error)
681 {
682    xml_element* xReturn = NULL;
683    char buf[100] = "";
684    static STRUCT_XML_ELEM_INPUT_OPTIONS default_opts = {encoding_utf_8};
685 
686    if(!options) {
687       options = &default_opts;
688    }
689 
690    if(in_buf) {
691       XML_Parser parser;
692       xml_elem_data mydata = {0};
693 
694       parser = XML_ParserCreate(NULL);
695 
696       mydata.root = xml_elem_new();
697       mydata.current = mydata.root;
698       mydata.input_options = options;
699       mydata.needs_enc_conversion = options->encoding && strcmp(options->encoding, encoding_utf_8);
700 
701       XML_SetElementHandler(parser, (XML_StartElementHandler)_xmlrpc_startElement, (XML_EndElementHandler)_xmlrpc_endElement);
702       XML_SetCharacterDataHandler(parser, (XML_CharacterDataHandler)_xmlrpc_charHandler);
703 
704       /* pass the xml_elem_data struct along */
705       XML_SetUserData(parser, (void*)&mydata);
706 
707       if(!len) {
708          len = strlen(in_buf);
709       }
710 
711       /* parse the XML */
712       if(XML_Parse(parser, (const unsigned char *) in_buf, len, 1) == 0) {
713          enum XML_Error err_code = XML_GetErrorCode(parser);
714          int line_num = XML_GetCurrentLineNumber(parser);
715          int col_num = XML_GetCurrentColumnNumber(parser);
716          long byte_idx = XML_GetCurrentByteIndex(parser);
717 /*         int byte_total = XML_GetCurrentByteCount(parser); */
718          const char * error_str = (const char *) XML_ErrorString(err_code);
719          if(byte_idx > len) {
720              byte_idx = len;
721          }
722          if(byte_idx >= 0) {
723              snprintf(buf,
724                       sizeof(buf),
725                       "\n\tdata beginning %ld before byte index: %s\n",
726                       byte_idx > 10  ? 10 : byte_idx,
727                       in_buf + (byte_idx > 10 ? byte_idx - 10 : byte_idx));
728          }
729 /*
730          fprintf(stderr, "expat reports error code %i\n"
731                 "\tdescription: %s\n"
732                 "\tline: %i\n"
733                 "\tcolumn: %i\n"
734                 "\tbyte index: %ld\n"
735                 "\ttotal bytes: %i\n%s ",
736                 err_code, error_str, line_num,
737                 col_num, byte_idx, byte_total, buf);
738 */
739 
740           /* error condition */
741           if(error) {
742               error->parser_code = (long)err_code;
743               error->line = line_num;
744               error->column = col_num;
745               error->byte_index = byte_idx;
746               error->parser_error = error_str;
747           }
748       }
749       else {
750          xReturn = (xml_element*)Q_Head(&mydata.root->children);
751          xReturn->parent = NULL;
752       }
753 
754       XML_ParserFree(parser);
755 
756 
757       xml_elem_free_non_recurse(mydata.root);
758    }
759 
760    return xReturn;
761 }
762 
763 /******/
764