/* Source: PHP-5.5 ext/xmlrpc/libxmlrpc/xml_element.c (revision 5856a68c) */
1 /*
2   This file is part of libXMLRPC - a C library for xml-encoded function calls.
3 
4   Author: Dan Libby (dan@libby.com)
5   Epinions.com may be contacted at feedback@epinions-inc.com
6 */
7 
8 /*
9   Copyright 2000 Epinions, Inc.
10 
11   Subject to the following 3 conditions, Epinions, Inc.  permits you, free
12   of charge, to (a) use, copy, distribute, modify, perform and display this
13   software and associated documentation files (the "Software"), and (b)
14   permit others to whom the Software is furnished to do so as well.
15 
16   1) The above copyright notice and this permission notice shall be included
17   without modification in all copies or substantial portions of the
18   Software.
19 
20   2) THE SOFTWARE IS PROVIDED "AS IS", WITHOUT ANY WARRANTY OR CONDITION OF
21   ANY KIND, EXPRESS, IMPLIED OR STATUTORY, INCLUDING WITHOUT LIMITATION ANY
22   IMPLIED WARRANTIES OF ACCURACY, MERCHANTABILITY, FITNESS FOR A PARTICULAR
23   PURPOSE OR NONINFRINGEMENT.
24 
25   3) IN NO EVENT SHALL EPINIONS, INC. BE LIABLE FOR ANY DIRECT, INDIRECT,
26   SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES OR LOST PROFITS ARISING OUT
27   OF OR IN CONNECTION WITH THE SOFTWARE (HOWEVER ARISING, INCLUDING
28   NEGLIGENCE), EVEN IF EPINIONS, INC.  IS AWARE OF THE POSSIBILITY OF SUCH
29   DAMAGES.
30 
31 */
32 
33 
/* Version-control identification string embedded in the object file. */
static const char rcsid[] = "#(@) $Id$";
35 
36 
37 
38 /****h* ABOUT/xml_element
39  * NAME
40  *   xml_element
41  * AUTHOR
42  *   Dan Libby, aka danda  (dan@libby.com)
43  * CREATION DATE
44  *   06/2000
45  * HISTORY
46  *   $Log$
47  *   Revision 1.9.4.1.2.1  2008/12/09 17:22:12  iliaa
48  *
49  *   MFH: Fixed bug #46746 (xmlrpc_decode_request outputs non-suppressable error
50  *   when given bad data).
51  *
52  *   Revision 1.9.4.1  2006/07/30 11:34:02  tony2001
53  *   MFH: fix compile warnings (#38257)
54  *
55  *   Revision 1.9  2005/04/22 11:06:53  jorton
56  *   Fixed bug #32797 (invalid C code in xmlrpc extension).
57  *
58  *   Revision 1.8  2005/03/28 00:07:24  edink
59  *   Reshufle includes to make it compile on windows
60  *
61  *   Revision 1.7  2005/03/26 03:13:58  sniper
62  *   - Made it possible to build ext/xmlrpc with libxml2
63  *
64  *   Revision 1.6  2004/06/01 20:16:06  iliaa
65  *   Fixed bug #28597 (xmlrpc_encode_request() incorrectly encodes chars in
66  *   200-210 range).
67  *   Patch by: fernando dot nemec at folha dot com dot br
68  *
69  *   Revision 1.5  2003/12/16 21:00:21  sniper
70  *   Fix some compile warnings (patch by Joe Orton)
71  *
72  *   Revision 1.4  2002/11/26 23:01:16  fmk
73  *   removing unused variables
74  *
75  *   Revision 1.3  2002/07/05 04:43:53  danda
76  *   merged in updates from SF project.  bring php repository up to date with xmlrpc-epi version 0.51
77  *
78  *   Revision 1.9  2002/07/03 20:54:30  danda
79  *   root element should not have a parent. patch from anon SF user
80  *
81  *   Revision 1.8  2002/05/23 17:46:51  danda
82  *   patch from mukund - fix non utf-8 encoding conversions
83  *
84  *   Revision 1.7  2002/02/13 20:58:50  danda
85  *   patch to make source more windows friendly, contributed by Jeff Lawson
86  *
87  *   Revision 1.6  2002/01/08 01:06:55  danda
88  *   enable <?xml version="1.0"?> format for parsers that are very picky.
89  *
90  *   Revision 1.5  2001/09/29 21:58:05  danda
91  *   adding cvs log to history section
92  *
93  *   10/15/2000 -- danda -- adding robodoc documentation
94  * TODO
95  *   Nicer external API. Get rid of macros.  Make opaque types, etc.
96  * PORTABILITY
97  *   Coded on RedHat Linux 6.2.  Builds on Solaris x86.  Should build on just
98  *   about anything with minor mods.
99  * NOTES
100  *   This code incorporates ideas from expat-ensor from http://xml.ensor.org.
101  *
102  *   It was coded primarily to act as a go-between for expat and xmlrpc. To this
103  *   end, it stores xml elements, their sub-elements, and their attributes in an
104  *   in-memory tree.  When expat is done parsing, the tree can be walked, thus
105  *   retrieving the values.  The code can also be used to build a tree via API then
106  *   write out the tree to a buffer, thus "serializing" the xml.
107  *
108  *   It turns out this is useful for other purposes, such as parsing config files.
109  *   YMMV.
110  *
111  *   Some Features:
112  *     - output option for xml escaping data.  Choices include no escaping, entity escaping,
113  *       or CDATA sections.
114  *     - output option for character encoding.  Defaults to (none) utf-8.
115  *     - output option for verbosity/readability.  ultra-compact, newlines, pretty/level indented.
116  *
117  * BUGS
118  *   there must be some.
119  ******/
120 
121 #include "ext/xml/expat_compat.h"
122 #ifdef _WIN32
123 #include "xmlrpc_win32.h"
124 #endif
125 #include <stdlib.h>
126 #include <string.h>
127 #include <ctype.h>
128 
129 #include "xml_element.h"
130 #include "queue.h"
131 #include "encodings.h"
132 
/*
 * Free `thing` (if non-NULL) and reset it to NULL.
 * Wrapped in do/while(0) so the macro is a single statement and stays
 * correct inside an unbraced if/else.  Note that `thing` is evaluated
 * more than once, so it must not have side effects.
 */
#define my_free(thing)  do { if(thing) { free(thing); (thing) = NULL; } } while(0)

/*
 * Literal fragments emitted by the serializer.  Each string has a matching
 * *_LEN giving its length without the terminating NUL.  The *_LEN
 * expansions are fully parenthesized so they keep their value when used
 * inside a larger expression (e.g. 2 * CDATA_END_LEN).
 */
#define XML_DECL_START                 "<?xml"
#define XML_DECL_START_LEN             (sizeof(XML_DECL_START) - 1)
#define XML_DECL_VERSION               "version=\"1.0\""
#define XML_DECL_VERSION_LEN           (sizeof(XML_DECL_VERSION) - 1)
#define XML_DECL_ENCODING_ATTR         "encoding"
#define XML_DECL_ENCODING_ATTR_LEN     (sizeof(XML_DECL_ENCODING_ATTR) - 1)
#define XML_DECL_ENCODING_DEFAULT      "utf-8"
#define XML_DECL_ENCODING_DEFAULT_LEN  (sizeof(XML_DECL_ENCODING_DEFAULT) - 1)
#define XML_DECL_END                   "?>"
#define XML_DECL_END_LEN               (sizeof(XML_DECL_END) - 1)
#define START_TOKEN_BEGIN              "<"
#define START_TOKEN_BEGIN_LEN          (sizeof(START_TOKEN_BEGIN) - 1)
#define START_TOKEN_END                ">"
#define START_TOKEN_END_LEN            (sizeof(START_TOKEN_END) - 1)
#define EMPTY_START_TOKEN_END          "/>"
#define EMPTY_START_TOKEN_END_LEN      (sizeof(EMPTY_START_TOKEN_END) - 1)
#define END_TOKEN_BEGIN                "</"
#define END_TOKEN_BEGIN_LEN            (sizeof(END_TOKEN_BEGIN) - 1)
#define END_TOKEN_END                  ">"
#define END_TOKEN_END_LEN              (sizeof(END_TOKEN_END) - 1)
#define ATTR_DELIMITER                 "\""
#define ATTR_DELIMITER_LEN             (sizeof(ATTR_DELIMITER) - 1)
#define CDATA_BEGIN                    "<![CDATA["
#define CDATA_BEGIN_LEN                (sizeof(CDATA_BEGIN) - 1)
#define CDATA_END                      "]]>"
#define CDATA_END_LEN                  (sizeof(CDATA_END) - 1)
#define EQUALS                         "="
#define EQUALS_LEN                     (sizeof(EQUALS) - 1)
#define WHITESPACE                     " "
#define WHITESPACE_LEN                 (sizeof(WHITESPACE) - 1)
#define NEWLINE                        "\n"
#define NEWLINE_LEN                    (sizeof(NEWLINE) - 1)
#define MAX_VAL_BUF                    144
#define SCALAR_STR                     "SCALAR"
#define SCALAR_STR_LEN                 (sizeof(SCALAR_STR) - 1)
#define VECTOR_STR                     "VECTOR"
#define VECTOR_STR_LEN                 (sizeof(VECTOR_STR) - 1)
#define RESPONSE_STR                   "RESPONSE"
#define RESPONSE_STR_LEN               (sizeof(RESPONSE_STR) - 1)
174 
175 
176 /*-----------------------------
177 - Begin xml_element Functions -
178 -----------------------------*/
179 
180 /****f* xml_element/xml_elem_free_non_recurse
181  * NAME
182  *   xml_elem_free_non_recurse
183  * SYNOPSIS
184  *   void xml_elem_free_non_recurse(xml_element* root)
185  * FUNCTION
186  *   free a single xml element.  child elements will not be freed.
187  * INPUTS
188  *   root - the element to free
189  * RESULT
190  *   void
191  * NOTES
192  * SEE ALSO
193  *   xml_elem_free ()
194  *   xml_elem_new ()
195  * SOURCE
196  */
xml_elem_free_non_recurse(xml_element * root)197 void xml_elem_free_non_recurse(xml_element* root) {
198    if(root) {
199       xml_element_attr* attrs = Q_Head(&root->attrs);
200       while(attrs) {
201          my_free(attrs->key);
202          my_free(attrs->val);
203          my_free(attrs);
204          attrs = Q_Next(&root->attrs);
205       }
206 
207       Q_Destroy(&root->children);
208       Q_Destroy(&root->attrs);
209       if(root->name) {
210           free((char *)root->name);
211           root->name = NULL;
212       }
213       simplestring_free(&root->text);
214       my_free(root);
215    }
216 }
217 /******/
218 
219 /****f* xml_element/xml_elem_free
220  * NAME
221  *   xml_elem_free
222  * SYNOPSIS
223  *   void xml_elem_free(xml_element* root)
224  * FUNCTION
225  *   free an xml element and all of its child elements
226  * INPUTS
227  *   root - the root of an xml tree you would like to free
228  * RESULT
229  *   void
230  * NOTES
231  * SEE ALSO
232  *   xml_elem_free_non_recurse ()
233  *   xml_elem_new ()
234  * SOURCE
235  */
xml_elem_free(xml_element * root)236 void xml_elem_free(xml_element* root) {
237    if(root) {
238       xml_element* kids = Q_Head(&root->children);
239       while(kids) {
240          xml_elem_free(kids);
241          kids = Q_Next(&root->children);
242       }
243       xml_elem_free_non_recurse(root);
244    }
245 }
246 /******/
247 
248 /****f* xml_element/xml_elem_new
249  * NAME
250  *   xml_elem_new
251  * SYNOPSIS
252  *   xml_element* xml_elem_new()
253  * FUNCTION
254  *   allocates and initializes a new xml_element
255  * INPUTS
256  *   none
257  * RESULT
258  *   xml_element* or NULL.  NULL indicates an out-of-memory condition.
259  * NOTES
260  * SEE ALSO
261  *   xml_elem_free ()
262  *   xml_elem_free_non_recurse ()
263  * SOURCE
264  */
xml_elem_new()265 xml_element* xml_elem_new() {
266    xml_element* elem = calloc(1, sizeof(xml_element));
267    if(elem) {
268       Q_Init(&elem->children);
269       Q_Init(&elem->attrs);
270       simplestring_init(&elem->text);
271 
272       /* init empty string in case we don't find any char data */
273       simplestring_addn(&elem->text, "", 0);
274    }
275    return elem;
276 }
277 /******/
278 
/*
 * xml_elem_writefunc
 *
 * Purpose:
 *   forward one chunk of output text to the sink callback.
 * Params:
 *   fptr - sink callback; when NULL nothing is written
 *   text - text to write; when NULL nothing is written
 *   data - opaque pointer handed to the callback unchanged
 *   len  - number of bytes to write.  A value <= 0 means "measure text
 *          with strlen()" (the original code did this for 0 only and
 *          passed negative lengths straight to the callback).
 * Returns:
 *   the callback's return value, or 0 when nothing was written.
 */
static int xml_elem_writefunc(int (*fptr)(void *data, const char *text, int size), const char *text, void *data, int len)
{
   if(!fptr || !text) {
      return 0;
   }
   if(len <= 0) {
      len = (int)strlen(text);
   }
   return fptr(data, text, len);
}
283 
284 
285 
/*
 * create_xml_escape
 *
 * Purpose:
 *   write the decimal character reference for byte c ("&#NN;" or "&#NNN;")
 *   into pString.
 * Comments:
 *   At least two digits are always produced, so values below 10 come out
 *   zero-padded (e.g. 9 -> "&#09;").  The output is NOT NUL-terminated and
 *   no bounds checking is done: pString must have room for 6 bytes.
 * Returns:
 *   number of bytes written (5 or 6).
 */
static int create_xml_escape(char *pString, unsigned char c)
{
  int counter = 0;

  pString[counter++] = '&';
  pString[counter++] = '#';
  if(c >= 100) {
    /* hundreds digit, only present for 100..255 */
    pString[counter++] = c / 100 + '0';
    c = c % 100;
  }
  /* tens digit is always written, even when it is 0 */
  pString[counter++] = c / 10 + '0';
  c = c % 10;

  pString[counter++] = c + '0';
  pString[counter++] = ';';
  return counter;
}
303 
304 #define non_ascii(c) (c > 127)
305 #define non_print(c) (!isprint(c))
306 #define markup(c) (c == '&' || c == '\"' || c == '>' || c == '<')
307 #define entity_length(c) ( (c >= 100) ? 3 : ((c >= 10) ? 2 : 1) ) + 3; /* "&#" + c + ";" */
308 
309 /*
310  * xml_elem_entity_escape
311  *
312  * Purpose:
313  *   escape reserved xml chars and non utf-8 chars as xml entities
314  * Comments:
315  *   The return value may be a new string, or null if no
316  *     conversion was performed. In the latter case, *newlen will
317  *     be 0.
318  * Flags (to escape)
319  *  xml_elem_no_escaping             = 0x000,
320  *  xml_elem_entity_escaping         = 0x002,   // escape xml special chars as entities
321  *  xml_elem_non_ascii_escaping      = 0x008,   // escape chars above 127
322  *  xml_elem_cdata_escaping          = 0x010,   // wrap in cdata
323  */
xml_elem_entity_escape(const char * buf,int old_len,int * newlen,XML_ELEM_ESCAPING flags)324 static char* xml_elem_entity_escape(const char* buf, int old_len, int *newlen, XML_ELEM_ESCAPING flags) {
325   char *pRetval = 0;
326   int iNewBufLen=0;
327 
328 #define should_escape(c, flag) ( ((flag & xml_elem_markup_escaping) && markup(c)) || \
329                                  ((flag & xml_elem_non_ascii_escaping) && non_ascii(c)) || \
330                                  ((flag & xml_elem_non_print_escaping) && non_print(c)) )
331 
332   if(buf && *buf) {
333     const unsigned char *bufcopy;
334     char *NewBuffer;
335     int ToBeXmlEscaped=0;
336     int iLength;
337     bufcopy = buf;
338     iLength= old_len ? old_len : strlen(buf);
339     while(*bufcopy) {
340       if( should_escape(*bufcopy, flags) ) {
341 	/* the length will increase by length of xml escape - the character length */
342 	iLength += entity_length(*bufcopy);
343 	ToBeXmlEscaped=1;
344       }
345       bufcopy++;
346     }
347 
348     if(ToBeXmlEscaped) {
349 
350       NewBuffer= malloc(iLength+1);
351       if(NewBuffer) {
352 	bufcopy=buf;
353 	while(*bufcopy) {
354 	  if(should_escape(*bufcopy, flags)) {
355 	    iNewBufLen += create_xml_escape(NewBuffer+iNewBufLen,*bufcopy);
356 	  }
357 	  else {
358 	    NewBuffer[iNewBufLen++]=*bufcopy;
359 	  }
360 	  bufcopy++;
361 	}
362 	NewBuffer[iNewBufLen] = 0;
363 	pRetval = NewBuffer;
364       }
365     }
366   }
367 
368   if(newlen) {
369      *newlen = iNewBufLen;
370   }
371 
372   return pRetval;
373 }
374 
375 
/*
 * xml_element_serialize
 *
 * Purpose:
 *   write element el, and recursively its children, as XML text through
 *   the sink callback fptr.
 * Params:
 *   el      - element to write; NULL writes nothing
 *   fptr    - sink callback, invoked via xml_elem_writefunc()
 *   data    - opaque pointer passed through to fptr
 *   options - output options; NULL selects pretty output with markup and
 *             non-printable escaping and the default encoding name
 *   depth   - nesting level of the caller; pass 0 for the root element
 * Comments:
 *   - The XML declaration is written only for the root call.
 *   - An element with text has only its text written; its children are
 *     written only when the text is empty.
 *   - Attribute values are written as stored, without escaping.
 *   - Pretty-print indentation is capped at the size of the static pad so
 *     very deep trees cannot read past it.
 */
static void xml_element_serialize(xml_element *el, int (*fptr)(void *data, const char *text, int size), void *data, XML_ELEM_OUTPUT_OPTIONS options, int depth)
{
   int i;
   int indent;
   static STRUCT_XML_ELEM_OUTPUT_OPTIONS default_opts = {xml_elem_pretty, xml_elem_markup_escaping | xml_elem_non_print_escaping, XML_DECL_ENCODING_DEFAULT};
   static char whitespace[] = "                                                                                               "
                              "                                                                                               "
                              "                                                                                               ";
   depth++;

   if(!el) {
      /* fprintf(stderr, "Nothing to write\n"); */
      return;
   }
   if(!options) {
      options = &default_opts;
   }

   /* one space per level below the first two, never more than the pad holds */
   indent = depth - 2;
   if(indent > (int)sizeof(whitespace) - 1) {
      indent = (int)sizeof(whitespace) - 1;
   }

   /* print xml declaration if at root level */
   if(depth == 1) {
      xml_elem_writefunc(fptr, XML_DECL_START, data, XML_DECL_START_LEN);
      xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
      xml_elem_writefunc(fptr, XML_DECL_VERSION, data, XML_DECL_VERSION_LEN);
      if(options->encoding && *options->encoding) {
          xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
          xml_elem_writefunc(fptr, XML_DECL_ENCODING_ATTR, data, XML_DECL_ENCODING_ATTR_LEN);
          xml_elem_writefunc(fptr, EQUALS, data, EQUALS_LEN);
          xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
          xml_elem_writefunc(fptr, options->encoding, data, 0);
          xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
      }
      xml_elem_writefunc(fptr, XML_DECL_END, data, XML_DECL_END_LEN);
      if(options->verbosity != xml_elem_no_white_space) {
         xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
      }
   }

   if(options->verbosity == xml_elem_pretty && indent > 0) {
         xml_elem_writefunc(fptr, whitespace, data, indent);
   }
   /* begin element */
   xml_elem_writefunc(fptr,START_TOKEN_BEGIN, data, START_TOKEN_BEGIN_LEN);
   if(el->name) {
      xml_elem_writefunc(fptr, el->name, data, 0);

      /* write attrs, if any */
      if(Q_Size(&el->attrs)) {
         xml_element_attr* iter = Q_Head(&el->attrs);
         while( iter ) {
            xml_elem_writefunc(fptr, WHITESPACE, data, WHITESPACE_LEN);
            xml_elem_writefunc(fptr, iter->key, data, 0);
            xml_elem_writefunc(fptr, EQUALS, data, EQUALS_LEN);
            xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);
            xml_elem_writefunc(fptr, iter->val, data, 0);
            xml_elem_writefunc(fptr, ATTR_DELIMITER, data, ATTR_DELIMITER_LEN);

            iter = Q_Next(&el->attrs);
         }
      }
   }
   else {
      /* unnamed elements are written with the placeholder tag "None" */
      xml_elem_writefunc(fptr, "None", data, 0);
   }
   /* if no text and no children, use abbreviated form, eg: <foo/> */
   if(!el->text.len && !Q_Size(&el->children)) {
       xml_elem_writefunc(fptr, EMPTY_START_TOKEN_END, data, EMPTY_START_TOKEN_END_LEN);
   }
   /* otherwise, print element contents */
   else {
       xml_elem_writefunc(fptr, START_TOKEN_END, data, START_TOKEN_END_LEN);

       /* print text, if any */
       if(el->text.len) {
          char* escaped_str = el->text.str;
          int buflen = el->text.len;

          if(options->escaping && options->escaping != xml_elem_cdata_escaping) {
             /* when nothing needed escaping this returns NULL and sets buflen
              * to 0; the write below then measures the text with strlen() */
             escaped_str = xml_elem_entity_escape(el->text.str, buflen, &buflen, options->escaping );
             if(!escaped_str) {
                escaped_str = el->text.str;
             }
          }

          if(options->escaping & xml_elem_cdata_escaping) {
             xml_elem_writefunc(fptr, CDATA_BEGIN, data, CDATA_BEGIN_LEN);
          }

          xml_elem_writefunc(fptr, escaped_str, data, buflen);

          /* only free what xml_elem_entity_escape() allocated */
          if(escaped_str != el->text.str) {
             my_free(escaped_str);
          }

          if(options->escaping & xml_elem_cdata_escaping) {
             xml_elem_writefunc(fptr, CDATA_END, data, CDATA_END_LEN);
          }
       }
       /* no text, so print child elems */
       else {
          xml_element *kids = Q_Head(&el->children);
          i = 0;
          while( kids ) {
             /* newline after the opening tag, before the first child only */
             if(i++ == 0) {
                if(options->verbosity != xml_elem_no_white_space) {
                   xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
                }
             }
             xml_element_serialize(kids, fptr, data, options, depth);
             kids = Q_Next(&el->children);
          }
          /* re-indent so the closing tag lines up with the opening tag */
          if(i) {
             if(options->verbosity == xml_elem_pretty && indent > 0) {
                   xml_elem_writefunc(fptr, whitespace, data, indent);
             }
          }
       }

       xml_elem_writefunc(fptr, END_TOKEN_BEGIN, data, END_TOKEN_BEGIN_LEN);
       xml_elem_writefunc(fptr,el->name ? el->name : "None", data, 0);
       xml_elem_writefunc(fptr, END_TOKEN_END, data, END_TOKEN_END_LEN);
   }
   if(options->verbosity != xml_elem_no_white_space) {
      xml_elem_writefunc(fptr, NEWLINE, data, NEWLINE_LEN);
   }
}
500 
/*
 * print buf to file
 *
 * Sink callback for xml_element_serialize(): writes exactly `size` bytes of
 * `text` to the FILE* passed as `f`.  The original used fputs(), which
 * ignores `size` and writes up to the terminating NUL, so a request for a
 * few bytes of the indentation pad wrote the whole pad.
 * Always returns 0; write errors are not reported (as before).
 */
static int file_out_fptr(void *f, const char *text, int size)
{
   if(f && text && size > 0) {
      fwrite(text, 1, (size_t)size, (FILE *)f);
   }
   return 0;
}
507 
508 /* print buf to simplestring */
simplestring_out_fptr(void * f,const char * text,int size)509 static int simplestring_out_fptr(void *f, const char *text, int size)
510 {
511    simplestring* buf = (simplestring*)f;
512    if(buf) {
513       simplestring_addn(buf, text, size);
514    }
515    return 0;
516 }
517 
518 /****f* xml_element/xml_elem_serialize_to_string
519  * NAME
520  *   xml_elem_serialize_to_string
521  * SYNOPSIS
522  *   void xml_element_serialize_to_string(xml_element *el, XML_ELEM_OUTPUT_OPTIONS options, int *buf_len)
523  * FUNCTION
524  *   writes element tree as XML into a newly allocated buffer
525  * INPUTS
526  *   el      - root element of tree
527  *   options - options determining how output is written.  see XML_ELEM_OUTPUT_OPTIONS
528  *   buf_len - length of returned buffer, if not null.
529  * RESULT
530  *   char* or NULL. Must be free'd by caller.
531  * NOTES
532  * SEE ALSO
533  *   xml_elem_serialize_to_stream ()
534  *   xml_elem_parse_buf ()
535  * SOURCE
536  */
xml_elem_serialize_to_string(xml_element * el,XML_ELEM_OUTPUT_OPTIONS options,int * buf_len)537 char* xml_elem_serialize_to_string(xml_element *el, XML_ELEM_OUTPUT_OPTIONS options, int *buf_len)
538 {
539    simplestring buf;
540    simplestring_init(&buf);
541 
542    xml_element_serialize(el, simplestring_out_fptr, (void *)&buf, options, 0);
543 
544    if(buf_len) {
545       *buf_len = buf.len;
546    }
547 
548    return buf.str;
549 }
550 /******/
551 
552 /****f* xml_element/xml_elem_serialize_to_stream
553  * NAME
554  *   xml_elem_serialize_to_stream
555  * SYNOPSIS
556  *   void xml_elem_serialize_to_stream(xml_element *el, FILE *output, XML_ELEM_OUTPUT_OPTIONS options)
557  * FUNCTION
558  *   writes element tree as XML into a stream (typically an opened file)
559  * INPUTS
560  *   el      - root element of tree
561  *   output  - stream handle
562  *   options - options determining how output is written.  see XML_ELEM_OUTPUT_OPTIONS
563  * RESULT
564  *   void
565  * NOTES
566  * SEE ALSO
567  *   xml_elem_serialize_to_string ()
568  *   xml_elem_parse_buf ()
569  * SOURCE
570  */
xml_elem_serialize_to_stream(xml_element * el,FILE * output,XML_ELEM_OUTPUT_OPTIONS options)571 void xml_elem_serialize_to_stream(xml_element *el, FILE *output, XML_ELEM_OUTPUT_OPTIONS options)
572 {
573    xml_element_serialize(el, file_out_fptr, (void *)output, options, 0);
574 }
575 /******/
576 
577 /*--------------------------*
578 * End xml_element Functions *
579 *--------------------------*/
580 
581 
582 /*----------------------
583 * Begin Expat Handlers *
584 *---------------------*/
585 
586 typedef struct _xml_elem_data {
587    xml_element*           root;
588    xml_element*           current;
589    XML_ELEM_INPUT_OPTIONS input_options;
590    int                    needs_enc_conversion;
591 } xml_elem_data;
592 
593 
594 /* expat start of element handler */
_xmlrpc_startElement(void * userData,const char * name,const char ** attrs)595 static void _xmlrpc_startElement(void *userData, const char *name, const char **attrs)
596 {
597    xml_element *c;
598    xml_elem_data* mydata = (xml_elem_data*)userData;
599    const char** p = attrs;
600 
601    if(mydata) {
602       c = mydata->current;
603 
604       mydata->current = xml_elem_new();
605       mydata->current->name = (char*)strdup(name);
606       mydata->current->parent = c;
607 
608       /* init attrs */
609       while(p && *p) {
610          xml_element_attr* attr = malloc(sizeof(xml_element_attr));
611          if(attr) {
612             attr->key = strdup(*p);
613             attr->val = strdup(*(p+1));
614             Q_PushTail(&mydata->current->attrs, attr);
615 
616             p += 2;
617          }
618       }
619    }
620 }
621 
622 /* expat end of element handler */
_xmlrpc_endElement(void * userData,const char * name)623 static void _xmlrpc_endElement(void *userData, const char *name)
624 {
625    xml_elem_data* mydata = (xml_elem_data*)userData;
626 
627    if(mydata && mydata->current && mydata->current->parent) {
628       Q_PushTail(&mydata->current->parent->children, mydata->current);
629 
630       mydata->current = mydata->current->parent;
631    }
632 }
633 
634 /* expat char data handler */
_xmlrpc_charHandler(void * userData,const char * s,int len)635 static void _xmlrpc_charHandler(void *userData,
636                         const char *s,
637                         int len)
638 {
639    xml_elem_data* mydata = (xml_elem_data*)userData;
640    if(mydata && mydata->current) {
641 
642       /* Check if we need to decode utf-8 parser output to another encoding */
643       if(mydata->needs_enc_conversion && mydata->input_options->encoding) {
644          int new_len = 0;
645          char* add_text = utf8_decode(s, len, &new_len, mydata->input_options->encoding);
646          if(add_text) {
647             len = new_len;
648             simplestring_addn(&mydata->current->text, add_text, len);
649             free(add_text);
650             return;
651          }
652       }
653       simplestring_addn(&mydata->current->text, s, len);
654    }
655 }
656 /******/
657 
658 /*-------------------*
659 * End Expat Handlers *
660 *-------------------*/
661 
662 /*-------------------*
663 * xml_elem_parse_buf *
664 *-------------------*/
665 
666 /****f* xml_element/xml_elem_parse_buf
667  * NAME
668  *   xml_elem_parse_buf
669  * SYNOPSIS
670  *   xml_element* xml_elem_parse_buf(const char* in_buf, int len, XML_ELEM_INPUT_OPTIONS options, XML_ELEM_ERROR error)
671  * FUNCTION
672  *   parse a buffer containing XML into an xml_element in-memory tree
673  * INPUTS
674  *   in_buf   - buffer containing XML document
675  *   len      - length of buffer
676  *   options  - input options. optional
677  *   error    - error result data. optional. check if result is null.
678  * RESULT
679  *   void
680  * NOTES
681  *   The returned data must be free'd by caller
682  * SEE ALSO
683  *   xml_elem_serialize_to_string ()
684  *   xml_elem_free ()
685  * SOURCE
686  */
xml_elem_parse_buf(const char * in_buf,int len,XML_ELEM_INPUT_OPTIONS options,XML_ELEM_ERROR error)687 xml_element* xml_elem_parse_buf(const char* in_buf, int len, XML_ELEM_INPUT_OPTIONS options, XML_ELEM_ERROR error)
688 {
689    xml_element* xReturn = NULL;
690    char buf[100] = "";
691    static STRUCT_XML_ELEM_INPUT_OPTIONS default_opts = {encoding_utf_8};
692 
693    if(!options) {
694       options = &default_opts;
695    }
696 
697    if(in_buf) {
698       XML_Parser parser;
699       xml_elem_data mydata = {0};
700 
701       parser = XML_ParserCreate(NULL);
702 
703       mydata.root = xml_elem_new();
704       mydata.current = mydata.root;
705       mydata.input_options = options;
706       mydata.needs_enc_conversion = options->encoding && strcmp(options->encoding, encoding_utf_8);
707 
708       XML_SetElementHandler(parser, (XML_StartElementHandler)_xmlrpc_startElement, (XML_EndElementHandler)_xmlrpc_endElement);
709       XML_SetCharacterDataHandler(parser, (XML_CharacterDataHandler)_xmlrpc_charHandler);
710 
711       /* pass the xml_elem_data struct along */
712       XML_SetUserData(parser, (void*)&mydata);
713 
714       if(!len) {
715          len = strlen(in_buf);
716       }
717 
718       /* parse the XML */
719       if(XML_Parse(parser, in_buf, len, 1) == 0) {
720          enum XML_Error err_code = XML_GetErrorCode(parser);
721          int line_num = XML_GetCurrentLineNumber(parser);
722          int col_num = XML_GetCurrentColumnNumber(parser);
723          long byte_idx = XML_GetCurrentByteIndex(parser);
724 /*         int byte_total = XML_GetCurrentByteCount(parser); */
725          const char * error_str = XML_ErrorString(err_code);
726          if(byte_idx >= 0) {
727              snprintf(buf,
728                       sizeof(buf),
729                       "\n\tdata beginning %ld before byte index: %s\n",
730                       byte_idx > 10  ? 10 : byte_idx,
731                       in_buf + (byte_idx > 10 ? byte_idx - 10 : byte_idx));
732          }
733 /*
734          fprintf(stderr, "expat reports error code %i\n"
735                 "\tdescription: %s\n"
736                 "\tline: %i\n"
737                 "\tcolumn: %i\n"
738                 "\tbyte index: %ld\n"
739                 "\ttotal bytes: %i\n%s ",
740                 err_code, error_str, line_num,
741                 col_num, byte_idx, byte_total, buf);
742 */
743 
744           /* error condition */
745           if(error) {
746               error->parser_code = (long)err_code;
747               error->line = line_num;
748               error->column = col_num;
749               error->byte_index = byte_idx;
750               error->parser_error = error_str;
751           }
752       }
753       else {
754          xReturn = (xml_element*)Q_Head(&mydata.root->children);
755          xReturn->parent = NULL;
756       }
757 
758       XML_ParserFree(parser);
759 
760 
761       xml_elem_free_non_recurse(mydata.root);
762    }
763 
764    return xReturn;
765 }
766 
767 /******/
768