1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24 /* <DESC>
25 * Stream-parse a document using the streaming Expat parser.
26 * </DESC>
27 */
28 /* Written by David Strauss
29 *
30 * Expat => https://libexpat.github.io/
31 *
32 * gcc -Wall -I/usr/local/include xmlstream.c -lcurl -lexpat -o xmlstream
33 *
34 */
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40 #include <expat.h>
41 #include <curl/curl.h>
42
/* A heap buffer together with the number of bytes stored in it. */
struct MemoryStruct {
  char *memory;   /* start of the buffer, or NULL when nothing is held */
  size_t size;    /* bytes stored, not counting the trailing null byte */
};
47
48 struct ParserStruct {
49 int ok;
50 size_t tags;
51 size_t depth;
52 struct MemoryStruct characters;
53 };
54
startElement(void * userData,const XML_Char * name,const XML_Char ** atts)55 static void startElement(void *userData, const XML_Char *name,
56 const XML_Char **atts)
57 {
58 struct ParserStruct *state = (struct ParserStruct *) userData;
59 state->tags++;
60 state->depth++;
61
62 /* Get a clean slate for reading in character data. */
63 free(state->characters.memory);
64 state->characters.memory = NULL;
65 state->characters.size = 0;
66 }
67
characterDataHandler(void * userData,const XML_Char * s,int len)68 static void characterDataHandler(void *userData, const XML_Char *s, int len)
69 {
70 struct ParserStruct *state = (struct ParserStruct *) userData;
71 struct MemoryStruct *mem = &state->characters;
72
73 char *ptr = realloc(mem->memory, mem->size + len + 1);
74 if(!ptr) {
75 /* Out of memory. */
76 fprintf(stderr, "Not enough memory (realloc returned NULL).\n");
77 state->ok = 0;
78 return;
79 }
80
81 mem->memory = ptr;
82 memcpy(&(mem->memory[mem->size]), s, len);
83 mem->size += len;
84 mem->memory[mem->size] = 0;
85 }
86
endElement(void * userData,const XML_Char * name)87 static void endElement(void *userData, const XML_Char *name)
88 {
89 struct ParserStruct *state = (struct ParserStruct *) userData;
90 state->depth--;
91
92 printf("%5lu %10lu %s\n", state->depth, state->characters.size, name);
93 }
94
parseStreamCallback(void * contents,size_t length,size_t nmemb,void * userp)95 static size_t parseStreamCallback(void *contents, size_t length, size_t nmemb,
96 void *userp)
97 {
98 XML_Parser parser = (XML_Parser) userp;
99 size_t real_size = length * nmemb;
100 struct ParserStruct *state = (struct ParserStruct *) XML_GetUserData(parser);
101
102 /* Only parse if we are not already in a failure state. */
103 if(state->ok && XML_Parse(parser, contents, real_size, 0) == 0) {
104 int error_code = XML_GetErrorCode(parser);
105 fprintf(stderr, "Parsing response buffer of length %lu failed"
106 " with error code %d (%s).\n",
107 real_size, error_code, XML_ErrorString(error_code));
108 state->ok = 0;
109 }
110
111 return real_size;
112 }
113
main(void)114 int main(void)
115 {
116 CURL *curl_handle;
117 CURLcode res;
118 XML_Parser parser;
119 struct ParserStruct state;
120
121 /* Initialize the state structure for parsing. */
122 memset(&state, 0, sizeof(struct ParserStruct));
123 state.ok = 1;
124
125 /* Initialize a namespace-aware parser. */
126 parser = XML_ParserCreateNS(NULL, '\0');
127 XML_SetUserData(parser, &state);
128 XML_SetElementHandler(parser, startElement, endElement);
129 XML_SetCharacterDataHandler(parser, characterDataHandler);
130
131 /* Initialize a libcurl handle. */
132 curl_global_init(CURL_GLOBAL_DEFAULT);
133 curl_handle = curl_easy_init();
134 curl_easy_setopt(curl_handle, CURLOPT_URL,
135 "https://www.w3schools.com/xml/simple.xml");
136 curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, parseStreamCallback);
137 curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)parser);
138
139 printf("Depth Characters Closing Tag\n");
140
141 /* Perform the request and any follow-up parsing. */
142 res = curl_easy_perform(curl_handle);
143 if(res != CURLE_OK) {
144 fprintf(stderr, "curl_easy_perform() failed: %s\n",
145 curl_easy_strerror(res));
146 }
147 else if(state.ok) {
148 /* Expat requires one final call to finalize parsing. */
149 if(XML_Parse(parser, NULL, 0, 1) == 0) {
150 int error_code = XML_GetErrorCode(parser);
151 fprintf(stderr, "Finalizing parsing failed with error code %d (%s).\n",
152 error_code, XML_ErrorString(error_code));
153 }
154 else {
155 printf(" --------------\n");
156 printf(" %lu tags total\n", state.tags);
157 }
158 }
159
160 /* Clean up. */
161 free(state.characters.memory);
162 XML_ParserFree(parser);
163 curl_easy_cleanup(curl_handle);
164 curl_global_cleanup();
165
166 return 0;
167 }
168