1 /*
2  * Copyright (C) 2018-2021 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #include "lexbor/dom/interfaces/document.h"
8 #include "lexbor/dom/interfaces/element.h"
9 #include "lexbor/dom/interfaces/text.h"
10 #include "lexbor/dom/interfaces/document_fragment.h"
11 #include "lexbor/dom/interfaces/comment.h"
12 #include "lexbor/dom/interfaces/cdata_section.h"
13 #include "lexbor/dom/interfaces/cdata_section.h"
14 #include "lexbor/dom/interfaces/processing_instruction.h"
15 
16 
17 lxb_dom_document_t *
lxb_dom_document_interface_create(lxb_dom_document_t * document)18 lxb_dom_document_interface_create(lxb_dom_document_t *document)
19 {
20     lxb_dom_document_t *doc;
21 
22     doc = lexbor_mraw_calloc(document->mraw, sizeof(lxb_dom_document_t));
23     if (doc == NULL) {
24         return NULL;
25     }
26 
27     (void) lxb_dom_document_init(doc, document, lxb_dom_interface_create,
28                                  lxb_dom_interface_clone, lxb_dom_interface_destroy,
29                                  LXB_DOM_DOCUMENT_DTYPE_UNDEF, 0);
30 
31     return doc;
32 }
33 
34 lxb_dom_document_t *
lxb_dom_document_interface_clone(lxb_dom_document_t * document,const lxb_dom_document_t * doc)35 lxb_dom_document_interface_clone(lxb_dom_document_t *document,
36                                  const lxb_dom_document_t *doc)
37 {
38     lxb_dom_document_t *new;
39 
40     new = lxb_dom_document_interface_create(document);
41     if (new == NULL) {
42         return NULL;
43     }
44 
45     new->doctype = doc->doctype;
46     new->compat_mode = doc->compat_mode;
47     new->type = doc->type;
48     new->user = doc->user;
49 
50     return new;
51 }
52 
53 lxb_dom_document_t *
lxb_dom_document_interface_destroy(lxb_dom_document_t * document)54 lxb_dom_document_interface_destroy(lxb_dom_document_t *document)
55 {
56     (void) lxb_dom_node_interface_destroy(lxb_dom_interface_node(document));
57 
58     return NULL;
59 }
60 
61 lxb_dom_document_t *
lxb_dom_document_create(lxb_dom_document_t * owner)62 lxb_dom_document_create(lxb_dom_document_t *owner)
63 {
64     if (owner != NULL) {
65         return lexbor_mraw_calloc(owner->mraw, sizeof(lxb_dom_document_t));
66     }
67 
68     return lexbor_calloc(1, sizeof(lxb_dom_document_t));
69 }
70 
71 lxb_status_t
lxb_dom_document_init(lxb_dom_document_t * document,lxb_dom_document_t * owner,lxb_dom_interface_create_f create_interface,lxb_dom_interface_clone_f clone_interface,lxb_dom_interface_destroy_f destroy_interface,lxb_dom_document_dtype_t type,unsigned int ns)72 lxb_dom_document_init(lxb_dom_document_t *document, lxb_dom_document_t *owner,
73                       lxb_dom_interface_create_f create_interface,
74                       lxb_dom_interface_clone_f clone_interface,
75                       lxb_dom_interface_destroy_f destroy_interface,
76                       lxb_dom_document_dtype_t type, unsigned int ns)
77 {
78     lxb_status_t status;
79     lxb_dom_node_t *node;
80 
81     if (document == NULL) {
82         return LXB_STATUS_ERROR_OBJECT_IS_NULL;
83     }
84 
85     document->type = type;
86     document->create_interface = create_interface;
87     document->clone_interface = clone_interface;
88     document->destroy_interface = destroy_interface;
89 
90     document->ev_insert = NULL;
91     document->ev_remove = NULL;
92     document->ev_destroy = NULL;
93 
94     node = lxb_dom_interface_node(document);
95 
96     node->type = LXB_DOM_NODE_TYPE_DOCUMENT;
97     node->local_name = LXB_TAG__DOCUMENT;
98     node->ns = ns;
99 
100     if (owner != NULL) {
101         document->mraw = owner->mraw;
102         document->text = owner->text;
103         document->tags = owner->tags;
104         document->ns = owner->ns;
105         document->prefix = owner->prefix;
106         document->attrs = owner->attrs;
107         document->parser = owner->parser;
108         document->user = owner->user;
109         document->scripting = owner->scripting;
110         document->compat_mode = owner->compat_mode;
111 
112         document->tags_inherited = true;
113         document->ns_inherited = true;
114 
115         node->owner_document = owner;
116 
117         return LXB_STATUS_OK;
118     }
119 
120     /* For nodes */
121     document->mraw = lexbor_mraw_create();
122     status = lexbor_mraw_init(document->mraw, (4096 * 8));
123 
124     if (status != LXB_STATUS_OK) {
125         goto failed;
126     }
127 
128     /* For text */
129     document->text = lexbor_mraw_create();
130     status = lexbor_mraw_init(document->text, (4096 * 12));
131 
132     if (status != LXB_STATUS_OK) {
133         goto failed;
134     }
135 
136     document->tags = lexbor_hash_create();
137     status = lexbor_hash_init(document->tags, 128, sizeof(lxb_tag_data_t));
138     if (status != LXB_STATUS_OK) {
139         goto failed;
140     }
141 
142     document->ns = lexbor_hash_create();
143     status = lexbor_hash_init(document->ns, 128, sizeof(lxb_ns_data_t));
144     if (status != LXB_STATUS_OK) {
145         goto failed;
146     }
147 
148     document->prefix = lexbor_hash_create();
149     status = lexbor_hash_init(document->prefix, 128,
150                               sizeof(lxb_dom_attr_data_t));
151     if (status != LXB_STATUS_OK) {
152         goto failed;
153     }
154 
155     document->attrs = lexbor_hash_create();
156     status = lexbor_hash_init(document->attrs, 128,
157                               sizeof(lxb_dom_attr_data_t));
158     if (status != LXB_STATUS_OK) {
159         goto failed;
160     }
161 
162     node->owner_document = document;
163 
164     return LXB_STATUS_OK;
165 
166 failed:
167 
168     lexbor_mraw_destroy(document->mraw, true);
169     lexbor_mraw_destroy(document->text, true);
170     lexbor_hash_destroy(document->tags, true);
171     lexbor_hash_destroy(document->ns, true);
172     lexbor_hash_destroy(document->attrs, true);
173     lexbor_hash_destroy(document->prefix, true);
174 
175     return LXB_STATUS_ERROR;
176 }
177 
178 lxb_status_t
lxb_dom_document_clean(lxb_dom_document_t * document)179 lxb_dom_document_clean(lxb_dom_document_t *document)
180 {
181     if (lxb_dom_interface_node(document)->owner_document == document) {
182         lexbor_mraw_clean(document->mraw);
183         lexbor_mraw_clean(document->text);
184         lexbor_hash_clean(document->tags);
185         lexbor_hash_clean(document->ns);
186         lexbor_hash_clean(document->attrs);
187         lexbor_hash_clean(document->prefix);
188     }
189 
190     document->node.first_child = NULL;
191     document->node.last_child = NULL;
192     document->element = NULL;
193     document->doctype = NULL;
194 
195     return LXB_STATUS_OK;
196 }
197 
198 lxb_dom_document_t *
lxb_dom_document_destroy(lxb_dom_document_t * document)199 lxb_dom_document_destroy(lxb_dom_document_t *document)
200 {
201     if (document == NULL) {
202         return NULL;
203     }
204 
205     if (lxb_dom_interface_node(document)->owner_document != document) {
206         lxb_dom_document_t *owner;
207 
208         owner = lxb_dom_interface_node(document)->owner_document;
209 
210         return lexbor_mraw_free(owner->mraw, document);
211     }
212 
213     lexbor_mraw_destroy(document->text, true);
214     lexbor_mraw_destroy(document->mraw, true);
215     lexbor_hash_destroy(document->tags, true);
216     lexbor_hash_destroy(document->ns, true);
217     lexbor_hash_destroy(document->attrs, true);
218     lexbor_hash_destroy(document->prefix, true);
219 
220     return lexbor_free(document);
221 }
222 
223 void
lxb_dom_document_attach_doctype(lxb_dom_document_t * document,lxb_dom_document_type_t * doctype)224 lxb_dom_document_attach_doctype(lxb_dom_document_t *document,
225                                 lxb_dom_document_type_t *doctype)
226 {
227     document->doctype = doctype;
228 }
229 
230 void
lxb_dom_document_attach_element(lxb_dom_document_t * document,lxb_dom_element_t * element)231 lxb_dom_document_attach_element(lxb_dom_document_t *document,
232                                 lxb_dom_element_t *element)
233 {
234     document->element = element;
235 }
236 
237 lxb_dom_element_t *
lxb_dom_document_create_element(lxb_dom_document_t * document,const lxb_char_t * local_name,size_t lname_len,void * reserved_for_opt)238 lxb_dom_document_create_element(lxb_dom_document_t *document,
239                                 const lxb_char_t *local_name, size_t lname_len,
240                                 void *reserved_for_opt)
241 {
242     /* TODO: If localName does not match the Name production... */
243 
244     const lxb_char_t *ns_link;
245     size_t ns_len;
246 
247     if (document->type == LXB_DOM_DOCUMENT_DTYPE_HTML) {
248         ns_link = (const lxb_char_t *) "http://www.w3.org/1999/xhtml";
249 
250         /* FIXME: he will get len at the compilation stage?!? */
251         ns_len = strlen((const char *) ns_link);
252     }
253     else {
254         ns_link = NULL;
255         ns_len = 0;
256     }
257 
258     return lxb_dom_element_create(document, local_name, lname_len,
259                                   ns_link, ns_len, NULL, 0, NULL, 0, true);
260 }
261 
262 lxb_dom_element_t *
lxb_dom_document_destroy_element(lxb_dom_element_t * element)263 lxb_dom_document_destroy_element(lxb_dom_element_t *element)
264 {
265     return lxb_dom_element_destroy(element);
266 }
267 
268 lxb_dom_document_fragment_t *
lxb_dom_document_create_document_fragment(lxb_dom_document_t * document)269 lxb_dom_document_create_document_fragment(lxb_dom_document_t *document)
270 {
271     return lxb_dom_document_fragment_interface_create(document);
272 }
273 
274 lxb_dom_text_t *
lxb_dom_document_create_text_node(lxb_dom_document_t * document,const lxb_char_t * data,size_t len)275 lxb_dom_document_create_text_node(lxb_dom_document_t *document,
276                                   const lxb_char_t *data, size_t len)
277 {
278     lxb_dom_text_t *text;
279 
280     text = lxb_dom_document_create_interface(document,
281                                              LXB_TAG__TEXT, LXB_NS_HTML);
282     if (text == NULL) {
283         return NULL;
284     }
285 
286     lexbor_str_init(&text->char_data.data, document->text, len);
287     if (text->char_data.data.data == NULL) {
288         return lxb_dom_document_destroy_interface(text);
289     }
290 
291     lexbor_str_append(&text->char_data.data, document->text, data, len);
292 
293     return text;
294 }
295 
296 lxb_dom_cdata_section_t *
lxb_dom_document_create_cdata_section(lxb_dom_document_t * document,const lxb_char_t * data,size_t len)297 lxb_dom_document_create_cdata_section(lxb_dom_document_t *document,
298                                       const lxb_char_t *data, size_t len)
299 {
300     if (document->type != LXB_DOM_DOCUMENT_DTYPE_HTML) {
301         return NULL;
302     }
303 
304     const lxb_char_t *end = data + len;
305     const lxb_char_t *ch = memchr(data, ']', sizeof(lxb_char_t) * len);
306 
307     while (ch != NULL) {
308         if ((end - ch) < 3) {
309             break;
310         }
311 
312         if(memcmp(ch, "]]>", 3) == 0) {
313             return NULL;
314         }
315 
316         ch++;
317         ch = memchr(ch, ']', sizeof(lxb_char_t) * (end - ch));
318     }
319 
320     lxb_dom_cdata_section_t *cdata;
321 
322     cdata = lxb_dom_cdata_section_interface_create(document);
323     if (cdata == NULL) {
324         return NULL;
325     }
326 
327     lexbor_str_init(&cdata->text.char_data.data, document->text, len);
328     if (cdata->text.char_data.data.data == NULL) {
329         return lxb_dom_cdata_section_interface_destroy(cdata);
330     }
331 
332     lexbor_str_append(&cdata->text.char_data.data, document->text, data, len);
333 
334     return cdata;
335 }
336 
337 lxb_dom_processing_instruction_t *
lxb_dom_document_create_processing_instruction(lxb_dom_document_t * document,const lxb_char_t * target,size_t target_len,const lxb_char_t * data,size_t data_len)338 lxb_dom_document_create_processing_instruction(lxb_dom_document_t *document,
339                                                const lxb_char_t *target, size_t target_len,
340                                                const lxb_char_t *data, size_t data_len)
341 {
342     /*
343      * TODO: If target does not match the Name production,
344      * then throw an "InvalidCharacterError" DOMException.
345      */
346 
347     const lxb_char_t *end = data + data_len;
348     const lxb_char_t *ch = memchr(data, '?', sizeof(lxb_char_t) * data_len);
349 
350     while (ch != NULL) {
351         if ((end - ch) < 2) {
352             break;
353         }
354 
355         if(memcmp(ch, "?>", 2) == 0) {
356             return NULL;
357         }
358 
359         ch++;
360         ch = memchr(ch, '?', sizeof(lxb_char_t) * (end - ch));
361     }
362 
363     lxb_dom_processing_instruction_t *pi;
364 
365     pi = lxb_dom_processing_instruction_interface_create(document);
366     if (pi == NULL) {
367         return NULL;
368     }
369 
370     lexbor_str_init(&pi->char_data.data, document->text, data_len);
371     if (pi->char_data.data.data == NULL) {
372         return lxb_dom_processing_instruction_interface_destroy(pi);
373     }
374 
375     lexbor_str_init(&pi->target, document->text, target_len);
376     if (pi->target.data == NULL) {
377         lexbor_str_destroy(&pi->char_data.data, document->text, false);
378 
379         return lxb_dom_processing_instruction_interface_destroy(pi);
380     }
381 
382     lexbor_str_append(&pi->char_data.data, document->text, data, data_len);
383     lexbor_str_append(&pi->target, document->text, target, target_len);
384 
385     return pi;
386 }
387 
388 
389 lxb_dom_comment_t *
lxb_dom_document_create_comment(lxb_dom_document_t * document,const lxb_char_t * data,size_t len)390 lxb_dom_document_create_comment(lxb_dom_document_t *document,
391                                 const lxb_char_t *data, size_t len)
392 {
393     lxb_dom_comment_t *comment;
394 
395     comment = lxb_dom_document_create_interface(document, LXB_TAG__EM_COMMENT,
396                                                 LXB_NS_HTML);
397     if (comment == NULL) {
398         return NULL;
399     }
400 
401     lexbor_str_init(&comment->char_data.data, document->text, len);
402     if (comment->char_data.data.data == NULL) {
403         return lxb_dom_document_destroy_interface(comment);
404     }
405 
406     lexbor_str_append(&comment->char_data.data, document->text, data, len);
407 
408     return comment;
409 }
410 
411 lxb_dom_node_t *
lxb_dom_document_root(lxb_dom_document_t * document)412 lxb_dom_document_root(lxb_dom_document_t *document)
413 {
414     lxb_dom_node_t *node;
415 
416     if (document->type == LXB_DOM_DOCUMENT_DTYPE_HTML) {
417         node = document->node.first_child;
418 
419         while (node != NULL) {
420             if (node->local_name == LXB_TAG_HTML) {
421                 return node;
422             }
423 
424             node = node->next;
425         }
426     }
427 
428     return document->node.first_child;
429 }
430 
431 lxb_dom_node_t *
lxb_dom_document_import_node(lxb_dom_document_t * doc,lxb_dom_node_t * node,bool deep)432 lxb_dom_document_import_node(lxb_dom_document_t *doc, lxb_dom_node_t *node,
433                              bool deep)
434 {
435     lxb_dom_node_t *new, *curr, *cnode, *root;
436 
437     new = doc->clone_interface(doc, node);
438     if (new == NULL) {
439         return NULL;
440     }
441 
442     if (!deep) {
443         return new;
444     }
445 
446     curr = new;
447     root = node;
448     node = node->first_child;
449 
450     while (node != NULL) {
451         cnode = doc->clone_interface(doc, node);
452         if (cnode == NULL) {
453             return NULL;
454         }
455 
456         lxb_dom_node_insert_child(curr, cnode);
457 
458         if (node->first_child != NULL) {
459             node = node->first_child;
460             curr = cnode;
461         }
462         else {
463             while (node->next == NULL && node != root) {
464                 node = node->parent;
465                 curr = curr->parent;
466             }
467 
468             if (node == root) {
469                 break;
470             }
471 
472             node = node->next;
473         }
474     }
475 
476     return new;
477 }
478 
479 
/*
 * Non-inline versions of the inline functions, provided for ABI.
 */
483 lxb_dom_interface_t *
lxb_dom_document_create_interface_noi(lxb_dom_document_t * document,lxb_tag_id_t tag_id,lxb_ns_id_t ns)484 lxb_dom_document_create_interface_noi(lxb_dom_document_t *document,
485                                       lxb_tag_id_t tag_id, lxb_ns_id_t ns)
486 {
487     return lxb_dom_document_create_interface(document, tag_id, ns);
488 }
489 
490 lxb_dom_interface_t *
lxb_dom_document_destroy_interface_noi(lxb_dom_interface_t * intrfc)491 lxb_dom_document_destroy_interface_noi(lxb_dom_interface_t *intrfc)
492 {
493     return lxb_dom_document_destroy_interface(intrfc);
494 }
495 
496 void *
lxb_dom_document_create_struct_noi(lxb_dom_document_t * document,size_t struct_size)497 lxb_dom_document_create_struct_noi(lxb_dom_document_t *document,
498                                    size_t struct_size)
499 {
500     return lxb_dom_document_create_struct(document, struct_size);
501 }
502 
503 void *
lxb_dom_document_destroy_struct_noi(lxb_dom_document_t * document,void * structure)504 lxb_dom_document_destroy_struct_noi(lxb_dom_document_t *document,
505                                     void *structure)
506 {
507     return lxb_dom_document_destroy_struct(document, structure);
508 }
509 
510 lxb_char_t *
lxb_dom_document_create_text_noi(lxb_dom_document_t * document,size_t len)511 lxb_dom_document_create_text_noi(lxb_dom_document_t *document, size_t len)
512 {
513     return lxb_dom_document_create_text(document, len);
514 }
515 
516 void *
lxb_dom_document_destroy_text_noi(lxb_dom_document_t * document,lxb_char_t * text)517 lxb_dom_document_destroy_text_noi(lxb_dom_document_t *document,
518                                   lxb_char_t *text)
519 {
520     return lxb_dom_document_destroy_text(document, text);
521 }
522 
523 lxb_dom_element_t *
lxb_dom_document_element_noi(lxb_dom_document_t * document)524 lxb_dom_document_element_noi(lxb_dom_document_t *document)
525 {
526     return lxb_dom_document_element(document);
527 }
528 
529 bool
lxb_dom_document_scripting_noi(lxb_dom_document_t * document)530 lxb_dom_document_scripting_noi(lxb_dom_document_t *document)
531 {
532     return lxb_dom_document_scripting(document);
533 }
534 
535 void
lxb_dom_document_scripting_set_noi(lxb_dom_document_t * document,bool scripting)536 lxb_dom_document_scripting_set_noi(lxb_dom_document_t *document,
537                                    bool scripting)
538 {
539     lxb_dom_document_scripting_set(document, scripting);
540 }
541