1 /*
2  * Copyright (C) 2018-2021 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #include "lexbor/core/str.h"
8 
9 #include "lexbor/html/interfaces/document.h"
10 #include "lexbor/html/interfaces/title_element.h"
11 #include "lexbor/html/interfaces/style_element.h"
12 #include "lexbor/html/node.h"
13 #include "lexbor/html/parser.h"
14 #include "lexbor/html/style.h"
15 
16 #include "lexbor/tag/tag.h"
17 
18 #include "lexbor/dom/interfaces/text.h"
19 #include "lexbor/dom/interfaces/element.h"
20 
21 #define LXB_HTML_TAG_RES_DATA
22 #define LXB_HTML_TAG_RES_SHS_DATA
23 #include "lexbor/html/tag_res.h"
24 
25 
/*
 * Callbacks used when searching the document's css.customs hash table:
 * keys are compared with lexbor_str_data_ncasecmp and hashed with
 * lexbor_hash_make_id.
 */
static const lexbor_hash_search_t  lxb_html_document_css_customs_se = {
    .cmp = lexbor_str_data_ncasecmp,
    .hash = lexbor_hash_make_id
};
30 
/*
 * Callbacks used when inserting into the document's css.customs hash table.
 * Uses the same comparison and hash functions as the search descriptor, plus
 * lexbor_hash_copy as the key copy callback.
 */
static const lexbor_hash_insert_t  lxb_html_document_css_customs_in = {
    .copy = lexbor_hash_copy,
    .cmp = lexbor_str_data_ncasecmp,
    .hash = lexbor_hash_make_id
};
36 
37 
/*
 * Entry type of the css.customs hash table: the generic hash entry header
 * followed by the numeric id assigned to the key on insertion.
 */
typedef struct {
    lexbor_hash_entry_t entry;
    uintptr_t           id;
}
lxb_html_document_css_custom_entry_t;
43 
/*
 * Context carrying a document pointer and an `all` flag.
 * NOTE(review): no use of this type is visible in this part of the file;
 * presumably it belongs to the disabled event callbacks -- confirm.
 */
typedef struct {
    lxb_html_document_t *doc;
    bool                all;
}
lxb_html_document_event_ctx_t;
49 
/*
 * Context handed to lxb_html_document_style_remove_avl_cb(): the owning
 * document and the declaration list whose styles are being removed.
 */
typedef struct {
    lxb_html_document_t             *doc;
    lxb_css_rule_declaration_list_t *list;
}
lxb_html_document_remove_ctx_t;
55 
56 
57 static lxb_html_document_css_custom_entry_t *
58 lxb_html_document_css_customs_insert(lxb_html_document_t *document,
59                                      const lxb_char_t *key, size_t length);
60 
61 static lxb_status_t
62 lxb_html_document_style_remove_by_rule_cb(lxb_dom_node_t *node,
63                                           lxb_css_selector_specificity_t spec,
64                                           void *ctx);
65 
66 static lxb_status_t
67 lxb_html_document_style_remove_avl_cb(lexbor_avl_t *avl,
68                                       lexbor_avl_node_t **root,
69                                       lexbor_avl_node_t *node, void *ctx);
70 
71 static lxb_status_t
72 lxb_html_document_style_cb(lxb_dom_node_t *node,
73                            lxb_css_selector_specificity_t spec, void *ctx);
74 
75 #if 0
76 static lxb_status_t
77 lxb_html_document_done(lxb_html_document_t *document);
78 #endif
79 
80 
81 lxb_status_t
82 lxb_html_parse_chunk_prepare(lxb_html_parser_t *parser,
83                              lxb_html_document_t *document);
84 
85 lxb_inline lxb_status_t
86 lxb_html_document_parser_prepare(lxb_html_document_t *document);
87 
88 static lexbor_action_t
89 lxb_html_document_title_walker(lxb_dom_node_t *node, void *ctx);
90 
91 #if 0
92 static lxb_status_t
93 lxb_html_document_event_insert(lxb_dom_node_t *node);
94 
95 static lxb_status_t
96 lxb_html_document_event_insert_attribute(lxb_dom_node_t *node);
97 
98 static lxb_status_t
99 lxb_html_document_event_remove(lxb_dom_node_t *node);
100 
101 static lxb_status_t
102 lxb_html_document_style_remove_cb(lexbor_avl_t *avl, lexbor_avl_node_t **root,
103                                   lexbor_avl_node_t *node, void *ctx);
104 
105 static lxb_status_t
106 lxb_html_document_event_remove_attribute(lxb_dom_node_t *node);
107 
108 static lxb_status_t
109 lxb_html_document_style_remove_my_cb(lexbor_avl_t *avl, lexbor_avl_node_t **root,
110                                      lexbor_avl_node_t *node, void *ctx);
111 
112 static lxb_status_t
113 lxb_html_document_event_destroy(lxb_dom_node_t *node);
114 
115 static lxb_status_t
116 lxb_html_document_event_set_value(lxb_dom_node_t *node,
117                                   const lxb_char_t *value, size_t length);
118 #endif
119 
120 
121 lxb_html_document_t *
lxb_html_document_interface_create(lxb_html_document_t * document)122 lxb_html_document_interface_create(lxb_html_document_t *document)
123 {
124     lxb_status_t status;
125     lxb_dom_document_t *doc;
126     lxb_html_document_t *hdoc;
127     lxb_dom_interface_create_f icreator;
128 
129     if (document != NULL) {
130         doc = lexbor_mraw_calloc(lxb_html_document_mraw(document),
131                                  sizeof(lxb_html_document_t));
132     }
133     else {
134         doc = lexbor_calloc(1, sizeof(lxb_html_document_t));
135     }
136 
137     if (doc == NULL) {
138         return NULL;
139     }
140 
141     icreator = (lxb_dom_interface_create_f) lxb_html_interface_create;
142 
143     status = lxb_dom_document_init(doc, lxb_dom_interface_document(document),
144                                    icreator, lxb_html_interface_clone,
145                                    lxb_html_interface_destroy,
146                                    LXB_DOM_DOCUMENT_DTYPE_HTML, LXB_NS_HTML);
147     if (status != LXB_STATUS_OK) {
148         (void) lxb_dom_document_destroy(doc);
149         return NULL;
150     }
151 
152     hdoc = lxb_html_interface_document(doc);
153 
154     if (document == NULL) {
155         hdoc->css_init = false;
156     }
157     else {
158         hdoc->css = document->css;
159         hdoc->css_init = document->css_init;
160     }
161 
162     return hdoc;
163 }
164 
165 lxb_html_document_t *
lxb_html_document_interface_destroy(lxb_html_document_t * document)166 lxb_html_document_interface_destroy(lxb_html_document_t *document)
167 {
168     lxb_dom_document_t *doc;
169 
170     if (document == NULL) {
171         return NULL;
172     }
173 
174     doc = lxb_dom_interface_document(document);
175 
176     if (doc->node.owner_document == doc) {
177         (void) lxb_html_parser_unref(doc->parser);
178 #if 0
179         lxb_html_document_css_destroy(document);
180 #endif
181     }
182 
183     (void) lxb_dom_document_destroy(doc);
184 
185     return NULL;
186 }
187 
188 lxb_html_document_t *
lxb_html_document_create(void)189 lxb_html_document_create(void)
190 {
191     return lxb_html_document_interface_create(NULL);
192 }
193 
194 void
lxb_html_document_clean(lxb_html_document_t * document)195 lxb_html_document_clean(lxb_html_document_t *document)
196 {
197     document->body = NULL;
198     document->head = NULL;
199     document->iframe_srcdoc = NULL;
200     document->ready_state = LXB_HTML_DOCUMENT_READY_STATE_UNDEF;
201 
202 #if 0
203     lxb_html_document_css_clean(document);
204 #endif
205 
206     lxb_dom_document_clean(lxb_dom_interface_document(document));
207 }
208 
209 #if 0
/*
 * Disabled code (compiled out by the enclosing #if 0).
 *
 * Creates and initializes the document's CSS objects (memory, css_selectors,
 * parser, selectors, styles AVL tree, stylesheets array, weak dobject and the
 * customs hash), then sets css_init and installs the DOM event callbacks and
 * the `done` callback. Returns immediately when css_init is already set.
 *
 * On any failure control jumps to `failed`, which calls
 * lxb_html_document_css_destroy() and returns the failing status.
 *
 * NOTE(review): css_init is still false on the `failed` path, and
 * lxb_html_document_css_destroy() returns early when css_init is false, so
 * the partially created objects do not appear to be released -- confirm.
 * NOTE(review): the early return uses LXB_HTML_STATUS_OK while every other
 * return uses LXB_STATUS_OK -- confirm this is intended.
 */
lxb_status_t
lxb_html_document_css_init(lxb_html_document_t *document)
{
    lxb_status_t status;
    lxb_html_document_css_t *css = &document->css;

    if (document->css_init) {
        return LXB_HTML_STATUS_OK;
    }

    css->memory = lxb_css_memory_create();
    status = lxb_css_memory_init(css->memory, 1024);
    if (status != LXB_STATUS_OK) {
        goto failed;
    }

    css->css_selectors = lxb_css_selectors_create();
    status = lxb_css_selectors_init(css->css_selectors);
    if (status != LXB_STATUS_OK) {
        goto failed;
    }

    css->parser = lxb_css_parser_create();
    status = lxb_css_parser_init(css->parser, NULL);
    if (status != LXB_STATUS_OK) {
        goto failed;
    }

    /* The CSS parser shares the memory and selectors objects created above. */
    lxb_css_parser_memory_set(css->parser, css->memory);
    lxb_css_parser_selectors_set(css->parser, css->css_selectors);

    css->selectors = lxb_selectors_create();
    status = lxb_selectors_init(css->selectors);
    if (status != LXB_STATUS_OK) {
        goto failed;
    }

    css->styles = lexbor_avl_create();
    status = lexbor_avl_init(css->styles, 2048, sizeof(lxb_html_style_node_t));
    if (status != LXB_STATUS_OK) {
        goto failed;
    }

    css->stylesheets = lexbor_array_create();
    status = lexbor_array_init(css->stylesheets, 16);
    if (status != LXB_STATUS_OK) {
        goto failed;
    }

    css->weak = lexbor_dobject_create();
    status = lexbor_dobject_init(css->weak, 2048,
                                 sizeof(lxb_html_style_weak_t));
    if (status != LXB_STATUS_OK) {
        goto failed;
    }

    status = lxb_html_document_css_customs_init(document);
    if (status != LXB_STATUS_OK) {
        goto failed;
    }

    document->css_init = true;

    document->dom_document.ev_insert = lxb_html_document_event_insert;
    document->dom_document.ev_remove = lxb_html_document_event_remove;
    document->dom_document.ev_destroy = lxb_html_document_event_destroy;
    document->dom_document.ev_set_value = lxb_html_document_event_set_value;

    document->done = lxb_html_document_done;

    return LXB_STATUS_OK;

failed:

    lxb_html_document_css_destroy(document);

    return status;
}
288 
/*
 * Disabled code (compiled out by the enclosing #if 0).
 *
 * Destroys the document's CSS objects, clears the DOM event callbacks and
 * the `done` callback, and destroys the customs hash. Does nothing unless
 * css_init is set and the document is its own owner_document.
 *
 * NOTE(review): css_init is not cleared here -- confirm whether a later
 * lxb_html_document_css_init() is expected to work after this call.
 */
void
lxb_html_document_css_destroy(lxb_html_document_t *document)
{
    lxb_html_document_css_t *css = &document->css;

    if (!document->css_init
        || lxb_dom_interface_node(document)->owner_document
           != lxb_dom_interface_document(document))
    {
        return;
    }

    /* Each destroy call's return value is stored back into the field. */
    css->memory = lxb_css_memory_destroy(css->memory, true);
    css->css_selectors = lxb_css_selectors_destroy(css->css_selectors, true);
    css->parser = lxb_css_parser_destroy(css->parser, true);
    css->selectors = lxb_selectors_destroy(css->selectors, true);
    css->styles = lexbor_avl_destroy(css->styles, true);
    css->stylesheets = lexbor_array_destroy(css->stylesheets, true);
    css->weak = lexbor_dobject_destroy(css->weak, true);

    document->dom_document.ev_insert = NULL;
    document->dom_document.ev_remove = NULL;
    document->dom_document.ev_destroy = NULL;
    document->dom_document.ev_set_value = NULL;

    document->done = NULL;

    lxb_html_document_css_customs_destroy(document);
}
318 
/*
 * Disabled code (compiled out by the enclosing #if 0).
 *
 * Cleans (without destroying) the document's CSS objects. Only acts when the
 * document is its own owner_document and css_init is set.
 */
void
lxb_html_document_css_clean(lxb_html_document_t *document)
{
    lxb_html_document_css_t *css;

    if (lxb_dom_interface_node(document)->owner_document
        == lxb_dom_interface_document(document))
    {
        if (!document->css_init) {
            return;
        }

        css = &document->css;

        lxb_css_memory_clean(css->memory);
        lxb_css_selectors_clean(css->css_selectors);
        lxb_css_parser_clean(css->parser);
        lxb_selectors_clean(css->selectors);
        lexbor_avl_clean(css->styles);
        lexbor_array_clean(css->stylesheets);
        lexbor_dobject_clean(css->weak);
    }
}
342 #endif
343 
344 void
lxb_html_document_css_parser_attach(lxb_html_document_t * document,lxb_css_parser_t * parser)345 lxb_html_document_css_parser_attach(lxb_html_document_t *document,
346                                     lxb_css_parser_t *parser)
347 {
348     document->css.parser = parser;
349 }
350 
351 void
lxb_html_document_css_memory_attach(lxb_html_document_t * document,lxb_css_memory_t * memory)352 lxb_html_document_css_memory_attach(lxb_html_document_t *document,
353                                     lxb_css_memory_t *memory)
354 {
355     document->css.memory = memory;
356 }
357 
358 lxb_status_t
lxb_html_document_css_customs_init(lxb_html_document_t * document)359 lxb_html_document_css_customs_init(lxb_html_document_t *document)
360 {
361     lxb_html_document_css_t *css = &document->css;
362 
363     css->customs_id = LXB_CSS_PROPERTY__LAST_ENTRY;
364 
365     css->customs = lexbor_hash_create();
366     return lexbor_hash_init(css->customs, 512,
367                             sizeof(lxb_html_document_css_custom_entry_t));
368 }
369 
370 void
lxb_html_document_css_customs_destroy(lxb_html_document_t * document)371 lxb_html_document_css_customs_destroy(lxb_html_document_t *document)
372 {
373     document->css.customs = lexbor_hash_destroy(document->css.customs, true);
374 }
375 
376 uintptr_t
lxb_html_document_css_customs_find_id(lxb_html_document_t * document,const lxb_char_t * key,size_t length)377 lxb_html_document_css_customs_find_id(lxb_html_document_t *document,
378                                       const lxb_char_t *key, size_t length)
379 {
380     const lxb_html_document_css_custom_entry_t *entry;
381 
382     entry = lexbor_hash_search(document->css.customs,
383                                &lxb_html_document_css_customs_se, key, length);
384 
385     return (entry != NULL) ? entry->id : 0;
386 }
387 
388 static lxb_html_document_css_custom_entry_t *
lxb_html_document_css_customs_insert(lxb_html_document_t * document,const lxb_char_t * key,size_t length)389 lxb_html_document_css_customs_insert(lxb_html_document_t *document,
390                                      const lxb_char_t *key, size_t length)
391 {
392     lxb_html_document_css_custom_entry_t *entry;
393 
394     if (UINTPTR_MAX - document->css.customs_id == 0) {
395         return NULL;
396     }
397 
398     entry = lexbor_hash_insert(document->css.customs,
399                                &lxb_html_document_css_customs_in, key, length);
400     if (entry == NULL) {
401         return NULL;
402     }
403 
404     entry->id = document->css.customs_id++;
405 
406     return entry;
407 }
408 
409 uintptr_t
lxb_html_document_css_customs_id(lxb_html_document_t * document,const lxb_char_t * key,size_t length)410 lxb_html_document_css_customs_id(lxb_html_document_t *document,
411                                  const lxb_char_t *key, size_t length)
412 {
413     lxb_html_document_css_custom_entry_t *entry;
414 
415     entry = lexbor_hash_search(document->css.customs,
416                                &lxb_html_document_css_customs_se, key, length);
417     if (entry != NULL) {
418         return entry->id;
419     }
420 
421     entry = lxb_html_document_css_customs_insert(document, key, length);
422     if (entry == NULL) {
423         return 0;
424     }
425 
426     return entry->id;
427 }
428 
429 lxb_status_t
lxb_html_document_stylesheet_attach(lxb_html_document_t * document,lxb_css_stylesheet_t * sst)430 lxb_html_document_stylesheet_attach(lxb_html_document_t *document,
431                                     lxb_css_stylesheet_t *sst)
432 {
433     lxb_status_t status;
434 
435     status = lexbor_array_push(document->css.stylesheets, sst);
436     if (status != LXB_STATUS_OK) {
437         return status;
438     }
439 
440     return lxb_html_document_stylesheet_apply(document, sst);
441 }
442 
443 lxb_status_t
lxb_html_document_stylesheet_apply(lxb_html_document_t * document,lxb_css_stylesheet_t * sst)444 lxb_html_document_stylesheet_apply(lxb_html_document_t *document,
445                                    lxb_css_stylesheet_t *sst)
446 {
447     lxb_status_t status = LXB_STATUS_OK;
448     lxb_css_rule_t *rule;
449     lxb_css_rule_list_t *list;
450 
451     rule = sst->root;
452 
453     if (rule->type != LXB_CSS_RULE_LIST) {
454         return LXB_STATUS_ERROR_WRONG_ARGS;
455     }
456 
457     list = lxb_css_rule_list(rule);
458     rule = list->first;
459 
460     while (rule != NULL) {
461         switch (rule->type) {
462             case LXB_CSS_RULE_STYLE:
463                 status = lxb_html_document_style_attach(document,
464                                                         lxb_css_rule_style(rule));
465                 break;
466 
467             default:
468                 break;
469         }
470 
471         if (status != LXB_STATUS_OK) {
472             /* FIXME: what to do with an error? */
473         }
474 
475         rule = rule->next;
476     }
477 
478     return LXB_STATUS_OK;
479 }
480 
481 lxb_status_t
lxb_html_document_stylesheet_add(lxb_html_document_t * document,lxb_css_stylesheet_t * sst)482 lxb_html_document_stylesheet_add(lxb_html_document_t *document,
483                                  lxb_css_stylesheet_t *sst)
484 {
485     if (sst == NULL) {
486         return LXB_STATUS_OK;
487     }
488 
489     return lexbor_array_push(document->css.stylesheets, sst);
490 }
491 
492 lxb_status_t
lxb_html_document_stylesheet_remove(lxb_html_document_t * document,lxb_css_stylesheet_t * sst)493 lxb_html_document_stylesheet_remove(lxb_html_document_t *document,
494                                     lxb_css_stylesheet_t *sst)
495 {
496     size_t i, length;
497     lxb_status_t status = LXB_STATUS_OK;
498     lxb_css_rule_t *rule;
499     lxb_css_rule_list_t *list;
500     lxb_css_stylesheet_t *sst_in;
501 
502     rule = sst->root;
503 
504     if (rule->type != LXB_CSS_RULE_LIST) {
505         return LXB_STATUS_ERROR_WRONG_ARGS;
506     }
507 
508     list = lxb_css_rule_list(rule);
509     rule = list->first;
510 
511     while (rule != NULL) {
512         switch (rule->type) {
513             case LXB_CSS_RULE_STYLE:
514                 status = lxb_html_document_style_remove(document,
515                                                         lxb_css_rule_style(rule));
516                 break;
517 
518             default:
519                 break;
520         }
521 
522         if (status != LXB_STATUS_OK) {
523             /* FIXME: what to do with an error? */
524         }
525 
526         rule = rule->next;
527     }
528 
529     length = lexbor_array_length(document->css.stylesheets);
530 
531     for (i = 0; i < length; i++) {
532         sst_in = lexbor_array_get(document->css.stylesheets, i);
533 
534         if (sst_in == sst) {
535             lexbor_array_delete(document->css.stylesheets, i, 1);
536             length = lexbor_array_length(document->css.stylesheets);
537         }
538     }
539 
540     return LXB_STATUS_OK;
541 }
542 
543 lxb_status_t
lxb_html_document_element_styles_attach(lxb_html_element_t * element)544 lxb_html_document_element_styles_attach(lxb_html_element_t *element)
545 {
546     lxb_status_t status = LXB_STATUS_OK;
547     lxb_css_rule_t *rule;
548     lexbor_array_t *ssts;
549     lxb_css_rule_list_t *list;
550     lxb_css_stylesheet_t *sst;
551     lxb_html_document_t *document;
552 
553     document = lxb_html_element_document(element);
554     ssts = document->css.stylesheets;
555 
556     for (size_t i = 0; i < lexbor_array_length(ssts); i++) {
557         sst = lexbor_array_get(ssts, i);
558 
559         list = lxb_css_rule_list(sst->root);
560         rule = list->first;
561 
562         while (rule != NULL) {
563             switch (rule->type) {
564                 case LXB_CSS_RULE_STYLE:
565                     status = lxb_html_document_style_attach_by_element(document,
566                                              element, lxb_css_rule_style(rule));
567                     break;
568 
569                 default:
570                     break;
571             }
572 
573             if (status != LXB_STATUS_OK) {
574                 /* FIXME: what to do with an error? */
575             }
576 
577             rule = rule->next;
578         }
579     }
580 
581     return LXB_STATUS_OK;
582 }
583 
584 void
lxb_html_document_stylesheet_destroy_all(lxb_html_document_t * document,bool destroy_memory)585 lxb_html_document_stylesheet_destroy_all(lxb_html_document_t *document,
586                                          bool destroy_memory)
587 {
588 #if 0
589     size_t length;
590     lxb_css_stylesheet_t *sst;
591     lxb_html_document_css_t *css = &document->css;
592 
593     length = lexbor_array_length(css->stylesheets);
594 
595     for (size_t i = 0; i < length; i++) {
596         sst = lexbor_array_pop(css->stylesheets);
597 
598         (void) lxb_css_stylesheet_destroy(sst, destroy_memory);
599     }
600 #endif
601 }
602 
603 lxb_status_t
lxb_html_document_style_attach(lxb_html_document_t * document,lxb_css_rule_style_t * style)604 lxb_html_document_style_attach(lxb_html_document_t *document,
605                                lxb_css_rule_style_t *style)
606 {
607     lxb_html_document_css_t *css = &document->css;
608 
609     return lxb_selectors_find(css->selectors, lxb_dom_interface_node(document),
610                               style->selector, lxb_html_document_style_cb, style);
611 }
612 
613 lxb_status_t
lxb_html_document_style_remove(lxb_html_document_t * document,lxb_css_rule_style_t * style)614 lxb_html_document_style_remove(lxb_html_document_t *document,
615                                lxb_css_rule_style_t *style)
616 {
617     lxb_html_document_css_t *css = &document->css;
618 
619     return lxb_selectors_find(css->selectors, lxb_dom_interface_node(document),
620                               style->selector,
621                               lxb_html_document_style_remove_by_rule_cb, style);
622 }
623 
624 static lxb_status_t
lxb_html_document_style_remove_by_rule_cb(lxb_dom_node_t * node,lxb_css_selector_specificity_t spec,void * ctx)625 lxb_html_document_style_remove_by_rule_cb(lxb_dom_node_t *node,
626                                           lxb_css_selector_specificity_t spec,
627                                           void *ctx)
628 {
629     lxb_html_element_t *el;
630     lxb_html_document_t *doc;
631     lxb_css_rule_style_t *style = ctx;
632     lxb_html_document_remove_ctx_t context;
633 
634     el = lxb_html_interface_element(node);
635 
636     if (el->style == NULL) {
637         return LXB_STATUS_OK;
638     }
639 
640     doc = lxb_html_interface_document(node->owner_document);
641 
642     context.doc = doc;
643     context.list = style->declarations;
644 
645     return lexbor_avl_foreach(doc->css.styles, &el->style,
646                               lxb_html_document_style_remove_avl_cb, &context);
647 }
648 
649 static lxb_status_t
lxb_html_document_style_remove_avl_cb(lexbor_avl_t * avl,lexbor_avl_node_t ** root,lexbor_avl_node_t * node,void * ctx)650 lxb_html_document_style_remove_avl_cb(lexbor_avl_t *avl,
651                                       lexbor_avl_node_t **root,
652                                       lexbor_avl_node_t *node, void *ctx)
653 {
654     lxb_html_document_remove_ctx_t *context = ctx;
655     lxb_html_style_node_t *style = (lxb_html_style_node_t *) node;
656 
657     lxb_html_element_style_remove_by_list(context->doc, root,
658                                           style, context->list);
659     return LXB_STATUS_OK;
660 }
661 
662 lxb_status_t
lxb_html_document_style_attach_by_element(lxb_html_document_t * document,lxb_html_element_t * element,lxb_css_rule_style_t * style)663 lxb_html_document_style_attach_by_element(lxb_html_document_t *document,
664                                           lxb_html_element_t *element,
665                                           lxb_css_rule_style_t *style)
666 {
667     lxb_html_document_css_t *css = &document->css;
668 
669     return lxb_selectors_find_reverse(css->selectors, lxb_dom_interface_node(element),
670                               style->selector, lxb_html_document_style_cb, style);
671 }
672 
673 static lxb_status_t
lxb_html_document_style_cb(lxb_dom_node_t * node,lxb_css_selector_specificity_t spec,void * ctx)674 lxb_html_document_style_cb(lxb_dom_node_t *node,
675                            lxb_css_selector_specificity_t spec, void *ctx)
676 {
677     lxb_css_rule_style_t *style = ctx;
678 
679     // FIXME: we don't have support for anything other than HTML.
680 
681     if (node->ns != LXB_NS_HTML) {
682         return LXB_STATUS_OK;
683     }
684 
685     return lxb_html_element_style_list_append(lxb_html_interface_element(node),
686                                               style->declarations, spec);
687 }
688 
689 lxb_html_document_t *
lxb_html_document_destroy(lxb_html_document_t * document)690 lxb_html_document_destroy(lxb_html_document_t *document)
691 {
692     return lxb_html_document_interface_destroy(document);
693 }
694 
695 lxb_status_t
lxb_html_document_parse(lxb_html_document_t * document,const lxb_char_t * html,size_t size)696 lxb_html_document_parse(lxb_html_document_t *document,
697                         const lxb_char_t *html, size_t size)
698 {
699     lxb_status_t status;
700     lxb_dom_document_t *doc;
701     lxb_html_document_opt_t opt;
702 
703     if (document->ready_state != LXB_HTML_DOCUMENT_READY_STATE_UNDEF
704         && document->ready_state != LXB_HTML_DOCUMENT_READY_STATE_LOADING)
705     {
706         lxb_html_document_clean(document);
707     }
708 
709     opt = document->opt;
710     doc = lxb_dom_interface_document(document);
711 
712     status = lxb_html_document_parser_prepare(document);
713     if (status != LXB_STATUS_OK) {
714         goto failed;
715     }
716 
717     status = lxb_html_parse_chunk_prepare(doc->parser, document);
718     if (status != LXB_STATUS_OK) {
719         goto failed;
720     }
721 
722     status = lxb_html_parse_chunk_process(doc->parser, html, size);
723     if (status != LXB_STATUS_OK) {
724         goto failed;
725     }
726 
727     document->opt = opt;
728 
729     return lxb_html_parse_chunk_end(doc->parser);
730 
731 failed:
732 
733     document->opt = opt;
734 
735     return status;
736 }
737 
738 lxb_status_t
lxb_html_document_parse_chunk_begin(lxb_html_document_t * document)739 lxb_html_document_parse_chunk_begin(lxb_html_document_t *document)
740 {
741     if (document->ready_state != LXB_HTML_DOCUMENT_READY_STATE_UNDEF
742         && document->ready_state != LXB_HTML_DOCUMENT_READY_STATE_LOADING)
743     {
744         lxb_html_document_clean(document);
745     }
746 
747     lxb_status_t status = lxb_html_document_parser_prepare(document);
748     if (status != LXB_STATUS_OK) {
749         return status;
750     }
751 
752     return lxb_html_parse_chunk_prepare(document->dom_document.parser,
753                                         document);
754 }
755 
756 lxb_status_t
lxb_html_document_parse_chunk(lxb_html_document_t * document,const lxb_char_t * html,size_t size)757 lxb_html_document_parse_chunk(lxb_html_document_t *document,
758                               const lxb_char_t *html, size_t size)
759 {
760     return lxb_html_parse_chunk_process(document->dom_document.parser,
761                                         html, size);
762 }
763 
764 lxb_status_t
lxb_html_document_parse_chunk_end(lxb_html_document_t * document)765 lxb_html_document_parse_chunk_end(lxb_html_document_t *document)
766 {
767     return lxb_html_parse_chunk_end(document->dom_document.parser);
768 }
769 
770 lxb_dom_node_t *
lxb_html_document_parse_fragment(lxb_html_document_t * document,lxb_dom_element_t * element,const lxb_char_t * html,size_t size)771 lxb_html_document_parse_fragment(lxb_html_document_t *document,
772                                  lxb_dom_element_t *element,
773                                  const lxb_char_t *html, size_t size)
774 {
775     lxb_status_t status;
776     lxb_html_parser_t *parser;
777     lxb_html_document_opt_t opt = document->opt;
778 
779     status = lxb_html_document_parser_prepare(document);
780     if (status != LXB_STATUS_OK) {
781         goto failed;
782     }
783 
784     parser = document->dom_document.parser;
785 
786     status = lxb_html_parse_fragment_chunk_begin(parser, document,
787                                                  element->node.local_name,
788                                                  element->node.ns);
789     if (status != LXB_STATUS_OK) {
790         goto failed;
791     }
792 
793     status = lxb_html_parse_fragment_chunk_process(parser, html, size);
794     if (status != LXB_STATUS_OK) {
795         goto failed;
796     }
797 
798     document->opt = opt;
799 
800     return lxb_html_parse_fragment_chunk_end(parser);
801 
802 failed:
803 
804     document->opt = opt;
805 
806     return NULL;
807 }
808 
809 lxb_status_t
lxb_html_document_parse_fragment_chunk_begin(lxb_html_document_t * document,lxb_dom_element_t * element)810 lxb_html_document_parse_fragment_chunk_begin(lxb_html_document_t *document,
811                                              lxb_dom_element_t *element)
812 {
813     lxb_status_t status;
814     lxb_html_parser_t *parser = document->dom_document.parser;
815 
816     status = lxb_html_document_parser_prepare(document);
817     if (status != LXB_STATUS_OK) {
818         return status;
819     }
820 
821     return lxb_html_parse_fragment_chunk_begin(parser, document,
822                                                element->node.local_name,
823                                                element->node.ns);
824 }
825 
826 lxb_status_t
lxb_html_document_parse_fragment_chunk(lxb_html_document_t * document,const lxb_char_t * html,size_t size)827 lxb_html_document_parse_fragment_chunk(lxb_html_document_t *document,
828                                        const lxb_char_t *html, size_t size)
829 {
830     return lxb_html_parse_fragment_chunk_process(document->dom_document.parser,
831                                                  html, size);
832 }
833 
834 lxb_dom_node_t *
lxb_html_document_parse_fragment_chunk_end(lxb_html_document_t * document)835 lxb_html_document_parse_fragment_chunk_end(lxb_html_document_t *document)
836 {
837     return lxb_html_parse_fragment_chunk_end(document->dom_document.parser);
838 }
839 
840 lxb_inline lxb_status_t
lxb_html_document_parser_prepare(lxb_html_document_t * document)841 lxb_html_document_parser_prepare(lxb_html_document_t *document)
842 {
843     lxb_status_t status;
844     lxb_dom_document_t *doc;
845 
846     doc = lxb_dom_interface_document(document);
847 
848     if (doc->parser == NULL) {
849         doc->parser = lxb_html_parser_create();
850         status = lxb_html_parser_init(doc->parser);
851 
852         if (status != LXB_STATUS_OK) {
853             lxb_html_parser_destroy(doc->parser);
854             return status;
855         }
856     }
857     else if (lxb_html_parser_state(doc->parser) != LXB_HTML_PARSER_STATE_BEGIN) {
858         lxb_html_parser_clean(doc->parser);
859     }
860 
861     return LXB_STATUS_OK;
862 }
863 
864 #if 0
/*
 * Disabled code (compiled out by the enclosing #if 0).
 *
 * Applies every stylesheet in css.stylesheets to the document, stopping at
 * and returning the first failing status. Returns LXB_STATUS_OK immediately
 * when css_init is not set.
 */
static lxb_status_t
lxb_html_document_done(lxb_html_document_t *document)
{
    size_t i, length;
    lxb_status_t status;
    lxb_css_stylesheet_t *sst;

    if (!document->css_init) {
        return LXB_STATUS_OK;
    }

    length = lexbor_array_length(document->css.stylesheets);

    for (i = 0; i < length; i++) {
        sst = lexbor_array_get(document->css.stylesheets, i);

        status = lxb_html_document_stylesheet_apply(document, sst);
        if (status != LXB_STATUS_OK) {
            return status;
        }
    }

    return LXB_STATUS_OK;
}
889 #endif
890 
891 const lxb_char_t *
lxb_html_document_title(lxb_html_document_t * document,size_t * len)892 lxb_html_document_title(lxb_html_document_t *document, size_t *len)
893 {
894     lxb_html_title_element_t *title = NULL;
895 
896     lxb_dom_node_simple_walk(lxb_dom_interface_node(document),
897                              lxb_html_document_title_walker, &title);
898     if (title == NULL) {
899         return NULL;
900     }
901 
902     return lxb_html_title_element_strict_text(title, len);
903 }
904 
905 lxb_status_t
lxb_html_document_title_set(lxb_html_document_t * document,const lxb_char_t * title,size_t len)906 lxb_html_document_title_set(lxb_html_document_t *document,
907                             const lxb_char_t *title, size_t len)
908 {
909     lxb_status_t status;
910 
911     /* TODO: If the document element is an SVG svg element */
912 
913     /* If the document element is in the HTML namespace */
914     if (document->head == NULL) {
915         return LXB_STATUS_OK;
916     }
917 
918     lxb_html_title_element_t *el_title = NULL;
919 
920     lxb_dom_node_simple_walk(lxb_dom_interface_node(document),
921                              lxb_html_document_title_walker, &el_title);
922     if (el_title == NULL) {
923         el_title = (void *) lxb_html_document_create_element(document,
924                                          (const lxb_char_t *) "title", 5, NULL);
925         if (el_title == NULL) {
926             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
927         }
928 
929         lxb_dom_node_insert_child(lxb_dom_interface_node(document->head),
930                                   lxb_dom_interface_node(el_title));
931     }
932 
933     status = lxb_dom_node_text_content_set(lxb_dom_interface_node(el_title),
934                                            title, len);
935     if (status != LXB_STATUS_OK) {
936         lxb_html_document_destroy_element(&el_title->element.element);
937 
938         return status;
939     }
940 
941     return LXB_STATUS_OK;
942 }
943 
944 const lxb_char_t *
lxb_html_document_title_raw(lxb_html_document_t * document,size_t * len)945 lxb_html_document_title_raw(lxb_html_document_t *document, size_t *len)
946 {
947     lxb_html_title_element_t *title = NULL;
948 
949     lxb_dom_node_simple_walk(lxb_dom_interface_node(document),
950                              lxb_html_document_title_walker, &title);
951     if (title == NULL) {
952         return NULL;
953     }
954 
955     return lxb_html_title_element_text(title, len);
956 }
957 
958 static lexbor_action_t
lxb_html_document_title_walker(lxb_dom_node_t * node,void * ctx)959 lxb_html_document_title_walker(lxb_dom_node_t *node, void *ctx)
960 {
961     if (node->local_name == LXB_TAG_TITLE && node->ns == LXB_NS_HTML) {
962         *((void **) ctx) = node;
963 
964         return LEXBOR_ACTION_STOP;
965     }
966 
967     return LEXBOR_ACTION_OK;
968 }
969 
970 lxb_dom_node_t *
lxb_html_document_import_node(lxb_html_document_t * doc,lxb_dom_node_t * node,bool deep)971 lxb_html_document_import_node(lxb_html_document_t *doc, lxb_dom_node_t *node,
972                               bool deep)
973 {
974     return lxb_dom_document_import_node(&doc->dom_document, node, deep);
975 }
976 
977 #if 0
978 static lxb_status_t
979 lxb_html_document_event_insert(lxb_dom_node_t *node)
980 {
981     lxb_status_t status;
982     lxb_html_document_t *doc;
983     lxb_html_style_element_t *style;
984 
985     if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
986         return lxb_html_document_event_insert_attribute(node);
987     }
988     else if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
989         return LXB_STATUS_OK;
990     }
991 
992     // FIXME: we don't have support for anything other than HTML.
993 
994     if (node->ns != LXB_NS_HTML) {
995         return LXB_STATUS_OK;
996     }
997 
998     if (node->local_name == LXB_TAG_STYLE) {
999         style = lxb_html_interface_style(node);
1000 
1001         status = lxb_html_style_element_parse(style);
1002         if (status != LXB_STATUS_OK) {
1003             return status;
1004         }
1005 
1006         doc = lxb_html_interface_document(node->owner_document);
1007 
1008         status = lxb_html_document_stylesheet_attach(doc, style->stylesheet);
1009         if (status != LXB_STATUS_OK) {
1010             return status;
1011         }
1012     }
1013 
1014     return lxb_html_document_element_styles_attach(lxb_html_interface_element(node));
1015 }
1016 
1017 static lxb_status_t
1018 lxb_html_document_event_insert_attribute(lxb_dom_node_t *node)
1019 {
1020     lxb_status_t status;
1021     lxb_dom_attr_t *attr;
1022     lxb_html_element_t *el;
1023 
1024     if (node->type != LXB_DOM_NODE_TYPE_ATTRIBUTE
1025         || node->local_name != LXB_DOM_ATTR_STYLE)
1026     {
1027         return LXB_STATUS_OK;
1028     }
1029 
1030     // FIXME: we don't have support for anything other than HTML.
1031 
1032     if (node->ns != LXB_NS_HTML) {
1033         return LXB_STATUS_OK;
1034     }
1035 
1036     attr = lxb_dom_interface_attr(node);
1037     el = lxb_html_interface_element(attr->owner);
1038 
1039     if (el != NULL && el->list != NULL) {
1040         status = lxb_html_document_event_remove_attribute(node);
1041         if (status != LXB_STATUS_OK) {
1042             return status;
1043         }
1044     }
1045 
1046     if (attr->value == NULL || attr->value->data == NULL) {
1047         return LXB_STATUS_OK;
1048     }
1049 
1050     return lxb_html_element_style_parse(el, attr->value->data,
1051                                         attr->value->length);
1052 }
1053 
1054 static lxb_status_t
1055 lxb_html_document_event_remove(lxb_dom_node_t *node)
1056 {
1057     lxb_status_t status;
1058     lxb_html_element_t *el;
1059     lxb_html_document_t *doc;
1060     lxb_html_document_event_ctx_t context;
1061 
1062     if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
1063         return lxb_html_document_event_remove_attribute(node);
1064     }
1065     else if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
1066         return LXB_STATUS_OK;
1067     }
1068 
1069     // FIXME: we don't have support for anything other than HTML.
1070 
1071     if (node->ns != LXB_NS_HTML) {
1072         return LXB_STATUS_OK;
1073     }
1074 
1075     if (node->local_name == LXB_TAG_STYLE) {
1076         status = lxb_html_element_style_remove((lxb_html_style_element_t *) node);
1077         if (status != LXB_STATUS_OK) {
1078             return status;
1079         }
1080     }
1081 
1082     el = lxb_html_interface_element(node);
1083 
1084     if (el->style == NULL) {
1085         return LXB_STATUS_OK;
1086     }
1087 
1088     doc = lxb_html_interface_document(node->owner_document);
1089 
1090     context.doc = doc;
1091     context.all = false;
1092 
1093     return lexbor_avl_foreach(doc->css.styles, &el->style,
1094                               lxb_html_document_style_remove_cb, &context);
1095 }
1096 
1097 static lxb_status_t
1098 lxb_html_document_style_remove_cb(lexbor_avl_t *avl, lexbor_avl_node_t **root,
1099                                   lexbor_avl_node_t *node, void *ctx)
1100 {
1101     lxb_html_document_event_ctx_t *context = ctx;
1102     lxb_html_style_node_t *style = (lxb_html_style_node_t *) node;
1103 
1104     if (context->all) {
1105         lxb_html_element_style_remove_all(context->doc, root, style);
1106     }
1107 
1108     lxb_html_element_style_remove_all_not(context->doc, root, style, false);
1109 
1110     return LXB_STATUS_OK;
1111 }
1112 
1113 static lxb_status_t
1114 lxb_html_document_event_remove_attribute(lxb_dom_node_t *node)
1115 {
1116     lxb_status_t status;
1117     lxb_dom_attr_t *attr;
1118     lxb_html_element_t *el;
1119     lxb_html_document_t *doc;
1120     lxb_html_document_event_ctx_t context;
1121 
1122     // FIXME: we don't have support for anything other than HTML.
1123 
1124     if (node->local_name != LXB_DOM_ATTR_STYLE || node->ns != LXB_NS_HTML) {
1125         return LXB_STATUS_OK;
1126     }
1127 
1128     attr = lxb_dom_interface_attr(node);
1129     el = lxb_html_interface_element(attr->owner);
1130 
1131     if (el == NULL || el->list == NULL) {
1132         return LXB_STATUS_OK;
1133     }
1134 
1135     doc = lxb_html_interface_document(node->owner_document);
1136 
1137     context.doc = doc;
1138 
1139     status = lexbor_avl_foreach(doc->css.styles, &el->style,
1140                                 lxb_html_document_style_remove_my_cb, &context);
1141     if (status != LXB_STATUS_OK) {
1142         return status;
1143     }
1144 
1145     el->list->first = NULL;
1146     el->list->last = NULL;
1147 
1148     el->list = lxb_css_rule_declaration_list_destroy(el->list, true);
1149 
1150     return LXB_STATUS_OK;
1151 }
1152 
1153 static lxb_status_t
1154 lxb_html_document_style_remove_my_cb(lexbor_avl_t *avl, lexbor_avl_node_t **root,
1155                                      lexbor_avl_node_t *node, void *ctx)
1156 {
1157     lxb_html_document_event_ctx_t *context = ctx;
1158     lxb_html_style_node_t *style = (lxb_html_style_node_t *) node;
1159 
1160     lxb_html_element_style_remove_all_not(context->doc, root, style, true);
1161 
1162     return LXB_STATUS_OK;
1163 }
1164 
1165 static lxb_status_t
1166 lxb_html_document_event_destroy(lxb_dom_node_t *node)
1167 {
1168     lxb_status_t status;
1169     lxb_html_element_t *el;
1170     lxb_html_document_t *doc;
1171     lxb_html_document_event_ctx_t context;
1172 
1173     if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
1174         return lxb_html_document_event_remove_attribute(node);
1175     }
1176     else if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
1177         return LXB_STATUS_OK;
1178     }
1179 
1180     // FIXME: we don't have support for anything other than HTML.
1181 
1182     if (node->ns != LXB_NS_HTML) {
1183         return LXB_STATUS_OK;
1184     }
1185 
1186     el = lxb_html_interface_element(node);
1187 
1188     if (el->style == NULL) {
1189         if (el->list != NULL) {
1190             goto destroy;
1191         }
1192 
1193         return LXB_STATUS_OK;
1194     }
1195 
1196     doc = lxb_html_interface_document(node->owner_document);
1197 
1198     context.doc = doc;
1199     context.all = true;
1200 
1201     status = lexbor_avl_foreach(doc->css.styles, &el->style,
1202                                 lxb_html_document_style_remove_cb, &context);
1203 
1204     if (status != LXB_STATUS_OK) {
1205         return status;
1206     }
1207 
1208 destroy:
1209 
1210     el->list->first = NULL;
1211     el->list->last = NULL;
1212 
1213     el->list = lxb_css_rule_declaration_list_destroy(el->list, true);
1214 
1215     return LXB_STATUS_OK;
1216 }
1217 
1218 static lxb_status_t
1219 lxb_html_document_event_set_value(lxb_dom_node_t *node,
1220                                   const lxb_char_t *value, size_t length)
1221 {
1222     lxb_status_t status;
1223     lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
1224 
1225     if (node->type != LXB_DOM_NODE_TYPE_ATTRIBUTE
1226         || node->local_name != LXB_DOM_ATTR_STYLE)
1227     {
1228         return LXB_STATUS_OK;
1229     }
1230 
1231     // FIXME: we don't have support for anything other than HTML.
1232 
1233     if (node->ns != LXB_NS_HTML) {
1234         return LXB_STATUS_OK;
1235     }
1236 
1237     if (attr->owner == NULL) {
1238         return LXB_STATUS_OK;
1239     }
1240 
1241     status = lxb_html_document_event_remove_attribute(node);
1242     if (status != LXB_STATUS_OK) {
1243         return status;
1244     }
1245 
1246     return lxb_html_element_style_parse(lxb_html_interface_element(node),
1247                                         value, length);
1248 }
1249 #endif
1250 
1251 /*
1252  * No inline functions for ABI.
1253  */
1254 lxb_html_head_element_t *
lxb_html_document_head_element_noi(lxb_html_document_t * document)1255 lxb_html_document_head_element_noi(lxb_html_document_t *document)
1256 {
1257     return lxb_html_document_head_element(document);
1258 }
1259 
1260 lxb_html_body_element_t *
lxb_html_document_body_element_noi(lxb_html_document_t * document)1261 lxb_html_document_body_element_noi(lxb_html_document_t *document)
1262 {
1263     return lxb_html_document_body_element(document);
1264 }
1265 
1266 lxb_dom_document_t *
lxb_html_document_original_ref_noi(lxb_html_document_t * document)1267 lxb_html_document_original_ref_noi(lxb_html_document_t *document)
1268 {
1269     return lxb_html_document_original_ref(document);
1270 }
1271 
1272 bool
lxb_html_document_is_original_noi(lxb_html_document_t * document)1273 lxb_html_document_is_original_noi(lxb_html_document_t *document)
1274 {
1275     return lxb_html_document_is_original(document);
1276 }
1277 
1278 lexbor_mraw_t *
lxb_html_document_mraw_noi(lxb_html_document_t * document)1279 lxb_html_document_mraw_noi(lxb_html_document_t *document)
1280 {
1281     return lxb_html_document_mraw(document);
1282 }
1283 
1284 lexbor_mraw_t *
lxb_html_document_mraw_text_noi(lxb_html_document_t * document)1285 lxb_html_document_mraw_text_noi(lxb_html_document_t *document)
1286 {
1287     return lxb_html_document_mraw_text(document);
1288 }
1289 
1290 void
lxb_html_document_opt_set_noi(lxb_html_document_t * document,lxb_html_document_opt_t opt)1291 lxb_html_document_opt_set_noi(lxb_html_document_t *document,
1292                               lxb_html_document_opt_t opt)
1293 {
1294     lxb_html_document_opt_set(document, opt);
1295 }
1296 
1297 lxb_html_document_opt_t
lxb_html_document_opt_noi(lxb_html_document_t * document)1298 lxb_html_document_opt_noi(lxb_html_document_t *document)
1299 {
1300     return lxb_html_document_opt(document);
1301 }
1302 
1303 void *
lxb_html_document_create_struct_noi(lxb_html_document_t * document,size_t struct_size)1304 lxb_html_document_create_struct_noi(lxb_html_document_t *document,
1305                                     size_t struct_size)
1306 {
1307     return lxb_html_document_create_struct(document, struct_size);
1308 }
1309 
1310 void *
lxb_html_document_destroy_struct_noi(lxb_html_document_t * document,void * data)1311 lxb_html_document_destroy_struct_noi(lxb_html_document_t *document, void *data)
1312 {
1313     return lxb_html_document_destroy_struct(document, data);
1314 }
1315 
1316 lxb_html_element_t *
lxb_html_document_create_element_noi(lxb_html_document_t * document,const lxb_char_t * local_name,size_t lname_len,void * reserved_for_opt)1317 lxb_html_document_create_element_noi(lxb_html_document_t *document,
1318                                      const lxb_char_t *local_name,
1319                                      size_t lname_len, void *reserved_for_opt)
1320 {
1321     return lxb_html_document_create_element(document, local_name, lname_len,
1322                                             reserved_for_opt);
1323 }
1324 
1325 lxb_dom_element_t *
lxb_html_document_destroy_element_noi(lxb_dom_element_t * element)1326 lxb_html_document_destroy_element_noi(lxb_dom_element_t *element)
1327 {
1328     return lxb_html_document_destroy_element(element);
1329 }
1330