1 /*
2 * Copyright (C) 2018-2024 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7 #include "lexbor/core/str.h"
8
9 #include "lexbor/html/interfaces/document.h"
10 #include "lexbor/html/interfaces/title_element.h"
11 #include "lexbor/html/interfaces/style_element.h"
12 #include "lexbor/html/node.h"
13 #include "lexbor/html/parser.h"
14 #include "lexbor/html/style.h"
15
16 #include "lexbor/tag/tag.h"
17
18 #include "lexbor/dom/interfaces/text.h"
19 #include "lexbor/dom/interfaces/element.h"
20
21 #define LXB_HTML_TAG_RES_DATA
22 #define LXB_HTML_TAG_RES_SHS_DATA
23 #include "lexbor/html/tag_res.h"
24
25
26 static const lexbor_hash_search_t lxb_html_document_css_customs_se = {
27 .cmp = lexbor_str_data_ncasecmp,
28 .hash = lexbor_hash_make_id
29 };
30
31 static const lexbor_hash_insert_t lxb_html_document_css_customs_in = {
32 .copy = lexbor_hash_copy,
33 .cmp = lexbor_str_data_ncasecmp,
34 .hash = lexbor_hash_make_id
35 };
36
37
38 typedef struct {
39 lexbor_hash_entry_t entry;
40 uintptr_t id;
41 }
42 lxb_html_document_css_custom_entry_t;
43
44 typedef struct {
45 lxb_html_document_t *doc;
46 bool all;
47 }
48 lxb_html_document_event_ctx_t;
49
50 typedef struct {
51 lxb_html_document_t *doc;
52 lxb_css_rule_declaration_list_t *list;
53 }
54 lxb_html_document_remove_ctx_t;
55
56
57 static lxb_html_document_css_custom_entry_t *
58 lxb_html_document_css_customs_insert(lxb_html_document_t *document,
59 const lxb_char_t *key, size_t length);
60
61 static lxb_status_t
62 lxb_html_document_style_remove_by_rule_cb(lxb_dom_node_t *node,
63 lxb_css_selector_specificity_t spec,
64 void *ctx);
65
66 static lxb_status_t
67 lxb_html_document_style_remove_avl_cb(lexbor_avl_t *avl,
68 lexbor_avl_node_t **root,
69 lexbor_avl_node_t *node, void *ctx);
70
71 static lxb_status_t
72 lxb_html_document_style_cb(lxb_dom_node_t *node,
73 lxb_css_selector_specificity_t spec, void *ctx);
74
75 #if 0
76 static lxb_status_t
77 lxb_html_document_done(lxb_html_document_t *document);
78 #endif
79
80
81 lxb_status_t
82 lxb_html_parse_chunk_prepare(lxb_html_parser_t *parser,
83 lxb_html_document_t *document);
84
85 lxb_inline lxb_status_t
86 lxb_html_document_parser_prepare(lxb_html_document_t *document);
87
88 static lexbor_action_t
89 lxb_html_document_title_walker(lxb_dom_node_t *node, void *ctx);
90
91 #if 0
92 static lxb_status_t
93 lxb_html_document_event_insert(lxb_dom_node_t *node);
94
95 static lxb_status_t
96 lxb_html_document_event_insert_attribute(lxb_dom_node_t *node);
97
98 static lxb_status_t
99 lxb_html_document_event_remove(lxb_dom_node_t *node);
100
101 static lxb_status_t
102 lxb_html_document_style_remove_cb(lexbor_avl_t *avl, lexbor_avl_node_t **root,
103 lexbor_avl_node_t *node, void *ctx);
104
105 static lxb_status_t
106 lxb_html_document_event_remove_attribute(lxb_dom_node_t *node);
107
108 static lxb_status_t
109 lxb_html_document_style_remove_my_cb(lexbor_avl_t *avl, lexbor_avl_node_t **root,
110 lexbor_avl_node_t *node, void *ctx);
111
112 static lxb_status_t
113 lxb_html_document_event_destroy(lxb_dom_node_t *node);
114
115 static lxb_status_t
116 lxb_html_document_event_set_value(lxb_dom_node_t *node,
117 const lxb_char_t *value, size_t length);
118 #endif
119
120
121 lxb_html_document_t *
lxb_html_document_interface_create(lxb_html_document_t * document)122 lxb_html_document_interface_create(lxb_html_document_t *document)
123 {
124 lxb_status_t status;
125 lxb_dom_document_t *doc;
126 lxb_html_document_t *hdoc;
127 lxb_dom_interface_create_f icreator;
128
129 if (document != NULL) {
130 doc = lexbor_mraw_calloc(lxb_html_document_mraw(document),
131 sizeof(lxb_html_document_t));
132 }
133 else {
134 doc = lexbor_calloc(1, sizeof(lxb_html_document_t));
135 }
136
137 if (doc == NULL) {
138 return NULL;
139 }
140
141 icreator = (lxb_dom_interface_create_f) lxb_html_interface_create;
142
143 status = lxb_dom_document_init(doc, lxb_dom_interface_document(document),
144 icreator, lxb_html_interface_clone,
145 lxb_html_interface_destroy,
146 LXB_DOM_DOCUMENT_DTYPE_HTML, LXB_NS_HTML);
147 if (status != LXB_STATUS_OK) {
148 (void) lxb_dom_document_destroy(doc);
149 return NULL;
150 }
151
152 hdoc = lxb_html_interface_document(doc);
153
154 if (document == NULL) {
155 hdoc->css_init = false;
156 }
157 else {
158 hdoc->css = document->css;
159 hdoc->css_init = document->css_init;
160 }
161
162 return hdoc;
163 }
164
165 lxb_html_document_t *
lxb_html_document_interface_destroy(lxb_html_document_t * document)166 lxb_html_document_interface_destroy(lxb_html_document_t *document)
167 {
168 lxb_dom_document_t *doc;
169
170 if (document == NULL) {
171 return NULL;
172 }
173
174 doc = lxb_dom_interface_document(document);
175
176 if (doc->node.owner_document == doc) {
177 (void) lxb_html_parser_unref(doc->parser);
178 #if 0
179 lxb_html_document_css_destroy(document);
180 #endif
181 }
182
183 (void) lxb_dom_document_destroy(doc);
184
185 return NULL;
186 }
187
188 lxb_html_document_t *
lxb_html_document_create(void)189 lxb_html_document_create(void)
190 {
191 return lxb_html_document_interface_create(NULL);
192 }
193
194 void
lxb_html_document_clean(lxb_html_document_t * document)195 lxb_html_document_clean(lxb_html_document_t *document)
196 {
197 document->body = NULL;
198 document->head = NULL;
199 document->iframe_srcdoc = NULL;
200 document->ready_state = LXB_HTML_DOCUMENT_READY_STATE_UNDEF;
201
202 #if 0
203 lxb_html_document_css_clean(document);
204 #endif
205
206 lxb_dom_document_clean(lxb_dom_interface_document(document));
207 }
208
209 #if 0
210 lxb_status_t
211 lxb_html_document_css_init(lxb_html_document_t *document)
212 {
213 lxb_status_t status;
214 lxb_html_document_css_t *css = &document->css;
215
216 if (document->css_init) {
217 return LXB_HTML_STATUS_OK;
218 }
219
220 css->memory = lxb_css_memory_create();
221 status = lxb_css_memory_init(css->memory, 1024);
222 if (status != LXB_STATUS_OK) {
223 goto failed;
224 }
225
226 css->css_selectors = lxb_css_selectors_create();
227 status = lxb_css_selectors_init(css->css_selectors);
228 if (status != LXB_STATUS_OK) {
229 goto failed;
230 }
231
232 css->parser = lxb_css_parser_create();
233 status = lxb_css_parser_init(css->parser, NULL);
234 if (status != LXB_STATUS_OK) {
235 goto failed;
236 }
237
238 lxb_css_parser_memory_set(css->parser, css->memory);
239 lxb_css_parser_selectors_set(css->parser, css->css_selectors);
240
241 css->selectors = lxb_selectors_create();
242 status = lxb_selectors_init(css->selectors);
243 if (status != LXB_STATUS_OK) {
244 goto failed;
245 }
246
247 css->styles = lexbor_avl_create();
248 status = lexbor_avl_init(css->styles, 2048, sizeof(lxb_html_style_node_t));
249 if (status != LXB_STATUS_OK) {
250 goto failed;
251 }
252
253 css->stylesheets = lexbor_array_create();
254 status = lexbor_array_init(css->stylesheets, 16);
255 if (status != LXB_STATUS_OK) {
256 goto failed;
257 }
258
259 css->weak = lexbor_dobject_create();
260 status = lexbor_dobject_init(css->weak, 2048,
261 sizeof(lxb_html_style_weak_t));
262 if (status != LXB_STATUS_OK) {
263 goto failed;
264 }
265
266 status = lxb_html_document_css_customs_init(document);
267 if (status != LXB_STATUS_OK) {
268 goto failed;
269 }
270
271 document->css_init = true;
272
273 document->dom_document.ev_insert = lxb_html_document_event_insert;
274 document->dom_document.ev_remove = lxb_html_document_event_remove;
275 document->dom_document.ev_destroy = lxb_html_document_event_destroy;
276 document->dom_document.ev_set_value = lxb_html_document_event_set_value;
277
278 document->done = lxb_html_document_done;
279
280 return LXB_STATUS_OK;
281
282 failed:
283
284 lxb_html_document_css_destroy(document);
285
286 return status;
287 }
288
289 void
290 lxb_html_document_css_destroy(lxb_html_document_t *document)
291 {
292 lxb_html_document_css_t *css = &document->css;
293
294 if (!document->css_init
295 || lxb_dom_interface_node(document)->owner_document
296 != lxb_dom_interface_document(document))
297 {
298 return;
299 }
300
301 css->memory = lxb_css_memory_destroy(css->memory, true);
302 css->css_selectors = lxb_css_selectors_destroy(css->css_selectors, true);
303 css->parser = lxb_css_parser_destroy(css->parser, true);
304 css->selectors = lxb_selectors_destroy(css->selectors, true);
305 css->styles = lexbor_avl_destroy(css->styles, true);
306 css->stylesheets = lexbor_array_destroy(css->stylesheets, true);
307 css->weak = lexbor_dobject_destroy(css->weak, true);
308
309 document->dom_document.ev_insert = NULL;
310 document->dom_document.ev_remove = NULL;
311 document->dom_document.ev_destroy = NULL;
312 document->dom_document.ev_set_value = NULL;
313
314 document->done = NULL;
315
316 lxb_html_document_css_customs_destroy(document);
317 }
318
319 void
320 lxb_html_document_css_clean(lxb_html_document_t *document)
321 {
322 lxb_html_document_css_t *css;
323
324 if (lxb_dom_interface_node(document)->owner_document
325 == lxb_dom_interface_document(document))
326 {
327 if (!document->css_init) {
328 return;
329 }
330
331 css = &document->css;
332
333 lxb_css_memory_clean(css->memory);
334 lxb_css_selectors_clean(css->css_selectors);
335 lxb_css_parser_clean(css->parser);
336 lxb_selectors_clean(css->selectors);
337 lexbor_avl_clean(css->styles);
338 lexbor_array_clean(css->stylesheets);
339 lexbor_dobject_clean(css->weak);
340 }
341 }
342 #endif
343
344 void
lxb_html_document_css_parser_attach(lxb_html_document_t * document,lxb_css_parser_t * parser)345 lxb_html_document_css_parser_attach(lxb_html_document_t *document,
346 lxb_css_parser_t *parser)
347 {
348 document->css.parser = parser;
349 }
350
351 void
lxb_html_document_css_memory_attach(lxb_html_document_t * document,lxb_css_memory_t * memory)352 lxb_html_document_css_memory_attach(lxb_html_document_t *document,
353 lxb_css_memory_t *memory)
354 {
355 document->css.memory = memory;
356 }
357
358 lxb_status_t
lxb_html_document_css_customs_init(lxb_html_document_t * document)359 lxb_html_document_css_customs_init(lxb_html_document_t *document)
360 {
361 lxb_html_document_css_t *css = &document->css;
362
363 css->customs_id = LXB_CSS_PROPERTY__LAST_ENTRY;
364
365 css->customs = lexbor_hash_create();
366 return lexbor_hash_init(css->customs, 512,
367 sizeof(lxb_html_document_css_custom_entry_t));
368 }
369
370 void
lxb_html_document_css_customs_destroy(lxb_html_document_t * document)371 lxb_html_document_css_customs_destroy(lxb_html_document_t *document)
372 {
373 document->css.customs = lexbor_hash_destroy(document->css.customs, true);
374 }
375
376 uintptr_t
lxb_html_document_css_customs_find_id(lxb_html_document_t * document,const lxb_char_t * key,size_t length)377 lxb_html_document_css_customs_find_id(lxb_html_document_t *document,
378 const lxb_char_t *key, size_t length)
379 {
380 const lxb_html_document_css_custom_entry_t *entry;
381
382 entry = lexbor_hash_search(document->css.customs,
383 &lxb_html_document_css_customs_se, key, length);
384
385 return (entry != NULL) ? entry->id : 0;
386 }
387
388 static lxb_html_document_css_custom_entry_t *
lxb_html_document_css_customs_insert(lxb_html_document_t * document,const lxb_char_t * key,size_t length)389 lxb_html_document_css_customs_insert(lxb_html_document_t *document,
390 const lxb_char_t *key, size_t length)
391 {
392 lxb_html_document_css_custom_entry_t *entry;
393
394 if (UINTPTR_MAX - document->css.customs_id == 0) {
395 return NULL;
396 }
397
398 entry = lexbor_hash_insert(document->css.customs,
399 &lxb_html_document_css_customs_in, key, length);
400 if (entry == NULL) {
401 return NULL;
402 }
403
404 entry->id = document->css.customs_id++;
405
406 return entry;
407 }
408
409 uintptr_t
lxb_html_document_css_customs_id(lxb_html_document_t * document,const lxb_char_t * key,size_t length)410 lxb_html_document_css_customs_id(lxb_html_document_t *document,
411 const lxb_char_t *key, size_t length)
412 {
413 lxb_html_document_css_custom_entry_t *entry;
414
415 entry = lexbor_hash_search(document->css.customs,
416 &lxb_html_document_css_customs_se, key, length);
417 if (entry != NULL) {
418 return entry->id;
419 }
420
421 entry = lxb_html_document_css_customs_insert(document, key, length);
422 if (entry == NULL) {
423 return 0;
424 }
425
426 return entry->id;
427 }
428
429 lxb_status_t
lxb_html_document_stylesheet_attach(lxb_html_document_t * document,lxb_css_stylesheet_t * sst)430 lxb_html_document_stylesheet_attach(lxb_html_document_t *document,
431 lxb_css_stylesheet_t *sst)
432 {
433 lxb_status_t status;
434
435 status = lexbor_array_push(document->css.stylesheets, sst);
436 if (status != LXB_STATUS_OK) {
437 return status;
438 }
439
440 return lxb_html_document_stylesheet_apply(document, sst);
441 }
442
443 lxb_status_t
lxb_html_document_stylesheet_apply(lxb_html_document_t * document,lxb_css_stylesheet_t * sst)444 lxb_html_document_stylesheet_apply(lxb_html_document_t *document,
445 lxb_css_stylesheet_t *sst)
446 {
447 lxb_status_t status = LXB_STATUS_OK;
448 lxb_css_rule_t *rule;
449 lxb_css_rule_list_t *list;
450
451 rule = sst->root;
452
453 if (rule->type != LXB_CSS_RULE_LIST) {
454 return LXB_STATUS_ERROR_WRONG_ARGS;
455 }
456
457 list = lxb_css_rule_list(rule);
458 rule = list->first;
459
460 while (rule != NULL) {
461 switch (rule->type) {
462 case LXB_CSS_RULE_STYLE:
463 status = lxb_html_document_style_attach(document,
464 lxb_css_rule_style(rule));
465 break;
466
467 default:
468 break;
469 }
470
471 if (status != LXB_STATUS_OK) {
472 /* FIXME: what to do with an error? */
473 }
474
475 rule = rule->next;
476 }
477
478 return LXB_STATUS_OK;
479 }
480
481 lxb_status_t
lxb_html_document_stylesheet_add(lxb_html_document_t * document,lxb_css_stylesheet_t * sst)482 lxb_html_document_stylesheet_add(lxb_html_document_t *document,
483 lxb_css_stylesheet_t *sst)
484 {
485 if (sst == NULL) {
486 return LXB_STATUS_OK;
487 }
488
489 return lexbor_array_push(document->css.stylesheets, sst);
490 }
491
492 lxb_status_t
lxb_html_document_stylesheet_remove(lxb_html_document_t * document,lxb_css_stylesheet_t * sst)493 lxb_html_document_stylesheet_remove(lxb_html_document_t *document,
494 lxb_css_stylesheet_t *sst)
495 {
496 size_t i, length;
497 lxb_status_t status = LXB_STATUS_OK;
498 lxb_css_rule_t *rule;
499 lxb_css_rule_list_t *list;
500 lxb_css_stylesheet_t *sst_in;
501
502 rule = sst->root;
503
504 if (rule->type != LXB_CSS_RULE_LIST) {
505 return LXB_STATUS_ERROR_WRONG_ARGS;
506 }
507
508 list = lxb_css_rule_list(rule);
509 rule = list->first;
510
511 while (rule != NULL) {
512 switch (rule->type) {
513 case LXB_CSS_RULE_STYLE:
514 status = lxb_html_document_style_remove(document,
515 lxb_css_rule_style(rule));
516 break;
517
518 default:
519 break;
520 }
521
522 if (status != LXB_STATUS_OK) {
523 /* FIXME: what to do with an error? */
524 }
525
526 rule = rule->next;
527 }
528
529 length = lexbor_array_length(document->css.stylesheets);
530
531 for (i = 0; i < length; i++) {
532 sst_in = lexbor_array_get(document->css.stylesheets, i);
533
534 if (sst_in == sst) {
535 lexbor_array_delete(document->css.stylesheets, i, 1);
536 length = lexbor_array_length(document->css.stylesheets);
537 }
538 }
539
540 return LXB_STATUS_OK;
541 }
542
543 lxb_status_t
lxb_html_document_element_styles_attach(lxb_html_element_t * element)544 lxb_html_document_element_styles_attach(lxb_html_element_t *element)
545 {
546 lxb_status_t status = LXB_STATUS_OK;
547 lxb_css_rule_t *rule;
548 lexbor_array_t *ssts;
549 lxb_css_rule_list_t *list;
550 lxb_css_stylesheet_t *sst;
551 lxb_html_document_t *document;
552
553 document = lxb_html_element_document(element);
554 ssts = document->css.stylesheets;
555
556 for (size_t i = 0; i < lexbor_array_length(ssts); i++) {
557 sst = lexbor_array_get(ssts, i);
558
559 list = lxb_css_rule_list(sst->root);
560 rule = list->first;
561
562 while (rule != NULL) {
563 switch (rule->type) {
564 case LXB_CSS_RULE_STYLE:
565 status = lxb_html_document_style_attach_by_element(document,
566 element, lxb_css_rule_style(rule));
567 break;
568
569 default:
570 break;
571 }
572
573 if (status != LXB_STATUS_OK) {
574 /* FIXME: what to do with an error? */
575 }
576
577 rule = rule->next;
578 }
579 }
580
581 return LXB_STATUS_OK;
582 }
583
584 void
lxb_html_document_stylesheet_destroy_all(lxb_html_document_t * document,bool destroy_memory)585 lxb_html_document_stylesheet_destroy_all(lxb_html_document_t *document,
586 bool destroy_memory)
587 {
588 #if 0
589 size_t length;
590 lxb_css_stylesheet_t *sst;
591 lxb_html_document_css_t *css = &document->css;
592
593 length = lexbor_array_length(css->stylesheets);
594
595 for (size_t i = 0; i < length; i++) {
596 sst = lexbor_array_pop(css->stylesheets);
597
598 (void) lxb_css_stylesheet_destroy(sst, destroy_memory);
599 }
600 #endif
601 }
602
603 lxb_status_t
lxb_html_document_style_attach(lxb_html_document_t * document,lxb_css_rule_style_t * style)604 lxb_html_document_style_attach(lxb_html_document_t *document,
605 lxb_css_rule_style_t *style)
606 {
607 lxb_html_document_css_t *css = &document->css;
608
609 return lxb_selectors_find(css->selectors, lxb_dom_interface_node(document),
610 style->selector, lxb_html_document_style_cb, style);
611 }
612
613 lxb_status_t
lxb_html_document_style_remove(lxb_html_document_t * document,lxb_css_rule_style_t * style)614 lxb_html_document_style_remove(lxb_html_document_t *document,
615 lxb_css_rule_style_t *style)
616 {
617 lxb_html_document_css_t *css = &document->css;
618
619 return lxb_selectors_find(css->selectors, lxb_dom_interface_node(document),
620 style->selector,
621 lxb_html_document_style_remove_by_rule_cb, style);
622 }
623
624 static lxb_status_t
lxb_html_document_style_remove_by_rule_cb(lxb_dom_node_t * node,lxb_css_selector_specificity_t spec,void * ctx)625 lxb_html_document_style_remove_by_rule_cb(lxb_dom_node_t *node,
626 lxb_css_selector_specificity_t spec,
627 void *ctx)
628 {
629 lxb_html_element_t *el;
630 lxb_html_document_t *doc;
631 lxb_css_rule_style_t *style = ctx;
632 lxb_html_document_remove_ctx_t context;
633
634 /* FIXME: we don't have support for anything other than HTML. */
635
636 if (node->ns != LXB_NS_HTML) {
637 return LXB_STATUS_OK;
638 }
639
640 el = lxb_html_interface_element(node);
641
642 if (el->style == NULL) {
643 return LXB_STATUS_OK;
644 }
645
646 doc = lxb_html_interface_document(node->owner_document);
647
648 context.doc = doc;
649 context.list = style->declarations;
650
651 return lexbor_avl_foreach(doc->css.styles, &el->style,
652 lxb_html_document_style_remove_avl_cb, &context);
653 }
654
655 static lxb_status_t
lxb_html_document_style_remove_avl_cb(lexbor_avl_t * avl,lexbor_avl_node_t ** root,lexbor_avl_node_t * node,void * ctx)656 lxb_html_document_style_remove_avl_cb(lexbor_avl_t *avl,
657 lexbor_avl_node_t **root,
658 lexbor_avl_node_t *node, void *ctx)
659 {
660 lxb_html_document_remove_ctx_t *context = ctx;
661 lxb_html_style_node_t *style = (lxb_html_style_node_t *) node;
662
663 if (context->list == NULL) {
664 return LXB_STATUS_OK;
665 }
666
667 lxb_html_element_style_remove_by_list(context->doc, root,
668 style, context->list);
669 return LXB_STATUS_OK;
670 }
671
672 lxb_status_t
lxb_html_document_style_attach_by_element(lxb_html_document_t * document,lxb_html_element_t * element,lxb_css_rule_style_t * style)673 lxb_html_document_style_attach_by_element(lxb_html_document_t *document,
674 lxb_html_element_t *element,
675 lxb_css_rule_style_t *style)
676 {
677 lxb_html_document_css_t *css = &document->css;
678
679 return lxb_selectors_match_node(css->selectors,
680 lxb_dom_interface_node(element),
681 style->selector,
682 lxb_html_document_style_cb, style);
683 }
684
685 static lxb_status_t
lxb_html_document_style_cb(lxb_dom_node_t * node,lxb_css_selector_specificity_t spec,void * ctx)686 lxb_html_document_style_cb(lxb_dom_node_t *node,
687 lxb_css_selector_specificity_t spec, void *ctx)
688 {
689 lxb_css_rule_style_t *style = ctx;
690
691 // FIXME: we don't have support for anything other than HTML.
692
693 if (node->ns != LXB_NS_HTML) {
694 return LXB_STATUS_OK;
695 }
696
697 /* Valid behavior when there are no declarations in the style. */
698
699 if (style->declarations == NULL) {
700 return LXB_STATUS_OK;
701 }
702
703 return lxb_html_element_style_list_append(lxb_html_interface_element(node),
704 style->declarations, spec);
705 }
706
707 lxb_html_document_t *
lxb_html_document_destroy(lxb_html_document_t * document)708 lxb_html_document_destroy(lxb_html_document_t *document)
709 {
710 return lxb_html_document_interface_destroy(document);
711 }
712
713 lxb_status_t
lxb_html_document_parse(lxb_html_document_t * document,const lxb_char_t * html,size_t size)714 lxb_html_document_parse(lxb_html_document_t *document,
715 const lxb_char_t *html, size_t size)
716 {
717 lxb_status_t status;
718 lxb_dom_document_t *doc;
719 lxb_html_document_opt_t opt;
720
721 if (document->ready_state != LXB_HTML_DOCUMENT_READY_STATE_UNDEF
722 && document->ready_state != LXB_HTML_DOCUMENT_READY_STATE_LOADING)
723 {
724 lxb_html_document_clean(document);
725 }
726
727 opt = document->opt;
728 doc = lxb_dom_interface_document(document);
729
730 status = lxb_html_document_parser_prepare(document);
731 if (status != LXB_STATUS_OK) {
732 goto failed;
733 }
734
735 status = lxb_html_parse_chunk_prepare(doc->parser, document);
736 if (status != LXB_STATUS_OK) {
737 goto failed;
738 }
739
740 status = lxb_html_parse_chunk_process(doc->parser, html, size);
741 if (status != LXB_STATUS_OK) {
742 goto failed;
743 }
744
745 document->opt = opt;
746
747 return lxb_html_parse_chunk_end(doc->parser);
748
749 failed:
750
751 document->opt = opt;
752
753 return status;
754 }
755
756 lxb_status_t
lxb_html_document_parse_chunk_begin(lxb_html_document_t * document)757 lxb_html_document_parse_chunk_begin(lxb_html_document_t *document)
758 {
759 if (document->ready_state != LXB_HTML_DOCUMENT_READY_STATE_UNDEF
760 && document->ready_state != LXB_HTML_DOCUMENT_READY_STATE_LOADING)
761 {
762 lxb_html_document_clean(document);
763 }
764
765 lxb_status_t status = lxb_html_document_parser_prepare(document);
766 if (status != LXB_STATUS_OK) {
767 return status;
768 }
769
770 return lxb_html_parse_chunk_prepare(document->dom_document.parser,
771 document);
772 }
773
774 lxb_status_t
lxb_html_document_parse_chunk(lxb_html_document_t * document,const lxb_char_t * html,size_t size)775 lxb_html_document_parse_chunk(lxb_html_document_t *document,
776 const lxb_char_t *html, size_t size)
777 {
778 return lxb_html_parse_chunk_process(document->dom_document.parser,
779 html, size);
780 }
781
782 lxb_status_t
lxb_html_document_parse_chunk_end(lxb_html_document_t * document)783 lxb_html_document_parse_chunk_end(lxb_html_document_t *document)
784 {
785 return lxb_html_parse_chunk_end(document->dom_document.parser);
786 }
787
788 lxb_dom_node_t *
lxb_html_document_parse_fragment(lxb_html_document_t * document,lxb_dom_element_t * element,const lxb_char_t * html,size_t size)789 lxb_html_document_parse_fragment(lxb_html_document_t *document,
790 lxb_dom_element_t *element,
791 const lxb_char_t *html, size_t size)
792 {
793 lxb_status_t status;
794 lxb_html_parser_t *parser;
795 lxb_html_document_opt_t opt = document->opt;
796
797 status = lxb_html_document_parser_prepare(document);
798 if (status != LXB_STATUS_OK) {
799 goto failed;
800 }
801
802 parser = document->dom_document.parser;
803
804 status = lxb_html_parse_fragment_chunk_begin(parser, document,
805 element->node.local_name,
806 element->node.ns);
807 if (status != LXB_STATUS_OK) {
808 goto failed;
809 }
810
811 status = lxb_html_parse_fragment_chunk_process(parser, html, size);
812 if (status != LXB_STATUS_OK) {
813 goto failed;
814 }
815
816 document->opt = opt;
817
818 return lxb_html_parse_fragment_chunk_end(parser);
819
820 failed:
821
822 document->opt = opt;
823
824 return NULL;
825 }
826
827 lxb_status_t
lxb_html_document_parse_fragment_chunk_begin(lxb_html_document_t * document,lxb_dom_element_t * element)828 lxb_html_document_parse_fragment_chunk_begin(lxb_html_document_t *document,
829 lxb_dom_element_t *element)
830 {
831 lxb_status_t status;
832 lxb_html_parser_t *parser;
833
834 status = lxb_html_document_parser_prepare(document);
835 if (status != LXB_STATUS_OK) {
836 return status;
837 }
838
839 parser = document->dom_document.parser;
840
841 return lxb_html_parse_fragment_chunk_begin(parser, document,
842 element->node.local_name,
843 element->node.ns);
844 }
845
846 lxb_status_t
lxb_html_document_parse_fragment_chunk(lxb_html_document_t * document,const lxb_char_t * html,size_t size)847 lxb_html_document_parse_fragment_chunk(lxb_html_document_t *document,
848 const lxb_char_t *html, size_t size)
849 {
850 return lxb_html_parse_fragment_chunk_process(document->dom_document.parser,
851 html, size);
852 }
853
854 lxb_dom_node_t *
lxb_html_document_parse_fragment_chunk_end(lxb_html_document_t * document)855 lxb_html_document_parse_fragment_chunk_end(lxb_html_document_t *document)
856 {
857 return lxb_html_parse_fragment_chunk_end(document->dom_document.parser);
858 }
859
860 lxb_inline lxb_status_t
lxb_html_document_parser_prepare(lxb_html_document_t * document)861 lxb_html_document_parser_prepare(lxb_html_document_t *document)
862 {
863 lxb_status_t status;
864 lxb_dom_document_t *doc;
865
866 doc = lxb_dom_interface_document(document);
867
868 if (doc->parser == NULL) {
869 doc->parser = lxb_html_parser_create();
870 status = lxb_html_parser_init(doc->parser);
871
872 if (status != LXB_STATUS_OK) {
873 lxb_html_parser_destroy(doc->parser);
874 return status;
875 }
876 }
877 else if (lxb_html_parser_state(doc->parser) != LXB_HTML_PARSER_STATE_BEGIN) {
878 lxb_html_parser_clean(doc->parser);
879 }
880
881 return LXB_STATUS_OK;
882 }
883
884 #if 0
885 static lxb_status_t
886 lxb_html_document_done(lxb_html_document_t *document)
887 {
888 size_t i, length;
889 lxb_status_t status;
890 lxb_css_stylesheet_t *sst;
891
892 if (!document->css_init) {
893 return LXB_STATUS_OK;
894 }
895
896 length = lexbor_array_length(document->css.stylesheets);
897
898 for (i = 0; i < length; i++) {
899 sst = lexbor_array_get(document->css.stylesheets, i);
900
901 status = lxb_html_document_stylesheet_apply(document, sst);
902 if (status != LXB_STATUS_OK) {
903 return status;
904 }
905 }
906
907 return LXB_STATUS_OK;
908 }
909 #endif
910
911 const lxb_char_t *
lxb_html_document_title(lxb_html_document_t * document,size_t * len)912 lxb_html_document_title(lxb_html_document_t *document, size_t *len)
913 {
914 lxb_html_title_element_t *title = NULL;
915
916 lxb_dom_node_simple_walk(lxb_dom_interface_node(document),
917 lxb_html_document_title_walker, &title);
918 if (title == NULL) {
919 return NULL;
920 }
921
922 return lxb_html_title_element_strict_text(title, len);
923 }
924
925 lxb_status_t
lxb_html_document_title_set(lxb_html_document_t * document,const lxb_char_t * title,size_t len)926 lxb_html_document_title_set(lxb_html_document_t *document,
927 const lxb_char_t *title, size_t len)
928 {
929 lxb_status_t status;
930
931 /* TODO: If the document element is an SVG svg element */
932
933 /* If the document element is in the HTML namespace */
934 if (document->head == NULL) {
935 return LXB_STATUS_OK;
936 }
937
938 lxb_html_title_element_t *el_title = NULL;
939
940 lxb_dom_node_simple_walk(lxb_dom_interface_node(document),
941 lxb_html_document_title_walker, &el_title);
942 if (el_title == NULL) {
943 el_title = (void *) lxb_html_document_create_element(document,
944 (const lxb_char_t *) "title", 5, NULL);
945 if (el_title == NULL) {
946 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
947 }
948
949 lxb_dom_node_insert_child(lxb_dom_interface_node(document->head),
950 lxb_dom_interface_node(el_title));
951 }
952
953 status = lxb_dom_node_text_content_set(lxb_dom_interface_node(el_title),
954 title, len);
955 if (status != LXB_STATUS_OK) {
956 lxb_html_document_destroy_element(&el_title->element.element);
957
958 return status;
959 }
960
961 return LXB_STATUS_OK;
962 }
963
964 const lxb_char_t *
lxb_html_document_title_raw(lxb_html_document_t * document,size_t * len)965 lxb_html_document_title_raw(lxb_html_document_t *document, size_t *len)
966 {
967 lxb_html_title_element_t *title = NULL;
968
969 lxb_dom_node_simple_walk(lxb_dom_interface_node(document),
970 lxb_html_document_title_walker, &title);
971 if (title == NULL) {
972 return NULL;
973 }
974
975 return lxb_html_title_element_text(title, len);
976 }
977
978 static lexbor_action_t
lxb_html_document_title_walker(lxb_dom_node_t * node,void * ctx)979 lxb_html_document_title_walker(lxb_dom_node_t *node, void *ctx)
980 {
981 if (node->local_name == LXB_TAG_TITLE && node->ns == LXB_NS_HTML) {
982 *((void **) ctx) = node;
983
984 return LEXBOR_ACTION_STOP;
985 }
986
987 return LEXBOR_ACTION_OK;
988 }
989
990 lxb_dom_node_t *
lxb_html_document_import_node(lxb_html_document_t * doc,lxb_dom_node_t * node,bool deep)991 lxb_html_document_import_node(lxb_html_document_t *doc, lxb_dom_node_t *node,
992 bool deep)
993 {
994 return lxb_dom_document_import_node(&doc->dom_document, node, deep);
995 }
996
997 #if 0
998 static lxb_status_t
999 lxb_html_document_event_insert(lxb_dom_node_t *node)
1000 {
1001 lxb_status_t status;
1002 lxb_html_document_t *doc;
1003 lxb_html_style_element_t *style;
1004
1005 if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
1006 return lxb_html_document_event_insert_attribute(node);
1007 }
1008 else if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
1009 return LXB_STATUS_OK;
1010 }
1011
1012 // FIXME: we don't have support for anything other than HTML.
1013
1014 if (node->ns != LXB_NS_HTML) {
1015 return LXB_STATUS_OK;
1016 }
1017
1018 if (node->local_name == LXB_TAG_STYLE) {
1019 style = lxb_html_interface_style(node);
1020
1021 status = lxb_html_style_element_parse(style);
1022 if (status != LXB_STATUS_OK) {
1023 return status;
1024 }
1025
1026 doc = lxb_html_interface_document(node->owner_document);
1027
1028 status = lxb_html_document_stylesheet_attach(doc, style->stylesheet);
1029 if (status != LXB_STATUS_OK) {
1030 return status;
1031 }
1032 }
1033
1034 return lxb_html_document_element_styles_attach(lxb_html_interface_element(node));
1035 }
1036
1037 static lxb_status_t
1038 lxb_html_document_event_insert_attribute(lxb_dom_node_t *node)
1039 {
1040 lxb_status_t status;
1041 lxb_dom_attr_t *attr;
1042 lxb_html_element_t *el;
1043
1044 if (node->type != LXB_DOM_NODE_TYPE_ATTRIBUTE
1045 || node->local_name != LXB_DOM_ATTR_STYLE)
1046 {
1047 return LXB_STATUS_OK;
1048 }
1049
1050 // FIXME: we don't have support for anything other than HTML.
1051
1052 if (node->ns != LXB_NS_HTML) {
1053 return LXB_STATUS_OK;
1054 }
1055
1056 attr = lxb_dom_interface_attr(node);
1057 el = lxb_html_interface_element(attr->owner);
1058
1059 if (el != NULL && el->list != NULL) {
1060 status = lxb_html_document_event_remove_attribute(node);
1061 if (status != LXB_STATUS_OK) {
1062 return status;
1063 }
1064 }
1065
1066 if (attr->value == NULL || attr->value->data == NULL) {
1067 return LXB_STATUS_OK;
1068 }
1069
1070 return lxb_html_element_style_parse(el, attr->value->data,
1071 attr->value->length);
1072 }
1073
1074 static lxb_status_t
1075 lxb_html_document_event_remove(lxb_dom_node_t *node)
1076 {
1077 lxb_status_t status;
1078 lxb_html_element_t *el;
1079 lxb_html_document_t *doc;
1080 lxb_html_document_event_ctx_t context;
1081
1082 if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
1083 return lxb_html_document_event_remove_attribute(node);
1084 }
1085 else if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
1086 return LXB_STATUS_OK;
1087 }
1088
1089 // FIXME: we don't have support for anything other than HTML.
1090
1091 if (node->ns != LXB_NS_HTML) {
1092 return LXB_STATUS_OK;
1093 }
1094
1095 if (node->local_name == LXB_TAG_STYLE) {
1096 status = lxb_html_element_style_remove((lxb_html_style_element_t *) node);
1097 if (status != LXB_STATUS_OK) {
1098 return status;
1099 }
1100 }
1101
1102 el = lxb_html_interface_element(node);
1103
1104 if (el->style == NULL) {
1105 return LXB_STATUS_OK;
1106 }
1107
1108 doc = lxb_html_interface_document(node->owner_document);
1109
1110 context.doc = doc;
1111 context.all = false;
1112
1113 return lexbor_avl_foreach(doc->css.styles, &el->style,
1114 lxb_html_document_style_remove_cb, &context);
1115 }
1116
1117 static lxb_status_t
1118 lxb_html_document_style_remove_cb(lexbor_avl_t *avl, lexbor_avl_node_t **root,
1119 lexbor_avl_node_t *node, void *ctx)
1120 {
1121 lxb_html_document_event_ctx_t *context = ctx;
1122 lxb_html_style_node_t *style = (lxb_html_style_node_t *) node;
1123
1124 if (context->all) {
1125 lxb_html_element_style_remove_all(context->doc, root, style);
1126 }
1127
1128 lxb_html_element_style_remove_all_not(context->doc, root, style, false);
1129
1130 return LXB_STATUS_OK;
1131 }
1132
1133 static lxb_status_t
1134 lxb_html_document_event_remove_attribute(lxb_dom_node_t *node)
1135 {
1136 lxb_status_t status;
1137 lxb_dom_attr_t *attr;
1138 lxb_html_element_t *el;
1139 lxb_html_document_t *doc;
1140 lxb_html_document_event_ctx_t context;
1141
1142 // FIXME: we don't have support for anything other than HTML.
1143
1144 if (node->local_name != LXB_DOM_ATTR_STYLE || node->ns != LXB_NS_HTML) {
1145 return LXB_STATUS_OK;
1146 }
1147
1148 attr = lxb_dom_interface_attr(node);
1149 el = lxb_html_interface_element(attr->owner);
1150
1151 if (el == NULL || el->list == NULL) {
1152 return LXB_STATUS_OK;
1153 }
1154
1155 doc = lxb_html_interface_document(node->owner_document);
1156
1157 context.doc = doc;
1158
1159 status = lexbor_avl_foreach(doc->css.styles, &el->style,
1160 lxb_html_document_style_remove_my_cb, &context);
1161 if (status != LXB_STATUS_OK) {
1162 return status;
1163 }
1164
1165 el->list->first = NULL;
1166 el->list->last = NULL;
1167
1168 el->list = lxb_css_rule_declaration_list_destroy(el->list, true);
1169
1170 return LXB_STATUS_OK;
1171 }
1172
1173 static lxb_status_t
1174 lxb_html_document_style_remove_my_cb(lexbor_avl_t *avl, lexbor_avl_node_t **root,
1175 lexbor_avl_node_t *node, void *ctx)
1176 {
1177 lxb_html_document_event_ctx_t *context = ctx;
1178 lxb_html_style_node_t *style = (lxb_html_style_node_t *) node;
1179
1180 lxb_html_element_style_remove_all_not(context->doc, root, style, true);
1181
1182 return LXB_STATUS_OK;
1183 }
1184
1185 static lxb_status_t
1186 lxb_html_document_event_destroy(lxb_dom_node_t *node)
1187 {
1188 lxb_status_t status;
1189 lxb_html_element_t *el;
1190 lxb_html_document_t *doc;
1191 lxb_html_document_event_ctx_t context;
1192
1193 if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
1194 return lxb_html_document_event_remove_attribute(node);
1195 }
1196 else if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
1197 return LXB_STATUS_OK;
1198 }
1199
1200 // FIXME: we don't have support for anything other than HTML.
1201
1202 if (node->ns != LXB_NS_HTML) {
1203 return LXB_STATUS_OK;
1204 }
1205
1206 el = lxb_html_interface_element(node);
1207
1208 if (el->style == NULL) {
1209 if (el->list != NULL) {
1210 goto destroy;
1211 }
1212
1213 return LXB_STATUS_OK;
1214 }
1215
1216 doc = lxb_html_interface_document(node->owner_document);
1217
1218 context.doc = doc;
1219 context.all = true;
1220
1221 status = lexbor_avl_foreach(doc->css.styles, &el->style,
1222 lxb_html_document_style_remove_cb, &context);
1223
1224 if (status != LXB_STATUS_OK) {
1225 return status;
1226 }
1227
1228 destroy:
1229
1230 el->list->first = NULL;
1231 el->list->last = NULL;
1232
1233 el->list = lxb_css_rule_declaration_list_destroy(el->list, true);
1234
1235 return LXB_STATUS_OK;
1236 }
1237
1238 static lxb_status_t
1239 lxb_html_document_event_set_value(lxb_dom_node_t *node,
1240 const lxb_char_t *value, size_t length)
1241 {
1242 lxb_status_t status;
1243 lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
1244
1245 if (node->type != LXB_DOM_NODE_TYPE_ATTRIBUTE
1246 || node->local_name != LXB_DOM_ATTR_STYLE)
1247 {
1248 return LXB_STATUS_OK;
1249 }
1250
1251 // FIXME: we don't have support for anything other than HTML.
1252
1253 if (node->ns != LXB_NS_HTML) {
1254 return LXB_STATUS_OK;
1255 }
1256
1257 if (attr->owner == NULL) {
1258 return LXB_STATUS_OK;
1259 }
1260
1261 status = lxb_html_document_event_remove_attribute(node);
1262 if (status != LXB_STATUS_OK) {
1263 return status;
1264 }
1265
1266 return lxb_html_element_style_parse(lxb_html_interface_element(node),
1267 value, length);
1268 }
1269 #endif
1270
1271 /*
1272 * No inline functions for ABI.
1273 */
1274 lxb_html_head_element_t *
lxb_html_document_head_element_noi(lxb_html_document_t * document)1275 lxb_html_document_head_element_noi(lxb_html_document_t *document)
1276 {
1277 return lxb_html_document_head_element(document);
1278 }
1279
1280 lxb_html_body_element_t *
lxb_html_document_body_element_noi(lxb_html_document_t * document)1281 lxb_html_document_body_element_noi(lxb_html_document_t *document)
1282 {
1283 return lxb_html_document_body_element(document);
1284 }
1285
1286 lxb_dom_document_t *
lxb_html_document_original_ref_noi(lxb_html_document_t * document)1287 lxb_html_document_original_ref_noi(lxb_html_document_t *document)
1288 {
1289 return lxb_html_document_original_ref(document);
1290 }
1291
1292 bool
lxb_html_document_is_original_noi(lxb_html_document_t * document)1293 lxb_html_document_is_original_noi(lxb_html_document_t *document)
1294 {
1295 return lxb_html_document_is_original(document);
1296 }
1297
1298 lexbor_mraw_t *
lxb_html_document_mraw_noi(lxb_html_document_t * document)1299 lxb_html_document_mraw_noi(lxb_html_document_t *document)
1300 {
1301 return lxb_html_document_mraw(document);
1302 }
1303
1304 lexbor_mraw_t *
lxb_html_document_mraw_text_noi(lxb_html_document_t * document)1305 lxb_html_document_mraw_text_noi(lxb_html_document_t *document)
1306 {
1307 return lxb_html_document_mraw_text(document);
1308 }
1309
1310 void
lxb_html_document_opt_set_noi(lxb_html_document_t * document,lxb_html_document_opt_t opt)1311 lxb_html_document_opt_set_noi(lxb_html_document_t *document,
1312 lxb_html_document_opt_t opt)
1313 {
1314 lxb_html_document_opt_set(document, opt);
1315 }
1316
1317 lxb_html_document_opt_t
lxb_html_document_opt_noi(lxb_html_document_t * document)1318 lxb_html_document_opt_noi(lxb_html_document_t *document)
1319 {
1320 return lxb_html_document_opt(document);
1321 }
1322
1323 void *
lxb_html_document_create_struct_noi(lxb_html_document_t * document,size_t struct_size)1324 lxb_html_document_create_struct_noi(lxb_html_document_t *document,
1325 size_t struct_size)
1326 {
1327 return lxb_html_document_create_struct(document, struct_size);
1328 }
1329
1330 void *
lxb_html_document_destroy_struct_noi(lxb_html_document_t * document,void * data)1331 lxb_html_document_destroy_struct_noi(lxb_html_document_t *document, void *data)
1332 {
1333 return lxb_html_document_destroy_struct(document, data);
1334 }
1335
1336 lxb_html_element_t *
lxb_html_document_create_element_noi(lxb_html_document_t * document,const lxb_char_t * local_name,size_t lname_len,void * reserved_for_opt)1337 lxb_html_document_create_element_noi(lxb_html_document_t *document,
1338 const lxb_char_t *local_name,
1339 size_t lname_len, void *reserved_for_opt)
1340 {
1341 return lxb_html_document_create_element(document, local_name, lname_len,
1342 reserved_for_opt);
1343 }
1344
1345 lxb_dom_element_t *
lxb_html_document_destroy_element_noi(lxb_dom_element_t * element)1346 lxb_html_document_destroy_element_noi(lxb_dom_element_t *element)
1347 {
1348 return lxb_html_document_destroy_element(element);
1349 }
1350