xref: /php-src/ext/dom/lexbor/lexbor/html/tree.c (revision bffab33a)
1 /*
2  * Copyright (C) 2018-2022 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #include "lexbor/dom/interfaces/document_fragment.h"
8 #include "lexbor/dom/interfaces/document_type.h"
9 #include "lexbor/dom/interfaces/comment.h"
10 #include "lexbor/dom/interfaces/text.h"
11 
12 #include "lexbor/html/tree.h"
13 #include "lexbor/html/tree_res.h"
14 #include "lexbor/html/tree/insertion_mode.h"
15 #include "lexbor/html/tree/open_elements.h"
16 #include "lexbor/html/tree/active_formatting.h"
17 #include "lexbor/html/tree/template_insertion.h"
18 #include "lexbor/html/interface.h"
19 #include "lexbor/html/interface.h"
20 #include "lexbor/html/interfaces/template_element.h"
21 #include "lexbor/html/interfaces/unknown_element.h"
22 #include "lexbor/html/tokenizer/state_rawtext.h"
23 #include "lexbor/html/tokenizer/state_rcdata.h"
24 
25 
26 lxb_dom_attr_data_t *
27 lxb_dom_attr_local_name_append(lexbor_hash_t *hash,
28                                const lxb_char_t *name, size_t length);
29 
30 lxb_dom_attr_data_t *
31 lxb_dom_attr_qualified_name_append(lexbor_hash_t *hash, const lxb_char_t *name,
32                                    size_t length);
33 
34 const lxb_tag_data_t *
35 lxb_tag_append_lower(lexbor_hash_t *hash,
36                      const lxb_char_t *name, size_t length);
37 
38 static lxb_html_token_t *
39 lxb_html_tree_token_callback(lxb_html_tokenizer_t *tkz,
40                              lxb_html_token_t *token, void *ctx);
41 
42 static lxb_status_t
43 lxb_html_tree_insertion_mode(lxb_html_tree_t *tree, lxb_html_token_t *token);
44 
45 
46 lxb_html_tree_t *
lxb_html_tree_create(void)47 lxb_html_tree_create(void)
48 {
49     return lexbor_calloc(1, sizeof(lxb_html_tree_t));
50 }
51 
52 lxb_status_t
lxb_html_tree_init(lxb_html_tree_t * tree,lxb_html_tokenizer_t * tkz)53 lxb_html_tree_init(lxb_html_tree_t *tree, lxb_html_tokenizer_t *tkz)
54 {
55     if (tree == NULL) {
56         return LXB_STATUS_ERROR_OBJECT_IS_NULL;
57     }
58 
59     if (tkz == NULL) {
60         return LXB_STATUS_ERROR_WRONG_ARGS;
61     }
62 
63     lxb_status_t status;
64 
65     /* Stack of open elements */
66     tree->open_elements = lexbor_array_create();
67     status = lexbor_array_init(tree->open_elements, 128);
68     if (status != LXB_STATUS_OK) {
69         return status;
70     }
71 
72     /* Stack of active formatting */
73     tree->active_formatting = lexbor_array_create();
74     status = lexbor_array_init(tree->active_formatting, 128);
75     if (status != LXB_STATUS_OK) {
76         return status;
77     }
78 
79     /* Stack of template insertion modes */
80     tree->template_insertion_modes = lexbor_array_obj_create();
81     status = lexbor_array_obj_init(tree->template_insertion_modes, 64,
82                                    sizeof(lxb_html_tree_template_insertion_t));
83     if (status != LXB_STATUS_OK) {
84         return status;
85     }
86 
87     /* Stack of pending table character tokens */
88     tree->pending_table.text_list = lexbor_array_obj_create();
89     status = lexbor_array_obj_init(tree->pending_table.text_list, 16,
90                                    sizeof(lexbor_str_t));
91     if (status != LXB_STATUS_OK) {
92         return status;
93     }
94 
95     /* Parse errors */
96     tree->parse_errors = lexbor_array_obj_create();
97     status = lexbor_array_obj_init(tree->parse_errors, 16,
98                                                 sizeof(lxb_html_tree_error_t));
99     if (status != LXB_STATUS_OK) {
100         return status;
101     }
102 
103     tree->tkz_ref = lxb_html_tokenizer_ref(tkz);
104 
105     tree->document = NULL;
106     tree->fragment = NULL;
107 
108     tree->form = NULL;
109 
110     tree->foster_parenting = false;
111     tree->frameset_ok = true;
112 
113     tree->mode = lxb_html_tree_insertion_mode_initial;
114     tree->before_append_attr = NULL;
115 
116     tree->status = LXB_STATUS_OK;
117 
118     tree->ref_count = 1;
119 
120     lxb_html_tokenizer_callback_token_done_set(tkz,
121                                                lxb_html_tree_token_callback,
122                                                tree);
123 
124     return LXB_STATUS_OK;
125 }
126 
127 lxb_html_tree_t *
lxb_html_tree_ref(lxb_html_tree_t * tree)128 lxb_html_tree_ref(lxb_html_tree_t *tree)
129 {
130     if (tree == NULL) {
131         return NULL;
132     }
133 
134     tree->ref_count++;
135 
136     return tree;
137 }
138 
139 lxb_html_tree_t *
lxb_html_tree_unref(lxb_html_tree_t * tree)140 lxb_html_tree_unref(lxb_html_tree_t *tree)
141 {
142     if (tree == NULL || tree->ref_count == 0) {
143         return NULL;
144     }
145 
146     tree->ref_count--;
147 
148     if (tree->ref_count == 0) {
149         lxb_html_tree_destroy(tree);
150     }
151 
152     return NULL;
153 }
154 
155 void
lxb_html_tree_clean(lxb_html_tree_t * tree)156 lxb_html_tree_clean(lxb_html_tree_t *tree)
157 {
158     lexbor_array_clean(tree->open_elements);
159     lexbor_array_clean(tree->active_formatting);
160     lexbor_array_obj_clean(tree->template_insertion_modes);
161     lexbor_array_obj_clean(tree->pending_table.text_list);
162     lexbor_array_obj_clean(tree->parse_errors);
163 
164     tree->document = NULL;
165     tree->fragment = NULL;
166 
167     tree->form = NULL;
168 
169     tree->foster_parenting = false;
170     tree->frameset_ok = true;
171 
172     tree->mode = lxb_html_tree_insertion_mode_initial;
173     tree->before_append_attr = NULL;
174 
175     tree->status = LXB_STATUS_OK;
176 }
177 
178 lxb_html_tree_t *
lxb_html_tree_destroy(lxb_html_tree_t * tree)179 lxb_html_tree_destroy(lxb_html_tree_t *tree)
180 {
181     if (tree == NULL) {
182         return NULL;
183     }
184 
185     tree->open_elements = lexbor_array_destroy(tree->open_elements, true);
186     tree->active_formatting = lexbor_array_destroy(tree->active_formatting,
187                                                    true);
188     tree->template_insertion_modes = lexbor_array_obj_destroy(tree->template_insertion_modes,
189                                                               true);
190     tree->pending_table.text_list = lexbor_array_obj_destroy(tree->pending_table.text_list,
191                                                              true);
192 
193     tree->parse_errors = lexbor_array_obj_destroy(tree->parse_errors, true);
194     tree->tkz_ref = lxb_html_tokenizer_unref(tree->tkz_ref);
195 
196     return lexbor_free(tree);
197 }
198 
199 static lxb_html_token_t *
lxb_html_tree_token_callback(lxb_html_tokenizer_t * tkz,lxb_html_token_t * token,void * ctx)200 lxb_html_tree_token_callback(lxb_html_tokenizer_t *tkz,
201                              lxb_html_token_t *token, void *ctx)
202 {
203     lxb_status_t status;
204 
205     status = lxb_html_tree_insertion_mode(ctx, token);
206     if (status != LXB_STATUS_OK) {
207         tkz->status = status;
208         return NULL;
209     }
210 
211     return token;
212 }
213 
214 /* TODO: not complete!!! */
215 lxb_status_t
lxb_html_tree_stop_parsing(lxb_html_tree_t * tree)216 lxb_html_tree_stop_parsing(lxb_html_tree_t *tree)
217 {
218     tree->document->ready_state = LXB_HTML_DOCUMENT_READY_STATE_COMPLETE;
219 
220     return LXB_STATUS_OK;
221 }
222 
223 bool
lxb_html_tree_process_abort(lxb_html_tree_t * tree)224 lxb_html_tree_process_abort(lxb_html_tree_t *tree)
225 {
226     if (tree->status == LXB_STATUS_OK) {
227         tree->status = LXB_STATUS_ABORTED;
228     }
229 
230     tree->open_elements->length = 0;
231     tree->document->ready_state = LXB_HTML_DOCUMENT_READY_STATE_COMPLETE;
232 
233     return true;
234 }
235 
236 void
lxb_html_tree_parse_error(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_html_tree_error_id_t id)237 lxb_html_tree_parse_error(lxb_html_tree_t *tree, lxb_html_token_t *token,
238                           lxb_html_tree_error_id_t id)
239 {
240     lxb_html_tree_error_add(tree->parse_errors, token, id);
241 }
242 
243 bool
lxb_html_tree_construction_dispatcher(lxb_html_tree_t * tree,lxb_html_token_t * token)244 lxb_html_tree_construction_dispatcher(lxb_html_tree_t *tree,
245                                       lxb_html_token_t *token)
246 {
247     lxb_dom_node_t *adjusted;
248 
249     adjusted = lxb_html_tree_adjusted_current_node(tree);
250 
251     if (adjusted == NULL || adjusted->ns == LXB_NS_HTML) {
252         return tree->mode(tree, token);
253     }
254 
255     if (lxb_html_tree_mathml_text_integration_point(adjusted))
256     {
257         if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
258             && token->tag_id != LXB_TAG_MGLYPH
259             && token->tag_id != LXB_TAG_MALIGNMARK)
260         {
261             return tree->mode(tree, token);
262         }
263 
264         if (token->tag_id == LXB_TAG__TEXT) {
265             return tree->mode(tree, token);
266         }
267     }
268 
269     if (adjusted->local_name == LXB_TAG_ANNOTATION_XML
270         && adjusted->ns == LXB_NS_MATH
271         && (token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
272         && token->tag_id == LXB_TAG_SVG)
273     {
274         return tree->mode(tree, token);
275     }
276 
277     if (lxb_html_tree_html_integration_point(adjusted)) {
278         if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
279             || token->tag_id == LXB_TAG__TEXT)
280         {
281             return tree->mode(tree, token);
282         }
283     }
284 
285     if (token->tag_id == LXB_TAG__END_OF_FILE) {
286         return tree->mode(tree, token);
287     }
288 
289     return lxb_html_tree_insertion_mode_foreign_content(tree, token);
290 }
291 
292 static lxb_status_t
lxb_html_tree_insertion_mode(lxb_html_tree_t * tree,lxb_html_token_t * token)293 lxb_html_tree_insertion_mode(lxb_html_tree_t *tree, lxb_html_token_t *token)
294 {
295     while (lxb_html_tree_construction_dispatcher(tree, token) == false) {}
296 
297     return tree->status;
298 }
299 
300 /*
301  * Action
302  */
303 lxb_dom_node_t *
lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t * tree,lxb_dom_node_t * override_target,lxb_html_tree_insertion_position_t * ipos)304 lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t *tree,
305                                        lxb_dom_node_t *override_target,
306                                        lxb_html_tree_insertion_position_t *ipos)
307 {
308     lxb_dom_node_t *target, *adjusted_location = NULL;
309 
310     *ipos = LXB_HTML_TREE_INSERTION_POSITION_CHILD;
311 
312     if (override_target != NULL) {
313         target = override_target;
314     }
315     else {
316         target = lxb_html_tree_current_node(tree);
317     }
318 
319     if (tree->foster_parenting && target->ns == LXB_NS_HTML
320            && (target->local_name == LXB_TAG_TABLE
321             || target->local_name == LXB_TAG_TBODY
322             || target->local_name == LXB_TAG_TFOOT
323             || target->local_name == LXB_TAG_THEAD
324             || target->local_name == LXB_TAG_TR))
325     {
326         lxb_dom_node_t *last_temp, *last_table;
327         size_t last_temp_idx, last_table_idx;
328 
329         last_temp = lxb_html_tree_open_elements_find_reverse(tree,
330                                                           LXB_TAG_TEMPLATE,
331                                                           LXB_NS_HTML,
332                                                           &last_temp_idx);
333 
334         last_table = lxb_html_tree_open_elements_find_reverse(tree,
335                                                              LXB_TAG_TABLE,
336                                                              LXB_NS_HTML,
337                                                              &last_table_idx);
338 
339         if(last_temp != NULL && (last_table == NULL
340                          || last_temp_idx > last_table_idx))
341         {
342             lxb_dom_document_fragment_t *doc_fragment;
343 
344             doc_fragment = lxb_html_interface_template(last_temp)->content;
345 
346             return lxb_dom_interface_node(doc_fragment);
347         }
348         else if (last_table == NULL) {
349             adjusted_location = lxb_html_tree_open_elements_first(tree);
350 
351             lexbor_assert(adjusted_location != NULL);
352             lexbor_assert(adjusted_location->local_name == LXB_TAG_HTML);
353         }
354         else if (last_table->parent != NULL) {
355             adjusted_location = last_table;
356 
357             *ipos = LXB_HTML_TREE_INSERTION_POSITION_BEFORE;
358         }
359         else {
360             lexbor_assert(last_table_idx != 0);
361 
362             adjusted_location = lxb_html_tree_open_elements_get(tree,
363                                                             last_table_idx - 1);
364         }
365     }
366     else {
367         adjusted_location = target;
368     }
369 
370     if (adjusted_location == NULL) {
371         return NULL;
372     }
373 
374     /*
375      * In Spec it is not entirely clear what is meant:
376      *
377      * If the adjusted insertion location is inside a template element,
378      * let it instead be inside the template element's template contents,
379      * after its last child (if any).
380      */
381     if (lxb_html_tree_node_is(adjusted_location, LXB_TAG_TEMPLATE)) {
382         lxb_dom_document_fragment_t *df;
383 
384         df = lxb_html_interface_template(adjusted_location)->content;
385         adjusted_location = lxb_dom_interface_node(df);
386     }
387 
388     return adjusted_location;
389 }
390 
391 lxb_html_element_t *
lxb_html_tree_insert_foreign_element(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_ns_id_t ns)392 lxb_html_tree_insert_foreign_element(lxb_html_tree_t *tree,
393                                      lxb_html_token_t *token, lxb_ns_id_t ns)
394 {
395     lxb_status_t status;
396     lxb_dom_node_t *pos;
397     lxb_html_element_t *element;
398     lxb_html_tree_insertion_position_t ipos;
399 
400     pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
401 
402     if (ipos == LXB_HTML_TREE_INSERTION_POSITION_CHILD) {
403         element = lxb_html_tree_create_element_for_token(tree, token, ns, pos);
404     }
405     else {
406         element = lxb_html_tree_create_element_for_token(tree, token, ns,
407                                                          pos->parent);
408     }
409 
410     if (element == NULL) {
411         return NULL;
412     }
413 
414     if (pos != NULL) {
415         lxb_html_tree_insert_node(pos, lxb_dom_interface_node(element), ipos);
416     }
417 
418     status = lxb_html_tree_open_elements_push(tree,
419                                               lxb_dom_interface_node(element));
420     if (status != LXB_HTML_STATUS_OK) {
421         return lxb_html_interface_destroy(element);
422     }
423 
424     return element;
425 }
426 
427 lxb_html_element_t *
lxb_html_tree_create_element_for_token(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_ns_id_t ns,lxb_dom_node_t * parent)428 lxb_html_tree_create_element_for_token(lxb_html_tree_t *tree,
429                                        lxb_html_token_t *token, lxb_ns_id_t ns,
430                                        lxb_dom_node_t *parent)
431 {
432     lxb_dom_node_t *node = lxb_html_tree_create_node(tree, token->tag_id, ns);
433     if (node == NULL) {
434         return NULL;
435     }
436 
437     node->line = token->line;
438     /* We only expose line number in PHP DOM */
439 
440     lxb_status_t status;
441     lxb_dom_element_t *element = lxb_dom_interface_element(node);
442 
443     if (token->base_element == NULL) {
444         status = lxb_html_tree_append_attributes(tree, element, token, ns);
445     }
446     else {
447         status = lxb_html_tree_append_attributes_from_element(tree, element,
448                                                        token->base_element, ns);
449     }
450 
451     if (status != LXB_HTML_STATUS_OK) {
452         return lxb_html_interface_destroy(element);
453     }
454 
455     return lxb_html_interface_element(node);
456 }
457 
458 lxb_status_t
lxb_html_tree_append_attributes(lxb_html_tree_t * tree,lxb_dom_element_t * element,lxb_html_token_t * token,lxb_ns_id_t ns)459 lxb_html_tree_append_attributes(lxb_html_tree_t *tree,
460                                 lxb_dom_element_t *element,
461                                 lxb_html_token_t *token, lxb_ns_id_t ns)
462 {
463     lxb_status_t status;
464     lxb_dom_attr_t *attr;
465     lxb_html_document_t *doc;
466     lxb_html_token_attr_t *token_attr = token->attr_first;
467 
468     doc = lxb_html_interface_document(element->node.owner_document);
469 
470     while (token_attr != NULL) {
471         attr = lxb_dom_element_attr_by_local_name_data(element,
472                                                        token_attr->name);
473         if (attr != NULL) {
474             token_attr = token_attr->next;
475             continue;
476         }
477 
478         attr = lxb_dom_attr_interface_create(lxb_dom_interface_document(doc));
479         if (attr == NULL) {
480             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
481         }
482 
483         if (token_attr->value_begin != NULL) {
484             status = lxb_dom_attr_set_value_wo_copy(attr, token_attr->value,
485                                                     token_attr->value_size);
486             if (status != LXB_HTML_STATUS_OK) {
487                 return status;
488             }
489         }
490 
491         attr->node.local_name = token_attr->name->attr_id;
492         attr->node.ns = ns;
493 
494         /* Fix for adjust MathML/SVG attributes */
495         if (tree->before_append_attr != NULL) {
496             status = tree->before_append_attr(tree, attr, NULL);
497             if (status != LXB_STATUS_OK) {
498                 return status;
499             }
500         }
501 
502         lxb_dom_element_attr_append(element, attr);
503 
504         token_attr = token_attr->next;
505     }
506 
507     return LXB_HTML_STATUS_OK;
508 }
509 
510 lxb_status_t
lxb_html_tree_append_attributes_from_element(lxb_html_tree_t * tree,lxb_dom_element_t * element,lxb_dom_element_t * from,lxb_ns_id_t ns)511 lxb_html_tree_append_attributes_from_element(lxb_html_tree_t *tree,
512                                              lxb_dom_element_t *element,
513                                              lxb_dom_element_t *from,
514                                              lxb_ns_id_t ns)
515 {
516     lxb_status_t status;
517     lxb_dom_attr_t *attr = from->first_attr;
518     lxb_dom_attr_t *new_attr;
519 
520     while (attr != NULL) {
521         new_attr = lxb_dom_attr_interface_create(element->node.owner_document);
522         if (new_attr == NULL) {
523             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
524         }
525 
526         status = lxb_dom_attr_clone_name_value(attr, new_attr);
527         if (status != LXB_HTML_STATUS_OK) {
528             return status;
529         }
530 
531         new_attr->node.ns = attr->node.ns;
532 
533         /* Fix for  adjust MathML/SVG attributes */
534         if (tree->before_append_attr != NULL) {
535             status = tree->before_append_attr(tree, new_attr, NULL);
536             if (status != LXB_STATUS_OK) {
537                 return status;
538             }
539         }
540 
541         lxb_dom_element_attr_append(element, attr);
542 
543         attr = attr->next;
544     }
545 
546     return LXB_HTML_STATUS_OK;
547 }
548 
549 lxb_status_t
lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)550 lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t *tree,
551                                        lxb_dom_attr_t *attr, void *ctx)
552 {
553     lexbor_hash_t *attrs;
554     const lxb_dom_attr_data_t *data;
555 
556     attrs = attr->node.owner_document->attrs;
557     data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
558 
559     if (data->entry.length == 13
560         && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
561                                (const lxb_char_t *) "definitionurl"))
562     {
563         data = lxb_dom_attr_qualified_name_append(attrs,
564                                       (const lxb_char_t *) "definitionURL", 13);
565         if (data == NULL) {
566             return LXB_STATUS_ERROR;
567         }
568 
569         attr->qualified_name = data->attr_id;
570     }
571 
572     return LXB_STATUS_OK;
573 }
574 
575 lxb_status_t
lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)576 lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t *tree,
577                                     lxb_dom_attr_t *attr, void *ctx)
578 {
579     lexbor_hash_t *attrs;
580     const lxb_dom_attr_data_t *data;
581     const lxb_html_tree_res_attr_adjust_t *adjust;
582 
583     size_t len = sizeof(lxb_html_tree_res_attr_adjust_svg_map)
584         / sizeof(lxb_html_tree_res_attr_adjust_t);
585 
586     attrs = attr->node.owner_document->attrs;
587 
588     data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
589 
590     for (size_t i = 0; i < len; i++) {
591         adjust = &lxb_html_tree_res_attr_adjust_svg_map[i];
592 
593         if (data->entry.length == adjust->len
594             && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
595                                    (const lxb_char_t *) adjust->from))
596         {
597             data = lxb_dom_attr_qualified_name_append(attrs,
598                                 (const lxb_char_t *) adjust->to, adjust->len);
599             if (data == NULL) {
600                 return LXB_STATUS_ERROR;
601             }
602 
603             attr->qualified_name = data->attr_id;
604 
605             return LXB_STATUS_OK;
606         }
607     }
608 
609     return LXB_STATUS_OK;
610 }
611 
612 lxb_status_t
lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)613 lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t *tree,
614                                         lxb_dom_attr_t *attr, void *ctx)
615 {
616     size_t lname_length;
617     lexbor_hash_t *attrs, *prefix;
618     const lxb_dom_attr_data_t *attr_data;
619     const lxb_ns_prefix_data_t *prefix_data;
620     const lxb_dom_attr_data_t *data;
621     const lxb_html_tree_res_attr_adjust_foreign_t *adjust;
622 
623     size_t len = sizeof(lxb_html_tree_res_attr_adjust_foreign_map)
624         / sizeof(lxb_html_tree_res_attr_adjust_foreign_t);
625 
626     attrs = attr->node.owner_document->attrs;
627     prefix = attr->node.owner_document->prefix;
628 
629     data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
630 
631     for (size_t i = 0; i < len; i++) {
632         adjust = &lxb_html_tree_res_attr_adjust_foreign_map[i];
633 
634         if (data->entry.length == adjust->name_len
635             && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
636                                    (const lxb_char_t *) adjust->name))
637         {
638             if (adjust->prefix_len != 0) {
639                 data = lxb_dom_attr_qualified_name_append(attrs,
640                            (const lxb_char_t *) adjust->name, adjust->name_len);
641                 if (data == NULL) {
642                     return LXB_STATUS_ERROR;
643                 }
644 
645                 attr->qualified_name = data->attr_id;
646 
647                 lname_length = adjust->name_len - adjust->prefix_len - 1;
648 
649                 attr_data = lxb_dom_attr_local_name_append(attrs,
650                          (const lxb_char_t *) adjust->local_name, lname_length);
651                 if (attr_data == NULL) {
652                     return LXB_STATUS_ERROR;
653                 }
654 
655                 attr->node.local_name = attr_data->attr_id;
656 
657                 prefix_data = lxb_ns_prefix_append(prefix,
658                        (const lxb_char_t *) adjust->prefix, adjust->prefix_len);
659                 if (prefix_data == NULL) {
660                     return LXB_STATUS_ERROR;
661                 }
662 
663                 attr->node.prefix = prefix_data->prefix_id;
664             }
665 
666             attr->node.ns = adjust->ns;
667 
668             return LXB_STATUS_OK;
669         }
670     }
671 
672     return LXB_STATUS_OK;
673 }
674 
675 lxb_status_t
lxb_html_tree_insert_character(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_dom_node_t ** ret_node)676 lxb_html_tree_insert_character(lxb_html_tree_t *tree, lxb_html_token_t *token,
677                                lxb_dom_node_t **ret_node)
678 {
679     size_t size;
680     lxb_status_t status;
681     lexbor_str_t str = {0};
682 
683     size = token->text_end - token->text_start;
684 
685     lexbor_str_init(&str, tree->document->dom_document.text, size + 1);
686     if (str.data == NULL) {
687         return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
688     }
689 
690     memcpy(str.data, token->text_start, size);
691 
692     str.data[size] = 0x00;
693     str.length = size;
694 
695     status = lxb_html_tree_insert_character_for_data(tree, &str, ret_node);
696     if (status != LXB_STATUS_OK) {
697         return status;
698     }
699 
700     return LXB_STATUS_OK;
701 }
702 
703 lxb_status_t
lxb_html_tree_insert_character_for_data(lxb_html_tree_t * tree,lexbor_str_t * str,lxb_dom_node_t ** ret_node)704 lxb_html_tree_insert_character_for_data(lxb_html_tree_t *tree,
705                                         lexbor_str_t *str,
706                                         lxb_dom_node_t **ret_node)
707 {
708     const lxb_char_t *data;
709     lxb_dom_node_t *pos;
710     lxb_dom_character_data_t *chrs = NULL;
711     lxb_html_tree_insertion_position_t ipos;
712 
713     if (ret_node != NULL) {
714         *ret_node = NULL;
715     }
716 
717     pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
718     if (pos == NULL) {
719         return LXB_STATUS_ERROR;
720     }
721 
722     if (lxb_html_tree_node_is(pos, LXB_TAG__DOCUMENT)) {
723         goto destroy_str;
724     }
725 
726     if (ipos == LXB_HTML_TREE_INSERTION_POSITION_BEFORE) {
727         /* No need check namespace */
728         if (pos->prev != NULL && pos->prev->local_name == LXB_TAG__TEXT) {
729             chrs = lxb_dom_interface_character_data(pos->prev);
730 
731             if (ret_node != NULL) {
732                 *ret_node = pos->prev;
733             }
734         }
735     }
736     else {
737         /* No need check namespace */
738         if (pos->last_child != NULL
739             && pos->last_child->local_name == LXB_TAG__TEXT)
740         {
741             chrs = lxb_dom_interface_character_data(pos->last_child);
742 
743             if (ret_node != NULL) {
744                 *ret_node = pos->last_child;
745             }
746         }
747     }
748 
749     if (chrs != NULL) {
750         /* This is error. This can not happen, but... */
751         if (chrs->data.data == NULL) {
752             data = lexbor_str_init(&chrs->data, tree->document->dom_document.text,
753                                    str->length);
754             if (data == NULL) {
755                 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
756             }
757         }
758 
759         data = lexbor_str_append(&chrs->data, tree->document->dom_document.text,
760                                  str->data, str->length);
761         if (data == NULL) {
762             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
763         }
764 
765         goto destroy_str;
766     }
767 
768     lxb_dom_node_t *text = lxb_html_tree_create_node(tree, LXB_TAG__TEXT,
769                                                      LXB_NS_HTML);
770     if (text == NULL) {
771         return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
772     }
773 
774     lxb_dom_interface_text(text)->char_data.data = *str;
775 
776     if (tree->tkz_ref) {
777         text->line = tree->tkz_ref->token->line;
778         /* We only expose line number in PHP DOM */
779     }
780 
781     if (ret_node != NULL) {
782         *ret_node = text;
783     }
784 
785     lxb_html_tree_insert_node(pos, text, ipos);
786 
787     return LXB_STATUS_OK;
788 
789 destroy_str:
790 
791     lexbor_str_destroy(str, tree->document->dom_document.text, false);
792 
793     return LXB_STATUS_OK;
794 }
795 
796 lxb_dom_comment_t *
lxb_html_tree_insert_comment(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_dom_node_t * pos)797 lxb_html_tree_insert_comment(lxb_html_tree_t *tree,
798                              lxb_html_token_t *token, lxb_dom_node_t *pos)
799 {
800     lxb_dom_node_t *node;
801     lxb_dom_comment_t *comment;
802     lxb_html_tree_insertion_position_t ipos;
803 
804     if (pos == NULL) {
805         pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
806     }
807     else {
808         ipos = LXB_HTML_TREE_INSERTION_POSITION_CHILD;
809     }
810 
811     lexbor_assert(pos != NULL);
812 
813     node = lxb_html_tree_create_node(tree, token->tag_id, pos->ns);
814     comment = lxb_dom_interface_comment(node);
815 
816     if (comment == NULL) {
817         return NULL;
818     }
819 
820     node->line = token->line;
821     /* We only expose line number in PHP DOM */
822 
823     tree->status = lxb_html_token_make_text(token, &comment->char_data.data,
824                                             tree->document->dom_document.text);
825     if (tree->status != LXB_STATUS_OK) {
826         return NULL;
827     }
828 
829     lxb_html_tree_insert_node(pos, node, ipos);
830 
831     return comment;
832 }
833 
834 lxb_dom_document_type_t *
lxb_html_tree_create_document_type_from_token(lxb_html_tree_t * tree,lxb_html_token_t * token)835 lxb_html_tree_create_document_type_from_token(lxb_html_tree_t *tree,
836                                               lxb_html_token_t *token)
837 {
838     lxb_status_t status;
839     lxb_dom_node_t *doctype_node;
840     lxb_dom_document_type_t *doc_type;
841 
842     /* Create */
843     doctype_node = lxb_html_tree_create_node(tree, token->tag_id, LXB_NS_HTML);
844     if (doctype_node == NULL) {
845         return NULL;
846     }
847 
848     doc_type = lxb_dom_interface_document_type(doctype_node);
849 
850     /* Parse */
851     status = lxb_html_token_doctype_parse(token, doc_type);
852     if (status != LXB_STATUS_OK) {
853         return lxb_dom_document_type_interface_destroy(doc_type);
854     }
855 
856     return doc_type;
857 }
858 
/*
 * TODO: nodes need reference counting (ref and unref).
 * This is not fully implemented yet and still has to be finished.
 */
863 void
lxb_html_tree_node_delete_deep(lxb_html_tree_t * tree,lxb_dom_node_t * node)864 lxb_html_tree_node_delete_deep(lxb_html_tree_t *tree, lxb_dom_node_t *node)
865 {
866     lxb_dom_node_remove(node);
867 }
868 
869 lxb_html_element_t *
lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t * tree,lxb_html_token_t * token)870 lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t *tree,
871                                       lxb_html_token_t *token)
872 {
873     lxb_html_element_t *element;
874 
875     element = lxb_html_tree_insert_html_element(tree, token);
876     if (element == NULL) {
877         return NULL;
878     }
879 
880     /*
881      * Need for tokenizer state RAWTEXT
882      * See description for 'lxb_html_tokenizer_state_rawtext_before' function
883      */
884     lxb_html_tokenizer_tmp_tag_id_set(tree->tkz_ref, token->tag_id);
885     lxb_html_tokenizer_state_set(tree->tkz_ref,
886                                  lxb_html_tokenizer_state_rawtext_before);
887 
888     tree->original_mode = tree->mode;
889     tree->mode = lxb_html_tree_insertion_mode_text;
890 
891     return element;
892 }
893 
894 /* Magic of CopyPast power! */
895 lxb_html_element_t *
lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t * tree,lxb_html_token_t * token)896 lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t *tree,
897                                      lxb_html_token_t *token)
898 {
899     lxb_html_element_t *element;
900 
901     element = lxb_html_tree_insert_html_element(tree, token);
902     if (element == NULL) {
903         return NULL;
904     }
905 
906     /*
907      * Need for tokenizer state RCDATA
908      * See description for 'lxb_html_tokenizer_state_rcdata_before' function
909      */
910     lxb_html_tokenizer_tmp_tag_id_set(tree->tkz_ref, token->tag_id);
911     lxb_html_tokenizer_state_set(tree->tkz_ref,
912                                  lxb_html_tokenizer_state_rcdata_before);
913 
914     tree->original_mode = tree->mode;
915     tree->mode = lxb_html_tree_insertion_mode_text;
916 
917     return element;
918 }
919 
920 void
lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t * tree,lxb_tag_id_t ex_tag,lxb_ns_id_t ex_ns)921 lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t *tree,
922                                         lxb_tag_id_t ex_tag, lxb_ns_id_t ex_ns)
923 {
924     lxb_dom_node_t *node;
925 
926     lexbor_assert(tree->open_elements != 0);
927 
928     while (lexbor_array_length(tree->open_elements) != 0) {
929         node = lxb_html_tree_current_node(tree);
930 
931         lexbor_assert(node != NULL);
932 
933         switch (node->local_name) {
934             case LXB_TAG_DD:
935             case LXB_TAG_DT:
936             case LXB_TAG_LI:
937             case LXB_TAG_OPTGROUP:
938             case LXB_TAG_OPTION:
939             case LXB_TAG_P:
940             case LXB_TAG_RB:
941             case LXB_TAG_RP:
942             case LXB_TAG_RT:
943             case LXB_TAG_RTC:
944                 if(node->local_name == ex_tag && node->ns == ex_ns) {
945                     return;
946                 }
947 
948                 lxb_html_tree_open_elements_pop(tree);
949 
950                 break;
951 
952             default:
953                 return;
954         }
955     }
956 }
957 
958 void
lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t * tree,lxb_tag_id_t ex_tag,lxb_ns_id_t ex_ns)959 lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t *tree,
960                                                        lxb_tag_id_t ex_tag,
961                                                        lxb_ns_id_t ex_ns)
962 {
963     lxb_dom_node_t *node;
964 
965     lexbor_assert(tree->open_elements != 0);
966 
967     while (lexbor_array_length(tree->open_elements) != 0) {
968         node = lxb_html_tree_current_node(tree);
969 
970         lexbor_assert(node != NULL);
971 
972         switch (node->local_name) {
973             case LXB_TAG_CAPTION:
974             case LXB_TAG_COLGROUP:
975             case LXB_TAG_DD:
976             case LXB_TAG_DT:
977             case LXB_TAG_LI:
978             case LXB_TAG_OPTGROUP:
979             case LXB_TAG_OPTION:
980             case LXB_TAG_P:
981             case LXB_TAG_RB:
982             case LXB_TAG_RP:
983             case LXB_TAG_RT:
984             case LXB_TAG_RTC:
985             case LXB_TAG_TBODY:
986             case LXB_TAG_TD:
987             case LXB_TAG_TFOOT:
988             case LXB_TAG_TH:
989             case LXB_TAG_THEAD:
990             case LXB_TAG_TR:
991                 if(node->local_name == ex_tag && node->ns == ex_ns) {
992                     return;
993                 }
994 
995                 lxb_html_tree_open_elements_pop(tree);
996 
997                 break;
998 
999             default:
1000                 return;
1001         }
1002     }
1003 }
1004 
1005 void
lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t * tree)1006 lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t *tree)
1007 {
1008     lxb_dom_node_t *node;
1009     size_t idx = tree->open_elements->length;
1010 
1011     /* Step 1 */
1012     bool last = false;
1013     void **list = tree->open_elements->list;
1014 
1015     /* Step 3 */
1016     while (idx != 0) {
1017         idx--;
1018 
1019         /* Step 2 */
1020         node = list[idx];
1021 
1022         /* Step 3 */
1023         if (idx == 0) {
1024             last = true;
1025 
1026             if (tree->fragment != NULL) {
1027                 node = tree->fragment;
1028             }
1029         }
1030 
1031         lexbor_assert(node != NULL);
1032 
1033         /* Step 16 */
1034         if (node->ns != LXB_NS_HTML) {
1035             if (last) {
1036                 tree->mode = lxb_html_tree_insertion_mode_in_body;
1037                 return;
1038             }
1039 
1040             continue;
1041         }
1042 
1043         /* Step 4 */
1044         if (node->local_name == LXB_TAG_SELECT) {
1045             /* Step 4.1 */
1046             if (last) {
1047                 tree->mode = lxb_html_tree_insertion_mode_in_select;
1048                 return;
1049             }
1050 
1051             /* Step 4.2 */
1052             size_t ancestor = idx;
1053 
1054             for (;;) {
1055                 /* Step 4.3 */
1056                 if (ancestor == 0) {
1057                     tree->mode = lxb_html_tree_insertion_mode_in_select;
1058                     return;
1059                 }
1060 
1061                 /* Step 4.4 */
1062                 ancestor--;
1063 
1064                 /* Step 4.5 */
1065                 lxb_dom_node_t *ancestor_node = list[ancestor];
1066 
1067                 if(lxb_html_tree_node_is(ancestor_node, LXB_TAG_TEMPLATE)) {
1068                     tree->mode = lxb_html_tree_insertion_mode_in_select;
1069                     return;
1070                 }
1071 
1072                 /* Step 4.6 */
1073                 else if(lxb_html_tree_node_is(ancestor_node, LXB_TAG_TABLE)) {
1074                     tree->mode = lxb_html_tree_insertion_mode_in_select_in_table;
1075                     return;
1076                 }
1077             }
1078         }
1079 
1080         /* Step 5-15 */
1081         switch (node->local_name) {
1082             case LXB_TAG_TD:
1083             case LXB_TAG_TH:
1084                 if (last == false) {
1085                     tree->mode = lxb_html_tree_insertion_mode_in_cell;
1086                     return;
1087                 }
1088 
1089                 break;
1090 
1091             case LXB_TAG_TR:
1092                 tree->mode = lxb_html_tree_insertion_mode_in_row;
1093                 return;
1094 
1095             case LXB_TAG_TBODY:
1096             case LXB_TAG_TFOOT:
1097             case LXB_TAG_THEAD:
1098                 tree->mode = lxb_html_tree_insertion_mode_in_table_body;
1099                 return;
1100 
1101             case LXB_TAG_CAPTION:
1102                 tree->mode = lxb_html_tree_insertion_mode_in_caption;
1103                 return;
1104 
1105             case LXB_TAG_COLGROUP:
1106                 tree->mode = lxb_html_tree_insertion_mode_in_column_group;
1107                 return;
1108 
1109             case LXB_TAG_TABLE:
1110                 tree->mode = lxb_html_tree_insertion_mode_in_table;
1111                 return;
1112 
1113             case LXB_TAG_TEMPLATE:
1114                 tree->mode = lxb_html_tree_template_insertion_current(tree);
1115 
1116                 lexbor_assert(tree->mode != NULL);
1117 
1118                 return;
1119 
1120             case LXB_TAG_HEAD:
1121                 if (last == false) {
1122                     tree->mode = lxb_html_tree_insertion_mode_in_head;
1123                     return;
1124                 }
1125 
1126                 break;
1127 
1128             case LXB_TAG_BODY:
1129                 tree->mode = lxb_html_tree_insertion_mode_in_body;
1130                 return;
1131 
1132             case LXB_TAG_FRAMESET:
1133                 tree->mode = lxb_html_tree_insertion_mode_in_frameset;
1134                 return;
1135 
1136             case LXB_TAG_HTML: {
1137                 if (tree->document->head == NULL) {
1138                     tree->mode = lxb_html_tree_insertion_mode_before_head;
1139                     return;
1140                 }
1141 
1142                 tree->mode = lxb_html_tree_insertion_mode_after_head;
1143                 return;
1144             }
1145 
1146             default:
1147                 break;
1148         }
1149 
1150         /* Step 16 */
1151         if (last) {
1152             tree->mode = lxb_html_tree_insertion_mode_in_body;
1153             return;
1154         }
1155     }
1156 }
1157 
1158 lxb_dom_node_t *
lxb_html_tree_element_in_scope(lxb_html_tree_t * tree,lxb_tag_id_t tag_id,lxb_ns_id_t ns,lxb_html_tag_category_t ct)1159 lxb_html_tree_element_in_scope(lxb_html_tree_t *tree, lxb_tag_id_t tag_id,
1160                                lxb_ns_id_t ns, lxb_html_tag_category_t ct)
1161 {
1162     lxb_dom_node_t *node;
1163 
1164     size_t idx = tree->open_elements->length;
1165     void **list = tree->open_elements->list;
1166 
1167     while (idx != 0) {
1168         idx--;
1169         node = list[idx];
1170 
1171         if (node->local_name == tag_id && node->ns == ns) {
1172             return node;
1173         }
1174 
1175         if (lxb_html_tag_is_category(node->local_name, node->ns, ct)) {
1176             return NULL;
1177         }
1178     }
1179 
1180     return NULL;
1181 }
1182 
1183 lxb_dom_node_t *
lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t * tree,lxb_dom_node_t * by_node,lxb_html_tag_category_t ct)1184 lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t *tree,
1185                                        lxb_dom_node_t *by_node,
1186                                        lxb_html_tag_category_t ct)
1187 {
1188     lxb_dom_node_t *node;
1189 
1190     size_t idx = tree->open_elements->length;
1191     void **list = tree->open_elements->list;
1192 
1193     while (idx != 0) {
1194         idx--;
1195         node = list[idx];
1196 
1197         if (node == by_node) {
1198             return node;
1199         }
1200 
1201         if (lxb_html_tag_is_category(node->local_name, node->ns, ct)) {
1202             return NULL;
1203         }
1204     }
1205 
1206     return NULL;
1207 }
1208 
1209 lxb_dom_node_t *
lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t * tree)1210 lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t *tree)
1211 {
1212     lxb_dom_node_t *node;
1213 
1214     size_t idx = tree->open_elements->length;
1215     void **list = tree->open_elements->list;
1216 
1217     while (idx != 0) {
1218         idx--;
1219         node = list[idx];
1220 
1221         switch (node->local_name) {
1222             case LXB_TAG_H1:
1223             case LXB_TAG_H2:
1224             case LXB_TAG_H3:
1225             case LXB_TAG_H4:
1226             case LXB_TAG_H5:
1227             case LXB_TAG_H6:
1228                 if (node->ns == LXB_NS_HTML) {
1229                     return node;
1230                 }
1231 
1232                 break;
1233 
1234             default:
1235                 break;
1236         }
1237 
1238         if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1239                                      LXB_HTML_TAG_CATEGORY_SCOPE))
1240         {
1241             return NULL;
1242         }
1243     }
1244 
1245     return NULL;
1246 }
1247 
1248 lxb_dom_node_t *
lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t * tree)1249 lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t *tree)
1250 {
1251     lxb_dom_node_t *node;
1252 
1253     size_t idx = tree->open_elements->length;
1254     void **list = tree->open_elements->list;
1255 
1256     while (idx != 0) {
1257         idx--;
1258         node = list[idx];
1259 
1260         switch (node->local_name) {
1261             case LXB_TAG_TBODY:
1262             case LXB_TAG_THEAD:
1263             case LXB_TAG_TFOOT:
1264                 if (node->ns == LXB_NS_HTML) {
1265                     return node;
1266                 }
1267 
1268                 break;
1269 
1270             default:
1271                 break;
1272         }
1273 
1274         if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1275                                      LXB_HTML_TAG_CATEGORY_SCOPE_TABLE))
1276         {
1277             return NULL;
1278         }
1279     }
1280 
1281     return NULL;
1282 }
1283 
1284 lxb_dom_node_t *
lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t * tree)1285 lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t *tree)
1286 {
1287     lxb_dom_node_t *node;
1288 
1289     size_t idx = tree->open_elements->length;
1290     void **list = tree->open_elements->list;
1291 
1292     while (idx != 0) {
1293         idx--;
1294         node = list[idx];
1295 
1296         switch (node->local_name) {
1297             case LXB_TAG_TD:
1298             case LXB_TAG_TH:
1299                 if (node->ns == LXB_NS_HTML) {
1300                     return node;
1301                 }
1302 
1303                 break;
1304 
1305             default:
1306                 break;
1307         }
1308 
1309         if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1310                                      LXB_HTML_TAG_CATEGORY_SCOPE_TABLE))
1311         {
1312             return NULL;
1313         }
1314     }
1315 
1316     return NULL;
1317 }
1318 
1319 bool
lxb_html_tree_check_scope_element(lxb_html_tree_t * tree)1320 lxb_html_tree_check_scope_element(lxb_html_tree_t *tree)
1321 {
1322     lxb_dom_node_t *node;
1323 
1324     for (size_t i = 0; i < tree->open_elements->length; i++) {
1325         node = tree->open_elements->list[i];
1326 
1327         switch (node->local_name) {
1328             case LXB_TAG_DD:
1329             case LXB_TAG_DT:
1330             case LXB_TAG_LI:
1331             case LXB_TAG_OPTGROUP:
1332             case LXB_TAG_OPTION:
1333             case LXB_TAG_P:
1334             case LXB_TAG_RB:
1335             case LXB_TAG_RP:
1336             case LXB_TAG_RT:
1337             case LXB_TAG_RTC:
1338             case LXB_TAG_TBODY:
1339             case LXB_TAG_TD:
1340             case LXB_TAG_TFOOT:
1341             case LXB_TAG_TH:
1342             case LXB_TAG_THEAD:
1343             case LXB_TAG_TR:
1344             case LXB_TAG_BODY:
1345             case LXB_TAG_HTML:
1346                 return true;
1347 
1348             default:
1349                 break;
1350         }
1351     }
1352 
1353     return false;
1354 }
1355 
1356 void
lxb_html_tree_close_p_element(lxb_html_tree_t * tree,lxb_html_token_t * token)1357 lxb_html_tree_close_p_element(lxb_html_tree_t *tree, lxb_html_token_t *token)
1358 {
1359     lxb_html_tree_generate_implied_end_tags(tree, LXB_TAG_P, LXB_NS_HTML);
1360 
1361     lxb_dom_node_t *node = lxb_html_tree_current_node(tree);
1362 
1363     if (lxb_html_tree_node_is(node, LXB_TAG_P) == false) {
1364         lxb_html_tree_parse_error(tree, token,
1365                                   LXB_HTML_RULES_ERROR_UNELINOPELST);
1366     }
1367 
1368     lxb_html_tree_open_elements_pop_until_tag_id(tree, LXB_TAG_P, LXB_NS_HTML,
1369                                                  true);
1370 }
1371 
1372 #include "lexbor/html/serialize.h"
1373 
1374 bool
lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_status_t * status)1375 lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t *tree,
1376                                         lxb_html_token_t *token,
1377                                         lxb_status_t *status)
1378 {
1379     lexbor_assert(tree->open_elements->length != 0);
1380 
1381     /* State 1 */
1382     bool is;
1383     short outer_loop;
1384     lxb_html_element_t *element;
1385     lxb_dom_node_t *node, *marker, **oel_list, **afe_list;
1386 
1387     lxb_tag_id_t subject = token->tag_id;
1388 
1389     oel_list = (lxb_dom_node_t **) tree->open_elements->list;
1390     afe_list = (lxb_dom_node_t **) tree->active_formatting->list;
1391     marker = (lxb_dom_node_t *) lxb_html_tree_active_formatting_marker();
1392 
1393     *status = LXB_STATUS_OK;
1394 
1395     /* State 2 */
1396     node = lxb_html_tree_current_node(tree);
1397     lexbor_assert(node != NULL);
1398 
1399     if (lxb_html_tree_node_is(node, subject)) {
1400         is = lxb_html_tree_active_formatting_find_by_node_reverse(tree, node,
1401                                                                   NULL);
1402         if (is == false) {
1403             lxb_html_tree_open_elements_pop(tree);
1404 
1405             return false;
1406         }
1407     }
1408 
1409     /* State 3 */
1410     outer_loop = 0;
1411 
1412     /* State 4 */
1413     while (outer_loop < 8) {
1414         /* State 5 */
1415         outer_loop++;
1416 
1417         /* State 6 */
1418         size_t formatting_index = 0;
1419         size_t idx = tree->active_formatting->length;
1420         lxb_dom_node_t *formatting_element = NULL;
1421 
1422         while (idx) {
1423             idx--;
1424 
1425             if (afe_list[idx] == marker) {
1426                     return true;
1427             }
1428             else if (afe_list[idx]->local_name == subject) {
1429                 formatting_index = idx;
1430                 formatting_element = afe_list[idx];
1431 
1432                 break;
1433             }
1434         }
1435 
1436         if (formatting_element == NULL) {
1437             return true;
1438         }
1439 
1440         /* State 7 */
1441         size_t oel_formatting_idx;
1442         is = lxb_html_tree_open_elements_find_by_node_reverse(tree,
1443                                                               formatting_element,
1444                                                               &oel_formatting_idx);
1445         if (is == false) {
1446             lxb_html_tree_parse_error(tree, token,
1447                                       LXB_HTML_RULES_ERROR_MIELINOPELST);
1448 
1449             lxb_html_tree_active_formatting_remove_by_node(tree,
1450                                                            formatting_element);
1451 
1452             return false;
1453         }
1454 
1455         /* State 8 */
1456         node = lxb_html_tree_element_in_scope_by_node(tree, formatting_element,
1457                                                       LXB_HTML_TAG_CATEGORY_SCOPE);
1458         if (node == NULL) {
1459             lxb_html_tree_parse_error(tree, token,
1460                                       LXB_HTML_RULES_ERROR_MIELINSC);
1461             return false;
1462         }
1463 
1464         /* State 9 */
1465         node = lxb_html_tree_current_node(tree);
1466 
1467         if (formatting_element != node) {
1468             lxb_html_tree_parse_error(tree, token,
1469                                       LXB_HTML_RULES_ERROR_UNELINOPELST);
1470         }
1471 
1472         /* State 10 */
1473         lxb_dom_node_t *furthest_block = NULL;
1474         size_t furthest_block_idx = 0;
1475         size_t oel_idx = tree->open_elements->length;
1476 
1477         for (furthest_block_idx = oel_formatting_idx;
1478              furthest_block_idx < oel_idx;
1479              furthest_block_idx++)
1480         {
1481             is = lxb_html_tag_is_category(oel_list[furthest_block_idx]->local_name,
1482                                           oel_list[furthest_block_idx]->ns,
1483                                           LXB_HTML_TAG_CATEGORY_SPECIAL);
1484             if (is) {
1485                 furthest_block = oel_list[furthest_block_idx];
1486 
1487                 break;
1488             }
1489         }
1490 
1491         /* State 11 */
1492         if (furthest_block == NULL) {
1493             lxb_html_tree_open_elements_pop_until_node(tree, formatting_element,
1494                                                        true);
1495 
1496             lxb_html_tree_active_formatting_remove_by_node(tree,
1497                                                            formatting_element);
1498 
1499             return false;
1500         }
1501 
1502         lexbor_assert(oel_formatting_idx != 0);
1503 
1504         /* State 12 */
1505         lxb_dom_node_t *common_ancestor = oel_list[oel_formatting_idx - 1];
1506 
1507         /* State 13 */
1508         size_t bookmark = formatting_index;
1509 
1510         /* State 14 */
1511         lxb_dom_node_t *node;
1512         lxb_dom_node_t *last = furthest_block;
1513         size_t node_idx = furthest_block_idx;
1514 
1515         /* State 14.1 */
1516         size_t inner_loop_counter = 0;
1517 
1518         /* State 14.2 */
1519         while (1) {
1520             inner_loop_counter++;
1521 
1522             /* State 14.3 */
1523             lexbor_assert(node_idx != 0);
1524 
1525             if (node_idx == 0) {
1526                 return false;
1527             }
1528 
1529             node_idx--;
1530             node = oel_list[node_idx];
1531 
1532             /* State 14.4 */
1533             if (node == formatting_element) {
1534                 break;
1535             }
1536 
1537             /* State 14.5 */
1538             size_t afe_node_idx;
1539             is = lxb_html_tree_active_formatting_find_by_node_reverse(tree,
1540                                                                       node,
1541                                                                       &afe_node_idx);
1542             /* State 14.5 */
1543             if (inner_loop_counter > 3 && is) {
1544                 lxb_html_tree_active_formatting_remove_by_node(tree, node);
1545 
1546                 continue;
1547             }
1548 
1549             /* State 14.6 */
1550             if (is == false) {
1551                 lxb_html_tree_open_elements_remove_by_node(tree, node);
1552 
1553                 continue;
1554             }
1555 
1556             /* State 14.7 */
1557             lxb_html_token_t fake_token = {0};
1558 
1559             fake_token.tag_id = node->local_name;
1560             fake_token.base_element = node;
1561 
1562             element = lxb_html_tree_create_element_for_token(tree, &fake_token,
1563                                                              LXB_NS_HTML,
1564                                                              common_ancestor);
1565             if (element == NULL) {
1566                 *status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
1567 
1568                 return false;
1569             }
1570 
1571             node = lxb_dom_interface_node(element);
1572 
1573             afe_list[afe_node_idx] = node;
1574             oel_list[node_idx] = node;
1575 
1576             /* State 14.8 */
1577             if (last == furthest_block) {
1578                 bookmark = afe_node_idx + 1;
1579 
1580                 lexbor_assert(bookmark < tree->active_formatting->length);
1581             }
1582 
1583             /* State 14.9 */
1584             if (last->parent != NULL) {
1585                 lxb_dom_node_remove_wo_events(last);
1586             }
1587 
1588             lxb_dom_node_insert_child_wo_events(node, last);
1589 
1590             /* State 14.10 */
1591             last = node;
1592         }
1593 
1594         if (last->parent != NULL) {
1595             lxb_dom_node_remove_wo_events(last);
1596         }
1597 
1598         /* State 15 */
1599         lxb_dom_node_t *pos;
1600         lxb_html_tree_insertion_position_t ipos;
1601 
1602         pos = lxb_html_tree_appropriate_place_inserting_node(tree,
1603                                                              common_ancestor,
1604                                                              &ipos);
1605         if (pos == NULL) {
1606             return false;
1607         }
1608 
1609         lxb_html_tree_insert_node(pos, last, ipos);
1610 
1611         /* State 16 */
1612         lxb_html_token_t fake_token = {0};
1613 
1614         fake_token.tag_id = formatting_element->local_name;
1615         fake_token.base_element = formatting_element;
1616 
1617         element = lxb_html_tree_create_element_for_token(tree, &fake_token,
1618                                                          LXB_NS_HTML,
1619                                                          furthest_block);
1620         if (element == NULL) {
1621             *status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
1622 
1623             return false;
1624         }
1625 
1626         /* State 17 */
1627         lxb_dom_node_t *next;
1628         node = furthest_block->first_child;
1629 
1630         while (node != NULL) {
1631             next = node->next;
1632 
1633             lxb_dom_node_remove_wo_events(node);
1634             lxb_dom_node_insert_child_wo_events(lxb_dom_interface_node(element),
1635                                                 node);
1636             node = next;
1637         }
1638 
1639         node = lxb_dom_interface_node(element);
1640 
1641         /* State 18 */
1642         lxb_dom_node_insert_child_wo_events(furthest_block, node);
1643 
1644         /* State 19 */
1645         lxb_html_tree_active_formatting_remove(tree, formatting_index);
1646 
1647         if (bookmark > tree->active_formatting->length) {
1648             bookmark = tree->active_formatting->length;
1649         }
1650 
1651         *status = lxb_html_tree_active_formatting_insert(tree, node, bookmark);
1652         if (*status != LXB_STATUS_OK) {
1653             return false;
1654         }
1655 
1656         /* State 20 */
1657         lxb_html_tree_open_elements_remove_by_node(tree, formatting_element);
1658 
1659         lxb_html_tree_open_elements_find_by_node(tree, furthest_block,
1660                                                  &furthest_block_idx);
1661 
1662         *status = lxb_html_tree_open_elements_insert_after(tree, node,
1663                                                            furthest_block_idx);
1664         if (*status != LXB_STATUS_OK) {
1665             return false;
1666         }
1667     }
1668 
1669     return false;
1670 }
1671 
1672 bool
lxb_html_tree_html_integration_point(lxb_dom_node_t * node)1673 lxb_html_tree_html_integration_point(lxb_dom_node_t *node)
1674 {
1675     if (node->ns == LXB_NS_MATH
1676         && node->local_name == LXB_TAG_ANNOTATION_XML)
1677     {
1678         lxb_dom_attr_t *attr;
1679         attr = lxb_dom_element_attr_is_exist(lxb_dom_interface_element(node),
1680                                              (const lxb_char_t *) "encoding",
1681                                              8);
1682         if (attr == NULL || attr->value == NULL) {
1683             return false;
1684         }
1685 
1686         if (attr->value->length == 9
1687             && lexbor_str_data_casecmp(attr->value->data,
1688                                        (const lxb_char_t *) "text/html"))
1689         {
1690             return true;
1691         }
1692 
1693         if (attr->value->length == 21
1694             && lexbor_str_data_casecmp(attr->value->data,
1695                                        (const lxb_char_t *) "application/xhtml+xml"))
1696         {
1697             return true;
1698         }
1699 
1700         return false;
1701     }
1702 
1703     if (node->ns == LXB_NS_SVG
1704         && (node->local_name == LXB_TAG_FOREIGNOBJECT
1705             || node->local_name == LXB_TAG_DESC
1706             || node->local_name == LXB_TAG_TITLE))
1707     {
1708         return true;
1709     }
1710 
1711     return false;
1712 }
1713 
1714 lxb_status_t
lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)1715 lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t *tree,
1716                                        lxb_dom_attr_t *attr, void *ctx)
1717 {
1718     lxb_status_t status;
1719 
1720     status = lxb_html_tree_adjust_mathml_attributes(tree, attr, ctx);
1721     if (status !=LXB_STATUS_OK) {
1722         return status;
1723     }
1724 
1725     return lxb_html_tree_adjust_foreign_attributes(tree, attr, ctx);
1726 }
1727 
1728 lxb_status_t
lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)1729 lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t *tree,
1730                                     lxb_dom_attr_t *attr, void *ctx)
1731 {
1732     lxb_status_t status;
1733 
1734     status = lxb_html_tree_adjust_svg_attributes(tree, attr, ctx);
1735     if (status !=LXB_STATUS_OK) {
1736         return status;
1737     }
1738 
1739     return lxb_html_tree_adjust_foreign_attributes(tree, attr, ctx);
1740 }
1741