xref: /php-src/ext/dom/lexbor/lexbor/html/tree.c (revision 445c1c92)
1 /*
2  * Copyright (C) 2018-2022 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #include "lexbor/dom/interfaces/document_fragment.h"
8 #include "lexbor/dom/interfaces/document_type.h"
9 #include "lexbor/dom/interfaces/comment.h"
10 #include "lexbor/dom/interfaces/text.h"
11 
12 #include "lexbor/html/tree.h"
13 #include "lexbor/html/tree_res.h"
14 #include "lexbor/html/tree/insertion_mode.h"
15 #include "lexbor/html/tree/open_elements.h"
16 #include "lexbor/html/tree/active_formatting.h"
17 #include "lexbor/html/tree/template_insertion.h"
18 #include "lexbor/html/interface.h"
19 #include "lexbor/html/interface.h"
20 #include "lexbor/html/interfaces/template_element.h"
21 #include "lexbor/html/interfaces/unknown_element.h"
22 #include "lexbor/html/tokenizer/state_rawtext.h"
23 #include "lexbor/html/tokenizer/state_rcdata.h"
24 
25 
26 lxb_dom_attr_data_t *
27 lxb_dom_attr_local_name_append(lexbor_hash_t *hash,
28                                const lxb_char_t *name, size_t length);
29 
30 lxb_dom_attr_data_t *
31 lxb_dom_attr_qualified_name_append(lexbor_hash_t *hash, const lxb_char_t *name,
32                                    size_t length);
33 
34 const lxb_tag_data_t *
35 lxb_tag_append_lower(lexbor_hash_t *hash,
36                      const lxb_char_t *name, size_t length);
37 
38 static lxb_html_token_t *
39 lxb_html_tree_token_callback(lxb_html_tokenizer_t *tkz,
40                              lxb_html_token_t *token, void *ctx);
41 
42 static lxb_status_t
43 lxb_html_tree_insertion_mode(lxb_html_tree_t *tree, lxb_html_token_t *token);
44 
45 
46 lxb_html_tree_t *
lxb_html_tree_create(void)47 lxb_html_tree_create(void)
48 {
49     return lexbor_calloc(1, sizeof(lxb_html_tree_t));
50 }
51 
52 lxb_status_t
lxb_html_tree_init(lxb_html_tree_t * tree,lxb_html_tokenizer_t * tkz)53 lxb_html_tree_init(lxb_html_tree_t *tree, lxb_html_tokenizer_t *tkz)
54 {
55     if (tree == NULL) {
56         return LXB_STATUS_ERROR_OBJECT_IS_NULL;
57     }
58 
59     if (tkz == NULL) {
60         return LXB_STATUS_ERROR_WRONG_ARGS;
61     }
62 
63     lxb_status_t status;
64 
65     /* Stack of open elements */
66     tree->open_elements = lexbor_array_create();
67     status = lexbor_array_init(tree->open_elements, 128);
68     if (status != LXB_STATUS_OK) {
69         return status;
70     }
71 
72     /* Stack of active formatting */
73     tree->active_formatting = lexbor_array_create();
74     status = lexbor_array_init(tree->active_formatting, 128);
75     if (status != LXB_STATUS_OK) {
76         return status;
77     }
78 
79     /* Stack of template insertion modes */
80     tree->template_insertion_modes = lexbor_array_obj_create();
81     status = lexbor_array_obj_init(tree->template_insertion_modes, 64,
82                                    sizeof(lxb_html_tree_template_insertion_t));
83     if (status != LXB_STATUS_OK) {
84         return status;
85     }
86 
87     /* Stack of pending table character tokens */
88     tree->pending_table.text_list = lexbor_array_obj_create();
89     status = lexbor_array_obj_init(tree->pending_table.text_list, 16,
90                                    sizeof(lexbor_str_t));
91     if (status != LXB_STATUS_OK) {
92         return status;
93     }
94 
95     /* Parse errors */
96     tree->parse_errors = lexbor_array_obj_create();
97     status = lexbor_array_obj_init(tree->parse_errors, 16,
98                                                 sizeof(lxb_html_tree_error_t));
99     if (status != LXB_STATUS_OK) {
100         return status;
101     }
102 
103     tree->tkz_ref = lxb_html_tokenizer_ref(tkz);
104 
105     tree->document = NULL;
106     tree->fragment = NULL;
107 
108     tree->form = NULL;
109 
110     tree->foster_parenting = false;
111     tree->frameset_ok = true;
112 
113     tree->mode = lxb_html_tree_insertion_mode_initial;
114     tree->before_append_attr = NULL;
115 
116     tree->status = LXB_STATUS_OK;
117 
118     tree->ref_count = 1;
119 
120     lxb_html_tokenizer_callback_token_done_set(tkz,
121                                                lxb_html_tree_token_callback,
122                                                tree);
123 
124     return LXB_STATUS_OK;
125 }
126 
127 lxb_html_tree_t *
lxb_html_tree_ref(lxb_html_tree_t * tree)128 lxb_html_tree_ref(lxb_html_tree_t *tree)
129 {
130     if (tree == NULL) {
131         return NULL;
132     }
133 
134     tree->ref_count++;
135 
136     return tree;
137 }
138 
139 lxb_html_tree_t *
lxb_html_tree_unref(lxb_html_tree_t * tree)140 lxb_html_tree_unref(lxb_html_tree_t *tree)
141 {
142     if (tree == NULL || tree->ref_count == 0) {
143         return NULL;
144     }
145 
146     tree->ref_count--;
147 
148     if (tree->ref_count == 0) {
149         lxb_html_tree_destroy(tree);
150     }
151 
152     return NULL;
153 }
154 
155 void
lxb_html_tree_clean(lxb_html_tree_t * tree)156 lxb_html_tree_clean(lxb_html_tree_t *tree)
157 {
158     lexbor_array_clean(tree->open_elements);
159     lexbor_array_clean(tree->active_formatting);
160     lexbor_array_obj_clean(tree->template_insertion_modes);
161     lexbor_array_obj_clean(tree->pending_table.text_list);
162     lexbor_array_obj_clean(tree->parse_errors);
163 
164     tree->document = NULL;
165     tree->fragment = NULL;
166 
167     tree->form = NULL;
168 
169     tree->foster_parenting = false;
170     tree->frameset_ok = true;
171 
172     tree->mode = lxb_html_tree_insertion_mode_initial;
173     tree->before_append_attr = NULL;
174 
175     tree->status = LXB_STATUS_OK;
176 }
177 
178 lxb_html_tree_t *
lxb_html_tree_destroy(lxb_html_tree_t * tree)179 lxb_html_tree_destroy(lxb_html_tree_t *tree)
180 {
181     if (tree == NULL) {
182         return NULL;
183     }
184 
185     tree->open_elements = lexbor_array_destroy(tree->open_elements, true);
186     tree->active_formatting = lexbor_array_destroy(tree->active_formatting,
187                                                    true);
188     tree->template_insertion_modes = lexbor_array_obj_destroy(tree->template_insertion_modes,
189                                                               true);
190     tree->pending_table.text_list = lexbor_array_obj_destroy(tree->pending_table.text_list,
191                                                              true);
192 
193     tree->parse_errors = lexbor_array_obj_destroy(tree->parse_errors, true);
194     tree->tkz_ref = lxb_html_tokenizer_unref(tree->tkz_ref);
195 
196     return lexbor_free(tree);
197 }
198 
199 static lxb_html_token_t *
lxb_html_tree_token_callback(lxb_html_tokenizer_t * tkz,lxb_html_token_t * token,void * ctx)200 lxb_html_tree_token_callback(lxb_html_tokenizer_t *tkz,
201                              lxb_html_token_t *token, void *ctx)
202 {
203     lxb_status_t status;
204 
205     status = lxb_html_tree_insertion_mode(ctx, token);
206     if (status != LXB_STATUS_OK) {
207         tkz->status = status;
208         return NULL;
209     }
210 
211     return token;
212 }
213 
214 /* TODO: not complete!!! */
215 lxb_status_t
lxb_html_tree_stop_parsing(lxb_html_tree_t * tree)216 lxb_html_tree_stop_parsing(lxb_html_tree_t *tree)
217 {
218     tree->document->ready_state = LXB_HTML_DOCUMENT_READY_STATE_COMPLETE;
219 
220     return LXB_STATUS_OK;
221 }
222 
223 bool
lxb_html_tree_process_abort(lxb_html_tree_t * tree)224 lxb_html_tree_process_abort(lxb_html_tree_t *tree)
225 {
226     if (tree->status == LXB_STATUS_OK) {
227         tree->status = LXB_STATUS_ABORTED;
228     }
229 
230     tree->open_elements->length = 0;
231     tree->document->ready_state = LXB_HTML_DOCUMENT_READY_STATE_COMPLETE;
232 
233     return true;
234 }
235 
236 void
lxb_html_tree_parse_error(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_html_tree_error_id_t id)237 lxb_html_tree_parse_error(lxb_html_tree_t *tree, lxb_html_token_t *token,
238                           lxb_html_tree_error_id_t id)
239 {
240     lxb_html_tree_error_add(tree->parse_errors, token, id);
241 }
242 
243 bool
lxb_html_tree_construction_dispatcher(lxb_html_tree_t * tree,lxb_html_token_t * token)244 lxb_html_tree_construction_dispatcher(lxb_html_tree_t *tree,
245                                       lxb_html_token_t *token)
246 {
247     lxb_dom_node_t *adjusted;
248 
249     adjusted = lxb_html_tree_adjusted_current_node(tree);
250 
251     if (adjusted == NULL || adjusted->ns == LXB_NS_HTML) {
252         return tree->mode(tree, token);
253     }
254 
255     if (lxb_html_tree_mathml_text_integration_point(adjusted))
256     {
257         if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
258             && token->tag_id != LXB_TAG_MGLYPH
259             && token->tag_id != LXB_TAG_MALIGNMARK)
260         {
261             return tree->mode(tree, token);
262         }
263 
264         if (token->tag_id == LXB_TAG__TEXT) {
265             return tree->mode(tree, token);
266         }
267     }
268 
269     if (adjusted->local_name == LXB_TAG_ANNOTATION_XML
270         && adjusted->ns == LXB_NS_MATH
271         && (token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
272         && token->tag_id == LXB_TAG_SVG)
273     {
274         return tree->mode(tree, token);
275     }
276 
277     if (lxb_html_tree_html_integration_point(adjusted)) {
278         if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
279             || token->tag_id == LXB_TAG__TEXT)
280         {
281             return tree->mode(tree, token);
282         }
283     }
284 
285     if (token->tag_id == LXB_TAG__END_OF_FILE) {
286         return tree->mode(tree, token);
287     }
288 
289     return lxb_html_tree_insertion_mode_foreign_content(tree, token);
290 }
291 
292 static lxb_status_t
lxb_html_tree_insertion_mode(lxb_html_tree_t * tree,lxb_html_token_t * token)293 lxb_html_tree_insertion_mode(lxb_html_tree_t *tree, lxb_html_token_t *token)
294 {
295     while (lxb_html_tree_construction_dispatcher(tree, token) == false) {}
296 
297     return tree->status;
298 }
299 
300 /*
301  * Action
302  */
303 lxb_dom_node_t *
lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t * tree,lxb_dom_node_t * override_target,lxb_html_tree_insertion_position_t * ipos)304 lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t *tree,
305                                        lxb_dom_node_t *override_target,
306                                        lxb_html_tree_insertion_position_t *ipos)
307 {
308     lxb_dom_node_t *target, *adjusted_location = NULL;
309 
310     *ipos = LXB_HTML_TREE_INSERTION_POSITION_CHILD;
311 
312     if (override_target != NULL) {
313         target = override_target;
314     }
315     else {
316         target = lxb_html_tree_current_node(tree);
317     }
318 
319     if (tree->foster_parenting && target->ns == LXB_NS_HTML
320            && (target->local_name == LXB_TAG_TABLE
321             || target->local_name == LXB_TAG_TBODY
322             || target->local_name == LXB_TAG_TFOOT
323             || target->local_name == LXB_TAG_THEAD
324             || target->local_name == LXB_TAG_TR))
325     {
326         lxb_dom_node_t *last_temp, *last_table;
327         size_t last_temp_idx, last_table_idx;
328 
329         last_temp = lxb_html_tree_open_elements_find_reverse(tree,
330                                                           LXB_TAG_TEMPLATE,
331                                                           LXB_NS_HTML,
332                                                           &last_temp_idx);
333 
334         last_table = lxb_html_tree_open_elements_find_reverse(tree,
335                                                              LXB_TAG_TABLE,
336                                                              LXB_NS_HTML,
337                                                              &last_table_idx);
338 
339         if(last_temp != NULL && (last_table == NULL
340                          || last_temp_idx > last_table_idx))
341         {
342             lxb_dom_document_fragment_t *doc_fragment;
343 
344             doc_fragment = lxb_html_interface_template(last_temp)->content;
345 
346             return lxb_dom_interface_node(doc_fragment);
347         }
348         else if (last_table == NULL) {
349             adjusted_location = lxb_html_tree_open_elements_first(tree);
350 
351             lexbor_assert(adjusted_location != NULL);
352             lexbor_assert(adjusted_location->local_name == LXB_TAG_HTML);
353         }
354         else if (last_table->parent != NULL) {
355             adjusted_location = last_table;
356 
357             *ipos = LXB_HTML_TREE_INSERTION_POSITION_BEFORE;
358         }
359         else {
360             lexbor_assert(last_table_idx != 0);
361 
362             adjusted_location = lxb_html_tree_open_elements_get(tree,
363                                                             last_table_idx - 1);
364         }
365     }
366     else {
367         adjusted_location = target;
368     }
369 
370     if (adjusted_location == NULL) {
371         return NULL;
372     }
373 
374     /*
375      * In Spec it is not entirely clear what is meant:
376      *
377      * If the adjusted insertion location is inside a template element,
378      * let it instead be inside the template element's template contents,
379      * after its last child (if any).
380      */
381     if (lxb_html_tree_node_is(adjusted_location, LXB_TAG_TEMPLATE)) {
382         lxb_dom_document_fragment_t *df;
383 
384         df = lxb_html_interface_template(adjusted_location)->content;
385         adjusted_location = lxb_dom_interface_node(df);
386     }
387 
388     return adjusted_location;
389 }
390 
391 lxb_html_element_t *
lxb_html_tree_insert_foreign_element(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_ns_id_t ns)392 lxb_html_tree_insert_foreign_element(lxb_html_tree_t *tree,
393                                      lxb_html_token_t *token, lxb_ns_id_t ns)
394 {
395     lxb_status_t status;
396     lxb_dom_node_t *pos;
397     lxb_html_element_t *element;
398     lxb_html_tree_insertion_position_t ipos;
399 
400     pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
401     if (pos == NULL) {
402         return NULL;
403     }
404 
405     element = lxb_html_tree_create_element_for_token(tree, token, ns);
406     if (element == NULL) {
407         return NULL;
408     }
409 
410     lxb_html_tree_insert_node(pos, lxb_dom_interface_node(element), ipos);
411 
412     status = lxb_html_tree_open_elements_push(tree,
413                                               lxb_dom_interface_node(element));
414     if (status != LXB_HTML_STATUS_OK) {
415         return lxb_html_interface_destroy(element);
416     }
417 
418     return element;
419 }
420 
421 lxb_html_element_t *
lxb_html_tree_create_element_for_token(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_ns_id_t ns)422 lxb_html_tree_create_element_for_token(lxb_html_tree_t *tree,
423                                        lxb_html_token_t *token, lxb_ns_id_t ns)
424 {
425     lxb_dom_node_t *node = lxb_html_tree_create_node(tree, token->tag_id, ns);
426     if (node == NULL) {
427         return NULL;
428     }
429 
430     node->line = token->line;
431     /* We only expose line number in PHP DOM */
432 
433     lxb_status_t status;
434     lxb_dom_element_t *element = lxb_dom_interface_element(node);
435 
436     if (token->base_element == NULL) {
437         status = lxb_html_tree_append_attributes(tree, element, token, ns);
438     }
439     else {
440         status = lxb_html_tree_append_attributes_from_element(tree, element,
441                                                        token->base_element, ns);
442     }
443 
444     if (status != LXB_HTML_STATUS_OK) {
445         return lxb_html_interface_destroy(element);
446     }
447 
448     return lxb_html_interface_element(node);
449 }
450 
451 lxb_status_t
lxb_html_tree_append_attributes(lxb_html_tree_t * tree,lxb_dom_element_t * element,lxb_html_token_t * token,lxb_ns_id_t ns)452 lxb_html_tree_append_attributes(lxb_html_tree_t *tree,
453                                 lxb_dom_element_t *element,
454                                 lxb_html_token_t *token, lxb_ns_id_t ns)
455 {
456     lxb_status_t status;
457     lxb_dom_attr_t *attr;
458     lxb_html_document_t *doc;
459     lxb_html_token_attr_t *token_attr = token->attr_first;
460 
461     doc = lxb_html_interface_document(element->node.owner_document);
462 
463     while (token_attr != NULL) {
464         attr = lxb_dom_element_attr_by_local_name_data(element,
465                                                        token_attr->name);
466         if (attr != NULL) {
467             token_attr = token_attr->next;
468             continue;
469         }
470 
471         attr = lxb_dom_attr_interface_create(lxb_dom_interface_document(doc));
472         if (attr == NULL) {
473             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
474         }
475 
476         if (token_attr->value_begin != NULL) {
477             status = lxb_dom_attr_set_value_wo_copy(attr, token_attr->value,
478                                                     token_attr->value_size);
479             if (status != LXB_HTML_STATUS_OK) {
480                 return status;
481             }
482         }
483 
484         attr->node.local_name = token_attr->name->attr_id;
485         attr->node.ns = ns;
486 
487         /* Fix for adjust MathML/SVG attributes */
488         if (tree->before_append_attr != NULL) {
489             status = tree->before_append_attr(tree, attr, NULL);
490             if (status != LXB_STATUS_OK) {
491                 return status;
492             }
493         }
494 
495         lxb_dom_element_attr_append(element, attr);
496 
497         token_attr = token_attr->next;
498     }
499 
500     return LXB_HTML_STATUS_OK;
501 }
502 
503 lxb_status_t
lxb_html_tree_append_attributes_from_element(lxb_html_tree_t * tree,lxb_dom_element_t * element,lxb_dom_element_t * from,lxb_ns_id_t ns)504 lxb_html_tree_append_attributes_from_element(lxb_html_tree_t *tree,
505                                              lxb_dom_element_t *element,
506                                              lxb_dom_element_t *from,
507                                              lxb_ns_id_t ns)
508 {
509     lxb_status_t status;
510     lxb_dom_attr_t *attr = from->first_attr;
511     lxb_dom_attr_t *new_attr;
512 
513     while (attr != NULL) {
514         new_attr = lxb_dom_attr_interface_create(element->node.owner_document);
515         if (new_attr == NULL) {
516             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
517         }
518 
519         status = lxb_dom_attr_clone_name_value(attr, new_attr);
520         if (status != LXB_HTML_STATUS_OK) {
521             return status;
522         }
523 
524         new_attr->node.ns = attr->node.ns;
525 
526         /* Fix for  adjust MathML/SVG attributes */
527         if (tree->before_append_attr != NULL) {
528             status = tree->before_append_attr(tree, new_attr, NULL);
529             if (status != LXB_STATUS_OK) {
530                 return status;
531             }
532         }
533 
534         lxb_dom_element_attr_append(element, attr);
535 
536         attr = attr->next;
537     }
538 
539     return LXB_HTML_STATUS_OK;
540 }
541 
542 lxb_status_t
lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)543 lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t *tree,
544                                        lxb_dom_attr_t *attr, void *ctx)
545 {
546     lexbor_hash_t *attrs;
547     const lxb_dom_attr_data_t *data;
548 
549     attrs = attr->node.owner_document->attrs;
550     data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
551 
552     if (data->entry.length == 13
553         && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
554                                (const lxb_char_t *) "definitionurl"))
555     {
556         data = lxb_dom_attr_qualified_name_append(attrs,
557                                       (const lxb_char_t *) "definitionURL", 13);
558         if (data == NULL) {
559             return LXB_STATUS_ERROR;
560         }
561 
562         attr->qualified_name = data->attr_id;
563     }
564 
565     return LXB_STATUS_OK;
566 }
567 
568 lxb_status_t
lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)569 lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t *tree,
570                                     lxb_dom_attr_t *attr, void *ctx)
571 {
572     lexbor_hash_t *attrs;
573     const lxb_dom_attr_data_t *data;
574     const lxb_html_tree_res_attr_adjust_t *adjust;
575 
576     size_t len = sizeof(lxb_html_tree_res_attr_adjust_svg_map)
577         / sizeof(lxb_html_tree_res_attr_adjust_t);
578 
579     attrs = attr->node.owner_document->attrs;
580 
581     data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
582 
583     for (size_t i = 0; i < len; i++) {
584         adjust = &lxb_html_tree_res_attr_adjust_svg_map[i];
585 
586         if (data->entry.length == adjust->len
587             && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
588                                    (const lxb_char_t *) adjust->from))
589         {
590             data = lxb_dom_attr_qualified_name_append(attrs,
591                                 (const lxb_char_t *) adjust->to, adjust->len);
592             if (data == NULL) {
593                 return LXB_STATUS_ERROR;
594             }
595 
596             attr->qualified_name = data->attr_id;
597 
598             return LXB_STATUS_OK;
599         }
600     }
601 
602     return LXB_STATUS_OK;
603 }
604 
605 lxb_status_t
lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)606 lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t *tree,
607                                         lxb_dom_attr_t *attr, void *ctx)
608 {
609     size_t lname_length;
610     lexbor_hash_t *attrs, *prefix;
611     const lxb_dom_attr_data_t *attr_data;
612     const lxb_ns_prefix_data_t *prefix_data;
613     const lxb_dom_attr_data_t *data;
614     const lxb_html_tree_res_attr_adjust_foreign_t *adjust;
615 
616     size_t len = sizeof(lxb_html_tree_res_attr_adjust_foreign_map)
617         / sizeof(lxb_html_tree_res_attr_adjust_foreign_t);
618 
619     attrs = attr->node.owner_document->attrs;
620     prefix = attr->node.owner_document->prefix;
621 
622     data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
623 
624     for (size_t i = 0; i < len; i++) {
625         adjust = &lxb_html_tree_res_attr_adjust_foreign_map[i];
626 
627         if (data->entry.length == adjust->name_len
628             && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
629                                    (const lxb_char_t *) adjust->name))
630         {
631             if (adjust->prefix_len != 0) {
632                 data = lxb_dom_attr_qualified_name_append(attrs,
633                            (const lxb_char_t *) adjust->name, adjust->name_len);
634                 if (data == NULL) {
635                     return LXB_STATUS_ERROR;
636                 }
637 
638                 attr->qualified_name = data->attr_id;
639 
640                 lname_length = adjust->name_len - adjust->prefix_len - 1;
641 
642                 attr_data = lxb_dom_attr_local_name_append(attrs,
643                          (const lxb_char_t *) adjust->local_name, lname_length);
644                 if (attr_data == NULL) {
645                     return LXB_STATUS_ERROR;
646                 }
647 
648                 attr->node.local_name = attr_data->attr_id;
649 
650                 prefix_data = lxb_ns_prefix_append(prefix,
651                        (const lxb_char_t *) adjust->prefix, adjust->prefix_len);
652                 if (prefix_data == NULL) {
653                     return LXB_STATUS_ERROR;
654                 }
655 
656                 attr->node.prefix = prefix_data->prefix_id;
657             }
658 
659             attr->node.ns = adjust->ns;
660 
661             return LXB_STATUS_OK;
662         }
663     }
664 
665     return LXB_STATUS_OK;
666 }
667 
668 lxb_status_t
lxb_html_tree_insert_character(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_dom_node_t ** ret_node)669 lxb_html_tree_insert_character(lxb_html_tree_t *tree, lxb_html_token_t *token,
670                                lxb_dom_node_t **ret_node)
671 {
672     size_t size;
673     lxb_status_t status;
674     lexbor_str_t str = {0};
675 
676     size = token->text_end - token->text_start;
677 
678     lexbor_str_init(&str, tree->document->dom_document.text, size + 1);
679     if (str.data == NULL) {
680         return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
681     }
682 
683     memcpy(str.data, token->text_start, size);
684 
685     str.data[size] = 0x00;
686     str.length = size;
687 
688     status = lxb_html_tree_insert_character_for_data(tree, &str, ret_node);
689     if (status != LXB_STATUS_OK) {
690         return status;
691     }
692 
693     return LXB_STATUS_OK;
694 }
695 
696 lxb_status_t
lxb_html_tree_insert_character_for_data(lxb_html_tree_t * tree,lexbor_str_t * str,lxb_dom_node_t ** ret_node)697 lxb_html_tree_insert_character_for_data(lxb_html_tree_t *tree,
698                                         lexbor_str_t *str,
699                                         lxb_dom_node_t **ret_node)
700 {
701     const lxb_char_t *data;
702     lxb_dom_node_t *pos;
703     lxb_dom_character_data_t *chrs = NULL;
704     lxb_html_tree_insertion_position_t ipos;
705 
706     if (ret_node != NULL) {
707         *ret_node = NULL;
708     }
709 
710     pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
711     if (pos == NULL) {
712         return LXB_STATUS_ERROR;
713     }
714 
715     if (lxb_html_tree_node_is(pos, LXB_TAG__DOCUMENT)) {
716         goto destroy_str;
717     }
718 
719     if (ipos == LXB_HTML_TREE_INSERTION_POSITION_BEFORE) {
720         /* No need check namespace */
721         if (pos->prev != NULL && pos->prev->local_name == LXB_TAG__TEXT) {
722             chrs = lxb_dom_interface_character_data(pos->prev);
723 
724             if (ret_node != NULL) {
725                 *ret_node = pos->prev;
726             }
727         }
728     }
729     else {
730         /* No need check namespace */
731         if (pos->last_child != NULL
732             && pos->last_child->local_name == LXB_TAG__TEXT)
733         {
734             chrs = lxb_dom_interface_character_data(pos->last_child);
735 
736             if (ret_node != NULL) {
737                 *ret_node = pos->last_child;
738             }
739         }
740     }
741 
742     if (chrs != NULL) {
743         /* This is error. This can not happen, but... */
744         if (chrs->data.data == NULL) {
745             data = lexbor_str_init(&chrs->data, tree->document->dom_document.text,
746                                    str->length);
747             if (data == NULL) {
748                 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
749             }
750         }
751 
752         data = lexbor_str_append(&chrs->data, tree->document->dom_document.text,
753                                  str->data, str->length);
754         if (data == NULL) {
755             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
756         }
757 
758         goto destroy_str;
759     }
760 
761     lxb_dom_node_t *text = lxb_html_tree_create_node(tree, LXB_TAG__TEXT,
762                                                      LXB_NS_HTML);
763     if (text == NULL) {
764         return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
765     }
766 
767     lxb_dom_interface_text(text)->char_data.data = *str;
768 
769     if (tree->tkz_ref) {
770         text->line = tree->tkz_ref->token->line;
771         /* We only expose line number in PHP DOM */
772     }
773 
774     if (ret_node != NULL) {
775         *ret_node = text;
776     }
777 
778     lxb_html_tree_insert_node(pos, text, ipos);
779 
780     return LXB_STATUS_OK;
781 
782 destroy_str:
783 
784     lexbor_str_destroy(str, tree->document->dom_document.text, false);
785 
786     return LXB_STATUS_OK;
787 }
788 
789 lxb_dom_comment_t *
lxb_html_tree_insert_comment(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_dom_node_t * pos)790 lxb_html_tree_insert_comment(lxb_html_tree_t *tree,
791                              lxb_html_token_t *token, lxb_dom_node_t *pos)
792 {
793     lxb_dom_node_t *node;
794     lxb_dom_comment_t *comment;
795     lxb_html_tree_insertion_position_t ipos;
796 
797     if (pos == NULL) {
798         pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
799     }
800     else {
801         ipos = LXB_HTML_TREE_INSERTION_POSITION_CHILD;
802     }
803 
804     lexbor_assert(pos != NULL);
805 
806     node = lxb_html_tree_create_node(tree, token->tag_id, pos->ns);
807     comment = lxb_dom_interface_comment(node);
808 
809     if (comment == NULL) {
810         return NULL;
811     }
812 
813     node->line = token->line;
814     /* We only expose line number in PHP DOM */
815 
816     tree->status = lxb_html_token_make_text(token, &comment->char_data.data,
817                                             tree->document->dom_document.text);
818     if (tree->status != LXB_STATUS_OK) {
819         return NULL;
820     }
821 
822     lxb_html_tree_insert_node(pos, node, ipos);
823 
824     return comment;
825 }
826 
827 lxb_dom_document_type_t *
lxb_html_tree_create_document_type_from_token(lxb_html_tree_t * tree,lxb_html_token_t * token)828 lxb_html_tree_create_document_type_from_token(lxb_html_tree_t *tree,
829                                               lxb_html_token_t *token)
830 {
831     lxb_status_t status;
832     lxb_dom_node_t *doctype_node;
833     lxb_dom_document_type_t *doc_type;
834 
835     /* Create */
836     doctype_node = lxb_html_tree_create_node(tree, token->tag_id, LXB_NS_HTML);
837     if (doctype_node == NULL) {
838         return NULL;
839     }
840 
841     doc_type = lxb_dom_interface_document_type(doctype_node);
842 
843     /* Parse */
844     status = lxb_html_token_doctype_parse(token, doc_type);
845     if (status != LXB_STATUS_OK) {
846         return lxb_dom_document_type_interface_destroy(doc_type);
847     }
848 
849     return doc_type;
850 }
851 
852 /*
853  * TODO: need use ref and unref for nodes (ref counter)
854  * Not implemented until the end. It is necessary to finish it.
855  */
856 void
lxb_html_tree_node_delete_deep(lxb_html_tree_t * tree,lxb_dom_node_t * node)857 lxb_html_tree_node_delete_deep(lxb_html_tree_t *tree, lxb_dom_node_t *node)
858 {
859     lxb_dom_node_remove(node);
860 }
861 
862 lxb_html_element_t *
lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t * tree,lxb_html_token_t * token)863 lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t *tree,
864                                       lxb_html_token_t *token)
865 {
866     lxb_html_element_t *element;
867 
868     element = lxb_html_tree_insert_html_element(tree, token);
869     if (element == NULL) {
870         return NULL;
871     }
872 
873     /*
874      * Need for tokenizer state RAWTEXT
875      * See description for 'lxb_html_tokenizer_state_rawtext_before' function
876      */
877     lxb_html_tokenizer_tmp_tag_id_set(tree->tkz_ref, token->tag_id);
878     lxb_html_tokenizer_state_set(tree->tkz_ref,
879                                  lxb_html_tokenizer_state_rawtext_before);
880 
881     tree->original_mode = tree->mode;
882     tree->mode = lxb_html_tree_insertion_mode_text;
883 
884     return element;
885 }
886 
887 /* Magic of CopyPast power! */
888 lxb_html_element_t *
lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t * tree,lxb_html_token_t * token)889 lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t *tree,
890                                      lxb_html_token_t *token)
891 {
892     lxb_html_element_t *element;
893 
894     element = lxb_html_tree_insert_html_element(tree, token);
895     if (element == NULL) {
896         return NULL;
897     }
898 
899     /*
900      * Need for tokenizer state RCDATA
901      * See description for 'lxb_html_tokenizer_state_rcdata_before' function
902      */
903     lxb_html_tokenizer_tmp_tag_id_set(tree->tkz_ref, token->tag_id);
904     lxb_html_tokenizer_state_set(tree->tkz_ref,
905                                  lxb_html_tokenizer_state_rcdata_before);
906 
907     tree->original_mode = tree->mode;
908     tree->mode = lxb_html_tree_insertion_mode_text;
909 
910     return element;
911 }
912 
913 void
lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t * tree,lxb_tag_id_t ex_tag,lxb_ns_id_t ex_ns)914 lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t *tree,
915                                         lxb_tag_id_t ex_tag, lxb_ns_id_t ex_ns)
916 {
917     lxb_dom_node_t *node;
918 
919     lexbor_assert(tree->open_elements != 0);
920 
921     while (lexbor_array_length(tree->open_elements) != 0) {
922         node = lxb_html_tree_current_node(tree);
923 
924         lexbor_assert(node != NULL);
925 
926         switch (node->local_name) {
927             case LXB_TAG_DD:
928             case LXB_TAG_DT:
929             case LXB_TAG_LI:
930             case LXB_TAG_OPTGROUP:
931             case LXB_TAG_OPTION:
932             case LXB_TAG_P:
933             case LXB_TAG_RB:
934             case LXB_TAG_RP:
935             case LXB_TAG_RT:
936             case LXB_TAG_RTC:
937                 if(node->local_name == ex_tag && node->ns == ex_ns) {
938                     return;
939                 }
940 
941                 lxb_html_tree_open_elements_pop(tree);
942 
943                 break;
944 
945             default:
946                 return;
947         }
948     }
949 }
950 
951 void
lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t * tree,lxb_tag_id_t ex_tag,lxb_ns_id_t ex_ns)952 lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t *tree,
953                                                        lxb_tag_id_t ex_tag,
954                                                        lxb_ns_id_t ex_ns)
955 {
956     lxb_dom_node_t *node;
957 
958     lexbor_assert(tree->open_elements != 0);
959 
960     while (lexbor_array_length(tree->open_elements) != 0) {
961         node = lxb_html_tree_current_node(tree);
962 
963         lexbor_assert(node != NULL);
964 
965         switch (node->local_name) {
966             case LXB_TAG_CAPTION:
967             case LXB_TAG_COLGROUP:
968             case LXB_TAG_DD:
969             case LXB_TAG_DT:
970             case LXB_TAG_LI:
971             case LXB_TAG_OPTGROUP:
972             case LXB_TAG_OPTION:
973             case LXB_TAG_P:
974             case LXB_TAG_RB:
975             case LXB_TAG_RP:
976             case LXB_TAG_RT:
977             case LXB_TAG_RTC:
978             case LXB_TAG_TBODY:
979             case LXB_TAG_TD:
980             case LXB_TAG_TFOOT:
981             case LXB_TAG_TH:
982             case LXB_TAG_THEAD:
983             case LXB_TAG_TR:
984                 if(node->local_name == ex_tag && node->ns == ex_ns) {
985                     return;
986                 }
987 
988                 lxb_html_tree_open_elements_pop(tree);
989 
990                 break;
991 
992             default:
993                 return;
994         }
995     }
996 }
997 
998 void
lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t * tree)999 lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t *tree)
1000 {
1001     lxb_dom_node_t *node;
1002     size_t idx = tree->open_elements->length;
1003 
1004     /* Step 1 */
1005     bool last = false;
1006     void **list = tree->open_elements->list;
1007 
1008     /* Step 3 */
1009     while (idx != 0) {
1010         idx--;
1011 
1012         /* Step 2 */
1013         node = list[idx];
1014 
1015         /* Step 3 */
1016         if (idx == 0) {
1017             last = true;
1018 
1019             if (tree->fragment != NULL) {
1020                 node = tree->fragment;
1021             }
1022         }
1023 
1024         lexbor_assert(node != NULL);
1025 
1026         /* Step 16 */
1027         if (node->ns != LXB_NS_HTML) {
1028             if (last) {
1029                 tree->mode = lxb_html_tree_insertion_mode_in_body;
1030                 return;
1031             }
1032 
1033             continue;
1034         }
1035 
1036         /* Step 4 */
1037         if (node->local_name == LXB_TAG_SELECT) {
1038             /* Step 4.1 */
1039             if (last) {
1040                 tree->mode = lxb_html_tree_insertion_mode_in_select;
1041                 return;
1042             }
1043 
1044             /* Step 4.2 */
1045             size_t ancestor = idx;
1046 
1047             for (;;) {
1048                 /* Step 4.3 */
1049                 if (ancestor == 0) {
1050                     tree->mode = lxb_html_tree_insertion_mode_in_select;
1051                     return;
1052                 }
1053 
1054                 /* Step 4.4 */
1055                 ancestor--;
1056 
1057                 /* Step 4.5 */
1058                 lxb_dom_node_t *ancestor_node = list[ancestor];
1059 
1060                 if(lxb_html_tree_node_is(ancestor_node, LXB_TAG_TEMPLATE)) {
1061                     tree->mode = lxb_html_tree_insertion_mode_in_select;
1062                     return;
1063                 }
1064 
1065                 /* Step 4.6 */
1066                 else if(lxb_html_tree_node_is(ancestor_node, LXB_TAG_TABLE)) {
1067                     tree->mode = lxb_html_tree_insertion_mode_in_select_in_table;
1068                     return;
1069                 }
1070             }
1071         }
1072 
1073         /* Step 5-15 */
1074         switch (node->local_name) {
1075             case LXB_TAG_TD:
1076             case LXB_TAG_TH:
1077                 if (last == false) {
1078                     tree->mode = lxb_html_tree_insertion_mode_in_cell;
1079                     return;
1080                 }
1081 
1082                 break;
1083 
1084             case LXB_TAG_TR:
1085                 tree->mode = lxb_html_tree_insertion_mode_in_row;
1086                 return;
1087 
1088             case LXB_TAG_TBODY:
1089             case LXB_TAG_TFOOT:
1090             case LXB_TAG_THEAD:
1091                 tree->mode = lxb_html_tree_insertion_mode_in_table_body;
1092                 return;
1093 
1094             case LXB_TAG_CAPTION:
1095                 tree->mode = lxb_html_tree_insertion_mode_in_caption;
1096                 return;
1097 
1098             case LXB_TAG_COLGROUP:
1099                 tree->mode = lxb_html_tree_insertion_mode_in_column_group;
1100                 return;
1101 
1102             case LXB_TAG_TABLE:
1103                 tree->mode = lxb_html_tree_insertion_mode_in_table;
1104                 return;
1105 
1106             case LXB_TAG_TEMPLATE:
1107                 tree->mode = lxb_html_tree_template_insertion_current(tree);
1108 
1109                 lexbor_assert(tree->mode != NULL);
1110 
1111                 return;
1112 
1113             case LXB_TAG_HEAD:
1114                 if (last == false) {
1115                     tree->mode = lxb_html_tree_insertion_mode_in_head;
1116                     return;
1117                 }
1118 
1119                 break;
1120 
1121             case LXB_TAG_BODY:
1122                 tree->mode = lxb_html_tree_insertion_mode_in_body;
1123                 return;
1124 
1125             case LXB_TAG_FRAMESET:
1126                 tree->mode = lxb_html_tree_insertion_mode_in_frameset;
1127                 return;
1128 
1129             case LXB_TAG_HTML: {
1130                 if (tree->document->head == NULL) {
1131                     tree->mode = lxb_html_tree_insertion_mode_before_head;
1132                     return;
1133                 }
1134 
1135                 tree->mode = lxb_html_tree_insertion_mode_after_head;
1136                 return;
1137             }
1138 
1139             default:
1140                 break;
1141         }
1142 
1143         /* Step 16 */
1144         if (last) {
1145             tree->mode = lxb_html_tree_insertion_mode_in_body;
1146             return;
1147         }
1148     }
1149 }
1150 
1151 lxb_dom_node_t *
lxb_html_tree_element_in_scope(lxb_html_tree_t * tree,lxb_tag_id_t tag_id,lxb_ns_id_t ns,lxb_html_tag_category_t ct)1152 lxb_html_tree_element_in_scope(lxb_html_tree_t *tree, lxb_tag_id_t tag_id,
1153                                lxb_ns_id_t ns, lxb_html_tag_category_t ct)
1154 {
1155     lxb_dom_node_t *node;
1156 
1157     size_t idx = tree->open_elements->length;
1158     void **list = tree->open_elements->list;
1159 
1160     while (idx != 0) {
1161         idx--;
1162         node = list[idx];
1163 
1164         if (node->local_name == tag_id && node->ns == ns) {
1165             return node;
1166         }
1167 
1168         if (lxb_html_tag_is_category(node->local_name, node->ns, ct)) {
1169             return NULL;
1170         }
1171     }
1172 
1173     return NULL;
1174 }
1175 
1176 lxb_dom_node_t *
lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t * tree,lxb_dom_node_t * by_node,lxb_html_tag_category_t ct)1177 lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t *tree,
1178                                        lxb_dom_node_t *by_node,
1179                                        lxb_html_tag_category_t ct)
1180 {
1181     lxb_dom_node_t *node;
1182 
1183     size_t idx = tree->open_elements->length;
1184     void **list = tree->open_elements->list;
1185 
1186     while (idx != 0) {
1187         idx--;
1188         node = list[idx];
1189 
1190         if (node == by_node) {
1191             return node;
1192         }
1193 
1194         if (lxb_html_tag_is_category(node->local_name, node->ns, ct)) {
1195             return NULL;
1196         }
1197     }
1198 
1199     return NULL;
1200 }
1201 
1202 lxb_dom_node_t *
lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t * tree)1203 lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t *tree)
1204 {
1205     lxb_dom_node_t *node;
1206 
1207     size_t idx = tree->open_elements->length;
1208     void **list = tree->open_elements->list;
1209 
1210     while (idx != 0) {
1211         idx--;
1212         node = list[idx];
1213 
1214         switch (node->local_name) {
1215             case LXB_TAG_H1:
1216             case LXB_TAG_H2:
1217             case LXB_TAG_H3:
1218             case LXB_TAG_H4:
1219             case LXB_TAG_H5:
1220             case LXB_TAG_H6:
1221                 if (node->ns == LXB_NS_HTML) {
1222                     return node;
1223                 }
1224 
1225                 break;
1226 
1227             default:
1228                 break;
1229         }
1230 
1231         if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1232                                      LXB_HTML_TAG_CATEGORY_SCOPE))
1233         {
1234             return NULL;
1235         }
1236     }
1237 
1238     return NULL;
1239 }
1240 
1241 lxb_dom_node_t *
lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t * tree)1242 lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t *tree)
1243 {
1244     lxb_dom_node_t *node;
1245 
1246     size_t idx = tree->open_elements->length;
1247     void **list = tree->open_elements->list;
1248 
1249     while (idx != 0) {
1250         idx--;
1251         node = list[idx];
1252 
1253         switch (node->local_name) {
1254             case LXB_TAG_TBODY:
1255             case LXB_TAG_THEAD:
1256             case LXB_TAG_TFOOT:
1257                 if (node->ns == LXB_NS_HTML) {
1258                     return node;
1259                 }
1260 
1261                 break;
1262 
1263             default:
1264                 break;
1265         }
1266 
1267         if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1268                                      LXB_HTML_TAG_CATEGORY_SCOPE_TABLE))
1269         {
1270             return NULL;
1271         }
1272     }
1273 
1274     return NULL;
1275 }
1276 
1277 lxb_dom_node_t *
lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t * tree)1278 lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t *tree)
1279 {
1280     lxb_dom_node_t *node;
1281 
1282     size_t idx = tree->open_elements->length;
1283     void **list = tree->open_elements->list;
1284 
1285     while (idx != 0) {
1286         idx--;
1287         node = list[idx];
1288 
1289         switch (node->local_name) {
1290             case LXB_TAG_TD:
1291             case LXB_TAG_TH:
1292                 if (node->ns == LXB_NS_HTML) {
1293                     return node;
1294                 }
1295 
1296                 break;
1297 
1298             default:
1299                 break;
1300         }
1301 
1302         if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1303                                      LXB_HTML_TAG_CATEGORY_SCOPE_TABLE))
1304         {
1305             return NULL;
1306         }
1307     }
1308 
1309     return NULL;
1310 }
1311 
1312 bool
lxb_html_tree_check_scope_element(lxb_html_tree_t * tree)1313 lxb_html_tree_check_scope_element(lxb_html_tree_t *tree)
1314 {
1315     lxb_dom_node_t *node;
1316 
1317     for (size_t i = 0; i < tree->open_elements->length; i++) {
1318         node = tree->open_elements->list[i];
1319 
1320         switch (node->local_name) {
1321             case LXB_TAG_DD:
1322             case LXB_TAG_DT:
1323             case LXB_TAG_LI:
1324             case LXB_TAG_OPTGROUP:
1325             case LXB_TAG_OPTION:
1326             case LXB_TAG_P:
1327             case LXB_TAG_RB:
1328             case LXB_TAG_RP:
1329             case LXB_TAG_RT:
1330             case LXB_TAG_RTC:
1331             case LXB_TAG_TBODY:
1332             case LXB_TAG_TD:
1333             case LXB_TAG_TFOOT:
1334             case LXB_TAG_TH:
1335             case LXB_TAG_THEAD:
1336             case LXB_TAG_TR:
1337             case LXB_TAG_BODY:
1338             case LXB_TAG_HTML:
1339                 return true;
1340 
1341             default:
1342                 break;
1343         }
1344     }
1345 
1346     return false;
1347 }
1348 
1349 void
lxb_html_tree_close_p_element(lxb_html_tree_t * tree,lxb_html_token_t * token)1350 lxb_html_tree_close_p_element(lxb_html_tree_t *tree, lxb_html_token_t *token)
1351 {
1352     lxb_html_tree_generate_implied_end_tags(tree, LXB_TAG_P, LXB_NS_HTML);
1353 
1354     lxb_dom_node_t *node = lxb_html_tree_current_node(tree);
1355 
1356     if (lxb_html_tree_node_is(node, LXB_TAG_P) == false) {
1357         lxb_html_tree_parse_error(tree, token,
1358                                   LXB_HTML_RULES_ERROR_UNELINOPELST);
1359     }
1360 
1361     lxb_html_tree_open_elements_pop_until_tag_id(tree, LXB_TAG_P, LXB_NS_HTML,
1362                                                  true);
1363 }
1364 
1365 #include "lexbor/html/serialize.h"
1366 
1367 bool
lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_status_t * status)1368 lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t *tree,
1369                                         lxb_html_token_t *token,
1370                                         lxb_status_t *status)
1371 {
1372     lexbor_assert(tree->open_elements->length != 0);
1373 
1374     /* State 1 */
1375     bool is;
1376     short outer_loop;
1377     lxb_html_element_t *element;
1378     lxb_dom_node_t *node, *marker, **oel_list, **afe_list;
1379 
1380     lxb_tag_id_t subject = token->tag_id;
1381 
1382     oel_list = (lxb_dom_node_t **) tree->open_elements->list;
1383     afe_list = (lxb_dom_node_t **) tree->active_formatting->list;
1384     marker = (lxb_dom_node_t *) lxb_html_tree_active_formatting_marker();
1385 
1386     *status = LXB_STATUS_OK;
1387 
1388     /* State 2 */
1389     node = lxb_html_tree_current_node(tree);
1390     lexbor_assert(node != NULL);
1391 
1392     if (lxb_html_tree_node_is(node, subject)) {
1393         is = lxb_html_tree_active_formatting_find_by_node_reverse(tree, node,
1394                                                                   NULL);
1395         if (is == false) {
1396             lxb_html_tree_open_elements_pop(tree);
1397 
1398             return false;
1399         }
1400     }
1401 
1402     /* State 3 */
1403     outer_loop = 0;
1404 
1405     /* State 4 */
1406     while (outer_loop < 8) {
1407         /* State 5 */
1408         outer_loop++;
1409 
1410         /* State 6 */
1411         size_t formatting_index = 0;
1412         size_t idx = tree->active_formatting->length;
1413         lxb_dom_node_t *formatting_element = NULL;
1414 
1415         while (idx) {
1416             idx--;
1417 
1418             if (afe_list[idx] == marker) {
1419                     return true;
1420             }
1421             else if (afe_list[idx]->local_name == subject) {
1422                 formatting_index = idx;
1423                 formatting_element = afe_list[idx];
1424 
1425                 break;
1426             }
1427         }
1428 
1429         if (formatting_element == NULL) {
1430             return true;
1431         }
1432 
1433         /* State 7 */
1434         size_t oel_formatting_idx;
1435         is = lxb_html_tree_open_elements_find_by_node_reverse(tree,
1436                                                               formatting_element,
1437                                                               &oel_formatting_idx);
1438         if (is == false) {
1439             lxb_html_tree_parse_error(tree, token,
1440                                       LXB_HTML_RULES_ERROR_MIELINOPELST);
1441 
1442             lxb_html_tree_active_formatting_remove_by_node(tree,
1443                                                            formatting_element);
1444 
1445             return false;
1446         }
1447 
1448         /* State 8 */
1449         node = lxb_html_tree_element_in_scope_by_node(tree, formatting_element,
1450                                                       LXB_HTML_TAG_CATEGORY_SCOPE);
1451         if (node == NULL) {
1452             lxb_html_tree_parse_error(tree, token,
1453                                       LXB_HTML_RULES_ERROR_MIELINSC);
1454             return false;
1455         }
1456 
1457         /* State 9 */
1458         node = lxb_html_tree_current_node(tree);
1459 
1460         if (formatting_element != node) {
1461             lxb_html_tree_parse_error(tree, token,
1462                                       LXB_HTML_RULES_ERROR_UNELINOPELST);
1463         }
1464 
1465         /* State 10 */
1466         lxb_dom_node_t *furthest_block = NULL;
1467         size_t furthest_block_idx = 0;
1468         size_t oel_idx = tree->open_elements->length;
1469 
1470         for (furthest_block_idx = oel_formatting_idx;
1471              furthest_block_idx < oel_idx;
1472              furthest_block_idx++)
1473         {
1474             is = lxb_html_tag_is_category(oel_list[furthest_block_idx]->local_name,
1475                                           oel_list[furthest_block_idx]->ns,
1476                                           LXB_HTML_TAG_CATEGORY_SPECIAL);
1477             if (is) {
1478                 furthest_block = oel_list[furthest_block_idx];
1479 
1480                 break;
1481             }
1482         }
1483 
1484         /* State 11 */
1485         if (furthest_block == NULL) {
1486             lxb_html_tree_open_elements_pop_until_node(tree, formatting_element,
1487                                                        true);
1488 
1489             lxb_html_tree_active_formatting_remove_by_node(tree,
1490                                                            formatting_element);
1491 
1492             return false;
1493         }
1494 
1495         lexbor_assert(oel_formatting_idx != 0);
1496 
1497         /* State 12 */
1498         lxb_dom_node_t *common_ancestor = oel_list[oel_formatting_idx - 1];
1499 
1500         /* State 13 */
1501         size_t bookmark = formatting_index;
1502 
1503         /* State 14 */
1504         lxb_dom_node_t *node;
1505         lxb_dom_node_t *last = furthest_block;
1506         size_t node_idx = furthest_block_idx;
1507 
1508         /* State 14.1 */
1509         size_t inner_loop_counter = 0;
1510 
1511         /* State 14.2 */
1512         while (1) {
1513             inner_loop_counter++;
1514 
1515             /* State 14.3 */
1516             lexbor_assert(node_idx != 0);
1517 
1518             if (node_idx == 0) {
1519                 return false;
1520             }
1521 
1522             node_idx--;
1523             node = oel_list[node_idx];
1524 
1525             /* State 14.4 */
1526             if (node == formatting_element) {
1527                 break;
1528             }
1529 
1530             /* State 14.5 */
1531             size_t afe_node_idx;
1532             is = lxb_html_tree_active_formatting_find_by_node_reverse(tree,
1533                                                                       node,
1534                                                                       &afe_node_idx);
1535             /* State 14.5 */
1536             if (inner_loop_counter > 3 && is) {
1537                 lxb_html_tree_active_formatting_remove_by_node(tree, node);
1538 
1539                 continue;
1540             }
1541 
1542             /* State 14.6 */
1543             if (is == false) {
1544                 lxb_html_tree_open_elements_remove_by_node(tree, node);
1545 
1546                 continue;
1547             }
1548 
1549             /* State 14.7 */
1550             lxb_html_token_t fake_token = {0};
1551 
1552             fake_token.tag_id = node->local_name;
1553             fake_token.base_element = node;
1554 
1555             element = lxb_html_tree_create_element_for_token(tree, &fake_token,
1556                                                              LXB_NS_HTML);
1557             if (element == NULL) {
1558                 *status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
1559 
1560                 return false;
1561             }
1562 
1563             node = lxb_dom_interface_node(element);
1564 
1565             afe_list[afe_node_idx] = node;
1566             oel_list[node_idx] = node;
1567 
1568             /* State 14.8 */
1569             if (last == furthest_block) {
1570                 bookmark = afe_node_idx + 1;
1571 
1572                 lexbor_assert(bookmark < tree->active_formatting->length);
1573             }
1574 
1575             /* State 14.9 */
1576             if (last->parent != NULL) {
1577                 lxb_dom_node_remove_wo_events(last);
1578             }
1579 
1580             lxb_dom_node_insert_child_wo_events(node, last);
1581 
1582             /* State 14.10 */
1583             last = node;
1584         }
1585 
1586         if (last->parent != NULL) {
1587             lxb_dom_node_remove_wo_events(last);
1588         }
1589 
1590         /* State 15 */
1591         lxb_dom_node_t *pos;
1592         lxb_html_tree_insertion_position_t ipos;
1593 
1594         pos = lxb_html_tree_appropriate_place_inserting_node(tree,
1595                                                              common_ancestor,
1596                                                              &ipos);
1597         if (pos == NULL) {
1598             return false;
1599         }
1600 
1601         lxb_html_tree_insert_node(pos, last, ipos);
1602 
1603         /* State 16 */
1604         lxb_html_token_t fake_token = {0};
1605 
1606         fake_token.tag_id = formatting_element->local_name;
1607         fake_token.base_element = formatting_element;
1608 
1609         element = lxb_html_tree_create_element_for_token(tree, &fake_token,
1610                                                          LXB_NS_HTML);
1611         if (element == NULL) {
1612             *status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
1613 
1614             return false;
1615         }
1616 
1617         /* State 17 */
1618         lxb_dom_node_t *next;
1619         node = furthest_block->first_child;
1620 
1621         while (node != NULL) {
1622             next = node->next;
1623 
1624             lxb_dom_node_remove_wo_events(node);
1625             lxb_dom_node_insert_child_wo_events(lxb_dom_interface_node(element),
1626                                                 node);
1627             node = next;
1628         }
1629 
1630         node = lxb_dom_interface_node(element);
1631 
1632         /* State 18 */
1633         lxb_dom_node_insert_child_wo_events(furthest_block, node);
1634 
1635         /* State 19 */
1636         lxb_html_tree_active_formatting_remove(tree, formatting_index);
1637 
1638         if (bookmark > tree->active_formatting->length) {
1639             bookmark = tree->active_formatting->length;
1640         }
1641 
1642         *status = lxb_html_tree_active_formatting_insert(tree, node, bookmark);
1643         if (*status != LXB_STATUS_OK) {
1644             return false;
1645         }
1646 
1647         /* State 20 */
1648         lxb_html_tree_open_elements_remove_by_node(tree, formatting_element);
1649 
1650         lxb_html_tree_open_elements_find_by_node(tree, furthest_block,
1651                                                  &furthest_block_idx);
1652 
1653         *status = lxb_html_tree_open_elements_insert_after(tree, node,
1654                                                            furthest_block_idx);
1655         if (*status != LXB_STATUS_OK) {
1656             return false;
1657         }
1658     }
1659 
1660     return false;
1661 }
1662 
1663 bool
lxb_html_tree_html_integration_point(lxb_dom_node_t * node)1664 lxb_html_tree_html_integration_point(lxb_dom_node_t *node)
1665 {
1666     if (node->ns == LXB_NS_MATH
1667         && node->local_name == LXB_TAG_ANNOTATION_XML)
1668     {
1669         lxb_dom_attr_t *attr;
1670         attr = lxb_dom_element_attr_is_exist(lxb_dom_interface_element(node),
1671                                              (const lxb_char_t *) "encoding",
1672                                              8);
1673         if (attr == NULL || attr->value == NULL) {
1674             return false;
1675         }
1676 
1677         if (attr->value->length == 9
1678             && lexbor_str_data_casecmp(attr->value->data,
1679                                        (const lxb_char_t *) "text/html"))
1680         {
1681             return true;
1682         }
1683 
1684         if (attr->value->length == 21
1685             && lexbor_str_data_casecmp(attr->value->data,
1686                                        (const lxb_char_t *) "application/xhtml+xml"))
1687         {
1688             return true;
1689         }
1690 
1691         return false;
1692     }
1693 
1694     if (node->ns == LXB_NS_SVG
1695         && (node->local_name == LXB_TAG_FOREIGNOBJECT
1696             || node->local_name == LXB_TAG_DESC
1697             || node->local_name == LXB_TAG_TITLE))
1698     {
1699         return true;
1700     }
1701 
1702     return false;
1703 }
1704 
1705 lxb_status_t
lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)1706 lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t *tree,
1707                                        lxb_dom_attr_t *attr, void *ctx)
1708 {
1709     lxb_status_t status;
1710 
1711     status = lxb_html_tree_adjust_mathml_attributes(tree, attr, ctx);
1712     if (status !=LXB_STATUS_OK) {
1713         return status;
1714     }
1715 
1716     return lxb_html_tree_adjust_foreign_attributes(tree, attr, ctx);
1717 }
1718 
1719 lxb_status_t
lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)1720 lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t *tree,
1721                                     lxb_dom_attr_t *attr, void *ctx)
1722 {
1723     lxb_status_t status;
1724 
1725     status = lxb_html_tree_adjust_svg_attributes(tree, attr, ctx);
1726     if (status !=LXB_STATUS_OK) {
1727         return status;
1728     }
1729 
1730     return lxb_html_tree_adjust_foreign_attributes(tree, attr, ctx);
1731 }
1732