1 /*
2 * Copyright (C) 2018-2022 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7 #include "lexbor/dom/interfaces/document_fragment.h"
8 #include "lexbor/dom/interfaces/document_type.h"
9 #include "lexbor/dom/interfaces/comment.h"
10 #include "lexbor/dom/interfaces/text.h"
11
12 #include "lexbor/html/tree.h"
13 #include "lexbor/html/tree_res.h"
14 #include "lexbor/html/tree/insertion_mode.h"
15 #include "lexbor/html/tree/open_elements.h"
16 #include "lexbor/html/tree/active_formatting.h"
17 #include "lexbor/html/tree/template_insertion.h"
18 #include "lexbor/html/interface.h"
19 #include "lexbor/html/interface.h"
20 #include "lexbor/html/interfaces/template_element.h"
21 #include "lexbor/html/interfaces/unknown_element.h"
22 #include "lexbor/html/tokenizer/state_rawtext.h"
23 #include "lexbor/html/tokenizer/state_rcdata.h"
24
25
26 lxb_dom_attr_data_t *
27 lxb_dom_attr_local_name_append(lexbor_hash_t *hash,
28 const lxb_char_t *name, size_t length);
29
30 lxb_dom_attr_data_t *
31 lxb_dom_attr_qualified_name_append(lexbor_hash_t *hash, const lxb_char_t *name,
32 size_t length);
33
34 const lxb_tag_data_t *
35 lxb_tag_append_lower(lexbor_hash_t *hash,
36 const lxb_char_t *name, size_t length);
37
38 static lxb_html_token_t *
39 lxb_html_tree_token_callback(lxb_html_tokenizer_t *tkz,
40 lxb_html_token_t *token, void *ctx);
41
42 static lxb_status_t
43 lxb_html_tree_insertion_mode(lxb_html_tree_t *tree, lxb_html_token_t *token);
44
45
46 lxb_html_tree_t *
lxb_html_tree_create(void)47 lxb_html_tree_create(void)
48 {
49 return lexbor_calloc(1, sizeof(lxb_html_tree_t));
50 }
51
52 lxb_status_t
lxb_html_tree_init(lxb_html_tree_t * tree,lxb_html_tokenizer_t * tkz)53 lxb_html_tree_init(lxb_html_tree_t *tree, lxb_html_tokenizer_t *tkz)
54 {
55 if (tree == NULL) {
56 return LXB_STATUS_ERROR_OBJECT_IS_NULL;
57 }
58
59 if (tkz == NULL) {
60 return LXB_STATUS_ERROR_WRONG_ARGS;
61 }
62
63 lxb_status_t status;
64
65 /* Stack of open elements */
66 tree->open_elements = lexbor_array_create();
67 status = lexbor_array_init(tree->open_elements, 128);
68 if (status != LXB_STATUS_OK) {
69 return status;
70 }
71
72 /* Stack of active formatting */
73 tree->active_formatting = lexbor_array_create();
74 status = lexbor_array_init(tree->active_formatting, 128);
75 if (status != LXB_STATUS_OK) {
76 return status;
77 }
78
79 /* Stack of template insertion modes */
80 tree->template_insertion_modes = lexbor_array_obj_create();
81 status = lexbor_array_obj_init(tree->template_insertion_modes, 64,
82 sizeof(lxb_html_tree_template_insertion_t));
83 if (status != LXB_STATUS_OK) {
84 return status;
85 }
86
87 /* Stack of pending table character tokens */
88 tree->pending_table.text_list = lexbor_array_obj_create();
89 status = lexbor_array_obj_init(tree->pending_table.text_list, 16,
90 sizeof(lexbor_str_t));
91 if (status != LXB_STATUS_OK) {
92 return status;
93 }
94
95 /* Parse errors */
96 tree->parse_errors = lexbor_array_obj_create();
97 status = lexbor_array_obj_init(tree->parse_errors, 16,
98 sizeof(lxb_html_tree_error_t));
99 if (status != LXB_STATUS_OK) {
100 return status;
101 }
102
103 tree->tkz_ref = lxb_html_tokenizer_ref(tkz);
104
105 tree->document = NULL;
106 tree->fragment = NULL;
107
108 tree->form = NULL;
109
110 tree->foster_parenting = false;
111 tree->frameset_ok = true;
112
113 tree->mode = lxb_html_tree_insertion_mode_initial;
114 tree->before_append_attr = NULL;
115
116 tree->status = LXB_STATUS_OK;
117
118 tree->ref_count = 1;
119
120 lxb_html_tokenizer_callback_token_done_set(tkz,
121 lxb_html_tree_token_callback,
122 tree);
123
124 return LXB_STATUS_OK;
125 }
126
127 lxb_html_tree_t *
lxb_html_tree_ref(lxb_html_tree_t * tree)128 lxb_html_tree_ref(lxb_html_tree_t *tree)
129 {
130 if (tree == NULL) {
131 return NULL;
132 }
133
134 tree->ref_count++;
135
136 return tree;
137 }
138
139 lxb_html_tree_t *
lxb_html_tree_unref(lxb_html_tree_t * tree)140 lxb_html_tree_unref(lxb_html_tree_t *tree)
141 {
142 if (tree == NULL || tree->ref_count == 0) {
143 return NULL;
144 }
145
146 tree->ref_count--;
147
148 if (tree->ref_count == 0) {
149 lxb_html_tree_destroy(tree);
150 }
151
152 return NULL;
153 }
154
155 void
lxb_html_tree_clean(lxb_html_tree_t * tree)156 lxb_html_tree_clean(lxb_html_tree_t *tree)
157 {
158 lexbor_array_clean(tree->open_elements);
159 lexbor_array_clean(tree->active_formatting);
160 lexbor_array_obj_clean(tree->template_insertion_modes);
161 lexbor_array_obj_clean(tree->pending_table.text_list);
162 lexbor_array_obj_clean(tree->parse_errors);
163
164 tree->document = NULL;
165 tree->fragment = NULL;
166
167 tree->form = NULL;
168
169 tree->foster_parenting = false;
170 tree->frameset_ok = true;
171
172 tree->mode = lxb_html_tree_insertion_mode_initial;
173 tree->before_append_attr = NULL;
174
175 tree->status = LXB_STATUS_OK;
176 }
177
178 lxb_html_tree_t *
lxb_html_tree_destroy(lxb_html_tree_t * tree)179 lxb_html_tree_destroy(lxb_html_tree_t *tree)
180 {
181 if (tree == NULL) {
182 return NULL;
183 }
184
185 tree->open_elements = lexbor_array_destroy(tree->open_elements, true);
186 tree->active_formatting = lexbor_array_destroy(tree->active_formatting,
187 true);
188 tree->template_insertion_modes = lexbor_array_obj_destroy(tree->template_insertion_modes,
189 true);
190 tree->pending_table.text_list = lexbor_array_obj_destroy(tree->pending_table.text_list,
191 true);
192
193 tree->parse_errors = lexbor_array_obj_destroy(tree->parse_errors, true);
194 tree->tkz_ref = lxb_html_tokenizer_unref(tree->tkz_ref);
195
196 return lexbor_free(tree);
197 }
198
199 static lxb_html_token_t *
lxb_html_tree_token_callback(lxb_html_tokenizer_t * tkz,lxb_html_token_t * token,void * ctx)200 lxb_html_tree_token_callback(lxb_html_tokenizer_t *tkz,
201 lxb_html_token_t *token, void *ctx)
202 {
203 lxb_status_t status;
204
205 status = lxb_html_tree_insertion_mode(ctx, token);
206 if (status != LXB_STATUS_OK) {
207 tkz->status = status;
208 return NULL;
209 }
210
211 return token;
212 }
213
214 /* TODO: not complete!!! */
215 lxb_status_t
lxb_html_tree_stop_parsing(lxb_html_tree_t * tree)216 lxb_html_tree_stop_parsing(lxb_html_tree_t *tree)
217 {
218 tree->document->ready_state = LXB_HTML_DOCUMENT_READY_STATE_COMPLETE;
219
220 return LXB_STATUS_OK;
221 }
222
223 bool
lxb_html_tree_process_abort(lxb_html_tree_t * tree)224 lxb_html_tree_process_abort(lxb_html_tree_t *tree)
225 {
226 if (tree->status == LXB_STATUS_OK) {
227 tree->status = LXB_STATUS_ABORTED;
228 }
229
230 tree->open_elements->length = 0;
231 tree->document->ready_state = LXB_HTML_DOCUMENT_READY_STATE_COMPLETE;
232
233 return true;
234 }
235
236 void
lxb_html_tree_parse_error(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_html_tree_error_id_t id)237 lxb_html_tree_parse_error(lxb_html_tree_t *tree, lxb_html_token_t *token,
238 lxb_html_tree_error_id_t id)
239 {
240 lxb_html_tree_error_add(tree->parse_errors, token, id);
241 }
242
243 bool
lxb_html_tree_construction_dispatcher(lxb_html_tree_t * tree,lxb_html_token_t * token)244 lxb_html_tree_construction_dispatcher(lxb_html_tree_t *tree,
245 lxb_html_token_t *token)
246 {
247 lxb_dom_node_t *adjusted;
248
249 adjusted = lxb_html_tree_adjusted_current_node(tree);
250
251 if (adjusted == NULL || adjusted->ns == LXB_NS_HTML) {
252 return tree->mode(tree, token);
253 }
254
255 if (lxb_html_tree_mathml_text_integration_point(adjusted))
256 {
257 if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
258 && token->tag_id != LXB_TAG_MGLYPH
259 && token->tag_id != LXB_TAG_MALIGNMARK)
260 {
261 return tree->mode(tree, token);
262 }
263
264 if (token->tag_id == LXB_TAG__TEXT) {
265 return tree->mode(tree, token);
266 }
267 }
268
269 if (adjusted->local_name == LXB_TAG_ANNOTATION_XML
270 && adjusted->ns == LXB_NS_MATH
271 && (token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
272 && token->tag_id == LXB_TAG_SVG)
273 {
274 return tree->mode(tree, token);
275 }
276
277 if (lxb_html_tree_html_integration_point(adjusted)) {
278 if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
279 || token->tag_id == LXB_TAG__TEXT)
280 {
281 return tree->mode(tree, token);
282 }
283 }
284
285 if (token->tag_id == LXB_TAG__END_OF_FILE) {
286 return tree->mode(tree, token);
287 }
288
289 return lxb_html_tree_insertion_mode_foreign_content(tree, token);
290 }
291
292 static lxb_status_t
lxb_html_tree_insertion_mode(lxb_html_tree_t * tree,lxb_html_token_t * token)293 lxb_html_tree_insertion_mode(lxb_html_tree_t *tree, lxb_html_token_t *token)
294 {
295 while (lxb_html_tree_construction_dispatcher(tree, token) == false) {}
296
297 return tree->status;
298 }
299
300 /*
301 * Action
302 */
303 lxb_dom_node_t *
lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t * tree,lxb_dom_node_t * override_target,lxb_html_tree_insertion_position_t * ipos)304 lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t *tree,
305 lxb_dom_node_t *override_target,
306 lxb_html_tree_insertion_position_t *ipos)
307 {
308 lxb_dom_node_t *target, *adjusted_location = NULL;
309
310 *ipos = LXB_HTML_TREE_INSERTION_POSITION_CHILD;
311
312 if (override_target != NULL) {
313 target = override_target;
314 }
315 else {
316 target = lxb_html_tree_current_node(tree);
317 }
318
319 if (tree->foster_parenting && target->ns == LXB_NS_HTML
320 && (target->local_name == LXB_TAG_TABLE
321 || target->local_name == LXB_TAG_TBODY
322 || target->local_name == LXB_TAG_TFOOT
323 || target->local_name == LXB_TAG_THEAD
324 || target->local_name == LXB_TAG_TR))
325 {
326 lxb_dom_node_t *last_temp, *last_table;
327 size_t last_temp_idx, last_table_idx;
328
329 last_temp = lxb_html_tree_open_elements_find_reverse(tree,
330 LXB_TAG_TEMPLATE,
331 LXB_NS_HTML,
332 &last_temp_idx);
333
334 last_table = lxb_html_tree_open_elements_find_reverse(tree,
335 LXB_TAG_TABLE,
336 LXB_NS_HTML,
337 &last_table_idx);
338
339 if(last_temp != NULL && (last_table == NULL
340 || last_temp_idx > last_table_idx))
341 {
342 lxb_dom_document_fragment_t *doc_fragment;
343
344 doc_fragment = lxb_html_interface_template(last_temp)->content;
345
346 return lxb_dom_interface_node(doc_fragment);
347 }
348 else if (last_table == NULL) {
349 adjusted_location = lxb_html_tree_open_elements_first(tree);
350
351 lexbor_assert(adjusted_location != NULL);
352 lexbor_assert(adjusted_location->local_name == LXB_TAG_HTML);
353 }
354 else if (last_table->parent != NULL) {
355 adjusted_location = last_table;
356
357 *ipos = LXB_HTML_TREE_INSERTION_POSITION_BEFORE;
358 }
359 else {
360 lexbor_assert(last_table_idx != 0);
361
362 adjusted_location = lxb_html_tree_open_elements_get(tree,
363 last_table_idx - 1);
364 }
365 }
366 else {
367 adjusted_location = target;
368 }
369
370 if (adjusted_location == NULL) {
371 return NULL;
372 }
373
374 /*
375 * In Spec it is not entirely clear what is meant:
376 *
377 * If the adjusted insertion location is inside a template element,
378 * let it instead be inside the template element's template contents,
379 * after its last child (if any).
380 */
381 if (lxb_html_tree_node_is(adjusted_location, LXB_TAG_TEMPLATE)) {
382 lxb_dom_document_fragment_t *df;
383
384 df = lxb_html_interface_template(adjusted_location)->content;
385 adjusted_location = lxb_dom_interface_node(df);
386 }
387
388 return adjusted_location;
389 }
390
391 lxb_html_element_t *
lxb_html_tree_insert_foreign_element(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_ns_id_t ns)392 lxb_html_tree_insert_foreign_element(lxb_html_tree_t *tree,
393 lxb_html_token_t *token, lxb_ns_id_t ns)
394 {
395 lxb_status_t status;
396 lxb_dom_node_t *pos;
397 lxb_html_element_t *element;
398 lxb_html_tree_insertion_position_t ipos;
399
400 pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
401
402 if (ipos == LXB_HTML_TREE_INSERTION_POSITION_CHILD) {
403 element = lxb_html_tree_create_element_for_token(tree, token, ns, pos);
404 }
405 else {
406 element = lxb_html_tree_create_element_for_token(tree, token, ns,
407 pos->parent);
408 }
409
410 if (element == NULL) {
411 return NULL;
412 }
413
414 if (pos != NULL) {
415 lxb_html_tree_insert_node(pos, lxb_dom_interface_node(element), ipos);
416 }
417
418 status = lxb_html_tree_open_elements_push(tree,
419 lxb_dom_interface_node(element));
420 if (status != LXB_HTML_STATUS_OK) {
421 return lxb_html_interface_destroy(element);
422 }
423
424 return element;
425 }
426
427 lxb_html_element_t *
lxb_html_tree_create_element_for_token(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_ns_id_t ns,lxb_dom_node_t * parent)428 lxb_html_tree_create_element_for_token(lxb_html_tree_t *tree,
429 lxb_html_token_t *token, lxb_ns_id_t ns,
430 lxb_dom_node_t *parent)
431 {
432 lxb_dom_node_t *node = lxb_html_tree_create_node(tree, token->tag_id, ns);
433 if (node == NULL) {
434 return NULL;
435 }
436
437 node->line = token->line;
438 /* We only expose line number in PHP DOM */
439
440 lxb_status_t status;
441 lxb_dom_element_t *element = lxb_dom_interface_element(node);
442
443 if (token->base_element == NULL) {
444 status = lxb_html_tree_append_attributes(tree, element, token, ns);
445 }
446 else {
447 status = lxb_html_tree_append_attributes_from_element(tree, element,
448 token->base_element, ns);
449 }
450
451 if (status != LXB_HTML_STATUS_OK) {
452 return lxb_html_interface_destroy(element);
453 }
454
455 return lxb_html_interface_element(node);
456 }
457
458 lxb_status_t
lxb_html_tree_append_attributes(lxb_html_tree_t * tree,lxb_dom_element_t * element,lxb_html_token_t * token,lxb_ns_id_t ns)459 lxb_html_tree_append_attributes(lxb_html_tree_t *tree,
460 lxb_dom_element_t *element,
461 lxb_html_token_t *token, lxb_ns_id_t ns)
462 {
463 lxb_status_t status;
464 lxb_dom_attr_t *attr;
465 lxb_html_document_t *doc;
466 lxb_html_token_attr_t *token_attr = token->attr_first;
467
468 doc = lxb_html_interface_document(element->node.owner_document);
469
470 while (token_attr != NULL) {
471 attr = lxb_dom_element_attr_by_local_name_data(element,
472 token_attr->name);
473 if (attr != NULL) {
474 token_attr = token_attr->next;
475 continue;
476 }
477
478 attr = lxb_dom_attr_interface_create(lxb_dom_interface_document(doc));
479 if (attr == NULL) {
480 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
481 }
482
483 if (token_attr->value_begin != NULL) {
484 status = lxb_dom_attr_set_value_wo_copy(attr, token_attr->value,
485 token_attr->value_size);
486 if (status != LXB_HTML_STATUS_OK) {
487 return status;
488 }
489 }
490
491 attr->node.local_name = token_attr->name->attr_id;
492 attr->node.ns = ns;
493
494 /* Fix for adjust MathML/SVG attributes */
495 if (tree->before_append_attr != NULL) {
496 status = tree->before_append_attr(tree, attr, NULL);
497 if (status != LXB_STATUS_OK) {
498 return status;
499 }
500 }
501
502 lxb_dom_element_attr_append(element, attr);
503
504 token_attr = token_attr->next;
505 }
506
507 return LXB_HTML_STATUS_OK;
508 }
509
510 lxb_status_t
lxb_html_tree_append_attributes_from_element(lxb_html_tree_t * tree,lxb_dom_element_t * element,lxb_dom_element_t * from,lxb_ns_id_t ns)511 lxb_html_tree_append_attributes_from_element(lxb_html_tree_t *tree,
512 lxb_dom_element_t *element,
513 lxb_dom_element_t *from,
514 lxb_ns_id_t ns)
515 {
516 lxb_status_t status;
517 lxb_dom_attr_t *attr = from->first_attr;
518 lxb_dom_attr_t *new_attr;
519
520 while (attr != NULL) {
521 new_attr = lxb_dom_attr_interface_create(element->node.owner_document);
522 if (new_attr == NULL) {
523 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
524 }
525
526 status = lxb_dom_attr_clone_name_value(attr, new_attr);
527 if (status != LXB_HTML_STATUS_OK) {
528 return status;
529 }
530
531 new_attr->node.ns = attr->node.ns;
532
533 /* Fix for adjust MathML/SVG attributes */
534 if (tree->before_append_attr != NULL) {
535 status = tree->before_append_attr(tree, new_attr, NULL);
536 if (status != LXB_STATUS_OK) {
537 return status;
538 }
539 }
540
541 lxb_dom_element_attr_append(element, attr);
542
543 attr = attr->next;
544 }
545
546 return LXB_HTML_STATUS_OK;
547 }
548
549 lxb_status_t
lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)550 lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t *tree,
551 lxb_dom_attr_t *attr, void *ctx)
552 {
553 lexbor_hash_t *attrs;
554 const lxb_dom_attr_data_t *data;
555
556 attrs = attr->node.owner_document->attrs;
557 data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
558
559 if (data->entry.length == 13
560 && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
561 (const lxb_char_t *) "definitionurl"))
562 {
563 data = lxb_dom_attr_qualified_name_append(attrs,
564 (const lxb_char_t *) "definitionURL", 13);
565 if (data == NULL) {
566 return LXB_STATUS_ERROR;
567 }
568
569 attr->qualified_name = data->attr_id;
570 }
571
572 return LXB_STATUS_OK;
573 }
574
575 lxb_status_t
lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)576 lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t *tree,
577 lxb_dom_attr_t *attr, void *ctx)
578 {
579 lexbor_hash_t *attrs;
580 const lxb_dom_attr_data_t *data;
581 const lxb_html_tree_res_attr_adjust_t *adjust;
582
583 size_t len = sizeof(lxb_html_tree_res_attr_adjust_svg_map)
584 / sizeof(lxb_html_tree_res_attr_adjust_t);
585
586 attrs = attr->node.owner_document->attrs;
587
588 data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
589
590 for (size_t i = 0; i < len; i++) {
591 adjust = &lxb_html_tree_res_attr_adjust_svg_map[i];
592
593 if (data->entry.length == adjust->len
594 && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
595 (const lxb_char_t *) adjust->from))
596 {
597 data = lxb_dom_attr_qualified_name_append(attrs,
598 (const lxb_char_t *) adjust->to, adjust->len);
599 if (data == NULL) {
600 return LXB_STATUS_ERROR;
601 }
602
603 attr->qualified_name = data->attr_id;
604
605 return LXB_STATUS_OK;
606 }
607 }
608
609 return LXB_STATUS_OK;
610 }
611
612 lxb_status_t
lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)613 lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t *tree,
614 lxb_dom_attr_t *attr, void *ctx)
615 {
616 size_t lname_length;
617 lexbor_hash_t *attrs, *prefix;
618 const lxb_dom_attr_data_t *attr_data;
619 const lxb_ns_prefix_data_t *prefix_data;
620 const lxb_dom_attr_data_t *data;
621 const lxb_html_tree_res_attr_adjust_foreign_t *adjust;
622
623 size_t len = sizeof(lxb_html_tree_res_attr_adjust_foreign_map)
624 / sizeof(lxb_html_tree_res_attr_adjust_foreign_t);
625
626 attrs = attr->node.owner_document->attrs;
627 prefix = attr->node.owner_document->prefix;
628
629 data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
630
631 for (size_t i = 0; i < len; i++) {
632 adjust = &lxb_html_tree_res_attr_adjust_foreign_map[i];
633
634 if (data->entry.length == adjust->name_len
635 && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
636 (const lxb_char_t *) adjust->name))
637 {
638 if (adjust->prefix_len != 0) {
639 data = lxb_dom_attr_qualified_name_append(attrs,
640 (const lxb_char_t *) adjust->name, adjust->name_len);
641 if (data == NULL) {
642 return LXB_STATUS_ERROR;
643 }
644
645 attr->qualified_name = data->attr_id;
646
647 lname_length = adjust->name_len - adjust->prefix_len - 1;
648
649 attr_data = lxb_dom_attr_local_name_append(attrs,
650 (const lxb_char_t *) adjust->local_name, lname_length);
651 if (attr_data == NULL) {
652 return LXB_STATUS_ERROR;
653 }
654
655 attr->node.local_name = attr_data->attr_id;
656
657 prefix_data = lxb_ns_prefix_append(prefix,
658 (const lxb_char_t *) adjust->prefix, adjust->prefix_len);
659 if (prefix_data == NULL) {
660 return LXB_STATUS_ERROR;
661 }
662
663 attr->node.prefix = prefix_data->prefix_id;
664 }
665
666 attr->node.ns = adjust->ns;
667
668 return LXB_STATUS_OK;
669 }
670 }
671
672 return LXB_STATUS_OK;
673 }
674
675 lxb_status_t
lxb_html_tree_insert_character(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_dom_node_t ** ret_node)676 lxb_html_tree_insert_character(lxb_html_tree_t *tree, lxb_html_token_t *token,
677 lxb_dom_node_t **ret_node)
678 {
679 size_t size;
680 lxb_status_t status;
681 lexbor_str_t str = {0};
682
683 size = token->text_end - token->text_start;
684
685 lexbor_str_init(&str, tree->document->dom_document.text, size + 1);
686 if (str.data == NULL) {
687 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
688 }
689
690 memcpy(str.data, token->text_start, size);
691
692 str.data[size] = 0x00;
693 str.length = size;
694
695 status = lxb_html_tree_insert_character_for_data(tree, &str, ret_node);
696 if (status != LXB_STATUS_OK) {
697 return status;
698 }
699
700 return LXB_STATUS_OK;
701 }
702
703 lxb_status_t
lxb_html_tree_insert_character_for_data(lxb_html_tree_t * tree,lexbor_str_t * str,lxb_dom_node_t ** ret_node)704 lxb_html_tree_insert_character_for_data(lxb_html_tree_t *tree,
705 lexbor_str_t *str,
706 lxb_dom_node_t **ret_node)
707 {
708 const lxb_char_t *data;
709 lxb_dom_node_t *pos;
710 lxb_dom_character_data_t *chrs = NULL;
711 lxb_html_tree_insertion_position_t ipos;
712
713 if (ret_node != NULL) {
714 *ret_node = NULL;
715 }
716
717 pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
718 if (pos == NULL) {
719 return LXB_STATUS_ERROR;
720 }
721
722 if (lxb_html_tree_node_is(pos, LXB_TAG__DOCUMENT)) {
723 goto destroy_str;
724 }
725
726 if (ipos == LXB_HTML_TREE_INSERTION_POSITION_BEFORE) {
727 /* No need check namespace */
728 if (pos->prev != NULL && pos->prev->local_name == LXB_TAG__TEXT) {
729 chrs = lxb_dom_interface_character_data(pos->prev);
730
731 if (ret_node != NULL) {
732 *ret_node = pos->prev;
733 }
734 }
735 }
736 else {
737 /* No need check namespace */
738 if (pos->last_child != NULL
739 && pos->last_child->local_name == LXB_TAG__TEXT)
740 {
741 chrs = lxb_dom_interface_character_data(pos->last_child);
742
743 if (ret_node != NULL) {
744 *ret_node = pos->last_child;
745 }
746 }
747 }
748
749 if (chrs != NULL) {
750 /* This is error. This can not happen, but... */
751 if (chrs->data.data == NULL) {
752 data = lexbor_str_init(&chrs->data, tree->document->dom_document.text,
753 str->length);
754 if (data == NULL) {
755 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
756 }
757 }
758
759 data = lexbor_str_append(&chrs->data, tree->document->dom_document.text,
760 str->data, str->length);
761 if (data == NULL) {
762 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
763 }
764
765 goto destroy_str;
766 }
767
768 lxb_dom_node_t *text = lxb_html_tree_create_node(tree, LXB_TAG__TEXT,
769 LXB_NS_HTML);
770 if (text == NULL) {
771 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
772 }
773
774 lxb_dom_interface_text(text)->char_data.data = *str;
775
776 if (tree->tkz_ref) {
777 text->line = tree->tkz_ref->token->line;
778 /* We only expose line number in PHP DOM */
779 }
780
781 if (ret_node != NULL) {
782 *ret_node = text;
783 }
784
785 lxb_html_tree_insert_node(pos, text, ipos);
786
787 return LXB_STATUS_OK;
788
789 destroy_str:
790
791 lexbor_str_destroy(str, tree->document->dom_document.text, false);
792
793 return LXB_STATUS_OK;
794 }
795
796 lxb_dom_comment_t *
lxb_html_tree_insert_comment(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_dom_node_t * pos)797 lxb_html_tree_insert_comment(lxb_html_tree_t *tree,
798 lxb_html_token_t *token, lxb_dom_node_t *pos)
799 {
800 lxb_dom_node_t *node;
801 lxb_dom_comment_t *comment;
802 lxb_html_tree_insertion_position_t ipos;
803
804 if (pos == NULL) {
805 pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
806 }
807 else {
808 ipos = LXB_HTML_TREE_INSERTION_POSITION_CHILD;
809 }
810
811 lexbor_assert(pos != NULL);
812
813 node = lxb_html_tree_create_node(tree, token->tag_id, pos->ns);
814 comment = lxb_dom_interface_comment(node);
815
816 if (comment == NULL) {
817 return NULL;
818 }
819
820 node->line = token->line;
821 /* We only expose line number in PHP DOM */
822
823 tree->status = lxb_html_token_make_text(token, &comment->char_data.data,
824 tree->document->dom_document.text);
825 if (tree->status != LXB_STATUS_OK) {
826 return NULL;
827 }
828
829 lxb_html_tree_insert_node(pos, node, ipos);
830
831 return comment;
832 }
833
834 lxb_dom_document_type_t *
lxb_html_tree_create_document_type_from_token(lxb_html_tree_t * tree,lxb_html_token_t * token)835 lxb_html_tree_create_document_type_from_token(lxb_html_tree_t *tree,
836 lxb_html_token_t *token)
837 {
838 lxb_status_t status;
839 lxb_dom_node_t *doctype_node;
840 lxb_dom_document_type_t *doc_type;
841
842 /* Create */
843 doctype_node = lxb_html_tree_create_node(tree, token->tag_id, LXB_NS_HTML);
844 if (doctype_node == NULL) {
845 return NULL;
846 }
847
848 doc_type = lxb_dom_interface_document_type(doctype_node);
849
850 /* Parse */
851 status = lxb_html_token_doctype_parse(token, doc_type);
852 if (status != LXB_STATUS_OK) {
853 return lxb_dom_document_type_interface_destroy(doc_type);
854 }
855
856 return doc_type;
857 }
858
859 /*
860 * TODO: need use ref and unref for nodes (ref counter)
861 * Not implemented until the end. It is necessary to finish it.
862 */
863 void
lxb_html_tree_node_delete_deep(lxb_html_tree_t * tree,lxb_dom_node_t * node)864 lxb_html_tree_node_delete_deep(lxb_html_tree_t *tree, lxb_dom_node_t *node)
865 {
866 lxb_dom_node_remove(node);
867 }
868
869 lxb_html_element_t *
lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t * tree,lxb_html_token_t * token)870 lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t *tree,
871 lxb_html_token_t *token)
872 {
873 lxb_html_element_t *element;
874
875 element = lxb_html_tree_insert_html_element(tree, token);
876 if (element == NULL) {
877 return NULL;
878 }
879
880 /*
881 * Need for tokenizer state RAWTEXT
882 * See description for 'lxb_html_tokenizer_state_rawtext_before' function
883 */
884 lxb_html_tokenizer_tmp_tag_id_set(tree->tkz_ref, token->tag_id);
885 lxb_html_tokenizer_state_set(tree->tkz_ref,
886 lxb_html_tokenizer_state_rawtext_before);
887
888 tree->original_mode = tree->mode;
889 tree->mode = lxb_html_tree_insertion_mode_text;
890
891 return element;
892 }
893
894 /* Magic of CopyPast power! */
895 lxb_html_element_t *
lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t * tree,lxb_html_token_t * token)896 lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t *tree,
897 lxb_html_token_t *token)
898 {
899 lxb_html_element_t *element;
900
901 element = lxb_html_tree_insert_html_element(tree, token);
902 if (element == NULL) {
903 return NULL;
904 }
905
906 /*
907 * Need for tokenizer state RCDATA
908 * See description for 'lxb_html_tokenizer_state_rcdata_before' function
909 */
910 lxb_html_tokenizer_tmp_tag_id_set(tree->tkz_ref, token->tag_id);
911 lxb_html_tokenizer_state_set(tree->tkz_ref,
912 lxb_html_tokenizer_state_rcdata_before);
913
914 tree->original_mode = tree->mode;
915 tree->mode = lxb_html_tree_insertion_mode_text;
916
917 return element;
918 }
919
920 void
lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t * tree,lxb_tag_id_t ex_tag,lxb_ns_id_t ex_ns)921 lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t *tree,
922 lxb_tag_id_t ex_tag, lxb_ns_id_t ex_ns)
923 {
924 lxb_dom_node_t *node;
925
926 lexbor_assert(tree->open_elements != 0);
927
928 while (lexbor_array_length(tree->open_elements) != 0) {
929 node = lxb_html_tree_current_node(tree);
930
931 lexbor_assert(node != NULL);
932
933 switch (node->local_name) {
934 case LXB_TAG_DD:
935 case LXB_TAG_DT:
936 case LXB_TAG_LI:
937 case LXB_TAG_OPTGROUP:
938 case LXB_TAG_OPTION:
939 case LXB_TAG_P:
940 case LXB_TAG_RB:
941 case LXB_TAG_RP:
942 case LXB_TAG_RT:
943 case LXB_TAG_RTC:
944 if(node->local_name == ex_tag && node->ns == ex_ns) {
945 return;
946 }
947
948 lxb_html_tree_open_elements_pop(tree);
949
950 break;
951
952 default:
953 return;
954 }
955 }
956 }
957
958 void
lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t * tree,lxb_tag_id_t ex_tag,lxb_ns_id_t ex_ns)959 lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t *tree,
960 lxb_tag_id_t ex_tag,
961 lxb_ns_id_t ex_ns)
962 {
963 lxb_dom_node_t *node;
964
965 lexbor_assert(tree->open_elements != 0);
966
967 while (lexbor_array_length(tree->open_elements) != 0) {
968 node = lxb_html_tree_current_node(tree);
969
970 lexbor_assert(node != NULL);
971
972 switch (node->local_name) {
973 case LXB_TAG_CAPTION:
974 case LXB_TAG_COLGROUP:
975 case LXB_TAG_DD:
976 case LXB_TAG_DT:
977 case LXB_TAG_LI:
978 case LXB_TAG_OPTGROUP:
979 case LXB_TAG_OPTION:
980 case LXB_TAG_P:
981 case LXB_TAG_RB:
982 case LXB_TAG_RP:
983 case LXB_TAG_RT:
984 case LXB_TAG_RTC:
985 case LXB_TAG_TBODY:
986 case LXB_TAG_TD:
987 case LXB_TAG_TFOOT:
988 case LXB_TAG_TH:
989 case LXB_TAG_THEAD:
990 case LXB_TAG_TR:
991 if(node->local_name == ex_tag && node->ns == ex_ns) {
992 return;
993 }
994
995 lxb_html_tree_open_elements_pop(tree);
996
997 break;
998
999 default:
1000 return;
1001 }
1002 }
1003 }
1004
1005 void
lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t * tree)1006 lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t *tree)
1007 {
1008 lxb_dom_node_t *node;
1009 size_t idx = tree->open_elements->length;
1010
1011 /* Step 1 */
1012 bool last = false;
1013 void **list = tree->open_elements->list;
1014
1015 /* Step 3 */
1016 while (idx != 0) {
1017 idx--;
1018
1019 /* Step 2 */
1020 node = list[idx];
1021
1022 /* Step 3 */
1023 if (idx == 0) {
1024 last = true;
1025
1026 if (tree->fragment != NULL) {
1027 node = tree->fragment;
1028 }
1029 }
1030
1031 lexbor_assert(node != NULL);
1032
1033 /* Step 16 */
1034 if (node->ns != LXB_NS_HTML) {
1035 if (last) {
1036 tree->mode = lxb_html_tree_insertion_mode_in_body;
1037 return;
1038 }
1039
1040 continue;
1041 }
1042
1043 /* Step 4 */
1044 if (node->local_name == LXB_TAG_SELECT) {
1045 /* Step 4.1 */
1046 if (last) {
1047 tree->mode = lxb_html_tree_insertion_mode_in_select;
1048 return;
1049 }
1050
1051 /* Step 4.2 */
1052 size_t ancestor = idx;
1053
1054 for (;;) {
1055 /* Step 4.3 */
1056 if (ancestor == 0) {
1057 tree->mode = lxb_html_tree_insertion_mode_in_select;
1058 return;
1059 }
1060
1061 /* Step 4.4 */
1062 ancestor--;
1063
1064 /* Step 4.5 */
1065 lxb_dom_node_t *ancestor_node = list[ancestor];
1066
1067 if(lxb_html_tree_node_is(ancestor_node, LXB_TAG_TEMPLATE)) {
1068 tree->mode = lxb_html_tree_insertion_mode_in_select;
1069 return;
1070 }
1071
1072 /* Step 4.6 */
1073 else if(lxb_html_tree_node_is(ancestor_node, LXB_TAG_TABLE)) {
1074 tree->mode = lxb_html_tree_insertion_mode_in_select_in_table;
1075 return;
1076 }
1077 }
1078 }
1079
1080 /* Step 5-15 */
1081 switch (node->local_name) {
1082 case LXB_TAG_TD:
1083 case LXB_TAG_TH:
1084 if (last == false) {
1085 tree->mode = lxb_html_tree_insertion_mode_in_cell;
1086 return;
1087 }
1088
1089 break;
1090
1091 case LXB_TAG_TR:
1092 tree->mode = lxb_html_tree_insertion_mode_in_row;
1093 return;
1094
1095 case LXB_TAG_TBODY:
1096 case LXB_TAG_TFOOT:
1097 case LXB_TAG_THEAD:
1098 tree->mode = lxb_html_tree_insertion_mode_in_table_body;
1099 return;
1100
1101 case LXB_TAG_CAPTION:
1102 tree->mode = lxb_html_tree_insertion_mode_in_caption;
1103 return;
1104
1105 case LXB_TAG_COLGROUP:
1106 tree->mode = lxb_html_tree_insertion_mode_in_column_group;
1107 return;
1108
1109 case LXB_TAG_TABLE:
1110 tree->mode = lxb_html_tree_insertion_mode_in_table;
1111 return;
1112
1113 case LXB_TAG_TEMPLATE:
1114 tree->mode = lxb_html_tree_template_insertion_current(tree);
1115
1116 lexbor_assert(tree->mode != NULL);
1117
1118 return;
1119
1120 case LXB_TAG_HEAD:
1121 if (last == false) {
1122 tree->mode = lxb_html_tree_insertion_mode_in_head;
1123 return;
1124 }
1125
1126 break;
1127
1128 case LXB_TAG_BODY:
1129 tree->mode = lxb_html_tree_insertion_mode_in_body;
1130 return;
1131
1132 case LXB_TAG_FRAMESET:
1133 tree->mode = lxb_html_tree_insertion_mode_in_frameset;
1134 return;
1135
1136 case LXB_TAG_HTML: {
1137 if (tree->document->head == NULL) {
1138 tree->mode = lxb_html_tree_insertion_mode_before_head;
1139 return;
1140 }
1141
1142 tree->mode = lxb_html_tree_insertion_mode_after_head;
1143 return;
1144 }
1145
1146 default:
1147 break;
1148 }
1149
1150 /* Step 16 */
1151 if (last) {
1152 tree->mode = lxb_html_tree_insertion_mode_in_body;
1153 return;
1154 }
1155 }
1156 }
1157
1158 lxb_dom_node_t *
lxb_html_tree_element_in_scope(lxb_html_tree_t * tree,lxb_tag_id_t tag_id,lxb_ns_id_t ns,lxb_html_tag_category_t ct)1159 lxb_html_tree_element_in_scope(lxb_html_tree_t *tree, lxb_tag_id_t tag_id,
1160 lxb_ns_id_t ns, lxb_html_tag_category_t ct)
1161 {
1162 lxb_dom_node_t *node;
1163
1164 size_t idx = tree->open_elements->length;
1165 void **list = tree->open_elements->list;
1166
1167 while (idx != 0) {
1168 idx--;
1169 node = list[idx];
1170
1171 if (node->local_name == tag_id && node->ns == ns) {
1172 return node;
1173 }
1174
1175 if (lxb_html_tag_is_category(node->local_name, node->ns, ct)) {
1176 return NULL;
1177 }
1178 }
1179
1180 return NULL;
1181 }
1182
1183 lxb_dom_node_t *
lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t * tree,lxb_dom_node_t * by_node,lxb_html_tag_category_t ct)1184 lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t *tree,
1185 lxb_dom_node_t *by_node,
1186 lxb_html_tag_category_t ct)
1187 {
1188 lxb_dom_node_t *node;
1189
1190 size_t idx = tree->open_elements->length;
1191 void **list = tree->open_elements->list;
1192
1193 while (idx != 0) {
1194 idx--;
1195 node = list[idx];
1196
1197 if (node == by_node) {
1198 return node;
1199 }
1200
1201 if (lxb_html_tag_is_category(node->local_name, node->ns, ct)) {
1202 return NULL;
1203 }
1204 }
1205
1206 return NULL;
1207 }
1208
1209 lxb_dom_node_t *
lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t * tree)1210 lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t *tree)
1211 {
1212 lxb_dom_node_t *node;
1213
1214 size_t idx = tree->open_elements->length;
1215 void **list = tree->open_elements->list;
1216
1217 while (idx != 0) {
1218 idx--;
1219 node = list[idx];
1220
1221 switch (node->local_name) {
1222 case LXB_TAG_H1:
1223 case LXB_TAG_H2:
1224 case LXB_TAG_H3:
1225 case LXB_TAG_H4:
1226 case LXB_TAG_H5:
1227 case LXB_TAG_H6:
1228 if (node->ns == LXB_NS_HTML) {
1229 return node;
1230 }
1231
1232 break;
1233
1234 default:
1235 break;
1236 }
1237
1238 if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1239 LXB_HTML_TAG_CATEGORY_SCOPE))
1240 {
1241 return NULL;
1242 }
1243 }
1244
1245 return NULL;
1246 }
1247
1248 lxb_dom_node_t *
lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t * tree)1249 lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t *tree)
1250 {
1251 lxb_dom_node_t *node;
1252
1253 size_t idx = tree->open_elements->length;
1254 void **list = tree->open_elements->list;
1255
1256 while (idx != 0) {
1257 idx--;
1258 node = list[idx];
1259
1260 switch (node->local_name) {
1261 case LXB_TAG_TBODY:
1262 case LXB_TAG_THEAD:
1263 case LXB_TAG_TFOOT:
1264 if (node->ns == LXB_NS_HTML) {
1265 return node;
1266 }
1267
1268 break;
1269
1270 default:
1271 break;
1272 }
1273
1274 if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1275 LXB_HTML_TAG_CATEGORY_SCOPE_TABLE))
1276 {
1277 return NULL;
1278 }
1279 }
1280
1281 return NULL;
1282 }
1283
1284 lxb_dom_node_t *
lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t * tree)1285 lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t *tree)
1286 {
1287 lxb_dom_node_t *node;
1288
1289 size_t idx = tree->open_elements->length;
1290 void **list = tree->open_elements->list;
1291
1292 while (idx != 0) {
1293 idx--;
1294 node = list[idx];
1295
1296 switch (node->local_name) {
1297 case LXB_TAG_TD:
1298 case LXB_TAG_TH:
1299 if (node->ns == LXB_NS_HTML) {
1300 return node;
1301 }
1302
1303 break;
1304
1305 default:
1306 break;
1307 }
1308
1309 if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1310 LXB_HTML_TAG_CATEGORY_SCOPE_TABLE))
1311 {
1312 return NULL;
1313 }
1314 }
1315
1316 return NULL;
1317 }
1318
1319 bool
lxb_html_tree_check_scope_element(lxb_html_tree_t * tree)1320 lxb_html_tree_check_scope_element(lxb_html_tree_t *tree)
1321 {
1322 lxb_dom_node_t *node;
1323
1324 for (size_t i = 0; i < tree->open_elements->length; i++) {
1325 node = tree->open_elements->list[i];
1326
1327 switch (node->local_name) {
1328 case LXB_TAG_DD:
1329 case LXB_TAG_DT:
1330 case LXB_TAG_LI:
1331 case LXB_TAG_OPTGROUP:
1332 case LXB_TAG_OPTION:
1333 case LXB_TAG_P:
1334 case LXB_TAG_RB:
1335 case LXB_TAG_RP:
1336 case LXB_TAG_RT:
1337 case LXB_TAG_RTC:
1338 case LXB_TAG_TBODY:
1339 case LXB_TAG_TD:
1340 case LXB_TAG_TFOOT:
1341 case LXB_TAG_TH:
1342 case LXB_TAG_THEAD:
1343 case LXB_TAG_TR:
1344 case LXB_TAG_BODY:
1345 case LXB_TAG_HTML:
1346 return true;
1347
1348 default:
1349 break;
1350 }
1351 }
1352
1353 return false;
1354 }
1355
1356 void
lxb_html_tree_close_p_element(lxb_html_tree_t * tree,lxb_html_token_t * token)1357 lxb_html_tree_close_p_element(lxb_html_tree_t *tree, lxb_html_token_t *token)
1358 {
1359 lxb_html_tree_generate_implied_end_tags(tree, LXB_TAG_P, LXB_NS_HTML);
1360
1361 lxb_dom_node_t *node = lxb_html_tree_current_node(tree);
1362
1363 if (lxb_html_tree_node_is(node, LXB_TAG_P) == false) {
1364 lxb_html_tree_parse_error(tree, token,
1365 LXB_HTML_RULES_ERROR_UNELINOPELST);
1366 }
1367
1368 lxb_html_tree_open_elements_pop_until_tag_id(tree, LXB_TAG_P, LXB_NS_HTML,
1369 true);
1370 }
1371
1372 #include "lexbor/html/serialize.h"
1373
1374 bool
lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_status_t * status)1375 lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t *tree,
1376 lxb_html_token_t *token,
1377 lxb_status_t *status)
1378 {
1379 lexbor_assert(tree->open_elements->length != 0);
1380
1381 /* State 1 */
1382 bool is;
1383 short outer_loop;
1384 lxb_html_element_t *element;
1385 lxb_dom_node_t *node, *marker, **oel_list, **afe_list;
1386
1387 lxb_tag_id_t subject = token->tag_id;
1388
1389 oel_list = (lxb_dom_node_t **) tree->open_elements->list;
1390 afe_list = (lxb_dom_node_t **) tree->active_formatting->list;
1391 marker = (lxb_dom_node_t *) lxb_html_tree_active_formatting_marker();
1392
1393 *status = LXB_STATUS_OK;
1394
1395 /* State 2 */
1396 node = lxb_html_tree_current_node(tree);
1397 lexbor_assert(node != NULL);
1398
1399 if (lxb_html_tree_node_is(node, subject)) {
1400 is = lxb_html_tree_active_formatting_find_by_node_reverse(tree, node,
1401 NULL);
1402 if (is == false) {
1403 lxb_html_tree_open_elements_pop(tree);
1404
1405 return false;
1406 }
1407 }
1408
1409 /* State 3 */
1410 outer_loop = 0;
1411
1412 /* State 4 */
1413 while (outer_loop < 8) {
1414 /* State 5 */
1415 outer_loop++;
1416
1417 /* State 6 */
1418 size_t formatting_index = 0;
1419 size_t idx = tree->active_formatting->length;
1420 lxb_dom_node_t *formatting_element = NULL;
1421
1422 while (idx) {
1423 idx--;
1424
1425 if (afe_list[idx] == marker) {
1426 return true;
1427 }
1428 else if (afe_list[idx]->local_name == subject) {
1429 formatting_index = idx;
1430 formatting_element = afe_list[idx];
1431
1432 break;
1433 }
1434 }
1435
1436 if (formatting_element == NULL) {
1437 return true;
1438 }
1439
1440 /* State 7 */
1441 size_t oel_formatting_idx;
1442 is = lxb_html_tree_open_elements_find_by_node_reverse(tree,
1443 formatting_element,
1444 &oel_formatting_idx);
1445 if (is == false) {
1446 lxb_html_tree_parse_error(tree, token,
1447 LXB_HTML_RULES_ERROR_MIELINOPELST);
1448
1449 lxb_html_tree_active_formatting_remove_by_node(tree,
1450 formatting_element);
1451
1452 return false;
1453 }
1454
1455 /* State 8 */
1456 node = lxb_html_tree_element_in_scope_by_node(tree, formatting_element,
1457 LXB_HTML_TAG_CATEGORY_SCOPE);
1458 if (node == NULL) {
1459 lxb_html_tree_parse_error(tree, token,
1460 LXB_HTML_RULES_ERROR_MIELINSC);
1461 return false;
1462 }
1463
1464 /* State 9 */
1465 node = lxb_html_tree_current_node(tree);
1466
1467 if (formatting_element != node) {
1468 lxb_html_tree_parse_error(tree, token,
1469 LXB_HTML_RULES_ERROR_UNELINOPELST);
1470 }
1471
1472 /* State 10 */
1473 lxb_dom_node_t *furthest_block = NULL;
1474 size_t furthest_block_idx = 0;
1475 size_t oel_idx = tree->open_elements->length;
1476
1477 for (furthest_block_idx = oel_formatting_idx;
1478 furthest_block_idx < oel_idx;
1479 furthest_block_idx++)
1480 {
1481 is = lxb_html_tag_is_category(oel_list[furthest_block_idx]->local_name,
1482 oel_list[furthest_block_idx]->ns,
1483 LXB_HTML_TAG_CATEGORY_SPECIAL);
1484 if (is) {
1485 furthest_block = oel_list[furthest_block_idx];
1486
1487 break;
1488 }
1489 }
1490
1491 /* State 11 */
1492 if (furthest_block == NULL) {
1493 lxb_html_tree_open_elements_pop_until_node(tree, formatting_element,
1494 true);
1495
1496 lxb_html_tree_active_formatting_remove_by_node(tree,
1497 formatting_element);
1498
1499 return false;
1500 }
1501
1502 lexbor_assert(oel_formatting_idx != 0);
1503
1504 /* State 12 */
1505 lxb_dom_node_t *common_ancestor = oel_list[oel_formatting_idx - 1];
1506
1507 /* State 13 */
1508 size_t bookmark = formatting_index;
1509
1510 /* State 14 */
1511 lxb_dom_node_t *node;
1512 lxb_dom_node_t *last = furthest_block;
1513 size_t node_idx = furthest_block_idx;
1514
1515 /* State 14.1 */
1516 size_t inner_loop_counter = 0;
1517
1518 /* State 14.2 */
1519 while (1) {
1520 inner_loop_counter++;
1521
1522 /* State 14.3 */
1523 lexbor_assert(node_idx != 0);
1524
1525 if (node_idx == 0) {
1526 return false;
1527 }
1528
1529 node_idx--;
1530 node = oel_list[node_idx];
1531
1532 /* State 14.4 */
1533 if (node == formatting_element) {
1534 break;
1535 }
1536
1537 /* State 14.5 */
1538 size_t afe_node_idx;
1539 is = lxb_html_tree_active_formatting_find_by_node_reverse(tree,
1540 node,
1541 &afe_node_idx);
1542 /* State 14.5 */
1543 if (inner_loop_counter > 3 && is) {
1544 lxb_html_tree_active_formatting_remove_by_node(tree, node);
1545
1546 continue;
1547 }
1548
1549 /* State 14.6 */
1550 if (is == false) {
1551 lxb_html_tree_open_elements_remove_by_node(tree, node);
1552
1553 continue;
1554 }
1555
1556 /* State 14.7 */
1557 lxb_html_token_t fake_token = {0};
1558
1559 fake_token.tag_id = node->local_name;
1560 fake_token.base_element = node;
1561
1562 element = lxb_html_tree_create_element_for_token(tree, &fake_token,
1563 LXB_NS_HTML,
1564 common_ancestor);
1565 if (element == NULL) {
1566 *status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
1567
1568 return false;
1569 }
1570
1571 node = lxb_dom_interface_node(element);
1572
1573 afe_list[afe_node_idx] = node;
1574 oel_list[node_idx] = node;
1575
1576 /* State 14.8 */
1577 if (last == furthest_block) {
1578 bookmark = afe_node_idx + 1;
1579
1580 lexbor_assert(bookmark < tree->active_formatting->length);
1581 }
1582
1583 /* State 14.9 */
1584 if (last->parent != NULL) {
1585 lxb_dom_node_remove_wo_events(last);
1586 }
1587
1588 lxb_dom_node_insert_child_wo_events(node, last);
1589
1590 /* State 14.10 */
1591 last = node;
1592 }
1593
1594 if (last->parent != NULL) {
1595 lxb_dom_node_remove_wo_events(last);
1596 }
1597
1598 /* State 15 */
1599 lxb_dom_node_t *pos;
1600 lxb_html_tree_insertion_position_t ipos;
1601
1602 pos = lxb_html_tree_appropriate_place_inserting_node(tree,
1603 common_ancestor,
1604 &ipos);
1605 if (pos == NULL) {
1606 return false;
1607 }
1608
1609 lxb_html_tree_insert_node(pos, last, ipos);
1610
1611 /* State 16 */
1612 lxb_html_token_t fake_token = {0};
1613
1614 fake_token.tag_id = formatting_element->local_name;
1615 fake_token.base_element = formatting_element;
1616
1617 element = lxb_html_tree_create_element_for_token(tree, &fake_token,
1618 LXB_NS_HTML,
1619 furthest_block);
1620 if (element == NULL) {
1621 *status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
1622
1623 return false;
1624 }
1625
1626 /* State 17 */
1627 lxb_dom_node_t *next;
1628 node = furthest_block->first_child;
1629
1630 while (node != NULL) {
1631 next = node->next;
1632
1633 lxb_dom_node_remove_wo_events(node);
1634 lxb_dom_node_insert_child_wo_events(lxb_dom_interface_node(element),
1635 node);
1636 node = next;
1637 }
1638
1639 node = lxb_dom_interface_node(element);
1640
1641 /* State 18 */
1642 lxb_dom_node_insert_child_wo_events(furthest_block, node);
1643
1644 /* State 19 */
1645 lxb_html_tree_active_formatting_remove(tree, formatting_index);
1646
1647 if (bookmark > tree->active_formatting->length) {
1648 bookmark = tree->active_formatting->length;
1649 }
1650
1651 *status = lxb_html_tree_active_formatting_insert(tree, node, bookmark);
1652 if (*status != LXB_STATUS_OK) {
1653 return false;
1654 }
1655
1656 /* State 20 */
1657 lxb_html_tree_open_elements_remove_by_node(tree, formatting_element);
1658
1659 lxb_html_tree_open_elements_find_by_node(tree, furthest_block,
1660 &furthest_block_idx);
1661
1662 *status = lxb_html_tree_open_elements_insert_after(tree, node,
1663 furthest_block_idx);
1664 if (*status != LXB_STATUS_OK) {
1665 return false;
1666 }
1667 }
1668
1669 return false;
1670 }
1671
1672 bool
lxb_html_tree_html_integration_point(lxb_dom_node_t * node)1673 lxb_html_tree_html_integration_point(lxb_dom_node_t *node)
1674 {
1675 if (node->ns == LXB_NS_MATH
1676 && node->local_name == LXB_TAG_ANNOTATION_XML)
1677 {
1678 lxb_dom_attr_t *attr;
1679 attr = lxb_dom_element_attr_is_exist(lxb_dom_interface_element(node),
1680 (const lxb_char_t *) "encoding",
1681 8);
1682 if (attr == NULL || attr->value == NULL) {
1683 return false;
1684 }
1685
1686 if (attr->value->length == 9
1687 && lexbor_str_data_casecmp(attr->value->data,
1688 (const lxb_char_t *) "text/html"))
1689 {
1690 return true;
1691 }
1692
1693 if (attr->value->length == 21
1694 && lexbor_str_data_casecmp(attr->value->data,
1695 (const lxb_char_t *) "application/xhtml+xml"))
1696 {
1697 return true;
1698 }
1699
1700 return false;
1701 }
1702
1703 if (node->ns == LXB_NS_SVG
1704 && (node->local_name == LXB_TAG_FOREIGNOBJECT
1705 || node->local_name == LXB_TAG_DESC
1706 || node->local_name == LXB_TAG_TITLE))
1707 {
1708 return true;
1709 }
1710
1711 return false;
1712 }
1713
1714 lxb_status_t
lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)1715 lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t *tree,
1716 lxb_dom_attr_t *attr, void *ctx)
1717 {
1718 lxb_status_t status;
1719
1720 status = lxb_html_tree_adjust_mathml_attributes(tree, attr, ctx);
1721 if (status !=LXB_STATUS_OK) {
1722 return status;
1723 }
1724
1725 return lxb_html_tree_adjust_foreign_attributes(tree, attr, ctx);
1726 }
1727
1728 lxb_status_t
lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)1729 lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t *tree,
1730 lxb_dom_attr_t *attr, void *ctx)
1731 {
1732 lxb_status_t status;
1733
1734 status = lxb_html_tree_adjust_svg_attributes(tree, attr, ctx);
1735 if (status !=LXB_STATUS_OK) {
1736 return status;
1737 }
1738
1739 return lxb_html_tree_adjust_foreign_attributes(tree, attr, ctx);
1740 }
1741