1 /*
2 * Copyright (C) 2018-2022 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7 #include "lexbor/dom/interfaces/document_fragment.h"
8 #include "lexbor/dom/interfaces/document_type.h"
9 #include "lexbor/dom/interfaces/comment.h"
10 #include "lexbor/dom/interfaces/text.h"
11
12 #include "lexbor/html/tree.h"
13 #include "lexbor/html/tree_res.h"
14 #include "lexbor/html/tree/insertion_mode.h"
15 #include "lexbor/html/tree/open_elements.h"
16 #include "lexbor/html/tree/active_formatting.h"
17 #include "lexbor/html/tree/template_insertion.h"
18 #include "lexbor/html/interface.h"
19 #include "lexbor/html/interface.h"
20 #include "lexbor/html/interfaces/template_element.h"
21 #include "lexbor/html/interfaces/unknown_element.h"
22 #include "lexbor/html/tokenizer/state_rawtext.h"
23 #include "lexbor/html/tokenizer/state_rcdata.h"
24
25
/*
 * Prototypes for helpers that are not defined in the part of the file
 * visible here.
 * NOTE(review): presumably they are declared locally because no included
 * header exposes them -- confirm.
 */
lxb_dom_attr_data_t *
lxb_dom_attr_local_name_append(lexbor_hash_t *hash,
                               const lxb_char_t *name, size_t length);

lxb_dom_attr_data_t *
lxb_dom_attr_qualified_name_append(lexbor_hash_t *hash, const lxb_char_t *name,
                                   size_t length);

const lxb_tag_data_t *
lxb_tag_append_lower(lexbor_hash_t *hash,
                     const lxb_char_t *name, size_t length);

/* Forward declarations of the file-local functions defined further down. */
static lxb_html_token_t *
lxb_html_tree_token_callback(lxb_html_tokenizer_t *tkz,
                             lxb_html_token_t *token, void *ctx);

static lxb_status_t
lxb_html_tree_insertion_mode(lxb_html_tree_t *tree, lxb_html_token_t *token);
44
45
46 lxb_html_tree_t *
lxb_html_tree_create(void)47 lxb_html_tree_create(void)
48 {
49 return lexbor_calloc(1, sizeof(lxb_html_tree_t));
50 }
51
52 lxb_status_t
lxb_html_tree_init(lxb_html_tree_t * tree,lxb_html_tokenizer_t * tkz)53 lxb_html_tree_init(lxb_html_tree_t *tree, lxb_html_tokenizer_t *tkz)
54 {
55 if (tree == NULL) {
56 return LXB_STATUS_ERROR_OBJECT_IS_NULL;
57 }
58
59 if (tkz == NULL) {
60 return LXB_STATUS_ERROR_WRONG_ARGS;
61 }
62
63 lxb_status_t status;
64
65 /* Stack of open elements */
66 tree->open_elements = lexbor_array_create();
67 status = lexbor_array_init(tree->open_elements, 128);
68 if (status != LXB_STATUS_OK) {
69 return status;
70 }
71
72 /* Stack of active formatting */
73 tree->active_formatting = lexbor_array_create();
74 status = lexbor_array_init(tree->active_formatting, 128);
75 if (status != LXB_STATUS_OK) {
76 return status;
77 }
78
79 /* Stack of template insertion modes */
80 tree->template_insertion_modes = lexbor_array_obj_create();
81 status = lexbor_array_obj_init(tree->template_insertion_modes, 64,
82 sizeof(lxb_html_tree_template_insertion_t));
83 if (status != LXB_STATUS_OK) {
84 return status;
85 }
86
87 /* Stack of pending table character tokens */
88 tree->pending_table.text_list = lexbor_array_obj_create();
89 status = lexbor_array_obj_init(tree->pending_table.text_list, 16,
90 sizeof(lexbor_str_t));
91 if (status != LXB_STATUS_OK) {
92 return status;
93 }
94
95 /* Parse errors */
96 tree->parse_errors = lexbor_array_obj_create();
97 status = lexbor_array_obj_init(tree->parse_errors, 16,
98 sizeof(lxb_html_tree_error_t));
99 if (status != LXB_STATUS_OK) {
100 return status;
101 }
102
103 tree->tkz_ref = lxb_html_tokenizer_ref(tkz);
104
105 tree->document = NULL;
106 tree->fragment = NULL;
107
108 tree->form = NULL;
109
110 tree->foster_parenting = false;
111 tree->frameset_ok = true;
112
113 tree->mode = lxb_html_tree_insertion_mode_initial;
114 tree->before_append_attr = NULL;
115
116 tree->status = LXB_STATUS_OK;
117
118 tree->ref_count = 1;
119
120 lxb_html_tokenizer_callback_token_done_set(tkz,
121 lxb_html_tree_token_callback,
122 tree);
123
124 return LXB_STATUS_OK;
125 }
126
127 lxb_html_tree_t *
lxb_html_tree_ref(lxb_html_tree_t * tree)128 lxb_html_tree_ref(lxb_html_tree_t *tree)
129 {
130 if (tree == NULL) {
131 return NULL;
132 }
133
134 tree->ref_count++;
135
136 return tree;
137 }
138
139 lxb_html_tree_t *
lxb_html_tree_unref(lxb_html_tree_t * tree)140 lxb_html_tree_unref(lxb_html_tree_t *tree)
141 {
142 if (tree == NULL || tree->ref_count == 0) {
143 return NULL;
144 }
145
146 tree->ref_count--;
147
148 if (tree->ref_count == 0) {
149 lxb_html_tree_destroy(tree);
150 }
151
152 return NULL;
153 }
154
155 void
lxb_html_tree_clean(lxb_html_tree_t * tree)156 lxb_html_tree_clean(lxb_html_tree_t *tree)
157 {
158 lexbor_array_clean(tree->open_elements);
159 lexbor_array_clean(tree->active_formatting);
160 lexbor_array_obj_clean(tree->template_insertion_modes);
161 lexbor_array_obj_clean(tree->pending_table.text_list);
162 lexbor_array_obj_clean(tree->parse_errors);
163
164 tree->document = NULL;
165 tree->fragment = NULL;
166
167 tree->form = NULL;
168
169 tree->foster_parenting = false;
170 tree->frameset_ok = true;
171
172 tree->mode = lxb_html_tree_insertion_mode_initial;
173 tree->before_append_attr = NULL;
174
175 tree->status = LXB_STATUS_OK;
176 }
177
178 lxb_html_tree_t *
lxb_html_tree_destroy(lxb_html_tree_t * tree)179 lxb_html_tree_destroy(lxb_html_tree_t *tree)
180 {
181 if (tree == NULL) {
182 return NULL;
183 }
184
185 tree->open_elements = lexbor_array_destroy(tree->open_elements, true);
186 tree->active_formatting = lexbor_array_destroy(tree->active_formatting,
187 true);
188 tree->template_insertion_modes = lexbor_array_obj_destroy(tree->template_insertion_modes,
189 true);
190 tree->pending_table.text_list = lexbor_array_obj_destroy(tree->pending_table.text_list,
191 true);
192
193 tree->parse_errors = lexbor_array_obj_destroy(tree->parse_errors, true);
194 tree->tkz_ref = lxb_html_tokenizer_unref(tree->tkz_ref);
195
196 return lexbor_free(tree);
197 }
198
199 static lxb_html_token_t *
lxb_html_tree_token_callback(lxb_html_tokenizer_t * tkz,lxb_html_token_t * token,void * ctx)200 lxb_html_tree_token_callback(lxb_html_tokenizer_t *tkz,
201 lxb_html_token_t *token, void *ctx)
202 {
203 lxb_status_t status;
204
205 status = lxb_html_tree_insertion_mode(ctx, token);
206 if (status != LXB_STATUS_OK) {
207 tkz->status = status;
208 return NULL;
209 }
210
211 return token;
212 }
213
214 /* TODO: not complete!!! */
215 lxb_status_t
lxb_html_tree_stop_parsing(lxb_html_tree_t * tree)216 lxb_html_tree_stop_parsing(lxb_html_tree_t *tree)
217 {
218 tree->document->ready_state = LXB_HTML_DOCUMENT_READY_STATE_COMPLETE;
219
220 return LXB_STATUS_OK;
221 }
222
223 bool
lxb_html_tree_process_abort(lxb_html_tree_t * tree)224 lxb_html_tree_process_abort(lxb_html_tree_t *tree)
225 {
226 if (tree->status == LXB_STATUS_OK) {
227 tree->status = LXB_STATUS_ABORTED;
228 }
229
230 tree->open_elements->length = 0;
231 tree->document->ready_state = LXB_HTML_DOCUMENT_READY_STATE_COMPLETE;
232
233 return true;
234 }
235
236 void
lxb_html_tree_parse_error(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_html_tree_error_id_t id)237 lxb_html_tree_parse_error(lxb_html_tree_t *tree, lxb_html_token_t *token,
238 lxb_html_tree_error_id_t id)
239 {
240 lxb_html_tree_error_add(tree->parse_errors, token, id);
241 }
242
243 bool
lxb_html_tree_construction_dispatcher(lxb_html_tree_t * tree,lxb_html_token_t * token)244 lxb_html_tree_construction_dispatcher(lxb_html_tree_t *tree,
245 lxb_html_token_t *token)
246 {
247 lxb_dom_node_t *adjusted;
248
249 adjusted = lxb_html_tree_adjusted_current_node(tree);
250
251 if (adjusted == NULL || adjusted->ns == LXB_NS_HTML) {
252 return tree->mode(tree, token);
253 }
254
255 if (lxb_html_tree_mathml_text_integration_point(adjusted))
256 {
257 if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
258 && token->tag_id != LXB_TAG_MGLYPH
259 && token->tag_id != LXB_TAG_MALIGNMARK)
260 {
261 return tree->mode(tree, token);
262 }
263
264 if (token->tag_id == LXB_TAG__TEXT) {
265 return tree->mode(tree, token);
266 }
267 }
268
269 if (adjusted->local_name == LXB_TAG_ANNOTATION_XML
270 && adjusted->ns == LXB_NS_MATH
271 && (token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
272 && token->tag_id == LXB_TAG_SVG)
273 {
274 return tree->mode(tree, token);
275 }
276
277 if (lxb_html_tree_html_integration_point(adjusted)) {
278 if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE) == 0
279 || token->tag_id == LXB_TAG__TEXT)
280 {
281 return tree->mode(tree, token);
282 }
283 }
284
285 if (token->tag_id == LXB_TAG__END_OF_FILE) {
286 return tree->mode(tree, token);
287 }
288
289 return lxb_html_tree_insertion_mode_foreign_content(tree, token);
290 }
291
292 static lxb_status_t
lxb_html_tree_insertion_mode(lxb_html_tree_t * tree,lxb_html_token_t * token)293 lxb_html_tree_insertion_mode(lxb_html_tree_t *tree, lxb_html_token_t *token)
294 {
295 while (lxb_html_tree_construction_dispatcher(tree, token) == false) {}
296
297 return tree->status;
298 }
299
300 /*
301 * Action
302 */
303 lxb_dom_node_t *
lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t * tree,lxb_dom_node_t * override_target,lxb_html_tree_insertion_position_t * ipos)304 lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t *tree,
305 lxb_dom_node_t *override_target,
306 lxb_html_tree_insertion_position_t *ipos)
307 {
308 lxb_dom_node_t *target, *adjusted_location = NULL;
309
310 *ipos = LXB_HTML_TREE_INSERTION_POSITION_CHILD;
311
312 if (override_target != NULL) {
313 target = override_target;
314 }
315 else {
316 target = lxb_html_tree_current_node(tree);
317 }
318
319 if (tree->foster_parenting && target->ns == LXB_NS_HTML
320 && (target->local_name == LXB_TAG_TABLE
321 || target->local_name == LXB_TAG_TBODY
322 || target->local_name == LXB_TAG_TFOOT
323 || target->local_name == LXB_TAG_THEAD
324 || target->local_name == LXB_TAG_TR))
325 {
326 lxb_dom_node_t *last_temp, *last_table;
327 size_t last_temp_idx, last_table_idx;
328
329 last_temp = lxb_html_tree_open_elements_find_reverse(tree,
330 LXB_TAG_TEMPLATE,
331 LXB_NS_HTML,
332 &last_temp_idx);
333
334 last_table = lxb_html_tree_open_elements_find_reverse(tree,
335 LXB_TAG_TABLE,
336 LXB_NS_HTML,
337 &last_table_idx);
338
339 if(last_temp != NULL && (last_table == NULL
340 || last_temp_idx > last_table_idx))
341 {
342 lxb_dom_document_fragment_t *doc_fragment;
343
344 doc_fragment = lxb_html_interface_template(last_temp)->content;
345
346 return lxb_dom_interface_node(doc_fragment);
347 }
348 else if (last_table == NULL) {
349 adjusted_location = lxb_html_tree_open_elements_first(tree);
350
351 lexbor_assert(adjusted_location != NULL);
352 lexbor_assert(adjusted_location->local_name == LXB_TAG_HTML);
353 }
354 else if (last_table->parent != NULL) {
355 adjusted_location = last_table;
356
357 *ipos = LXB_HTML_TREE_INSERTION_POSITION_BEFORE;
358 }
359 else {
360 lexbor_assert(last_table_idx != 0);
361
362 adjusted_location = lxb_html_tree_open_elements_get(tree,
363 last_table_idx - 1);
364 }
365 }
366 else {
367 adjusted_location = target;
368 }
369
370 if (adjusted_location == NULL) {
371 return NULL;
372 }
373
374 /*
375 * In Spec it is not entirely clear what is meant:
376 *
377 * If the adjusted insertion location is inside a template element,
378 * let it instead be inside the template element's template contents,
379 * after its last child (if any).
380 */
381 if (lxb_html_tree_node_is(adjusted_location, LXB_TAG_TEMPLATE)) {
382 lxb_dom_document_fragment_t *df;
383
384 df = lxb_html_interface_template(adjusted_location)->content;
385 adjusted_location = lxb_dom_interface_node(df);
386 }
387
388 return adjusted_location;
389 }
390
391 lxb_html_element_t *
lxb_html_tree_insert_foreign_element(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_ns_id_t ns)392 lxb_html_tree_insert_foreign_element(lxb_html_tree_t *tree,
393 lxb_html_token_t *token, lxb_ns_id_t ns)
394 {
395 lxb_status_t status;
396 lxb_dom_node_t *pos;
397 lxb_html_element_t *element;
398 lxb_html_tree_insertion_position_t ipos;
399
400 pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
401 if (pos == NULL) {
402 return NULL;
403 }
404
405 element = lxb_html_tree_create_element_for_token(tree, token, ns);
406 if (element == NULL) {
407 return NULL;
408 }
409
410 lxb_html_tree_insert_node(pos, lxb_dom_interface_node(element), ipos);
411
412 status = lxb_html_tree_open_elements_push(tree,
413 lxb_dom_interface_node(element));
414 if (status != LXB_HTML_STATUS_OK) {
415 return lxb_html_interface_destroy(element);
416 }
417
418 return element;
419 }
420
421 lxb_html_element_t *
lxb_html_tree_create_element_for_token(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_ns_id_t ns)422 lxb_html_tree_create_element_for_token(lxb_html_tree_t *tree,
423 lxb_html_token_t *token, lxb_ns_id_t ns)
424 {
425 lxb_dom_node_t *node = lxb_html_tree_create_node(tree, token->tag_id, ns);
426 if (node == NULL) {
427 return NULL;
428 }
429
430 node->line = token->line;
431 /* We only expose line number in PHP DOM */
432
433 lxb_status_t status;
434 lxb_dom_element_t *element = lxb_dom_interface_element(node);
435
436 if (token->base_element == NULL) {
437 status = lxb_html_tree_append_attributes(tree, element, token, ns);
438 }
439 else {
440 status = lxb_html_tree_append_attributes_from_element(tree, element,
441 token->base_element, ns);
442 }
443
444 if (status != LXB_HTML_STATUS_OK) {
445 return lxb_html_interface_destroy(element);
446 }
447
448 return lxb_html_interface_element(node);
449 }
450
451 lxb_status_t
lxb_html_tree_append_attributes(lxb_html_tree_t * tree,lxb_dom_element_t * element,lxb_html_token_t * token,lxb_ns_id_t ns)452 lxb_html_tree_append_attributes(lxb_html_tree_t *tree,
453 lxb_dom_element_t *element,
454 lxb_html_token_t *token, lxb_ns_id_t ns)
455 {
456 lxb_status_t status;
457 lxb_dom_attr_t *attr;
458 lxb_html_document_t *doc;
459 lxb_html_token_attr_t *token_attr = token->attr_first;
460
461 doc = lxb_html_interface_document(element->node.owner_document);
462
463 while (token_attr != NULL) {
464 attr = lxb_dom_element_attr_by_local_name_data(element,
465 token_attr->name);
466 if (attr != NULL) {
467 token_attr = token_attr->next;
468 continue;
469 }
470
471 attr = lxb_dom_attr_interface_create(lxb_dom_interface_document(doc));
472 if (attr == NULL) {
473 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
474 }
475
476 if (token_attr->value_begin != NULL) {
477 status = lxb_dom_attr_set_value_wo_copy(attr, token_attr->value,
478 token_attr->value_size);
479 if (status != LXB_HTML_STATUS_OK) {
480 return status;
481 }
482 }
483
484 attr->node.local_name = token_attr->name->attr_id;
485 attr->node.ns = ns;
486
487 /* Fix for adjust MathML/SVG attributes */
488 if (tree->before_append_attr != NULL) {
489 status = tree->before_append_attr(tree, attr, NULL);
490 if (status != LXB_STATUS_OK) {
491 return status;
492 }
493 }
494
495 lxb_dom_element_attr_append(element, attr);
496
497 token_attr = token_attr->next;
498 }
499
500 return LXB_HTML_STATUS_OK;
501 }
502
503 lxb_status_t
lxb_html_tree_append_attributes_from_element(lxb_html_tree_t * tree,lxb_dom_element_t * element,lxb_dom_element_t * from,lxb_ns_id_t ns)504 lxb_html_tree_append_attributes_from_element(lxb_html_tree_t *tree,
505 lxb_dom_element_t *element,
506 lxb_dom_element_t *from,
507 lxb_ns_id_t ns)
508 {
509 lxb_status_t status;
510 lxb_dom_attr_t *attr = from->first_attr;
511 lxb_dom_attr_t *new_attr;
512
513 while (attr != NULL) {
514 new_attr = lxb_dom_attr_interface_create(element->node.owner_document);
515 if (new_attr == NULL) {
516 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
517 }
518
519 status = lxb_dom_attr_clone_name_value(attr, new_attr);
520 if (status != LXB_HTML_STATUS_OK) {
521 return status;
522 }
523
524 new_attr->node.ns = attr->node.ns;
525
526 /* Fix for adjust MathML/SVG attributes */
527 if (tree->before_append_attr != NULL) {
528 status = tree->before_append_attr(tree, new_attr, NULL);
529 if (status != LXB_STATUS_OK) {
530 return status;
531 }
532 }
533
534 lxb_dom_element_attr_append(element, attr);
535
536 attr = attr->next;
537 }
538
539 return LXB_HTML_STATUS_OK;
540 }
541
542 lxb_status_t
lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)543 lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t *tree,
544 lxb_dom_attr_t *attr, void *ctx)
545 {
546 lexbor_hash_t *attrs;
547 const lxb_dom_attr_data_t *data;
548
549 attrs = attr->node.owner_document->attrs;
550 data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
551
552 if (data->entry.length == 13
553 && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
554 (const lxb_char_t *) "definitionurl"))
555 {
556 data = lxb_dom_attr_qualified_name_append(attrs,
557 (const lxb_char_t *) "definitionURL", 13);
558 if (data == NULL) {
559 return LXB_STATUS_ERROR;
560 }
561
562 attr->qualified_name = data->attr_id;
563 }
564
565 return LXB_STATUS_OK;
566 }
567
568 lxb_status_t
lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)569 lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t *tree,
570 lxb_dom_attr_t *attr, void *ctx)
571 {
572 lexbor_hash_t *attrs;
573 const lxb_dom_attr_data_t *data;
574 const lxb_html_tree_res_attr_adjust_t *adjust;
575
576 size_t len = sizeof(lxb_html_tree_res_attr_adjust_svg_map)
577 / sizeof(lxb_html_tree_res_attr_adjust_t);
578
579 attrs = attr->node.owner_document->attrs;
580
581 data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
582
583 for (size_t i = 0; i < len; i++) {
584 adjust = &lxb_html_tree_res_attr_adjust_svg_map[i];
585
586 if (data->entry.length == adjust->len
587 && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
588 (const lxb_char_t *) adjust->from))
589 {
590 data = lxb_dom_attr_qualified_name_append(attrs,
591 (const lxb_char_t *) adjust->to, adjust->len);
592 if (data == NULL) {
593 return LXB_STATUS_ERROR;
594 }
595
596 attr->qualified_name = data->attr_id;
597
598 return LXB_STATUS_OK;
599 }
600 }
601
602 return LXB_STATUS_OK;
603 }
604
605 lxb_status_t
lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)606 lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t *tree,
607 lxb_dom_attr_t *attr, void *ctx)
608 {
609 size_t lname_length;
610 lexbor_hash_t *attrs, *prefix;
611 const lxb_dom_attr_data_t *attr_data;
612 const lxb_ns_prefix_data_t *prefix_data;
613 const lxb_dom_attr_data_t *data;
614 const lxb_html_tree_res_attr_adjust_foreign_t *adjust;
615
616 size_t len = sizeof(lxb_html_tree_res_attr_adjust_foreign_map)
617 / sizeof(lxb_html_tree_res_attr_adjust_foreign_t);
618
619 attrs = attr->node.owner_document->attrs;
620 prefix = attr->node.owner_document->prefix;
621
622 data = lxb_dom_attr_data_by_id(attrs, attr->node.local_name);
623
624 for (size_t i = 0; i < len; i++) {
625 adjust = &lxb_html_tree_res_attr_adjust_foreign_map[i];
626
627 if (data->entry.length == adjust->name_len
628 && lexbor_str_data_cmp(lexbor_hash_entry_str(&data->entry),
629 (const lxb_char_t *) adjust->name))
630 {
631 if (adjust->prefix_len != 0) {
632 data = lxb_dom_attr_qualified_name_append(attrs,
633 (const lxb_char_t *) adjust->name, adjust->name_len);
634 if (data == NULL) {
635 return LXB_STATUS_ERROR;
636 }
637
638 attr->qualified_name = data->attr_id;
639
640 lname_length = adjust->name_len - adjust->prefix_len - 1;
641
642 attr_data = lxb_dom_attr_local_name_append(attrs,
643 (const lxb_char_t *) adjust->local_name, lname_length);
644 if (attr_data == NULL) {
645 return LXB_STATUS_ERROR;
646 }
647
648 attr->node.local_name = attr_data->attr_id;
649
650 prefix_data = lxb_ns_prefix_append(prefix,
651 (const lxb_char_t *) adjust->prefix, adjust->prefix_len);
652 if (prefix_data == NULL) {
653 return LXB_STATUS_ERROR;
654 }
655
656 attr->node.prefix = prefix_data->prefix_id;
657 }
658
659 attr->node.ns = adjust->ns;
660
661 return LXB_STATUS_OK;
662 }
663 }
664
665 return LXB_STATUS_OK;
666 }
667
668 lxb_status_t
lxb_html_tree_insert_character(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_dom_node_t ** ret_node)669 lxb_html_tree_insert_character(lxb_html_tree_t *tree, lxb_html_token_t *token,
670 lxb_dom_node_t **ret_node)
671 {
672 size_t size;
673 lxb_status_t status;
674 lexbor_str_t str = {0};
675
676 size = token->text_end - token->text_start;
677
678 lexbor_str_init(&str, tree->document->dom_document.text, size + 1);
679 if (str.data == NULL) {
680 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
681 }
682
683 memcpy(str.data, token->text_start, size);
684
685 str.data[size] = 0x00;
686 str.length = size;
687
688 status = lxb_html_tree_insert_character_for_data(tree, &str, ret_node);
689 if (status != LXB_STATUS_OK) {
690 return status;
691 }
692
693 return LXB_STATUS_OK;
694 }
695
696 lxb_status_t
lxb_html_tree_insert_character_for_data(lxb_html_tree_t * tree,lexbor_str_t * str,lxb_dom_node_t ** ret_node)697 lxb_html_tree_insert_character_for_data(lxb_html_tree_t *tree,
698 lexbor_str_t *str,
699 lxb_dom_node_t **ret_node)
700 {
701 const lxb_char_t *data;
702 lxb_dom_node_t *pos;
703 lxb_dom_character_data_t *chrs = NULL;
704 lxb_html_tree_insertion_position_t ipos;
705
706 if (ret_node != NULL) {
707 *ret_node = NULL;
708 }
709
710 pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
711 if (pos == NULL) {
712 return LXB_STATUS_ERROR;
713 }
714
715 if (lxb_html_tree_node_is(pos, LXB_TAG__DOCUMENT)) {
716 goto destroy_str;
717 }
718
719 if (ipos == LXB_HTML_TREE_INSERTION_POSITION_BEFORE) {
720 /* No need check namespace */
721 if (pos->prev != NULL && pos->prev->local_name == LXB_TAG__TEXT) {
722 chrs = lxb_dom_interface_character_data(pos->prev);
723
724 if (ret_node != NULL) {
725 *ret_node = pos->prev;
726 }
727 }
728 }
729 else {
730 /* No need check namespace */
731 if (pos->last_child != NULL
732 && pos->last_child->local_name == LXB_TAG__TEXT)
733 {
734 chrs = lxb_dom_interface_character_data(pos->last_child);
735
736 if (ret_node != NULL) {
737 *ret_node = pos->last_child;
738 }
739 }
740 }
741
742 if (chrs != NULL) {
743 /* This is error. This can not happen, but... */
744 if (chrs->data.data == NULL) {
745 data = lexbor_str_init(&chrs->data, tree->document->dom_document.text,
746 str->length);
747 if (data == NULL) {
748 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
749 }
750 }
751
752 data = lexbor_str_append(&chrs->data, tree->document->dom_document.text,
753 str->data, str->length);
754 if (data == NULL) {
755 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
756 }
757
758 goto destroy_str;
759 }
760
761 lxb_dom_node_t *text = lxb_html_tree_create_node(tree, LXB_TAG__TEXT,
762 LXB_NS_HTML);
763 if (text == NULL) {
764 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
765 }
766
767 lxb_dom_interface_text(text)->char_data.data = *str;
768
769 if (tree->tkz_ref) {
770 text->line = tree->tkz_ref->token->line;
771 /* We only expose line number in PHP DOM */
772 }
773
774 if (ret_node != NULL) {
775 *ret_node = text;
776 }
777
778 lxb_html_tree_insert_node(pos, text, ipos);
779
780 return LXB_STATUS_OK;
781
782 destroy_str:
783
784 lexbor_str_destroy(str, tree->document->dom_document.text, false);
785
786 return LXB_STATUS_OK;
787 }
788
789 lxb_dom_comment_t *
lxb_html_tree_insert_comment(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_dom_node_t * pos)790 lxb_html_tree_insert_comment(lxb_html_tree_t *tree,
791 lxb_html_token_t *token, lxb_dom_node_t *pos)
792 {
793 lxb_dom_node_t *node;
794 lxb_dom_comment_t *comment;
795 lxb_html_tree_insertion_position_t ipos;
796
797 if (pos == NULL) {
798 pos = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
799 }
800 else {
801 ipos = LXB_HTML_TREE_INSERTION_POSITION_CHILD;
802 }
803
804 lexbor_assert(pos != NULL);
805
806 node = lxb_html_tree_create_node(tree, token->tag_id, pos->ns);
807 comment = lxb_dom_interface_comment(node);
808
809 if (comment == NULL) {
810 return NULL;
811 }
812
813 node->line = token->line;
814 /* We only expose line number in PHP DOM */
815
816 tree->status = lxb_html_token_make_text(token, &comment->char_data.data,
817 tree->document->dom_document.text);
818 if (tree->status != LXB_STATUS_OK) {
819 return NULL;
820 }
821
822 lxb_html_tree_insert_node(pos, node, ipos);
823
824 return comment;
825 }
826
827 lxb_dom_document_type_t *
lxb_html_tree_create_document_type_from_token(lxb_html_tree_t * tree,lxb_html_token_t * token)828 lxb_html_tree_create_document_type_from_token(lxb_html_tree_t *tree,
829 lxb_html_token_t *token)
830 {
831 lxb_status_t status;
832 lxb_dom_node_t *doctype_node;
833 lxb_dom_document_type_t *doc_type;
834
835 /* Create */
836 doctype_node = lxb_html_tree_create_node(tree, token->tag_id, LXB_NS_HTML);
837 if (doctype_node == NULL) {
838 return NULL;
839 }
840
841 doc_type = lxb_dom_interface_document_type(doctype_node);
842
843 /* Parse */
844 status = lxb_html_token_doctype_parse(token, doc_type);
845 if (status != LXB_STATUS_OK) {
846 return lxb_dom_document_type_interface_destroy(doc_type);
847 }
848
849 return doc_type;
850 }
851
852 /*
853 * TODO: need use ref and unref for nodes (ref counter)
854 * Not implemented until the end. It is necessary to finish it.
855 */
856 void
lxb_html_tree_node_delete_deep(lxb_html_tree_t * tree,lxb_dom_node_t * node)857 lxb_html_tree_node_delete_deep(lxb_html_tree_t *tree, lxb_dom_node_t *node)
858 {
859 lxb_dom_node_remove(node);
860 }
861
862 lxb_html_element_t *
lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t * tree,lxb_html_token_t * token)863 lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t *tree,
864 lxb_html_token_t *token)
865 {
866 lxb_html_element_t *element;
867
868 element = lxb_html_tree_insert_html_element(tree, token);
869 if (element == NULL) {
870 return NULL;
871 }
872
873 /*
874 * Need for tokenizer state RAWTEXT
875 * See description for 'lxb_html_tokenizer_state_rawtext_before' function
876 */
877 lxb_html_tokenizer_tmp_tag_id_set(tree->tkz_ref, token->tag_id);
878 lxb_html_tokenizer_state_set(tree->tkz_ref,
879 lxb_html_tokenizer_state_rawtext_before);
880
881 tree->original_mode = tree->mode;
882 tree->mode = lxb_html_tree_insertion_mode_text;
883
884 return element;
885 }
886
887 /* Magic of CopyPast power! */
888 lxb_html_element_t *
lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t * tree,lxb_html_token_t * token)889 lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t *tree,
890 lxb_html_token_t *token)
891 {
892 lxb_html_element_t *element;
893
894 element = lxb_html_tree_insert_html_element(tree, token);
895 if (element == NULL) {
896 return NULL;
897 }
898
899 /*
900 * Need for tokenizer state RCDATA
901 * See description for 'lxb_html_tokenizer_state_rcdata_before' function
902 */
903 lxb_html_tokenizer_tmp_tag_id_set(tree->tkz_ref, token->tag_id);
904 lxb_html_tokenizer_state_set(tree->tkz_ref,
905 lxb_html_tokenizer_state_rcdata_before);
906
907 tree->original_mode = tree->mode;
908 tree->mode = lxb_html_tree_insertion_mode_text;
909
910 return element;
911 }
912
913 void
lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t * tree,lxb_tag_id_t ex_tag,lxb_ns_id_t ex_ns)914 lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t *tree,
915 lxb_tag_id_t ex_tag, lxb_ns_id_t ex_ns)
916 {
917 lxb_dom_node_t *node;
918
919 lexbor_assert(tree->open_elements != 0);
920
921 while (lexbor_array_length(tree->open_elements) != 0) {
922 node = lxb_html_tree_current_node(tree);
923
924 lexbor_assert(node != NULL);
925
926 switch (node->local_name) {
927 case LXB_TAG_DD:
928 case LXB_TAG_DT:
929 case LXB_TAG_LI:
930 case LXB_TAG_OPTGROUP:
931 case LXB_TAG_OPTION:
932 case LXB_TAG_P:
933 case LXB_TAG_RB:
934 case LXB_TAG_RP:
935 case LXB_TAG_RT:
936 case LXB_TAG_RTC:
937 if(node->local_name == ex_tag && node->ns == ex_ns) {
938 return;
939 }
940
941 lxb_html_tree_open_elements_pop(tree);
942
943 break;
944
945 default:
946 return;
947 }
948 }
949 }
950
951 void
lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t * tree,lxb_tag_id_t ex_tag,lxb_ns_id_t ex_ns)952 lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t *tree,
953 lxb_tag_id_t ex_tag,
954 lxb_ns_id_t ex_ns)
955 {
956 lxb_dom_node_t *node;
957
958 lexbor_assert(tree->open_elements != 0);
959
960 while (lexbor_array_length(tree->open_elements) != 0) {
961 node = lxb_html_tree_current_node(tree);
962
963 lexbor_assert(node != NULL);
964
965 switch (node->local_name) {
966 case LXB_TAG_CAPTION:
967 case LXB_TAG_COLGROUP:
968 case LXB_TAG_DD:
969 case LXB_TAG_DT:
970 case LXB_TAG_LI:
971 case LXB_TAG_OPTGROUP:
972 case LXB_TAG_OPTION:
973 case LXB_TAG_P:
974 case LXB_TAG_RB:
975 case LXB_TAG_RP:
976 case LXB_TAG_RT:
977 case LXB_TAG_RTC:
978 case LXB_TAG_TBODY:
979 case LXB_TAG_TD:
980 case LXB_TAG_TFOOT:
981 case LXB_TAG_TH:
982 case LXB_TAG_THEAD:
983 case LXB_TAG_TR:
984 if(node->local_name == ex_tag && node->ns == ex_ns) {
985 return;
986 }
987
988 lxb_html_tree_open_elements_pop(tree);
989
990 break;
991
992 default:
993 return;
994 }
995 }
996 }
997
998 void
lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t * tree)999 lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t *tree)
1000 {
1001 lxb_dom_node_t *node;
1002 size_t idx = tree->open_elements->length;
1003
1004 /* Step 1 */
1005 bool last = false;
1006 void **list = tree->open_elements->list;
1007
1008 /* Step 3 */
1009 while (idx != 0) {
1010 idx--;
1011
1012 /* Step 2 */
1013 node = list[idx];
1014
1015 /* Step 3 */
1016 if (idx == 0) {
1017 last = true;
1018
1019 if (tree->fragment != NULL) {
1020 node = tree->fragment;
1021 }
1022 }
1023
1024 lexbor_assert(node != NULL);
1025
1026 /* Step 16 */
1027 if (node->ns != LXB_NS_HTML) {
1028 if (last) {
1029 tree->mode = lxb_html_tree_insertion_mode_in_body;
1030 return;
1031 }
1032
1033 continue;
1034 }
1035
1036 /* Step 4 */
1037 if (node->local_name == LXB_TAG_SELECT) {
1038 /* Step 4.1 */
1039 if (last) {
1040 tree->mode = lxb_html_tree_insertion_mode_in_select;
1041 return;
1042 }
1043
1044 /* Step 4.2 */
1045 size_t ancestor = idx;
1046
1047 for (;;) {
1048 /* Step 4.3 */
1049 if (ancestor == 0) {
1050 tree->mode = lxb_html_tree_insertion_mode_in_select;
1051 return;
1052 }
1053
1054 /* Step 4.4 */
1055 ancestor--;
1056
1057 /* Step 4.5 */
1058 lxb_dom_node_t *ancestor_node = list[ancestor];
1059
1060 if(lxb_html_tree_node_is(ancestor_node, LXB_TAG_TEMPLATE)) {
1061 tree->mode = lxb_html_tree_insertion_mode_in_select;
1062 return;
1063 }
1064
1065 /* Step 4.6 */
1066 else if(lxb_html_tree_node_is(ancestor_node, LXB_TAG_TABLE)) {
1067 tree->mode = lxb_html_tree_insertion_mode_in_select_in_table;
1068 return;
1069 }
1070 }
1071 }
1072
1073 /* Step 5-15 */
1074 switch (node->local_name) {
1075 case LXB_TAG_TD:
1076 case LXB_TAG_TH:
1077 if (last == false) {
1078 tree->mode = lxb_html_tree_insertion_mode_in_cell;
1079 return;
1080 }
1081
1082 break;
1083
1084 case LXB_TAG_TR:
1085 tree->mode = lxb_html_tree_insertion_mode_in_row;
1086 return;
1087
1088 case LXB_TAG_TBODY:
1089 case LXB_TAG_TFOOT:
1090 case LXB_TAG_THEAD:
1091 tree->mode = lxb_html_tree_insertion_mode_in_table_body;
1092 return;
1093
1094 case LXB_TAG_CAPTION:
1095 tree->mode = lxb_html_tree_insertion_mode_in_caption;
1096 return;
1097
1098 case LXB_TAG_COLGROUP:
1099 tree->mode = lxb_html_tree_insertion_mode_in_column_group;
1100 return;
1101
1102 case LXB_TAG_TABLE:
1103 tree->mode = lxb_html_tree_insertion_mode_in_table;
1104 return;
1105
1106 case LXB_TAG_TEMPLATE:
1107 tree->mode = lxb_html_tree_template_insertion_current(tree);
1108
1109 lexbor_assert(tree->mode != NULL);
1110
1111 return;
1112
1113 case LXB_TAG_HEAD:
1114 if (last == false) {
1115 tree->mode = lxb_html_tree_insertion_mode_in_head;
1116 return;
1117 }
1118
1119 break;
1120
1121 case LXB_TAG_BODY:
1122 tree->mode = lxb_html_tree_insertion_mode_in_body;
1123 return;
1124
1125 case LXB_TAG_FRAMESET:
1126 tree->mode = lxb_html_tree_insertion_mode_in_frameset;
1127 return;
1128
1129 case LXB_TAG_HTML: {
1130 if (tree->document->head == NULL) {
1131 tree->mode = lxb_html_tree_insertion_mode_before_head;
1132 return;
1133 }
1134
1135 tree->mode = lxb_html_tree_insertion_mode_after_head;
1136 return;
1137 }
1138
1139 default:
1140 break;
1141 }
1142
1143 /* Step 16 */
1144 if (last) {
1145 tree->mode = lxb_html_tree_insertion_mode_in_body;
1146 return;
1147 }
1148 }
1149 }
1150
1151 lxb_dom_node_t *
lxb_html_tree_element_in_scope(lxb_html_tree_t * tree,lxb_tag_id_t tag_id,lxb_ns_id_t ns,lxb_html_tag_category_t ct)1152 lxb_html_tree_element_in_scope(lxb_html_tree_t *tree, lxb_tag_id_t tag_id,
1153 lxb_ns_id_t ns, lxb_html_tag_category_t ct)
1154 {
1155 lxb_dom_node_t *node;
1156
1157 size_t idx = tree->open_elements->length;
1158 void **list = tree->open_elements->list;
1159
1160 while (idx != 0) {
1161 idx--;
1162 node = list[idx];
1163
1164 if (node->local_name == tag_id && node->ns == ns) {
1165 return node;
1166 }
1167
1168 if (lxb_html_tag_is_category(node->local_name, node->ns, ct)) {
1169 return NULL;
1170 }
1171 }
1172
1173 return NULL;
1174 }
1175
1176 lxb_dom_node_t *
lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t * tree,lxb_dom_node_t * by_node,lxb_html_tag_category_t ct)1177 lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t *tree,
1178 lxb_dom_node_t *by_node,
1179 lxb_html_tag_category_t ct)
1180 {
1181 lxb_dom_node_t *node;
1182
1183 size_t idx = tree->open_elements->length;
1184 void **list = tree->open_elements->list;
1185
1186 while (idx != 0) {
1187 idx--;
1188 node = list[idx];
1189
1190 if (node == by_node) {
1191 return node;
1192 }
1193
1194 if (lxb_html_tag_is_category(node->local_name, node->ns, ct)) {
1195 return NULL;
1196 }
1197 }
1198
1199 return NULL;
1200 }
1201
1202 lxb_dom_node_t *
lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t * tree)1203 lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t *tree)
1204 {
1205 lxb_dom_node_t *node;
1206
1207 size_t idx = tree->open_elements->length;
1208 void **list = tree->open_elements->list;
1209
1210 while (idx != 0) {
1211 idx--;
1212 node = list[idx];
1213
1214 switch (node->local_name) {
1215 case LXB_TAG_H1:
1216 case LXB_TAG_H2:
1217 case LXB_TAG_H3:
1218 case LXB_TAG_H4:
1219 case LXB_TAG_H5:
1220 case LXB_TAG_H6:
1221 if (node->ns == LXB_NS_HTML) {
1222 return node;
1223 }
1224
1225 break;
1226
1227 default:
1228 break;
1229 }
1230
1231 if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1232 LXB_HTML_TAG_CATEGORY_SCOPE))
1233 {
1234 return NULL;
1235 }
1236 }
1237
1238 return NULL;
1239 }
1240
1241 lxb_dom_node_t *
lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t * tree)1242 lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t *tree)
1243 {
1244 lxb_dom_node_t *node;
1245
1246 size_t idx = tree->open_elements->length;
1247 void **list = tree->open_elements->list;
1248
1249 while (idx != 0) {
1250 idx--;
1251 node = list[idx];
1252
1253 switch (node->local_name) {
1254 case LXB_TAG_TBODY:
1255 case LXB_TAG_THEAD:
1256 case LXB_TAG_TFOOT:
1257 if (node->ns == LXB_NS_HTML) {
1258 return node;
1259 }
1260
1261 break;
1262
1263 default:
1264 break;
1265 }
1266
1267 if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1268 LXB_HTML_TAG_CATEGORY_SCOPE_TABLE))
1269 {
1270 return NULL;
1271 }
1272 }
1273
1274 return NULL;
1275 }
1276
1277 lxb_dom_node_t *
lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t * tree)1278 lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t *tree)
1279 {
1280 lxb_dom_node_t *node;
1281
1282 size_t idx = tree->open_elements->length;
1283 void **list = tree->open_elements->list;
1284
1285 while (idx != 0) {
1286 idx--;
1287 node = list[idx];
1288
1289 switch (node->local_name) {
1290 case LXB_TAG_TD:
1291 case LXB_TAG_TH:
1292 if (node->ns == LXB_NS_HTML) {
1293 return node;
1294 }
1295
1296 break;
1297
1298 default:
1299 break;
1300 }
1301
1302 if (lxb_html_tag_is_category(node->local_name, LXB_NS_HTML,
1303 LXB_HTML_TAG_CATEGORY_SCOPE_TABLE))
1304 {
1305 return NULL;
1306 }
1307 }
1308
1309 return NULL;
1310 }
1311
1312 bool
lxb_html_tree_check_scope_element(lxb_html_tree_t * tree)1313 lxb_html_tree_check_scope_element(lxb_html_tree_t *tree)
1314 {
1315 lxb_dom_node_t *node;
1316
1317 for (size_t i = 0; i < tree->open_elements->length; i++) {
1318 node = tree->open_elements->list[i];
1319
1320 switch (node->local_name) {
1321 case LXB_TAG_DD:
1322 case LXB_TAG_DT:
1323 case LXB_TAG_LI:
1324 case LXB_TAG_OPTGROUP:
1325 case LXB_TAG_OPTION:
1326 case LXB_TAG_P:
1327 case LXB_TAG_RB:
1328 case LXB_TAG_RP:
1329 case LXB_TAG_RT:
1330 case LXB_TAG_RTC:
1331 case LXB_TAG_TBODY:
1332 case LXB_TAG_TD:
1333 case LXB_TAG_TFOOT:
1334 case LXB_TAG_TH:
1335 case LXB_TAG_THEAD:
1336 case LXB_TAG_TR:
1337 case LXB_TAG_BODY:
1338 case LXB_TAG_HTML:
1339 return true;
1340
1341 default:
1342 break;
1343 }
1344 }
1345
1346 return false;
1347 }
1348
1349 void
lxb_html_tree_close_p_element(lxb_html_tree_t * tree,lxb_html_token_t * token)1350 lxb_html_tree_close_p_element(lxb_html_tree_t *tree, lxb_html_token_t *token)
1351 {
1352 lxb_html_tree_generate_implied_end_tags(tree, LXB_TAG_P, LXB_NS_HTML);
1353
1354 lxb_dom_node_t *node = lxb_html_tree_current_node(tree);
1355
1356 if (lxb_html_tree_node_is(node, LXB_TAG_P) == false) {
1357 lxb_html_tree_parse_error(tree, token,
1358 LXB_HTML_RULES_ERROR_UNELINOPELST);
1359 }
1360
1361 lxb_html_tree_open_elements_pop_until_tag_id(tree, LXB_TAG_P, LXB_NS_HTML,
1362 true);
1363 }
1364
1365 #include "lexbor/html/serialize.h"
1366
1367 bool
lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t * tree,lxb_html_token_t * token,lxb_status_t * status)1368 lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t *tree,
1369 lxb_html_token_t *token,
1370 lxb_status_t *status)
1371 {
1372 lexbor_assert(tree->open_elements->length != 0);
1373
1374 /* State 1 */
1375 bool is;
1376 short outer_loop;
1377 lxb_html_element_t *element;
1378 lxb_dom_node_t *node, *marker, **oel_list, **afe_list;
1379
1380 lxb_tag_id_t subject = token->tag_id;
1381
1382 oel_list = (lxb_dom_node_t **) tree->open_elements->list;
1383 afe_list = (lxb_dom_node_t **) tree->active_formatting->list;
1384 marker = (lxb_dom_node_t *) lxb_html_tree_active_formatting_marker();
1385
1386 *status = LXB_STATUS_OK;
1387
1388 /* State 2 */
1389 node = lxb_html_tree_current_node(tree);
1390 lexbor_assert(node != NULL);
1391
1392 if (lxb_html_tree_node_is(node, subject)) {
1393 is = lxb_html_tree_active_formatting_find_by_node_reverse(tree, node,
1394 NULL);
1395 if (is == false) {
1396 lxb_html_tree_open_elements_pop(tree);
1397
1398 return false;
1399 }
1400 }
1401
1402 /* State 3 */
1403 outer_loop = 0;
1404
1405 /* State 4 */
1406 while (outer_loop < 8) {
1407 /* State 5 */
1408 outer_loop++;
1409
1410 /* State 6 */
1411 size_t formatting_index = 0;
1412 size_t idx = tree->active_formatting->length;
1413 lxb_dom_node_t *formatting_element = NULL;
1414
1415 while (idx) {
1416 idx--;
1417
1418 if (afe_list[idx] == marker) {
1419 return true;
1420 }
1421 else if (afe_list[idx]->local_name == subject) {
1422 formatting_index = idx;
1423 formatting_element = afe_list[idx];
1424
1425 break;
1426 }
1427 }
1428
1429 if (formatting_element == NULL) {
1430 return true;
1431 }
1432
1433 /* State 7 */
1434 size_t oel_formatting_idx;
1435 is = lxb_html_tree_open_elements_find_by_node_reverse(tree,
1436 formatting_element,
1437 &oel_formatting_idx);
1438 if (is == false) {
1439 lxb_html_tree_parse_error(tree, token,
1440 LXB_HTML_RULES_ERROR_MIELINOPELST);
1441
1442 lxb_html_tree_active_formatting_remove_by_node(tree,
1443 formatting_element);
1444
1445 return false;
1446 }
1447
1448 /* State 8 */
1449 node = lxb_html_tree_element_in_scope_by_node(tree, formatting_element,
1450 LXB_HTML_TAG_CATEGORY_SCOPE);
1451 if (node == NULL) {
1452 lxb_html_tree_parse_error(tree, token,
1453 LXB_HTML_RULES_ERROR_MIELINSC);
1454 return false;
1455 }
1456
1457 /* State 9 */
1458 node = lxb_html_tree_current_node(tree);
1459
1460 if (formatting_element != node) {
1461 lxb_html_tree_parse_error(tree, token,
1462 LXB_HTML_RULES_ERROR_UNELINOPELST);
1463 }
1464
1465 /* State 10 */
1466 lxb_dom_node_t *furthest_block = NULL;
1467 size_t furthest_block_idx = 0;
1468 size_t oel_idx = tree->open_elements->length;
1469
1470 for (furthest_block_idx = oel_formatting_idx;
1471 furthest_block_idx < oel_idx;
1472 furthest_block_idx++)
1473 {
1474 is = lxb_html_tag_is_category(oel_list[furthest_block_idx]->local_name,
1475 oel_list[furthest_block_idx]->ns,
1476 LXB_HTML_TAG_CATEGORY_SPECIAL);
1477 if (is) {
1478 furthest_block = oel_list[furthest_block_idx];
1479
1480 break;
1481 }
1482 }
1483
1484 /* State 11 */
1485 if (furthest_block == NULL) {
1486 lxb_html_tree_open_elements_pop_until_node(tree, formatting_element,
1487 true);
1488
1489 lxb_html_tree_active_formatting_remove_by_node(tree,
1490 formatting_element);
1491
1492 return false;
1493 }
1494
1495 lexbor_assert(oel_formatting_idx != 0);
1496
1497 /* State 12 */
1498 lxb_dom_node_t *common_ancestor = oel_list[oel_formatting_idx - 1];
1499
1500 /* State 13 */
1501 size_t bookmark = formatting_index;
1502
1503 /* State 14 */
1504 lxb_dom_node_t *node;
1505 lxb_dom_node_t *last = furthest_block;
1506 size_t node_idx = furthest_block_idx;
1507
1508 /* State 14.1 */
1509 size_t inner_loop_counter = 0;
1510
1511 /* State 14.2 */
1512 while (1) {
1513 inner_loop_counter++;
1514
1515 /* State 14.3 */
1516 lexbor_assert(node_idx != 0);
1517
1518 if (node_idx == 0) {
1519 return false;
1520 }
1521
1522 node_idx--;
1523 node = oel_list[node_idx];
1524
1525 /* State 14.4 */
1526 if (node == formatting_element) {
1527 break;
1528 }
1529
1530 /* State 14.5 */
1531 size_t afe_node_idx;
1532 is = lxb_html_tree_active_formatting_find_by_node_reverse(tree,
1533 node,
1534 &afe_node_idx);
1535 /* State 14.5 */
1536 if (inner_loop_counter > 3 && is) {
1537 lxb_html_tree_active_formatting_remove_by_node(tree, node);
1538
1539 continue;
1540 }
1541
1542 /* State 14.6 */
1543 if (is == false) {
1544 lxb_html_tree_open_elements_remove_by_node(tree, node);
1545
1546 continue;
1547 }
1548
1549 /* State 14.7 */
1550 lxb_html_token_t fake_token = {0};
1551
1552 fake_token.tag_id = node->local_name;
1553 fake_token.base_element = node;
1554
1555 element = lxb_html_tree_create_element_for_token(tree, &fake_token,
1556 LXB_NS_HTML);
1557 if (element == NULL) {
1558 *status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
1559
1560 return false;
1561 }
1562
1563 node = lxb_dom_interface_node(element);
1564
1565 afe_list[afe_node_idx] = node;
1566 oel_list[node_idx] = node;
1567
1568 /* State 14.8 */
1569 if (last == furthest_block) {
1570 bookmark = afe_node_idx + 1;
1571
1572 lexbor_assert(bookmark < tree->active_formatting->length);
1573 }
1574
1575 /* State 14.9 */
1576 if (last->parent != NULL) {
1577 lxb_dom_node_remove_wo_events(last);
1578 }
1579
1580 lxb_dom_node_insert_child_wo_events(node, last);
1581
1582 /* State 14.10 */
1583 last = node;
1584 }
1585
1586 if (last->parent != NULL) {
1587 lxb_dom_node_remove_wo_events(last);
1588 }
1589
1590 /* State 15 */
1591 lxb_dom_node_t *pos;
1592 lxb_html_tree_insertion_position_t ipos;
1593
1594 pos = lxb_html_tree_appropriate_place_inserting_node(tree,
1595 common_ancestor,
1596 &ipos);
1597 if (pos == NULL) {
1598 return false;
1599 }
1600
1601 lxb_html_tree_insert_node(pos, last, ipos);
1602
1603 /* State 16 */
1604 lxb_html_token_t fake_token = {0};
1605
1606 fake_token.tag_id = formatting_element->local_name;
1607 fake_token.base_element = formatting_element;
1608
1609 element = lxb_html_tree_create_element_for_token(tree, &fake_token,
1610 LXB_NS_HTML);
1611 if (element == NULL) {
1612 *status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
1613
1614 return false;
1615 }
1616
1617 /* State 17 */
1618 lxb_dom_node_t *next;
1619 node = furthest_block->first_child;
1620
1621 while (node != NULL) {
1622 next = node->next;
1623
1624 lxb_dom_node_remove_wo_events(node);
1625 lxb_dom_node_insert_child_wo_events(lxb_dom_interface_node(element),
1626 node);
1627 node = next;
1628 }
1629
1630 node = lxb_dom_interface_node(element);
1631
1632 /* State 18 */
1633 lxb_dom_node_insert_child_wo_events(furthest_block, node);
1634
1635 /* State 19 */
1636 lxb_html_tree_active_formatting_remove(tree, formatting_index);
1637
1638 if (bookmark > tree->active_formatting->length) {
1639 bookmark = tree->active_formatting->length;
1640 }
1641
1642 *status = lxb_html_tree_active_formatting_insert(tree, node, bookmark);
1643 if (*status != LXB_STATUS_OK) {
1644 return false;
1645 }
1646
1647 /* State 20 */
1648 lxb_html_tree_open_elements_remove_by_node(tree, formatting_element);
1649
1650 lxb_html_tree_open_elements_find_by_node(tree, furthest_block,
1651 &furthest_block_idx);
1652
1653 *status = lxb_html_tree_open_elements_insert_after(tree, node,
1654 furthest_block_idx);
1655 if (*status != LXB_STATUS_OK) {
1656 return false;
1657 }
1658 }
1659
1660 return false;
1661 }
1662
1663 bool
lxb_html_tree_html_integration_point(lxb_dom_node_t * node)1664 lxb_html_tree_html_integration_point(lxb_dom_node_t *node)
1665 {
1666 if (node->ns == LXB_NS_MATH
1667 && node->local_name == LXB_TAG_ANNOTATION_XML)
1668 {
1669 lxb_dom_attr_t *attr;
1670 attr = lxb_dom_element_attr_is_exist(lxb_dom_interface_element(node),
1671 (const lxb_char_t *) "encoding",
1672 8);
1673 if (attr == NULL || attr->value == NULL) {
1674 return false;
1675 }
1676
1677 if (attr->value->length == 9
1678 && lexbor_str_data_casecmp(attr->value->data,
1679 (const lxb_char_t *) "text/html"))
1680 {
1681 return true;
1682 }
1683
1684 if (attr->value->length == 21
1685 && lexbor_str_data_casecmp(attr->value->data,
1686 (const lxb_char_t *) "application/xhtml+xml"))
1687 {
1688 return true;
1689 }
1690
1691 return false;
1692 }
1693
1694 if (node->ns == LXB_NS_SVG
1695 && (node->local_name == LXB_TAG_FOREIGNOBJECT
1696 || node->local_name == LXB_TAG_DESC
1697 || node->local_name == LXB_TAG_TITLE))
1698 {
1699 return true;
1700 }
1701
1702 return false;
1703 }
1704
1705 lxb_status_t
lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)1706 lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t *tree,
1707 lxb_dom_attr_t *attr, void *ctx)
1708 {
1709 lxb_status_t status;
1710
1711 status = lxb_html_tree_adjust_mathml_attributes(tree, attr, ctx);
1712 if (status !=LXB_STATUS_OK) {
1713 return status;
1714 }
1715
1716 return lxb_html_tree_adjust_foreign_attributes(tree, attr, ctx);
1717 }
1718
1719 lxb_status_t
lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t * tree,lxb_dom_attr_t * attr,void * ctx)1720 lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t *tree,
1721 lxb_dom_attr_t *attr, void *ctx)
1722 {
1723 lxb_status_t status;
1724
1725 status = lxb_html_tree_adjust_svg_attributes(tree, attr, ctx);
1726 if (status !=LXB_STATUS_OK) {
1727 return status;
1728 }
1729
1730 return lxb_html_tree_adjust_foreign_attributes(tree, attr, ctx);
1731 }
1732