1 /*
2  * Copyright (C) 2018-2020 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #include "lexbor/html/tree/insertion_mode.h"
8 #include "lexbor/html/tree/open_elements.h"
9 #include "lexbor/html/tree/active_formatting.h"
10 
11 
12 lxb_inline void
lxb_html_tree_clear_stack_back_to_table_context(lxb_html_tree_t * tree)13 lxb_html_tree_clear_stack_back_to_table_context(lxb_html_tree_t *tree)
14 {
15     lxb_dom_node_t *current = lxb_html_tree_current_node(tree);
16 
17     while ((current->local_name != LXB_TAG_TABLE
18             && current->local_name != LXB_TAG_TEMPLATE
19             && current->local_name != LXB_TAG_HTML)
20            || current->ns != LXB_NS_HTML)
21     {
22         lxb_html_tree_open_elements_pop(tree);
23         current = lxb_html_tree_current_node(tree);
24     }
25 }
26 
27 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_text_open(lxb_html_tree_t * tree,lxb_html_token_t * token)28 lxb_html_tree_insertion_mode_in_table_text_open(lxb_html_tree_t *tree,
29                                                 lxb_html_token_t *token)
30 {
31     lxb_dom_node_t *node = lxb_html_tree_current_node(tree);
32 
33     if (node->ns == LXB_NS_HTML &&
34         (node->local_name == LXB_TAG_TABLE
35          || node->local_name == LXB_TAG_TBODY
36          || node->local_name == LXB_TAG_TFOOT
37          || node->local_name == LXB_TAG_THEAD
38          || node->local_name == LXB_TAG_TR))
39     {
40         tree->pending_table.text_list->length = 0;
41         tree->pending_table.have_non_ws = false;
42 
43         tree->original_mode = tree->mode;
44         tree->mode = lxb_html_tree_insertion_mode_in_table_text;
45 
46         return false;
47     }
48 
49     return lxb_html_tree_insertion_mode_in_table_anything_else(tree, token);
50 }
51 
52 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_comment(lxb_html_tree_t * tree,lxb_html_token_t * token)53 lxb_html_tree_insertion_mode_in_table_comment(lxb_html_tree_t *tree,
54                                               lxb_html_token_t *token)
55 {
56     lxb_dom_comment_t *comment;
57 
58     comment = lxb_html_tree_insert_comment(tree, token, NULL);
59     if (comment == NULL) {
60         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
61 
62         return lxb_html_tree_process_abort(tree);
63     }
64 
65     return true;
66 }
67 
68 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_doctype(lxb_html_tree_t * tree,lxb_html_token_t * token)69 lxb_html_tree_insertion_mode_in_table_doctype(lxb_html_tree_t *tree,
70                                               lxb_html_token_t *token)
71 {
72     lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_DOTOINTAMO);
73 
74     return true;
75 }
76 
77 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_caption(lxb_html_tree_t * tree,lxb_html_token_t * token)78 lxb_html_tree_insertion_mode_in_table_caption(lxb_html_tree_t *tree,
79                                               lxb_html_token_t *token)
80 {
81     lxb_html_element_t *element;
82 
83     lxb_html_tree_clear_stack_back_to_table_context(tree);
84 
85     tree->status = lxb_html_tree_active_formatting_push_marker(tree);
86     if (tree->status != LXB_STATUS_OK) {
87         return lxb_html_tree_process_abort(tree);
88     }
89 
90     element = lxb_html_tree_insert_html_element(tree, token);
91     if (element == NULL) {
92         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
93 
94         return lxb_html_tree_process_abort(tree);
95     }
96 
97     tree->mode = lxb_html_tree_insertion_mode_in_caption;
98 
99     return true;
100 }
101 
102 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_colgroup(lxb_html_tree_t * tree,lxb_html_token_t * token)103 lxb_html_tree_insertion_mode_in_table_colgroup(lxb_html_tree_t *tree,
104                                                lxb_html_token_t *token)
105 {
106     lxb_html_element_t *element;
107 
108     lxb_html_tree_clear_stack_back_to_table_context(tree);
109 
110     element = lxb_html_tree_insert_html_element(tree, token);
111     if (element == NULL) {
112         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
113 
114         return lxb_html_tree_process_abort(tree);
115     }
116 
117     tree->mode = lxb_html_tree_insertion_mode_in_column_group;
118 
119     return true;
120 }
121 
122 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_col(lxb_html_tree_t * tree,lxb_html_token_t * token)123 lxb_html_tree_insertion_mode_in_table_col(lxb_html_tree_t *tree,
124                                           lxb_html_token_t *token)
125 {
126     lxb_html_element_t *element;
127     lxb_html_token_t fake_token = {0};
128 
129     lxb_html_tree_clear_stack_back_to_table_context(tree);
130 
131     fake_token.tag_id = LXB_TAG_COLGROUP;
132     fake_token.attr_first = NULL;
133     fake_token.attr_last = NULL;
134 
135     element = lxb_html_tree_insert_html_element(tree, &fake_token);
136     if (element == NULL) {
137         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
138 
139         return lxb_html_tree_process_abort(tree);
140     }
141 
142     tree->mode = lxb_html_tree_insertion_mode_in_column_group;
143 
144     return false;
145 }
146 
147 /*
148  * "tbody", "tfoot", "thead"
149  */
150 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_tbtfth(lxb_html_tree_t * tree,lxb_html_token_t * token)151 lxb_html_tree_insertion_mode_in_table_tbtfth(lxb_html_tree_t *tree,
152                                              lxb_html_token_t *token)
153 {
154     lxb_html_element_t *element;
155 
156     lxb_html_tree_clear_stack_back_to_table_context(tree);
157 
158     element = lxb_html_tree_insert_html_element(tree, token);
159     if (element == NULL) {
160         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
161 
162         return lxb_html_tree_process_abort(tree);
163     }
164 
165     tree->mode = lxb_html_tree_insertion_mode_in_table_body;
166 
167     return true;
168 }
169 
170 /*
171  * "td", "th", "tr"
172  */
173 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_tdthtr(lxb_html_tree_t * tree,lxb_html_token_t * token)174 lxb_html_tree_insertion_mode_in_table_tdthtr(lxb_html_tree_t *tree,
175                                              lxb_html_token_t *token)
176 {
177     lxb_html_element_t *element;
178     lxb_html_token_t fake_token = {0};
179 
180     lxb_html_tree_clear_stack_back_to_table_context(tree);
181 
182     fake_token.tag_id = LXB_TAG_TBODY;
183     fake_token.attr_first = NULL;
184     fake_token.attr_last = NULL;
185 
186     element = lxb_html_tree_insert_html_element(tree, &fake_token);
187     if (element == NULL) {
188         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
189 
190         return lxb_html_tree_process_abort(tree);
191     }
192 
193     tree->mode = lxb_html_tree_insertion_mode_in_table_body;
194 
195     return false;
196 }
197 
198 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_table(lxb_html_tree_t * tree,lxb_html_token_t * token)199 lxb_html_tree_insertion_mode_in_table_table(lxb_html_tree_t *tree,
200                                             lxb_html_token_t *token)
201 {
202     lxb_dom_node_t *node;
203 
204     lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_UNTO);
205 
206     node = lxb_html_tree_element_in_scope(tree, LXB_TAG_TABLE, LXB_NS_HTML,
207                                           LXB_HTML_TAG_CATEGORY_SCOPE_TABLE);
208     if (node == NULL) {
209         return true;
210     }
211 
212     lxb_html_tree_open_elements_pop_until_node(tree, node, true);
213     lxb_html_tree_reset_insertion_mode_appropriately(tree);
214 
215     return false;
216 }
217 
218 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_table_closed(lxb_html_tree_t * tree,lxb_html_token_t * token)219 lxb_html_tree_insertion_mode_in_table_table_closed(lxb_html_tree_t *tree,
220                                                    lxb_html_token_t *token)
221 {
222     lxb_dom_node_t *node;
223 
224     node = lxb_html_tree_element_in_scope(tree, LXB_TAG_TABLE, LXB_NS_HTML,
225                                           LXB_HTML_TAG_CATEGORY_SCOPE_TABLE);
226     if (node == NULL) {
227         lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_UNCLTO);
228 
229         return true;
230     }
231 
232     lxb_html_tree_open_elements_pop_until_node(tree, node, true);
233     lxb_html_tree_reset_insertion_mode_appropriately(tree);
234 
235     return true;
236 }
237 
238 /*
239  * "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
240  * "thead", "tr"
241  */
242 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_bcht_closed(lxb_html_tree_t * tree,lxb_html_token_t * token)243 lxb_html_tree_insertion_mode_in_table_bcht_closed(lxb_html_tree_t *tree,
244                                                   lxb_html_token_t *token)
245 {
246     lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_UNCLTO);
247 
248     return true;
249 }
250 
251 /*
252  * A start tag whose tag name is one of: "style", "script", "template"
253  * An end tag whose tag name is "template"
254  */
255 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_st_open_closed(lxb_html_tree_t * tree,lxb_html_token_t * token)256 lxb_html_tree_insertion_mode_in_table_st_open_closed(lxb_html_tree_t *tree,
257                                                      lxb_html_token_t *token)
258 {
259     return lxb_html_tree_insertion_mode_in_head(tree, token);
260 }
261 
262 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_input(lxb_html_tree_t * tree,lxb_html_token_t * token)263 lxb_html_tree_insertion_mode_in_table_input(lxb_html_tree_t *tree,
264                                             lxb_html_token_t *token)
265 {
266     lxb_html_element_t *element;
267     lxb_html_token_attr_t *attr = token->attr_first;
268 
269     while (attr != NULL) {
270 
271         /* Name == "type" and value == "hidden" */
272         if (attr->name != NULL && attr->name->attr_id == LXB_DOM_ATTR_TYPE) {
273             if (attr->value_size == 6
274                 && lexbor_str_data_ncasecmp(attr->value,
275                                             (const lxb_char_t *) "hidden", 6))
276             {
277                 goto have_hidden;
278             }
279         }
280 
281         attr = attr->next;
282     }
283 
284     return lxb_html_tree_insertion_mode_in_table_anything_else(tree, token);
285 
286 have_hidden:
287 
288     lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_UNTO);
289 
290     element = lxb_html_tree_insert_html_element(tree, token);
291     if (element == NULL) {
292         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
293 
294         return lxb_html_tree_process_abort(tree);
295     }
296 
297     lxb_html_tree_open_elements_pop_until_node(tree,
298                                                lxb_dom_interface_node(element),
299                                                true);
300 
301     lxb_html_tree_acknowledge_token_self_closing(tree, token);
302 
303     return true;
304 }
305 
306 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_form(lxb_html_tree_t * tree,lxb_html_token_t * token)307 lxb_html_tree_insertion_mode_in_table_form(lxb_html_tree_t *tree,
308                                            lxb_html_token_t *token)
309 {
310     lxb_dom_node_t *node;
311     lxb_html_element_t *element;
312 
313     lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_UNTO);
314 
315     if (tree->form != NULL) {
316         return true;
317     }
318 
319     node = lxb_html_tree_open_elements_find_reverse(tree, LXB_TAG_TEMPLATE,
320                                                     LXB_NS_HTML, NULL);
321     if (node != NULL) {
322         return true;
323     }
324 
325     element = lxb_html_tree_insert_html_element(tree, token);
326     if (element == NULL) {
327         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
328 
329         return lxb_html_tree_process_abort(tree);
330     }
331 
332     tree->form = lxb_html_interface_form(element);
333 
334     lxb_html_tree_open_elements_pop_until_node(tree,
335                                                lxb_dom_interface_node(element),
336                                                true);
337     return true;
338 }
339 
340 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_end_of_file(lxb_html_tree_t * tree,lxb_html_token_t * token)341 lxb_html_tree_insertion_mode_in_table_end_of_file(lxb_html_tree_t *tree,
342                                                   lxb_html_token_t *token)
343 {
344     return lxb_html_tree_insertion_mode_in_body(tree, token);
345 }
346 
347 bool
lxb_html_tree_insertion_mode_in_table_anything_else(lxb_html_tree_t * tree,lxb_html_token_t * token)348 lxb_html_tree_insertion_mode_in_table_anything_else(lxb_html_tree_t *tree,
349                                                     lxb_html_token_t *token)
350 {
351     tree->foster_parenting = true;
352 
353     lxb_html_tree_insertion_mode_in_body(tree, token);
354     if (tree->status != LXB_STATUS_OK) {
355         return lxb_html_tree_process_abort(tree);
356     }
357 
358     tree->foster_parenting = false;
359 
360     return true;
361 }
362 
363 lxb_inline bool
lxb_html_tree_insertion_mode_in_table_anything_else_closed(lxb_html_tree_t * tree,lxb_html_token_t * token)364 lxb_html_tree_insertion_mode_in_table_anything_else_closed(lxb_html_tree_t *tree,
365                                                            lxb_html_token_t *token)
366 {
367     return lxb_html_tree_insertion_mode_in_table_anything_else(tree, token);
368 }
369 
370 bool
lxb_html_tree_insertion_mode_in_table(lxb_html_tree_t * tree,lxb_html_token_t * token)371 lxb_html_tree_insertion_mode_in_table(lxb_html_tree_t *tree,
372                                       lxb_html_token_t *token)
373 {
374     if (token->type & LXB_HTML_TOKEN_TYPE_CLOSE) {
375         switch (token->tag_id) {
376             case LXB_TAG_TABLE:
377                 return lxb_html_tree_insertion_mode_in_table_table_closed(tree,
378                                                                           token);
379             case LXB_TAG_BODY:
380             case LXB_TAG_CAPTION:
381             case LXB_TAG_COL:
382             case LXB_TAG_COLGROUP:
383             case LXB_TAG_HTML:
384             case LXB_TAG_TBODY:
385             case LXB_TAG_TD:
386             case LXB_TAG_TFOOT:
387             case LXB_TAG_TH:
388             case LXB_TAG_THEAD:
389             case LXB_TAG_TR:
390                 return lxb_html_tree_insertion_mode_in_table_bcht_closed(tree,
391                                                                          token);
392             case LXB_TAG_TEMPLATE:
393                 return lxb_html_tree_insertion_mode_in_table_st_open_closed(tree,
394                                                                             token);
395             default:
396                 return lxb_html_tree_insertion_mode_in_table_anything_else_closed(tree,
397                                                                                   token);
398         }
399     }
400 
401     switch (token->tag_id) {
402         case LXB_TAG__TEXT:
403             return lxb_html_tree_insertion_mode_in_table_text_open(tree, token);
404 
405         case LXB_TAG__EM_COMMENT:
406             return lxb_html_tree_insertion_mode_in_table_comment(tree, token);
407 
408         case LXB_TAG__EM_DOCTYPE:
409             return lxb_html_tree_insertion_mode_in_table_doctype(tree, token);
410 
411         case LXB_TAG_CAPTION:
412             return lxb_html_tree_insertion_mode_in_table_caption(tree, token);
413 
414         case LXB_TAG_COLGROUP:
415             return lxb_html_tree_insertion_mode_in_table_colgroup(tree, token);
416 
417         case LXB_TAG_COL:
418             return lxb_html_tree_insertion_mode_in_table_col(tree, token);
419 
420         case LXB_TAG_TBODY:
421         case LXB_TAG_TFOOT:
422         case LXB_TAG_THEAD:
423             return lxb_html_tree_insertion_mode_in_table_tbtfth(tree, token);
424 
425         case LXB_TAG_TD:
426         case LXB_TAG_TH:
427         case LXB_TAG_TR:
428             return lxb_html_tree_insertion_mode_in_table_tdthtr(tree, token);
429 
430         case LXB_TAG_TABLE:
431             return lxb_html_tree_insertion_mode_in_table_table(tree, token);
432 
433         case LXB_TAG_STYLE:
434         case LXB_TAG_SCRIPT:
435         case LXB_TAG_TEMPLATE:
436             return lxb_html_tree_insertion_mode_in_table_st_open_closed(tree,
437                                                                         token);
438         case LXB_TAG_INPUT:
439             return lxb_html_tree_insertion_mode_in_table_input(tree, token);
440 
441         case LXB_TAG_FORM:
442             return lxb_html_tree_insertion_mode_in_table_form(tree, token);
443 
444         case LXB_TAG__END_OF_FILE:
445             return lxb_html_tree_insertion_mode_in_table_end_of_file(tree,
446                                                                      token);
447         default:
448             return lxb_html_tree_insertion_mode_in_table_anything_else(tree,
449                                                                        token);
450     }
451 }
452