1 /*
2  * Copyright (C) 2018-2020 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #include "lexbor/html/tree/insertion_mode.h"
8 #include "lexbor/html/tree/open_elements.h"
9 #include "lexbor/html/tree/active_formatting.h"
10 #include "lexbor/html/tree/template_insertion.h"
11 #include "lexbor/html/interfaces/script_element.h"
12 #include "lexbor/html/interfaces/template_element.h"
13 #include "lexbor/html/tokenizer/state_script.h"
14 
15 
16 static bool
17 lxb_html_tree_insertion_mode_in_head_open(lxb_html_tree_t *tree,
18                                           lxb_html_token_t *token);
19 
20 static bool
21 lxb_html_tree_insertion_mode_in_head_closed(lxb_html_tree_t *tree,
22                                             lxb_html_token_t *token);
23 
24 lxb_inline bool
25 lxb_html_tree_insertion_mode_in_head_script(lxb_html_tree_t *tree,
26                                             lxb_html_token_t *token);
27 
28 lxb_inline bool
29 lxb_html_tree_insertion_mode_in_head_template(lxb_html_tree_t *tree,
30                                               lxb_html_token_t *token);
31 
32 lxb_inline bool
33 lxb_html_tree_insertion_mode_in_head_template_closed(lxb_html_tree_t *tree,
34                                                      lxb_html_token_t *token);
35 
36 lxb_inline bool
37 lxb_html_tree_insertion_mode_in_head_anything_else(lxb_html_tree_t *tree);
38 
39 
40 bool
lxb_html_tree_insertion_mode_in_head(lxb_html_tree_t * tree,lxb_html_token_t * token)41 lxb_html_tree_insertion_mode_in_head(lxb_html_tree_t *tree,
42                                      lxb_html_token_t *token)
43 {
44     if (token->type & LXB_HTML_TOKEN_TYPE_CLOSE) {
45         return lxb_html_tree_insertion_mode_in_head_closed(tree, token);;
46     }
47 
48     return lxb_html_tree_insertion_mode_in_head_open(tree, token);
49 }
50 
51 static bool
lxb_html_tree_insertion_mode_in_head_open(lxb_html_tree_t * tree,lxb_html_token_t * token)52 lxb_html_tree_insertion_mode_in_head_open(lxb_html_tree_t *tree,
53                                           lxb_html_token_t *token)
54 {
55     switch (token->tag_id) {
56         case LXB_TAG__EM_COMMENT: {
57             lxb_dom_comment_t *comment;
58 
59             comment = lxb_html_tree_insert_comment(tree, token, NULL);
60             if (comment == NULL) {
61                 tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
62 
63                 return lxb_html_tree_process_abort(tree);
64             }
65 
66             break;
67         }
68 
69         case LXB_TAG__EM_DOCTYPE:
70             lxb_html_tree_parse_error(tree, token,
71                                       LXB_HTML_RULES_ERROR_DOTOINHEMO);
72             break;
73 
74         case LXB_TAG_HTML:
75             return lxb_html_tree_insertion_mode_in_body(tree, token);
76 
77         case LXB_TAG_BASE:
78         case LXB_TAG_BASEFONT:
79         case LXB_TAG_BGSOUND:
80         case LXB_TAG_LINK: {
81             lxb_html_element_t *element;
82 
83             element = lxb_html_tree_insert_html_element(tree, token);
84             if (element == NULL) {
85                 tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
86 
87                 return lxb_html_tree_process_abort(tree);
88             }
89 
90             lxb_html_tree_open_elements_pop(tree);
91             lxb_html_tree_acknowledge_token_self_closing(tree, token);
92 
93             break;
94         }
95 
96         case LXB_TAG_META: {
97             lxb_html_element_t *element;
98 
99             element = lxb_html_tree_insert_html_element(tree, token);
100             if (element == NULL) {
101                 tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
102 
103                 return lxb_html_tree_process_abort(tree);
104             }
105 
106             lxb_html_tree_open_elements_pop(tree);
107             lxb_html_tree_acknowledge_token_self_closing(tree, token);
108 
109             /*
110              * TODO: Check encoding: charset attribute or http-equiv attribute.
111              */
112 
113             break;
114         }
115 
116         case LXB_TAG_TITLE: {
117             lxb_html_element_t *element;
118 
119             element = lxb_html_tree_generic_rcdata_parsing(tree, token);
120             if (element == NULL) {
121                 tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
122 
123                 return lxb_html_tree_process_abort(tree);
124             }
125 
126             break;
127         }
128 
129         case LXB_TAG_NOSCRIPT: {
130             lxb_html_element_t *element;
131 
132             if (tree->document->dom_document.scripting) {
133                 element = lxb_html_tree_generic_rawtext_parsing(tree, token);
134             }
135             else {
136                 element = lxb_html_tree_insert_html_element(tree, token);
137                 tree->mode = lxb_html_tree_insertion_mode_in_head_noscript;
138             }
139 
140             if (element == NULL) {
141                 tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
142 
143                 return lxb_html_tree_process_abort(tree);
144             }
145 
146             break;
147         }
148 
149         case LXB_TAG_NOFRAMES:
150         case LXB_TAG_STYLE: {
151             lxb_html_element_t *element;
152 
153             element = lxb_html_tree_generic_rawtext_parsing(tree, token);
154             if (element == NULL) {
155                 tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
156 
157                 return lxb_html_tree_process_abort(tree);
158             }
159 
160             break;
161         }
162 
163         case LXB_TAG_SCRIPT:
164             return lxb_html_tree_insertion_mode_in_head_script(tree, token);
165 
166         case LXB_TAG_TEMPLATE:
167             return lxb_html_tree_insertion_mode_in_head_template(tree, token);
168 
169         case LXB_TAG_HEAD:
170             lxb_html_tree_parse_error(tree, token,
171                                       LXB_HTML_RULES_ERROR_HETOINHEMO);
172             break;
173 
174         /*
175          * We can create function for this, but...
176          *
177          * The "in head noscript" insertion mode use this
178          * is you change this code, please, change it in in head noscript" mode
179          */
180         case LXB_TAG__TEXT: {
181             lxb_html_token_t ws_token = {0};
182 
183             tree->status = lxb_html_token_data_split_ws_begin(token, &ws_token);
184             if (tree->status != LXB_STATUS_OK) {
185                 return lxb_html_tree_process_abort(tree);
186             }
187 
188             if (ws_token.text_start != ws_token.text_end) {
189                 tree->status = lxb_html_tree_insert_character(tree, &ws_token,
190                                                               NULL);
191                 if (tree->status != LXB_STATUS_OK) {
192                     return lxb_html_tree_process_abort(tree);
193                 }
194             }
195 
196             if (token->text_start == token->text_end) {
197                 return true;
198             }
199         }
200         /* fall through */
201 
202         default:
203             return lxb_html_tree_insertion_mode_in_head_anything_else(tree);
204     }
205 
206     return true;
207 }
208 
209 static bool
lxb_html_tree_insertion_mode_in_head_closed(lxb_html_tree_t * tree,lxb_html_token_t * token)210 lxb_html_tree_insertion_mode_in_head_closed(lxb_html_tree_t *tree,
211                                             lxb_html_token_t *token)
212 {
213     switch (token->tag_id) {
214         case LXB_TAG_HEAD:
215             lxb_html_tree_open_elements_pop(tree);
216 
217             tree->mode = lxb_html_tree_insertion_mode_after_head;
218 
219             break;
220 
221         case LXB_TAG_BODY:
222         case LXB_TAG_HTML:
223         case LXB_TAG_BR:
224             return lxb_html_tree_insertion_mode_in_head_anything_else(tree);
225 
226         case LXB_TAG_TEMPLATE:
227             return lxb_html_tree_insertion_mode_in_head_template_closed(tree,
228                                                                         token);
229 
230         default:
231             lxb_html_tree_parse_error(tree, token,
232                                       LXB_HTML_RULES_ERROR_UNCLTOINHEMO);
233             break;
234 
235     }
236 
237     return true;
238 }
239 
240 lxb_inline bool
lxb_html_tree_insertion_mode_in_head_script(lxb_html_tree_t * tree,lxb_html_token_t * token)241 lxb_html_tree_insertion_mode_in_head_script(lxb_html_tree_t *tree,
242                                             lxb_html_token_t *token)
243 {
244     lxb_dom_node_t *ap_node;
245     lxb_html_element_t *element;
246     lxb_html_tree_insertion_position_t ipos;
247 
248     ap_node = lxb_html_tree_appropriate_place_inserting_node(tree, NULL, &ipos);
249     if (ap_node == NULL) {
250         tree->status = LXB_STATUS_ERROR;
251 
252         return lxb_html_tree_process_abort(tree);
253     }
254 
255     if (ipos == LXB_HTML_TREE_INSERTION_POSITION_CHILD) {
256         element = lxb_html_tree_create_element_for_token(tree, token,
257                                                          LXB_NS_HTML, ap_node);
258     }
259     else {
260         element = lxb_html_tree_create_element_for_token(tree, token,
261                                                          LXB_NS_HTML,
262                                                          ap_node->parent);
263     }
264 
265     if (element == NULL) {
266         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
267 
268         return lxb_html_tree_process_abort(tree);
269     }
270 
271     /* TODO: Need code for set flags for Script Element */
272 
273     tree->status = lxb_html_tree_open_elements_push(tree,
274                                                     lxb_dom_interface_node(element));
275     if (tree->status != LXB_HTML_STATUS_OK) {
276         lxb_html_script_element_interface_destroy(lxb_html_interface_script(element));
277 
278         return lxb_html_tree_process_abort(tree);
279     }
280 
281     lxb_html_tree_insert_node(ap_node, lxb_dom_interface_node(element), ipos);
282 
283     /*
284      * Need for tokenizer state Script
285      * See description for
286      * 'lxb_html_tokenizer_state_script_data_before' function
287      */
288     lxb_html_tokenizer_tmp_tag_id_set(tree->tkz_ref, token->tag_id);
289     lxb_html_tokenizer_state_set(tree->tkz_ref,
290                                  lxb_html_tokenizer_state_script_data_before);
291 
292     tree->original_mode = tree->mode;
293     tree->mode = lxb_html_tree_insertion_mode_text;
294 
295     return true;
296 }
297 
298 lxb_inline bool
lxb_html_tree_insertion_mode_in_head_template(lxb_html_tree_t * tree,lxb_html_token_t * token)299 lxb_html_tree_insertion_mode_in_head_template(lxb_html_tree_t *tree,
300                                               lxb_html_token_t *token)
301 {
302     lxb_html_element_t *element;
303 
304     element = lxb_html_tree_insert_html_element(tree, token);
305     if (element == NULL) {
306         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
307 
308         return lxb_html_tree_process_abort(tree);
309     }
310 
311     tree->status = lxb_html_tree_active_formatting_push_marker(tree);
312     if (tree->status != LXB_STATUS_OK) {
313         lxb_html_template_element_interface_destroy(lxb_html_interface_template(element));
314 
315         return lxb_html_tree_process_abort(tree);
316     }
317 
318     tree->frameset_ok = false;
319     tree->mode = lxb_html_tree_insertion_mode_in_template;
320 
321     tree->status = lxb_html_tree_template_insertion_push(tree,
322                                       lxb_html_tree_insertion_mode_in_template);
323     if (tree->status != LXB_STATUS_OK) {
324         lxb_html_template_element_interface_destroy(lxb_html_interface_template(element));
325 
326         return lxb_html_tree_process_abort(tree);
327     }
328 
329     return true;
330 }
331 
332 lxb_inline bool
lxb_html_tree_insertion_mode_in_head_template_closed(lxb_html_tree_t * tree,lxb_html_token_t * token)333 lxb_html_tree_insertion_mode_in_head_template_closed(lxb_html_tree_t *tree,
334                                                      lxb_html_token_t *token)
335 {
336     lxb_dom_node_t *temp_node;
337 
338     temp_node = lxb_html_tree_open_elements_find_reverse(tree, LXB_TAG_TEMPLATE,
339                                                          LXB_NS_HTML, NULL);
340     if (temp_node == NULL) {
341         lxb_html_tree_parse_error(tree, token,
342                                   LXB_HTML_RULES_ERROR_TECLTOWIOPINHEMO);
343         return true;
344     }
345 
346     lxb_html_tree_generate_all_implied_end_tags_thoroughly(tree, LXB_TAG__UNDEF,
347                                                            LXB_NS__UNDEF);
348 
349     temp_node = lxb_html_tree_current_node(tree);
350 
351     if (lxb_html_tree_node_is(temp_node, LXB_TAG_TEMPLATE) == false) {
352         lxb_html_tree_parse_error(tree, token,
353                                   LXB_HTML_RULES_ERROR_TEELISNOCUINHEMO);
354     }
355 
356     lxb_html_tree_open_elements_pop_until_tag_id(tree, LXB_TAG_TEMPLATE,
357                                                  LXB_NS_HTML, true);
358 
359     lxb_html_tree_active_formatting_up_to_last_marker(tree);
360     lxb_html_tree_template_insertion_pop(tree);
361     lxb_html_tree_reset_insertion_mode_appropriately(tree);
362 
363     return true;
364 }
365 
366 lxb_inline bool
lxb_html_tree_insertion_mode_in_head_anything_else(lxb_html_tree_t * tree)367 lxb_html_tree_insertion_mode_in_head_anything_else(lxb_html_tree_t *tree)
368 {
369     lxb_html_tree_open_elements_pop(tree);
370 
371     tree->mode = lxb_html_tree_insertion_mode_after_head;
372 
373     return false;
374 }
375