1 /*
2  * Copyright (C) 2018-2020 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #include "lexbor/html/tree/insertion_mode.h"
8 #include "lexbor/html/tree/open_elements.h"
9 #include "lexbor/html/interfaces/element.h"
10 
11 #define LEXBOR_TOKENIZER_CHARS_MAP
12 #define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
13 #include "lexbor/core/str_res.h"
14 
15 
16 lxb_status_t
17 lxb_dom_element_qualified_name_set(lxb_dom_element_t *element,
18                                    const lxb_char_t *prefix, size_t prefix_len,
19                                    const lxb_char_t *lname, size_t lname_len);
20 
21 
22 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_anything_else_closed(lxb_html_tree_t * tree,lxb_html_token_t * token)23 lxb_html_tree_insertion_mode_foreign_content_anything_else_closed(lxb_html_tree_t *tree,
24                                                                   lxb_html_token_t *token)
25 {
26     if (tree->open_elements->length == 0) {
27         return tree->mode(tree, token);
28     }
29 
30     lxb_dom_node_t **list = (lxb_dom_node_t **) tree->open_elements->list;
31 
32     size_t idx = tree->open_elements->length - 1;
33 
34     if (idx > 0 && list[idx]->local_name != token->tag_id) {
35         lxb_html_tree_parse_error(tree, token,
36                                   LXB_HTML_RULES_ERROR_UNELINOPELST);
37     }
38 
39     while (idx != 0) {
40         if (list[idx]->local_name == token->tag_id) {
41             lxb_html_tree_open_elements_pop_until_node(tree, list[idx], true);
42 
43             return true;
44         }
45 
46         idx--;
47 
48         if (list[idx]->ns == LXB_NS_HTML) {
49             break;
50         }
51     }
52 
53     return tree->mode(tree, token);
54 }
55 
56 /*
57  * TODO: Need to process script
58  */
59 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_script_closed(lxb_html_tree_t * tree,lxb_html_token_t * token)60 lxb_html_tree_insertion_mode_foreign_content_script_closed(lxb_html_tree_t *tree,
61                                                            lxb_html_token_t *token)
62 {
63     lxb_dom_node_t *node = lxb_html_tree_current_node(tree);
64 
65     if (node->local_name != LXB_TAG_SCRIPT || node->ns != LXB_NS_SVG) {
66         return lxb_html_tree_insertion_mode_foreign_content_anything_else_closed(tree,
67                                                                                  token);
68     }
69 
70     lxb_html_tree_open_elements_pop(tree);
71 
72     return true;
73 }
74 
75 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_anything_else(lxb_html_tree_t * tree,lxb_html_token_t * token)76 lxb_html_tree_insertion_mode_foreign_content_anything_else(lxb_html_tree_t *tree,
77                                                            lxb_html_token_t *token)
78 {
79     lxb_html_element_t *element;
80     const lxb_html_tag_fixname_t *fixname_svg;
81     lxb_dom_node_t *node = lxb_html_tree_adjusted_current_node(tree);
82 
83     if (node->ns == LXB_NS_MATH) {
84         tree->before_append_attr = lxb_html_tree_adjust_attributes_mathml;
85     }
86     else if (node->ns == LXB_NS_SVG) {
87         tree->before_append_attr = lxb_html_tree_adjust_attributes_svg;
88     }
89 
90     element = lxb_html_tree_insert_foreign_element(tree, token, node->ns);
91     if (element == NULL) {
92         tree->before_append_attr = NULL;
93         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
94 
95         return lxb_html_tree_process_abort(tree);
96     }
97 
98     if (node->ns == LXB_NS_SVG) {
99         fixname_svg = lxb_html_tag_fixname_svg(element->element.node.local_name);
100         if (fixname_svg != NULL && fixname_svg->name != NULL) {
101             lxb_dom_element_qualified_name_set(&element->element, NULL, 0,
102                                                fixname_svg->name,
103                                                (size_t) fixname_svg->len);
104         }
105     }
106 
107     tree->before_append_attr = NULL;
108 
109     if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE_SELF) == 0) {
110         return true;
111     }
112 
113     node = lxb_html_tree_current_node(tree);
114 
115     if (token->tag_id == LXB_TAG_SCRIPT && node->ns == LXB_NS_SVG) {
116         lxb_html_tree_acknowledge_token_self_closing(tree, token);
117         return lxb_html_tree_insertion_mode_foreign_content_script_closed(tree, token);
118     }
119     else {
120         lxb_html_tree_open_elements_pop(tree);
121         lxb_html_tree_acknowledge_token_self_closing(tree, token);
122     }
123 
124     return true;
125 }
126 
127 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_text(lxb_html_tree_t * tree,lxb_html_token_t * token)128 lxb_html_tree_insertion_mode_foreign_content_text(lxb_html_tree_t *tree,
129                                                   lxb_html_token_t *token)
130 {
131     lexbor_str_t str;
132 
133     if (token->null_count != 0) {
134         lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_NUCH);
135 
136         tree->status = lxb_html_token_make_text_replace_null(token, &str,
137                                                              tree->document->dom_document.text);
138     }
139     else {
140         tree->status = lxb_html_token_make_text(token, &str,
141                                                 tree->document->dom_document.text);
142     }
143 
144     if (tree->status != LXB_STATUS_OK) {
145         return lxb_html_tree_process_abort(tree);
146     }
147 
148     /* Can be zero only if all NULL are gone */
149     if (str.length == 0) {
150         lexbor_str_destroy(&str, tree->document->dom_document.text, false);
151 
152         return true;
153     }
154 
155     if (tree->frameset_ok) {
156         const lxb_char_t *pos = str.data;
157         const lxb_char_t *end = str.data + str.length;
158 
159         static const lxb_char_t *rep = lexbor_str_res_ansi_replacement_character;
160         static const unsigned rep_len = sizeof(lexbor_str_res_ansi_replacement_character) - 1;
161 
162         while (pos != end) {
163             /* Need skip U+FFFD REPLACEMENT CHARACTER */
164             if (*pos == *rep) {
165                 if ((end - pos) < rep_len) {
166                     tree->frameset_ok = false;
167 
168                     break;
169                 }
170 
171                 if (memcmp(pos, rep, sizeof(lxb_char_t) * rep_len) != 0) {
172                     tree->frameset_ok = false;
173 
174                     break;
175                 }
176 
177                 pos = pos + rep_len;
178 
179                 continue;
180             }
181 
182             if (lexbor_tokenizer_chars_map[*pos]
183                 != LEXBOR_STR_RES_MAP_CHAR_WHITESPACE)
184             {
185                 tree->frameset_ok = false;
186 
187                 break;
188             }
189 
190             pos++;
191         }
192     }
193 
194     tree->status = lxb_html_tree_insert_character_for_data(tree, &str, NULL);
195     if (tree->status != LXB_STATUS_OK) {
196         return lxb_html_tree_process_abort(tree);
197     }
198 
199     return true;
200 }
201 
202 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_comment(lxb_html_tree_t * tree,lxb_html_token_t * token)203 lxb_html_tree_insertion_mode_foreign_content_comment(lxb_html_tree_t *tree,
204                                                      lxb_html_token_t *token)
205 {
206     lxb_dom_comment_t *comment;
207 
208     comment = lxb_html_tree_insert_comment(tree, token, NULL);
209     if (comment == NULL) {
210         tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
211 
212         return lxb_html_tree_process_abort(tree);
213     }
214 
215     return true;
216 }
217 
218 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_doctype(lxb_html_tree_t * tree,lxb_html_token_t * token)219 lxb_html_tree_insertion_mode_foreign_content_doctype(lxb_html_tree_t *tree,
220                                                      lxb_html_token_t *token)
221 {
222     lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_DOTOFOCOMO);
223 
224     return true;
225 }
226 
227 /*
228  * "b", "big", "blockquote", "body", "br", "center", "code", "dd", "div", "dl",
229  * "dt", "em", "embed", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "i",
230  * "img", "li", "listing", "menu", "meta", "nobr", "ol", "p", "pre", "ruby",
231  * "s", "small", "span", "strong", "strike", "sub", "sup", "table", "tt", "u",
232  * "ul", "var"
233  * "font", if the token has any attributes named "color", "face", or "size"
234  */
235 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_all(lxb_html_tree_t * tree,lxb_html_token_t * token)236 lxb_html_tree_insertion_mode_foreign_content_all(lxb_html_tree_t *tree,
237                                                  lxb_html_token_t *token)
238 {
239     lxb_dom_node_t *node;
240 
241     if (token->tag_id == LXB_TAG_FONT) {
242         lxb_html_token_attr_t *attr = token->attr_first;
243 
244         while (attr != NULL) {
245             if (attr->name != NULL
246                 && (attr->name->attr_id == LXB_DOM_ATTR_COLOR
247                 || attr->name->attr_id == LXB_DOM_ATTR_FACE
248                 || attr->name->attr_id == LXB_DOM_ATTR_SIZE))
249             {
250                 goto go_next;
251             }
252 
253             attr = attr->next;
254         }
255 
256         return lxb_html_tree_insertion_mode_foreign_content_anything_else(tree,
257                                                                           token);
258     }
259 
260 go_next:
261 
262     lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_UNTO);
263 
264     if (tree->fragment != NULL) {
265         return lxb_html_tree_insertion_mode_foreign_content_anything_else(tree,
266                                                                           token);
267     }
268 
269     do {
270         lxb_html_tree_open_elements_pop(tree);
271 
272         node = lxb_html_tree_current_node(tree);
273     }
274     while (node &&
275            !(lxb_html_tree_mathml_text_integration_point(node)
276             || lxb_html_tree_html_integration_point(node)
277             || node->ns == LXB_NS_HTML));
278 
279     return false;
280 }
281 
282 bool
lxb_html_tree_insertion_mode_foreign_content(lxb_html_tree_t * tree,lxb_html_token_t * token)283 lxb_html_tree_insertion_mode_foreign_content(lxb_html_tree_t *tree,
284                                              lxb_html_token_t *token)
285 {
286     if (token->type & LXB_HTML_TOKEN_TYPE_CLOSE) {
287         switch (token->tag_id) {
288             case LXB_TAG_SCRIPT:
289                 return lxb_html_tree_insertion_mode_foreign_content_script_closed(tree,
290                                                                                   token);
291             default:
292                 return lxb_html_tree_insertion_mode_foreign_content_anything_else_closed(tree,
293                                                                                          token);
294         }
295     }
296 
297     switch (token->tag_id) {
298         case LXB_TAG__TEXT:
299             return lxb_html_tree_insertion_mode_foreign_content_text(tree,
300                                                                      token);
301         case LXB_TAG__EM_COMMENT:
302             return lxb_html_tree_insertion_mode_foreign_content_comment(tree,
303                                                                         token);
304         case LXB_TAG__EM_DOCTYPE:
305             return lxb_html_tree_insertion_mode_foreign_content_doctype(tree,
306                                                                         token);
307 
308         case LXB_TAG_B:
309         case LXB_TAG_BIG:
310         case LXB_TAG_BLOCKQUOTE:
311         case LXB_TAG_BODY:
312         case LXB_TAG_BR:
313         case LXB_TAG_CENTER:
314         case LXB_TAG_CODE:
315         case LXB_TAG_DD:
316         case LXB_TAG_DIV:
317         case LXB_TAG_DL:
318         case LXB_TAG_DT:
319         case LXB_TAG_EM:
320         case LXB_TAG_EMBED:
321         case LXB_TAG_H1:
322         case LXB_TAG_H2:
323         case LXB_TAG_H3:
324         case LXB_TAG_H4:
325         case LXB_TAG_H5:
326         case LXB_TAG_H6:
327         case LXB_TAG_HEAD:
328         case LXB_TAG_HR:
329         case LXB_TAG_I:
330         case LXB_TAG_IMG:
331         case LXB_TAG_LI:
332         case LXB_TAG_LISTING:
333         case LXB_TAG_MENU:
334         case LXB_TAG_META:
335         case LXB_TAG_NOBR:
336         case LXB_TAG_OL:
337         case LXB_TAG_P:
338         case LXB_TAG_PRE:
339         case LXB_TAG_RUBY:
340         case LXB_TAG_S:
341         case LXB_TAG_SMALL:
342         case LXB_TAG_SPAN:
343         case LXB_TAG_STRONG:
344         case LXB_TAG_STRIKE:
345         case LXB_TAG_SUB:
346         case LXB_TAG_TABLE:
347         case LXB_TAG_TT:
348         case LXB_TAG_U:
349         case LXB_TAG_UL:
350         case LXB_TAG_VAR:
351         case LXB_TAG_FONT:
352             return lxb_html_tree_insertion_mode_foreign_content_all(tree,
353                                                                     token);
354         default:
355             return lxb_html_tree_insertion_mode_foreign_content_anything_else(tree,
356                                                                               token);
357     }
358 }
359