1 /*
2 * Copyright (C) 2018-2020 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7 #include "lexbor/html/tree/insertion_mode.h"
8 #include "lexbor/html/tree/open_elements.h"
9 #include "lexbor/html/interfaces/element.h"
10
11 #define LEXBOR_TOKENIZER_CHARS_MAP
12 #define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
13 #include "lexbor/core/str_res.h"
14
15
16 lxb_status_t
17 lxb_dom_element_qualified_name_set(lxb_dom_element_t *element,
18 const lxb_char_t *prefix, size_t prefix_len,
19 const lxb_char_t *lname, size_t lname_len);
20
21
22 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_anything_else_closed(lxb_html_tree_t * tree,lxb_html_token_t * token)23 lxb_html_tree_insertion_mode_foreign_content_anything_else_closed(lxb_html_tree_t *tree,
24 lxb_html_token_t *token)
25 {
26 if (tree->open_elements->length == 0) {
27 return tree->mode(tree, token);
28 }
29
30 lxb_dom_node_t **list = (lxb_dom_node_t **) tree->open_elements->list;
31
32 size_t idx = tree->open_elements->length - 1;
33
34 if (idx > 0 && list[idx]->local_name != token->tag_id) {
35 lxb_html_tree_parse_error(tree, token,
36 LXB_HTML_RULES_ERROR_UNELINOPELST);
37 }
38
39 while (idx != 0) {
40 if (list[idx]->local_name == token->tag_id) {
41 lxb_html_tree_open_elements_pop_until_node(tree, list[idx], true);
42
43 return true;
44 }
45
46 idx--;
47
48 if (list[idx]->ns == LXB_NS_HTML) {
49 break;
50 }
51 }
52
53 return tree->mode(tree, token);
54 }
55
56 /*
57 * TODO: Need to process script
58 */
59 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_script_closed(lxb_html_tree_t * tree,lxb_html_token_t * token)60 lxb_html_tree_insertion_mode_foreign_content_script_closed(lxb_html_tree_t *tree,
61 lxb_html_token_t *token)
62 {
63 lxb_dom_node_t *node = lxb_html_tree_current_node(tree);
64
65 if (node->local_name != LXB_TAG_SCRIPT || node->ns != LXB_NS_SVG) {
66 return lxb_html_tree_insertion_mode_foreign_content_anything_else_closed(tree,
67 token);
68 }
69
70 lxb_html_tree_open_elements_pop(tree);
71
72 return true;
73 }
74
75 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_anything_else(lxb_html_tree_t * tree,lxb_html_token_t * token)76 lxb_html_tree_insertion_mode_foreign_content_anything_else(lxb_html_tree_t *tree,
77 lxb_html_token_t *token)
78 {
79 lxb_html_element_t *element;
80 const lxb_html_tag_fixname_t *fixname_svg;
81 lxb_dom_node_t *node = lxb_html_tree_adjusted_current_node(tree);
82
83 if (node->ns == LXB_NS_MATH) {
84 tree->before_append_attr = lxb_html_tree_adjust_attributes_mathml;
85 }
86 else if (node->ns == LXB_NS_SVG) {
87 tree->before_append_attr = lxb_html_tree_adjust_attributes_svg;
88 }
89
90 element = lxb_html_tree_insert_foreign_element(tree, token, node->ns);
91 if (element == NULL) {
92 tree->before_append_attr = NULL;
93 tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
94
95 return lxb_html_tree_process_abort(tree);
96 }
97
98 if (node->ns == LXB_NS_SVG) {
99 fixname_svg = lxb_html_tag_fixname_svg(element->element.node.local_name);
100 if (fixname_svg != NULL && fixname_svg->name != NULL) {
101 lxb_dom_element_qualified_name_set(&element->element, NULL, 0,
102 fixname_svg->name,
103 (size_t) fixname_svg->len);
104 }
105 }
106
107 tree->before_append_attr = NULL;
108
109 if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE_SELF) == 0) {
110 return true;
111 }
112
113 node = lxb_html_tree_current_node(tree);
114
115 if (token->tag_id == LXB_TAG_SCRIPT && node->ns == LXB_NS_SVG) {
116 lxb_html_tree_acknowledge_token_self_closing(tree, token);
117 return lxb_html_tree_insertion_mode_foreign_content_script_closed(tree, token);
118 }
119 else {
120 lxb_html_tree_open_elements_pop(tree);
121 lxb_html_tree_acknowledge_token_self_closing(tree, token);
122 }
123
124 return true;
125 }
126
127 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_text(lxb_html_tree_t * tree,lxb_html_token_t * token)128 lxb_html_tree_insertion_mode_foreign_content_text(lxb_html_tree_t *tree,
129 lxb_html_token_t *token)
130 {
131 lexbor_str_t str;
132
133 if (token->null_count != 0) {
134 lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_NUCH);
135
136 tree->status = lxb_html_token_make_text_replace_null(token, &str,
137 tree->document->dom_document.text);
138 }
139 else {
140 tree->status = lxb_html_token_make_text(token, &str,
141 tree->document->dom_document.text);
142 }
143
144 if (tree->status != LXB_STATUS_OK) {
145 return lxb_html_tree_process_abort(tree);
146 }
147
148 /* Can be zero only if all NULL are gone */
149 if (str.length == 0) {
150 lexbor_str_destroy(&str, tree->document->dom_document.text, false);
151
152 return true;
153 }
154
155 if (tree->frameset_ok) {
156 const lxb_char_t *pos = str.data;
157 const lxb_char_t *end = str.data + str.length;
158
159 static const lxb_char_t *rep = lexbor_str_res_ansi_replacement_character;
160 static const unsigned rep_len = sizeof(lexbor_str_res_ansi_replacement_character) - 1;
161
162 while (pos != end) {
163 /* Need skip U+FFFD REPLACEMENT CHARACTER */
164 if (*pos == *rep) {
165 if ((end - pos) < rep_len) {
166 tree->frameset_ok = false;
167
168 break;
169 }
170
171 if (memcmp(pos, rep, sizeof(lxb_char_t) * rep_len) != 0) {
172 tree->frameset_ok = false;
173
174 break;
175 }
176
177 pos = pos + rep_len;
178
179 continue;
180 }
181
182 if (lexbor_tokenizer_chars_map[*pos]
183 != LEXBOR_STR_RES_MAP_CHAR_WHITESPACE)
184 {
185 tree->frameset_ok = false;
186
187 break;
188 }
189
190 pos++;
191 }
192 }
193
194 tree->status = lxb_html_tree_insert_character_for_data(tree, &str, NULL);
195 if (tree->status != LXB_STATUS_OK) {
196 return lxb_html_tree_process_abort(tree);
197 }
198
199 return true;
200 }
201
202 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_comment(lxb_html_tree_t * tree,lxb_html_token_t * token)203 lxb_html_tree_insertion_mode_foreign_content_comment(lxb_html_tree_t *tree,
204 lxb_html_token_t *token)
205 {
206 lxb_dom_comment_t *comment;
207
208 comment = lxb_html_tree_insert_comment(tree, token, NULL);
209 if (comment == NULL) {
210 tree->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
211
212 return lxb_html_tree_process_abort(tree);
213 }
214
215 return true;
216 }
217
218 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_doctype(lxb_html_tree_t * tree,lxb_html_token_t * token)219 lxb_html_tree_insertion_mode_foreign_content_doctype(lxb_html_tree_t *tree,
220 lxb_html_token_t *token)
221 {
222 lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_DOTOFOCOMO);
223
224 return true;
225 }
226
227 /*
228 * "b", "big", "blockquote", "body", "br", "center", "code", "dd", "div", "dl",
229 * "dt", "em", "embed", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "i",
230 * "img", "li", "listing", "menu", "meta", "nobr", "ol", "p", "pre", "ruby",
231 * "s", "small", "span", "strong", "strike", "sub", "sup", "table", "tt", "u",
232 * "ul", "var"
233 * "font", if the token has any attributes named "color", "face", or "size"
234 */
235 lxb_inline bool
lxb_html_tree_insertion_mode_foreign_content_all(lxb_html_tree_t * tree,lxb_html_token_t * token)236 lxb_html_tree_insertion_mode_foreign_content_all(lxb_html_tree_t *tree,
237 lxb_html_token_t *token)
238 {
239 lxb_dom_node_t *node;
240
241 if (token->tag_id == LXB_TAG_FONT) {
242 lxb_html_token_attr_t *attr = token->attr_first;
243
244 while (attr != NULL) {
245 if (attr->name != NULL
246 && (attr->name->attr_id == LXB_DOM_ATTR_COLOR
247 || attr->name->attr_id == LXB_DOM_ATTR_FACE
248 || attr->name->attr_id == LXB_DOM_ATTR_SIZE))
249 {
250 goto go_next;
251 }
252
253 attr = attr->next;
254 }
255
256 return lxb_html_tree_insertion_mode_foreign_content_anything_else(tree,
257 token);
258 }
259
260 go_next:
261
262 lxb_html_tree_parse_error(tree, token, LXB_HTML_RULES_ERROR_UNTO);
263
264 if (tree->fragment != NULL) {
265 return lxb_html_tree_insertion_mode_foreign_content_anything_else(tree,
266 token);
267 }
268
269 do {
270 lxb_html_tree_open_elements_pop(tree);
271
272 node = lxb_html_tree_current_node(tree);
273 }
274 while (node &&
275 !(lxb_html_tree_mathml_text_integration_point(node)
276 || lxb_html_tree_html_integration_point(node)
277 || node->ns == LXB_NS_HTML));
278
279 return false;
280 }
281
282 bool
lxb_html_tree_insertion_mode_foreign_content(lxb_html_tree_t * tree,lxb_html_token_t * token)283 lxb_html_tree_insertion_mode_foreign_content(lxb_html_tree_t *tree,
284 lxb_html_token_t *token)
285 {
286 if (token->type & LXB_HTML_TOKEN_TYPE_CLOSE) {
287 switch (token->tag_id) {
288 case LXB_TAG_SCRIPT:
289 return lxb_html_tree_insertion_mode_foreign_content_script_closed(tree,
290 token);
291 default:
292 return lxb_html_tree_insertion_mode_foreign_content_anything_else_closed(tree,
293 token);
294 }
295 }
296
297 switch (token->tag_id) {
298 case LXB_TAG__TEXT:
299 return lxb_html_tree_insertion_mode_foreign_content_text(tree,
300 token);
301 case LXB_TAG__EM_COMMENT:
302 return lxb_html_tree_insertion_mode_foreign_content_comment(tree,
303 token);
304 case LXB_TAG__EM_DOCTYPE:
305 return lxb_html_tree_insertion_mode_foreign_content_doctype(tree,
306 token);
307
308 case LXB_TAG_B:
309 case LXB_TAG_BIG:
310 case LXB_TAG_BLOCKQUOTE:
311 case LXB_TAG_BODY:
312 case LXB_TAG_BR:
313 case LXB_TAG_CENTER:
314 case LXB_TAG_CODE:
315 case LXB_TAG_DD:
316 case LXB_TAG_DIV:
317 case LXB_TAG_DL:
318 case LXB_TAG_DT:
319 case LXB_TAG_EM:
320 case LXB_TAG_EMBED:
321 case LXB_TAG_H1:
322 case LXB_TAG_H2:
323 case LXB_TAG_H3:
324 case LXB_TAG_H4:
325 case LXB_TAG_H5:
326 case LXB_TAG_H6:
327 case LXB_TAG_HEAD:
328 case LXB_TAG_HR:
329 case LXB_TAG_I:
330 case LXB_TAG_IMG:
331 case LXB_TAG_LI:
332 case LXB_TAG_LISTING:
333 case LXB_TAG_MENU:
334 case LXB_TAG_META:
335 case LXB_TAG_NOBR:
336 case LXB_TAG_OL:
337 case LXB_TAG_P:
338 case LXB_TAG_PRE:
339 case LXB_TAG_RUBY:
340 case LXB_TAG_S:
341 case LXB_TAG_SMALL:
342 case LXB_TAG_SPAN:
343 case LXB_TAG_STRONG:
344 case LXB_TAG_STRIKE:
345 case LXB_TAG_SUB:
346 case LXB_TAG_TABLE:
347 case LXB_TAG_TT:
348 case LXB_TAG_U:
349 case LXB_TAG_UL:
350 case LXB_TAG_VAR:
351 case LXB_TAG_FONT:
352 return lxb_html_tree_insertion_mode_foreign_content_all(tree,
353 token);
354 default:
355 return lxb_html_tree_insertion_mode_foreign_content_anything_else(tree,
356 token);
357 }
358 }
359