1 /*
2 * Copyright (C) 2018 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7 #ifndef LEXBOR_HTML_TREE_H
8 #define LEXBOR_HTML_TREE_H
9
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13
14 #include "lexbor/dom/interfaces/node.h"
15 #include "lexbor/dom/interfaces/attr.h"
16
17 #include "lexbor/html/base.h"
18 #include "lexbor/html/node.h"
19 #include "lexbor/html/tokenizer.h"
20 #include "lexbor/html/interfaces/document.h"
21 #include "lexbor/html/tag.h"
22 #include "lexbor/html/tree/error.h"
23
24
25 typedef bool
26 (*lxb_html_tree_insertion_mode_f)(lxb_html_tree_t *tree,
27 lxb_html_token_t *token);
28
29 typedef lxb_status_t
30 (*lxb_html_tree_append_attr_f)(lxb_html_tree_t *tree,
31 lxb_dom_attr_t *attr, void *ctx);
32
33 typedef struct {
34 lexbor_array_obj_t *text_list;
35 bool have_non_ws;
36 }
37 lxb_html_tree_pending_table_t;
38
39 struct lxb_html_tree {
40 lxb_html_tokenizer_t *tkz_ref;
41
42 lxb_html_document_t *document;
43 lxb_dom_node_t *fragment;
44
45 lxb_html_form_element_t *form;
46
47 lexbor_array_t *open_elements;
48 lexbor_array_t *active_formatting;
49 lexbor_array_obj_t *template_insertion_modes;
50
51 lxb_html_tree_pending_table_t pending_table;
52
53 lexbor_array_obj_t *parse_errors;
54
55 bool foster_parenting;
56 bool frameset_ok;
57 bool scripting;
58 bool has_explicit_html_tag;
59 bool has_explicit_head_tag;
60 bool has_explicit_body_tag;
61
62 lxb_html_tree_insertion_mode_f mode;
63 lxb_html_tree_insertion_mode_f original_mode;
64 lxb_html_tree_append_attr_f before_append_attr;
65
66 lxb_status_t status;
67
68 size_t ref_count;
69 };
70
/*
 * Where lxb_html_tree_insert_node() places a node relative to its target:
 * BEFORE selects lxb_dom_node_insert_before_wo_events(), any other value
 * (i.e. CHILD) selects lxb_dom_node_insert_child_wo_events().
 */
typedef enum {
    LXB_HTML_TREE_INSERTION_POSITION_CHILD  = 0x00,
    LXB_HTML_TREE_INSERTION_POSITION_BEFORE = 0x01
}
lxb_html_tree_insertion_position_t;
76
77
78 LXB_API lxb_html_tree_t *
79 lxb_html_tree_create(void);
80
81 LXB_API lxb_status_t
82 lxb_html_tree_init(lxb_html_tree_t *tree, lxb_html_tokenizer_t *tkz);
83
84 LXB_API lxb_html_tree_t *
85 lxb_html_tree_ref(lxb_html_tree_t *tree);
86
87 LXB_API lxb_html_tree_t *
88 lxb_html_tree_unref(lxb_html_tree_t *tree);
89
90 LXB_API void
91 lxb_html_tree_clean(lxb_html_tree_t *tree);
92
93 LXB_API lxb_html_tree_t *
94 lxb_html_tree_destroy(lxb_html_tree_t *tree);
95
96 LXB_API lxb_status_t
97 lxb_html_tree_stop_parsing(lxb_html_tree_t *tree);
98
99 LXB_API bool
100 lxb_html_tree_process_abort(lxb_html_tree_t *tree);
101
102 LXB_API void
103 lxb_html_tree_parse_error(lxb_html_tree_t *tree, lxb_html_token_t *token,
104 lxb_html_tree_error_id_t id);
105
106 LXB_API bool
107 lxb_html_tree_construction_dispatcher(lxb_html_tree_t *tree,
108 lxb_html_token_t *token);
109
110 LXB_API lxb_dom_node_t *
111 lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t *tree,
112 lxb_dom_node_t *override_target,
113 lxb_html_tree_insertion_position_t *ipos);
114
115 LXB_API lxb_html_element_t *
116 lxb_html_tree_insert_foreign_element(lxb_html_tree_t *tree,
117 lxb_html_token_t *token, lxb_ns_id_t ns);
118
119 LXB_API lxb_html_element_t *
120 lxb_html_tree_create_element_for_token(lxb_html_tree_t *tree,
121 lxb_html_token_t *token, lxb_ns_id_t ns);
122
123 LXB_API lxb_status_t
124 lxb_html_tree_append_attributes(lxb_html_tree_t *tree,
125 lxb_dom_element_t *element,
126 lxb_html_token_t *token, lxb_ns_id_t ns);
127
128 LXB_API lxb_status_t
129 lxb_html_tree_append_attributes_from_element(lxb_html_tree_t *tree,
130 lxb_dom_element_t *element,
131 lxb_dom_element_t *from,
132 lxb_ns_id_t ns);
133
134 LXB_API lxb_status_t
135 lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t *tree,
136 lxb_dom_attr_t *attr, void *ctx);
137
138 LXB_API lxb_status_t
139 lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t *tree,
140 lxb_dom_attr_t *attr, void *ctx);
141
142 LXB_API lxb_status_t
143 lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t *tree,
144 lxb_dom_attr_t *attr, void *ctx);
145
146 LXB_API lxb_status_t
147 lxb_html_tree_insert_character(lxb_html_tree_t *tree, lxb_html_token_t *token,
148 lxb_dom_node_t **ret_node);
149
150 LXB_API lxb_status_t
151 lxb_html_tree_insert_character_for_data(lxb_html_tree_t *tree,
152 lexbor_str_t *str,
153 lxb_dom_node_t **ret_node);
154
155 LXB_API lxb_dom_comment_t *
156 lxb_html_tree_insert_comment(lxb_html_tree_t *tree,
157 lxb_html_token_t *token, lxb_dom_node_t *pos);
158
159 LXB_API lxb_dom_document_type_t *
160 lxb_html_tree_create_document_type_from_token(lxb_html_tree_t *tree,
161 lxb_html_token_t *token);
162
163 LXB_API void
164 lxb_html_tree_node_delete_deep(lxb_html_tree_t *tree, lxb_dom_node_t *node);
165
166 LXB_API lxb_html_element_t *
167 lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t *tree,
168 lxb_html_token_t *token);
169
170 LXB_API lxb_html_element_t *
171 lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t *tree,
172 lxb_html_token_t *token);
173
174 LXB_API void
175 lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t *tree,
176 lxb_tag_id_t ex_tag, lxb_ns_id_t ex_ns);
177
178 LXB_API void
179 lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t *tree,
180 lxb_tag_id_t ex_tag,
181 lxb_ns_id_t ex_ns);
182
183 LXB_API void
184 lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t *tree);
185
186 LXB_API lxb_dom_node_t *
187 lxb_html_tree_element_in_scope(lxb_html_tree_t *tree, lxb_tag_id_t tag_id,
188 lxb_ns_id_t ns, lxb_html_tag_category_t ct);
189
190 LXB_API lxb_dom_node_t *
191 lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t *tree,
192 lxb_dom_node_t *by_node,
193 lxb_html_tag_category_t ct);
194
195 LXB_API lxb_dom_node_t *
196 lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t *tree);
197
198 LXB_API lxb_dom_node_t *
199 lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t *tree);
200
201 LXB_API lxb_dom_node_t *
202 lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t *tree);
203
204 LXB_API bool
205 lxb_html_tree_check_scope_element(lxb_html_tree_t *tree);
206
207 LXB_API void
208 lxb_html_tree_close_p_element(lxb_html_tree_t *tree, lxb_html_token_t *token);
209
210 LXB_API bool
211 lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t *tree,
212 lxb_html_token_t *token,
213 lxb_status_t *status);
214
215 LXB_API bool
216 lxb_html_tree_html_integration_point(lxb_dom_node_t *node);
217
218 LXB_API lxb_status_t
219 lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t *tree,
220 lxb_dom_attr_t *attr, void *ctx);
221
222 LXB_API lxb_status_t
223 lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t *tree,
224 lxb_dom_attr_t *attr, void *ctx);
225
226
227 /*
228 * Inline functions
229 */
230 lxb_inline lxb_status_t
lxb_html_tree_begin(lxb_html_tree_t * tree,lxb_html_document_t * document)231 lxb_html_tree_begin(lxb_html_tree_t *tree, lxb_html_document_t *document)
232 {
233 tree->document = document;
234
235 return lxb_html_tokenizer_begin(tree->tkz_ref);
236 }
237
238 lxb_inline lxb_status_t
lxb_html_tree_chunk(lxb_html_tree_t * tree,const lxb_char_t * html,size_t size)239 lxb_html_tree_chunk(lxb_html_tree_t *tree, const lxb_char_t *html, size_t size)
240 {
241 return lxb_html_tokenizer_chunk(tree->tkz_ref, html, size);
242 }
243
244 lxb_inline lxb_status_t
lxb_html_tree_end(lxb_html_tree_t * tree)245 lxb_html_tree_end(lxb_html_tree_t *tree)
246 {
247 if (tree->document->done != NULL) {
248 tree->document->done(tree->document);
249 }
250
251 return lxb_html_tokenizer_end(tree->tkz_ref);
252 }
253
254 lxb_inline lxb_status_t
lxb_html_tree_build(lxb_html_tree_t * tree,lxb_html_document_t * document,const lxb_char_t * html,size_t size)255 lxb_html_tree_build(lxb_html_tree_t *tree, lxb_html_document_t *document,
256 const lxb_char_t *html, size_t size)
257 {
258 tree->status = lxb_html_tree_begin(tree, document);
259 if (tree->status != LXB_STATUS_OK) {
260 return tree->status;
261 }
262
263 tree->status = lxb_html_tree_chunk(tree, html, size);
264 if (tree->status != LXB_STATUS_OK) {
265 return tree->status;
266 }
267
268 return lxb_html_tree_end(tree);
269 }
270
271 lxb_inline lxb_dom_node_t *
lxb_html_tree_create_node(lxb_html_tree_t * tree,lxb_tag_id_t tag_id,lxb_ns_id_t ns)272 lxb_html_tree_create_node(lxb_html_tree_t *tree,
273 lxb_tag_id_t tag_id, lxb_ns_id_t ns)
274 {
275 return (lxb_dom_node_t *) lxb_html_interface_create(tree->document,
276 tag_id, ns);
277 }
278
279 lxb_inline bool
lxb_html_tree_node_is(lxb_dom_node_t * node,lxb_tag_id_t tag_id)280 lxb_html_tree_node_is(lxb_dom_node_t *node, lxb_tag_id_t tag_id)
281 {
282 return node->local_name == tag_id && node->ns == LXB_NS_HTML;
283 }
284
285 lxb_inline lxb_dom_node_t *
lxb_html_tree_current_node(lxb_html_tree_t * tree)286 lxb_html_tree_current_node(lxb_html_tree_t *tree)
287 {
288 if (tree->open_elements->length == 0) {
289 return NULL;
290 }
291
292 return (lxb_dom_node_t *)
293 tree->open_elements->list[ (tree->open_elements->length - 1) ];
294 }
295
296 lxb_inline lxb_dom_node_t *
lxb_html_tree_adjusted_current_node(lxb_html_tree_t * tree)297 lxb_html_tree_adjusted_current_node(lxb_html_tree_t *tree)
298 {
299 if(tree->fragment != NULL && tree->open_elements->length == 1) {
300 return lxb_dom_interface_node(tree->fragment);
301 }
302
303 return lxb_html_tree_current_node(tree);
304 }
305
306 lxb_inline lxb_html_element_t *
lxb_html_tree_insert_html_element(lxb_html_tree_t * tree,lxb_html_token_t * token)307 lxb_html_tree_insert_html_element(lxb_html_tree_t *tree,
308 lxb_html_token_t *token)
309 {
310 return lxb_html_tree_insert_foreign_element(tree, token, LXB_NS_HTML);
311 }
312
313 lxb_inline void
lxb_html_tree_insert_node(lxb_dom_node_t * to,lxb_dom_node_t * node,lxb_html_tree_insertion_position_t ipos)314 lxb_html_tree_insert_node(lxb_dom_node_t *to, lxb_dom_node_t *node,
315 lxb_html_tree_insertion_position_t ipos)
316 {
317 if (ipos == LXB_HTML_TREE_INSERTION_POSITION_BEFORE) {
318 lxb_dom_node_insert_before_wo_events(to, node);
319 return;
320 }
321
322 lxb_dom_node_insert_child_wo_events(to, node);
323 }
324
325 /* TODO: if we not need to save parse errors?! */
326 lxb_inline void
lxb_html_tree_acknowledge_token_self_closing(lxb_html_tree_t * tree,lxb_html_token_t * token)327 lxb_html_tree_acknowledge_token_self_closing(lxb_html_tree_t *tree,
328 lxb_html_token_t *token)
329 {
330 if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE_SELF) == 0) {
331 return;
332 }
333
334 bool is_void = lxb_html_tag_is_void(token->tag_id);
335
336 if (is_void) {
337 lxb_html_tree_parse_error(tree, token,
338 LXB_HTML_RULES_ERROR_NOVOHTELSTTAWITRSO);
339 }
340 }
341
342 lxb_inline bool
lxb_html_tree_mathml_text_integration_point(lxb_dom_node_t * node)343 lxb_html_tree_mathml_text_integration_point(lxb_dom_node_t *node)
344 {
345 if (node->ns == LXB_NS_MATH) {
346 switch (node->local_name) {
347 case LXB_TAG_MI:
348 case LXB_TAG_MO:
349 case LXB_TAG_MN:
350 case LXB_TAG_MS:
351 case LXB_TAG_MTEXT:
352 return true;
353 }
354 }
355
356 return false;
357 }
358
359 lxb_inline bool
lxb_html_tree_scripting(lxb_html_tree_t * tree)360 lxb_html_tree_scripting(lxb_html_tree_t *tree)
361 {
362 return tree->scripting;
363 }
364
365 lxb_inline void
lxb_html_tree_scripting_set(lxb_html_tree_t * tree,bool scripting)366 lxb_html_tree_scripting_set(lxb_html_tree_t *tree, bool scripting)
367 {
368 tree->scripting = scripting;
369 }
370
371 lxb_inline void
lxb_html_tree_attach_document(lxb_html_tree_t * tree,lxb_html_document_t * doc)372 lxb_html_tree_attach_document(lxb_html_tree_t *tree, lxb_html_document_t *doc)
373 {
374 tree->document = doc;
375 }
376
377 /*
378 * No inline functions for ABI.
379 */
380 LXB_API lxb_status_t
381 lxb_html_tree_begin_noi(lxb_html_tree_t *tree, lxb_html_document_t *document);
382
383 LXB_API lxb_status_t
384 lxb_html_tree_chunk_noi(lxb_html_tree_t *tree, const lxb_char_t *html,
385 size_t size);
386
387 LXB_API lxb_status_t
388 lxb_html_tree_end_noi(lxb_html_tree_t *tree);
389
390 LXB_API lxb_status_t
391 lxb_html_tree_build_noi(lxb_html_tree_t *tree, lxb_html_document_t *document,
392 const lxb_char_t *html, size_t size);
393
394 LXB_API lxb_dom_node_t *
395 lxb_html_tree_create_node_noi(lxb_html_tree_t *tree,
396 lxb_tag_id_t tag_id, lxb_ns_id_t ns);
397
398 LXB_API bool
399 lxb_html_tree_node_is_noi(lxb_dom_node_t *node, lxb_tag_id_t tag_id);
400
401 LXB_API lxb_dom_node_t *
402 lxb_html_tree_current_node_noi(lxb_html_tree_t *tree);
403
404 LXB_API lxb_dom_node_t *
405 lxb_html_tree_adjusted_current_node_noi(lxb_html_tree_t *tree);
406
407 LXB_API lxb_html_element_t *
408 lxb_html_tree_insert_html_element_noi(lxb_html_tree_t *tree,
409 lxb_html_token_t *token);
410
411 LXB_API void
412 lxb_html_tree_insert_node_noi(lxb_dom_node_t *to, lxb_dom_node_t *node,
413 lxb_html_tree_insertion_position_t ipos);
414
415 LXB_API void
416 lxb_html_tree_acknowledge_token_self_closing_noi(lxb_html_tree_t *tree,
417 lxb_html_token_t *token);
418
419 LXB_API bool
420 lxb_html_tree_mathml_text_integration_point_noi(lxb_dom_node_t *node);
421
422 LXB_API bool
423 lxb_html_tree_scripting_noi(lxb_html_tree_t *tree);
424
425 LXB_API void
426 lxb_html_tree_scripting_set_noi(lxb_html_tree_t *tree, bool scripting);
427
428 LXB_API void
429 lxb_html_tree_attach_document_noi(lxb_html_tree_t *tree,
430 lxb_html_document_t *doc);
431
432
433 #ifdef __cplusplus
434 } /* extern "C" */
435 #endif
436
437 #endif /* LEXBOR_HTML_TREE_H */
438