1 /*
2 * Copyright (C) 2018 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7 #ifndef LEXBOR_HTML_TREE_H
8 #define LEXBOR_HTML_TREE_H
9
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13
14 #include "lexbor/dom/interfaces/node.h"
15 #include "lexbor/dom/interfaces/attr.h"
16
17 #include "lexbor/html/base.h"
18 #include "lexbor/html/node.h"
19 #include "lexbor/html/tokenizer.h"
20 #include "lexbor/html/interfaces/document.h"
21 #include "lexbor/html/tag.h"
22 #include "lexbor/html/tree/error.h"
23
24
25 typedef bool
26 (*lxb_html_tree_insertion_mode_f)(lxb_html_tree_t *tree,
27 lxb_html_token_t *token);
28
29 typedef lxb_status_t
30 (*lxb_html_tree_append_attr_f)(lxb_html_tree_t *tree,
31 lxb_dom_attr_t *attr, void *ctx);
32
33 typedef struct {
34 lexbor_array_obj_t *text_list;
35 bool have_non_ws;
36 }
37 lxb_html_tree_pending_table_t;
38
39 struct lxb_html_tree {
40 lxb_html_tokenizer_t *tkz_ref;
41
42 lxb_html_document_t *document;
43 lxb_dom_node_t *fragment;
44
45 lxb_html_form_element_t *form;
46
47 lexbor_array_t *open_elements;
48 lexbor_array_t *active_formatting;
49 lexbor_array_obj_t *template_insertion_modes;
50
51 lxb_html_tree_pending_table_t pending_table;
52
53 lexbor_array_obj_t *parse_errors;
54
55 bool foster_parenting;
56 bool frameset_ok;
57 bool scripting;
58 bool has_explicit_html_tag;
59 bool has_explicit_head_tag;
60 bool has_explicit_body_tag;
61
62 lxb_html_tree_insertion_mode_f mode;
63 lxb_html_tree_insertion_mode_f original_mode;
64 lxb_html_tree_append_attr_f before_append_attr;
65
66 lxb_status_t status;
67
68 size_t ref_count;
69 };
70
/*
 * Where lxb_html_tree_insert_node() places a node relative to its target:
 * BEFORE selects lxb_dom_node_insert_before_wo_events(), any other value
 * selects lxb_dom_node_insert_child_wo_events().
 */
typedef enum {
    LXB_HTML_TREE_INSERTION_POSITION_CHILD  = 0x00,
    LXB_HTML_TREE_INSERTION_POSITION_BEFORE = 0x01
}
lxb_html_tree_insertion_position_t;
76
77
78 LXB_API lxb_html_tree_t *
79 lxb_html_tree_create(void);
80
81 LXB_API lxb_status_t
82 lxb_html_tree_init(lxb_html_tree_t *tree, lxb_html_tokenizer_t *tkz);
83
84 LXB_API lxb_html_tree_t *
85 lxb_html_tree_ref(lxb_html_tree_t *tree);
86
87 LXB_API lxb_html_tree_t *
88 lxb_html_tree_unref(lxb_html_tree_t *tree);
89
90 LXB_API void
91 lxb_html_tree_clean(lxb_html_tree_t *tree);
92
93 LXB_API lxb_html_tree_t *
94 lxb_html_tree_destroy(lxb_html_tree_t *tree);
95
96 LXB_API lxb_status_t
97 lxb_html_tree_stop_parsing(lxb_html_tree_t *tree);
98
99 LXB_API bool
100 lxb_html_tree_process_abort(lxb_html_tree_t *tree);
101
102 LXB_API void
103 lxb_html_tree_parse_error(lxb_html_tree_t *tree, lxb_html_token_t *token,
104 lxb_html_tree_error_id_t id);
105
106 LXB_API bool
107 lxb_html_tree_construction_dispatcher(lxb_html_tree_t *tree,
108 lxb_html_token_t *token);
109
110 LXB_API lxb_dom_node_t *
111 lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t *tree,
112 lxb_dom_node_t *override_target,
113 lxb_html_tree_insertion_position_t *ipos);
114
115 LXB_API lxb_html_element_t *
116 lxb_html_tree_insert_foreign_element(lxb_html_tree_t *tree,
117 lxb_html_token_t *token, lxb_ns_id_t ns);
118
119 LXB_API lxb_html_element_t *
120 lxb_html_tree_create_element_for_token(lxb_html_tree_t *tree,
121 lxb_html_token_t *token, lxb_ns_id_t ns,
122 lxb_dom_node_t *parent);
123
124 LXB_API lxb_status_t
125 lxb_html_tree_append_attributes(lxb_html_tree_t *tree,
126 lxb_dom_element_t *element,
127 lxb_html_token_t *token, lxb_ns_id_t ns);
128
129 LXB_API lxb_status_t
130 lxb_html_tree_append_attributes_from_element(lxb_html_tree_t *tree,
131 lxb_dom_element_t *element,
132 lxb_dom_element_t *from,
133 lxb_ns_id_t ns);
134
135 LXB_API lxb_status_t
136 lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t *tree,
137 lxb_dom_attr_t *attr, void *ctx);
138
139 LXB_API lxb_status_t
140 lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t *tree,
141 lxb_dom_attr_t *attr, void *ctx);
142
143 LXB_API lxb_status_t
144 lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t *tree,
145 lxb_dom_attr_t *attr, void *ctx);
146
147 LXB_API lxb_status_t
148 lxb_html_tree_insert_character(lxb_html_tree_t *tree, lxb_html_token_t *token,
149 lxb_dom_node_t **ret_node);
150
151 LXB_API lxb_status_t
152 lxb_html_tree_insert_character_for_data(lxb_html_tree_t *tree,
153 lexbor_str_t *str,
154 lxb_dom_node_t **ret_node);
155
156 LXB_API lxb_dom_comment_t *
157 lxb_html_tree_insert_comment(lxb_html_tree_t *tree,
158 lxb_html_token_t *token, lxb_dom_node_t *pos);
159
160 LXB_API lxb_dom_document_type_t *
161 lxb_html_tree_create_document_type_from_token(lxb_html_tree_t *tree,
162 lxb_html_token_t *token);
163
164 LXB_API void
165 lxb_html_tree_node_delete_deep(lxb_html_tree_t *tree, lxb_dom_node_t *node);
166
167 LXB_API lxb_html_element_t *
168 lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t *tree,
169 lxb_html_token_t *token);
170
171 LXB_API lxb_html_element_t *
172 lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t *tree,
173 lxb_html_token_t *token);
174
175 LXB_API void
176 lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t *tree,
177 lxb_tag_id_t ex_tag, lxb_ns_id_t ex_ns);
178
179 LXB_API void
180 lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t *tree,
181 lxb_tag_id_t ex_tag,
182 lxb_ns_id_t ex_ns);
183
184 LXB_API void
185 lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t *tree);
186
187 LXB_API lxb_dom_node_t *
188 lxb_html_tree_element_in_scope(lxb_html_tree_t *tree, lxb_tag_id_t tag_id,
189 lxb_ns_id_t ns, lxb_html_tag_category_t ct);
190
191 LXB_API lxb_dom_node_t *
192 lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t *tree,
193 lxb_dom_node_t *by_node,
194 lxb_html_tag_category_t ct);
195
196 LXB_API lxb_dom_node_t *
197 lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t *tree);
198
199 LXB_API lxb_dom_node_t *
200 lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t *tree);
201
202 LXB_API lxb_dom_node_t *
203 lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t *tree);
204
205 LXB_API bool
206 lxb_html_tree_check_scope_element(lxb_html_tree_t *tree);
207
208 LXB_API void
209 lxb_html_tree_close_p_element(lxb_html_tree_t *tree, lxb_html_token_t *token);
210
211 LXB_API bool
212 lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t *tree,
213 lxb_html_token_t *token,
214 lxb_status_t *status);
215
216 LXB_API bool
217 lxb_html_tree_html_integration_point(lxb_dom_node_t *node);
218
219 LXB_API lxb_status_t
220 lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t *tree,
221 lxb_dom_attr_t *attr, void *ctx);
222
223 LXB_API lxb_status_t
224 lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t *tree,
225 lxb_dom_attr_t *attr, void *ctx);
226
227
228 /*
229 * Inline functions
230 */
231 lxb_inline lxb_status_t
lxb_html_tree_begin(lxb_html_tree_t * tree,lxb_html_document_t * document)232 lxb_html_tree_begin(lxb_html_tree_t *tree, lxb_html_document_t *document)
233 {
234 tree->document = document;
235
236 return lxb_html_tokenizer_begin(tree->tkz_ref);
237 }
238
239 lxb_inline lxb_status_t
lxb_html_tree_chunk(lxb_html_tree_t * tree,const lxb_char_t * html,size_t size)240 lxb_html_tree_chunk(lxb_html_tree_t *tree, const lxb_char_t *html, size_t size)
241 {
242 return lxb_html_tokenizer_chunk(tree->tkz_ref, html, size);
243 }
244
245 lxb_inline lxb_status_t
lxb_html_tree_end(lxb_html_tree_t * tree)246 lxb_html_tree_end(lxb_html_tree_t *tree)
247 {
248 if (tree->document->done != NULL) {
249 tree->document->done(tree->document);
250 }
251
252 return lxb_html_tokenizer_end(tree->tkz_ref);
253 }
254
255 lxb_inline lxb_status_t
lxb_html_tree_build(lxb_html_tree_t * tree,lxb_html_document_t * document,const lxb_char_t * html,size_t size)256 lxb_html_tree_build(lxb_html_tree_t *tree, lxb_html_document_t *document,
257 const lxb_char_t *html, size_t size)
258 {
259 tree->status = lxb_html_tree_begin(tree, document);
260 if (tree->status != LXB_STATUS_OK) {
261 return tree->status;
262 }
263
264 tree->status = lxb_html_tree_chunk(tree, html, size);
265 if (tree->status != LXB_STATUS_OK) {
266 return tree->status;
267 }
268
269 return lxb_html_tree_end(tree);
270 }
271
272 lxb_inline lxb_dom_node_t *
lxb_html_tree_create_node(lxb_html_tree_t * tree,lxb_tag_id_t tag_id,lxb_ns_id_t ns)273 lxb_html_tree_create_node(lxb_html_tree_t *tree,
274 lxb_tag_id_t tag_id, lxb_ns_id_t ns)
275 {
276 return (lxb_dom_node_t *) lxb_html_interface_create(tree->document,
277 tag_id, ns);
278 }
279
280 lxb_inline bool
lxb_html_tree_node_is(lxb_dom_node_t * node,lxb_tag_id_t tag_id)281 lxb_html_tree_node_is(lxb_dom_node_t *node, lxb_tag_id_t tag_id)
282 {
283 return node->local_name == tag_id && node->ns == LXB_NS_HTML;
284 }
285
286 lxb_inline lxb_dom_node_t *
lxb_html_tree_current_node(lxb_html_tree_t * tree)287 lxb_html_tree_current_node(lxb_html_tree_t *tree)
288 {
289 if (tree->open_elements->length == 0) {
290 return NULL;
291 }
292
293 return (lxb_dom_node_t *)
294 tree->open_elements->list[ (tree->open_elements->length - 1) ];
295 }
296
297 lxb_inline lxb_dom_node_t *
lxb_html_tree_adjusted_current_node(lxb_html_tree_t * tree)298 lxb_html_tree_adjusted_current_node(lxb_html_tree_t *tree)
299 {
300 if(tree->fragment != NULL && tree->open_elements->length == 1) {
301 return lxb_dom_interface_node(tree->fragment);
302 }
303
304 return lxb_html_tree_current_node(tree);
305 }
306
307 lxb_inline lxb_html_element_t *
lxb_html_tree_insert_html_element(lxb_html_tree_t * tree,lxb_html_token_t * token)308 lxb_html_tree_insert_html_element(lxb_html_tree_t *tree,
309 lxb_html_token_t *token)
310 {
311 return lxb_html_tree_insert_foreign_element(tree, token, LXB_NS_HTML);
312 }
313
314 lxb_inline void
lxb_html_tree_insert_node(lxb_dom_node_t * to,lxb_dom_node_t * node,lxb_html_tree_insertion_position_t ipos)315 lxb_html_tree_insert_node(lxb_dom_node_t *to, lxb_dom_node_t *node,
316 lxb_html_tree_insertion_position_t ipos)
317 {
318 if (ipos == LXB_HTML_TREE_INSERTION_POSITION_BEFORE) {
319 lxb_dom_node_insert_before_wo_events(to, node);
320 return;
321 }
322
323 lxb_dom_node_insert_child_wo_events(to, node);
324 }
325
326 /* TODO: if we not need to save parse errors?! */
327 lxb_inline void
lxb_html_tree_acknowledge_token_self_closing(lxb_html_tree_t * tree,lxb_html_token_t * token)328 lxb_html_tree_acknowledge_token_self_closing(lxb_html_tree_t *tree,
329 lxb_html_token_t *token)
330 {
331 if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE_SELF) == 0) {
332 return;
333 }
334
335 bool is_void = lxb_html_tag_is_void(token->tag_id);
336
337 if (is_void) {
338 lxb_html_tree_parse_error(tree, token,
339 LXB_HTML_RULES_ERROR_NOVOHTELSTTAWITRSO);
340 }
341 }
342
343 lxb_inline bool
lxb_html_tree_mathml_text_integration_point(lxb_dom_node_t * node)344 lxb_html_tree_mathml_text_integration_point(lxb_dom_node_t *node)
345 {
346 if (node->ns == LXB_NS_MATH) {
347 switch (node->local_name) {
348 case LXB_TAG_MI:
349 case LXB_TAG_MO:
350 case LXB_TAG_MN:
351 case LXB_TAG_MS:
352 case LXB_TAG_MTEXT:
353 return true;
354 }
355 }
356
357 return false;
358 }
359
360 lxb_inline bool
lxb_html_tree_scripting(lxb_html_tree_t * tree)361 lxb_html_tree_scripting(lxb_html_tree_t *tree)
362 {
363 return tree->scripting;
364 }
365
366 lxb_inline void
lxb_html_tree_scripting_set(lxb_html_tree_t * tree,bool scripting)367 lxb_html_tree_scripting_set(lxb_html_tree_t *tree, bool scripting)
368 {
369 tree->scripting = scripting;
370 }
371
372 lxb_inline void
lxb_html_tree_attach_document(lxb_html_tree_t * tree,lxb_html_document_t * doc)373 lxb_html_tree_attach_document(lxb_html_tree_t *tree, lxb_html_document_t *doc)
374 {
375 tree->document = doc;
376 }
377
378 /*
379 * No inline functions for ABI.
380 */
381 LXB_API lxb_status_t
382 lxb_html_tree_begin_noi(lxb_html_tree_t *tree, lxb_html_document_t *document);
383
384 LXB_API lxb_status_t
385 lxb_html_tree_chunk_noi(lxb_html_tree_t *tree, const lxb_char_t *html,
386 size_t size);
387
388 LXB_API lxb_status_t
389 lxb_html_tree_end_noi(lxb_html_tree_t *tree);
390
391 LXB_API lxb_status_t
392 lxb_html_tree_build_noi(lxb_html_tree_t *tree, lxb_html_document_t *document,
393 const lxb_char_t *html, size_t size);
394
395 LXB_API lxb_dom_node_t *
396 lxb_html_tree_create_node_noi(lxb_html_tree_t *tree,
397 lxb_tag_id_t tag_id, lxb_ns_id_t ns);
398
399 LXB_API bool
400 lxb_html_tree_node_is_noi(lxb_dom_node_t *node, lxb_tag_id_t tag_id);
401
402 LXB_API lxb_dom_node_t *
403 lxb_html_tree_current_node_noi(lxb_html_tree_t *tree);
404
405 LXB_API lxb_dom_node_t *
406 lxb_html_tree_adjusted_current_node_noi(lxb_html_tree_t *tree);
407
408 LXB_API lxb_html_element_t *
409 lxb_html_tree_insert_html_element_noi(lxb_html_tree_t *tree,
410 lxb_html_token_t *token);
411
412 LXB_API void
413 lxb_html_tree_insert_node_noi(lxb_dom_node_t *to, lxb_dom_node_t *node,
414 lxb_html_tree_insertion_position_t ipos);
415
416 LXB_API void
417 lxb_html_tree_acknowledge_token_self_closing_noi(lxb_html_tree_t *tree,
418 lxb_html_token_t *token);
419
420 LXB_API bool
421 lxb_html_tree_mathml_text_integration_point_noi(lxb_dom_node_t *node);
422
423 LXB_API bool
424 lxb_html_tree_scripting_noi(lxb_html_tree_t *tree);
425
426 LXB_API void
427 lxb_html_tree_scripting_set_noi(lxb_html_tree_t *tree, bool scripting);
428
429 LXB_API void
430 lxb_html_tree_attach_document_noi(lxb_html_tree_t *tree,
431 lxb_html_document_t *doc);
432
433
434 #ifdef __cplusplus
435 } /* extern "C" */
436 #endif
437
438 #endif /* LEXBOR_HTML_TREE_H */
439