xref: /php-src/ext/dom/lexbor/lexbor/html/tree.h (revision bffab33a)
1 /*
2  * Copyright (C) 2018 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #ifndef LEXBOR_HTML_TREE_H
8 #define LEXBOR_HTML_TREE_H
9 
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13 
14 #include "lexbor/dom/interfaces/node.h"
15 #include "lexbor/dom/interfaces/attr.h"
16 
17 #include "lexbor/html/base.h"
18 #include "lexbor/html/node.h"
19 #include "lexbor/html/tokenizer.h"
20 #include "lexbor/html/interfaces/document.h"
21 #include "lexbor/html/tag.h"
22 #include "lexbor/html/tree/error.h"
23 
24 
/*
 * Signature of an insertion-mode handler: it receives the tree and a token
 * and returns a bool. struct lxb_html_tree keeps handlers of this type in its
 * `mode` and `original_mode` fields.
 *
 * NOTE(review): what the returned bool means is not visible in this header --
 * confirm against the tree construction dispatcher.
 */
typedef bool
(*lxb_html_tree_insertion_mode_f)(lxb_html_tree_t *tree,
                                  lxb_html_token_t *token);
28 
/*
 * Signature of an attribute callback: it receives the tree, an attribute and
 * an opaque user context, and returns a status code. struct lxb_html_tree
 * keeps one such callback in `before_append_attr`; the
 * lxb_html_tree_adjust_*_attributes() functions declared in this header have
 * the same parameter list and return type.
 *
 * NOTE(review): presumably invoked before an attribute is appended to an
 * element -- confirm against the implementation.
 */
typedef lxb_status_t
(*lxb_html_tree_append_attr_f)(lxb_html_tree_t *tree,
                               lxb_dom_attr_t *attr, void *ctx);
32 
/*
 * State embedded in struct lxb_html_tree as `pending_table`.
 *
 * NOTE(review): presumably the pending table character tokens of the HTML
 * parsing algorithm -- confirm against the tree builder sources.
 */
typedef struct {
    /* Array object; its element type is not visible in this header. */
    lexbor_array_obj_t *text_list;
    /* Presumably true once non-whitespace text was collected -- TODO confirm. */
    bool               have_non_ws;
}
lxb_html_tree_pending_table_t;
38 
/*
 * Tree builder state.
 *
 * Only the fields touched by the inline helpers in this header are described
 * as fact; the remaining notes are assumptions to be confirmed against the
 * implementation.
 */
struct lxb_html_tree {
    /* Tokenizer passed to lxb_html_tokenizer_begin/_chunk/_end by the helpers. */
    lxb_html_tokenizer_t           *tkz_ref;

    /*
     * Set by lxb_html_tree_begin() and lxb_html_tree_attach_document(); used
     * by lxb_html_tree_create_node() and for the `done` callback in
     * lxb_html_tree_end().
     */
    lxb_html_document_t            *document;
    /*
     * When non-NULL and exactly one element is open,
     * lxb_html_tree_adjusted_current_node() returns this node.
     */
    lxb_dom_node_t                 *fragment;

    /* Presumably the parser's form element pointer -- TODO confirm. */
    lxb_html_form_element_t        *form;

    /* Array whose last entry lxb_html_tree_current_node() returns. */
    lexbor_array_t                 *open_elements;
    /* Presumably the list of active formatting elements -- TODO confirm. */
    lexbor_array_t                 *active_formatting;
    /* Presumably the stack of template insertion modes -- TODO confirm. */
    lexbor_array_obj_t             *template_insertion_modes;

    lxb_html_tree_pending_table_t  pending_table;

    /* Presumably filled by lxb_html_tree_parse_error() -- TODO confirm. */
    lexbor_array_obj_t             *parse_errors;

    bool                           foster_parenting;
    bool                           frameset_ok;
    /* Read by lxb_html_tree_scripting(), written by ..._scripting_set(). */
    bool                           scripting;
    bool                           has_explicit_html_tag;
    bool                           has_explicit_head_tag;
    bool                           has_explicit_body_tag;

    /* Insertion-mode handlers; see lxb_html_tree_insertion_mode_f. */
    lxb_html_tree_insertion_mode_f mode;
    lxb_html_tree_insertion_mode_f original_mode;
    /* Attribute callback; see lxb_html_tree_append_attr_f. */
    lxb_html_tree_append_attr_f    before_append_attr;

    /* lxb_html_tree_build() stores the begin and chunk results here. */
    lxb_status_t                   status;

    /* Presumably maintained by lxb_html_tree_ref()/_unref() -- TODO confirm. */
    size_t                         ref_count;
};
70 
/*
 * Selects how lxb_html_tree_insert_node() attaches a node to its target:
 * BEFORE inserts the node before the target, any other value (CHILD) inserts
 * it as a child of the target.
 */
typedef enum {
    LXB_HTML_TREE_INSERTION_POSITION_CHILD  = 0x00,
    LXB_HTML_TREE_INSERTION_POSITION_BEFORE = 0x01
}
lxb_html_tree_insertion_position_t;
76 
77 
78 LXB_API lxb_html_tree_t *
79 lxb_html_tree_create(void);
80 
81 LXB_API lxb_status_t
82 lxb_html_tree_init(lxb_html_tree_t *tree, lxb_html_tokenizer_t *tkz);
83 
84 LXB_API lxb_html_tree_t *
85 lxb_html_tree_ref(lxb_html_tree_t *tree);
86 
87 LXB_API lxb_html_tree_t *
88 lxb_html_tree_unref(lxb_html_tree_t *tree);
89 
90 LXB_API void
91 lxb_html_tree_clean(lxb_html_tree_t *tree);
92 
93 LXB_API lxb_html_tree_t *
94 lxb_html_tree_destroy(lxb_html_tree_t *tree);
95 
96 LXB_API lxb_status_t
97 lxb_html_tree_stop_parsing(lxb_html_tree_t *tree);
98 
99 LXB_API bool
100 lxb_html_tree_process_abort(lxb_html_tree_t *tree);
101 
102 LXB_API void
103 lxb_html_tree_parse_error(lxb_html_tree_t *tree, lxb_html_token_t *token,
104                           lxb_html_tree_error_id_t id);
105 
106 LXB_API bool
107 lxb_html_tree_construction_dispatcher(lxb_html_tree_t *tree,
108                                       lxb_html_token_t *token);
109 
110 LXB_API lxb_dom_node_t *
111 lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t *tree,
112                                       lxb_dom_node_t *override_target,
113                                       lxb_html_tree_insertion_position_t *ipos);
114 
115 LXB_API lxb_html_element_t *
116 lxb_html_tree_insert_foreign_element(lxb_html_tree_t *tree,
117                                      lxb_html_token_t *token, lxb_ns_id_t ns);
118 
119 LXB_API lxb_html_element_t *
120 lxb_html_tree_create_element_for_token(lxb_html_tree_t *tree,
121                                        lxb_html_token_t *token, lxb_ns_id_t ns,
122                                        lxb_dom_node_t *parent);
123 
124 LXB_API lxb_status_t
125 lxb_html_tree_append_attributes(lxb_html_tree_t *tree,
126                                 lxb_dom_element_t *element,
127                                 lxb_html_token_t *token, lxb_ns_id_t ns);
128 
129 LXB_API lxb_status_t
130 lxb_html_tree_append_attributes_from_element(lxb_html_tree_t *tree,
131                                              lxb_dom_element_t *element,
132                                              lxb_dom_element_t *from,
133                                              lxb_ns_id_t ns);
134 
135 LXB_API lxb_status_t
136 lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t *tree,
137                                        lxb_dom_attr_t *attr, void *ctx);
138 
139 LXB_API lxb_status_t
140 lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t *tree,
141                                     lxb_dom_attr_t *attr, void *ctx);
142 
143 LXB_API lxb_status_t
144 lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t *tree,
145                                         lxb_dom_attr_t *attr, void *ctx);
146 
147 LXB_API lxb_status_t
148 lxb_html_tree_insert_character(lxb_html_tree_t *tree, lxb_html_token_t *token,
149                                lxb_dom_node_t **ret_node);
150 
151 LXB_API lxb_status_t
152 lxb_html_tree_insert_character_for_data(lxb_html_tree_t *tree,
153                                         lexbor_str_t *str,
154                                         lxb_dom_node_t **ret_node);
155 
156 LXB_API lxb_dom_comment_t *
157 lxb_html_tree_insert_comment(lxb_html_tree_t *tree,
158                              lxb_html_token_t *token, lxb_dom_node_t *pos);
159 
160 LXB_API lxb_dom_document_type_t *
161 lxb_html_tree_create_document_type_from_token(lxb_html_tree_t *tree,
162                                               lxb_html_token_t *token);
163 
164 LXB_API void
165 lxb_html_tree_node_delete_deep(lxb_html_tree_t *tree, lxb_dom_node_t *node);
166 
167 LXB_API lxb_html_element_t *
168 lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t *tree,
169                                       lxb_html_token_t *token);
170 
171 LXB_API lxb_html_element_t *
172 lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t *tree,
173                                      lxb_html_token_t *token);
174 
175 LXB_API void
176 lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t *tree,
177                                         lxb_tag_id_t ex_tag, lxb_ns_id_t ex_ns);
178 
179 LXB_API void
180 lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t *tree,
181                                                        lxb_tag_id_t ex_tag,
182                                                        lxb_ns_id_t ex_ns);
183 
184 LXB_API void
185 lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t *tree);
186 
187 LXB_API lxb_dom_node_t *
188 lxb_html_tree_element_in_scope(lxb_html_tree_t *tree, lxb_tag_id_t tag_id,
189                                lxb_ns_id_t ns, lxb_html_tag_category_t ct);
190 
191 LXB_API lxb_dom_node_t *
192 lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t *tree,
193                                        lxb_dom_node_t *by_node,
194                                        lxb_html_tag_category_t ct);
195 
196 LXB_API lxb_dom_node_t *
197 lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t *tree);
198 
199 LXB_API lxb_dom_node_t *
200 lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t *tree);
201 
202 LXB_API lxb_dom_node_t *
203 lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t *tree);
204 
205 LXB_API bool
206 lxb_html_tree_check_scope_element(lxb_html_tree_t *tree);
207 
208 LXB_API void
209 lxb_html_tree_close_p_element(lxb_html_tree_t *tree, lxb_html_token_t *token);
210 
211 LXB_API bool
212 lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t *tree,
213                                         lxb_html_token_t *token,
214                                         lxb_status_t *status);
215 
216 LXB_API bool
217 lxb_html_tree_html_integration_point(lxb_dom_node_t *node);
218 
219 LXB_API lxb_status_t
220 lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t *tree,
221                                        lxb_dom_attr_t *attr, void *ctx);
222 
223 LXB_API lxb_status_t
224 lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t *tree,
225                                     lxb_dom_attr_t *attr, void *ctx);
226 
227 
228 /*
229  * Inline functions
230  */
231 lxb_inline lxb_status_t
lxb_html_tree_begin(lxb_html_tree_t * tree,lxb_html_document_t * document)232 lxb_html_tree_begin(lxb_html_tree_t *tree, lxb_html_document_t *document)
233 {
234     tree->document = document;
235 
236     return lxb_html_tokenizer_begin(tree->tkz_ref);
237 }
238 
239 lxb_inline lxb_status_t
lxb_html_tree_chunk(lxb_html_tree_t * tree,const lxb_char_t * html,size_t size)240 lxb_html_tree_chunk(lxb_html_tree_t *tree, const lxb_char_t *html, size_t size)
241 {
242     return lxb_html_tokenizer_chunk(tree->tkz_ref, html, size);
243 }
244 
245 lxb_inline lxb_status_t
lxb_html_tree_end(lxb_html_tree_t * tree)246 lxb_html_tree_end(lxb_html_tree_t *tree)
247 {
248     if (tree->document->done != NULL) {
249         tree->document->done(tree->document);
250     }
251 
252     return lxb_html_tokenizer_end(tree->tkz_ref);
253 }
254 
255 lxb_inline lxb_status_t
lxb_html_tree_build(lxb_html_tree_t * tree,lxb_html_document_t * document,const lxb_char_t * html,size_t size)256 lxb_html_tree_build(lxb_html_tree_t *tree, lxb_html_document_t *document,
257                     const lxb_char_t *html, size_t size)
258 {
259     tree->status = lxb_html_tree_begin(tree, document);
260     if (tree->status != LXB_STATUS_OK) {
261         return tree->status;
262     }
263 
264     tree->status = lxb_html_tree_chunk(tree, html, size);
265     if (tree->status != LXB_STATUS_OK) {
266         return tree->status;
267     }
268 
269     return lxb_html_tree_end(tree);
270 }
271 
272 lxb_inline lxb_dom_node_t *
lxb_html_tree_create_node(lxb_html_tree_t * tree,lxb_tag_id_t tag_id,lxb_ns_id_t ns)273 lxb_html_tree_create_node(lxb_html_tree_t *tree,
274                           lxb_tag_id_t tag_id, lxb_ns_id_t ns)
275 {
276     return (lxb_dom_node_t *) lxb_html_interface_create(tree->document,
277                                                         tag_id, ns);
278 }
279 
280 lxb_inline bool
lxb_html_tree_node_is(lxb_dom_node_t * node,lxb_tag_id_t tag_id)281 lxb_html_tree_node_is(lxb_dom_node_t *node, lxb_tag_id_t tag_id)
282 {
283     return node->local_name == tag_id && node->ns == LXB_NS_HTML;
284 }
285 
286 lxb_inline lxb_dom_node_t *
lxb_html_tree_current_node(lxb_html_tree_t * tree)287 lxb_html_tree_current_node(lxb_html_tree_t *tree)
288 {
289     if (tree->open_elements->length == 0) {
290         return NULL;
291     }
292 
293     return (lxb_dom_node_t *)
294         tree->open_elements->list[ (tree->open_elements->length - 1) ];
295 }
296 
297 lxb_inline lxb_dom_node_t *
lxb_html_tree_adjusted_current_node(lxb_html_tree_t * tree)298 lxb_html_tree_adjusted_current_node(lxb_html_tree_t *tree)
299 {
300     if(tree->fragment != NULL && tree->open_elements->length == 1) {
301         return lxb_dom_interface_node(tree->fragment);
302     }
303 
304     return lxb_html_tree_current_node(tree);
305 }
306 
307 lxb_inline lxb_html_element_t *
lxb_html_tree_insert_html_element(lxb_html_tree_t * tree,lxb_html_token_t * token)308 lxb_html_tree_insert_html_element(lxb_html_tree_t *tree,
309                                   lxb_html_token_t *token)
310 {
311     return lxb_html_tree_insert_foreign_element(tree, token, LXB_NS_HTML);
312 }
313 
314 lxb_inline void
lxb_html_tree_insert_node(lxb_dom_node_t * to,lxb_dom_node_t * node,lxb_html_tree_insertion_position_t ipos)315 lxb_html_tree_insert_node(lxb_dom_node_t *to, lxb_dom_node_t *node,
316                           lxb_html_tree_insertion_position_t ipos)
317 {
318     if (ipos == LXB_HTML_TREE_INSERTION_POSITION_BEFORE) {
319         lxb_dom_node_insert_before_wo_events(to, node);
320         return;
321     }
322 
323     lxb_dom_node_insert_child_wo_events(to, node);
324 }
325 
/* TODO: what if we do not need to save parse errors?! */
327 lxb_inline void
lxb_html_tree_acknowledge_token_self_closing(lxb_html_tree_t * tree,lxb_html_token_t * token)328 lxb_html_tree_acknowledge_token_self_closing(lxb_html_tree_t *tree,
329                                              lxb_html_token_t *token)
330 {
331     if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE_SELF) == 0) {
332         return;
333     }
334 
335     bool is_void = lxb_html_tag_is_void(token->tag_id);
336 
337     if (is_void) {
338         lxb_html_tree_parse_error(tree, token,
339                                   LXB_HTML_RULES_ERROR_NOVOHTELSTTAWITRSO);
340     }
341 }
342 
343 lxb_inline bool
lxb_html_tree_mathml_text_integration_point(lxb_dom_node_t * node)344 lxb_html_tree_mathml_text_integration_point(lxb_dom_node_t *node)
345 {
346     if (node->ns == LXB_NS_MATH) {
347         switch (node->local_name) {
348             case LXB_TAG_MI:
349             case LXB_TAG_MO:
350             case LXB_TAG_MN:
351             case LXB_TAG_MS:
352             case LXB_TAG_MTEXT:
353                 return true;
354         }
355     }
356 
357     return false;
358 }
359 
360 lxb_inline bool
lxb_html_tree_scripting(lxb_html_tree_t * tree)361 lxb_html_tree_scripting(lxb_html_tree_t *tree)
362 {
363     return tree->scripting;
364 }
365 
366 lxb_inline void
lxb_html_tree_scripting_set(lxb_html_tree_t * tree,bool scripting)367 lxb_html_tree_scripting_set(lxb_html_tree_t *tree, bool scripting)
368 {
369     tree->scripting = scripting;
370 }
371 
372 lxb_inline void
lxb_html_tree_attach_document(lxb_html_tree_t * tree,lxb_html_document_t * doc)373 lxb_html_tree_attach_document(lxb_html_tree_t *tree, lxb_html_document_t *doc)
374 {
375     tree->document = doc;
376 }
377 
/*
 * Non-inline functions for ABI.
 */
381 LXB_API lxb_status_t
382 lxb_html_tree_begin_noi(lxb_html_tree_t *tree, lxb_html_document_t *document);
383 
384 LXB_API lxb_status_t
385 lxb_html_tree_chunk_noi(lxb_html_tree_t *tree, const lxb_char_t *html,
386                         size_t size);
387 
388 LXB_API lxb_status_t
389 lxb_html_tree_end_noi(lxb_html_tree_t *tree);
390 
391 LXB_API lxb_status_t
392 lxb_html_tree_build_noi(lxb_html_tree_t *tree, lxb_html_document_t *document,
393                         const lxb_char_t *html, size_t size);
394 
395 LXB_API lxb_dom_node_t *
396 lxb_html_tree_create_node_noi(lxb_html_tree_t *tree,
397                               lxb_tag_id_t tag_id, lxb_ns_id_t ns);
398 
399 LXB_API bool
400 lxb_html_tree_node_is_noi(lxb_dom_node_t *node, lxb_tag_id_t tag_id);
401 
402 LXB_API lxb_dom_node_t *
403 lxb_html_tree_current_node_noi(lxb_html_tree_t *tree);
404 
405 LXB_API lxb_dom_node_t *
406 lxb_html_tree_adjusted_current_node_noi(lxb_html_tree_t *tree);
407 
408 LXB_API lxb_html_element_t *
409 lxb_html_tree_insert_html_element_noi(lxb_html_tree_t *tree,
410                                       lxb_html_token_t *token);
411 
412 LXB_API void
413 lxb_html_tree_insert_node_noi(lxb_dom_node_t *to, lxb_dom_node_t *node,
414                               lxb_html_tree_insertion_position_t ipos);
415 
416 LXB_API void
417 lxb_html_tree_acknowledge_token_self_closing_noi(lxb_html_tree_t *tree,
418                                              lxb_html_token_t *token);
419 
420 LXB_API bool
421 lxb_html_tree_mathml_text_integration_point_noi(lxb_dom_node_t *node);
422 
423 LXB_API bool
424 lxb_html_tree_scripting_noi(lxb_html_tree_t *tree);
425 
426 LXB_API void
427 lxb_html_tree_scripting_set_noi(lxb_html_tree_t *tree, bool scripting);
428 
429 LXB_API void
430 lxb_html_tree_attach_document_noi(lxb_html_tree_t *tree,
431                                   lxb_html_document_t *doc);
432 
433 
434 #ifdef __cplusplus
435 } /* extern "C" */
436 #endif
437 
438 #endif /* LEXBOR_HTML_TREE_H */
439