xref: /php-src/ext/dom/lexbor/lexbor/html/tree.h (revision 445c1c92)
1 /*
2  * Copyright (C) 2018 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #ifndef LEXBOR_HTML_TREE_H
8 #define LEXBOR_HTML_TREE_H
9 
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13 
14 #include "lexbor/dom/interfaces/node.h"
15 #include "lexbor/dom/interfaces/attr.h"
16 
17 #include "lexbor/html/base.h"
18 #include "lexbor/html/node.h"
19 #include "lexbor/html/tokenizer.h"
20 #include "lexbor/html/interfaces/document.h"
21 #include "lexbor/html/tag.h"
22 #include "lexbor/html/tree/error.h"
23 
24 
/*
 * Signature of an insertion-mode handler: it receives the tree and one token
 * and returns a bool. Pointers of this type are stored in the `mode` and
 * `original_mode` fields of struct lxb_html_tree.
 *
 * NOTE(review): the meaning of the returned bool is not visible in this
 * header -- confirm against the dispatcher implementation.
 */
25 typedef bool
26 (*lxb_html_tree_insertion_mode_f)(lxb_html_tree_t *tree,
27                                   lxb_html_token_t *token);
28 
/*
 * Signature of a per-attribute callback: it receives the tree, one attribute
 * and an opaque context pointer, and returns a status. A pointer of this type
 * is stored in the `before_append_attr` field of struct lxb_html_tree; the
 * lxb_html_tree_adjust_*_attributes() functions declared in this header have
 * the same parameter list and return type.
 *
 * NOTE(review): when the callback is invoked and how its status is handled
 * is not visible in this header -- confirm in the implementation.
 */
29 typedef lxb_status_t
30 (*lxb_html_tree_append_attr_f)(lxb_html_tree_t *tree,
31                                lxb_dom_attr_t *attr, void *ctx);
32 
/*
 * Bookkeeping embedded in struct lxb_html_tree as the `pending_table` field:
 * a list of text entries plus a boolean flag.
 *
 * NOTE(review): presumably this backs the "pending table character tokens"
 * list of the HTML Standard, with have_non_ws recording whether any
 * non-whitespace text was collected -- not verifiable from this header;
 * confirm in the insertion-mode sources.
 */
33 typedef struct {
34     lexbor_array_obj_t *text_list;
35     bool               have_non_ws;
36 }
37 lxb_html_tree_pending_table_t;
38 
/*
 * State of the HTML tree builder.
 *
 * What this header itself shows about the fields:
 *   - tkz_ref:       tokenizer that the inline lxb_html_tree_begin(),
 *                    lxb_html_tree_chunk() and lxb_html_tree_end() helpers
 *                    forward to.
 *   - document:      target document; assigned by lxb_html_tree_begin() and
 *                    lxb_html_tree_attach_document(), read by
 *                    lxb_html_tree_create_node() and lxb_html_tree_end().
 *   - fragment:      when non-NULL and exactly one element is open,
 *                    lxb_html_tree_adjusted_current_node() returns it
 *                    instead of the current node.
 *   - open_elements: array whose last entry is the current node (see
 *                    lxb_html_tree_current_node()).
 *   - scripting:     flag read and written through lxb_html_tree_scripting()
 *                    and lxb_html_tree_scripting_set().
 *   - mode, original_mode: insertion-mode handler pointers.
 *   - before_append_attr:  per-attribute callback pointer.
 *   - status:        receives the results of the begin and chunk steps in
 *                    lxb_html_tree_build().
 *
 * NOTE(review): the remaining fields are not touched by any code in this
 * header. Their names match tree-construction state from the HTML Standard
 * (form element pointer, active formatting elements, template insertion
 * modes, foster parenting, frameset-ok) and ref_count presumably pairs with
 * lxb_html_tree_ref()/lxb_html_tree_unref() -- confirm in the implementation.
 */
39 struct lxb_html_tree {
40     lxb_html_tokenizer_t           *tkz_ref;
41 
42     lxb_html_document_t            *document;
43     lxb_dom_node_t                 *fragment;
44 
45     lxb_html_form_element_t        *form;
46 
47     lexbor_array_t                 *open_elements;
48     lexbor_array_t                 *active_formatting;
49     lexbor_array_obj_t             *template_insertion_modes;
50 
51     lxb_html_tree_pending_table_t  pending_table;
52 
53     lexbor_array_obj_t             *parse_errors;
54 
55     bool                           foster_parenting;
56     bool                           frameset_ok;
57     bool                           scripting;
58     bool                           has_explicit_html_tag;
59     bool                           has_explicit_head_tag;
60     bool                           has_explicit_body_tag;
61 
62     lxb_html_tree_insertion_mode_f mode;
63     lxb_html_tree_insertion_mode_f original_mode;
64     lxb_html_tree_append_attr_f    before_append_attr;
65 
66     lxb_status_t                   status;
67 
68     size_t                         ref_count;
69 };
70 
/*
 * Selects how lxb_html_tree_insert_node() places a node relative to its `to`
 * argument: POSITION_BEFORE routes to lxb_dom_node_insert_before_wo_events(),
 * any other value routes to lxb_dom_node_insert_child_wo_events().
 * lxb_html_tree_appropriate_place_inserting_node() takes a pointer to a
 * value of this type as its `ipos` parameter.
 */
71 typedef enum {
72     LXB_HTML_TREE_INSERTION_POSITION_CHILD  = 0x00,
73     LXB_HTML_TREE_INSERTION_POSITION_BEFORE = 0x01
74 }
75 lxb_html_tree_insertion_position_t;
76 
77 
/*
 * Tree object lifecycle and token dispatch.
 *
 * Only the signatures are visible in this header; the implementations live
 * elsewhere.
 *
 * NOTE(review): presumably create/init/clean/destroy follow the usual lexbor
 * object pattern and ref/unref adjust lxb_html_tree.ref_count -- confirm the
 * meaning of each returned pointer in the implementation.
 */
78 LXB_API lxb_html_tree_t *
79 lxb_html_tree_create(void);
80 
/* Takes the tokenizer the tree will work with (see lxb_html_tree.tkz_ref). */
81 LXB_API lxb_status_t
82 lxb_html_tree_init(lxb_html_tree_t *tree, lxb_html_tokenizer_t *tkz);
83 
84 LXB_API lxb_html_tree_t *
85 lxb_html_tree_ref(lxb_html_tree_t *tree);
86 
87 LXB_API lxb_html_tree_t *
88 lxb_html_tree_unref(lxb_html_tree_t *tree);
89 
90 LXB_API void
91 lxb_html_tree_clean(lxb_html_tree_t *tree);
92 
93 LXB_API lxb_html_tree_t *
94 lxb_html_tree_destroy(lxb_html_tree_t *tree);
95 
96 LXB_API lxb_status_t
97 lxb_html_tree_stop_parsing(lxb_html_tree_t *tree);
98 
99 LXB_API bool
100 lxb_html_tree_process_abort(lxb_html_tree_t *tree);
101 
/*
 * Reports a tree-construction error identified by `id` for `token`; called
 * by the inline lxb_html_tree_acknowledge_token_self_closing() in this header.
 */
102 LXB_API void
103 lxb_html_tree_parse_error(lxb_html_tree_t *tree, lxb_html_token_t *token,
104                           lxb_html_tree_error_id_t id);
105 
/* Same parameter list and return type as lxb_html_tree_insertion_mode_f. */
106 LXB_API bool
107 lxb_html_tree_construction_dispatcher(lxb_html_tree_t *tree,
108                                       lxb_html_token_t *token);
109 
/*
 * Node creation and insertion.
 *
 * Only the signatures are visible in this header.
 *
 * NOTE(review): the names follow steps of the HTML Standard's tree
 * construction stage ("appropriate place for inserting a node", "insert a
 * foreign element", "create an element for a token", "insert a character",
 * "insert a comment") -- confirm the exact semantics in the implementation.
 */
110 LXB_API lxb_dom_node_t *
111 lxb_html_tree_appropriate_place_inserting_node(lxb_html_tree_t *tree,
112                                       lxb_dom_node_t *override_target,
113                                       lxb_html_tree_insertion_position_t *ipos);
114 
/*
 * The inline lxb_html_tree_insert_html_element() in this header calls this
 * function with ns == LXB_NS_HTML.
 */
115 LXB_API lxb_html_element_t *
116 lxb_html_tree_insert_foreign_element(lxb_html_tree_t *tree,
117                                      lxb_html_token_t *token, lxb_ns_id_t ns);
118 
119 LXB_API lxb_html_element_t *
120 lxb_html_tree_create_element_for_token(lxb_html_tree_t *tree,
121                                        lxb_html_token_t *token, lxb_ns_id_t ns);
122 
123 LXB_API lxb_status_t
124 lxb_html_tree_append_attributes(lxb_html_tree_t *tree,
125                                 lxb_dom_element_t *element,
126                                 lxb_html_token_t *token, lxb_ns_id_t ns);
127 
128 LXB_API lxb_status_t
129 lxb_html_tree_append_attributes_from_element(lxb_html_tree_t *tree,
130                                              lxb_dom_element_t *element,
131                                              lxb_dom_element_t *from,
132                                              lxb_ns_id_t ns);
133 
/*
 * The three adjust_*_attributes functions below have the same parameter list
 * and return type as lxb_html_tree_append_attr_f.
 */
134 LXB_API lxb_status_t
135 lxb_html_tree_adjust_mathml_attributes(lxb_html_tree_t *tree,
136                                        lxb_dom_attr_t *attr, void *ctx);
137 
138 LXB_API lxb_status_t
139 lxb_html_tree_adjust_svg_attributes(lxb_html_tree_t *tree,
140                                     lxb_dom_attr_t *attr, void *ctx);
141 
142 LXB_API lxb_status_t
143 lxb_html_tree_adjust_foreign_attributes(lxb_html_tree_t *tree,
144                                         lxb_dom_attr_t *attr, void *ctx);
145 
146 LXB_API lxb_status_t
147 lxb_html_tree_insert_character(lxb_html_tree_t *tree, lxb_html_token_t *token,
148                                lxb_dom_node_t **ret_node);
149 
150 LXB_API lxb_status_t
151 lxb_html_tree_insert_character_for_data(lxb_html_tree_t *tree,
152                                         lexbor_str_t *str,
153                                         lxb_dom_node_t **ret_node);
154 
155 LXB_API lxb_dom_comment_t *
156 lxb_html_tree_insert_comment(lxb_html_tree_t *tree,
157                              lxb_html_token_t *token, lxb_dom_node_t *pos);
158 
159 LXB_API lxb_dom_document_type_t *
160 lxb_html_tree_create_document_type_from_token(lxb_html_tree_t *tree,
161                                               lxb_html_token_t *token);
162 
/*
 * Parsing helpers.
 *
 * Only the signatures are visible in this header.
 *
 * NOTE(review): the names follow the HTML Standard ("generic raw text /
 * RCDATA element parsing algorithm", "generate implied end tags",
 * "generate all implied end tags thoroughly", "reset the insertion mode
 * appropriately"); ex_tag/ex_ns presumably name an element to exclude --
 * confirm in the implementation.
 */
163 LXB_API void
164 lxb_html_tree_node_delete_deep(lxb_html_tree_t *tree, lxb_dom_node_t *node);
165 
166 LXB_API lxb_html_element_t *
167 lxb_html_tree_generic_rawtext_parsing(lxb_html_tree_t *tree,
168                                       lxb_html_token_t *token);
169 
170 LXB_API lxb_html_element_t *
171 lxb_html_tree_generic_rcdata_parsing(lxb_html_tree_t *tree,
172                                      lxb_html_token_t *token);
173 
174 LXB_API void
175 lxb_html_tree_generate_implied_end_tags(lxb_html_tree_t *tree,
176                                         lxb_tag_id_t ex_tag, lxb_ns_id_t ex_ns);
177 
178 LXB_API void
179 lxb_html_tree_generate_all_implied_end_tags_thoroughly(lxb_html_tree_t *tree,
180                                                        lxb_tag_id_t ex_tag,
181                                                        lxb_ns_id_t ex_ns);
182 
183 LXB_API void
184 lxb_html_tree_reset_insertion_mode_appropriately(lxb_html_tree_t *tree);
185 
/*
 * Scope queries and tree-construction algorithms.
 *
 * Only the signatures are visible in this header.
 *
 * NOTE(review): the element_in_scope* functions presumably implement the
 * HTML Standard's "has an element in scope" checks against the stack of open
 * elements, returning the matching node or NULL, with `ct` selecting the
 * scope category -- confirm in the implementation.
 */
186 LXB_API lxb_dom_node_t *
187 lxb_html_tree_element_in_scope(lxb_html_tree_t *tree, lxb_tag_id_t tag_id,
188                                lxb_ns_id_t ns, lxb_html_tag_category_t ct);
189 
190 LXB_API lxb_dom_node_t *
191 lxb_html_tree_element_in_scope_by_node(lxb_html_tree_t *tree,
192                                        lxb_dom_node_t *by_node,
193                                        lxb_html_tag_category_t ct);
194 
195 LXB_API lxb_dom_node_t *
196 lxb_html_tree_element_in_scope_h123456(lxb_html_tree_t *tree);
197 
198 LXB_API lxb_dom_node_t *
199 lxb_html_tree_element_in_scope_tbody_thead_tfoot(lxb_html_tree_t *tree);
200 
201 LXB_API lxb_dom_node_t *
202 lxb_html_tree_element_in_scope_td_th(lxb_html_tree_t *tree);
203 
204 LXB_API bool
205 lxb_html_tree_check_scope_element(lxb_html_tree_t *tree);
206 
207 LXB_API void
208 lxb_html_tree_close_p_element(lxb_html_tree_t *tree, lxb_html_token_t *token);
209 
/*
 * Returns a bool and additionally reports a status through the `status`
 * out-parameter.
 * NOTE(review): what the bool signifies is not visible here -- confirm.
 */
210 LXB_API bool
211 lxb_html_tree_adoption_agency_algorithm(lxb_html_tree_t *tree,
212                                         lxb_html_token_t *token,
213                                         lxb_status_t *status);
214 
215 LXB_API bool
216 lxb_html_tree_html_integration_point(lxb_dom_node_t *node);
217 
/*
 * NOTE(review): the two declarations below have the same signatures as
 * lxb_html_tree_adjust_mathml_attributes() and
 * lxb_html_tree_adjust_svg_attributes() declared earlier in this header and
 * differ only in word order of the name -- confirm which set is actually
 * defined and used.
 */
218 LXB_API lxb_status_t
219 lxb_html_tree_adjust_attributes_mathml(lxb_html_tree_t *tree,
220                                        lxb_dom_attr_t *attr, void *ctx);
221 
222 LXB_API lxb_status_t
223 lxb_html_tree_adjust_attributes_svg(lxb_html_tree_t *tree,
224                                     lxb_dom_attr_t *attr, void *ctx);
225 
226 
227 /*
228  * Inline functions
229  */
230 lxb_inline lxb_status_t
lxb_html_tree_begin(lxb_html_tree_t * tree,lxb_html_document_t * document)231 lxb_html_tree_begin(lxb_html_tree_t *tree, lxb_html_document_t *document)
232 {
233     tree->document = document;
234 
235     return lxb_html_tokenizer_begin(tree->tkz_ref);
236 }
237 
238 lxb_inline lxb_status_t
lxb_html_tree_chunk(lxb_html_tree_t * tree,const lxb_char_t * html,size_t size)239 lxb_html_tree_chunk(lxb_html_tree_t *tree, const lxb_char_t *html, size_t size)
240 {
241     return lxb_html_tokenizer_chunk(tree->tkz_ref, html, size);
242 }
243 
244 lxb_inline lxb_status_t
lxb_html_tree_end(lxb_html_tree_t * tree)245 lxb_html_tree_end(lxb_html_tree_t *tree)
246 {
247     if (tree->document->done != NULL) {
248         tree->document->done(tree->document);
249     }
250 
251     return lxb_html_tokenizer_end(tree->tkz_ref);
252 }
253 
254 lxb_inline lxb_status_t
lxb_html_tree_build(lxb_html_tree_t * tree,lxb_html_document_t * document,const lxb_char_t * html,size_t size)255 lxb_html_tree_build(lxb_html_tree_t *tree, lxb_html_document_t *document,
256                     const lxb_char_t *html, size_t size)
257 {
258     tree->status = lxb_html_tree_begin(tree, document);
259     if (tree->status != LXB_STATUS_OK) {
260         return tree->status;
261     }
262 
263     tree->status = lxb_html_tree_chunk(tree, html, size);
264     if (tree->status != LXB_STATUS_OK) {
265         return tree->status;
266     }
267 
268     return lxb_html_tree_end(tree);
269 }
270 
271 lxb_inline lxb_dom_node_t *
lxb_html_tree_create_node(lxb_html_tree_t * tree,lxb_tag_id_t tag_id,lxb_ns_id_t ns)272 lxb_html_tree_create_node(lxb_html_tree_t *tree,
273                           lxb_tag_id_t tag_id, lxb_ns_id_t ns)
274 {
275     return (lxb_dom_node_t *) lxb_html_interface_create(tree->document,
276                                                         tag_id, ns);
277 }
278 
279 lxb_inline bool
lxb_html_tree_node_is(lxb_dom_node_t * node,lxb_tag_id_t tag_id)280 lxb_html_tree_node_is(lxb_dom_node_t *node, lxb_tag_id_t tag_id)
281 {
282     return node->local_name == tag_id && node->ns == LXB_NS_HTML;
283 }
284 
285 lxb_inline lxb_dom_node_t *
lxb_html_tree_current_node(lxb_html_tree_t * tree)286 lxb_html_tree_current_node(lxb_html_tree_t *tree)
287 {
288     if (tree->open_elements->length == 0) {
289         return NULL;
290     }
291 
292     return (lxb_dom_node_t *)
293         tree->open_elements->list[ (tree->open_elements->length - 1) ];
294 }
295 
296 lxb_inline lxb_dom_node_t *
lxb_html_tree_adjusted_current_node(lxb_html_tree_t * tree)297 lxb_html_tree_adjusted_current_node(lxb_html_tree_t *tree)
298 {
299     if(tree->fragment != NULL && tree->open_elements->length == 1) {
300         return lxb_dom_interface_node(tree->fragment);
301     }
302 
303     return lxb_html_tree_current_node(tree);
304 }
305 
306 lxb_inline lxb_html_element_t *
lxb_html_tree_insert_html_element(lxb_html_tree_t * tree,lxb_html_token_t * token)307 lxb_html_tree_insert_html_element(lxb_html_tree_t *tree,
308                                   lxb_html_token_t *token)
309 {
310     return lxb_html_tree_insert_foreign_element(tree, token, LXB_NS_HTML);
311 }
312 
313 lxb_inline void
lxb_html_tree_insert_node(lxb_dom_node_t * to,lxb_dom_node_t * node,lxb_html_tree_insertion_position_t ipos)314 lxb_html_tree_insert_node(lxb_dom_node_t *to, lxb_dom_node_t *node,
315                           lxb_html_tree_insertion_position_t ipos)
316 {
317     if (ipos == LXB_HTML_TREE_INSERTION_POSITION_BEFORE) {
318         lxb_dom_node_insert_before_wo_events(to, node);
319         return;
320     }
321 
322     lxb_dom_node_insert_child_wo_events(to, node);
323 }
324 
325 /* TODO: if we not need to save parse errors?! */
326 lxb_inline void
lxb_html_tree_acknowledge_token_self_closing(lxb_html_tree_t * tree,lxb_html_token_t * token)327 lxb_html_tree_acknowledge_token_self_closing(lxb_html_tree_t *tree,
328                                              lxb_html_token_t *token)
329 {
330     if ((token->type & LXB_HTML_TOKEN_TYPE_CLOSE_SELF) == 0) {
331         return;
332     }
333 
334     bool is_void = lxb_html_tag_is_void(token->tag_id);
335 
336     if (is_void) {
337         lxb_html_tree_parse_error(tree, token,
338                                   LXB_HTML_RULES_ERROR_NOVOHTELSTTAWITRSO);
339     }
340 }
341 
342 lxb_inline bool
lxb_html_tree_mathml_text_integration_point(lxb_dom_node_t * node)343 lxb_html_tree_mathml_text_integration_point(lxb_dom_node_t *node)
344 {
345     if (node->ns == LXB_NS_MATH) {
346         switch (node->local_name) {
347             case LXB_TAG_MI:
348             case LXB_TAG_MO:
349             case LXB_TAG_MN:
350             case LXB_TAG_MS:
351             case LXB_TAG_MTEXT:
352                 return true;
353         }
354     }
355 
356     return false;
357 }
358 
359 lxb_inline bool
lxb_html_tree_scripting(lxb_html_tree_t * tree)360 lxb_html_tree_scripting(lxb_html_tree_t *tree)
361 {
362     return tree->scripting;
363 }
364 
365 lxb_inline void
lxb_html_tree_scripting_set(lxb_html_tree_t * tree,bool scripting)366 lxb_html_tree_scripting_set(lxb_html_tree_t *tree, bool scripting)
367 {
368     tree->scripting = scripting;
369 }
370 
371 lxb_inline void
lxb_html_tree_attach_document(lxb_html_tree_t * tree,lxb_html_document_t * doc)372 lxb_html_tree_attach_document(lxb_html_tree_t *tree, lxb_html_document_t *doc)
373 {
374     tree->document = doc;
375 }
376 
377 /*
378  * No inline functions for ABI.
379  */
/*
 * Exported (LXB_API) declarations that mirror the inline functions defined
 * earlier in this header: each has the name of an inline function plus a
 * "_noi" suffix and the same parameter list and return type.
 *
 * NOTE(review): presumably each one simply forwards to its inline
 * counterpart so that callers which cannot use the inline definitions still
 * have a linkable symbol -- confirm in the implementation.
 */
380 LXB_API lxb_status_t
381 lxb_html_tree_begin_noi(lxb_html_tree_t *tree, lxb_html_document_t *document);
382 
383 LXB_API lxb_status_t
384 lxb_html_tree_chunk_noi(lxb_html_tree_t *tree, const lxb_char_t *html,
385                         size_t size);
386 
387 LXB_API lxb_status_t
388 lxb_html_tree_end_noi(lxb_html_tree_t *tree);
389 
390 LXB_API lxb_status_t
391 lxb_html_tree_build_noi(lxb_html_tree_t *tree, lxb_html_document_t *document,
392                         const lxb_char_t *html, size_t size);
393 
394 LXB_API lxb_dom_node_t *
395 lxb_html_tree_create_node_noi(lxb_html_tree_t *tree,
396                               lxb_tag_id_t tag_id, lxb_ns_id_t ns);
397 
398 LXB_API bool
399 lxb_html_tree_node_is_noi(lxb_dom_node_t *node, lxb_tag_id_t tag_id);
400 
401 LXB_API lxb_dom_node_t *
402 lxb_html_tree_current_node_noi(lxb_html_tree_t *tree);
403 
404 LXB_API lxb_dom_node_t *
405 lxb_html_tree_adjusted_current_node_noi(lxb_html_tree_t *tree);
406 
407 LXB_API lxb_html_element_t *
408 lxb_html_tree_insert_html_element_noi(lxb_html_tree_t *tree,
409                                       lxb_html_token_t *token);
410 
411 LXB_API void
412 lxb_html_tree_insert_node_noi(lxb_dom_node_t *to, lxb_dom_node_t *node,
413                               lxb_html_tree_insertion_position_t ipos);
414 
415 LXB_API void
416 lxb_html_tree_acknowledge_token_self_closing_noi(lxb_html_tree_t *tree,
417                                              lxb_html_token_t *token);
418 
419 LXB_API bool
420 lxb_html_tree_mathml_text_integration_point_noi(lxb_dom_node_t *node);
421 
422 LXB_API bool
423 lxb_html_tree_scripting_noi(lxb_html_tree_t *tree);
424 
425 LXB_API void
426 lxb_html_tree_scripting_set_noi(lxb_html_tree_t *tree, bool scripting);
427 
428 LXB_API void
429 lxb_html_tree_attach_document_noi(lxb_html_tree_t *tree,
430                                   lxb_html_document_t *doc);
431 
432 
433 #ifdef __cplusplus
434 } /* extern "C" */
435 #endif
436 
437 #endif /* LEXBOR_HTML_TREE_H */
438