xref: /php-src/ext/dom/lexbor/lexbor/html/parser.h (revision bffab33a)
1 /*
2  * Copyright (C) 2018-2021 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #ifndef LEXBOR_HTML_PARSER_H
8 #define LEXBOR_HTML_PARSER_H
9 
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13 
14 #include "lexbor/html/base.h"
15 #include "lexbor/html/tree.h"
16 #include "lexbor/html/interfaces/document.h"
17 
18 #include "lexbor/ns/ns.h"
19 
20 
/*
 * States of an lxb_html_parser_t, stored in its `state` field.
 *
 * The numeric values are fixed explicitly and are part of the public
 * interface; do not renumber them.
 *
 * NOTE(review): the transitions between states are driven by the parser
 * implementation, which is not visible in this header. The per-value notes
 * below are inferred from the names only and should be confirmed.
 */
typedef enum {
    /* presumably: no parsing in progress / ready to start */
    LXB_HTML_PARSER_STATE_BEGIN            = 0x00,
    /* presumably: chunked document parsing in progress */
    LXB_HTML_PARSER_STATE_PROCESS          = 0x01,
    /* presumably: parsing finished */
    LXB_HTML_PARSER_STATE_END              = 0x02,
    /* presumably: chunked fragment parsing in progress */
    LXB_HTML_PARSER_STATE_FRAGMENT_PROCESS = 0x03,
    /* presumably: parsing stopped because of an error */
    LXB_HTML_PARSER_STATE_ERROR            = 0x04
}
lxb_html_parser_state_t;
29 
30 typedef struct {
31     lxb_html_tokenizer_t    *tkz;
32     lxb_html_tree_t         *tree;
33     lxb_html_tree_t         *original_tree;
34 
35     lxb_dom_node_t          *root;
36     lxb_dom_node_t          *form;
37 
38     lxb_html_parser_state_t state;
39     lxb_status_t            status;
40 
41     size_t                  ref_count;
42 }
43 lxb_html_parser_t;
44 
45 
46 LXB_API lxb_html_parser_t *
47 lxb_html_parser_create(void);
48 
49 LXB_API lxb_status_t
50 lxb_html_parser_init(lxb_html_parser_t *parser);
51 
52 LXB_API void
53 lxb_html_parser_clean(lxb_html_parser_t *parser);
54 
55 LXB_API lxb_html_parser_t *
56 lxb_html_parser_destroy(lxb_html_parser_t *parser);
57 
58 LXB_API lxb_html_parser_t *
59 lxb_html_parser_ref(lxb_html_parser_t *parser);
60 
61 LXB_API lxb_html_parser_t *
62 lxb_html_parser_unref(lxb_html_parser_t *parser);
63 
64 
65 LXB_API lxb_html_document_t *
66 lxb_html_parse(lxb_html_parser_t *parser, const lxb_char_t *html, size_t size);
67 
68 
69 LXB_API lxb_dom_node_t *
70 lxb_html_parse_fragment(lxb_html_parser_t *parser, lxb_html_element_t *element,
71                         const lxb_char_t *html, size_t size);
72 
73 LXB_API lxb_dom_node_t *
74 lxb_html_parse_fragment_by_tag_id(lxb_html_parser_t *parser,
75                                   lxb_html_document_t *document,
76                                   lxb_tag_id_t tag_id, lxb_ns_id_t ns,
77                                   const lxb_char_t *html, size_t size);
78 
79 
80 LXB_API lxb_html_document_t *
81 lxb_html_parse_chunk_begin(lxb_html_parser_t *parser);
82 
83 LXB_API lxb_status_t
84 lxb_html_parse_chunk_process(lxb_html_parser_t *parser,
85                              const lxb_char_t *html, size_t size);
86 
87 LXB_API lxb_status_t
88 lxb_html_parse_chunk_end(lxb_html_parser_t *parser);
89 
90 
91 LXB_API lxb_status_t
92 lxb_html_parse_fragment_chunk_begin(lxb_html_parser_t *parser,
93                                     lxb_html_document_t *document,
94                                     lxb_tag_id_t tag_id, lxb_ns_id_t ns);
95 
96 LXB_API lxb_status_t
97 lxb_html_parse_fragment_chunk_process(lxb_html_parser_t *parser,
98                                       const lxb_char_t *html, size_t size);
99 
100 LXB_API lxb_dom_node_t *
101 lxb_html_parse_fragment_chunk_end(lxb_html_parser_t *parser);
102 
103 
104 /*
105  * Inline functions
106  */
107 lxb_inline lxb_html_tokenizer_t *
lxb_html_parser_tokenizer(lxb_html_parser_t * parser)108 lxb_html_parser_tokenizer(lxb_html_parser_t *parser)
109 {
110     return parser->tkz;
111 }
112 
113 lxb_inline lxb_html_tree_t *
lxb_html_parser_tree(lxb_html_parser_t * parser)114 lxb_html_parser_tree(lxb_html_parser_t *parser)
115 {
116     return parser->tree;
117 }
118 
119 lxb_inline lxb_status_t
lxb_html_parser_status(lxb_html_parser_t * parser)120 lxb_html_parser_status(lxb_html_parser_t *parser)
121 {
122     return parser->status;
123 }
124 
125 lxb_inline lxb_status_t
lxb_html_parser_state(lxb_html_parser_t * parser)126 lxb_html_parser_state(lxb_html_parser_t *parser)
127 {
128     return parser->state;
129 }
130 
131 lxb_inline bool
lxb_html_parser_scripting(lxb_html_parser_t * parser)132 lxb_html_parser_scripting(lxb_html_parser_t *parser)
133 {
134     return lxb_html_tree_scripting(parser->tree);
135 }
136 
137 lxb_inline void
lxb_html_parser_scripting_set(lxb_html_parser_t * parser,bool scripting)138 lxb_html_parser_scripting_set(lxb_html_parser_t *parser, bool scripting)
139 {
140     lxb_html_tree_scripting_set(parser->tree, scripting);
141 }
142 
143 
/*
 * Non-inline variants of the inline functions above, exported for ABI
 * purposes.
 */
147 LXB_API lxb_html_tokenizer_t *
148 lxb_html_parser_tokenizer_noi(lxb_html_parser_t *parser);
149 
150 LXB_API lxb_html_tree_t *
151 lxb_html_parser_tree_noi(lxb_html_parser_t *parser);
152 
153 LXB_API lxb_status_t
154 lxb_html_parser_status_noi(lxb_html_parser_t *parser);
155 
156 LXB_API lxb_status_t
157 lxb_html_parser_state_noi(lxb_html_parser_t *parser);
158 
159 LXB_API bool
160 lxb_html_parser_scripting_noi(lxb_html_parser_t *parser);
161 
162 LXB_API void
163 lxb_html_parser_scripting_set_noi(lxb_html_parser_t *parser, bool scripting);
164 
165 
166 #ifdef __cplusplus
167 } /* extern "C" */
168 #endif
169 
170 #endif /* LEXBOR_HTML_PARSER_H */
171