/*
 * Copyright (C) 2018-2021 Alexander Borisov
 *
 * Author: Alexander Borisov <borisov@lexbor.com>
 */
6
7 #ifndef LEXBOR_HTML_PARSER_H
8 #define LEXBOR_HTML_PARSER_H
9
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13
14 #include "lexbor/html/base.h"
15 #include "lexbor/html/tree.h"
16 #include "lexbor/html/interfaces/document.h"
17
18 #include "lexbor/ns/ns.h"
19
20
/*
 * Identifiers for the parser's processing state. This is the type of the
 * `state` member of lxb_html_parser_t.
 *
 * The values are contiguous, 0x00 through 0x04. When the parser moves from
 * one state to another is decided by the parser implementation, which is not
 * part of this header.
 */
typedef enum {
    LXB_HTML_PARSER_STATE_BEGIN            = 0x00,
    LXB_HTML_PARSER_STATE_PROCESS          = 0x01,
    LXB_HTML_PARSER_STATE_END              = 0x02,
    LXB_HTML_PARSER_STATE_FRAGMENT_PROCESS = 0x03,
    LXB_HTML_PARSER_STATE_ERROR            = 0x04
}
lxb_html_parser_state_t;
29
30 typedef struct {
31 lxb_html_tokenizer_t *tkz;
32 lxb_html_tree_t *tree;
33 lxb_html_tree_t *original_tree;
34
35 lxb_dom_node_t *root;
36 lxb_dom_node_t *form;
37
38 lxb_html_parser_state_t state;
39 lxb_status_t status;
40
41 size_t ref_count;
42 }
43 lxb_html_parser_t;
44
45
46 LXB_API lxb_html_parser_t *
47 lxb_html_parser_create(void);
48
49 LXB_API lxb_status_t
50 lxb_html_parser_init(lxb_html_parser_t *parser);
51
52 LXB_API void
53 lxb_html_parser_clean(lxb_html_parser_t *parser);
54
55 LXB_API lxb_html_parser_t *
56 lxb_html_parser_destroy(lxb_html_parser_t *parser);
57
58 LXB_API lxb_html_parser_t *
59 lxb_html_parser_ref(lxb_html_parser_t *parser);
60
61 LXB_API lxb_html_parser_t *
62 lxb_html_parser_unref(lxb_html_parser_t *parser);
63
64
65 LXB_API lxb_html_document_t *
66 lxb_html_parse(lxb_html_parser_t *parser, const lxb_char_t *html, size_t size);
67
68
69 LXB_API lxb_dom_node_t *
70 lxb_html_parse_fragment(lxb_html_parser_t *parser, lxb_html_element_t *element,
71 const lxb_char_t *html, size_t size);
72
73 LXB_API lxb_dom_node_t *
74 lxb_html_parse_fragment_by_tag_id(lxb_html_parser_t *parser,
75 lxb_html_document_t *document,
76 lxb_tag_id_t tag_id, lxb_ns_id_t ns,
77 const lxb_char_t *html, size_t size);
78
79
80 LXB_API lxb_html_document_t *
81 lxb_html_parse_chunk_begin(lxb_html_parser_t *parser);
82
83 LXB_API lxb_status_t
84 lxb_html_parse_chunk_process(lxb_html_parser_t *parser,
85 const lxb_char_t *html, size_t size);
86
87 LXB_API lxb_status_t
88 lxb_html_parse_chunk_end(lxb_html_parser_t *parser);
89
90
91 LXB_API lxb_status_t
92 lxb_html_parse_fragment_chunk_begin(lxb_html_parser_t *parser,
93 lxb_html_document_t *document,
94 lxb_tag_id_t tag_id, lxb_ns_id_t ns);
95
96 LXB_API lxb_status_t
97 lxb_html_parse_fragment_chunk_process(lxb_html_parser_t *parser,
98 const lxb_char_t *html, size_t size);
99
100 LXB_API lxb_dom_node_t *
101 lxb_html_parse_fragment_chunk_end(lxb_html_parser_t *parser);
102
103
/*
 * Inline functions
 */
107 lxb_inline lxb_html_tokenizer_t *
lxb_html_parser_tokenizer(lxb_html_parser_t * parser)108 lxb_html_parser_tokenizer(lxb_html_parser_t *parser)
109 {
110 return parser->tkz;
111 }
112
113 lxb_inline lxb_html_tree_t *
lxb_html_parser_tree(lxb_html_parser_t * parser)114 lxb_html_parser_tree(lxb_html_parser_t *parser)
115 {
116 return parser->tree;
117 }
118
119 lxb_inline lxb_status_t
lxb_html_parser_status(lxb_html_parser_t * parser)120 lxb_html_parser_status(lxb_html_parser_t *parser)
121 {
122 return parser->status;
123 }
124
125 lxb_inline lxb_status_t
lxb_html_parser_state(lxb_html_parser_t * parser)126 lxb_html_parser_state(lxb_html_parser_t *parser)
127 {
128 return parser->state;
129 }
130
131 lxb_inline bool
lxb_html_parser_scripting(lxb_html_parser_t * parser)132 lxb_html_parser_scripting(lxb_html_parser_t *parser)
133 {
134 return lxb_html_tree_scripting(parser->tree);
135 }
136
137 lxb_inline void
lxb_html_parser_scripting_set(lxb_html_parser_t * parser,bool scripting)138 lxb_html_parser_scripting_set(lxb_html_parser_t *parser, bool scripting)
139 {
140 lxb_html_tree_scripting_set(parser->tree, scripting);
141 }
142
143
/*
 * Non-inline (exported) versions of the inline functions above, for ABI use.
 */
147 LXB_API lxb_html_tokenizer_t *
148 lxb_html_parser_tokenizer_noi(lxb_html_parser_t *parser);
149
150 LXB_API lxb_html_tree_t *
151 lxb_html_parser_tree_noi(lxb_html_parser_t *parser);
152
153 LXB_API lxb_status_t
154 lxb_html_parser_status_noi(lxb_html_parser_t *parser);
155
156 LXB_API lxb_status_t
157 lxb_html_parser_state_noi(lxb_html_parser_t *parser);
158
159 LXB_API bool
160 lxb_html_parser_scripting_noi(lxb_html_parser_t *parser);
161
162 LXB_API void
163 lxb_html_parser_scripting_set_noi(lxb_html_parser_t *parser, bool scripting);
164
165
166 #ifdef __cplusplus
167 } /* extern "C" */
168 #endif
169
170 #endif /* LEXBOR_HTML_PARSER_H */
171