xref: /php-src/ext/dom/lexbor/lexbor/css/parser.c (revision 4df90af4)
1 /*
2  * Copyright (C) 2021 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #include "lexbor/css/parser.h"
8 #include "lexbor/css/state.h"
9 #include "lexbor/css/syntax/syntax.h"
10 
11 
12 lxb_css_parser_t *
lxb_css_parser_create(void)13 lxb_css_parser_create(void)
14 {
15     return lexbor_calloc(1, sizeof(lxb_css_parser_t));
16 }
17 
18 lxb_status_t
lxb_css_parser_init(lxb_css_parser_t * parser,lxb_css_syntax_tokenizer_t * tkz)19 lxb_css_parser_init(lxb_css_parser_t *parser, lxb_css_syntax_tokenizer_t *tkz)
20 {
21     lxb_status_t status;
22     static const size_t lxb_rules_length = 128;
23     static const size_t lxb_states_length = 1024;
24 
25     if (parser == NULL) {
26         return LXB_STATUS_ERROR_OBJECT_IS_NULL;
27     }
28 
29     /* Stack */
30     parser->states_begin = lexbor_malloc(sizeof(lxb_css_parser_state_t)
31                                          * lxb_states_length);
32     if (parser->states_begin == NULL) {
33         return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
34     }
35 
36     parser->states = parser->states_begin;
37     parser->states_end = parser->states_begin + lxb_states_length;
38 
39     memset(parser->states, 0x00, sizeof(lxb_css_parser_state_t));
40     parser->states->root = true;
41 
42     /* Syntax */
43     parser->my_tkz = false;
44 
45     if (tkz == NULL) {
46         tkz = lxb_css_syntax_tokenizer_create();
47         status = lxb_css_syntax_tokenizer_init(tkz);
48         if (status != LXB_STATUS_OK) {
49             return status;
50         }
51 
52         parser->my_tkz = true;
53     }
54 
55     /* Rules */
56     parser->rules_begin = lexbor_malloc(sizeof(lxb_css_syntax_rule_t)
57                                         * lxb_rules_length);
58     if (parser->rules_begin == NULL) {
59         return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
60     }
61 
62     parser->rules_end = parser->rules_begin + lxb_rules_length;
63     parser->rules = parser->rules_begin;
64 
65     /*
66      * Zero those parameters that can be used (passed to the function).
67      * The parser->rules->phase parameter will be assigned at the end of the
68      * parsing.
69      *
70      * The point is that parser->rules[0] is used as a stub before exiting
71      * parsing.
72      */
73     parser->rules->context = NULL;
74 
75     /* Temp */
76     parser->pos = NULL;
77     parser->str.length = 0;
78     parser->str_size = 4096;
79 
80     parser->str.data = lexbor_malloc(sizeof(lxb_char_t) * parser->str_size);
81     if (parser->str.data == NULL) {
82         return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
83     }
84 
85     parser->log = lxb_css_log_create();
86     status = lxb_css_log_init(parser->log, NULL);
87     if (status != LXB_STATUS_OK) {
88         return status;
89     }
90 
91     parser->tkz = tkz;
92     parser->types_begin = NULL;
93     parser->types_pos = NULL;
94     parser->types_end = NULL;
95     parser->stage = LXB_CSS_PARSER_CLEAN;
96     parser->receive_endings = false;
97     parser->status = LXB_STATUS_OK;
98     parser->fake_null = false;
99 
100     return LXB_STATUS_OK;
101 }
102 
103 void
lxb_css_parser_clean(lxb_css_parser_t * parser)104 lxb_css_parser_clean(lxb_css_parser_t *parser)
105 {
106     lxb_css_syntax_tokenizer_clean(parser->tkz);
107     lxb_css_log_clean(parser->log);
108 
109     parser->rules = parser->rules_begin;
110     parser->states = parser->states_begin;
111     parser->types_pos = parser->types_begin;
112     parser->stage = LXB_CSS_PARSER_CLEAN;
113     parser->status = LXB_STATUS_OK;
114     parser->pos = NULL;
115     parser->str.length = 0;
116     parser->fake_null = false;
117 }
118 
119 void
lxb_css_parser_erase(lxb_css_parser_t * parser)120 lxb_css_parser_erase(lxb_css_parser_t *parser)
121 {
122     lxb_css_parser_clean(parser);
123 
124     if (parser->memory != NULL) {
125         lxb_css_memory_clean(parser->memory);
126     }
127 }
128 
129 lxb_css_parser_t *
lxb_css_parser_destroy(lxb_css_parser_t * parser,bool self_destroy)130 lxb_css_parser_destroy(lxb_css_parser_t *parser, bool self_destroy)
131 {
132     if (parser == NULL) {
133         return NULL;
134     }
135 
136     if (parser->my_tkz) {
137         parser->tkz = lxb_css_syntax_tokenizer_destroy(parser->tkz);
138     }
139 
140     parser->log = lxb_css_log_destroy(parser->log, true);
141 
142     if (parser->rules_begin != NULL) {
143         parser->rules_begin = lexbor_free(parser->rules_begin);
144     }
145 
146     if (parser->states_begin != NULL) {
147         parser->states_begin = lexbor_free(parser->states_begin);
148     }
149 
150     if (parser->types_begin != NULL) {
151         parser->types_begin = lexbor_free(parser->types_begin);
152     }
153 
154     if (parser->str.data != NULL) {
155         parser->str.data = lexbor_free(parser->str.data);
156     }
157 
158     if (self_destroy) {
159         return lexbor_free(parser);
160     }
161 
162     return parser;
163 }
164 
165 lxb_css_parser_state_t *
lxb_css_parser_states_push(lxb_css_parser_t * parser,lxb_css_parser_state_f state,void * ctx,bool root)166 lxb_css_parser_states_push(lxb_css_parser_t *parser,
167                            lxb_css_parser_state_f state, void *ctx, bool root)
168 {
169     size_t length, cur_length;
170     lxb_css_parser_state_t *states = ++parser->states;
171 
172     if (states >= parser->states_end) {
173         cur_length = states - parser->states_begin;
174 
175         if (SIZE_MAX - cur_length < 1024) {
176             goto memory_error;
177         }
178 
179         length = cur_length + 1024;
180 
181         states = lexbor_realloc(parser->states_begin,
182                                 length * sizeof(lxb_css_parser_state_t));
183         if (states == NULL) {
184             goto memory_error;
185         }
186 
187         parser->states_begin = states;
188         parser->states_end = states + length;
189         parser->states = states + cur_length;
190 
191         states = parser->states;
192     }
193 
194     states->state = state;
195     states->context = ctx;
196     states->root = root;
197 
198     return states;
199 
200 memory_error:
201 
202     parser->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
203 
204     return NULL;
205 }
206 
207 lxb_css_parser_state_t *
lxb_css_parser_states_next(lxb_css_parser_t * parser,lxb_css_parser_state_f next,lxb_css_parser_state_f back,void * ctx,bool root)208 lxb_css_parser_states_next(lxb_css_parser_t *parser,
209                            lxb_css_parser_state_f next,
210                            lxb_css_parser_state_f back, void *ctx, bool root)
211 {
212     lxb_css_parser_state_t *state;
213 
214     state = lxb_css_parser_states_push(parser, back, ctx, root);
215     if (state == NULL) {
216         return NULL;
217     }
218 
219     parser->rules->state = next;
220 
221     return state;
222 }
223 
224 lxb_status_t
lxb_css_parser_types_push(lxb_css_parser_t * parser,lxb_css_syntax_token_type_t type)225 lxb_css_parser_types_push(lxb_css_parser_t *parser,
226                           lxb_css_syntax_token_type_t type)
227 {
228     size_t length, new_length;
229     lxb_css_syntax_token_type_t *tmp;
230 
231     if (parser->types_pos >= parser->types_end) {
232         length = parser->types_end - parser->types_begin;
233 
234         if ((SIZE_MAX - length) < 1024) {
235             return LXB_STATUS_ERROR_OVERFLOW;
236         }
237 
238         new_length = length + 1024;
239 
240         tmp = lexbor_realloc(parser->types_begin,
241                              new_length * sizeof(lxb_css_syntax_token_type_t));
242         if (tmp == NULL) {
243             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
244         }
245 
246         parser->types_begin = tmp;
247         parser->types_end = tmp + new_length;
248         parser->types_pos = parser->types_begin + length;
249     }
250 
251     *parser->types_pos++ = type;
252 
253     return LXB_STATUS_OK;
254 }
255 
256 bool
lxb_css_parser_stop(lxb_css_parser_t * parser)257 lxb_css_parser_stop(lxb_css_parser_t *parser)
258 {
259     parser->loop = false;
260     return true;
261 }
262 
263 bool
lxb_css_parser_fail(lxb_css_parser_t * parser,lxb_status_t status)264 lxb_css_parser_fail(lxb_css_parser_t *parser, lxb_status_t status)
265 {
266     parser->status = status;
267     parser->loop = false;
268     return true;
269 }
270 
271 bool
lxb_css_parser_unexpected(lxb_css_parser_t * parser)272 lxb_css_parser_unexpected(lxb_css_parser_t *parser)
273 {
274     (void) lxb_css_parser_unexpected_status(parser);
275     return true;
276 }
277 
278 bool
lxb_css_parser_success(lxb_css_parser_t * parser)279 lxb_css_parser_success(lxb_css_parser_t *parser)
280 {
281     parser->rules->state = lxb_css_state_success;
282     return true;
283 }
284 
285 bool
lxb_css_parser_failed(lxb_css_parser_t * parser)286 lxb_css_parser_failed(lxb_css_parser_t *parser)
287 {
288     lxb_css_syntax_rule_t *rule = parser->rules;
289 
290     rule->state = rule->cbx.cb->failed;
291     rule->failed = true;
292 
293     return true;
294 }
295 
296 lxb_status_t
lxb_css_parser_unexpected_status(lxb_css_parser_t * parser)297 lxb_css_parser_unexpected_status(lxb_css_parser_t *parser)
298 {
299     parser->status = LXB_STATUS_ERROR_UNEXPECTED_DATA;
300 
301     parser->rules->failed = true;
302 
303     return LXB_STATUS_ERROR_UNEXPECTED_DATA;
304 }
305 
306 bool
lxb_css_parser_unexpected_data(lxb_css_parser_t * parser,const lxb_css_syntax_token_t * token)307 lxb_css_parser_unexpected_data(lxb_css_parser_t *parser,
308                                const lxb_css_syntax_token_t *token)
309 {
310     static const char selectors[] = "Selectors";
311     parser->status = LXB_STATUS_ERROR_UNEXPECTED_DATA;
312 
313     if (lxb_css_syntax_token_error(parser, token, selectors) == NULL) {
314         return lxb_css_parser_memory_fail(parser);
315     }
316 
317     return true;
318 }
319 
320 lxb_status_t
lxb_css_parser_unexpected_data_status(lxb_css_parser_t * parser,const lxb_css_syntax_token_t * token)321 lxb_css_parser_unexpected_data_status(lxb_css_parser_t *parser,
322                                       const lxb_css_syntax_token_t *token)
323 {
324     static const char selectors[] = "Selectors";
325     parser->status = LXB_STATUS_ERROR_UNEXPECTED_DATA;
326 
327     if (lxb_css_syntax_token_error(parser, token, selectors) == NULL) {
328         return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
329     }
330 
331     return LXB_STATUS_ERROR_UNEXPECTED_DATA;
332 }
333 
334 bool
lxb_css_parser_memory_fail(lxb_css_parser_t * parser)335 lxb_css_parser_memory_fail(lxb_css_parser_t *parser)
336 {
337     parser->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
338     parser->loop = false;
339     return true;
340 }
341 
342 lxb_status_t
lxb_css_parser_memory_fail_status(lxb_css_parser_t * parser)343 lxb_css_parser_memory_fail_status(lxb_css_parser_t *parser)
344 {
345     parser->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
346     parser->loop = false;
347 
348     return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
349 }
350