xref: /php-src/ext/dom/lexbor/lexbor/css/syntax/token.h (revision f0934090)
/*
 * Copyright (C) 2018-2020 Alexander Borisov
 *
 * Author: Alexander Borisov <borisov@lexbor.com>
 */
6 
7 #ifndef LEXBOR_CSS_SYNTAX_TOKEN_H
8 #define LEXBOR_CSS_SYNTAX_TOKEN_H
9 
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13 
14 #include "lexbor/core/str.h"
15 
16 #include "lexbor/css/log.h"
17 #include "lexbor/css/syntax/base.h"
18 
19 
/*
 * Cast helpers.
 *
 * Each macro reinterprets `token` as a pointer to the named per-type token
 * structure. They are plain C casts: no check of the token type is performed.
 *
 * Two helpers go one step further than a cast:
 *   lxb_css_syntax_token_delim_char()       yields the `character` member;
 *   lxb_css_syntax_token_dimension_string() yields the address of the `str`
 *                                           member of a dimension token.
 */
#define lxb_css_syntax_token_base(token) ((lxb_css_syntax_token_base_t *) (token))
#define lxb_css_syntax_token_ident(token) ((lxb_css_syntax_token_ident_t *) (token))
#define lxb_css_syntax_token_function(token) ((lxb_css_syntax_token_function_t *) (token))
#define lxb_css_syntax_token_at_keyword(token) ((lxb_css_syntax_token_at_keyword_t *) (token))
#define lxb_css_syntax_token_hash(token) ((lxb_css_syntax_token_hash_t *) (token))
#define lxb_css_syntax_token_string(token) ((lxb_css_syntax_token_string_t *) (token))
#define lxb_css_syntax_token_bad_string(token) ((lxb_css_syntax_token_bad_string_t *) (token))
#define lxb_css_syntax_token_url(token) ((lxb_css_syntax_token_url_t *) (token))
#define lxb_css_syntax_token_bad_url(token) ((lxb_css_syntax_token_bad_url_t *) (token))
#define lxb_css_syntax_token_delim(token) ((lxb_css_syntax_token_delim_t *) (token))
#define lxb_css_syntax_token_delim_char(token) (((lxb_css_syntax_token_delim_t *) (token))->character)
#define lxb_css_syntax_token_number(token) ((lxb_css_syntax_token_number_t *) (token))
#define lxb_css_syntax_token_percentage(token) ((lxb_css_syntax_token_percentage_t *) (token))
#define lxb_css_syntax_token_dimension(token) ((lxb_css_syntax_token_dimension_t *) (token))
#define lxb_css_syntax_token_dimension_string(token) (&((lxb_css_syntax_token_dimension_t *) (token))->str)
#define lxb_css_syntax_token_whitespace(token) ((lxb_css_syntax_token_whitespace_t *) (token))
#define lxb_css_syntax_token_cdo(token) ((lxb_css_syntax_token_cdo_t *) (token))
#define lxb_css_syntax_token_cdc(token) ((lxb_css_syntax_token_cdc_t *) (token))
#define lxb_css_syntax_token_colon(token) ((lxb_css_syntax_token_colon_t *) (token))
#define lxb_css_syntax_token_semicolon(token) ((lxb_css_syntax_token_semicolon_t *) (token))
#define lxb_css_syntax_token_comma(token) ((lxb_css_syntax_token_comma_t *) (token))
#define lxb_css_syntax_token_ls_bracket(token) ((lxb_css_syntax_token_ls_bracket_t *) (token))
#define lxb_css_syntax_token_rs_bracket(token) ((lxb_css_syntax_token_rs_bracket_t *) (token))
#define lxb_css_syntax_token_l_parenthesis(token) ((lxb_css_syntax_token_l_parenthesis_t *) (token))
#define lxb_css_syntax_token_r_parenthesis(token) ((lxb_css_syntax_token_r_parenthesis_t *) (token))
#define lxb_css_syntax_token_lc_bracket(token) ((lxb_css_syntax_token_lc_bracket_t *) (token))
#define lxb_css_syntax_token_rc_bracket(token) ((lxb_css_syntax_token_rc_bracket_t *) (token))
#define lxb_css_syntax_token_comment(token) ((lxb_css_syntax_token_comment_t *) (token))
48 
49 
50 typedef struct lxb_css_syntax_token_data lxb_css_syntax_token_data_t;
51 
52 typedef const lxb_char_t *
53 (*lxb_css_syntax_token_data_cb_f)(const lxb_char_t *begin, const lxb_char_t *end,
54                                   lexbor_str_t *str, lexbor_mraw_t *mraw,
55                                   lxb_css_syntax_token_data_t *td);
56 
57 typedef lxb_status_t
58 (*lxb_css_syntax_token_cb_f)(const lxb_char_t *data, size_t len, void *ctx);
59 
60 struct lxb_css_syntax_token_data {
61     lxb_css_syntax_token_data_cb_f cb;
62     lxb_status_t                   status;
63     int                            count;
64     uint32_t                       num;
65     bool                           is_last;
66 };
67 
/*
 * CSS syntax token types.
 *
 * Enumerators are numbered sequentially from LXB_CSS_SYNTAX_TOKEN_UNDEF (0),
 * so their order is part of the interface and must not change.
 *
 * LXB_CSS_SYNTAX_TOKEN__END has the same value as the deprecated
 * LXB_CSS_SYNTAX_TOKEN__TERMINATED. LXB_CSS_SYNTAX_TOKEN__LAST_ENTRY is one
 * greater than that value and is not itself a token type.
 */
typedef enum {
    LXB_CSS_SYNTAX_TOKEN_UNDEF = 0x00,

    /* String tokens. */
    LXB_CSS_SYNTAX_TOKEN_IDENT,
    LXB_CSS_SYNTAX_TOKEN_FUNCTION,
    LXB_CSS_SYNTAX_TOKEN_AT_KEYWORD,
    LXB_CSS_SYNTAX_TOKEN_HASH,
    LXB_CSS_SYNTAX_TOKEN_STRING,
    LXB_CSS_SYNTAX_TOKEN_BAD_STRING,
    LXB_CSS_SYNTAX_TOKEN_URL,
    LXB_CSS_SYNTAX_TOKEN_BAD_URL,
    LXB_CSS_SYNTAX_TOKEN_COMMENT,         /* not in specification */
    LXB_CSS_SYNTAX_TOKEN_WHITESPACE,

    /* Has a string. */
    LXB_CSS_SYNTAX_TOKEN_DIMENSION,

    /* Other tokens. */
    LXB_CSS_SYNTAX_TOKEN_DELIM,
    LXB_CSS_SYNTAX_TOKEN_NUMBER,
    LXB_CSS_SYNTAX_TOKEN_PERCENTAGE,
    LXB_CSS_SYNTAX_TOKEN_CDO,
    LXB_CSS_SYNTAX_TOKEN_CDC,
    LXB_CSS_SYNTAX_TOKEN_COLON,
    LXB_CSS_SYNTAX_TOKEN_SEMICOLON,
    LXB_CSS_SYNTAX_TOKEN_COMMA,
    LXB_CSS_SYNTAX_TOKEN_LS_BRACKET,      /* U+005B LEFT SQUARE BRACKET ([) */
    LXB_CSS_SYNTAX_TOKEN_RS_BRACKET,      /* U+005D RIGHT SQUARE BRACKET (]) */
    LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS,   /* U+0028 LEFT PARENTHESIS (() */
    LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS,   /* U+0029 RIGHT PARENTHESIS ()) */
    LXB_CSS_SYNTAX_TOKEN_LC_BRACKET,      /* U+007B LEFT CURLY BRACKET ({) */
    LXB_CSS_SYNTAX_TOKEN_RC_BRACKET,      /* U+007D RIGHT CURLY BRACKET (}) */
    LXB_CSS_SYNTAX_TOKEN__EOF,
    LXB_CSS_SYNTAX_TOKEN__TERMINATED,     /* Deprecated, use LXB_CSS_SYNTAX_TOKEN__END. */
    LXB_CSS_SYNTAX_TOKEN__END = LXB_CSS_SYNTAX_TOKEN__TERMINATED,
    LXB_CSS_SYNTAX_TOKEN__LAST_ENTRY
}
lxb_css_syntax_token_type_t;
107 
108 typedef struct lxb_css_syntax_token_base {
109     const lxb_char_t            *begin;
110     size_t                      length;
111 
112     uintptr_t                   user_id;
113 }
114 lxb_css_syntax_token_base_t;
115 
116 typedef struct lxb_css_syntax_token_number {
117     lxb_css_syntax_token_base_t base;
118 
119     double                      num;
120     bool                        is_float;
121     bool                        have_sign;
122 }
123 lxb_css_syntax_token_number_t;
124 
125 typedef struct lxb_css_syntax_token_string {
126     lxb_css_syntax_token_base_t base;
127 
128     const lxb_char_t            *data;
129     size_t                      length;
130 }
131 lxb_css_syntax_token_string_t;
132 
133 typedef struct lxb_css_syntax_token_dimension {
134     lxb_css_syntax_token_number_t num;
135     lxb_css_syntax_token_string_t str;
136 }
137 lxb_css_syntax_token_dimension_t;
138 
139 typedef struct lxb_css_syntax_token_delim {
140     lxb_css_syntax_token_base_t base;
141     lxb_char_t                  character;
142 }
143 lxb_css_syntax_token_delim_t;
144 
145 typedef lxb_css_syntax_token_string_t lxb_css_syntax_token_ident_t;
146 typedef lxb_css_syntax_token_string_t lxb_css_syntax_token_function_t;
147 typedef lxb_css_syntax_token_string_t lxb_css_syntax_token_at_keyword_t;
148 typedef lxb_css_syntax_token_string_t lxb_css_syntax_token_hash_t;
149 typedef lxb_css_syntax_token_string_t lxb_css_syntax_token_bad_string_t;
150 typedef lxb_css_syntax_token_string_t lxb_css_syntax_token_url_t;
151 typedef lxb_css_syntax_token_string_t lxb_css_syntax_token_bad_url_t;
152 typedef lxb_css_syntax_token_number_t lxb_css_syntax_token_percentage_t;
153 typedef lxb_css_syntax_token_string_t lxb_css_syntax_token_whitespace_t;
154 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_cdo_t;
155 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_cdc_t;
156 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_colon_t;
157 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_semicolon_t;
158 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_comma_t;
159 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_ls_bracket_t;
160 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_rs_bracket_t;
161 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_l_parenthesis_t;
162 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_r_parenthesis_t;
163 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_lc_bracket_t;
164 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_rc_bracket_t;
165 typedef lxb_css_syntax_token_string_t lxb_css_syntax_token_comment_t;
166 typedef lxb_css_syntax_token_base_t   lxb_css_syntax_token_terminated_t;
167 
168 struct lxb_css_syntax_token {
169     union lxb_css_syntax_token_u {
170         lxb_css_syntax_token_base_t          base;
171         lxb_css_syntax_token_comment_t       comment;
172         lxb_css_syntax_token_number_t        number;
173         lxb_css_syntax_token_dimension_t     dimension;
174         lxb_css_syntax_token_percentage_t    percentage;
175         lxb_css_syntax_token_hash_t          hash;
176         lxb_css_syntax_token_string_t        string;
177         lxb_css_syntax_token_bad_string_t    bad_string;
178         lxb_css_syntax_token_delim_t         delim;
179         lxb_css_syntax_token_l_parenthesis_t lparenthesis;
180         lxb_css_syntax_token_r_parenthesis_t rparenthesis;
181         lxb_css_syntax_token_cdc_t           cdc;
182         lxb_css_syntax_token_function_t      function;
183         lxb_css_syntax_token_ident_t         ident;
184         lxb_css_syntax_token_url_t           url;
185         lxb_css_syntax_token_bad_url_t       bad_url;
186         lxb_css_syntax_token_at_keyword_t    at_keyword;
187         lxb_css_syntax_token_whitespace_t    whitespace;
188         lxb_css_syntax_token_terminated_t    terminated;
189     }
190     types;
191 
192     lxb_css_syntax_token_type_t type;
193     uintptr_t                   offset;
194     bool                        cloned;
195 };
196 
197 
198 LXB_API lxb_css_syntax_token_t *
199 lxb_css_syntax_token(lxb_css_syntax_tokenizer_t *tkz);
200 
201 LXB_API lxb_css_syntax_token_t *
202 lxb_css_syntax_token_next(lxb_css_syntax_tokenizer_t *tkz);
203 
204 LXB_API void
205 lxb_css_syntax_token_consume(lxb_css_syntax_tokenizer_t *tkz);
206 
207 LXB_API void
208 lxb_css_syntax_token_consume_n(lxb_css_syntax_tokenizer_t *tkz, unsigned count);
209 
210 LXB_API lxb_status_t
211 lxb_css_syntax_token_string_dup(lxb_css_syntax_token_string_t *token,
212                                 lexbor_str_t *str, lexbor_mraw_t *mraw);
213 
214 LXB_API lxb_status_t
215 lxb_css_syntax_token_string_make(lxb_css_syntax_tokenizer_t *tkz,
216                                  lxb_css_syntax_token_t *token);
217 
218 LXB_API lxb_css_syntax_token_t *
219 lxb_css_syntax_token_cached_create(lxb_css_syntax_tokenizer_t *tkz);
220 
221 LXB_API void
222 lxb_css_syntax_token_string_free(lxb_css_syntax_tokenizer_t *tkz,
223                                  lxb_css_syntax_token_t *token);
224 
225 LXB_API const lxb_char_t *
226 lxb_css_syntax_token_type_name_by_id(lxb_css_syntax_token_type_t type);
227 
228 LXB_API lxb_css_syntax_token_type_t
229 lxb_css_syntax_token_type_id_by_name(const lxb_char_t *type_name, size_t len);
230 
231 LXB_API lxb_status_t
232 lxb_css_syntax_token_serialize(const lxb_css_syntax_token_t *token,
233                                lxb_css_syntax_token_cb_f cb, void *ctx);
234 
235 LXB_API lxb_status_t
236 lxb_css_syntax_token_serialize_str(const lxb_css_syntax_token_t *token,
237                                    lexbor_str_t *str, lexbor_mraw_t *mraw);
238 
239 LXB_API lxb_char_t *
240 lxb_css_syntax_token_serialize_char(const lxb_css_syntax_token_t *token,
241                                     size_t *out_length);
242 
243 LXB_API lxb_css_log_message_t *
244 lxb_css_syntax_token_error(lxb_css_parser_t *parser,
245                            const lxb_css_syntax_token_t *token,
246                            const char *module_name);
247 
/*
 * Inline functions
 */
251 lxb_inline lxb_css_syntax_token_t *
lxb_css_syntax_token_create(lexbor_dobject_t * dobj)252 lxb_css_syntax_token_create(lexbor_dobject_t *dobj)
253 {
254     return (lxb_css_syntax_token_t *) lexbor_dobject_calloc(dobj);
255 }
256 
257 lxb_inline void
lxb_css_syntax_token_clean(lxb_css_syntax_token_t * token)258 lxb_css_syntax_token_clean(lxb_css_syntax_token_t *token)
259 {
260     memset(token, 0, sizeof(lxb_css_syntax_token_t));
261 }
262 
263 lxb_inline lxb_css_syntax_token_t *
lxb_css_syntax_token_destroy(lxb_css_syntax_token_t * token,lexbor_dobject_t * dobj)264 lxb_css_syntax_token_destroy(lxb_css_syntax_token_t *token,
265                              lexbor_dobject_t *dobj)
266 {
267     return (lxb_css_syntax_token_t *) lexbor_dobject_free(dobj, token);
268 }
269 
270 lxb_inline const lxb_char_t *
lxb_css_syntax_token_type_name(const lxb_css_syntax_token_t * token)271 lxb_css_syntax_token_type_name(const lxb_css_syntax_token_t *token)
272 {
273     return lxb_css_syntax_token_type_name_by_id(token->type);
274 }
275 
276 lxb_inline lxb_css_syntax_token_type_t
lxb_css_syntax_token_type(const lxb_css_syntax_token_t * token)277 lxb_css_syntax_token_type(const lxb_css_syntax_token_t *token)
278 {
279     return token->type;
280 }
281 
282 lxb_inline lxb_css_syntax_token_t *
lxb_css_syntax_token_wo_ws(lxb_css_syntax_tokenizer_t * tkz)283 lxb_css_syntax_token_wo_ws(lxb_css_syntax_tokenizer_t *tkz)
284 {
285     lxb_css_syntax_token_t *token;
286 
287     token = lxb_css_syntax_token(tkz);
288     if (token == NULL) {
289         return NULL;
290     }
291 
292     if (token->type == LXB_CSS_SYNTAX_TOKEN_WHITESPACE) {
293         lxb_css_syntax_token_consume(tkz);
294         token = lxb_css_syntax_token(tkz);
295     }
296 
297     return token;
298 }
299 
300 /*
301  * No inline functions for ABI.
302  */
303 LXB_API lxb_css_syntax_token_t *
304 lxb_css_syntax_token_create_noi(lexbor_dobject_t *dobj);
305 
306 LXB_API void
307 lxb_css_syntax_token_clean_noi(lxb_css_syntax_token_t *token);
308 
309 LXB_API lxb_css_syntax_token_t *
310 lxb_css_syntax_token_destroy_noi(lxb_css_syntax_token_t *token,
311                                  lexbor_dobject_t *dobj);
312 
313 LXB_API const lxb_char_t *
314 lxb_css_syntax_token_type_name_noi(lxb_css_syntax_token_t *token);
315 
316 LXB_API lxb_css_syntax_token_type_t
317 lxb_css_syntax_token_type_noi(lxb_css_syntax_token_t *token);
318 
319 
320 #ifdef __cplusplus
321 } /* extern "C" */
322 #endif
323 
324 #endif /* LEXBOR_CSS_SYNTAX_TOKEN_H */
325