xref: /php-src/ext/dom/lexbor/lexbor/css/syntax/syntax.c (revision 445c1c92)
1 /*
2  * Copyright (C) 2018-2023 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #include "lexbor/css/syntax/syntax.h"
8 #include "lexbor/css/parser.h"
9 
10 #include "lexbor/core/str.h"
11 
12 #define LEXBOR_STR_RES_MAP_HEX
13 #define LEXBOR_STR_RES_MAP_HEX_TO_CHAR_LOWERCASE
14 #define LEXBOR_STR_RES_CHAR_TO_TWO_HEX_VALUE_LOWERCASE
15 #define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
16 #include "lexbor/core/str_res.h"
17 
18 #define LXB_CSS_SYNTAX_RES_NAME_MAP
19 #include "lexbor/css/syntax/res.h"
20 
21 
/*
 * One-character string holding a single space. The serializers in this file
 * write it as the separator that terminates a hexadecimal escape sequence.
 */
22 static const lexbor_str_t lxb_str_ws = lexbor_str(" ");
23 
24 
25 lxb_status_t
lxb_css_syntax_parse_list_rules(lxb_css_parser_t * parser,const lxb_css_syntax_cb_list_rules_t * cb,const lxb_char_t * data,size_t length,void * ctx,bool top_level)26 lxb_css_syntax_parse_list_rules(lxb_css_parser_t *parser,
27                                 const lxb_css_syntax_cb_list_rules_t *cb,
28                                 const lxb_char_t *data, size_t length,
29                                 void *ctx, bool top_level)
30 {
31     lxb_status_t status;
32     lxb_css_syntax_rule_t *rule;
33 
34     if (lxb_css_parser_is_running(parser)) {
35         parser->status = LXB_STATUS_ERROR_WRONG_STAGE;
36         return parser->status;
37     }
38 
39     lxb_css_parser_clean(parser);
40 
41     lxb_css_parser_buffer_set(parser, data, length);
42 
43     rule = lxb_css_syntax_parser_list_rules_push(parser, NULL, NULL, cb,
44                                                  ctx, top_level,
45                                                  LXB_CSS_SYNTAX_TOKEN_UNDEF);
46     if (rule == NULL) {
47         status = parser->status;
48         goto end;
49     }
50 
51     parser->tkz->with_comment = false;
52     parser->stage = LXB_CSS_PARSER_RUN;
53 
54     status = lxb_css_syntax_parser_run(parser);
55     if (status != LXB_STATUS_OK) {
56         /* Destroy StyleSheet. */
57     }
58 
59 end:
60 
61     parser->stage = LXB_CSS_PARSER_END;
62 
63     return status;
64 }
65 
66 lxb_status_t
lxb_css_syntax_stack_expand(lxb_css_parser_t * parser,size_t count)67 lxb_css_syntax_stack_expand(lxb_css_parser_t *parser, size_t count)
68 {
69     size_t length, cur_len, size;
70     lxb_css_syntax_rule_t *p;
71 
72     if ((parser->rules + count) >= parser->rules_end) {
73         cur_len = parser->rules - parser->rules_begin;
74 
75         length = cur_len + count + 1024;
76         size = length * sizeof(lxb_css_syntax_rule_t);
77 
78         p = lexbor_realloc(parser->rules_begin, size);
79         if (p == NULL) {
80             return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
81         }
82 
83         parser->rules_begin = p;
84         parser->rules_end = p + length;
85         parser->rules = p + cur_len;
86     }
87 
88     return LXB_STATUS_OK;
89 }
90 
91 void
lxb_css_syntax_codepoint_to_ascii(lxb_css_syntax_tokenizer_t * tkz,lxb_codepoint_t cp)92 lxb_css_syntax_codepoint_to_ascii(lxb_css_syntax_tokenizer_t *tkz,
93                                   lxb_codepoint_t cp)
94 {
95     /*
96      * Zero, or is for a surrogate, or is greater than
97      * the maximum allowed code point (tkz->num > 0x10FFFF).
98      */
99     if (cp == 0 || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
100         memcpy(tkz->pos, lexbor_str_res_ansi_replacement_character, 3);
101 
102         tkz->pos += 3;
103         *tkz->pos = '\0';
104 
105         return;
106     }
107 
108     lxb_char_t *data = tkz->pos;
109 
110     if (cp <= 0x0000007F) {
111         /* 0xxxxxxx */
112         data[0] = (lxb_char_t) cp;
113 
114         tkz->pos += 1;
115     }
116     else if (cp <= 0x000007FF) {
117         /* 110xxxxx 10xxxxxx */
118         data[0] = (char)(0xC0 | (cp >> 6  ));
119         data[1] = (char)(0x80 | (cp & 0x3F));
120 
121         tkz->pos += 2;
122     }
123     else if (cp <= 0x0000FFFF) {
124         /* 1110xxxx 10xxxxxx 10xxxxxx */
125         data[0] = (char)(0xE0 | ((cp >> 12)));
126         data[1] = (char)(0x80 | ((cp >> 6 ) & 0x3F));
127         data[2] = (char)(0x80 | ( cp & 0x3F));
128 
129         tkz->pos += 3;
130     }
131     else if (cp <= 0x001FFFFF) {
132         /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
133         data[0] = (char)(0xF0 | ( cp >> 18));
134         data[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
135         data[2] = (char)(0x80 | ((cp >> 6 ) & 0x3F));
136         data[3] = (char)(0x80 | ( cp & 0x3F));
137 
138         tkz->pos += 4;
139     }
140 
141     *tkz->pos = '\0';
142 }
143 
144 lxb_status_t
lxb_css_syntax_ident_serialize(const lxb_char_t * data,size_t length,lexbor_serialize_cb_f cb,void * ctx)145 lxb_css_syntax_ident_serialize(const lxb_char_t *data, size_t length,
146                                lexbor_serialize_cb_f cb, void *ctx)
147 {
148     lxb_char_t ch;
149     lxb_status_t status;
150     const char **hex_map;
151     const lxb_char_t *p = data, *end;
152 
153     static const lexbor_str_t str_s = lexbor_str("\\");
154 
155     end = data + length;
156     hex_map = lexbor_str_res_char_to_two_hex_value_lowercase;
157 
158     while (p < end) {
159         ch = *p;
160 
161         if (lxb_css_syntax_res_name_map[ch] == 0x00) {
162             lexbor_serialize_write(cb, data, p - data, ctx, status);
163             lexbor_serialize_write(cb, str_s.data, str_s.length, ctx, status);
164             lexbor_serialize_write(cb, hex_map[ch], 2, ctx, status);
165 
166             data = ++p;
167 
168             if (p < end && lexbor_str_res_map_hex[*p] != 0xff) {
169                 lexbor_serialize_write(cb, lxb_str_ws.data,
170                                        lxb_str_ws.length, ctx, status);
171             }
172 
173             continue;
174         }
175 
176         p++;
177     }
178 
179     if (data < p) {
180         lexbor_serialize_write(cb, data, p - data, ctx, status);
181     }
182 
183     return LXB_STATUS_OK;
184 }
185 
186 lxb_status_t
lxb_css_syntax_string_serialize(const lxb_char_t * data,size_t length,lexbor_serialize_cb_f cb,void * ctx)187 lxb_css_syntax_string_serialize(const lxb_char_t *data, size_t length,
188                                 lexbor_serialize_cb_f cb, void *ctx)
189 {
190     lxb_char_t ch;
191     lxb_status_t status;
192     const char **hex_map;
193     const lxb_char_t *p, *end;
194 
195     static const lexbor_str_t str_s = lexbor_str("\\");
196     static const lexbor_str_t str_dk = lexbor_str("\"");
197     static const lexbor_str_t str_ds = lexbor_str("\\\\");
198     static const lexbor_str_t str_dks = lexbor_str("\\\"");
199 
200     p = data;
201     end = data + length;
202     hex_map = lexbor_str_res_char_to_two_hex_value_lowercase;
203 
204     lexbor_serialize_write(cb, str_dk.data, str_dk.length, ctx, status);
205 
206     while (p < end) {
207         ch = *p;
208 
209         if (lxb_css_syntax_res_name_map[ch] == 0x00) {
210             switch (ch) {
211                 case '\\':
212                     lexbor_serialize_write(cb, data, p - data, ctx, status);
213                     lexbor_serialize_write(cb, str_ds.data, str_ds.length,
214                                            ctx, status);
215                     break;
216 
217                 case '"':
218                     lexbor_serialize_write(cb, data, p - data, ctx, status);
219                     lexbor_serialize_write(cb, str_dks.data, str_dks.length,
220                                            ctx, status);
221                     break;
222 
223                 case '\n':
224                 case '\t':
225                 case '\r':
226                     lexbor_serialize_write(cb, data, p - data, ctx, status);
227                     lexbor_serialize_write(cb, str_s.data, str_s.length,
228                                            ctx, status);
229                     lexbor_serialize_write(cb, hex_map[ch], 2, ctx, status);
230 
231                     p++;
232 
233                     if (p < end && lexbor_str_res_map_hex[*p] != 0xff) {
234                         lexbor_serialize_write(cb, lxb_str_ws.data,
235                                                lxb_str_ws.length, ctx, status);
236                     }
237 
238                     data = p;
239                     continue;
240 
241                 default:
242                     p++;
243                     continue;
244             }
245 
246             data = ++p;
247             continue;
248         }
249 
250         p++;
251     }
252 
253     if (data < p) {
254         lexbor_serialize_write(cb, data, p - data, ctx, status);
255     }
256 
257     lexbor_serialize_write(cb, str_dk.data, str_dk.length, ctx, status);
258 
259     return LXB_STATUS_OK;
260 }
261 
262 lxb_status_t
lxb_css_syntax_ident_or_string_serialize(const lxb_char_t * data,size_t length,lexbor_serialize_cb_f cb,void * ctx)263 lxb_css_syntax_ident_or_string_serialize(const lxb_char_t *data, size_t length,
264                                          lexbor_serialize_cb_f cb, void *ctx)
265 {
266     const lxb_char_t *p, *end;
267 
268     p = data;
269     end = data + length;
270 
271     while (p < end) {
272         if (lxb_css_syntax_res_name_map[*p++] == 0x00) {
273             return lxb_css_syntax_string_serialize(data, length, cb, ctx);
274         }
275     }
276 
277     return cb(data, length, ctx);
278 }
279