1 /*
2 * Copyright (C) 2018-2023 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7 #include "lexbor/css/syntax/syntax.h"
8 #include "lexbor/css/parser.h"
9
10 #include "lexbor/core/str.h"
11
12 #define LEXBOR_STR_RES_MAP_HEX
13 #define LEXBOR_STR_RES_MAP_HEX_TO_CHAR_LOWERCASE
14 #define LEXBOR_STR_RES_CHAR_TO_TWO_HEX_VALUE_LOWERCASE
15 #define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
16 #include "lexbor/core/str_res.h"
17
18 #define LXB_CSS_SYNTAX_RES_NAME_MAP
19 #include "lexbor/css/syntax/res.h"
20
21
/*
 * Single space. The serializers in this file write it after a hexadecimal
 * escape when the byte that follows is found in lexbor_str_res_map_hex.
 */
static const lexbor_str_t lxb_str_ws = lexbor_str(" ");
23
24
25 lxb_status_t
lxb_css_syntax_parse_list_rules(lxb_css_parser_t * parser,const lxb_css_syntax_cb_list_rules_t * cb,const lxb_char_t * data,size_t length,void * ctx,bool top_level)26 lxb_css_syntax_parse_list_rules(lxb_css_parser_t *parser,
27 const lxb_css_syntax_cb_list_rules_t *cb,
28 const lxb_char_t *data, size_t length,
29 void *ctx, bool top_level)
30 {
31 lxb_status_t status;
32 lxb_css_syntax_rule_t *rule;
33
34 if (lxb_css_parser_is_running(parser)) {
35 parser->status = LXB_STATUS_ERROR_WRONG_STAGE;
36 return parser->status;
37 }
38
39 lxb_css_parser_clean(parser);
40
41 lxb_css_parser_buffer_set(parser, data, length);
42
43 rule = lxb_css_syntax_parser_list_rules_push(parser, NULL, NULL, cb,
44 ctx, top_level,
45 LXB_CSS_SYNTAX_TOKEN_UNDEF);
46 if (rule == NULL) {
47 status = parser->status;
48 goto end;
49 }
50
51 parser->tkz->with_comment = false;
52 parser->stage = LXB_CSS_PARSER_RUN;
53
54 status = lxb_css_syntax_parser_run(parser);
55 if (status != LXB_STATUS_OK) {
56 /* Destroy StyleSheet. */
57 }
58
59 end:
60
61 parser->stage = LXB_CSS_PARSER_END;
62
63 return status;
64 }
65
66 lxb_status_t
lxb_css_syntax_stack_expand(lxb_css_parser_t * parser,size_t count)67 lxb_css_syntax_stack_expand(lxb_css_parser_t *parser, size_t count)
68 {
69 size_t length, cur_len, size;
70 lxb_css_syntax_rule_t *p;
71
72 if ((parser->rules + count) >= parser->rules_end) {
73 cur_len = parser->rules - parser->rules_begin;
74
75 length = cur_len + count + 1024;
76 size = length * sizeof(lxb_css_syntax_rule_t);
77
78 p = lexbor_realloc(parser->rules_begin, size);
79 if (p == NULL) {
80 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
81 }
82
83 parser->rules_begin = p;
84 parser->rules_end = p + length;
85 parser->rules = p + cur_len;
86 }
87
88 return LXB_STATUS_OK;
89 }
90
91 void
lxb_css_syntax_codepoint_to_ascii(lxb_css_syntax_tokenizer_t * tkz,lxb_codepoint_t cp)92 lxb_css_syntax_codepoint_to_ascii(lxb_css_syntax_tokenizer_t *tkz,
93 lxb_codepoint_t cp)
94 {
95 /*
96 * Zero, or is for a surrogate, or is greater than
97 * the maximum allowed code point (tkz->num > 0x10FFFF).
98 */
99 if (cp == 0 || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
100 memcpy(tkz->pos, lexbor_str_res_ansi_replacement_character, 3);
101
102 tkz->pos += 3;
103 *tkz->pos = '\0';
104
105 return;
106 }
107
108 lxb_char_t *data = tkz->pos;
109
110 if (cp <= 0x0000007F) {
111 /* 0xxxxxxx */
112 data[0] = (lxb_char_t) cp;
113
114 tkz->pos += 1;
115 }
116 else if (cp <= 0x000007FF) {
117 /* 110xxxxx 10xxxxxx */
118 data[0] = (char)(0xC0 | (cp >> 6 ));
119 data[1] = (char)(0x80 | (cp & 0x3F));
120
121 tkz->pos += 2;
122 }
123 else if (cp <= 0x0000FFFF) {
124 /* 1110xxxx 10xxxxxx 10xxxxxx */
125 data[0] = (char)(0xE0 | ((cp >> 12)));
126 data[1] = (char)(0x80 | ((cp >> 6 ) & 0x3F));
127 data[2] = (char)(0x80 | ( cp & 0x3F));
128
129 tkz->pos += 3;
130 }
131 else if (cp <= 0x001FFFFF) {
132 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
133 data[0] = (char)(0xF0 | ( cp >> 18));
134 data[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
135 data[2] = (char)(0x80 | ((cp >> 6 ) & 0x3F));
136 data[3] = (char)(0x80 | ( cp & 0x3F));
137
138 tkz->pos += 4;
139 }
140
141 *tkz->pos = '\0';
142 }
143
144 lxb_status_t
lxb_css_syntax_ident_serialize(const lxb_char_t * data,size_t length,lexbor_serialize_cb_f cb,void * ctx)145 lxb_css_syntax_ident_serialize(const lxb_char_t *data, size_t length,
146 lexbor_serialize_cb_f cb, void *ctx)
147 {
148 lxb_char_t ch;
149 lxb_status_t status;
150 const char **hex_map;
151 const lxb_char_t *p = data, *end;
152
153 static const lexbor_str_t str_s = lexbor_str("\\");
154
155 end = data + length;
156 hex_map = lexbor_str_res_char_to_two_hex_value_lowercase;
157
158 while (p < end) {
159 ch = *p;
160
161 if (lxb_css_syntax_res_name_map[ch] == 0x00) {
162 lexbor_serialize_write(cb, data, p - data, ctx, status);
163 lexbor_serialize_write(cb, str_s.data, str_s.length, ctx, status);
164 lexbor_serialize_write(cb, hex_map[ch], 2, ctx, status);
165
166 data = ++p;
167
168 if (p < end && lexbor_str_res_map_hex[*p] != 0xff) {
169 lexbor_serialize_write(cb, lxb_str_ws.data,
170 lxb_str_ws.length, ctx, status);
171 }
172
173 continue;
174 }
175
176 p++;
177 }
178
179 if (data < p) {
180 lexbor_serialize_write(cb, data, p - data, ctx, status);
181 }
182
183 return LXB_STATUS_OK;
184 }
185
186 lxb_status_t
lxb_css_syntax_string_serialize(const lxb_char_t * data,size_t length,lexbor_serialize_cb_f cb,void * ctx)187 lxb_css_syntax_string_serialize(const lxb_char_t *data, size_t length,
188 lexbor_serialize_cb_f cb, void *ctx)
189 {
190 lxb_char_t ch;
191 lxb_status_t status;
192 const char **hex_map;
193 const lxb_char_t *p, *end;
194
195 static const lexbor_str_t str_s = lexbor_str("\\");
196 static const lexbor_str_t str_dk = lexbor_str("\"");
197 static const lexbor_str_t str_ds = lexbor_str("\\\\");
198 static const lexbor_str_t str_dks = lexbor_str("\\\"");
199
200 p = data;
201 end = data + length;
202 hex_map = lexbor_str_res_char_to_two_hex_value_lowercase;
203
204 lexbor_serialize_write(cb, str_dk.data, str_dk.length, ctx, status);
205
206 while (p < end) {
207 ch = *p;
208
209 if (lxb_css_syntax_res_name_map[ch] == 0x00) {
210 switch (ch) {
211 case '\\':
212 lexbor_serialize_write(cb, data, p - data, ctx, status);
213 lexbor_serialize_write(cb, str_ds.data, str_ds.length,
214 ctx, status);
215 break;
216
217 case '"':
218 lexbor_serialize_write(cb, data, p - data, ctx, status);
219 lexbor_serialize_write(cb, str_dks.data, str_dks.length,
220 ctx, status);
221 break;
222
223 case '\n':
224 case '\t':
225 case '\r':
226 lexbor_serialize_write(cb, data, p - data, ctx, status);
227 lexbor_serialize_write(cb, str_s.data, str_s.length,
228 ctx, status);
229 lexbor_serialize_write(cb, hex_map[ch], 2, ctx, status);
230
231 p++;
232
233 if (p < end && lexbor_str_res_map_hex[*p] != 0xff) {
234 lexbor_serialize_write(cb, lxb_str_ws.data,
235 lxb_str_ws.length, ctx, status);
236 }
237
238 data = p;
239 continue;
240
241 default:
242 p++;
243 continue;
244 }
245
246 data = ++p;
247 continue;
248 }
249
250 p++;
251 }
252
253 if (data < p) {
254 lexbor_serialize_write(cb, data, p - data, ctx, status);
255 }
256
257 lexbor_serialize_write(cb, str_dk.data, str_dk.length, ctx, status);
258
259 return LXB_STATUS_OK;
260 }
261
262 lxb_status_t
lxb_css_syntax_ident_or_string_serialize(const lxb_char_t * data,size_t length,lexbor_serialize_cb_f cb,void * ctx)263 lxb_css_syntax_ident_or_string_serialize(const lxb_char_t *data, size_t length,
264 lexbor_serialize_cb_f cb, void *ctx)
265 {
266 const lxb_char_t *p, *end;
267
268 p = data;
269 end = data + length;
270
271 while (p < end) {
272 if (lxb_css_syntax_res_name_map[*p++] == 0x00) {
273 return lxb_css_syntax_string_serialize(data, length, cb, ctx);
274 }
275 }
276
277 return cb(data, length, ctx);
278 }
279