1 /*
2 * Copyright (C) 2018-2020 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7 #include <string.h>
8 #include <float.h>
9
10 #include "lexbor/core/utils.h"
11 #include "lexbor/core/strtod.h"
12
13 #include "lexbor/css/syntax/state.h"
14 #include "lexbor/css/syntax/syntax.h"
15 #include "lexbor/css/syntax/tokenizer/error.h"
16
17 #define LXB_CSS_SYNTAX_RES_NAME_MAP
18 #include "lexbor/css/syntax/res.h"
19
20 #define LEXBOR_STR_RES_MAP_HEX
21 #define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
22 #include "lexbor/core/str_res.h"
23
24
/*
 * Call lxb_css_syntax_tokenizer_next_chunk() with the addresses of `data_`
 * and `end_` and store its result in `status_`.
 *
 * Hidden control flow: when the result is not LXB_STATUS_OK the macro
 * executes `return NULL;` in the function that uses it.
 */
#define LXB_CSS_SYNTAX_NEXT_CHUNK(tkz_, status_, data_, end_)                 \
    do {                                                                      \
        status_ = lxb_css_syntax_tokenizer_next_chunk(tkz_, &data_, &end_);   \
                                                                              \
        if (status_ != LXB_STATUS_OK) {                                       \
            return NULL;                                                      \
        }                                                                     \
    }                                                                         \
    while (0)
33
34
/*
 * Append `length_` bytes starting at `begin_` to the tokenizer's temporary
 * string via lxb_css_syntax_string_append(); the result goes to `status_`.
 *
 * Hidden control flow: when the result is not LXB_STATUS_OK the macro
 * executes `return NULL;` in the function that uses it.
 */
#define LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz_, status_, begin_, length_)         \
    do {                                                                      \
        status_ = lxb_css_syntax_string_append(tkz_, begin_, length_);        \
                                                                              \
        if (status_ != LXB_STATUS_OK) {                                       \
            return NULL;                                                      \
        }                                                                     \
    }                                                                         \
    while (0)
43
/*
 * Append the byte range [_begin, _end) to the tokenizer's temporary string.
 *
 * Thin wrapper over LXB_CSS_SYNTAX_STR_APPEND_LEN, so it shares that macro's
 * hidden `return NULL;` on failure. Both pointer arguments are parenthesized
 * so that expression arguments (for example `p + 1`) still yield the intended
 * length when subtracted.
 */
#define LXB_CSS_SYNTAX_STR_APPEND(_tkz, _status, _begin, _end)                \
    LXB_CSS_SYNTAX_STR_APPEND_LEN(_tkz, _status, (_begin),                    \
                                  ((_end) - (_begin)))
46
/*
 * Create an extra delim token for character `ch_` through
 * lxb_css_syntax_list_append_delim().
 *
 * Hidden control flow: when that call returns NULL the macro executes
 * `return NULL;` in the function that uses it.
 */
#define LXB_CSS_SYNTAX_DELIM_APPEND(tkz_, begin_, length_, ch_)               \
    do {                                                                      \
        if (NULL                                                              \
            == lxb_css_syntax_list_append_delim(tkz_, begin_, length_, ch_))  \
        {                                                                     \
            return NULL;                                                      \
        }                                                                     \
    }                                                                         \
    while (0)
56
57
58 static const lxb_char_t *
59 lxb_css_syntax_state_consume_numeric(lxb_css_syntax_tokenizer_t *tkz,
60 lxb_css_syntax_token_t *token,
61 const lxb_char_t *data,
62 const lxb_char_t *end);
63
64 static const lxb_char_t *
65 lxb_css_syntax_state_decimal(lxb_css_syntax_tokenizer_t *tkz,
66 lxb_css_syntax_token_t *token,
67 lxb_char_t *buf_start, lxb_char_t *buf_end,
68 const lxb_char_t *data, const lxb_char_t *end);
69
70 static const lxb_char_t *
71 lxb_css_syntax_state_consume_numeric_name_start(lxb_css_syntax_tokenizer_t *tkz,
72 lxb_css_syntax_token_t *token,
73 const lxb_char_t *data,
74 const lxb_char_t *end);
75
76 static const lxb_char_t *
77 lxb_css_syntax_state_consume_ident(lxb_css_syntax_tokenizer_t *tkz,
78 lxb_css_syntax_token_t *token,
79 const lxb_char_t *data, const lxb_char_t *end);
80
81 static const lxb_char_t *
82 lxb_css_syntax_state_ident_like(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
83 const lxb_char_t *data, const lxb_char_t *end);
84
85 static const lxb_char_t *
86 lxb_css_syntax_state_ident_like_not_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
87 const lxb_char_t *data, const lxb_char_t *end);
88
89
90 static const lxb_char_t *
91 lxb_css_syntax_state_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
92 const lxb_char_t *data, const lxb_char_t *end);
93
94 static const lxb_char_t *
95 lxb_css_syntax_state_bad_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
96 const lxb_char_t *data, const lxb_char_t *end);
97
98 static const lxb_char_t *
99 lxb_css_syntax_state_escaped(lxb_css_syntax_tokenizer_t *tkz,
100 const lxb_char_t *data,
101 const lxb_char_t **end, size_t *length);
102
103 static const lxb_char_t *
104 lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t *tkz,
105 const lxb_char_t *data,
106 const lxb_char_t **end, size_t *length);
107
108
109 lxb_inline lxb_status_t
lxb_css_syntax_string_realloc(lxb_css_syntax_tokenizer_t * tkz,size_t upto)110 lxb_css_syntax_string_realloc(lxb_css_syntax_tokenizer_t *tkz, size_t upto)
111 {
112 size_t len = tkz->pos - tkz->start;
113 size_t size = (tkz->end - tkz->start) + upto;
114
115 lxb_char_t *tmp = lexbor_realloc(tkz->start, size);
116 if (tmp == NULL) {
117 tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
118 return tkz->status;
119 }
120
121 tkz->start = tmp;
122 tkz->pos = tmp + len;
123 tkz->end = tmp + size;
124
125 return LXB_STATUS_OK;
126 }
127
128 lxb_inline lxb_status_t
lxb_css_syntax_string_append(lxb_css_syntax_tokenizer_t * tkz,const lxb_char_t * data,size_t length)129 lxb_css_syntax_string_append(lxb_css_syntax_tokenizer_t *tkz,
130 const lxb_char_t *data, size_t length)
131 {
132 if ((size_t) (tkz->end - tkz->pos) <= length) {
133 if (lxb_css_syntax_string_realloc(tkz, length + 1024) != LXB_STATUS_OK) {
134 return tkz->status;
135 }
136 }
137
138 memcpy(tkz->pos, data, length);
139
140 tkz->pos += length;
141
142 return LXB_STATUS_OK;
143 }
144
145 lxb_inline lxb_status_t
lxb_css_syntax_state_string_term(lxb_css_syntax_tokenizer_t * tkz)146 lxb_css_syntax_state_string_term(lxb_css_syntax_tokenizer_t *tkz)
147 {
148 if (tkz->pos >= tkz->end) {
149 if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
150 return tkz->status;
151 }
152 }
153
154 *tkz->pos = 0x00;
155
156 return LXB_STATUS_OK;
157 }
158
159 lxb_inline const lxb_char_t *
lxb_css_syntax_state_string_set(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data)160 lxb_css_syntax_state_string_set(lxb_css_syntax_tokenizer_t *tkz,
161 lxb_css_syntax_token_t *token,
162 const lxb_char_t *data)
163 {
164 if(lxb_css_syntax_state_string_term(tkz) != LXB_STATUS_OK) {
165 return NULL;
166 }
167
168 lxb_css_syntax_token_string(token)->data = tkz->start;
169 lxb_css_syntax_token_string(token)->length = tkz->pos - tkz->start;
170
171 tkz->pos = tkz->start;
172
173 return data;
174 }
175
176 lxb_inline const lxb_char_t *
lxb_css_syntax_state_dimension_set(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data)177 lxb_css_syntax_state_dimension_set(lxb_css_syntax_tokenizer_t *tkz,
178 lxb_css_syntax_token_t *token,
179 const lxb_char_t *data)
180 {
181 if(lxb_css_syntax_state_string_term(tkz) != LXB_STATUS_OK) {
182 return NULL;
183 }
184
185 lxb_css_syntax_token_dimension_string(token)->data = tkz->start;
186 lxb_css_syntax_token_dimension_string(token)->length = tkz->pos - tkz->start;
187
188 tkz->pos = tkz->start;
189
190 return data;
191 }
192
193 lxb_inline lxb_css_syntax_token_t *
lxb_css_syntax_state_token_create(lxb_css_syntax_tokenizer_t * tkz)194 lxb_css_syntax_state_token_create(lxb_css_syntax_tokenizer_t *tkz)
195 {
196 if (tkz->prepared == 0) {
197 tkz->prepared = tkz->cache->length;
198 }
199
200 return lxb_css_syntax_token_cached_create(tkz);
201 }
202
203 /*
204 * Delim
205 */
206 lxb_inline void
lxb_css_syntax_state_delim_set(lxb_css_syntax_token_t * token,const lxb_char_t * data,lxb_char_t ch,size_t length)207 lxb_css_syntax_state_delim_set(lxb_css_syntax_token_t *token,
208 const lxb_char_t *data, lxb_char_t ch,
209 size_t length)
210 {
211 lxb_css_syntax_token_delim(token)->character = ch;
212 lxb_css_syntax_token_base(token)->begin = data;
213 lxb_css_syntax_token_base(token)->length = length;
214
215 token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
216 }
217
218 lxb_inline lxb_css_syntax_token_t *
lxb_css_syntax_list_append_delim(lxb_css_syntax_tokenizer_t * tkz,const lxb_char_t * data,size_t length,lxb_char_t ch)219 lxb_css_syntax_list_append_delim(lxb_css_syntax_tokenizer_t *tkz,
220 const lxb_char_t *data,
221 size_t length, lxb_char_t ch)
222 {
223 lxb_css_syntax_token_t *delim;
224
225 delim = lxb_css_syntax_state_token_create(tkz);
226 if (delim == NULL) {
227 return NULL;
228 }
229
230 lxb_css_syntax_state_delim_set(delim, data, ch, length);
231
232 return delim;
233 }
234
235 const lxb_char_t *
lxb_css_syntax_state_delim(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)236 lxb_css_syntax_state_delim(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
237 const lxb_char_t *data, const lxb_char_t *end)
238 {
239 lxb_css_syntax_state_delim_set(token, data, *data, 1);
240
241 return data + 1;
242 }
243
244 /*
245 * Comment
246 */
247 const lxb_char_t *
lxb_css_syntax_state_comment(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)248 lxb_css_syntax_state_comment(lxb_css_syntax_tokenizer_t *tkz,
249 lxb_css_syntax_token_t *token,
250 const lxb_char_t *data, const lxb_char_t *end)
251 {
252 size_t length;
253 lxb_status_t status;
254 const lxb_char_t *begin;
255
256 lxb_css_syntax_token_base(token)->begin = data;
257
258 /* Skip forward slash (/) */
259 data++;
260
261 if (data >= end) {
262 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
263 if (data >= end) {
264 goto delim;
265 }
266 }
267
268 /* U+002A ASTERISK (*) */
269 if (*data != 0x2A) {
270 goto delim;
271 }
272
273 begin = ++data;
274 length = 2;
275
276 do {
277 if (data >= end) {
278 if (begin < data) {
279 length += data - begin;
280 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
281 }
282
283 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
284 if (data >= end) {
285 goto error;
286 }
287
288 begin = data;
289 }
290
291 switch (*data) {
292 case 0x00:
293 if (begin < data) {
294 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
295 }
296
297 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
298 lexbor_str_res_ansi_replacement_character,
299 sizeof(lexbor_str_res_ansi_replacement_character) - 1);
300 data += 1;
301 length += data - begin;
302 begin = data;
303
304 continue;
305
306 case 0x0D:
307 data++;
308 length += data - begin;
309
310 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
311
312 tkz->pos[-1] = '\n';
313
314 if (data >= end) {
315 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
316 if (data >= end) {
317 goto error;
318 }
319 }
320
321 if (*data != 0x0A) {
322 data--;
323 }
324 else {
325 length += 1;
326 }
327
328 begin = ++data;
329
330 continue;
331
332 case 0x0C:
333 if (begin < data) {
334 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
335 }
336
337 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
338 (lxb_char_t *) "\n", 1);
339 data += 1;
340 length += data - begin;
341 begin = data;
342
343 continue;
344
345 /* U+002A ASTERISK (*) */
346 case 0x2A:
347 data++;
348
349 if (data >= end) {
350 length += data - begin;
351
352 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
353
354 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
355 if (data >= end) {
356 goto error;
357 }
358
359 if (*data == 0x2F) {
360 tkz->pos--;
361 *tkz->pos = 0x00;
362
363 data++;
364 length++;
365
366 goto done;
367 }
368
369 begin = data;
370 }
371
372 /* U+002F Forward slash (/) */
373 if (*data == 0x2F) {
374 length += data - begin;
375
376 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, (data - 1));
377
378 data++;
379 length++;
380
381 goto done;
382 }
383
384 continue;
385 }
386
387 data++;
388 }
389 while (true);
390
391 done:
392
393 token->type = LXB_CSS_SYNTAX_TOKEN_COMMENT;
394
395 lxb_css_syntax_token_base(token)->length = length;
396
397 return lxb_css_syntax_state_string_set(tkz, token, data);
398
399 delim:
400
401 token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
402
403 lxb_css_syntax_token_base(token)->length = 1;
404 lxb_css_syntax_token_delim(token)->character = '/';
405
406 return data;
407
408 error:
409
410 token->type = LXB_CSS_SYNTAX_TOKEN_COMMENT;
411
412 lxb_css_syntax_token_base(token)->length = length;
413
414 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, NULL,
415 LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINCO);
416
417 return lxb_css_syntax_state_string_set(tkz, token, data);
418 }
419
420 /*
421 * Whitespace
422 */
423 const lxb_char_t *
lxb_css_syntax_state_whitespace(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)424 lxb_css_syntax_state_whitespace(lxb_css_syntax_tokenizer_t *tkz,
425 lxb_css_syntax_token_t *token,
426 const lxb_char_t *data, const lxb_char_t *end)
427 {
428 size_t length;
429 lxb_status_t status;
430 const lxb_char_t *begin;
431
432 token->type = LXB_CSS_SYNTAX_TOKEN_WHITESPACE;
433
434 lxb_css_syntax_token_base(token)->begin = data;
435
436 begin = data;
437 length = 0;
438
439 do {
440 switch (*data) {
441 case 0x0D:
442 data++;
443 length += data - begin;
444
445 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
446
447 tkz->pos[-1] = '\n';
448
449 if (data >= end) {
450 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
451 if (data >= end) {
452 goto done;
453 }
454 }
455
456 if (*data != 0x0A) {
457 data--;
458 }
459 else {
460 length += 1;
461 }
462
463 begin = data + 1;
464 break;
465
466 case 0x0C:
467 length += (data + 1) - begin;
468
469 if (begin < data) {
470 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
471 }
472
473 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
474 (const lxb_char_t *) "\n", 1);
475 begin = data + 1;
476 break;
477
478 case 0x09:
479 case 0x20:
480 case 0x0A:
481 break;
482
483 default:
484 if (begin < data) {
485 length += data - begin;
486
487 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
488 }
489
490 lxb_css_syntax_token_base(token)->length = length;
491
492 return lxb_css_syntax_state_string_set(tkz, token, data);
493 }
494
495 data++;
496
497 if (data >= end) {
498 if (begin < data) {
499 length += data - begin;
500
501 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
502 }
503
504 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
505 if (data >= end) {
506 break;
507 }
508
509 begin = data;
510 }
511 }
512 while (true);
513
514 done:
515
516 lxb_css_syntax_token_base(token)->length = length;
517
518 return lxb_css_syntax_state_string_set(tkz, token, data);
519 }
520
521 /*
522 * String token for U+0022 Quotation Mark (") and U+0027 Apostrophe (')
523 */
524 const lxb_char_t *
lxb_css_syntax_state_string(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)525 lxb_css_syntax_state_string(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
526 const lxb_char_t *data, const lxb_char_t *end)
527 {
528 size_t length;
529 lxb_char_t mark;
530 lxb_status_t status;
531 const lxb_char_t *begin;
532
533 lxb_css_syntax_token_base(token)->begin = data;
534
535 mark = *data++;
536 begin = data;
537 length = 1;
538
539 for (;; data++) {
540 if (data >= end) {
541 if (begin < data) {
542 length += data - begin;
543
544 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
545 }
546
547 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
548 if (data >= end) {
549 goto error;
550 }
551
552 begin = data;
553 }
554
555 switch (*data) {
556 case 0x00:
557 length += (data + 1) - begin;
558
559 if (begin < data) {
560 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
561 }
562
563 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
564 lexbor_str_res_ansi_replacement_character,
565 sizeof(lexbor_str_res_ansi_replacement_character) - 1);
566 begin = data + 1;
567 break;
568
569 /*
570 * U+000A LINE FEED
571 * U+000D CARRIAGE RETURN
572 * U+000C FORM FEED
573 */
574 case 0x0A:
575 case 0x0D:
576 case 0x0C:
577 length += data - begin;
578
579 if (begin < data) {
580 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
581 }
582
583 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
584 LXB_CSS_SYNTAX_TOKENIZER_ERROR_NEINST);
585
586 token->type = LXB_CSS_SYNTAX_TOKEN_BAD_STRING;
587
588 lxb_css_syntax_token_base(token)->length = length;
589
590 return lxb_css_syntax_state_string_set(tkz, token, data);
591
592 /* U+005C REVERSE SOLIDUS (\) */
593 case 0x5C:
594 length += (data + 1) - begin;
595
596 if (begin < data) {
597 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
598 }
599
600 data++;
601
602 if (data >= end) {
603 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
604 if (data >= end) {
605 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
606 (const lxb_char_t *) "\\", 1);
607 goto error;
608 }
609 }
610
611 data = lxb_css_syntax_state_escaped_string(tkz, data, &end,
612 &length);
613 if (data == NULL) {
614 return NULL;
615 }
616
617 begin = data;
618
619 data--;
620 break;
621
622 default:
623 /* '"' or '\'' */
624 if (*data == mark) {
625 length += (data + 1) - begin;
626
627 if (begin < data) {
628 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
629 }
630
631 token->type = LXB_CSS_SYNTAX_TOKEN_STRING;
632
633 lxb_css_syntax_token_base(token)->length = length;
634
635 return lxb_css_syntax_state_string_set(tkz, token,
636 data + 1);
637 }
638
639 break;
640 }
641 }
642
643 return data;
644
645 error:
646
647 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, NULL,
648 LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINST);
649
650 token->type = LXB_CSS_SYNTAX_TOKEN_STRING;
651
652 lxb_css_syntax_token_base(token)->length = length;
653
654 return lxb_css_syntax_state_string_set(tkz, token, data);
655 }
656
657 /*
658 * U+0023 NUMBER SIGN (#)
659 */
660 const lxb_char_t *
lxb_css_syntax_state_hash(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)661 lxb_css_syntax_state_hash(lxb_css_syntax_tokenizer_t *tkz,
662 lxb_css_syntax_token_t *token, const lxb_char_t *data,
663 const lxb_char_t *end)
664 {
665 size_t length;
666 lxb_char_t ch;
667 lxb_status_t status;
668 const lxb_char_t *begin;
669 lxb_css_syntax_token_t *delim;
670
671 lxb_css_syntax_token_base(token)->begin = data++;
672
673 if (data >= end) {
674 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
675 if (data >= end) {
676 goto delim;
677 }
678 }
679
680 length = 1;
681
682 if (lxb_css_syntax_res_name_map[*data] == 0x00) {
683 if (*data == 0x00) {
684 goto hash;
685 }
686
687 /* U+005C REVERSE SOLIDUS (\) */
688 if (*data != 0x5C) {
689 goto delim;
690 }
691
692 begin = data++;
693
694 if (data >= end) {
695 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
696 if (data >= end) {
697 goto push_delim;
698 }
699 }
700
701 ch = *data;
702
703 if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
704 goto push_delim;
705 }
706
707 length += 1;
708
709 data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
710 if (data == NULL) {
711 return NULL;
712 }
713 }
714
715 hash:
716
717 token->type = LXB_CSS_SYNTAX_TOKEN_HASH;
718
719 lxb_css_syntax_token_base(token)->length = length;
720
721 return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
722
723 push_delim:
724
725 delim = lxb_css_syntax_list_append_delim(tkz, begin, 1, '\\');
726 if (delim == NULL) {
727 return NULL;
728 }
729
730 delim:
731
732 token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
733
734 lxb_css_syntax_token_base(token)->length = 1;
735 lxb_css_syntax_token_delim(token)->character = '#';
736
737 return data;
738 }
739
740 /*
741 * U+0028 LEFT PARENTHESIS (()
742 */
743 const lxb_char_t *
lxb_css_syntax_state_lparenthesis(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)744 lxb_css_syntax_state_lparenthesis(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
745 const lxb_char_t *data, const lxb_char_t *end)
746 {
747 token->type = LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS;
748
749 lxb_css_syntax_token_base(token)->begin = data;
750 lxb_css_syntax_token_base(token)->length = 1;
751
752 return data + 1;
753 }
754
755 /*
756 * U+0029 RIGHT PARENTHESIS ())
757 */
758 const lxb_char_t *
lxb_css_syntax_state_rparenthesis(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)759 lxb_css_syntax_state_rparenthesis(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
760 const lxb_char_t *data, const lxb_char_t *end)
761 {
762 token->type = LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS;
763
764 lxb_css_syntax_token_base(token)->begin = data;
765 lxb_css_syntax_token_base(token)->length = 1;
766
767 return data + 1;
768 }
769
770 /*
771 * U+002B PLUS SIGN (+)
772 */
773 const lxb_char_t *
lxb_css_syntax_state_plus(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)774 lxb_css_syntax_state_plus(lxb_css_syntax_tokenizer_t *tkz,
775 lxb_css_syntax_token_t *token,
776 const lxb_char_t *data, const lxb_char_t *end)
777 {
778 lxb_status_t status;
779
780 lxb_css_syntax_token_base(token)->begin = data++;
781
782 if (data >= end) {
783 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
784 if (data >= end) {
785 token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
786
787 lxb_css_syntax_token_base(token)->length = 1;
788 lxb_css_syntax_token_delim(token)->character = '+';
789
790 return data;
791 }
792 }
793
794 return lxb_css_syntax_state_plus_process(tkz, token, data, end);
795 }
796
797 const lxb_char_t *
lxb_css_syntax_state_plus_process(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)798 lxb_css_syntax_state_plus_process(lxb_css_syntax_tokenizer_t *tkz,
799 lxb_css_syntax_token_t *token,
800 const lxb_char_t *data, const lxb_char_t *end)
801 {
802 lxb_status_t status;
803 const lxb_char_t *begin;
804 lxb_css_syntax_token_t *delim;
805
806 /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
807 if (*data >= 0x30 && *data <= 0x39) {
808 lxb_css_syntax_token_number(token)->have_sign = true;
809 lxb_css_syntax_token_base(token)->length = 1;
810
811 return lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
812 }
813
814 /* U+002E FULL STOP (.) */
815 if (*data == 0x2E) {
816 begin = data++;
817
818 if (data == end) {
819 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
820
821 if (data >= end || *data < 0x30 || *data > 0x39) {
822 goto push_delim;
823 }
824
825 lxb_css_syntax_token_number(token)->have_sign = true;
826 lxb_css_syntax_token_base(token)->length = 2;
827
828 return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
829 tkz->buffer + sizeof(tkz->buffer),
830 data, end);
831 }
832
833 /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
834 if (*data >= 0x30 && *data <= 0x39) {
835 lxb_css_syntax_token_number(token)->have_sign = true;
836 lxb_css_syntax_token_base(token)->length = 2;
837
838 return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
839 tkz->buffer + sizeof(tkz->buffer),
840 data, end);
841 }
842
843 push_delim:
844
845 delim = lxb_css_syntax_list_append_delim(tkz, begin, 1, '.');
846 if (delim == NULL) {
847 return NULL;
848 }
849 }
850
851 token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
852
853 lxb_css_syntax_token_base(token)->length = 1;
854 lxb_css_syntax_token_delim(token)->character = '+';
855
856 return data;
857 }
858
859 /*
860 * U+002C COMMA (,)
861 */
862 const lxb_char_t *
lxb_css_syntax_state_comma(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)863 lxb_css_syntax_state_comma(lxb_css_syntax_tokenizer_t *tkz,
864 lxb_css_syntax_token_t *token,
865 const lxb_char_t *data, const lxb_char_t *end)
866 {
867 token->type = LXB_CSS_SYNTAX_TOKEN_COMMA;
868
869 lxb_css_syntax_token_base(token)->begin = data;
870 lxb_css_syntax_token_base(token)->length = 1;
871
872 return data + 1;
873 }
874
875 /*
876 * U+002D HYPHEN-MINUS (-)
877 */
878 const lxb_char_t *
lxb_css_syntax_state_minus(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)879 lxb_css_syntax_state_minus(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
880 const lxb_char_t *data, const lxb_char_t *end)
881 {
882 lxb_status_t status;
883
884 lxb_css_syntax_token_base(token)->begin = data++;
885
886 if (data >= end) {
887 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
888 if (data >= end) {
889 token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
890
891 lxb_css_syntax_token_base(token)->length = 1;
892 lxb_css_syntax_token_delim(token)->character = '-';
893
894 return data;
895 }
896 }
897
898 return lxb_css_syntax_state_minus_process(tkz, token, data, end);
899 }
900
901 const lxb_char_t *
lxb_css_syntax_state_minus_process(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)902 lxb_css_syntax_state_minus_process(lxb_css_syntax_tokenizer_t *tkz,
903 lxb_css_syntax_token_t *token,
904 const lxb_char_t *data, const lxb_char_t *end)
905 {
906 size_t length;
907 lxb_char_t ch;
908 lxb_status_t status;
909 const lxb_char_t *begin, *second;
910 lxb_css_syntax_token_t *delim;
911 lxb_css_syntax_token_number_t *number;
912
913 unsigned minuses_len = 1;
914 static const lxb_char_t minuses[3] = "---";
915
916 /* Check for <number-token> */
917
918 /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
919 if (*data >= 0x30 && *data <= 0x39) {
920 lxb_css_syntax_token_base(token)->length = 1;
921
922 data = lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
923
924 number = lxb_css_syntax_token_number(token);
925 number->num = -number->num;
926
927 lxb_css_syntax_token_number(token)->have_sign = true;
928
929 return data;
930 }
931
932 /* U+002E FULL STOP (.) */
933 if (*data == 0x2E) {
934 begin = data++;
935
936 if (data == end) {
937 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
938 if (data >= end) {
939 goto push_delim;
940 }
941 }
942
943 /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
944 if (*data >= 0x30 && *data <= 0x39) {
945 lxb_css_syntax_token_base(token)->length = 2;
946
947 data = lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
948 tkz->buffer + sizeof(tkz->buffer),
949 data, end);
950
951 number = lxb_css_syntax_token_number(token);
952 number->num = -number->num;
953
954 lxb_css_syntax_token_number(token)->have_sign = true;
955
956 return data;
957 }
958
959 push_delim:
960
961 delim = lxb_css_syntax_list_append_delim(tkz, begin, 1, '.');
962 if (delim == NULL) {
963 return NULL;
964 }
965
966 goto delim;
967 }
968
969 second = data;
970
971 /* U+002D HYPHEN-MINUS (-) */
972 if (*data == 0x2D) {
973 data++;
974
975 /* Check for <CDC-token> */
976
977 if (data == end) {
978 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
979 if (data >= end) {
980 delim = lxb_css_syntax_list_append_delim(tkz, second, 1, '-');
981 if (delim == NULL) {
982 return NULL;
983 }
984
985 goto delim;
986 }
987 }
988
989 if (*data == 0x2D) {
990 lxb_css_syntax_token_base(token)->length = 3;
991 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 3);
992
993 return lxb_css_syntax_state_ident_like_not_url(tkz, token,
994 ++data, end);
995 }
996 else if (*data == 0x3E) {
997 token->type = LXB_CSS_SYNTAX_TOKEN_CDC;
998
999 lxb_css_syntax_token_base(token)->length = 3;
1000
1001 return data + 1;
1002 }
1003
1004 minuses_len++;
1005 }
1006
1007 /* Check for <ident-token> */
1008
1009 if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START
1010 || *data == 0x00)
1011 {
1012 lxb_css_syntax_token_base(token)->length = minuses_len;
1013 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);
1014
1015 return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
1016 }
1017
1018 length = 0;
1019
1020 /* U+005C REVERSE SOLIDUS (\) */
1021 if (*data == 0x5C) {
1022 begin = data++;
1023
1024 if (data == end) {
1025 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1026 if (data >= end) {
1027 goto delim_rev_solidus;
1028 }
1029
1030 ch = *data;
1031
1032 if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1033 length += 1;
1034 goto ident;
1035 }
1036
1037 goto delim_rev_solidus;
1038 }
1039
1040 ch = *data;
1041
1042 if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1043 length += 1;
1044 goto ident;
1045 }
1046
1047 delim_rev_solidus:
1048
1049 if (minuses_len == 2) {
1050 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, second, 1, '-');
1051 }
1052
1053 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, begin, 1, '\\');
1054
1055 goto delim;
1056 }
1057
1058 if (minuses_len == 2) {
1059 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, second, 0, '-');
1060 }
1061
1062 delim:
1063
1064 token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1065
1066 lxb_css_syntax_token_base(token)->length = 1;
1067 lxb_css_syntax_token_delim(token)->character = '-';
1068
1069 return data;
1070
1071 ident:
1072
1073 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);
1074
1075 data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
1076 if (data == NULL) {
1077 return NULL;
1078 }
1079
1080 lxb_css_syntax_token_base(token)->length = minuses_len + length;
1081
1082 return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
1083 }
1084
1085 /*
1086 * U+002E FULL STOP (.)
1087 */
1088 const lxb_char_t *
lxb_css_syntax_state_full_stop(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1089 lxb_css_syntax_state_full_stop(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1090 const lxb_char_t *data, const lxb_char_t *end)
1091 {
1092 lxb_status_t status;
1093
1094 lxb_css_syntax_token_base(token)->begin = data;
1095 lxb_css_syntax_token_number(token)->have_sign = false;
1096
1097 data++;
1098
1099 if (data >= end) {
1100 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1101 if (data >= end) {
1102 goto delim;
1103 }
1104 }
1105
1106 /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1107 if (*data >= 0x30 && *data <= 0x39) {
1108 lxb_css_syntax_token_base(token)->length = 1;
1109
1110 return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
1111 tkz->buffer + sizeof(tkz->buffer),
1112 data, end);
1113 }
1114
1115 delim:
1116
1117 token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1118
1119 lxb_css_syntax_token_base(token)->length = 1;
1120 lxb_css_syntax_token_delim(token)->character = '.';
1121
1122 return data;
1123 }
1124
1125 /*
1126 * U+003A COLON (:)
1127 */
1128 const lxb_char_t *
lxb_css_syntax_state_colon(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1129 lxb_css_syntax_state_colon(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1130 const lxb_char_t *data, const lxb_char_t *end)
1131 {
1132 token->type = LXB_CSS_SYNTAX_TOKEN_COLON;
1133
1134 lxb_css_syntax_token_base(token)->begin = data;
1135 lxb_css_syntax_token_base(token)->length = 1;
1136
1137 return data + 1;
1138 }
1139
1140 /*
1141 * U+003B SEMICOLON (;)
1142 */
1143 const lxb_char_t *
lxb_css_syntax_state_semicolon(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1144 lxb_css_syntax_state_semicolon(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1145 const lxb_char_t *data, const lxb_char_t *end)
1146 {
1147 token->type = LXB_CSS_SYNTAX_TOKEN_SEMICOLON;
1148
1149 lxb_css_syntax_token_base(token)->begin = data;
1150 lxb_css_syntax_token_base(token)->length = 1;
1151
1152 return data + 1;
1153 }
1154
1155 /*
1156 * U+003C LESS-THAN SIGN (<)
1157 */
1158 const lxb_char_t *
lxb_css_syntax_state_less_sign(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1159 lxb_css_syntax_state_less_sign(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1160 const lxb_char_t *data, const lxb_char_t *end)
1161 {
1162 size_t length;
1163 lxb_char_t ch;
1164 lxb_status_t status;
1165 const lxb_char_t *mark, *minus, *esc, *idnt;
1166 lxb_css_syntax_token_t *ident;
1167
1168 lxb_css_syntax_token_base(token)->begin = data++;
1169
1170 if ((end - data) > 2) {
1171 if (data[0] == '!' && data[1] == '-' && data[2] == '-') {
1172 data += 3;
1173
1174 token->type = LXB_CSS_SYNTAX_TOKEN_CDO;
1175 lxb_css_syntax_token_base(token)->length = 4;
1176
1177 return data;
1178 }
1179
1180 token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1181
1182 lxb_css_syntax_token_base(token)->length = 1;
1183 lxb_css_syntax_token_delim(token)->character = '<';
1184
1185 return data;
1186 }
1187
1188 if (data >= end) {
1189 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1190 if (data >= end) {
1191 goto delim;
1192 }
1193 }
1194
1195 /* U+0021 EXCLAMATION MARK */
1196 if (*data != 0x21) {
1197 goto delim;
1198 }
1199
1200 mark = data++;
1201
1202 if (data == end) {
1203 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1204 if (data >= end) {
1205 goto delim_mark;
1206 }
1207 }
1208
1209 /* U+002D HYPHEN-MINUS */
1210 if (*data != 0x2D) {
1211 goto delim_mark;
1212 }
1213
1214 minus = data++;
1215
1216 if (data == end) {
1217 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1218 if (data >= end) {
1219 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1220 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
1221
1222 goto delim;
1223 }
1224 }
1225
1226 /* U+002D HYPHEN-MINUS */
1227 if (*data == 0x2D) {
1228 token->type = LXB_CSS_SYNTAX_TOKEN_CDO;
1229
1230 lxb_css_syntax_token_base(token)->length = 4;
1231
1232 return data + 1;
1233 }
1234
1235 length = 1;
1236 idnt = data;
1237
1238 if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START) {
1239 goto ident_with_minus;
1240 }
1241
1242 /* U+005C REVERSE SOLIDUS (\) */
1243 if (*data == 0x5C) {
1244 esc = data++;
1245 length += 1;
1246
1247 if (data == end) {
1248 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1249 if (data >= end) {
1250 goto delim_esc;
1251 }
1252
1253 ch = *data;
1254
1255 if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1256 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
1257 (const lxb_char_t *) "-", 1);
1258
1259 data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
1260 if (data == NULL) {
1261 return NULL;
1262 }
1263
1264 goto ident;
1265 }
1266
1267 delim_esc:
1268
1269 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1270 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
1271 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, esc, 1, '\\');
1272
1273 goto delim;
1274 }
1275
1276 ch = *data--;
1277
1278 if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
1279 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1280 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
1281
1282 goto delim;
1283 }
1284
1285 data = lxb_css_syntax_state_escaped(tkz, data + 1, &end, &length);
1286 if (data == NULL) {
1287 return NULL;
1288 }
1289 }
1290 else if (*data != 0x00) {
1291 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1292 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 0, '-');
1293
1294 goto delim;
1295 }
1296
1297 ident_with_minus:
1298
1299 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, (const lxb_char_t *) "-", 1);
1300
1301 ident:
1302
1303 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1304
1305 ident = lxb_css_syntax_state_token_create(tkz);
1306 if (ident == NULL) {
1307 return NULL;
1308 }
1309
1310 lxb_css_syntax_token_base(ident)->begin = idnt;
1311 lxb_css_syntax_token_base(ident)->length = length;
1312
1313 data = lxb_css_syntax_state_ident_like_not_url(tkz, ident, data, end);
1314 if (data == NULL) {
1315 return NULL;
1316 }
1317
1318 goto delim;
1319
1320 delim_mark:
1321
1322 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1323
1324 delim:
1325
1326 token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1327
1328 lxb_css_syntax_token_base(token)->length = 1;
1329 lxb_css_syntax_token_delim(token)->character = '<';
1330
1331 return data;
1332 }
1333
/*
 * U+0040 COMMERCIAL AT (@)
 *
 * Tries to read an at-keyword starting at '@'. When what follows can begin
 * a name (a name-start character, "-" plus name-start, "--", a valid escape,
 * or U+0000), the rest is handed to lxb_css_syntax_state_consume_ident()
 * and the token keeps the AT_KEYWORD type. Otherwise the token becomes an
 * '@' delim, and any '-' or '\' already consumed is appended as a separate
 * delim token.
 *
 * Returns the position from which tokenizing continues, or NULL when
 * fetching a chunk, appending to the string buffer, or appending a delim
 * token fails.
 */
const lxb_char_t *
lxb_css_syntax_state_at(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
                        const lxb_char_t *data, const lxb_char_t *end)
{
    size_t length;
    lxb_char_t ch;
    lxb_status_t status;
    const lxb_char_t *minus, *esc;

    /* Number of leading '-' consumed that still have to be accounted for. */
    unsigned minuses_len = 0;
    static const lxb_char_t minuses[2] = "--";

    token->type = LXB_CSS_SYNTAX_TOKEN_AT_KEYWORD;

    lxb_css_syntax_token_base(token)->begin = data++;
    lxb_css_syntax_token_base(token)->length = 1;

    if (data >= end) {
        LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
        if (data >= end) {
            goto delim;
        }
    }

    if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START) {
        return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
    }

    minus = data;

    /* U+002D HYPHEN-MINUS */
    if (*data == 0x2D) {
        data++;

        if (data == end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
                goto delim;
            }
        }

        /* "-" followed by a name-start character or U+0000. */
        if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START
            || *data == 0x00)
        {
            lxb_css_syntax_token_base(token)->length += 1;
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 1);
            return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
        }
        /* "--": both hyphens go into the name. */
        else if (*data == 0x2D) {
            lxb_css_syntax_token_base(token)->length += 2;
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 2);
            return lxb_css_syntax_state_consume_ident(tkz, token,
                                                      data + 1, end);
        }

        minuses_len++;
    }

    /* U+005C REVERSE SOLIDUS (\) */
    if (*data == 0x5C) {
        esc = data++;

        if (data == end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                goto delim_esc;
            }
        }

        ch = *data;

        /* A backslash not followed by a newline is a valid escape. */
        if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);

            length = 0;

            data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
            if (data == NULL) {
                return NULL;
            }

            /* 1 for the backslash, plus any '-' and the escape itself. */
            lxb_css_syntax_token_base(token)->length += 1 + minuses_len + length;

            return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
        }

        goto delim_esc;
    }
    else if (*data != 0x00) {
        if (minuses_len != 0) {
            /*
             * NOTE(review): a length of 0 is passed for '-' here, while the
             * end-of-input paths pass 1 — confirm it is intended.
             */
            LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 0, '-');
        }

        goto delim;
    }

    /* Reached only when *data is U+0000. */
    LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);

    lxb_css_syntax_token_base(token)->length += minuses_len;

    return lxb_css_syntax_state_consume_ident(tkz, token, data, end);

delim_esc:

    if (minuses_len != 0) {
        LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
    }

    LXB_CSS_SYNTAX_DELIM_APPEND(tkz, esc, 1, '\\');

delim:

    token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;

    lxb_css_syntax_token_base(token)->length = 1;
    lxb_css_syntax_token_delim(token)->character = '@';

    return data;
}
1457
1458 /*
1459 * U+005B LEFT SQUARE BRACKET ([)
1460 */
1461 const lxb_char_t *
lxb_css_syntax_state_ls_bracket(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1462 lxb_css_syntax_state_ls_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1463 const lxb_char_t *data, const lxb_char_t *end)
1464 {
1465 token->type = LXB_CSS_SYNTAX_TOKEN_LS_BRACKET;
1466
1467 lxb_css_syntax_token_base(token)->begin = data;
1468 lxb_css_syntax_token_base(token)->length = 1;
1469
1470 return data + 1;
1471 }
1472
1473 /*
1474 * U+005C REVERSE SOLIDUS (\)
1475 */
1476 const lxb_char_t *
lxb_css_syntax_state_rsolidus(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1477 lxb_css_syntax_state_rsolidus(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1478 const lxb_char_t *data, const lxb_char_t *end)
1479 {
1480 size_t length;
1481 lxb_char_t ch;
1482 lxb_status_t status;
1483
1484 lxb_css_syntax_token_base(token)->begin = data++;
1485
1486 if (data >= end) {
1487 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1488 if (data >= end) {
1489 goto delim;
1490 }
1491 }
1492
1493 ch = *data;
1494
1495 if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
1496 goto delim;
1497 }
1498
1499 length = 1;
1500
1501 data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
1502 if (data == NULL) {
1503 return NULL;
1504 }
1505
1506 lxb_css_syntax_token_base(token)->length = length;
1507
1508 return lxb_css_syntax_state_ident_like(tkz, token, data, end);
1509
1510 delim:
1511
1512 token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1513
1514 lxb_css_syntax_token_base(token)->length = 1;
1515 lxb_css_syntax_token_delim(token)->character = '\\';
1516
1517 return data;
1518 }
1519
1520 /*
1521 * U+005D RIGHT SQUARE BRACKET (])
1522 */
1523 const lxb_char_t *
lxb_css_syntax_state_rs_bracket(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1524 lxb_css_syntax_state_rs_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1525 const lxb_char_t *data, const lxb_char_t *end)
1526 {
1527 token->type = LXB_CSS_SYNTAX_TOKEN_RS_BRACKET;
1528
1529 lxb_css_syntax_token_base(token)->begin = data;
1530 lxb_css_syntax_token_base(token)->length = 1;
1531
1532 return data + 1;
1533 }
1534
1535 /*
1536 * U+007B LEFT CURLY BRACKET ({)
1537 */
1538 const lxb_char_t *
lxb_css_syntax_state_lc_bracket(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1539 lxb_css_syntax_state_lc_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1540 const lxb_char_t *data, const lxb_char_t *end)
1541 {
1542 token->type = LXB_CSS_SYNTAX_TOKEN_LC_BRACKET;
1543
1544 lxb_css_syntax_token_base(token)->begin = data;
1545 lxb_css_syntax_token_base(token)->length = 1;
1546
1547 return data + 1;
1548 }
1549
1550 /*
1551 * U+007D RIGHT CURLY BRACKET (})
1552 */
1553 const lxb_char_t *
lxb_css_syntax_state_rc_bracket(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1554 lxb_css_syntax_state_rc_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1555 const lxb_char_t *data, const lxb_char_t *end)
1556 {
1557 token->type = LXB_CSS_SYNTAX_TOKEN_RC_BRACKET;
1558
1559 lxb_css_syntax_token_base(token)->begin = data;
1560 lxb_css_syntax_token_base(token)->length = 1;
1561
1562 return data + 1;
1563 }
1564
1565 /*
1566 * Numeric
1567 */
1568 lxb_inline void
lxb_css_syntax_consume_numeric_set_int(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * start,const lxb_char_t * end)1569 lxb_css_syntax_consume_numeric_set_int(lxb_css_syntax_tokenizer_t *tkz,
1570 lxb_css_syntax_token_t *token,
1571 const lxb_char_t *start, const lxb_char_t *end)
1572 {
1573 double num = lexbor_strtod_internal(start, (end - start), 0);
1574
1575 token->type = LXB_CSS_SYNTAX_TOKEN_NUMBER;
1576
1577 lxb_css_syntax_token_number(token)->is_float = false;
1578 lxb_css_syntax_token_number(token)->num = num;
1579 }
1580
1581 lxb_inline void
lxb_css_syntax_consume_numeric_set_float(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * start,const lxb_char_t * end,bool e_is_negative,int exponent,int e_digit)1582 lxb_css_syntax_consume_numeric_set_float(lxb_css_syntax_tokenizer_t *tkz,
1583 lxb_css_syntax_token_t *token,
1584 const lxb_char_t *start, const lxb_char_t *end,
1585 bool e_is_negative, int exponent, int e_digit)
1586 {
1587 if (e_is_negative) {
1588 exponent -= e_digit;
1589 }
1590 else {
1591 exponent += e_digit;
1592 }
1593
1594 double num = lexbor_strtod_internal(start, (end - start), exponent);
1595
1596 token->type = LXB_CSS_SYNTAX_TOKEN_NUMBER;
1597
1598 lxb_css_syntax_token_number(token)->num = num;
1599 lxb_css_syntax_token_number(token)->is_float = true;
1600 }
1601
1602 const lxb_char_t *
lxb_css_syntax_state_consume_before_numeric(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1603 lxb_css_syntax_state_consume_before_numeric(lxb_css_syntax_tokenizer_t *tkz,
1604 lxb_css_syntax_token_t *token,
1605 const lxb_char_t *data,
1606 const lxb_char_t *end)
1607 {
1608 lxb_css_syntax_token_base(token)->begin = data;
1609 lxb_css_syntax_token_base(token)->length = 0;
1610 lxb_css_syntax_token_number(token)->have_sign = false;
1611
1612 return lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
1613 }
1614
/*
 * Consumes the integer part of a number starting at `data`.
 *
 * Digits are copied into tkz->buffer (digits beyond the buffer size are
 * still counted in the token length but not stored). Afterwards:
 *   - no '.' follows: the token is set as an integer and the unit/percent
 *     check continues in lxb_css_syntax_state_consume_numeric_name_start();
 *   - '.' followed by a digit: continues in lxb_css_syntax_state_decimal();
 *   - '.' followed by anything else or end of input: the token is set as an
 *     integer and the '.' is appended as a separate delim token.
 *
 * Returns the position from which tokenizing continues, or NULL when
 * fetching a chunk or appending the delim token fails.
 */
static const lxb_char_t *
lxb_css_syntax_state_consume_numeric(lxb_css_syntax_tokenizer_t *tkz,
                                     lxb_css_syntax_token_t *token,
                                     const lxb_char_t *data,
                                     const lxb_char_t *end)
{
    size_t length;
    lxb_status_t status;
    const lxb_char_t *begin;

    lxb_char_t *buf_start = tkz->buffer;
    lxb_char_t *buf_end = buf_start + sizeof(tkz->buffer);

    /* `begin` marks where counting started within the current chunk. */
    begin = data;
    length = 0;

    do {
        /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
        if (*data < 0x30 || *data > 0x39) {
            length += data - begin;
            break;
        }

        /* Store the digit only while there is room in the buffer. */
        if (buf_start != buf_end) {
            *buf_start++ = *data;
        }

        if (++data == end) {
            length += data - begin;

            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                /* Input ended inside the digits: finish as an integer. */
                lxb_css_syntax_token_base(token)->length += length;

                lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer,
                                                       buf_start);
                return data;
            }

            begin = data;
        }
    }
    while (true);

    lxb_css_syntax_token_base(token)->length += length;

    /* U+002E FULL STOP (.) */
    if (*data != 0x2E) {
        lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer,
                                               buf_start);

        return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
                                                               data, end);
    }

    /* Remember where the '.' is in case it turns out to be a delim. */
    begin = data++;

    if (data == end) {
        LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
        if (data >= end) {
            goto delim;
        }
    }

    if (*data >= 0x30 && *data <= 0x39) {
        /* Count the '.' as part of the number. */
        lxb_css_syntax_token_base(token)->length += 1;

        return lxb_css_syntax_state_decimal(tkz, token, buf_start, buf_end,
                                            data, end);
    }

delim:

    lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer, buf_start);

    LXB_CSS_SYNTAX_DELIM_APPEND(tkz, begin, 1, '.');

    return data;
}
1694
1695 static const lxb_char_t *
lxb_css_syntax_state_decimal(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,lxb_char_t * buf_start,lxb_char_t * buf_end,const lxb_char_t * data,const lxb_char_t * end)1696 lxb_css_syntax_state_decimal(lxb_css_syntax_tokenizer_t *tkz,
1697 lxb_css_syntax_token_t *token,
1698 lxb_char_t *buf_start, lxb_char_t *buf_end,
1699 const lxb_char_t *data, const lxb_char_t *end)
1700 {
1701 size_t length;
1702 bool e_is_negative;
1703 int exponent, e_digit;
1704 lxb_char_t ch, by;
1705 lxb_status_t status;
1706 const lxb_char_t *last, *begin;
1707 lxb_css_syntax_token_t *t_str;
1708 lxb_css_syntax_token_string_t *str;
1709
1710 exponent = 0;
1711 begin = data;
1712 length = lxb_css_syntax_token_base(token)->length;
1713
1714 str = lxb_css_syntax_token_dimension_string(token);
1715 t_str = (lxb_css_syntax_token_t *) (void *) str;
1716
1717 /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1718 do {
1719 if (buf_start != buf_end) {
1720 *buf_start++ = *data;
1721 exponent -= 1;
1722 }
1723
1724 data++;
1725
1726 if (data >= end) {
1727 length += data - begin;
1728
1729 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1730 if (data >= end) {
1731 lxb_css_syntax_token_base(token)->length = length;
1732
1733 lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1734 buf_start, 0, exponent, 0);
1735 return data;
1736 }
1737
1738 begin = data;
1739 }
1740 }
1741 while (*data >= 0x30 && *data <= 0x39);
1742
1743 length += data - begin;
1744
1745 lxb_css_syntax_token_base(token)->length = length;
1746 lxb_css_syntax_token_base(str)->begin = data;
1747
1748 ch = *data;
1749
1750 /* U+0045 Latin Capital Letter (E) or U+0065 Latin Small Letter (e) */
1751 if (ch != 0x45 && ch != 0x65) {
1752 lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1753 buf_start, 0, exponent, 0);
1754
1755 return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
1756 data, end);
1757 }
1758
1759 e_digit = 0;
1760 e_is_negative = false;
1761
1762 lxb_css_syntax_token_base(t_str)->length = 1;
1763
1764 if (++data == end) {
1765 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1766 if (data >= end) {
1767 lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1768 buf_start, 0, exponent, 0);
1769
1770 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1771
1772 token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1773
1774 data = lxb_css_syntax_state_dimension_set(tkz, token, data);
1775
1776 lxb_css_syntax_token_base(token)->length +=
1777 lxb_css_syntax_token_base(t_str)->length;
1778 return data;
1779 }
1780 }
1781
1782 switch (*data) {
1783 /* U+002D HYPHEN-MINUS (-) */
1784 case 0x2D:
1785 e_is_negative = true;
1786 /* fall through */
1787
1788 /* U+002B PLUS SIGN (+) */
1789 case 0x2B:
1790 last = data++;
1791 by = *last;
1792
1793 if (data == end) {
1794 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1795 if (data >= end) {
1796 goto dimension;
1797 }
1798 }
1799
1800 /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1801 if (*data < 0x30 || *data > 0x39) {
1802 goto dimension;
1803 }
1804
1805 length += 1;
1806 break;
1807
1808 default:
1809 /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1810 if (*data < 0x30 || *data > 0x39) {
1811 lxb_css_syntax_consume_numeric_set_float(tkz, token,
1812 tkz->buffer, buf_start,
1813 0, exponent, 0);
1814
1815 token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1816
1817 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1818
1819 data = lxb_css_syntax_state_consume_ident(tkz, t_str,
1820 data, end);
1821 if (begin == NULL) {
1822 return NULL;
1823 }
1824
1825 lxb_css_syntax_token_base(token)->length = length
1826 + lxb_css_syntax_token_base(t_str)->length;
1827 return data;
1828 }
1829
1830 break;
1831 }
1832
1833 length += 1;
1834 begin = data;
1835
1836 /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1837 do {
1838 e_digit = (*data - 0x30) + e_digit * 0x0A;
1839
1840 if (++data == end) {
1841 length += data - begin;
1842
1843 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1844 if (data >= end) {
1845 lxb_css_syntax_token_base(token)->length = length;
1846
1847 lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, buf_start,
1848 e_is_negative, exponent, e_digit);
1849 return data;
1850 }
1851
1852 begin = data;
1853 }
1854 }
1855 while(*data >= 0x30 && *data <= 0x39);
1856
1857 length += data - begin;
1858
1859 lxb_css_syntax_token_base(token)->length = length;
1860
1861 lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, buf_start,
1862 e_is_negative, exponent, e_digit);
1863
1864 return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
1865 data, end);
1866
1867 dimension:
1868
1869 lxb_css_syntax_consume_numeric_set_float(tkz, token,
1870 tkz->buffer, buf_start,
1871 0, exponent, 0);
1872
1873 token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1874
1875 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1876
1877 if (by == '-') {
1878 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &by, 1);
1879
1880 lxb_css_syntax_token_base(t_str)->length += 1;
1881
1882 data = lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);
1883
1884 lxb_css_syntax_token_base(token)->length = length
1885 + lxb_css_syntax_token_base(t_str)->length;
1886 return data;
1887 }
1888
1889 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, last, (data >= end), '+');
1890
1891 lxb_css_syntax_token_base(token)->length = length
1892 + lxb_css_syntax_token_base(t_str)->length;
1893
1894 return lxb_css_syntax_state_dimension_set(tkz, token, data);
1895 }
1896
/*
 * Decides what follows an already parsed number at `data`.
 *
 *   - a name-start character, U+0000, "-" followed by name-start / "-" /
 *     U+0000, or a valid escape (optionally preceded by "-"): the token
 *     becomes a DIMENSION and the unit is read by
 *     lxb_css_syntax_state_consume_ident();
 *   - '%': the token becomes a PERCENTAGE;
 *   - anything else: the token type is left untouched. A consumed '-' and/or
 *     an invalid '\' is appended as a separate delim token.
 *
 * Returns the position from which tokenizing continues, or NULL when
 * fetching a chunk, appending to the string buffer, appending a delim
 * token, or consuming an escape fails.
 */
static const lxb_char_t *
lxb_css_syntax_state_consume_numeric_name_start(lxb_css_syntax_tokenizer_t *tkz,
                                                lxb_css_syntax_token_t *token,
                                                const lxb_char_t *data,
                                                const lxb_char_t *end)
{
    bool have_minus;
    size_t length;
    lxb_char_t ch;
    lxb_status_t status;
    const lxb_char_t *esc, *minus;
    lxb_css_syntax_token_t *t_str;
    lxb_css_syntax_token_string_t *str;

    /* The dimension's unit string, also addressed as a token for length. */
    str = lxb_css_syntax_token_dimension_string(token);
    t_str = (lxb_css_syntax_token_t *) (void *) str;

    lxb_css_syntax_token_base(t_str)->begin = data;

    ch = *data;

    if (lxb_css_syntax_res_name_map[ch] == LXB_CSS_SYNTAX_RES_NAME_START
        || ch == 0x00)
    {
        lxb_css_syntax_token_base(t_str)->length = 0;
        goto dimension;
    }

    /* U+0025 PERCENTAGE SIGN (%) */
    if (ch == 0x25) {
        token->type = LXB_CSS_SYNTAX_TOKEN_PERCENTAGE;

        lxb_css_syntax_token_base(token)->length += 1;

        return data + 1;
    }

    have_minus = false;
    minus = data;

    /* U+002D HYPHEN-MINUS */
    if (ch == 0x2D) {
        data++;

        if (data >= end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
                return data;
            }
        }

        ch = *data;

        if (lxb_css_syntax_res_name_map[ch] == LXB_CSS_SYNTAX_RES_NAME_START
            || ch == 0x2D || ch == 0x00)
        {
            /* The '-' is the first byte of the unit name. */
            lxb_css_syntax_token_base(t_str)->length = 1;

            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                          (const lxb_char_t *) "-", 1);
            goto dimension;
        }

        have_minus = true;
    }

    esc = data;

    /* U+005C REVERSE SOLIDUS (\) */
    if (ch == 0x5C) {
        data++;

        if (data >= end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                goto delim_rev_solidus;
            }
        }

        ch = *data;

        /* A backslash not followed by a newline is a valid escape. */
        if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
            /* 1 for the backslash. */
            length = 1;

            if (have_minus) {
                length += 1;

                LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                              (const lxb_char_t *) "-", 1);
            }

            data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
            if (data == NULL) {
                return NULL;
            }

            lxb_css_syntax_token_base(t_str)->length = length;

            goto dimension;
        }

    delim_rev_solidus:

        if (have_minus) {
            LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
        }

        LXB_CSS_SYNTAX_DELIM_APPEND(tkz, esc, 1, '\\');

        return data;
    }

    if (have_minus) {
        /*
         * NOTE(review): a length of 0 is passed for '-' here, while the
         * end-of-input path passes 1 — confirm it is intended.
         */
        LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 0, '-');
    }

    return data;

dimension:

    token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;

    /* A NULL result is passed through to the caller by the return below. */
    data = lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);

    lxb_css_syntax_token_base(token)->length +=
        lxb_css_syntax_token_base(t_str)->length;

    return data;
}
2027
/*
 * Consumes the characters of a name starting at `data` and appends them to
 * the tokenizer's string buffer.
 *
 * Runs of ordinary name characters are appended in bulk; `begin` marks the
 * start of the run not yet appended. Special cases:
 *   - '\' followed by a non-newline: the escape is consumed by
 *     lxb_css_syntax_state_escaped();
 *   - '\' followed by a newline or end of input: the name ends and the
 *     backslash is appended as a separate delim token;
 *   - U+0000: the replacement character is appended instead;
 *   - any other non-name character, or end of input: the name ends.
 *
 * The number of consumed input bytes is added to the token's length and the
 * result is finalized through lxb_css_syntax_state_string_set().
 *
 * Returns that function's result, or NULL when fetching a chunk, appending
 * to the string buffer, appending a delim token, or consuming an escape
 * fails.
 */
static const lxb_char_t *
lxb_css_syntax_state_consume_ident(lxb_css_syntax_tokenizer_t *tkz,
                                   lxb_css_syntax_token_t *token,
                                   const lxb_char_t *data, const lxb_char_t *end)
{
    size_t length;
    lxb_status_t status;
    const lxb_char_t *begin;

    begin = data;
    length = 0;

    for (;; data++) {
        if (data >= end) {
            /* Flush the pending run before the chunk is replaced. */
            if (begin < data) {
                length += data - begin;

                LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
            }

            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                lxb_css_syntax_token_base(token)->length += length;

                return lxb_css_syntax_state_string_set(tkz, token, data);
            }

            begin = data;
        }

        /* A zero map entry means "not an ordinary name character". */
        if (lxb_css_syntax_res_name_map[*data] == 0x00) {

            /* U+005C REVERSE SOLIDUS (\) */
            if (*data == 0x5C) {
                if (begin < data) {
                    length += data - begin;

                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                /* Keep the backslash position for a possible delim token. */
                begin = data;

                if (++data == end) {
                    LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
                    if (data >= end) {
                        goto push_delim_last;
                    }
                }

                if (*data == 0x0A || *data == 0x0C || *data == 0x0D) {
                    goto push_delim_last;
                }

                /* Count the backslash. */
                length += 1;

                data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
                if (data == NULL) {
                    return NULL;
                }

                /* Step back by one: the loop increment re-advances `data`. */
                begin = data--;
            }
            else if (*data == 0x00) {
                length += (data + 1) - begin;

                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                        lexbor_str_res_ansi_replacement_character,
                        sizeof(lexbor_str_res_ansi_replacement_character) - 1);
                begin = data + 1;
            }
            else {
                /* First character that cannot be part of the name. */
                if (begin < data) {
                    length += data - begin;

                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                lxb_css_syntax_token_base(token)->length += length;

                return lxb_css_syntax_state_string_set(tkz, token, data);
            }
        }
    }

    /* Not reached: the loop above only exits through return or goto. */
    return data;

push_delim_last:

    lxb_css_syntax_token_base(token)->length += length;

    LXB_CSS_SYNTAX_DELIM_APPEND(tkz, begin, 1, '\\');

    return lxb_css_syntax_state_string_set(tkz, token, data);
}
2126
2127 const lxb_char_t *
lxb_css_syntax_state_ident_like_begin(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2128 lxb_css_syntax_state_ident_like_begin(lxb_css_syntax_tokenizer_t *tkz,
2129 lxb_css_syntax_token_t *token,
2130 const lxb_char_t *data, const lxb_char_t *end)
2131 {
2132 lxb_css_syntax_token_base(token)->begin = data;
2133 lxb_css_syntax_token_base(token)->length = 0;
2134
2135 return lxb_css_syntax_state_ident_like(tkz, token, data, end);
2136 }
2137
2138 static const lxb_char_t *
lxb_css_syntax_state_ident_like(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2139 lxb_css_syntax_state_ident_like(lxb_css_syntax_tokenizer_t *tkz,
2140 lxb_css_syntax_token_t *token,
2141 const lxb_char_t *data, const lxb_char_t *end)
2142 {
2143 size_t length;
2144 lxb_char_t ch;
2145 lxb_status_t status;
2146 const lxb_char_t *begin, *ws_begin;
2147 lxb_css_syntax_token_t *ws;
2148 lxb_css_syntax_token_string_t *str, *ws_str;
2149 static const lxb_char_t url[] = "url";
2150
2151 data = lxb_css_syntax_state_consume_ident(tkz, token, data, end);
2152
2153 end = tkz->in_end;
2154
2155 if (data >= end) {
2156 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2157 if (data >= end) {
2158 token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2159 return data;
2160 }
2161 }
2162
2163 if (data < end && *data == '(') {
2164 data++;
2165
2166 lxb_css_syntax_token_base(token)->length += 1;
2167
2168 str = lxb_css_syntax_token_string(token);
2169
2170 if (str->length == 3 && lexbor_str_data_casecmp(str->data, url)) {
2171 begin = data;
2172 length = 0;
2173
2174 tkz->pos += str->length + 1;
2175 ws_begin = tkz->pos;
2176
2177 do {
2178 if (data >= end) {
2179 if (begin < data) {
2180 length += data - begin;
2181 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2182 }
2183
2184 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2185 if (data >= end) {
2186 begin = data;
2187 goto with_ws;
2188 }
2189
2190 begin = data;
2191 }
2192
2193 ch = *data;
2194
2195 if (lexbor_utils_whitespace(ch, !=, &&)) {
2196 /* U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE (') */
2197 if (ch == 0x22 || ch == 0x27) {
2198 goto with_ws;
2199 }
2200
2201 tkz->pos = tkz->start;
2202 length += data - begin;
2203
2204 lxb_css_syntax_token_base(token)->length += length;
2205
2206 return lxb_css_syntax_state_url(tkz, token, data, end);
2207 }
2208
2209 data++;
2210 }
2211 while (true);
2212 }
2213
2214 token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2215
2216 return data;
2217 }
2218
2219 token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2220
2221 return data;
2222
2223 with_ws:
2224
2225 token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2226
2227 if (ws_begin != tkz->pos || begin < data) {
2228 if (begin < data) {
2229 length += data - begin;
2230 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2231 }
2232
2233 if (tkz->pos >= tkz->end) {
2234 if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
2235 return NULL;
2236 }
2237 }
2238
2239 str->data = tkz->start;
2240 *tkz->pos = 0x00;
2241
2242 ws = lxb_css_syntax_state_token_create(tkz);
2243 if (ws == NULL) {
2244 return NULL;
2245 }
2246
2247 ws->type = LXB_CSS_SYNTAX_TOKEN_WHITESPACE;
2248
2249 lxb_css_syntax_token_base(ws)->begin = begin;
2250 lxb_css_syntax_token_base(ws)->length = length;
2251
2252 ws_str = lxb_css_syntax_token_string(ws);
2253
2254 ws_str->data = tkz->start + str->length + 1;
2255 ws_str->length = tkz->pos - ws_str->data;
2256 }
2257
2258 tkz->pos = tkz->start;
2259
2260 return data;
2261 }
2262
2263 const lxb_char_t *
lxb_css_syntax_state_ident_like_not_url_begin(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2264 lxb_css_syntax_state_ident_like_not_url_begin(lxb_css_syntax_tokenizer_t *tkz,
2265 lxb_css_syntax_token_t *token,
2266 const lxb_char_t *data, const lxb_char_t *end)
2267 {
2268 lxb_css_syntax_token_base(token)->begin = data;
2269 lxb_css_syntax_token_base(token)->length = 0;
2270
2271 return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
2272 }
2273
2274 static const lxb_char_t *
lxb_css_syntax_state_ident_like_not_url(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2275 lxb_css_syntax_state_ident_like_not_url(lxb_css_syntax_tokenizer_t *tkz,
2276 lxb_css_syntax_token_t *token,
2277 const lxb_char_t *data, const lxb_char_t *end)
2278 {
2279 data = lxb_css_syntax_state_consume_ident(tkz, token, data, end);
2280 if (data == NULL) {
2281 return NULL;
2282 }
2283
2284 end = tkz->in_end;
2285
2286 if (data < end && *data == '(') {
2287 token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2288
2289 lxb_css_syntax_token_base(token)->length += 1;
2290
2291 return data + 1;
2292 }
2293
2294 token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2295
2296 return data;
2297 }
2298
2299 /*
2300 * URL
2301 */
2302 static const lxb_char_t *
lxb_css_syntax_state_url(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2303 lxb_css_syntax_state_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
2304 const lxb_char_t *data, const lxb_char_t *end)
2305 {
2306 size_t length;
2307 lxb_char_t ch;
2308 lxb_status_t status;
2309 const lxb_char_t *begin;
2310
2311 status = LXB_STATUS_OK;
2312
2313 *tkz->pos = 0x00;
2314
2315 begin = data;
2316 length = 0;
2317
2318 do {
2319 if (data >= end) {
2320 if (begin < data) {
2321 length += data - begin;
2322 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2323 }
2324
2325 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2326 if (data >= end) {
2327 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2328 LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINUR);
2329
2330 token->type = LXB_CSS_SYNTAX_TOKEN_URL;
2331
2332 lxb_css_syntax_token_base(token)->length += length;
2333
2334 return lxb_css_syntax_state_string_set(tkz, token, data);
2335 }
2336
2337 begin = data;
2338 }
2339
2340 switch (*data) {
2341 /* U+0000 NULL (\0) */
2342 case 0x00:
2343 if (begin < data) {
2344 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2345 }
2346
2347 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
2348 lexbor_str_res_ansi_replacement_character,
2349 sizeof(lexbor_str_res_ansi_replacement_character) - 1);
2350
2351 data += 1;
2352 length += data - begin;
2353 begin = data;
2354
2355 continue;
2356
2357 /* U+0029 RIGHT PARENTHESIS ()) */
2358 case 0x29:
2359 if (begin < data) {
2360 length += data - begin;
2361 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2362 }
2363
2364 token->type = LXB_CSS_SYNTAX_TOKEN_URL;
2365
2366 lxb_css_syntax_token_base(token)->length += length + 1;
2367
2368 return lxb_css_syntax_state_string_set(tkz, token, data + 1);
2369
2370 /*
2371 * U+0022 QUOTATION MARK (")
2372 * U+0027 APOSTROPHE (')
2373 * U+0028 LEFT PARENTHESIS (()
2374 * U+000B LINE TABULATION
2375 * U+007F DELETE
2376 */
2377 case 0x22:
2378 case 0x27:
2379 case 0x28:
2380 case 0x0B:
2381 case 0x7F:
2382 if (begin < data) {
2383 length += data - begin;
2384 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2385 }
2386
2387 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2388 LXB_CSS_SYNTAX_TOKENIZER_ERROR_QOINUR);
2389
2390 lxb_css_syntax_token_base(token)->length += length + 1;
2391
2392 return lxb_css_syntax_state_bad_url(tkz, token, data + 1, end);
2393
2394 /* U+005C REVERSE SOLIDUS (\) */
2395 case 0x5C:
2396 if (begin < data) {
2397 length += data - begin;
2398 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2399 }
2400
2401 begin = ++data;
2402
2403 if (data == end) {
2404 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2405 if (data >= end) {
2406 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2407 LXB_CSS_SYNTAX_TOKENIZER_ERROR_WRESINUR);
2408
2409 token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;
2410
2411 lxb_css_syntax_token_base(token)->length += length + 1;
2412
2413 return lxb_css_syntax_state_string_set(tkz, token, data);
2414 }
2415 }
2416
2417 ch = *data;
2418
2419 if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
2420 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2421 LXB_CSS_SYNTAX_TOKENIZER_ERROR_WRESINUR);
2422
2423 lxb_css_syntax_token_base(token)->length += length + 1;
2424
2425 return lxb_css_syntax_state_bad_url(tkz, token, data, end);
2426 }
2427
2428 data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
2429 if (data == NULL) {
2430 return NULL;
2431 }
2432
2433 begin = data--;
2434 length += 1;
2435
2436 break;
2437
2438 /*
2439 * U+0009 CHARACTER TABULATION (tab)
2440 * U+000A LINE FEED (LF)
2441 * U+000C FORM FEED (FF)
2442 * U+000D CARRIAGE RETURN (CR)
2443 * U+0020 SPACE
2444 */
2445 case 0x09:
2446 case 0x0A:
2447 case 0x0C:
2448 case 0x0D:
2449 case 0x20:
2450 if (begin < data) {
2451 length += data - begin;
2452 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2453 }
2454
2455 begin = ++data;
2456 length += 1;
2457
2458 do {
2459 if (data == end) {
2460 length += data - begin;
2461
2462 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2463 if (data >= end) {
2464 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2465 LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINUR);
2466
2467 token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;
2468
2469 lxb_css_syntax_token_base(token)->length += length;
2470
2471 return lxb_css_syntax_state_string_set(tkz, token, data);
2472 }
2473
2474 begin = data;
2475 }
2476
2477 ch = *data;
2478
2479 if (lexbor_utils_whitespace(ch, !=, &&)) {
2480 length += data - begin;
2481
2482 /* U+0029 RIGHT PARENTHESIS ()) */
2483 if (*data == 0x29) {
2484 token->type = LXB_CSS_SYNTAX_TOKEN_URL;
2485
2486 lxb_css_syntax_token_base(token)->length += length + 1;
2487
2488 return lxb_css_syntax_state_string_set(tkz, token,
2489 data + 1);
2490 }
2491
2492 lxb_css_syntax_token_base(token)->length += length;
2493
2494 return lxb_css_syntax_state_bad_url(tkz, token,
2495 data, end);
2496 }
2497
2498 data++;
2499 }
2500 while (true);
2501
2502 default:
2503 /*
2504 * Inclusive:
2505 * U+0000 NULL and U+0008 BACKSPACE or
2506 * U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE
2507 */
2508 if ((*data <= 0x08)
2509 || (*data >= 0x0E && *data <= 0x1F))
2510 {
2511 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2512 LXB_CSS_SYNTAX_TOKENIZER_ERROR_QOINUR);
2513
2514 lxb_css_syntax_token_base(token)->length += length;
2515
2516 return lxb_css_syntax_state_bad_url(tkz, token,
2517 data + 1, end);
2518 }
2519
2520 break;
2521 }
2522
2523 data++;
2524 }
2525 while (true);
2526
2527 return data;
2528 }
2529
2530 /*
2531 * Bad URL
2532 */
2533 static const lxb_char_t *
lxb_css_syntax_state_bad_url(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2534 lxb_css_syntax_state_bad_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
2535 const lxb_char_t *data, const lxb_char_t *end)
2536 {
2537 size_t length;
2538 lxb_status_t status;
2539 const lxb_char_t *begin;
2540
2541 token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;
2542
2543 if(lxb_css_syntax_state_string_set(tkz, token, data) == NULL) {
2544 return NULL;
2545 }
2546
2547 begin = data;
2548 length = 0;
2549
2550 do {
2551 if (data >= end) {
2552 length += data - begin;
2553
2554 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2555 if (data >= end) {
2556 lxb_css_syntax_token_base(token)->length += length;
2557 return data;
2558 }
2559
2560 begin = data;
2561 }
2562
2563 /* U+0029 RIGHT PARENTHESIS ()) */
2564 if (*data == 0x29) {
2565 data++;
2566 length += data - begin;
2567
2568 lxb_css_syntax_token_base(token)->length += length;
2569
2570 return data;
2571 }
2572 /* U+005C REVERSE SOLIDUS (\) */
2573 else if (*data == 0x5C) {
2574 data++;
2575
2576 if (data >= end) {
2577 continue;
2578 }
2579 }
2580
2581 data++;
2582 }
2583 while (true);
2584
2585 return data;
2586 }
2587
2588 lxb_inline lxb_status_t
lxb_css_syntax_string_append_rep(lxb_css_syntax_tokenizer_t * tkz)2589 lxb_css_syntax_string_append_rep(lxb_css_syntax_tokenizer_t *tkz)
2590 {
2591 return lxb_css_syntax_string_append(tkz, lexbor_str_res_ansi_replacement_character,
2592 sizeof(lexbor_str_res_ansi_replacement_character) - 1);
2593 }
2594
2595 static const lxb_char_t *
lxb_css_syntax_state_escaped(lxb_css_syntax_tokenizer_t * tkz,const lxb_char_t * data,const lxb_char_t ** end,size_t * length)2596 lxb_css_syntax_state_escaped(lxb_css_syntax_tokenizer_t *tkz,
2597 const lxb_char_t *data,
2598 const lxb_char_t **end, size_t *length)
2599 {
2600 uint32_t cp;
2601 unsigned count;
2602 lxb_status_t status;
2603
2604 cp = 0;
2605
2606 for (count = 0; count < 6; count++, data++) {
2607 if (data >= *end) {
2608 status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data, end);
2609 if (status != LXB_STATUS_OK) {
2610 return NULL;
2611 }
2612
2613 if (data >= *end) {
2614 if (count == 0) {
2615 return *end;
2616 }
2617
2618 break;
2619 }
2620 }
2621
2622 if (lexbor_str_res_map_hex[*data] == 0xFF) {
2623 if (count == 0) {
2624 *length += 1;
2625
2626 if (*data == 0x00) {
2627 status = lxb_css_syntax_string_append_rep(tkz);
2628 if (status != LXB_STATUS_OK) {
2629 return NULL;
2630 }
2631
2632 return data + 1;
2633 }
2634
2635 status = lxb_css_syntax_string_append(tkz, data, 1);
2636 if (status != LXB_STATUS_OK) {
2637 return NULL;
2638 }
2639
2640 return data + 1;
2641 }
2642
2643 switch (*data) {
2644 case 0x0D:
2645 data++;
2646 *length += 1;
2647
2648 status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data,
2649 end);
2650 if (status != LXB_STATUS_OK) {
2651 return NULL;
2652 }
2653
2654 if (data >= *end) {
2655 break;
2656 }
2657
2658 if (*data == 0x0A) {
2659 data++;
2660 *length += 1;
2661 }
2662
2663 break;
2664
2665 case 0x09:
2666 case 0x20:
2667 case 0x0A:
2668 case 0x0C:
2669 data++;
2670 *length += 1;
2671 break;
2672 }
2673
2674 break;
2675 }
2676
2677 cp <<= 4;
2678 cp |= lexbor_str_res_map_hex[*data];
2679 }
2680
2681 if ((tkz->end - tkz->pos) < 5) {
2682 if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
2683 return NULL;
2684 }
2685 }
2686
2687 lxb_css_syntax_codepoint_to_ascii(tkz, cp);
2688
2689 *length += count;
2690
2691 return data;
2692 }
2693
2694 static const lxb_char_t *
lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t * tkz,const lxb_char_t * data,const lxb_char_t ** end,size_t * length)2695 lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t *tkz,
2696 const lxb_char_t *data,
2697 const lxb_char_t **end, size_t *length)
2698 {
2699 lxb_status_t status;
2700
2701 /* U+000D CARRIAGE RETURN */
2702 if (*data == 0x0D) {
2703 data++;
2704 *length += 1;
2705
2706 if (data >= *end) {
2707 status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data, end);
2708 if (status != LXB_STATUS_OK) {
2709 return NULL;
2710 }
2711
2712 if (data >= *end) {
2713 return data;
2714 }
2715 }
2716
2717 /* U+000A LINE FEED */
2718 if (*data == 0x0A) {
2719 data++;
2720 *length += 1;
2721 }
2722
2723 return data;
2724 }
2725
2726 if (*data == 0x00) {
2727 status = lxb_css_syntax_string_append_rep(tkz);
2728 if (status != LXB_STATUS_OK) {
2729 return NULL;
2730 }
2731
2732 *length += 1;
2733
2734 return data + 1;
2735 }
2736
2737 if (*data == 0x0A || *data == 0x0C) {
2738 *length += 1;
2739
2740 return data + 1;
2741 }
2742
2743 return lxb_css_syntax_state_escaped(tkz, data, end, length);
2744 }
2745