xref: /php-src/ext/dom/lexbor/lexbor/css/syntax/state.c (revision 7defc235)
1 /*
2  * Copyright (C) 2018-2020 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #include <string.h>
8 #include <float.h>
9 
10 #include "lexbor/core/utils.h"
11 #include "lexbor/core/strtod.h"
12 
13 #include "lexbor/css/syntax/state.h"
14 #include "lexbor/css/syntax/syntax.h"
15 #include "lexbor/css/syntax/tokenizer/error.h"
16 
17 #define LXB_CSS_SYNTAX_RES_NAME_MAP
18 #include "lexbor/css/syntax/res.h"
19 
20 #define LEXBOR_STR_RES_MAP_HEX
21 #define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
22 #include "lexbor/core/str_res.h"
23 
24 
/*
 * Ask the tokenizer for the next input chunk.
 *
 * The result of lxb_css_syntax_tokenizer_next_chunk() is stored in `_status`;
 * the addresses of `_data` and `_end` are handed to the callee so it can
 * update them.  Any status other than LXB_STATUS_OK makes the *enclosing
 * function* return NULL, so the macro is only usable inside functions that
 * return a pointer.
 */
#define LXB_CSS_SYNTAX_NEXT_CHUNK(_tkz, _status, _data, _end)                  \
    do {                                                                       \
        _status = lxb_css_syntax_tokenizer_next_chunk(_tkz, &_data, &_end);    \
        if (_status != LXB_STATUS_OK) {                                        \
            return NULL;                                                       \
        }                                                                      \
    } while (false)
33 
34 
/*
 * Append `_length` bytes starting at `_begin` to the tokenizer's temporary
 * string buffer via lxb_css_syntax_string_append().
 *
 * The result is stored in `_status`; any status other than LXB_STATUS_OK
 * makes the *enclosing function* return NULL.
 */
#define LXB_CSS_SYNTAX_STR_APPEND_LEN(_tkz, _status, _begin, _length)          \
    do {                                                                       \
        _status = lxb_css_syntax_string_append(_tkz, _begin, _length);         \
        if (_status != LXB_STATUS_OK) {                                        \
            return NULL;                                                       \
        }                                                                      \
    } while (false)
43 
/*
 * Append the byte range [_begin, _end) to the tokenizer's temporary string
 * buffer.  Thin wrapper over LXB_CSS_SYNTAX_STR_APPEND_LEN, which makes the
 * enclosing function return NULL on failure.
 *
 * Both pointer parameters are parenthesized so that expression arguments
 * (for example `begin + 1`) do not change the meaning of the subtraction.
 */
#define LXB_CSS_SYNTAX_STR_APPEND(_tkz, _status, _begin, _end)                 \
    LXB_CSS_SYNTAX_STR_APPEND_LEN(_tkz, _status, _begin, ((_end) - (_begin)))
46 
/*
 * Create an additional DELIM token for character `_ch` (source position
 * `_begin`, `_length` bytes) through lxb_css_syntax_list_append_delim().
 *
 * If no token could be created (NULL result) the *enclosing function*
 * returns NULL.
 */
#define LXB_CSS_SYNTAX_DELIM_APPEND(_tkz, _begin, _length, _ch)                \
    do {                                                                       \
        if (!lxb_css_syntax_list_append_delim(_tkz, _begin, _length, _ch)) {   \
            return NULL;                                                       \
        }                                                                      \
    } while (false)
56 
57 
58 static const lxb_char_t *
59 lxb_css_syntax_state_consume_numeric(lxb_css_syntax_tokenizer_t *tkz,
60                                      lxb_css_syntax_token_t *token,
61                                      const lxb_char_t *data,
62                                      const lxb_char_t *end);
63 
64 static const lxb_char_t *
65 lxb_css_syntax_state_decimal(lxb_css_syntax_tokenizer_t *tkz,
66                              lxb_css_syntax_token_t *token,
67                              lxb_char_t *buf_start, lxb_char_t *buf_end,
68                              const lxb_char_t *data, const lxb_char_t *end);
69 
70 static const lxb_char_t *
71 lxb_css_syntax_state_consume_numeric_name_start(lxb_css_syntax_tokenizer_t *tkz,
72                                                 lxb_css_syntax_token_t *token,
73                                                 const lxb_char_t *data,
74                                                 const lxb_char_t *end);
75 
76 static const lxb_char_t *
77 lxb_css_syntax_state_consume_ident(lxb_css_syntax_tokenizer_t *tkz,
78                                    lxb_css_syntax_token_t *token,
79                                    const lxb_char_t *data, const lxb_char_t *end);
80 
81 static const lxb_char_t *
82 lxb_css_syntax_state_ident_like(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
83                                 const lxb_char_t *data, const lxb_char_t *end);
84 
85 static const lxb_char_t *
86 lxb_css_syntax_state_ident_like_not_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
87                                         const lxb_char_t *data, const lxb_char_t *end);
88 
89 
90 static const lxb_char_t *
91 lxb_css_syntax_state_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
92                          const lxb_char_t *data, const lxb_char_t *end);
93 
94 static const lxb_char_t *
95 lxb_css_syntax_state_bad_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
96                              const lxb_char_t *data, const lxb_char_t *end);
97 
98 static const lxb_char_t *
99 lxb_css_syntax_state_escaped(lxb_css_syntax_tokenizer_t *tkz,
100                              const lxb_char_t *data,
101                              const lxb_char_t **end, size_t *length);
102 
103 static const lxb_char_t *
104 lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t *tkz,
105                                     const lxb_char_t *data,
106                                     const lxb_char_t **end, size_t *length);
107 
108 
109 lxb_inline lxb_status_t
lxb_css_syntax_string_realloc(lxb_css_syntax_tokenizer_t * tkz,size_t upto)110 lxb_css_syntax_string_realloc(lxb_css_syntax_tokenizer_t *tkz, size_t upto)
111 {
112     size_t len = tkz->pos - tkz->start;
113     size_t size = (tkz->end - tkz->start) + upto;
114 
115     lxb_char_t *tmp = lexbor_realloc(tkz->start, size);
116     if (tmp == NULL) {
117         tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
118         return tkz->status;
119     }
120 
121     tkz->start = tmp;
122     tkz->pos = tmp + len;
123     tkz->end = tmp + size;
124 
125     return LXB_STATUS_OK;
126 }
127 
128 lxb_inline lxb_status_t
lxb_css_syntax_string_append(lxb_css_syntax_tokenizer_t * tkz,const lxb_char_t * data,size_t length)129 lxb_css_syntax_string_append(lxb_css_syntax_tokenizer_t *tkz,
130                              const lxb_char_t *data, size_t length)
131 {
132     if ((size_t) (tkz->end - tkz->pos) <= length) {
133         if (lxb_css_syntax_string_realloc(tkz, length + 1024) != LXB_STATUS_OK) {
134             return tkz->status;
135         }
136     }
137 
138     memcpy(tkz->pos, data, length);
139 
140     tkz->pos += length;
141 
142     return LXB_STATUS_OK;
143 }
144 
145 lxb_inline lxb_status_t
lxb_css_syntax_state_string_term(lxb_css_syntax_tokenizer_t * tkz)146 lxb_css_syntax_state_string_term(lxb_css_syntax_tokenizer_t *tkz)
147 {
148     if (tkz->pos >= tkz->end) {
149         if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
150             return tkz->status;
151         }
152     }
153 
154     *tkz->pos = 0x00;
155 
156     return LXB_STATUS_OK;
157 }
158 
159 lxb_inline const lxb_char_t *
lxb_css_syntax_state_string_set(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data)160 lxb_css_syntax_state_string_set(lxb_css_syntax_tokenizer_t *tkz,
161                                 lxb_css_syntax_token_t *token,
162                                 const lxb_char_t *data)
163 {
164     if(lxb_css_syntax_state_string_term(tkz) != LXB_STATUS_OK) {
165         return NULL;
166     }
167 
168     lxb_css_syntax_token_string(token)->data = tkz->start;
169     lxb_css_syntax_token_string(token)->length = tkz->pos - tkz->start;
170 
171     tkz->pos = tkz->start;
172 
173     return data;
174 }
175 
176 lxb_inline const lxb_char_t *
lxb_css_syntax_state_dimension_set(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data)177 lxb_css_syntax_state_dimension_set(lxb_css_syntax_tokenizer_t *tkz,
178                                    lxb_css_syntax_token_t *token,
179                                    const lxb_char_t *data)
180 {
181     if(lxb_css_syntax_state_string_term(tkz) != LXB_STATUS_OK) {
182         return NULL;
183     }
184 
185     lxb_css_syntax_token_dimension_string(token)->data = tkz->start;
186     lxb_css_syntax_token_dimension_string(token)->length = tkz->pos - tkz->start;
187 
188     tkz->pos = tkz->start;
189 
190     return data;
191 }
192 
193 lxb_inline lxb_css_syntax_token_t *
lxb_css_syntax_state_token_create(lxb_css_syntax_tokenizer_t * tkz)194 lxb_css_syntax_state_token_create(lxb_css_syntax_tokenizer_t *tkz)
195 {
196     if (tkz->prepared == 0) {
197         tkz->prepared = tkz->cache->length;
198     }
199 
200     return lxb_css_syntax_token_cached_create(tkz);
201 }
202 
203 /*
204  * Delim
205  */
206 lxb_inline void
lxb_css_syntax_state_delim_set(lxb_css_syntax_token_t * token,const lxb_char_t * data,lxb_char_t ch,size_t length)207 lxb_css_syntax_state_delim_set(lxb_css_syntax_token_t *token,
208                                const lxb_char_t *data, lxb_char_t ch,
209                                size_t length)
210 {
211     lxb_css_syntax_token_delim(token)->character = ch;
212     lxb_css_syntax_token_base(token)->begin = data;
213     lxb_css_syntax_token_base(token)->length = length;
214 
215     token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
216 }
217 
218 lxb_inline lxb_css_syntax_token_t *
lxb_css_syntax_list_append_delim(lxb_css_syntax_tokenizer_t * tkz,const lxb_char_t * data,size_t length,lxb_char_t ch)219 lxb_css_syntax_list_append_delim(lxb_css_syntax_tokenizer_t *tkz,
220                                  const lxb_char_t *data,
221                                  size_t length, lxb_char_t ch)
222 {
223     lxb_css_syntax_token_t *delim;
224 
225     delim = lxb_css_syntax_state_token_create(tkz);
226     if (delim == NULL) {
227         return NULL;
228     }
229 
230     lxb_css_syntax_state_delim_set(delim, data, ch, length);
231 
232     return delim;
233 }
234 
235 const lxb_char_t *
lxb_css_syntax_state_delim(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)236 lxb_css_syntax_state_delim(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
237                            const lxb_char_t *data, const lxb_char_t *end)
238 {
239     lxb_css_syntax_state_delim_set(token, data, *data, 1);
240 
241     return data + 1;
242 }
243 
244 /*
245  * Comment
246  */
247 const lxb_char_t *
lxb_css_syntax_state_comment(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)248 lxb_css_syntax_state_comment(lxb_css_syntax_tokenizer_t *tkz,
249                              lxb_css_syntax_token_t *token,
250                              const lxb_char_t *data, const lxb_char_t *end)
251 {
252     size_t length;
253     lxb_status_t status;
254     const lxb_char_t *begin;
255 
256     lxb_css_syntax_token_base(token)->begin = data;
257 
258     /* Skip forward slash (/) */
259     data++;
260 
261     if (data >= end) {
262         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
263         if (data >= end) {
264             goto delim;
265         }
266     }
267 
268     /* U+002A ASTERISK (*) */
269     if (*data != 0x2A) {
270         goto delim;
271     }
272 
273     begin = ++data;
274     length = 2;
275 
276     do {
277         if (data >= end) {
278             if (begin < data) {
279                 length += data - begin;
280                 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
281             }
282 
283             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
284             if (data >= end) {
285                 goto error;
286             }
287 
288             begin = data;
289         }
290 
291         switch (*data) {
292             case 0x00:
293                 if (begin < data) {
294                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
295                 }
296 
297                 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
298                                               lexbor_str_res_ansi_replacement_character,
299                                               sizeof(lexbor_str_res_ansi_replacement_character) - 1);
300                 data += 1;
301                 length += data - begin;
302                 begin = data;
303 
304                 continue;
305 
306             case 0x0D:
307                 data++;
308                 length += data - begin;
309 
310                 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
311 
312                 tkz->pos[-1] = '\n';
313 
314                 if (data >= end) {
315                     LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
316                     if (data >= end) {
317                         goto error;
318                     }
319                 }
320 
321                 if (*data != 0x0A) {
322                     data--;
323                 }
324                 else {
325                     length += 1;
326                 }
327 
328                 begin = ++data;
329 
330                 continue;
331 
332             case 0x0C:
333                 if (begin < data) {
334                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
335                 }
336 
337                 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
338                                               (lxb_char_t *) "\n", 1);
339                 data += 1;
340                 length += data - begin;
341                 begin = data;
342 
343                 continue;
344 
345             /* U+002A ASTERISK (*) */
346             case 0x2A:
347                 data++;
348 
349                 if (data >= end) {
350                     length += data - begin;
351 
352                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
353 
354                     LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
355                     if (data >= end) {
356                         goto error;
357                     }
358 
359                     if (*data == 0x2F) {
360                         tkz->pos--;
361                         *tkz->pos = 0x00;
362 
363                         data++;
364                         length++;
365 
366                         goto done;
367                     }
368 
369                     begin = data;
370                 }
371 
372                 /* U+002F Forward slash (/) */
373                 if (*data == 0x2F) {
374                     length += data - begin;
375 
376                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, (data - 1));
377 
378                     data++;
379                     length++;
380 
381                     goto done;
382                 }
383 
384                 continue;
385         }
386 
387         data++;
388     }
389     while (true);
390 
391 done:
392 
393     token->type = LXB_CSS_SYNTAX_TOKEN_COMMENT;
394 
395     lxb_css_syntax_token_base(token)->length = length;
396 
397     return lxb_css_syntax_state_string_set(tkz, token, data);
398 
399 delim:
400 
401     token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
402 
403     lxb_css_syntax_token_base(token)->length = 1;
404     lxb_css_syntax_token_delim(token)->character = '/';
405 
406     return data;
407 
408 error:
409 
410     token->type = LXB_CSS_SYNTAX_TOKEN_COMMENT;
411 
412     lxb_css_syntax_token_base(token)->length = length;
413 
414     lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, NULL,
415                                        LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINCO);
416 
417     return lxb_css_syntax_state_string_set(tkz, token, data);
418 }
419 
420 /*
421  * Whitespace
422  */
423 const lxb_char_t *
lxb_css_syntax_state_whitespace(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)424 lxb_css_syntax_state_whitespace(lxb_css_syntax_tokenizer_t *tkz,
425                                 lxb_css_syntax_token_t *token,
426                                 const lxb_char_t *data, const lxb_char_t *end)
427 {
428     size_t length;
429     lxb_status_t status;
430     const lxb_char_t *begin;
431 
432     token->type = LXB_CSS_SYNTAX_TOKEN_WHITESPACE;
433 
434     lxb_css_syntax_token_base(token)->begin = data;
435 
436     begin = data;
437     length = 0;
438 
439     do {
440         switch (*data) {
441             case 0x0D:
442                 data++;
443                 length += data - begin;
444 
445                 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
446 
447                 tkz->pos[-1] = '\n';
448 
449                 if (data >= end) {
450                     LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
451                     if (data >= end) {
452                         goto done;
453                     }
454                 }
455 
456                 if (*data != 0x0A) {
457                     data--;
458                 }
459                 else {
460                     length += 1;
461                 }
462 
463                 begin = data + 1;
464                 break;
465 
466             case 0x0C:
467                 length += (data + 1) - begin;
468 
469                 if (begin < data) {
470                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
471                 }
472 
473                 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
474                                               (const lxb_char_t *) "\n", 1);
475                 begin = data + 1;
476                 break;
477 
478             case 0x09:
479             case 0x20:
480             case 0x0A:
481                 break;
482 
483             default:
484                 if (begin < data) {
485                     length += data - begin;
486 
487                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
488                 }
489 
490                 lxb_css_syntax_token_base(token)->length = length;
491 
492                 return lxb_css_syntax_state_string_set(tkz, token, data);
493         }
494 
495         data++;
496 
497         if (data >= end) {
498             if (begin < data) {
499                 length += data - begin;
500 
501                 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
502             }
503 
504             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
505             if (data >= end) {
506                 break;
507             }
508 
509             begin = data;
510         }
511     }
512     while (true);
513 
514 done:
515 
516     lxb_css_syntax_token_base(token)->length = length;
517 
518     return lxb_css_syntax_state_string_set(tkz, token, data);
519 }
520 
521 /*
522  * String token for U+0022 Quotation Mark (") and U+0027 Apostrophe (')
523  */
524 const lxb_char_t *
lxb_css_syntax_state_string(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)525 lxb_css_syntax_state_string(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
526                             const lxb_char_t *data, const lxb_char_t *end)
527 {
528     size_t length;
529     lxb_char_t mark;
530     lxb_status_t status;
531     const lxb_char_t *begin;
532 
533     lxb_css_syntax_token_base(token)->begin = data;
534 
535     mark = *data++;
536     begin = data;
537     length = 1;
538 
539     for (;; data++) {
540         if (data >= end) {
541             if (begin < data) {
542                 length += data - begin;
543 
544                 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
545             }
546 
547             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
548             if (data >= end) {
549                 goto error;
550             }
551 
552             begin = data;
553         }
554 
555         switch (*data) {
556             case 0x00:
557                 length += (data + 1) - begin;
558 
559                 if (begin < data) {
560                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
561                 }
562 
563                 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
564                                               lexbor_str_res_ansi_replacement_character,
565                                               sizeof(lexbor_str_res_ansi_replacement_character) - 1);
566                 begin = data + 1;
567                 break;
568 
569             /*
570              * U+000A LINE FEED
571              * U+000D CARRIAGE RETURN
572              * U+000C FORM FEED
573              */
574             case 0x0A:
575             case 0x0D:
576             case 0x0C:
577                 length += data - begin;
578 
579                 if (begin < data) {
580                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
581                 }
582 
583                 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
584                                          LXB_CSS_SYNTAX_TOKENIZER_ERROR_NEINST);
585 
586                 token->type = LXB_CSS_SYNTAX_TOKEN_BAD_STRING;
587 
588                 lxb_css_syntax_token_base(token)->length = length;
589 
590                 return lxb_css_syntax_state_string_set(tkz, token, data);
591 
592             /* U+005C REVERSE SOLIDUS (\) */
593             case 0x5C:
594                 length += (data + 1) - begin;
595 
596                 if (begin < data) {
597                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
598                 }
599 
600                 data++;
601 
602                 if (data >= end) {
603                     LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
604                     if (data >= end) {
605                         LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
606                                                   (const lxb_char_t *) "\\", 1);
607                         goto error;
608                     }
609                 }
610 
611                 data = lxb_css_syntax_state_escaped_string(tkz, data, &end,
612                                                            &length);
613                 if (data == NULL) {
614                     return NULL;
615                 }
616 
617                 begin = data;
618 
619                 data--;
620                 break;
621 
622             default:
623                 /* '"' or '\'' */
624                 if (*data == mark) {
625                     length += (data + 1) - begin;
626 
627                     if (begin < data) {
628                         LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
629                     }
630 
631                     token->type = LXB_CSS_SYNTAX_TOKEN_STRING;
632 
633                     lxb_css_syntax_token_base(token)->length = length;
634 
635                     return lxb_css_syntax_state_string_set(tkz, token,
636                                                            data + 1);
637                 }
638 
639                 break;
640         }
641     }
642 
643     return data;
644 
645 error:
646 
647     lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, NULL,
648                                        LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINST);
649 
650     token->type = LXB_CSS_SYNTAX_TOKEN_STRING;
651 
652     lxb_css_syntax_token_base(token)->length = length;
653 
654     return lxb_css_syntax_state_string_set(tkz, token, data);
655 }
656 
657 /*
658  * U+0023 NUMBER SIGN (#)
659  */
660 const lxb_char_t *
lxb_css_syntax_state_hash(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)661 lxb_css_syntax_state_hash(lxb_css_syntax_tokenizer_t *tkz,
662                           lxb_css_syntax_token_t *token, const lxb_char_t *data,
663                           const lxb_char_t *end)
664 {
665     size_t length;
666     lxb_char_t ch;
667     lxb_status_t status;
668     const lxb_char_t *begin;
669     lxb_css_syntax_token_t *delim;
670 
671     lxb_css_syntax_token_base(token)->begin = data++;
672 
673     if (data >= end) {
674         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
675         if (data >= end) {
676             goto delim;
677         }
678     }
679 
680     length = 1;
681 
682     if (lxb_css_syntax_res_name_map[*data] == 0x00) {
683         if (*data == 0x00) {
684             goto hash;
685         }
686 
687         /* U+005C REVERSE SOLIDUS (\) */
688         if (*data != 0x5C) {
689             goto delim;
690         }
691 
692         begin = data++;
693 
694         if (data >= end) {
695             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
696             if (data >= end) {
697                 goto push_delim;
698             }
699         }
700 
701         ch = *data;
702 
703         if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
704             goto push_delim;
705         }
706 
707         length += 1;
708 
709         data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
710         if (data == NULL) {
711             return NULL;
712         }
713     }
714 
715 hash:
716 
717     token->type = LXB_CSS_SYNTAX_TOKEN_HASH;
718 
719     lxb_css_syntax_token_base(token)->length = length;
720 
721     return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
722 
723 push_delim:
724 
725     delim = lxb_css_syntax_list_append_delim(tkz, begin, 1, '\\');
726     if (delim == NULL) {
727         return NULL;
728     }
729 
730 delim:
731 
732     token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
733 
734     lxb_css_syntax_token_base(token)->length = 1;
735     lxb_css_syntax_token_delim(token)->character = '#';
736 
737     return data;
738 }
739 
740 /*
741  * U+0028 LEFT PARENTHESIS (()
742  */
743 const lxb_char_t *
lxb_css_syntax_state_lparenthesis(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)744 lxb_css_syntax_state_lparenthesis(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
745                                   const lxb_char_t *data, const lxb_char_t *end)
746 {
747     token->type = LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS;
748 
749     lxb_css_syntax_token_base(token)->begin = data;
750     lxb_css_syntax_token_base(token)->length = 1;
751 
752     return data + 1;
753 }
754 
755 /*
756  * U+0029 RIGHT PARENTHESIS ())
757  */
758 const lxb_char_t *
lxb_css_syntax_state_rparenthesis(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)759 lxb_css_syntax_state_rparenthesis(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
760                                   const lxb_char_t *data, const lxb_char_t *end)
761 {
762     token->type = LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS;
763 
764     lxb_css_syntax_token_base(token)->begin = data;
765     lxb_css_syntax_token_base(token)->length = 1;
766 
767     return data + 1;
768 }
769 
770 /*
771  * U+002B PLUS SIGN (+)
772  */
773 const lxb_char_t *
lxb_css_syntax_state_plus(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)774 lxb_css_syntax_state_plus(lxb_css_syntax_tokenizer_t *tkz,
775                           lxb_css_syntax_token_t *token,
776                           const lxb_char_t *data, const lxb_char_t *end)
777 {
778     lxb_status_t status;
779 
780     lxb_css_syntax_token_base(token)->begin = data++;
781 
782     if (data >= end) {
783         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
784         if (data >= end) {
785             token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
786 
787             lxb_css_syntax_token_base(token)->length = 1;
788             lxb_css_syntax_token_delim(token)->character = '+';
789 
790             return data;
791         }
792     }
793 
794     return lxb_css_syntax_state_plus_process(tkz, token, data, end);
795 }
796 
797 const lxb_char_t *
lxb_css_syntax_state_plus_process(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)798 lxb_css_syntax_state_plus_process(lxb_css_syntax_tokenizer_t *tkz,
799                                   lxb_css_syntax_token_t *token,
800                                   const lxb_char_t *data, const lxb_char_t *end)
801 {
802     lxb_status_t status;
803     const lxb_char_t *begin;
804     lxb_css_syntax_token_t *delim;
805 
806     /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
807     if (*data >= 0x30 && *data <= 0x39) {
808         lxb_css_syntax_token_number(token)->have_sign = true;
809         lxb_css_syntax_token_base(token)->length = 1;
810 
811         return lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
812     }
813 
814     /* U+002E FULL STOP (.) */
815     if (*data == 0x2E) {
816         begin = data++;
817 
818         if (data == end) {
819             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
820 
821             if (data >= end || *data < 0x30 || *data > 0x39) {
822                 goto push_delim;
823             }
824 
825             lxb_css_syntax_token_number(token)->have_sign = true;
826             lxb_css_syntax_token_base(token)->length = 2;
827 
828             return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
829                                                 tkz->buffer + sizeof(tkz->buffer),
830                                                 data, end);
831         }
832 
833         /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
834         if (*data >= 0x30 && *data <= 0x39) {
835             lxb_css_syntax_token_number(token)->have_sign = true;
836             lxb_css_syntax_token_base(token)->length = 2;
837 
838             return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
839                                                 tkz->buffer + sizeof(tkz->buffer),
840                                                 data, end);
841         }
842 
843     push_delim:
844 
845         delim = lxb_css_syntax_list_append_delim(tkz, begin, 1, '.');
846         if (delim == NULL) {
847             return NULL;
848         }
849     }
850 
851     token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
852 
853     lxb_css_syntax_token_base(token)->length = 1;
854     lxb_css_syntax_token_delim(token)->character = '+';
855 
856     return data;
857 }
858 
859 /*
860  * U+002C COMMA (,)
861  */
862 const lxb_char_t *
lxb_css_syntax_state_comma(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)863 lxb_css_syntax_state_comma(lxb_css_syntax_tokenizer_t *tkz,
864                            lxb_css_syntax_token_t *token,
865                            const lxb_char_t *data, const lxb_char_t *end)
866 {
867     token->type = LXB_CSS_SYNTAX_TOKEN_COMMA;
868 
869     lxb_css_syntax_token_base(token)->begin = data;
870     lxb_css_syntax_token_base(token)->length = 1;
871 
872     return data + 1;
873 }
874 
875 /*
876  * U+002D HYPHEN-MINUS (-)
877  */
878 const lxb_char_t *
lxb_css_syntax_state_minus(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)879 lxb_css_syntax_state_minus(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
880                            const lxb_char_t *data, const lxb_char_t *end)
881 {
882     lxb_status_t status;
883 
884     lxb_css_syntax_token_base(token)->begin = data++;
885 
886     if (data >= end) {
887         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
888         if (data >= end) {
889             token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
890 
891             lxb_css_syntax_token_base(token)->length = 1;
892             lxb_css_syntax_token_delim(token)->character = '-';
893 
894             return data;
895         }
896     }
897 
898     return lxb_css_syntax_state_minus_process(tkz, token, data, end);
899 }
900 
901 const lxb_char_t *
lxb_css_syntax_state_minus_process(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)902 lxb_css_syntax_state_minus_process(lxb_css_syntax_tokenizer_t *tkz,
903                                    lxb_css_syntax_token_t *token,
904                                    const lxb_char_t *data, const lxb_char_t *end)
905 {
906     size_t length;
907     lxb_char_t ch;
908     lxb_status_t status;
909     const lxb_char_t *begin, *second;
910     lxb_css_syntax_token_t *delim;
911     lxb_css_syntax_token_number_t *number;
912 
913     unsigned minuses_len = 1;
914     static const lxb_char_t minuses[3] = "---";
915 
916     /* Check for <number-token> */
917 
918     /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
919     if (*data >= 0x30 && *data <= 0x39) {
920         lxb_css_syntax_token_base(token)->length = 1;
921 
922         data = lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
923 
924         number = lxb_css_syntax_token_number(token);
925         number->num = -number->num;
926 
927         lxb_css_syntax_token_number(token)->have_sign = true;
928 
929         return data;
930     }
931 
932     /* U+002E FULL STOP (.) */
933     if (*data == 0x2E) {
934         begin = data++;
935 
936         if (data == end) {
937             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
938             if (data >= end) {
939                 goto push_delim;
940             }
941         }
942 
943         /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
944         if (*data >= 0x30 && *data <= 0x39) {
945             lxb_css_syntax_token_base(token)->length = 2;
946 
947             data = lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
948                                                 tkz->buffer + sizeof(tkz->buffer),
949                                                 data, end);
950 
951             number = lxb_css_syntax_token_number(token);
952             number->num = -number->num;
953 
954             lxb_css_syntax_token_number(token)->have_sign = true;
955 
956             return data;
957         }
958 
959     push_delim:
960 
961         delim = lxb_css_syntax_list_append_delim(tkz, begin, 1, '.');
962         if (delim == NULL) {
963             return NULL;
964         }
965 
966         goto delim;
967     }
968 
969     second = data;
970 
971     /* U+002D HYPHEN-MINUS (-) */
972     if (*data == 0x2D) {
973         data++;
974 
975         /* Check for <CDC-token> */
976 
977         if (data == end) {
978             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
979             if (data >= end) {
980                 delim = lxb_css_syntax_list_append_delim(tkz, second, 1, '-');
981                 if (delim == NULL) {
982                     return NULL;
983                 }
984 
985                 goto delim;
986             }
987         }
988 
989         if (*data == 0x2D) {
990             lxb_css_syntax_token_base(token)->length = 3;
991             LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 3);
992 
993             return lxb_css_syntax_state_ident_like_not_url(tkz, token,
994                                                            ++data, end);
995         }
996         else if (*data == 0x3E) {
997             token->type = LXB_CSS_SYNTAX_TOKEN_CDC;
998 
999             lxb_css_syntax_token_base(token)->length = 3;
1000 
1001             return data + 1;
1002         }
1003 
1004         minuses_len++;
1005     }
1006 
1007     /* Check for <ident-token> */
1008 
1009     if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START
1010         || *data == 0x00)
1011     {
1012         lxb_css_syntax_token_base(token)->length = minuses_len;
1013         LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);
1014 
1015         return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
1016     }
1017 
1018     length = 0;
1019 
1020     /* U+005C REVERSE SOLIDUS (\) */
1021     if (*data == 0x5C) {
1022         begin = data++;
1023 
1024         if (data == end) {
1025             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1026             if (data >= end) {
1027                 goto delim_rev_solidus;
1028             }
1029 
1030             ch = *data;
1031 
1032             if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1033                 length += 1;
1034                 goto ident;
1035             }
1036 
1037             goto delim_rev_solidus;
1038         }
1039 
1040         ch = *data;
1041 
1042         if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1043             length += 1;
1044             goto ident;
1045         }
1046 
1047     delim_rev_solidus:
1048 
1049         if (minuses_len == 2) {
1050             LXB_CSS_SYNTAX_DELIM_APPEND(tkz, second, 1, '-');
1051         }
1052 
1053         LXB_CSS_SYNTAX_DELIM_APPEND(tkz, begin, 1, '\\');
1054 
1055         goto delim;
1056     }
1057 
1058     if (minuses_len == 2) {
1059         LXB_CSS_SYNTAX_DELIM_APPEND(tkz, second, 0, '-');
1060     }
1061 
1062 delim:
1063 
1064     token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1065 
1066     lxb_css_syntax_token_base(token)->length = 1;
1067     lxb_css_syntax_token_delim(token)->character = '-';
1068 
1069     return data;
1070 
1071 ident:
1072 
1073     LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);
1074 
1075     data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
1076     if (data == NULL) {
1077         return NULL;
1078     }
1079 
1080     lxb_css_syntax_token_base(token)->length = minuses_len + length;
1081 
1082     return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
1083 }
1084 
1085 /*
1086  * U+002E FULL STOP (.)
1087  */
1088 const lxb_char_t *
lxb_css_syntax_state_full_stop(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1089 lxb_css_syntax_state_full_stop(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1090                                const lxb_char_t *data, const lxb_char_t *end)
1091 {
1092     lxb_status_t status;
1093 
1094     lxb_css_syntax_token_base(token)->begin = data;
1095     lxb_css_syntax_token_number(token)->have_sign = false;
1096 
1097     data++;
1098 
1099     if (data >= end) {
1100         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1101         if (data >= end) {
1102             goto delim;
1103         }
1104     }
1105 
1106     /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1107     if (*data >= 0x30 && *data <= 0x39) {
1108         lxb_css_syntax_token_base(token)->length = 1;
1109 
1110         return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
1111                                             tkz->buffer + sizeof(tkz->buffer),
1112                                             data, end);
1113     }
1114 
1115 delim:
1116 
1117     token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1118 
1119     lxb_css_syntax_token_base(token)->length = 1;
1120     lxb_css_syntax_token_delim(token)->character = '.';
1121 
1122     return data;
1123 }
1124 
1125 /*
1126  * U+003A COLON (:)
1127  */
1128 const lxb_char_t *
lxb_css_syntax_state_colon(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1129 lxb_css_syntax_state_colon(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1130                            const lxb_char_t *data, const lxb_char_t *end)
1131 {
1132     token->type = LXB_CSS_SYNTAX_TOKEN_COLON;
1133 
1134     lxb_css_syntax_token_base(token)->begin = data;
1135     lxb_css_syntax_token_base(token)->length = 1;
1136 
1137     return data + 1;
1138 }
1139 
1140 /*
1141  * U+003B SEMICOLON (;)
1142  */
1143 const lxb_char_t *
lxb_css_syntax_state_semicolon(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1144 lxb_css_syntax_state_semicolon(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1145                                const lxb_char_t *data, const lxb_char_t *end)
1146 {
1147     token->type = LXB_CSS_SYNTAX_TOKEN_SEMICOLON;
1148 
1149     lxb_css_syntax_token_base(token)->begin = data;
1150     lxb_css_syntax_token_base(token)->length = 1;
1151 
1152     return data + 1;
1153 }
1154 
1155 /*
1156  * U+003C LESS-THAN SIGN (<)
1157  */
1158 const lxb_char_t *
lxb_css_syntax_state_less_sign(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1159 lxb_css_syntax_state_less_sign(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1160                                const lxb_char_t *data, const lxb_char_t *end)
1161 {
1162     size_t length;
1163     lxb_char_t ch;
1164     lxb_status_t status;
1165     const lxb_char_t *mark, *minus, *esc, *idnt;
1166     lxb_css_syntax_token_t *ident;
1167 
1168     lxb_css_syntax_token_base(token)->begin = data++;
1169 
1170     if ((end - data) > 2) {
1171         if (data[0] == '!' && data[1] == '-' && data[2] == '-') {
1172             data += 3;
1173 
1174             token->type = LXB_CSS_SYNTAX_TOKEN_CDO;
1175             lxb_css_syntax_token_base(token)->length = 4;
1176 
1177             return data;
1178         }
1179 
1180         token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1181 
1182         lxb_css_syntax_token_base(token)->length = 1;
1183         lxb_css_syntax_token_delim(token)->character = '<';
1184 
1185         return data;
1186     }
1187 
1188     if (data >= end) {
1189         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1190         if (data >= end) {
1191             goto delim;
1192         }
1193     }
1194 
1195     /* U+0021 EXCLAMATION MARK */
1196     if (*data != 0x21) {
1197         goto delim;
1198     }
1199 
1200     mark = data++;
1201 
1202     if (data == end) {
1203         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1204         if (data >= end) {
1205             goto delim_mark;
1206         }
1207     }
1208 
1209     /* U+002D HYPHEN-MINUS */
1210     if (*data != 0x2D) {
1211         goto delim_mark;
1212     }
1213 
1214     minus = data++;
1215 
1216     if (data == end) {
1217         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1218         if (data >= end) {
1219             LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1220             LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
1221 
1222             goto delim;
1223         }
1224     }
1225 
1226     /* U+002D HYPHEN-MINUS */
1227     if (*data == 0x2D) {
1228         token->type = LXB_CSS_SYNTAX_TOKEN_CDO;
1229 
1230         lxb_css_syntax_token_base(token)->length = 4;
1231 
1232         return data + 1;
1233     }
1234 
1235     length = 1;
1236     idnt = data;
1237 
1238     if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START) {
1239         goto ident_with_minus;
1240     }
1241 
1242     /* U+005C REVERSE SOLIDUS (\) */
1243     if (*data == 0x5C) {
1244         esc = data++;
1245         length += 1;
1246 
1247         if (data == end) {
1248             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1249             if (data >= end) {
1250                 goto delim_esc;
1251             }
1252 
1253             ch = *data;
1254 
1255             if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1256                 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
1257                                               (const lxb_char_t *) "-", 1);
1258 
1259                 data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
1260                 if (data == NULL) {
1261                     return NULL;
1262                 }
1263 
1264                 goto ident;
1265             }
1266 
1267         delim_esc:
1268 
1269             LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1270             LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
1271             LXB_CSS_SYNTAX_DELIM_APPEND(tkz, esc, 1, '\\');
1272 
1273             goto delim;
1274         }
1275 
1276         ch = *data--;
1277 
1278         if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
1279             LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1280             LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
1281 
1282             goto delim;
1283         }
1284 
1285         data = lxb_css_syntax_state_escaped(tkz, data + 1, &end, &length);
1286         if (data == NULL) {
1287             return NULL;
1288         }
1289     }
1290     else if (*data != 0x00) {
1291         LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1292         LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 0, '-');
1293 
1294         goto delim;
1295     }
1296 
1297 ident_with_minus:
1298 
1299     LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, (const lxb_char_t *) "-", 1);
1300 
1301 ident:
1302 
1303     LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1304 
1305     ident = lxb_css_syntax_state_token_create(tkz);
1306     if (ident == NULL) {
1307         return NULL;
1308     }
1309 
1310     lxb_css_syntax_token_base(ident)->begin = idnt;
1311     lxb_css_syntax_token_base(ident)->length = length;
1312 
1313     data = lxb_css_syntax_state_ident_like_not_url(tkz, ident, data, end);
1314     if (data == NULL) {
1315         return NULL;
1316     }
1317 
1318     goto delim;
1319 
1320 delim_mark:
1321 
1322     LXB_CSS_SYNTAX_DELIM_APPEND(tkz, mark, 1, '!');
1323 
1324 delim:
1325 
1326     token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1327 
1328     lxb_css_syntax_token_base(token)->length = 1;
1329     lxb_css_syntax_token_delim(token)->character = '<';
1330 
1331     return data;
1332 }
1333 
1334 /*
1335  * U+0040 COMMERCIAL AT (@)
1336  */
1337 const lxb_char_t *
lxb_css_syntax_state_at(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1338 lxb_css_syntax_state_at(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1339                         const lxb_char_t *data, const lxb_char_t *end)
1340 {
1341     size_t length;
1342     lxb_char_t ch;
1343     lxb_status_t status;
1344     const lxb_char_t *minus, *esc;
1345 
1346     unsigned minuses_len = 0;
1347     static const lxb_char_t minuses[2] = "--";
1348 
1349     token->type = LXB_CSS_SYNTAX_TOKEN_AT_KEYWORD;
1350 
1351     lxb_css_syntax_token_base(token)->begin = data++;
1352     lxb_css_syntax_token_base(token)->length = 1;
1353 
1354     if (data >= end) {
1355         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1356         if (data >= end) {
1357             goto delim;
1358         }
1359     }
1360 
1361     if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START) {
1362         return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
1363     }
1364 
1365     minus = data;
1366 
1367     /* U+002D HYPHEN-MINUS */
1368     if (*data == 0x2D) {
1369         data++;
1370 
1371         if (data == end) {
1372             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1373             if (data >= end) {
1374                 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
1375                 goto delim;
1376             }
1377         }
1378 
1379         if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START
1380             || *data == 0x00)
1381         {
1382             lxb_css_syntax_token_base(token)->length += 1;
1383             LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 1);
1384             return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
1385         }
1386         else if (*data == 0x2D) {
1387             lxb_css_syntax_token_base(token)->length += 2;
1388             LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 2);
1389             return lxb_css_syntax_state_consume_ident(tkz, token,
1390                                                       data + 1, end);
1391         }
1392 
1393         minuses_len++;
1394     }
1395 
1396     /* U+005C REVERSE SOLIDUS (\) */
1397     if (*data == 0x5C) {
1398         esc = data++;
1399 
1400         if (data == end) {
1401             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1402             if (data >= end) {
1403                 goto delim_esc;
1404             }
1405         }
1406 
1407         ch = *data;
1408 
1409         if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1410             LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);
1411 
1412             length = 0;
1413 
1414             data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
1415             if (data == NULL) {
1416                 return NULL;
1417             }
1418 
1419             lxb_css_syntax_token_base(token)->length += 1 + minuses_len + length;
1420 
1421             return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
1422         }
1423 
1424         goto delim_esc;
1425     }
1426     else if (*data != 0x00) {
1427         if (minuses_len != 0) {
1428             LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 0, '-');
1429         }
1430 
1431         goto delim;
1432     }
1433 
1434     LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);
1435 
1436     lxb_css_syntax_token_base(token)->length += minuses_len;
1437 
1438     return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
1439 
1440 delim_esc:
1441 
1442     if (minuses_len != 0) {
1443         LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
1444     }
1445 
1446     LXB_CSS_SYNTAX_DELIM_APPEND(tkz, esc, 1, '\\');
1447 
1448 delim:
1449 
1450     token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1451 
1452     lxb_css_syntax_token_base(token)->length = 1;
1453     lxb_css_syntax_token_delim(token)->character = '@';
1454 
1455     return data;
1456 }
1457 
1458 /*
1459  * U+005B LEFT SQUARE BRACKET ([)
1460  */
1461 const lxb_char_t *
lxb_css_syntax_state_ls_bracket(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1462 lxb_css_syntax_state_ls_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1463                                 const lxb_char_t *data, const lxb_char_t *end)
1464 {
1465     token->type = LXB_CSS_SYNTAX_TOKEN_LS_BRACKET;
1466 
1467     lxb_css_syntax_token_base(token)->begin = data;
1468     lxb_css_syntax_token_base(token)->length = 1;
1469 
1470     return data + 1;
1471 }
1472 
1473 /*
1474  * U+005C REVERSE SOLIDUS (\)
1475  */
1476 const lxb_char_t *
lxb_css_syntax_state_rsolidus(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1477 lxb_css_syntax_state_rsolidus(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1478                               const lxb_char_t *data, const lxb_char_t *end)
1479 {
1480     size_t length;
1481     lxb_char_t ch;
1482     lxb_status_t status;
1483 
1484     lxb_css_syntax_token_base(token)->begin = data++;
1485 
1486     if (data >= end) {
1487         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1488         if (data >= end) {
1489             goto delim;
1490         }
1491     }
1492 
1493     ch = *data;
1494 
1495     if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
1496         goto delim;
1497     }
1498 
1499     length = 1;
1500 
1501     data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
1502     if (data == NULL) {
1503         return NULL;
1504     }
1505 
1506     lxb_css_syntax_token_base(token)->length = length;
1507 
1508     return lxb_css_syntax_state_ident_like(tkz, token, data, end);
1509 
1510 delim:
1511 
1512     token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1513 
1514     lxb_css_syntax_token_base(token)->length = 1;
1515     lxb_css_syntax_token_delim(token)->character = '\\';
1516 
1517     return data;
1518 }
1519 
1520 /*
1521  * U+005D RIGHT SQUARE BRACKET (])
1522  */
1523 const lxb_char_t *
lxb_css_syntax_state_rs_bracket(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1524 lxb_css_syntax_state_rs_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1525                                 const lxb_char_t *data, const lxb_char_t *end)
1526 {
1527     token->type = LXB_CSS_SYNTAX_TOKEN_RS_BRACKET;
1528 
1529     lxb_css_syntax_token_base(token)->begin = data;
1530     lxb_css_syntax_token_base(token)->length = 1;
1531 
1532     return data + 1;
1533 }
1534 
1535 /*
1536  * U+007B LEFT CURLY BRACKET ({)
1537  */
1538 const lxb_char_t *
lxb_css_syntax_state_lc_bracket(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1539 lxb_css_syntax_state_lc_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1540                                 const lxb_char_t *data, const lxb_char_t *end)
1541 {
1542     token->type = LXB_CSS_SYNTAX_TOKEN_LC_BRACKET;
1543 
1544     lxb_css_syntax_token_base(token)->begin = data;
1545     lxb_css_syntax_token_base(token)->length = 1;
1546 
1547     return data + 1;
1548 }
1549 
1550 /*
1551  * U+007D RIGHT CURLY BRACKET (})
1552  */
1553 const lxb_char_t *
lxb_css_syntax_state_rc_bracket(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1554 lxb_css_syntax_state_rc_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1555                                 const lxb_char_t *data, const lxb_char_t *end)
1556 {
1557     token->type = LXB_CSS_SYNTAX_TOKEN_RC_BRACKET;
1558 
1559     lxb_css_syntax_token_base(token)->begin = data;
1560     lxb_css_syntax_token_base(token)->length = 1;
1561 
1562     return data + 1;
1563 }
1564 
1565 /*
1566  * Numeric
1567  */
1568 lxb_inline void
lxb_css_syntax_consume_numeric_set_int(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * start,const lxb_char_t * end)1569 lxb_css_syntax_consume_numeric_set_int(lxb_css_syntax_tokenizer_t *tkz,
1570                                        lxb_css_syntax_token_t *token,
1571                                        const lxb_char_t *start, const lxb_char_t *end)
1572 {
1573     double num = lexbor_strtod_internal(start, (end - start), 0);
1574 
1575     token->type = LXB_CSS_SYNTAX_TOKEN_NUMBER;
1576 
1577     lxb_css_syntax_token_number(token)->is_float = false;
1578     lxb_css_syntax_token_number(token)->num = num;
1579 }
1580 
1581 lxb_inline void
lxb_css_syntax_consume_numeric_set_float(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * start,const lxb_char_t * end,bool e_is_negative,int exponent,int e_digit)1582 lxb_css_syntax_consume_numeric_set_float(lxb_css_syntax_tokenizer_t *tkz,
1583                                          lxb_css_syntax_token_t *token,
1584                                          const lxb_char_t *start, const lxb_char_t *end,
1585                                          bool e_is_negative, int exponent, int e_digit)
1586 {
1587     if (e_is_negative) {
1588         exponent -= e_digit;
1589     }
1590     else {
1591         exponent += e_digit;
1592     }
1593 
1594     double num = lexbor_strtod_internal(start, (end - start), exponent);
1595 
1596     token->type = LXB_CSS_SYNTAX_TOKEN_NUMBER;
1597 
1598     lxb_css_syntax_token_number(token)->num = num;
1599     lxb_css_syntax_token_number(token)->is_float = true;
1600 }
1601 
1602 const lxb_char_t *
lxb_css_syntax_state_consume_before_numeric(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1603 lxb_css_syntax_state_consume_before_numeric(lxb_css_syntax_tokenizer_t *tkz,
1604                                             lxb_css_syntax_token_t *token,
1605                                             const lxb_char_t *data,
1606                                             const lxb_char_t *end)
1607 {
1608     lxb_css_syntax_token_base(token)->begin = data;
1609     lxb_css_syntax_token_base(token)->length = 0;
1610     lxb_css_syntax_token_number(token)->have_sign = false;
1611 
1612     return lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
1613 }
1614 
1615 static const lxb_char_t *
lxb_css_syntax_state_consume_numeric(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1616 lxb_css_syntax_state_consume_numeric(lxb_css_syntax_tokenizer_t *tkz,
1617                                      lxb_css_syntax_token_t *token,
1618                                      const lxb_char_t *data,
1619                                      const lxb_char_t *end)
1620 {
1621     size_t length;
1622     lxb_status_t status;
1623     const lxb_char_t *begin;
1624 
1625     lxb_char_t *buf_start = tkz->buffer;
1626     lxb_char_t *buf_end = buf_start + sizeof(tkz->buffer);
1627 
1628     begin = data;
1629     length = 0;
1630 
1631     do {
1632         /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1633         if (*data < 0x30 || *data > 0x39) {
1634             length += data - begin;
1635             break;
1636         }
1637 
1638         if (buf_start != buf_end) {
1639             *buf_start++ = *data;
1640         }
1641 
1642         if (++data == end) {
1643             length += data - begin;
1644 
1645             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1646             if (data >= end) {
1647                 lxb_css_syntax_token_base(token)->length += length;
1648 
1649                 lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer,
1650                                                        buf_start);
1651                 return data;
1652             }
1653 
1654             begin = data;
1655         }
1656     }
1657     while (true);
1658 
1659     lxb_css_syntax_token_base(token)->length += length;
1660 
1661     /* U+002E FULL STOP (.) */
1662     if (*data != 0x2E) {
1663         lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer,
1664                                                buf_start);
1665 
1666         return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
1667                                                                data, end);
1668     }
1669 
1670     begin = data++;
1671 
1672     if (data == end) {
1673         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1674         if (data >= end) {
1675             goto delim;
1676         }
1677     }
1678 
1679     if (*data >= 0x30 && *data <= 0x39) {
1680         lxb_css_syntax_token_base(token)->length += 1;
1681 
1682         return lxb_css_syntax_state_decimal(tkz, token, buf_start, buf_end,
1683                                             data, end);
1684     }
1685 
1686 delim:
1687 
1688     lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer, buf_start);
1689 
1690     LXB_CSS_SYNTAX_DELIM_APPEND(tkz, begin, 1, '.');
1691 
1692     return data;
1693 }
1694 
1695 static const lxb_char_t *
lxb_css_syntax_state_decimal(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,lxb_char_t * buf_start,lxb_char_t * buf_end,const lxb_char_t * data,const lxb_char_t * end)1696 lxb_css_syntax_state_decimal(lxb_css_syntax_tokenizer_t *tkz,
1697                              lxb_css_syntax_token_t *token,
1698                              lxb_char_t *buf_start, lxb_char_t *buf_end,
1699                              const lxb_char_t *data, const lxb_char_t *end)
1700 {
1701     size_t length;
1702     bool e_is_negative;
1703     int exponent, e_digit;
1704     lxb_char_t ch, by;
1705     lxb_status_t status;
1706     const lxb_char_t *last, *begin;
1707     lxb_css_syntax_token_t *t_str;
1708     lxb_css_syntax_token_string_t *str;
1709 
1710     exponent = 0;
1711     begin = data;
1712     length = lxb_css_syntax_token_base(token)->length;
1713 
1714     str = lxb_css_syntax_token_dimension_string(token);
1715     t_str = (lxb_css_syntax_token_t *) (void *) str;
1716 
1717     /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1718     do {
1719         if (buf_start != buf_end) {
1720             *buf_start++ = *data;
1721             exponent -= 1;
1722         }
1723 
1724         data++;
1725 
1726         if (data >= end) {
1727             length += data - begin;
1728 
1729             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1730             if (data >= end) {
1731                 lxb_css_syntax_token_base(token)->length = length;
1732 
1733                 lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1734                                                          buf_start, 0, exponent, 0);
1735                 return data;
1736             }
1737 
1738             begin = data;
1739         }
1740     }
1741     while (*data >= 0x30 && *data <= 0x39);
1742 
1743     length += data - begin;
1744 
1745     lxb_css_syntax_token_base(token)->length = length;
1746     lxb_css_syntax_token_base(str)->begin = data;
1747 
1748     ch = *data;
1749 
1750     /* U+0045 Latin Capital Letter (E) or U+0065 Latin Small Letter (e) */
1751     if (ch != 0x45 && ch != 0x65) {
1752         lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1753                                                  buf_start, 0, exponent, 0);
1754 
1755         return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
1756                                                                data, end);
1757     }
1758 
1759     e_digit = 0;
1760     e_is_negative = false;
1761 
1762     lxb_css_syntax_token_base(t_str)->length = 1;
1763 
1764     if (++data == end) {
1765         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1766         if (data >= end) {
1767             lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1768                                                      buf_start, 0, exponent, 0);
1769 
1770             LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1771 
1772             token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1773 
1774             data = lxb_css_syntax_state_dimension_set(tkz, token, data);
1775 
1776             lxb_css_syntax_token_base(token)->length +=
1777                                       lxb_css_syntax_token_base(t_str)->length;
1778             return data;
1779         }
1780     }
1781 
1782     switch (*data) {
1783         /* U+002D HYPHEN-MINUS (-) */
1784         case 0x2D:
1785             e_is_negative = true;
1786             /* fall through */
1787 
1788         /* U+002B PLUS SIGN (+) */
1789         case 0x2B:
1790             last = data++;
1791             by = *last;
1792 
1793             if (data == end) {
1794                 LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1795                 if (data >= end) {
1796                     goto dimension;
1797                 }
1798             }
1799 
1800             /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1801             if (*data < 0x30 || *data > 0x39) {
1802                 goto dimension;
1803             }
1804 
1805             length += 1;
1806             break;
1807 
1808         default:
1809             /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1810             if (*data < 0x30 || *data > 0x39) {
1811                 lxb_css_syntax_consume_numeric_set_float(tkz, token,
1812                                                          tkz->buffer, buf_start,
1813                                                          0, exponent, 0);
1814 
1815                 token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1816 
1817                 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1818 
1819                 data = lxb_css_syntax_state_consume_ident(tkz, t_str,
1820                                                            data, end);
1821                 if (begin == NULL) {
1822                     return NULL;
1823                 }
1824 
1825                 lxb_css_syntax_token_base(token)->length = length
1826                                 + lxb_css_syntax_token_base(t_str)->length;
1827                 return data;
1828             }
1829 
1830             break;
1831     }
1832 
1833     length += 1;
1834     begin = data;
1835 
1836     /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1837     do {
1838         e_digit = (*data - 0x30) + e_digit * 0x0A;
1839 
1840         if (++data == end) {
1841             length += data - begin;
1842 
1843             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1844             if (data >= end) {
1845                 lxb_css_syntax_token_base(token)->length = length;
1846 
1847                 lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, buf_start,
1848                                                          e_is_negative, exponent, e_digit);
1849                 return data;
1850             }
1851 
1852             begin = data;
1853         }
1854     }
1855     while(*data >= 0x30 && *data <= 0x39);
1856 
1857     length += data - begin;
1858 
1859     lxb_css_syntax_token_base(token)->length = length;
1860 
1861     lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, buf_start,
1862                                              e_is_negative, exponent, e_digit);
1863 
1864     return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
1865                                                            data, end);
1866 
1867 dimension:
1868 
1869     lxb_css_syntax_consume_numeric_set_float(tkz, token,
1870                                              tkz->buffer, buf_start,
1871                                              0, exponent, 0);
1872 
1873     token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1874 
1875     LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1876 
1877     if (by == '-') {
1878         LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &by, 1);
1879 
1880         lxb_css_syntax_token_base(t_str)->length += 1;
1881 
1882         data = lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);
1883 
1884         lxb_css_syntax_token_base(token)->length = length
1885                                     + lxb_css_syntax_token_base(t_str)->length;
1886         return data;
1887     }
1888 
1889     LXB_CSS_SYNTAX_DELIM_APPEND(tkz, last, (data >= end), '+');
1890 
1891     lxb_css_syntax_token_base(token)->length = length
1892                                     + lxb_css_syntax_token_base(t_str)->length;
1893 
1894     return lxb_css_syntax_state_dimension_set(tkz, token, data);
1895 }
1896 
1897 static const lxb_char_t *
lxb_css_syntax_state_consume_numeric_name_start(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)1898 lxb_css_syntax_state_consume_numeric_name_start(lxb_css_syntax_tokenizer_t *tkz,
1899                                                 lxb_css_syntax_token_t *token,
1900                                                 const lxb_char_t *data,
1901                                                 const lxb_char_t *end)
1902 {
1903     bool have_minus;
1904     size_t length;
1905     lxb_char_t ch;
1906     lxb_status_t status;
1907     const lxb_char_t *esc, *minus;
1908     lxb_css_syntax_token_t *t_str;
1909     lxb_css_syntax_token_string_t *str;
1910 
1911     str = lxb_css_syntax_token_dimension_string(token);
1912     t_str = (lxb_css_syntax_token_t *) (void *) str;
1913 
1914     lxb_css_syntax_token_base(t_str)->begin = data;
1915 
1916     ch = *data;
1917 
1918     if (lxb_css_syntax_res_name_map[ch] == LXB_CSS_SYNTAX_RES_NAME_START
1919         || ch == 0x00)
1920     {
1921         lxb_css_syntax_token_base(t_str)->length = 0;
1922         goto dimension;
1923     }
1924 
1925     /* U+0025 PERCENTAGE SIGN (%) */
1926     if (ch == 0x25) {
1927         token->type = LXB_CSS_SYNTAX_TOKEN_PERCENTAGE;
1928 
1929         lxb_css_syntax_token_base(token)->length += 1;
1930 
1931         return data + 1;
1932     }
1933 
1934     have_minus = false;
1935     minus = data;
1936 
1937     /* U+002D HYPHEN-MINUS */
1938     if (ch == 0x2D) {
1939         data++;
1940 
1941         if (data >= end) {
1942             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1943             if (data >= end) {
1944                 LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
1945                 return data;
1946             }
1947         }
1948 
1949         ch = *data;
1950 
1951         if (lxb_css_syntax_res_name_map[ch] == LXB_CSS_SYNTAX_RES_NAME_START
1952             || ch == 0x2D || ch == 0x00)
1953         {
1954             lxb_css_syntax_token_base(t_str)->length = 1;
1955 
1956             LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
1957                                           (const lxb_char_t *) "-", 1);
1958             goto dimension;
1959         }
1960 
1961         have_minus = true;
1962     }
1963 
1964     esc = data;
1965 
1966     /* U+005C REVERSE SOLIDUS (\) */
1967     if (ch == 0x5C) {
1968         data++;
1969 
1970         if (data >= end) {
1971             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1972             if (data >= end) {
1973                 goto delim_rev_solidus;
1974             }
1975         }
1976 
1977         ch = *data;
1978 
1979         if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1980             length = 1;
1981 
1982             if (have_minus) {
1983                 length += 1;
1984 
1985                 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
1986                                               (const lxb_char_t *) "-", 1);
1987             }
1988 
1989             data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
1990             if (data == NULL) {
1991                 return NULL;
1992             }
1993 
1994             lxb_css_syntax_token_base(t_str)->length = length;
1995 
1996             goto dimension;
1997         }
1998 
1999     delim_rev_solidus:
2000 
2001         if (have_minus) {
2002             LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 1, '-');
2003         }
2004 
2005         LXB_CSS_SYNTAX_DELIM_APPEND(tkz, esc, 1, '\\');
2006 
2007         return data;
2008     }
2009 
2010     if (have_minus) {
2011         LXB_CSS_SYNTAX_DELIM_APPEND(tkz, minus, 0, '-');
2012     }
2013 
2014     return data;
2015 
2016 dimension:
2017 
2018     token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
2019 
2020     data = lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);
2021 
2022     lxb_css_syntax_token_base(token)->length +=
2023                                     lxb_css_syntax_token_base(t_str)->length;
2024 
2025     return data;
2026 }
2027 
2028 static const lxb_char_t *
lxb_css_syntax_state_consume_ident(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2029 lxb_css_syntax_state_consume_ident(lxb_css_syntax_tokenizer_t *tkz,
2030                                    lxb_css_syntax_token_t *token,
2031                                    const lxb_char_t *data, const lxb_char_t *end)
2032 {
2033     size_t length;
2034     lxb_status_t status;
2035     const lxb_char_t *begin;
2036 
2037     begin = data;
2038     length = 0;
2039 
2040     for (;; data++) {
2041         if (data >= end) {
2042             if (begin < data) {
2043                 length += data - begin;
2044 
2045                 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2046             }
2047 
2048             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2049             if (data >= end) {
2050                 lxb_css_syntax_token_base(token)->length += length;
2051 
2052                 return lxb_css_syntax_state_string_set(tkz, token, data);
2053             }
2054 
2055             begin = data;
2056         }
2057 
2058         if (lxb_css_syntax_res_name_map[*data] == 0x00) {
2059 
2060             /* U+005C REVERSE SOLIDUS (\) */
2061             if (*data == 0x5C) {
2062                 if (begin < data) {
2063                     length += data - begin;
2064 
2065                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2066                 }
2067 
2068                 begin = data;
2069 
2070                 if (++data == end) {
2071                     LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2072                     if (data >= end) {
2073                         goto push_delim_last;
2074                     }
2075                 }
2076 
2077                 if (*data == 0x0A || *data == 0x0C || *data == 0x0D) {
2078                     goto push_delim_last;
2079                 }
2080 
2081                 length += 1;
2082 
2083                 data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
2084                 if (data == NULL) {
2085                     return NULL;
2086                 }
2087 
2088                 begin = data--;
2089             }
2090             else if (*data == 0x00) {
2091                 length += (data + 1) - begin;
2092 
2093                 if (begin < data) {
2094                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2095                 }
2096 
2097                 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
2098                                               lexbor_str_res_ansi_replacement_character,
2099                                               sizeof(lexbor_str_res_ansi_replacement_character) - 1);
2100                 begin = data + 1;
2101             }
2102             else {
2103                 if (begin < data) {
2104                     length += data - begin;
2105 
2106                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2107                 }
2108 
2109                 lxb_css_syntax_token_base(token)->length += length;
2110 
2111                 return lxb_css_syntax_state_string_set(tkz, token, data);
2112             }
2113         }
2114     }
2115 
2116     return data;
2117 
2118 push_delim_last:
2119 
2120     lxb_css_syntax_token_base(token)->length += length;
2121 
2122     LXB_CSS_SYNTAX_DELIM_APPEND(tkz, begin, 1, '\\');
2123 
2124     return lxb_css_syntax_state_string_set(tkz, token, data);
2125 }
2126 
2127 const lxb_char_t *
lxb_css_syntax_state_ident_like_begin(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2128 lxb_css_syntax_state_ident_like_begin(lxb_css_syntax_tokenizer_t *tkz,
2129                                       lxb_css_syntax_token_t *token,
2130                                       const lxb_char_t *data, const lxb_char_t *end)
2131 {
2132     lxb_css_syntax_token_base(token)->begin = data;
2133     lxb_css_syntax_token_base(token)->length = 0;
2134 
2135     return lxb_css_syntax_state_ident_like(tkz, token, data, end);
2136 }
2137 
2138 static const lxb_char_t *
lxb_css_syntax_state_ident_like(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2139 lxb_css_syntax_state_ident_like(lxb_css_syntax_tokenizer_t *tkz,
2140                                 lxb_css_syntax_token_t *token,
2141                                 const lxb_char_t *data, const lxb_char_t *end)
2142 {
2143     size_t length;
2144     lxb_char_t ch;
2145     lxb_status_t status;
2146     const lxb_char_t *begin, *ws_begin;
2147     lxb_css_syntax_token_t *ws;
2148     lxb_css_syntax_token_string_t *str, *ws_str;
2149     static const lxb_char_t url[] = "url";
2150 
2151     data = lxb_css_syntax_state_consume_ident(tkz, token, data, end);
2152 
2153     end = tkz->in_end;
2154 
2155     if (data >= end) {
2156         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2157         if (data >= end) {
2158             token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2159             return data;
2160         }
2161     }
2162 
2163     if (data < end && *data == '(') {
2164         data++;
2165 
2166         lxb_css_syntax_token_base(token)->length += 1;
2167 
2168         str = lxb_css_syntax_token_string(token);
2169 
2170         if (str->length == 3 && lexbor_str_data_casecmp(str->data, url)) {
2171             begin = data;
2172             length = 0;
2173 
2174             tkz->pos += str->length + 1;
2175             ws_begin = tkz->pos;
2176 
2177             do {
2178                 if (data >= end) {
2179                     if (begin < data) {
2180                         length += data - begin;
2181                         LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2182                     }
2183 
2184                     LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2185                     if (data >= end) {
2186                         begin = data;
2187                         goto with_ws;
2188                     }
2189 
2190                     begin = data;
2191                 }
2192 
2193                 ch = *data;
2194 
2195                 if (lexbor_utils_whitespace(ch, !=, &&)) {
2196                     /* U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE (') */
2197                     if (ch == 0x22 || ch == 0x27) {
2198                         goto with_ws;
2199                     }
2200 
2201                     tkz->pos = tkz->start;
2202                     length += data - begin;
2203 
2204                     lxb_css_syntax_token_base(token)->length += length;
2205 
2206                     return lxb_css_syntax_state_url(tkz, token, data, end);
2207                 }
2208 
2209                 data++;
2210             }
2211             while (true);
2212         }
2213 
2214         token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2215 
2216         return data;
2217     }
2218 
2219     token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2220 
2221     return data;
2222 
2223 with_ws:
2224 
2225     token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2226 
2227     if (ws_begin != tkz->pos || begin < data) {
2228         if (begin < data) {
2229             length += data - begin;
2230             LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2231         }
2232 
2233         if (tkz->pos >= tkz->end) {
2234             if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
2235                 return NULL;
2236             }
2237         }
2238 
2239         str->data = tkz->start;
2240         *tkz->pos = 0x00;
2241 
2242         ws = lxb_css_syntax_state_token_create(tkz);
2243         if (ws == NULL) {
2244             return NULL;
2245         }
2246 
2247         ws->type = LXB_CSS_SYNTAX_TOKEN_WHITESPACE;
2248 
2249         lxb_css_syntax_token_base(ws)->begin = begin;
2250         lxb_css_syntax_token_base(ws)->length = length;
2251 
2252         ws_str = lxb_css_syntax_token_string(ws);
2253 
2254         ws_str->data = tkz->start + str->length + 1;
2255         ws_str->length = tkz->pos - ws_str->data;
2256     }
2257 
2258     tkz->pos = tkz->start;
2259 
2260     return data;
2261 }
2262 
2263 const lxb_char_t *
lxb_css_syntax_state_ident_like_not_url_begin(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2264 lxb_css_syntax_state_ident_like_not_url_begin(lxb_css_syntax_tokenizer_t *tkz,
2265                                               lxb_css_syntax_token_t *token,
2266                                               const lxb_char_t *data, const lxb_char_t *end)
2267 {
2268     lxb_css_syntax_token_base(token)->begin = data;
2269     lxb_css_syntax_token_base(token)->length = 0;
2270 
2271     return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
2272 }
2273 
2274 static const lxb_char_t *
lxb_css_syntax_state_ident_like_not_url(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2275 lxb_css_syntax_state_ident_like_not_url(lxb_css_syntax_tokenizer_t *tkz,
2276                                         lxb_css_syntax_token_t *token,
2277                                         const lxb_char_t *data, const lxb_char_t *end)
2278 {
2279     data = lxb_css_syntax_state_consume_ident(tkz, token, data, end);
2280     if (data == NULL) {
2281         return NULL;
2282     }
2283 
2284     end = tkz->in_end;
2285 
2286     if (data < end && *data == '(') {
2287         token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2288 
2289         lxb_css_syntax_token_base(token)->length += 1;
2290 
2291         return data + 1;
2292     }
2293 
2294     token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2295 
2296     return data;
2297 }
2298 
2299 /*
2300  * URL
2301  */
2302 static const lxb_char_t *
lxb_css_syntax_state_url(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2303 lxb_css_syntax_state_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
2304                          const lxb_char_t *data, const lxb_char_t *end)
2305 {
2306     size_t length;
2307     lxb_char_t ch;
2308     lxb_status_t status;
2309     const lxb_char_t *begin;
2310 
2311     status = LXB_STATUS_OK;
2312 
2313     *tkz->pos = 0x00;
2314 
2315     begin = data;
2316     length = 0;
2317 
2318     do {
2319         if (data >= end) {
2320             if (begin < data) {
2321                 length += data - begin;
2322                 LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2323             }
2324 
2325             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2326             if (data >= end) {
2327                 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2328                                                    LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINUR);
2329 
2330                 token->type = LXB_CSS_SYNTAX_TOKEN_URL;
2331 
2332                 lxb_css_syntax_token_base(token)->length += length;
2333 
2334                 return lxb_css_syntax_state_string_set(tkz, token, data);
2335             }
2336 
2337             begin = data;
2338         }
2339 
2340         switch (*data) {
2341             /* U+0000 NULL (\0) */
2342             case 0x00:
2343                 if (begin < data) {
2344                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2345                 }
2346 
2347                 LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
2348                                               lexbor_str_res_ansi_replacement_character,
2349                                               sizeof(lexbor_str_res_ansi_replacement_character) - 1);
2350 
2351                 data += 1;
2352                 length += data - begin;
2353                 begin = data;
2354 
2355                 continue;
2356 
2357             /* U+0029 RIGHT PARENTHESIS ()) */
2358             case 0x29:
2359                 if (begin < data) {
2360                     length += data - begin;
2361                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2362                 }
2363 
2364                 token->type = LXB_CSS_SYNTAX_TOKEN_URL;
2365 
2366                 lxb_css_syntax_token_base(token)->length += length + 1;
2367 
2368                 return lxb_css_syntax_state_string_set(tkz, token, data + 1);
2369 
2370             /*
2371              * U+0022 QUOTATION MARK (")
2372              * U+0027 APOSTROPHE (')
2373              * U+0028 LEFT PARENTHESIS (()
2374              * U+000B LINE TABULATION
2375              * U+007F DELETE
2376              */
2377             case 0x22:
2378             case 0x27:
2379             case 0x28:
2380             case 0x0B:
2381             case 0x7F:
2382                 if (begin < data) {
2383                     length += data - begin;
2384                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2385                 }
2386 
2387                 lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2388                                          LXB_CSS_SYNTAX_TOKENIZER_ERROR_QOINUR);
2389 
2390                 lxb_css_syntax_token_base(token)->length += length + 1;
2391 
2392                 return lxb_css_syntax_state_bad_url(tkz, token, data + 1, end);
2393 
2394             /* U+005C REVERSE SOLIDUS (\) */
2395             case 0x5C:
2396                 if (begin < data) {
2397                     length += data - begin;
2398                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2399                 }
2400 
2401                 begin = ++data;
2402 
2403                 if (data == end) {
2404                     LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2405                     if (data >= end) {
2406                         lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2407                                                            LXB_CSS_SYNTAX_TOKENIZER_ERROR_WRESINUR);
2408 
2409                         token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;
2410 
2411                         lxb_css_syntax_token_base(token)->length += length + 1;
2412 
2413                         return lxb_css_syntax_state_string_set(tkz, token, data);
2414                     }
2415                 }
2416 
2417                 ch = *data;
2418 
2419                 if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
2420                     lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2421                                        LXB_CSS_SYNTAX_TOKENIZER_ERROR_WRESINUR);
2422 
2423                     lxb_css_syntax_token_base(token)->length += length + 1;
2424 
2425                     return lxb_css_syntax_state_bad_url(tkz, token, data, end);
2426                 }
2427 
2428                 data = lxb_css_syntax_state_escaped(tkz, data, &end, &length);
2429                 if (data == NULL) {
2430                     return NULL;
2431                 }
2432 
2433                 begin = data--;
2434                 length += 1;
2435 
2436                 break;
2437 
2438             /*
2439              * U+0009 CHARACTER TABULATION (tab)
2440              * U+000A LINE FEED (LF)
2441              * U+000C FORM FEED (FF)
2442              * U+000D CARRIAGE RETURN (CR)
2443              * U+0020 SPACE
2444              */
2445             case 0x09:
2446             case 0x0A:
2447             case 0x0C:
2448             case 0x0D:
2449             case 0x20:
2450                 if (begin < data) {
2451                     length += data - begin;
2452                     LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2453                 }
2454 
2455                 begin = ++data;
2456                 length += 1;
2457 
2458                 do {
2459                     if (data == end) {
2460                         length += data - begin;
2461 
2462                         LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2463                         if (data >= end) {
2464                             lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2465                                                                LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINUR);
2466 
2467                             token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;
2468 
2469                             lxb_css_syntax_token_base(token)->length += length;
2470 
2471                             return lxb_css_syntax_state_string_set(tkz, token, data);
2472                         }
2473 
2474                         begin = data;
2475                     }
2476 
2477                     ch = *data;
2478 
2479                     if (lexbor_utils_whitespace(ch, !=, &&)) {
2480                         length += data - begin;
2481 
2482                         /* U+0029 RIGHT PARENTHESIS ()) */
2483                         if (*data == 0x29) {
2484                             token->type = LXB_CSS_SYNTAX_TOKEN_URL;
2485 
2486                             lxb_css_syntax_token_base(token)->length += length + 1;
2487 
2488                             return lxb_css_syntax_state_string_set(tkz, token,
2489                                                                    data + 1);
2490                         }
2491 
2492                         lxb_css_syntax_token_base(token)->length += length;
2493 
2494                         return lxb_css_syntax_state_bad_url(tkz, token,
2495                                                             data, end);
2496                     }
2497 
2498                     data++;
2499                 }
2500                 while (true);
2501 
2502             default:
2503                 /*
2504                  * Inclusive:
2505                  * U+0000 NULL and U+0008 BACKSPACE or
2506                  * U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE
2507                  */
2508                 if ((*data <= 0x08)
2509                     || (*data >= 0x0E && *data <= 0x1F))
2510                 {
2511                     lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
2512                                          LXB_CSS_SYNTAX_TOKENIZER_ERROR_QOINUR);
2513 
2514                     lxb_css_syntax_token_base(token)->length += length;
2515 
2516                     return lxb_css_syntax_state_bad_url(tkz, token,
2517                                                         data + 1, end);
2518                 }
2519 
2520                 break;
2521         }
2522 
2523         data++;
2524     }
2525     while (true);
2526 
2527     return data;
2528 }
2529 
2530 /*
2531  * Bad URL
2532  */
2533 static const lxb_char_t *
lxb_css_syntax_state_bad_url(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_token_t * token,const lxb_char_t * data,const lxb_char_t * end)2534 lxb_css_syntax_state_bad_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
2535                                const lxb_char_t *data, const lxb_char_t *end)
2536 {
2537     size_t length;
2538     lxb_status_t status;
2539     const lxb_char_t *begin;
2540 
2541     token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;
2542 
2543     if(lxb_css_syntax_state_string_set(tkz, token, data) == NULL) {
2544         return NULL;
2545     }
2546 
2547     begin = data;
2548     length = 0;
2549 
2550     do {
2551         if (data >= end) {
2552             length += data - begin;
2553 
2554             LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2555             if (data >= end) {
2556                 lxb_css_syntax_token_base(token)->length += length;
2557                 return data;
2558             }
2559 
2560             begin = data;
2561         }
2562 
2563         /* U+0029 RIGHT PARENTHESIS ()) */
2564         if (*data == 0x29) {
2565             data++;
2566             length += data - begin;
2567 
2568             lxb_css_syntax_token_base(token)->length += length;
2569 
2570             return data;
2571         }
2572         /* U+005C REVERSE SOLIDUS (\) */
2573         else if (*data == 0x5C) {
2574             data++;
2575 
2576             if (data >= end) {
2577                 continue;
2578             }
2579         }
2580 
2581         data++;
2582     }
2583     while (true);
2584 
2585     return data;
2586 }
2587 
2588 lxb_inline lxb_status_t
lxb_css_syntax_string_append_rep(lxb_css_syntax_tokenizer_t * tkz)2589 lxb_css_syntax_string_append_rep(lxb_css_syntax_tokenizer_t *tkz)
2590 {
2591     return lxb_css_syntax_string_append(tkz, lexbor_str_res_ansi_replacement_character,
2592                                         sizeof(lexbor_str_res_ansi_replacement_character) - 1);
2593 }
2594 
2595 static const lxb_char_t *
lxb_css_syntax_state_escaped(lxb_css_syntax_tokenizer_t * tkz,const lxb_char_t * data,const lxb_char_t ** end,size_t * length)2596 lxb_css_syntax_state_escaped(lxb_css_syntax_tokenizer_t *tkz,
2597                              const lxb_char_t *data,
2598                              const lxb_char_t **end, size_t *length)
2599 {
2600     uint32_t cp;
2601     unsigned count;
2602     lxb_status_t status;
2603 
2604     cp = 0;
2605 
2606     for (count = 0; count < 6; count++, data++) {
2607         if (data >= *end) {
2608             status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data, end);
2609             if (status != LXB_STATUS_OK) {
2610                 return NULL;
2611             }
2612 
2613             if (data >= *end) {
2614                 if (count == 0) {
2615                     return *end;
2616                 }
2617 
2618                 break;
2619             }
2620         }
2621 
2622         if (lexbor_str_res_map_hex[*data] == 0xFF) {
2623             if (count == 0) {
2624                 *length += 1;
2625 
2626                 if (*data == 0x00) {
2627                     status = lxb_css_syntax_string_append_rep(tkz);
2628                     if (status != LXB_STATUS_OK) {
2629                         return NULL;
2630                     }
2631 
2632                     return data + 1;
2633                 }
2634 
2635                 status = lxb_css_syntax_string_append(tkz, data, 1);
2636                 if (status != LXB_STATUS_OK) {
2637                     return NULL;
2638                 }
2639 
2640                 return data + 1;
2641             }
2642 
2643             switch (*data) {
2644                 case 0x0D:
2645                     data++;
2646                     *length += 1;
2647 
2648                     status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data,
2649                                                                  end);
2650                     if (status != LXB_STATUS_OK) {
2651                         return NULL;
2652                     }
2653 
2654                     if (data >= *end) {
2655                         break;
2656                     }
2657 
2658                     if (*data == 0x0A) {
2659                         data++;
2660                         *length += 1;
2661                     }
2662 
2663                     break;
2664 
2665                 case 0x09:
2666                 case 0x20:
2667                 case 0x0A:
2668                 case 0x0C:
2669                     data++;
2670                     *length += 1;
2671                     break;
2672             }
2673 
2674             break;
2675         }
2676 
2677         cp <<= 4;
2678         cp |= lexbor_str_res_map_hex[*data];
2679     }
2680 
2681     if ((tkz->end - tkz->pos) < 5) {
2682         if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
2683             return NULL;
2684         }
2685     }
2686 
2687     lxb_css_syntax_codepoint_to_ascii(tkz, cp);
2688 
2689     *length += count;
2690 
2691     return data;
2692 }
2693 
2694 static const lxb_char_t *
lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t * tkz,const lxb_char_t * data,const lxb_char_t ** end,size_t * length)2695 lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t *tkz,
2696                                     const lxb_char_t *data,
2697                                     const lxb_char_t **end, size_t *length)
2698 {
2699     lxb_status_t status;
2700 
2701     /* U+000D CARRIAGE RETURN */
2702     if (*data == 0x0D) {
2703         data++;
2704         *length += 1;
2705 
2706         if (data >= *end) {
2707             status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data, end);
2708             if (status != LXB_STATUS_OK) {
2709                 return NULL;
2710             }
2711 
2712             if (data >= *end) {
2713                 return data;
2714             }
2715         }
2716 
2717         /* U+000A LINE FEED */
2718         if (*data == 0x0A) {
2719             data++;
2720             *length += 1;
2721         }
2722 
2723         return data;
2724     }
2725 
2726     if (*data == 0x00) {
2727         status = lxb_css_syntax_string_append_rep(tkz);
2728         if (status != LXB_STATUS_OK) {
2729             return NULL;
2730         }
2731 
2732         *length += 1;
2733 
2734         return data + 1;
2735     }
2736 
2737     if (*data == 0x0A || *data == 0x0C) {
2738         *length += 1;
2739 
2740         return data + 1;
2741     }
2742 
2743     return lxb_css_syntax_state_escaped(tkz, data, end, length);
2744 }
2745