1 /*
2  * Copyright (C) 2018-2020 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #ifndef LEXBOR_CSS_SYNTAX_TOKENIZER_H
8 #define LEXBOR_CSS_SYNTAX_TOKENIZER_H
9 
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13 
14 #include "lexbor/core/array_obj.h"
15 
16 #include "lexbor/css/syntax/base.h"
17 #include "lexbor/css/syntax/token.h"
18 
19 
20 /* State */
21 typedef const lxb_char_t *
22 (*lxb_css_syntax_tokenizer_state_f)(lxb_css_syntax_tokenizer_t *tkz,
23                                  lxb_css_syntax_token_t *token,
24                                  const lxb_char_t *data, const lxb_char_t *end);
25 
26 typedef lxb_status_t
27 (*lxb_css_syntax_tokenizer_chunk_f)(lxb_css_syntax_tokenizer_t *tkz,
28                                     const lxb_char_t **data,
29                                     const lxb_char_t **end, void *ctx);
30 
31 
/* Tokenizer options; only the "undefined" (zero) value is declared. */
enum lxb_css_syntax_tokenizer_opt {
    LXB_CSS_SYNTAX_TOKENIZER_OPT_UNDEF = 0x00
};
35 
36 typedef struct {
37     lxb_css_syntax_token_t **list;
38     size_t                 size;
39     size_t                 length;
40 }
41 lxb_css_syntax_tokenizer_cache_t;
42 
43 struct lxb_css_syntax_tokenizer {
44     lxb_css_syntax_tokenizer_cache_t   *cache;
45     lexbor_dobject_t                   *tokens;
46 
47     lexbor_array_obj_t                 *parse_errors;
48 
49     const lxb_char_t                   *in_begin;
50     const lxb_char_t                   *in_end;
51     const lxb_char_t                   *begin;
52 
53     uintptr_t                          offset;
54     size_t                             cache_pos;
55     size_t                             prepared;
56 
57     lexbor_mraw_t                      *mraw;
58 
59     lxb_css_syntax_tokenizer_chunk_f   chunk_cb;
60     void                               *chunk_ctx;
61 
62     /* Temp */
63     lxb_char_t                         *start;
64     lxb_char_t                         *pos;
65     const lxb_char_t                   *end;
66     lxb_char_t                         buffer[128];
67     lxb_css_syntax_token_data_t        token_data;
68 
69     /* Process */
70     unsigned int                       opt;             /* bitmap */
71     lxb_status_t                       status;
72     bool                               eof;
73     bool                               with_comment;
74 };
75 
76 
77 LXB_API lxb_css_syntax_tokenizer_t *
78 lxb_css_syntax_tokenizer_create(void);
79 
80 LXB_API lxb_status_t
81 lxb_css_syntax_tokenizer_init(lxb_css_syntax_tokenizer_t *tkz);
82 
83 LXB_API lxb_status_t
84 lxb_css_syntax_tokenizer_clean(lxb_css_syntax_tokenizer_t *tkz);
85 
86 LXB_API lxb_css_syntax_tokenizer_t *
87 lxb_css_syntax_tokenizer_destroy(lxb_css_syntax_tokenizer_t *tkz);
88 
89 LXB_API lxb_status_t
90 lxb_css_syntax_tokenizer_next_chunk(lxb_css_syntax_tokenizer_t *tkz,
91                                     const lxb_char_t **data, const lxb_char_t **end);
92 
93 LXB_API bool
94 lxb_css_syntax_tokenizer_lookup_colon(lxb_css_syntax_tokenizer_t *tkz);
95 
96 LXB_API bool
97 lxb_css_syntax_tokenizer_lookup_important(lxb_css_syntax_tokenizer_t *tkz,
98                                           lxb_css_syntax_token_type_t stop,
99                                           const lxb_char_t stop_ch);
100 
101 LXB_API bool
102 lxb_css_syntax_tokenizer_lookup_declaration_ws_end(lxb_css_syntax_tokenizer_t *tkz,
103                                                    lxb_css_syntax_token_type_t stop,
104                                                    const lxb_char_t stop_ch);
105 
106 /*
107  * Inline functions
108  */
109 lxb_inline lxb_status_t
lxb_css_syntax_tokenizer_status(lxb_css_syntax_tokenizer_t * tkz)110 lxb_css_syntax_tokenizer_status(lxb_css_syntax_tokenizer_t *tkz)
111 {
112     return tkz->status;
113 }
114 
115 lxb_inline void
lxb_css_syntax_tokenizer_chunk_cb_set(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_tokenizer_chunk_f cb,void * ctx)116 lxb_css_syntax_tokenizer_chunk_cb_set(lxb_css_syntax_tokenizer_t *tkz,
117                                       lxb_css_syntax_tokenizer_chunk_f cb,
118                                       void *ctx)
119 {
120     tkz->chunk_cb = cb;
121     tkz->chunk_ctx = ctx;
122 }
123 
124 lxb_inline void
lxb_css_syntax_tokenizer_buffer_set(lxb_css_syntax_tokenizer_t * tkz,const lxb_char_t * data,size_t size)125 lxb_css_syntax_tokenizer_buffer_set(lxb_css_syntax_tokenizer_t *tkz,
126                                     const lxb_char_t *data, size_t size)
127 {
128     tkz->in_begin = data;
129     tkz->in_end = data + size;
130 }
131 
132 
133 /*
134  * No inline functions for ABI.
135  */
136 LXB_API lxb_status_t
137 lxb_css_syntax_tokenizer_status_noi(lxb_css_syntax_tokenizer_t *tkz);
138 
139 
140 #ifdef __cplusplus
141 } /* extern "C" */
142 #endif
143 
144 #endif /* LEXBOR_CSS_SYNTAX_TOKENIZER_H */
145