1 /*
2 * Copyright (C) 2018-2020 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7 #ifndef LEXBOR_CSS_SYNTAX_TOKENIZER_H
8 #define LEXBOR_CSS_SYNTAX_TOKENIZER_H
9
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13
14 #include "lexbor/core/array_obj.h"
15
16 #include "lexbor/css/syntax/base.h"
17 #include "lexbor/css/syntax/token.h"
18
19
20 /* State */
21 typedef const lxb_char_t *
22 (*lxb_css_syntax_tokenizer_state_f)(lxb_css_syntax_tokenizer_t *tkz,
23 lxb_css_syntax_token_t *token,
24 const lxb_char_t *data, const lxb_char_t *end);
25
26 typedef lxb_status_t
27 (*lxb_css_syntax_tokenizer_chunk_f)(lxb_css_syntax_tokenizer_t *tkz,
28 const lxb_char_t **data,
29 const lxb_char_t **end, void *ctx);
30
31
/*
 * Tokenizer option values.
 *
 * Only the zero value is defined in this header.
 */
enum lxb_css_syntax_tokenizer_opt {
    /* Zero: no bits set. */
    LXB_CSS_SYNTAX_TOKENIZER_OPT_UNDEF = 0x00,
};
35
36 typedef struct {
37 lxb_css_syntax_token_t **list;
38 size_t size;
39 size_t length;
40 }
41 lxb_css_syntax_tokenizer_cache_t;
42
43 struct lxb_css_syntax_tokenizer {
44 lxb_css_syntax_tokenizer_cache_t *cache;
45 lexbor_dobject_t *tokens;
46
47 lexbor_array_obj_t *parse_errors;
48
49 const lxb_char_t *in_begin;
50 const lxb_char_t *in_end;
51 const lxb_char_t *begin;
52
53 uintptr_t offset;
54 size_t cache_pos;
55 size_t prepared;
56
57 lexbor_mraw_t *mraw;
58
59 lxb_css_syntax_tokenizer_chunk_f chunk_cb;
60 void *chunk_ctx;
61
62 /* Temp */
63 lxb_char_t *start;
64 lxb_char_t *pos;
65 const lxb_char_t *end;
66 lxb_char_t buffer[128];
67 lxb_css_syntax_token_data_t token_data;
68
69 /* Process */
70 unsigned int opt; /* bitmap */
71 lxb_status_t status;
72 bool eof;
73 bool with_comment;
74 };
75
76
77 LXB_API lxb_css_syntax_tokenizer_t *
78 lxb_css_syntax_tokenizer_create(void);
79
80 LXB_API lxb_status_t
81 lxb_css_syntax_tokenizer_init(lxb_css_syntax_tokenizer_t *tkz);
82
83 LXB_API lxb_status_t
84 lxb_css_syntax_tokenizer_clean(lxb_css_syntax_tokenizer_t *tkz);
85
86 LXB_API lxb_css_syntax_tokenizer_t *
87 lxb_css_syntax_tokenizer_destroy(lxb_css_syntax_tokenizer_t *tkz);
88
89 LXB_API lxb_status_t
90 lxb_css_syntax_tokenizer_next_chunk(lxb_css_syntax_tokenizer_t *tkz,
91 const lxb_char_t **data, const lxb_char_t **end);
92
93 LXB_API bool
94 lxb_css_syntax_tokenizer_lookup_colon(lxb_css_syntax_tokenizer_t *tkz);
95
96 LXB_API bool
97 lxb_css_syntax_tokenizer_lookup_important(lxb_css_syntax_tokenizer_t *tkz,
98 lxb_css_syntax_token_type_t stop,
99 const lxb_char_t stop_ch);
100
101 LXB_API bool
102 lxb_css_syntax_tokenizer_lookup_declaration_ws_end(lxb_css_syntax_tokenizer_t *tkz,
103 lxb_css_syntax_token_type_t stop,
104 const lxb_char_t stop_ch);
105
106 /*
107 * Inline functions
108 */
109 lxb_inline lxb_status_t
lxb_css_syntax_tokenizer_status(lxb_css_syntax_tokenizer_t * tkz)110 lxb_css_syntax_tokenizer_status(lxb_css_syntax_tokenizer_t *tkz)
111 {
112 return tkz->status;
113 }
114
115 lxb_inline void
lxb_css_syntax_tokenizer_chunk_cb_set(lxb_css_syntax_tokenizer_t * tkz,lxb_css_syntax_tokenizer_chunk_f cb,void * ctx)116 lxb_css_syntax_tokenizer_chunk_cb_set(lxb_css_syntax_tokenizer_t *tkz,
117 lxb_css_syntax_tokenizer_chunk_f cb,
118 void *ctx)
119 {
120 tkz->chunk_cb = cb;
121 tkz->chunk_ctx = ctx;
122 }
123
124 lxb_inline void
lxb_css_syntax_tokenizer_buffer_set(lxb_css_syntax_tokenizer_t * tkz,const lxb_char_t * data,size_t size)125 lxb_css_syntax_tokenizer_buffer_set(lxb_css_syntax_tokenizer_t *tkz,
126 const lxb_char_t *data, size_t size)
127 {
128 tkz->in_begin = data;
129 tkz->in_end = data + size;
130 }
131
132
/*
 * Non-inline versions of the inline functions above, exported for ABI.
 */
136 LXB_API lxb_status_t
137 lxb_css_syntax_tokenizer_status_noi(lxb_css_syntax_tokenizer_t *tkz);
138
139
140 #ifdef __cplusplus
141 } /* extern "C" */
142 #endif
143
144 #endif /* LEXBOR_CSS_SYNTAX_TOKENIZER_H */
145