1 /*
2  * Copyright (C) 2018-2020 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #ifndef LEXBOR_HTML_TOKENIZER_STATE_H
8 #define LEXBOR_HTML_TOKENIZER_STATE_H
9 
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13 
14 #include "lexbor/html/tokenizer.h"
15 
/*
 * Store v_data in the `begin` member of the object tkz points to.
 *
 * Expands to one parenthesized assignment expression, so the macro yields
 * the assigned value. Both parameters are parenthesized in the expansion so
 * that arguments such as `&tokenizer` or `ptr + 1` bind as intended.
 */
#define lxb_html_tokenizer_state_begin_set(tkz, v_data)                        \
    ((tkz)->begin = (v_data))
18 
/*
 * Call lxb_html_tokenizer_temp_append_data(tkz, v_data) and bail out of the
 * enclosing function when the call reports a non-zero result.
 *
 * The bail-out is a literal `return end;`, so an identifier named `end` must
 * be in scope wherever this macro is used.
 */
#define lxb_html_tokenizer_state_append_data_m(tkz, v_data)                    \
    do {                                                                       \
        if (lxb_html_tokenizer_temp_append_data((tkz), (v_data)) != 0) {       \
            return end;                                                        \
        }                                                                      \
    } while (0)
26 
/*
 * Call lxb_html_tokenizer_temp_append() with v_data cast to
 * `const lxb_char_t *` and the given size; a non-zero result makes the
 * enclosing function execute `return end;`.
 *
 * An identifier named `end` must therefore be in scope at the call site.
 */
#define lxb_html_tokenizer_state_append_m(tkz, v_data, size)                   \
    do {                                                                       \
        if (lxb_html_tokenizer_temp_append((tkz),                              \
                                           (const lxb_char_t *) (v_data),      \
                                           (size)) != 0)                       \
        {                                                                      \
            return end;                                                        \
        }                                                                      \
    } while (0)
36 
/*
 * Pass lexbor_str_res_ansi_replacement_character to
 * lxb_html_tokenizer_temp_append(), with a length of sizeof() minus one
 * (i.e. without the array's final byte).
 *
 * A non-zero result makes the enclosing function execute `return end;`, so
 * an identifier named `end` must be in scope at the call site.
 */
#define lxb_html_tokenizer_state_append_replace_m(tkz)                         \
    do {                                                                       \
        if (lxb_html_tokenizer_temp_append((tkz),                              \
                        lexbor_str_res_ansi_replacement_character,             \
                        sizeof(lexbor_str_res_ansi_replacement_character) - 1) \
            != 0)                                                              \
        {                                                                      \
            return end;                                                        \
        }                                                                      \
    } while (0)
47 
/*
 * Look up / register the name [_start, _end) through lxb_tag_append_lower()
 * on tkz->tags and copy the resulting entry's tag_id into tkz->token.
 *
 * If lxb_tag_append_lower() returns NULL, tkz->status is set to
 * LXB_STATUS_ERROR_MEMORY_ALLOCATION and the enclosing function executes
 * `return end;` (an identifier named `end` must be in scope).
 *
 * tkz is parenthesized in the expansion so any pointer expression may be
 * passed. Note that tkz and _start are each expanded more than once; do not
 * pass expressions with side effects.
 */
#define lxb_html_tokenizer_state_set_tag_m(tkz, _start, _end)                  \
    do {                                                                       \
        const lxb_tag_data_t *tag;                                             \
        tag = lxb_tag_append_lower((tkz)->tags, (_start), (_end) - (_start));  \
        if (tag == NULL) {                                                     \
            (tkz)->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;                \
            return end;                                                        \
        }                                                                      \
        (tkz)->token->tag_id = tag->tag_id;                                    \
    }                                                                          \
    while (0)
59 
/*
 * Register the bytes [tkz->start, tkz->pos) via
 * lxb_dom_attr_local_name_append() on tkz->attrs and store the returned
 * entry as the name of tkz->token->attr_last.
 *
 * If the call returns NULL, tkz->status is set to
 * LXB_STATUS_ERROR_MEMORY_ALLOCATION and the enclosing function executes
 * `return end;` (an identifier named `end` must be in scope).
 *
 * tkz is parenthesized in the expansion and is expanded several times; pass
 * an expression without side effects.
 */
#define lxb_html_tokenizer_state_set_name_m(tkz)                               \
    do {                                                                       \
        lxb_dom_attr_data_t *data;                                             \
        data = lxb_dom_attr_local_name_append((tkz)->attrs, (tkz)->start,      \
                                              (tkz)->pos - (tkz)->start);      \
        if (data == NULL) {                                                    \
            (tkz)->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;                \
            return end;                                                        \
        }                                                                      \
        (tkz)->token->attr_last->name = data;                                  \
    }                                                                          \
    while (0)
72 
/*
 * Copy the bytes [tkz->start, tkz->pos) into a buffer obtained from
 * lexbor_mraw_alloc(tkz->attrs_mraw, ...) and attach it, together with its
 * length, to tkz->token->attr_last.
 *
 * One extra byte is allocated and set to 0x00 so the stored value is
 * NUL-terminated; value_size does not count that terminator.
 *
 * If the allocation returns NULL, tkz->status is set to
 * LXB_STATUS_ERROR_MEMORY_ALLOCATION and the enclosing function executes
 * `return end;` (an identifier named `end` must be in scope).
 *
 * tkz is parenthesized in the expansion and is expanded several times; pass
 * an expression without side effects.
 */
#define lxb_html_tokenizer_state_set_value_m(tkz)                              \
    do {                                                                       \
        lxb_html_token_attr_t *attr = (tkz)->token->attr_last;                 \
                                                                               \
        attr->value_size = (size_t) ((tkz)->pos - (tkz)->start);               \
                                                                               \
        attr->value = lexbor_mraw_alloc((tkz)->attrs_mraw,                     \
                                        attr->value_size + 1);                 \
        if (attr->value == NULL) {                                             \
            (tkz)->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;                \
            return end;                                                        \
        }                                                                      \
        memcpy(attr->value, (tkz)->start, attr->value_size);                   \
        attr->value[attr->value_size] = 0x00;                                  \
    }                                                                          \
    while (0)
88 
/*
 * Start a new token: reset tkz->pos to tkz->start, record v_begin as the
 * token's begin, and copy the tokenizer's current line/column into the token.
 *
 * Parameters are parenthesized in the expansion. tkz is expanded several
 * times; pass an expression without side effects.
 */
#define lxb_html_tokenizer_state_token_set_begin(tkz, v_begin)                 \
    do {                                                                       \
        (tkz)->pos = (tkz)->start;                                             \
        (tkz)->token->begin = (v_begin);                                       \
        (tkz)->token->line = (tkz)->current_line;                              \
        (tkz)->token->column = (tkz)->current_column;                          \
    }                                                                          \
    while (0)
97 
/*
 * Record v_end as the end of tkz->token.
 *
 * Expands to a single parenthesized assignment expression; both parameters
 * are parenthesized so arbitrary pointer expressions may be passed.
 */
#define lxb_html_tokenizer_state_token_set_end(tkz, v_end)                     \
    ((tkz)->token->end = (v_end))
100 
/*
 * Set the end of tkz->token to the value returned by
 * lexbor_in_node_pos_down(tkz->incoming_node, NULL, v_end, offset).
 *
 * NOTE(review): presumably this moves v_end back by `offset` positions
 * within the incoming-node chain -- confirm against lexbor_in_node_pos_down.
 *
 * Parameters are parenthesized in the expansion; tkz is expanded twice.
 */
#define lxb_html_tokenizer_state_token_set_end_down(tkz, v_end, offset)        \
    do {                                                                       \
        (tkz)->token->end = lexbor_in_node_pos_down((tkz)->incoming_node,      \
                                                    NULL, (v_end), (offset));  \
    }                                                                          \
    while (0)
107 
/*
 * Set the end of tkz->token to tkz->last.
 *
 * NOTE(review): "oef" in the name is presumably a transposed "eof"; the name
 * is kept as-is because callers depend on it.
 *
 * tkz is parenthesized in the expansion and is expanded twice.
 */
#define lxb_html_tokenizer_state_token_set_end_oef(tkz)                        \
    ((tkz)->token->end = (tkz)->last)
110 
/*
 * Assign to `attr` the result of
 * lxb_html_token_attr_append(tkz->token, tkz->dobj_token_attr).
 *
 * If that result is NULL, tkz->status is set to
 * LXB_STATUS_ERROR_MEMORY_ALLOCATION and the enclosing function executes
 * `return v_return;`.
 *
 * `attr` must be a modifiable lvalue. tkz and attr are parenthesized in the
 * expansion and are each expanded more than once.
 */
#define lxb_html_tokenizer_state_token_attr_add_m(tkz, attr, v_return)         \
    do {                                                                       \
        (attr) = lxb_html_token_attr_append((tkz)->token,                      \
                                            (tkz)->dobj_token_attr);           \
        if ((attr) == NULL) {                                                  \
            (tkz)->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;                \
            return v_return;                                                   \
        }                                                                      \
    }                                                                          \
    while (0)
120 
/*
 * Reset tkz->pos to tkz->start and record v_begin as name_begin of the
 * token's last attribute (tkz->token->attr_last).
 *
 * Parameters are parenthesized in the expansion; tkz is expanded several
 * times, so pass an expression without side effects.
 */
#define lxb_html_tokenizer_state_token_attr_set_name_begin(tkz, v_begin)       \
    do {                                                                       \
        (tkz)->pos = (tkz)->start;                                             \
        (tkz)->token->attr_last->name_begin = (v_begin);                       \
    }                                                                          \
    while (0)
127 
/*
 * Record v_end as name_end of the token's last attribute.
 *
 * Expands to a single parenthesized assignment expression; both parameters
 * are parenthesized so arbitrary pointer expressions may be passed.
 */
#define lxb_html_tokenizer_state_token_attr_set_name_end(tkz, v_end)           \
    ((tkz)->token->attr_last->name_end = (v_end))
130 
/*
 * Set name_end of the token's last attribute to tkz->last.
 *
 * NOTE(review): "oef" in the name is presumably a transposed "eof"; the name
 * is kept as-is because callers depend on it.
 *
 * tkz is parenthesized in the expansion and is expanded twice.
 */
#define lxb_html_tokenizer_state_token_attr_set_name_end_oef(tkz)              \
    ((tkz)->token->attr_last->name_end = (tkz)->last)
133 
/*
 * Reset tkz->pos to tkz->start and record v_begin as value_begin of the
 * token's last attribute (tkz->token->attr_last).
 *
 * Parameters are parenthesized in the expansion; tkz is expanded several
 * times, so pass an expression without side effects.
 */
#define lxb_html_tokenizer_state_token_attr_set_value_begin(tkz, v_begin)      \
    do {                                                                       \
        (tkz)->pos = (tkz)->start;                                             \
        (tkz)->token->attr_last->value_begin = (v_begin);                      \
    }                                                                          \
    while (0)
140 
/*
 * Record v_end as value_end of the token's last attribute.
 *
 * Expands to a single parenthesized assignment expression; both parameters
 * are parenthesized so arbitrary pointer expressions may be passed.
 */
#define lxb_html_tokenizer_state_token_attr_set_value_end(tkz, v_end)          \
    ((tkz)->token->attr_last->value_end = (v_end))
143 
/*
 * Set value_end of the token's last attribute to tkz->last.
 *
 * NOTE(review): "oef" in the name is presumably a transposed "eof"; the name
 * is kept as-is because callers depend on it.
 *
 * tkz is parenthesized in the expansion and is expanded twice.
 */
#define lxb_html_tokenizer_state_token_attr_set_value_end_oef(tkz)             \
    ((tkz)->token->attr_last->value_end = (tkz)->last)
146 
/*
 * Internal building block: hand tkz->token to tkz->callback_token_done
 * (with tkz->callback_token_ctx) and store whatever it returns back into
 * tkz->token.
 *
 * If the callback returns NULL, the enclosing function executes
 * `return v_end;`. Before returning, tkz->status is changed to
 * LXB_STATUS_ERROR only when it is still LXB_STATUS_OK, so a status already
 * set by the callback is preserved.
 *
 * The expansion is one brace-enclosed compound statement, so both steps stay
 * together even under an unbraced `if`/loop. It is deliberately not wrapped
 * in do { } while (0): the other macros in this header invoke it without a
 * trailing semicolon, which a compound statement permits.
 *
 * tkz is parenthesized in the expansion and is expanded several times; pass
 * an expression without side effects.
 */
#define _lxb_html_tokenizer_state_token_done_m(tkz, v_end)                     \
    {                                                                          \
        (tkz)->token = (tkz)->callback_token_done((tkz), (tkz)->token,         \
                                                  (tkz)->callback_token_ctx);  \
        if ((tkz)->token == NULL) {                                            \
            if ((tkz)->status == LXB_STATUS_OK) {                              \
                (tkz)->status = LXB_STATUS_ERROR;                              \
            }                                                                  \
            return v_end;                                                      \
        }                                                                      \
    }
156 
/*
 * Finish the current token.
 *
 * When the token is non-empty (token->begin != token->end) it is delivered
 * through _lxb_html_tokenizer_state_token_done_m(), which may execute
 * `return v_end;` in the enclosing function. Afterwards, whether or not the
 * token was delivered, the token is reset with lxb_html_token_clean() and
 * tkz->pos is rewound to tkz->start.
 *
 * tkz is parenthesized in the expansion (and forwarded parenthesized to the
 * inner macro); it is expanded several times, so pass an expression without
 * side effects.
 */
#define lxb_html_tokenizer_state_token_done_m(tkz, v_end)                      \
    do {                                                                       \
        if ((tkz)->token->begin != (tkz)->token->end) {                        \
            _lxb_html_tokenizer_state_token_done_m((tkz), v_end)               \
        }                                                                      \
        lxb_html_token_clean((tkz)->token);                                    \
        (tkz)->pos = (tkz)->start;                                             \
    }                                                                          \
    while (0)
166 
/*
 * Finish the current token without the emptiness check: always deliver it
 * through _lxb_html_tokenizer_state_token_done_m() (which may execute
 * `return v_end;` in the enclosing function), then reset it with
 * lxb_html_token_clean().
 *
 * Unlike lxb_html_tokenizer_state_token_done_m, this variant does not touch
 * tkz->pos.
 *
 * tkz is parenthesized in the expansion (and forwarded parenthesized to the
 * inner macro); it is expanded more than once.
 */
#define lxb_html_tokenizer_state_token_done_wo_check_m(tkz, v_end)             \
    do {                                                                       \
        _lxb_html_tokenizer_state_token_done_m((tkz), v_end)                   \
        lxb_html_token_clean((tkz)->token);                                    \
    }                                                                          \
    while (0)
173 
/*
 * Publish the range [tkz->start, tkz->pos) as the token's text:
 * token->text_start receives tkz->start and token->text_end receives
 * tkz->pos.
 *
 * tkz is parenthesized in the expansion and is expanded several times; pass
 * an expression without side effects.
 */
#define lxb_html_tokenizer_state_set_text(tkz)                                 \
    do {                                                                       \
        (tkz)->token->text_start = (tkz)->start;                               \
        (tkz)->token->text_end = (tkz)->pos;                                   \
    }                                                                          \
    while (0)
180 
/*
 * Emit the current token as a text token, but only when it is non-empty
 * (token->begin != token->end). In that case:
 *   1. token->tag_id is set to LXB_TAG__TEXT;
 *   2. the text range is filled in via lxb_html_tokenizer_state_set_text();
 *   3. the token is delivered via _lxb_html_tokenizer_state_token_done_m(),
 *      which may execute `return v_end;` in the enclosing function;
 *   4. the token is reset with lxb_html_token_clean().
 * An empty token is left untouched.
 *
 * tkz is parenthesized in the expansion and forwarded parenthesized to the
 * nested macros; it is expanded several times, so pass an expression without
 * side effects.
 */
#define lxb_html_tokenizer_state_token_emit_text_not_empty_m(tkz, v_end)       \
    do {                                                                       \
        if ((tkz)->token->begin != (tkz)->token->end) {                        \
            (tkz)->token->tag_id = LXB_TAG__TEXT;                              \
                                                                               \
            lxb_html_tokenizer_state_set_text((tkz));                          \
            _lxb_html_tokenizer_state_token_done_m((tkz), v_end)               \
            lxb_html_token_clean((tkz)->token);                                \
        }                                                                      \
    }                                                                          \
    while (0)
192 
193 
194 LXB_API const lxb_char_t *
195 lxb_html_tokenizer_state_data_before(lxb_html_tokenizer_t *tkz,
196                                      const lxb_char_t *data,
197                                      const lxb_char_t *end);
198 
199 LXB_API const lxb_char_t *
200 lxb_html_tokenizer_state_plaintext_before(lxb_html_tokenizer_t *tkz,
201                                           const lxb_char_t *data,
202                                           const lxb_char_t *end);
203 
204 LXB_API const lxb_char_t *
205 lxb_html_tokenizer_state_before_attribute_name(lxb_html_tokenizer_t *tkz,
206                                                const lxb_char_t *data,
207                                                const lxb_char_t *end);
208 
209 LXB_API const lxb_char_t *
210 lxb_html_tokenizer_state_self_closing_start_tag(lxb_html_tokenizer_t *tkz,
211                                                 const lxb_char_t *data,
212                                                 const lxb_char_t *end);
213 
214 LXB_API const lxb_char_t *
215 lxb_html_tokenizer_state_cr(lxb_html_tokenizer_t *tkz, const lxb_char_t *data,
216                             const lxb_char_t *end);
217 
218 LXB_API const lxb_char_t *
219 lxb_html_tokenizer_state_char_ref(lxb_html_tokenizer_t *tkz,
220                                   const lxb_char_t *data, const lxb_char_t *end);
221 
222 
223 #ifdef __cplusplus
224 } /* extern "C" */
225 #endif
226 
227 #endif /* LEXBOR_HTML_TOKENIZER_STATE_H */
228