1 /* 2 * Copyright (C) 2018-2020 Alexander Borisov 3 * 4 * Author: Alexander Borisov <borisov@lexbor.com> 5 */ 6 7 #ifndef LEXBOR_HTML_TOKENIZER_STATE_H 8 #define LEXBOR_HTML_TOKENIZER_STATE_H 9 10 #ifdef __cplusplus 11 extern "C" { 12 #endif 13 14 #include "lexbor/html/tokenizer.h" 15 16 #define lxb_html_tokenizer_state_begin_set(tkz, v_data) \ 17 (tkz->begin = v_data) 18 19 #define lxb_html_tokenizer_state_append_data_m(tkz, v_data) \ 20 do { \ 21 if (lxb_html_tokenizer_temp_append_data(tkz, v_data)) { \ 22 return end; \ 23 } \ 24 } \ 25 while (0) 26 27 #define lxb_html_tokenizer_state_append_m(tkz, v_data, size) \ 28 do { \ 29 if (lxb_html_tokenizer_temp_append(tkz, (const lxb_char_t *) (v_data), \ 30 (size))) \ 31 { \ 32 return end; \ 33 } \ 34 } \ 35 while (0) 36 37 #define lxb_html_tokenizer_state_append_replace_m(tkz) \ 38 do { \ 39 if (lxb_html_tokenizer_temp_append(tkz, \ 40 lexbor_str_res_ansi_replacement_character, \ 41 sizeof(lexbor_str_res_ansi_replacement_character) - 1))\ 42 { \ 43 return end; \ 44 } \ 45 } \ 46 while (0) 47 48 #define lxb_html_tokenizer_state_set_tag_m(tkz, _start, _end) \ 49 do { \ 50 const lxb_tag_data_t *tag; \ 51 tag = lxb_tag_append_lower(tkz->tags, (_start), (_end) - (_start)); \ 52 if (tag == NULL) { \ 53 tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; \ 54 return end; \ 55 } \ 56 tkz->token->tag_id = tag->tag_id; \ 57 } \ 58 while (0) 59 60 #define lxb_html_tokenizer_state_set_name_m(tkz) \ 61 do { \ 62 lxb_dom_attr_data_t *data; \ 63 data = lxb_dom_attr_local_name_append(tkz->attrs, tkz->start, \ 64 tkz->pos - tkz->start); \ 65 if (data == NULL) { \ 66 tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; \ 67 return end; \ 68 } \ 69 tkz->token->attr_last->name = data; \ 70 } \ 71 while (0) 72 73 #define lxb_html_tokenizer_state_set_value_m(tkz) \ 74 do { \ 75 lxb_html_token_attr_t *attr = tkz->token->attr_last; \ 76 \ 77 attr->value_size = (size_t) (tkz->pos - tkz->start); \ 78 \ 79 attr->value = lexbor_mraw_alloc(tkz->attrs_mraw, attr->value_size + 1);\ 80 if (attr->value == NULL) { \ 81 tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; \ 82 return end; \ 83 } \ 84 memcpy(attr->value, tkz->start, attr->value_size); \ 85 attr->value[attr->value_size] = 0x00; \ 86 } \ 87 while (0) 88 89 #define lxb_html_tokenizer_state_token_set_begin(tkz, v_begin) \ 90 do { \ 91 tkz->pos = tkz->start; \ 92 tkz->token->begin = v_begin; \ 93 tkz->token->line = tkz->current_line; \ 94 tkz->token->column = tkz->current_column; \ 95 } \ 96 while (0) 97 98 #define lxb_html_tokenizer_state_token_set_end(tkz, v_end) \ 99 (tkz->token->end = v_end) 100 101 #define lxb_html_tokenizer_state_token_set_end_down(tkz, v_end, offset) \ 102 do { \ 103 tkz->token->end = lexbor_in_node_pos_down(tkz->incoming_node, NULL, \ 104 v_end, offset); \ 105 } \ 106 while (0) 107 108 #define lxb_html_tokenizer_state_token_set_end_oef(tkz) \ 109 (tkz->token->end = tkz->last) 110 111 #define lxb_html_tokenizer_state_token_attr_add_m(tkz, attr, v_return) \ 112 do { \ 113 attr = lxb_html_token_attr_append(tkz->token, tkz->dobj_token_attr); \ 114 if (attr == NULL) { \ 115 tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; \ 116 return v_return; \ 117 } \ 118 } \ 119 while (0) 120 121 #define lxb_html_tokenizer_state_token_attr_set_name_begin(tkz, v_begin) \ 122 do { \ 123 tkz->pos = tkz->start; \ 124 tkz->token->attr_last->name_begin = v_begin; \ 125 } \ 126 while (0) 127 128 #define lxb_html_tokenizer_state_token_attr_set_name_end(tkz, v_end) \ 129 (tkz->token->attr_last->name_end = v_end) 130 131 #define lxb_html_tokenizer_state_token_attr_set_name_end_oef(tkz) \ 132 (tkz->token->attr_last->name_end = tkz->last) 133 134 #define lxb_html_tokenizer_state_token_attr_set_value_begin(tkz, v_begin) \ 135 do { \ 136 tkz->pos = tkz->start; \ 137 tkz->token->attr_last->value_begin = v_begin; \ 138 } \ 139 while (0) 140 141 #define lxb_html_tokenizer_state_token_attr_set_value_end(tkz, v_end) \ 142 (tkz->token->attr_last->value_end = v_end) 143 144 #define lxb_html_tokenizer_state_token_attr_set_value_end_oef(tkz) \ 145 (tkz->token->attr_last->value_end = tkz->last) 146 147 #define _lxb_html_tokenizer_state_token_done_m(tkz, v_end) \ 148 tkz->token = tkz->callback_token_done(tkz, tkz->token, \ 149 tkz->callback_token_ctx); \ 150 if (tkz->token == NULL) { \ 151 if (tkz->status == LXB_STATUS_OK) { \ 152 tkz->status = LXB_STATUS_ERROR; \ 153 } \ 154 return v_end; \ 155 } 156 157 #define lxb_html_tokenizer_state_token_done_m(tkz, v_end) \ 158 do { \ 159 if (tkz->token->begin != tkz->token->end) { \ 160 _lxb_html_tokenizer_state_token_done_m(tkz, v_end) \ 161 } \ 162 lxb_html_token_clean(tkz->token); \ 163 tkz->pos = tkz->start; \ 164 } \ 165 while (0) 166 167 #define lxb_html_tokenizer_state_token_done_wo_check_m(tkz, v_end) \ 168 do { \ 169 _lxb_html_tokenizer_state_token_done_m(tkz, v_end) \ 170 lxb_html_token_clean(tkz->token); \ 171 } \ 172 while (0) 173 174 #define lxb_html_tokenizer_state_set_text(tkz) \ 175 do { \ 176 tkz->token->text_start = tkz->start; \ 177 tkz->token->text_end = tkz->pos; \ 178 } \ 179 while (0) 180 181 #define lxb_html_tokenizer_state_token_emit_text_not_empty_m(tkz, v_end) \ 182 do { \ 183 if (tkz->token->begin != tkz->token->end) { \ 184 tkz->token->tag_id = LXB_TAG__TEXT; \ 185 \ 186 lxb_html_tokenizer_state_set_text(tkz); \ 187 _lxb_html_tokenizer_state_token_done_m(tkz, v_end) \ 188 lxb_html_token_clean(tkz->token); \ 189 } \ 190 } \ 191 while (0) 192 193 194 LXB_API const lxb_char_t * 195 lxb_html_tokenizer_state_data_before(lxb_html_tokenizer_t *tkz, 196 const lxb_char_t *data, 197 const lxb_char_t *end); 198 199 LXB_API const lxb_char_t * 200 lxb_html_tokenizer_state_plaintext_before(lxb_html_tokenizer_t *tkz, 201 const lxb_char_t *data, 202 const lxb_char_t *end); 203 204 LXB_API const lxb_char_t * 205 lxb_html_tokenizer_state_before_attribute_name(lxb_html_tokenizer_t *tkz, 206 const lxb_char_t *data, 207 const lxb_char_t *end); 208 209 LXB_API const lxb_char_t * 210 lxb_html_tokenizer_state_self_closing_start_tag(lxb_html_tokenizer_t *tkz, 211 const lxb_char_t *data, 212 const lxb_char_t *end); 213 214 LXB_API const lxb_char_t * 215 lxb_html_tokenizer_state_cr(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, 216 const lxb_char_t *end); 217 218 LXB_API const lxb_char_t * 219 lxb_html_tokenizer_state_char_ref(lxb_html_tokenizer_t *tkz, 220 const lxb_char_t *data, const lxb_char_t *end); 221 222 223 #ifdef __cplusplus 224 } /* extern "C" */ 225 #endif 226 227 #endif /* LEXBOR_HTML_TOKENIZER_STATE_H */ 228