1 /*
2 * Copyright (C) 2019 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7 #ifndef LEXBOR_ENCODING_ENCODING_H
8 #define LEXBOR_ENCODING_ENCODING_H
9
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13
14
15 #include "lexbor/encoding/base.h"
16 #include "lexbor/encoding/res.h"
17 #include "lexbor/encoding/encode.h"
18 #include "lexbor/encoding/decode.h"
19
20 #include "lexbor/core/shs.h"
21
22
23 /*
24 * Before searching will be removed any leading and trailing
25 * ASCII whitespace in name.
26 */
27 LXB_API const lxb_encoding_data_t *
28 lxb_encoding_data_by_pre_name(const lxb_char_t *name, size_t length);
29
30
31 /*
32 * Inline functions
33 */
34
35 /*
36 * Encode
37 */
38 lxb_inline lxb_status_t
lxb_encoding_encode_init(lxb_encoding_encode_t * encode,const lxb_encoding_data_t * encoding_data,lxb_char_t * buffer_out,size_t buffer_length)39 lxb_encoding_encode_init(lxb_encoding_encode_t *encode,
40 const lxb_encoding_data_t *encoding_data,
41 lxb_char_t *buffer_out, size_t buffer_length)
42 {
43 if (encoding_data == NULL) {
44 return LXB_STATUS_ERROR_WRONG_ARGS;
45 }
46
47 memset(encode, 0, sizeof(lxb_encoding_encode_t));
48
49 encode->buffer_out = buffer_out;
50 encode->buffer_length = buffer_length;
51 encode->encoding_data = encoding_data;
52
53 return LXB_STATUS_OK;
54 }
55
56 lxb_inline lxb_status_t
lxb_encoding_encode_finish(lxb_encoding_encode_t * encode)57 lxb_encoding_encode_finish(lxb_encoding_encode_t *encode)
58 {
59 if (encode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP) {
60 return lxb_encoding_encode_iso_2022_jp_eof(encode);
61 }
62
63 return LXB_STATUS_OK;
64 }
65
66 lxb_inline lxb_char_t *
lxb_encoding_encode_buf(lxb_encoding_encode_t * encode)67 lxb_encoding_encode_buf(lxb_encoding_encode_t *encode)
68 {
69 return encode->buffer_out;
70 }
71
72 lxb_inline void
lxb_encoding_encode_buf_set(lxb_encoding_encode_t * encode,lxb_char_t * buffer_out,size_t buffer_length)73 lxb_encoding_encode_buf_set(lxb_encoding_encode_t *encode,
74 lxb_char_t *buffer_out, size_t buffer_length)
75 {
76 encode->buffer_out = buffer_out;
77 encode->buffer_length = buffer_length;
78 encode->buffer_used = 0;
79 }
80
81 lxb_inline void
lxb_encoding_encode_buf_used_set(lxb_encoding_encode_t * encode,size_t buffer_used)82 lxb_encoding_encode_buf_used_set(lxb_encoding_encode_t *encode,
83 size_t buffer_used)
84 {
85 encode->buffer_used = buffer_used;
86 }
87
88 lxb_inline size_t
lxb_encoding_encode_buf_used(lxb_encoding_encode_t * encode)89 lxb_encoding_encode_buf_used(lxb_encoding_encode_t *encode)
90 {
91 return encode->buffer_used;
92 }
93
94 lxb_inline lxb_status_t
lxb_encoding_encode_replace_set(lxb_encoding_encode_t * encode,const lxb_char_t * replace,size_t length)95 lxb_encoding_encode_replace_set(lxb_encoding_encode_t *encode,
96 const lxb_char_t *replace, size_t length)
97 {
98 if (encode->buffer_out == NULL || encode->buffer_length < length) {
99 return LXB_STATUS_SMALL_BUFFER;
100 }
101
102 encode->replace_to = replace;
103 encode->replace_len = length;
104
105 return LXB_STATUS_OK;
106 }
107
108 lxb_inline lxb_status_t
lxb_encoding_encode_buf_add_to(lxb_encoding_encode_t * encode,lxb_char_t * data,size_t length)109 lxb_encoding_encode_buf_add_to(lxb_encoding_encode_t *encode,
110 lxb_char_t *data, size_t length)
111 {
112 if ((encode->buffer_used + length) > encode->buffer_length) {
113 return LXB_STATUS_SMALL_BUFFER;
114 }
115
116 memcpy(&encode->buffer_out[encode->buffer_used], data, length);
117
118 encode->buffer_used += length;
119
120 return LXB_STATUS_OK;
121 }
122
123 /*
124 * Decode
125 */
126 lxb_inline lxb_status_t
lxb_encoding_decode_buf_add_to(lxb_encoding_decode_t * decode,const lxb_codepoint_t * data,size_t length)127 lxb_encoding_decode_buf_add_to(lxb_encoding_decode_t *decode,
128 const lxb_codepoint_t *data, size_t length)
129 {
130 if ((decode->buffer_used + length) > decode->buffer_length) {
131 return LXB_STATUS_SMALL_BUFFER;
132 }
133
134 memcpy(&decode->buffer_out[decode->buffer_used], data,
135 sizeof(lxb_codepoint_t) * length);
136
137 decode->buffer_used += length;
138
139 return LXB_STATUS_OK;
140 }
141
142 lxb_inline lxb_status_t
lxb_encoding_decode_init(lxb_encoding_decode_t * decode,const lxb_encoding_data_t * encoding_data,lxb_codepoint_t * buffer_out,size_t buffer_length)143 lxb_encoding_decode_init(lxb_encoding_decode_t *decode,
144 const lxb_encoding_data_t *encoding_data,
145 lxb_codepoint_t *buffer_out, size_t buffer_length)
146 {
147 if (encoding_data == NULL) {
148 return LXB_STATUS_ERROR_WRONG_ARGS;
149 }
150
151 memset(decode, 0, sizeof(lxb_encoding_decode_t));
152
153 decode->buffer_out = buffer_out;
154 decode->buffer_length = buffer_length;
155 decode->encoding_data = encoding_data;
156
157 return LXB_STATUS_OK;
158 }
159
160 lxb_inline lxb_status_t
lxb_encoding_decode_finish(lxb_encoding_decode_t * decode)161 lxb_encoding_decode_finish(lxb_encoding_decode_t *decode)
162 {
163 lxb_status_t status;
164
165 if (decode->status != LXB_STATUS_OK) {
166
167 if (decode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP
168 && decode->u.iso_2022_jp.state == LXB_ENCODING_DECODE_2022_JP_ASCII)
169 {
170 return LXB_STATUS_OK;
171 }
172
173 if (decode->replace_to == NULL) {
174 return LXB_STATUS_ERROR;
175 }
176
177 status = lxb_encoding_decode_buf_add_to(decode, decode->replace_to,
178 decode->replace_len);
179 if (status == LXB_STATUS_SMALL_BUFFER) {
180 return status;
181 }
182 }
183
184 return LXB_STATUS_OK;
185 }
186
187 lxb_inline lxb_codepoint_t *
lxb_encoding_decode_buf(lxb_encoding_decode_t * decode)188 lxb_encoding_decode_buf(lxb_encoding_decode_t *decode)
189 {
190 return decode->buffer_out;
191 }
192
193 lxb_inline void
lxb_encoding_decode_buf_set(lxb_encoding_decode_t * decode,lxb_codepoint_t * buffer_out,size_t buffer_length)194 lxb_encoding_decode_buf_set(lxb_encoding_decode_t *decode,
195 lxb_codepoint_t *buffer_out, size_t buffer_length)
196 {
197 decode->buffer_out = buffer_out;
198 decode->buffer_length = buffer_length;
199 decode->buffer_used = 0;
200 }
201
202 lxb_inline void
lxb_encoding_decode_buf_used_set(lxb_encoding_decode_t * decode,size_t buffer_used)203 lxb_encoding_decode_buf_used_set(lxb_encoding_decode_t *decode,
204 size_t buffer_used)
205 {
206 decode->buffer_used = buffer_used;
207 }
208
209 lxb_inline size_t
lxb_encoding_decode_buf_used(lxb_encoding_decode_t * decode)210 lxb_encoding_decode_buf_used(lxb_encoding_decode_t *decode)
211 {
212 return decode->buffer_used;
213 }
214
215 lxb_inline lxb_status_t
lxb_encoding_decode_replace_set(lxb_encoding_decode_t * decode,const lxb_codepoint_t * replace,size_t length)216 lxb_encoding_decode_replace_set(lxb_encoding_decode_t *decode,
217 const lxb_codepoint_t *replace, size_t length)
218 {
219 if (decode->buffer_out == NULL || decode->buffer_length < length) {
220 return LXB_STATUS_SMALL_BUFFER;
221 }
222
223 decode->replace_to = replace;
224 decode->replace_len = length;
225
226 return LXB_STATUS_OK;
227 }
228
229 /*
230 * Single encode.
231 */
232 lxb_inline lxb_status_t
lxb_encoding_encode_init_single(lxb_encoding_encode_t * encode,const lxb_encoding_data_t * encoding_data)233 lxb_encoding_encode_init_single(lxb_encoding_encode_t *encode,
234 const lxb_encoding_data_t *encoding_data)
235 {
236 if (encoding_data == NULL) {
237 return LXB_STATUS_ERROR_WRONG_ARGS;
238 }
239
240 memset(encode, 0, sizeof(lxb_encoding_encode_t));
241
242 encode->encoding_data = encoding_data;
243
244 return LXB_STATUS_OK;
245 }
246
247 lxb_inline int8_t
lxb_encoding_encode_finish_single(lxb_encoding_encode_t * encode,lxb_char_t ** data,const lxb_char_t * end)248 lxb_encoding_encode_finish_single(lxb_encoding_encode_t *encode,
249 lxb_char_t **data, const lxb_char_t *end)
250 {
251 if (encode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP) {
252 return lxb_encoding_encode_iso_2022_jp_eof_single(encode, data, end);
253 }
254
255 return 0;
256 }
257
258 /*
259 * Single decode.
260 */
261 lxb_inline lxb_status_t
lxb_encoding_decode_init_single(lxb_encoding_decode_t * decode,const lxb_encoding_data_t * encoding_data)262 lxb_encoding_decode_init_single(lxb_encoding_decode_t *decode,
263 const lxb_encoding_data_t *encoding_data)
264 {
265 if (encoding_data == NULL) {
266 return LXB_STATUS_ERROR_WRONG_ARGS;
267 }
268
269 memset(decode, 0, sizeof(lxb_encoding_decode_t));
270
271 decode->encoding_data = encoding_data;
272
273 return LXB_STATUS_OK;
274 }
275
276 lxb_inline lxb_status_t
lxb_encoding_decode_finish_single(lxb_encoding_decode_t * decode)277 lxb_encoding_decode_finish_single(lxb_encoding_decode_t *decode)
278 {
279 if (decode->status != LXB_STATUS_OK) {
280
281 if (decode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP
282 && decode->u.iso_2022_jp.state == LXB_ENCODING_DECODE_2022_JP_ASCII)
283 {
284 return LXB_STATUS_OK;
285 }
286
287 return LXB_STATUS_ERROR;
288 }
289
290 return LXB_STATUS_OK;
291 }
292
293 /*
294 * Encoding data.
295 */
296 lxb_inline const lxb_encoding_data_t *
lxb_encoding_data_by_name(const lxb_char_t * name,size_t length)297 lxb_encoding_data_by_name(const lxb_char_t *name, size_t length)
298 {
299 const lexbor_shs_entry_t *entry;
300
301 if (length == 0) {
302 return NULL;
303 }
304
305 entry = lexbor_shs_entry_get_lower_static(lxb_encoding_res_shs_entities,
306 name, length);
307 if (entry == NULL) {
308 return NULL;
309 }
310
311 return (const lxb_encoding_data_t *) entry->value;
312 }
313
314 lxb_inline const lxb_encoding_data_t *
lxb_encoding_data(lxb_encoding_t encoding)315 lxb_encoding_data(lxb_encoding_t encoding)
316 {
317 if (encoding >= LXB_ENCODING_LAST_ENTRY) {
318 return NULL;
319 }
320
321 return &lxb_encoding_res_map[encoding];
322 }
323
324 lxb_inline lxb_encoding_encode_f
lxb_encoding_encode_function(lxb_encoding_t encoding)325 lxb_encoding_encode_function(lxb_encoding_t encoding)
326 {
327 if (encoding >= LXB_ENCODING_LAST_ENTRY) {
328 return NULL;
329 }
330
331 return lxb_encoding_res_map[encoding].encode;
332 }
333
334 lxb_inline lxb_encoding_decode_f
lxb_encoding_decode_function(lxb_encoding_t encoding)335 lxb_encoding_decode_function(lxb_encoding_t encoding)
336 {
337 if (encoding >= LXB_ENCODING_LAST_ENTRY) {
338 return NULL;
339 }
340
341 return lxb_encoding_res_map[encoding].decode;
342 }
343
344 lxb_inline lxb_status_t
lxb_encoding_data_call_encode(lxb_encoding_data_t * encoding_data,lxb_encoding_encode_t * ctx,const lxb_codepoint_t ** cp,const lxb_codepoint_t * end)345 lxb_encoding_data_call_encode(lxb_encoding_data_t *encoding_data, lxb_encoding_encode_t *ctx,
346 const lxb_codepoint_t **cp, const lxb_codepoint_t *end)
347 {
348 return encoding_data->encode(ctx, cp, end);
349 }
350
351 lxb_inline lxb_status_t
lxb_encoding_data_call_decode(lxb_encoding_data_t * encoding_data,lxb_encoding_decode_t * ctx,const lxb_char_t ** data,const lxb_char_t * end)352 lxb_encoding_data_call_decode(lxb_encoding_data_t *encoding_data, lxb_encoding_decode_t *ctx,
353 const lxb_char_t **data, const lxb_char_t *end)
354 {
355 return encoding_data->decode(ctx, data, end);
356 }
357
358 lxb_inline lxb_encoding_t
lxb_encoding_data_encoding(lxb_encoding_data_t * data)359 lxb_encoding_data_encoding(lxb_encoding_data_t *data)
360 {
361 return data->encoding;
362 }
363
364 /*
365 * No inline functions for ABI.
366 */
367 LXB_API lxb_status_t
368 lxb_encoding_encode_init_noi(lxb_encoding_encode_t *encode,
369 const lxb_encoding_data_t *encoding_data,
370 lxb_char_t *buffer_out, size_t buffer_length);
371
372 LXB_API lxb_status_t
373 lxb_encoding_encode_finish_noi(lxb_encoding_encode_t *encode);
374
375 LXB_API lxb_char_t *
376 lxb_encoding_encode_buf_noi(lxb_encoding_encode_t *encode);
377
378 LXB_API void
379 lxb_encoding_encode_buf_set_noi(lxb_encoding_encode_t *encode,
380 lxb_char_t *buffer_out, size_t buffer_length);
381
382 LXB_API void
383 lxb_encoding_encode_buf_used_set_noi(lxb_encoding_encode_t *encode,
384 size_t buffer_used);
385
386 LXB_API size_t
387 lxb_encoding_encode_buf_used_noi(lxb_encoding_encode_t *encode);
388
389 LXB_API lxb_status_t
390 lxb_encoding_encode_replace_set_noi(lxb_encoding_encode_t *encode,
391 const lxb_char_t *replace, size_t buffer_length);
392
393 LXB_API lxb_status_t
394 lxb_encoding_encode_buf_add_to_noi(lxb_encoding_encode_t *encode,
395 lxb_char_t *data, size_t length);
396
397 LXB_API lxb_status_t
398 lxb_encoding_decode_init_noi(lxb_encoding_decode_t *decode,
399 const lxb_encoding_data_t *encoding_data,
400 lxb_codepoint_t *buffer_out, size_t buffer_length);
401
402 LXB_API lxb_status_t
403 lxb_encoding_decode_finish_noi(lxb_encoding_decode_t *decode);
404
405 LXB_API lxb_codepoint_t *
406 lxb_encoding_decode_buf_noi(lxb_encoding_decode_t *decode);
407
408 LXB_API void
409 lxb_encoding_decode_buf_set_noi(lxb_encoding_decode_t *decode,
410 lxb_codepoint_t *buffer_out, size_t buffer_length);
411
412 LXB_API void
413 lxb_encoding_decode_buf_used_set_noi(lxb_encoding_decode_t *decode,
414 size_t buffer_used);
415
416 LXB_API size_t
417 lxb_encoding_decode_buf_used_noi(lxb_encoding_decode_t *decode);
418
419 LXB_API lxb_status_t
420 lxb_encoding_decode_replace_set_noi(lxb_encoding_decode_t *decode,
421 const lxb_codepoint_t *replace, size_t length);
422
423 LXB_API lxb_status_t
424 lxb_encoding_decode_buf_add_to_noi(lxb_encoding_decode_t *decode,
425 const lxb_codepoint_t *data, size_t length);
426
427 LXB_API lxb_status_t
428 lxb_encoding_encode_init_single_noi(lxb_encoding_encode_t *encode,
429 const lxb_encoding_data_t *encoding_data);
430
431 LXB_API int8_t
432 lxb_encoding_encode_finish_single_noi(lxb_encoding_encode_t *encode,
433 lxb_char_t **data, const lxb_char_t *end);
434
435 LXB_API lxb_status_t
436 lxb_encoding_decode_init_single_noi(lxb_encoding_decode_t *decode,
437 const lxb_encoding_data_t *encoding_data);
438
439 LXB_API lxb_status_t
440 lxb_encoding_decode_finish_single_noi(lxb_encoding_decode_t *decode);
441
442 LXB_API const lxb_encoding_data_t *
443 lxb_encoding_data_by_name_noi(const lxb_char_t *name, size_t length);
444
445 LXB_API const lxb_encoding_data_t *
446 lxb_encoding_data_noi(lxb_encoding_t encoding);
447
448 LXB_API lxb_encoding_encode_f
449 lxb_encoding_encode_function_noi(lxb_encoding_t encoding);
450
451 LXB_API lxb_encoding_decode_f
452 lxb_encoding_decode_function_noi(lxb_encoding_t encoding);
453
454 LXB_API lxb_status_t
455 lxb_encoding_data_call_encode_noi(lxb_encoding_data_t *encoding_data, lxb_encoding_encode_t *ctx,
456 const lxb_codepoint_t **cp, const lxb_codepoint_t *end);
457 LXB_API lxb_status_t
458 lxb_encoding_data_call_decode_noi(lxb_encoding_data_t *encoding_data, lxb_encoding_decode_t *ctx,
459 const lxb_char_t **data, const lxb_char_t *end);
460
461 LXB_API lxb_encoding_t
462 lxb_encoding_data_encoding_noi(lxb_encoding_data_t *data);
463
464 LXB_API size_t
465 lxb_encoding_encode_t_sizeof(void);
466
467 LXB_API size_t
468 lxb_encoding_decode_t_sizeof(void);
469
470
471 #ifdef __cplusplus
472 } /* extern "C" */
473 #endif
474
475 #endif /* LEXBOR_ENCODING_ENCODING_H */
476