1 /*
2  * Copyright (C) 2019 Alexander Borisov
3  *
4  * Author: Alexander Borisov <borisov@lexbor.com>
5  */
6 
7 #ifndef LEXBOR_ENCODING_ENCODING_H
8 #define LEXBOR_ENCODING_ENCODING_H
9 
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13 
14 
15 #include "lexbor/encoding/base.h"
16 #include "lexbor/encoding/res.h"
17 #include "lexbor/encoding/encode.h"
18 #include "lexbor/encoding/decode.h"
19 
20 #include "lexbor/core/shs.h"
21 
22 
23 /*
24  * Before searching will be removed any leading and trailing
25  * ASCII whitespace in name.
26  */
27 LXB_API const lxb_encoding_data_t *
28 lxb_encoding_data_by_pre_name(const lxb_char_t *name, size_t length);
29 
30 
31 /*
32  * Inline functions
33  */
34 
35 /*
36  * Encode
37  */
38 lxb_inline lxb_status_t
lxb_encoding_encode_init(lxb_encoding_encode_t * encode,const lxb_encoding_data_t * encoding_data,lxb_char_t * buffer_out,size_t buffer_length)39 lxb_encoding_encode_init(lxb_encoding_encode_t *encode,
40                          const lxb_encoding_data_t *encoding_data,
41                          lxb_char_t *buffer_out, size_t buffer_length)
42 {
43     if (encoding_data == NULL) {
44         return LXB_STATUS_ERROR_WRONG_ARGS;
45     }
46 
47     memset(encode, 0, sizeof(lxb_encoding_encode_t));
48 
49     encode->buffer_out = buffer_out;
50     encode->buffer_length = buffer_length;
51     encode->encoding_data = encoding_data;
52 
53     return LXB_STATUS_OK;
54 }
55 
56 lxb_inline lxb_status_t
lxb_encoding_encode_finish(lxb_encoding_encode_t * encode)57 lxb_encoding_encode_finish(lxb_encoding_encode_t *encode)
58 {
59     if (encode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP) {
60         return lxb_encoding_encode_iso_2022_jp_eof(encode);
61     }
62 
63     return LXB_STATUS_OK;
64 }
65 
66 lxb_inline lxb_char_t *
lxb_encoding_encode_buf(lxb_encoding_encode_t * encode)67 lxb_encoding_encode_buf(lxb_encoding_encode_t *encode)
68 {
69     return encode->buffer_out;
70 }
71 
72 lxb_inline void
lxb_encoding_encode_buf_set(lxb_encoding_encode_t * encode,lxb_char_t * buffer_out,size_t buffer_length)73 lxb_encoding_encode_buf_set(lxb_encoding_encode_t *encode,
74                             lxb_char_t *buffer_out, size_t buffer_length)
75 {
76     encode->buffer_out = buffer_out;
77     encode->buffer_length = buffer_length;
78     encode->buffer_used = 0;
79 }
80 
81 lxb_inline void
lxb_encoding_encode_buf_used_set(lxb_encoding_encode_t * encode,size_t buffer_used)82 lxb_encoding_encode_buf_used_set(lxb_encoding_encode_t *encode,
83                                  size_t buffer_used)
84 {
85     encode->buffer_used = buffer_used;
86 }
87 
88 lxb_inline size_t
lxb_encoding_encode_buf_used(lxb_encoding_encode_t * encode)89 lxb_encoding_encode_buf_used(lxb_encoding_encode_t *encode)
90 {
91     return encode->buffer_used;
92 }
93 
94 lxb_inline lxb_status_t
lxb_encoding_encode_replace_set(lxb_encoding_encode_t * encode,const lxb_char_t * replace,size_t length)95 lxb_encoding_encode_replace_set(lxb_encoding_encode_t *encode,
96                                 const lxb_char_t *replace, size_t length)
97 {
98     if (encode->buffer_out == NULL || encode->buffer_length < length) {
99         return LXB_STATUS_SMALL_BUFFER;
100     }
101 
102     encode->replace_to = replace;
103     encode->replace_len = length;
104 
105     return LXB_STATUS_OK;
106 }
107 
108 lxb_inline lxb_status_t
lxb_encoding_encode_buf_add_to(lxb_encoding_encode_t * encode,lxb_char_t * data,size_t length)109 lxb_encoding_encode_buf_add_to(lxb_encoding_encode_t *encode,
110                                lxb_char_t *data, size_t length)
111 {
112     if ((encode->buffer_used + length) > encode->buffer_length) {
113         return LXB_STATUS_SMALL_BUFFER;
114     }
115 
116     memcpy(&encode->buffer_out[encode->buffer_used], data, length);
117 
118     encode->buffer_used += length;
119 
120     return LXB_STATUS_OK;
121 }
122 
123 /*
124  * Decode
125  */
126 lxb_inline lxb_status_t
lxb_encoding_decode_buf_add_to(lxb_encoding_decode_t * decode,const lxb_codepoint_t * data,size_t length)127 lxb_encoding_decode_buf_add_to(lxb_encoding_decode_t *decode,
128                                const lxb_codepoint_t *data, size_t length)
129 {
130     if ((decode->buffer_used + length) > decode->buffer_length) {
131         return LXB_STATUS_SMALL_BUFFER;
132     }
133 
134     memcpy(&decode->buffer_out[decode->buffer_used], data,
135            sizeof(lxb_codepoint_t) * length);
136 
137     decode->buffer_used += length;
138 
139     return LXB_STATUS_OK;
140 }
141 
142 lxb_inline lxb_status_t
lxb_encoding_decode_init(lxb_encoding_decode_t * decode,const lxb_encoding_data_t * encoding_data,lxb_codepoint_t * buffer_out,size_t buffer_length)143 lxb_encoding_decode_init(lxb_encoding_decode_t *decode,
144                          const lxb_encoding_data_t *encoding_data,
145                          lxb_codepoint_t *buffer_out, size_t buffer_length)
146 {
147     if (encoding_data == NULL) {
148         return LXB_STATUS_ERROR_WRONG_ARGS;
149     }
150 
151     memset(decode, 0, sizeof(lxb_encoding_decode_t));
152 
153     decode->buffer_out = buffer_out;
154     decode->buffer_length = buffer_length;
155     decode->encoding_data = encoding_data;
156 
157     return LXB_STATUS_OK;
158 }
159 
160 lxb_inline lxb_status_t
lxb_encoding_decode_finish(lxb_encoding_decode_t * decode)161 lxb_encoding_decode_finish(lxb_encoding_decode_t *decode)
162 {
163     lxb_status_t status;
164 
165     if (decode->status != LXB_STATUS_OK) {
166 
167         if (decode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP
168             && decode->u.iso_2022_jp.state == LXB_ENCODING_DECODE_2022_JP_ASCII)
169         {
170             return LXB_STATUS_OK;
171         }
172 
173         if (decode->replace_to == NULL) {
174             return LXB_STATUS_ERROR;
175         }
176 
177         status = lxb_encoding_decode_buf_add_to(decode, decode->replace_to,
178                                                 decode->replace_len);
179         if (status == LXB_STATUS_SMALL_BUFFER) {
180             return status;
181         }
182     }
183 
184     return LXB_STATUS_OK;
185 }
186 
187 lxb_inline lxb_codepoint_t *
lxb_encoding_decode_buf(lxb_encoding_decode_t * decode)188 lxb_encoding_decode_buf(lxb_encoding_decode_t *decode)
189 {
190     return decode->buffer_out;
191 }
192 
193 lxb_inline void
lxb_encoding_decode_buf_set(lxb_encoding_decode_t * decode,lxb_codepoint_t * buffer_out,size_t buffer_length)194 lxb_encoding_decode_buf_set(lxb_encoding_decode_t *decode,
195                             lxb_codepoint_t *buffer_out, size_t buffer_length)
196 {
197     decode->buffer_out = buffer_out;
198     decode->buffer_length = buffer_length;
199     decode->buffer_used = 0;
200 }
201 
202 lxb_inline void
lxb_encoding_decode_buf_used_set(lxb_encoding_decode_t * decode,size_t buffer_used)203 lxb_encoding_decode_buf_used_set(lxb_encoding_decode_t *decode,
204                                  size_t buffer_used)
205 {
206     decode->buffer_used = buffer_used;
207 }
208 
209 lxb_inline size_t
lxb_encoding_decode_buf_used(lxb_encoding_decode_t * decode)210 lxb_encoding_decode_buf_used(lxb_encoding_decode_t *decode)
211 {
212     return decode->buffer_used;
213 }
214 
215 lxb_inline lxb_status_t
lxb_encoding_decode_replace_set(lxb_encoding_decode_t * decode,const lxb_codepoint_t * replace,size_t length)216 lxb_encoding_decode_replace_set(lxb_encoding_decode_t *decode,
217                                 const lxb_codepoint_t *replace, size_t length)
218 {
219     if (decode->buffer_out == NULL || decode->buffer_length < length) {
220         return LXB_STATUS_SMALL_BUFFER;
221     }
222 
223     decode->replace_to = replace;
224     decode->replace_len = length;
225 
226     return LXB_STATUS_OK;
227 }
228 
229 /*
230  * Single encode.
231  */
232 lxb_inline lxb_status_t
lxb_encoding_encode_init_single(lxb_encoding_encode_t * encode,const lxb_encoding_data_t * encoding_data)233 lxb_encoding_encode_init_single(lxb_encoding_encode_t *encode,
234                                 const lxb_encoding_data_t *encoding_data)
235 {
236     if (encoding_data == NULL) {
237         return LXB_STATUS_ERROR_WRONG_ARGS;
238     }
239 
240     memset(encode, 0, sizeof(lxb_encoding_encode_t));
241 
242     encode->encoding_data = encoding_data;
243 
244     return LXB_STATUS_OK;
245 }
246 
247 lxb_inline int8_t
lxb_encoding_encode_finish_single(lxb_encoding_encode_t * encode,lxb_char_t ** data,const lxb_char_t * end)248 lxb_encoding_encode_finish_single(lxb_encoding_encode_t *encode,
249                                   lxb_char_t **data, const lxb_char_t *end)
250 {
251     if (encode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP) {
252         return lxb_encoding_encode_iso_2022_jp_eof_single(encode, data, end);
253     }
254 
255     return 0;
256 }
257 
258 /*
259  * Single decode.
260  */
261 lxb_inline lxb_status_t
lxb_encoding_decode_init_single(lxb_encoding_decode_t * decode,const lxb_encoding_data_t * encoding_data)262 lxb_encoding_decode_init_single(lxb_encoding_decode_t *decode,
263                                 const lxb_encoding_data_t *encoding_data)
264 {
265     if (encoding_data == NULL) {
266         return LXB_STATUS_ERROR_WRONG_ARGS;
267     }
268 
269     memset(decode, 0, sizeof(lxb_encoding_decode_t));
270 
271     decode->encoding_data = encoding_data;
272 
273     return LXB_STATUS_OK;
274 }
275 
276 lxb_inline lxb_status_t
lxb_encoding_decode_finish_single(lxb_encoding_decode_t * decode)277 lxb_encoding_decode_finish_single(lxb_encoding_decode_t *decode)
278 {
279     if (decode->status != LXB_STATUS_OK) {
280 
281         if (decode->encoding_data->encoding == LXB_ENCODING_ISO_2022_JP
282             && decode->u.iso_2022_jp.state == LXB_ENCODING_DECODE_2022_JP_ASCII)
283         {
284             return LXB_STATUS_OK;
285         }
286 
287         return LXB_STATUS_ERROR;
288     }
289 
290     return LXB_STATUS_OK;
291 }
292 
293 /*
294  * Encoding data.
295  */
296 lxb_inline const lxb_encoding_data_t *
lxb_encoding_data_by_name(const lxb_char_t * name,size_t length)297 lxb_encoding_data_by_name(const lxb_char_t *name, size_t length)
298 {
299     const lexbor_shs_entry_t *entry;
300 
301     if (length == 0) {
302         return NULL;
303     }
304 
305     entry = lexbor_shs_entry_get_lower_static(lxb_encoding_res_shs_entities,
306                                               name, length);
307     if (entry == NULL) {
308         return NULL;
309     }
310 
311     return (const lxb_encoding_data_t *) entry->value;
312 }
313 
314 lxb_inline const lxb_encoding_data_t *
lxb_encoding_data(lxb_encoding_t encoding)315 lxb_encoding_data(lxb_encoding_t encoding)
316 {
317     if (encoding >= LXB_ENCODING_LAST_ENTRY) {
318         return NULL;
319     }
320 
321     return &lxb_encoding_res_map[encoding];
322 }
323 
324 lxb_inline lxb_encoding_encode_f
lxb_encoding_encode_function(lxb_encoding_t encoding)325 lxb_encoding_encode_function(lxb_encoding_t encoding)
326 {
327     if (encoding >= LXB_ENCODING_LAST_ENTRY) {
328         return NULL;
329     }
330 
331     return lxb_encoding_res_map[encoding].encode;
332 }
333 
334 lxb_inline lxb_encoding_decode_f
lxb_encoding_decode_function(lxb_encoding_t encoding)335 lxb_encoding_decode_function(lxb_encoding_t encoding)
336 {
337     if (encoding >= LXB_ENCODING_LAST_ENTRY) {
338         return NULL;
339     }
340 
341     return lxb_encoding_res_map[encoding].decode;
342 }
343 
344 lxb_inline lxb_status_t
lxb_encoding_data_call_encode(lxb_encoding_data_t * encoding_data,lxb_encoding_encode_t * ctx,const lxb_codepoint_t ** cp,const lxb_codepoint_t * end)345 lxb_encoding_data_call_encode(lxb_encoding_data_t *encoding_data, lxb_encoding_encode_t *ctx,
346                               const lxb_codepoint_t **cp, const lxb_codepoint_t *end)
347 {
348     return encoding_data->encode(ctx, cp, end);
349 }
350 
351 lxb_inline lxb_status_t
lxb_encoding_data_call_decode(lxb_encoding_data_t * encoding_data,lxb_encoding_decode_t * ctx,const lxb_char_t ** data,const lxb_char_t * end)352 lxb_encoding_data_call_decode(lxb_encoding_data_t *encoding_data, lxb_encoding_decode_t *ctx,
353                               const lxb_char_t **data, const lxb_char_t *end)
354 {
355     return encoding_data->decode(ctx, data, end);
356 }
357 
358 lxb_inline lxb_encoding_t
lxb_encoding_data_encoding(lxb_encoding_data_t * data)359 lxb_encoding_data_encoding(lxb_encoding_data_t *data)
360 {
361     return data->encoding;
362 }
363 
364 /*
365  * No inline functions for ABI.
366  */
367 LXB_API lxb_status_t
368 lxb_encoding_encode_init_noi(lxb_encoding_encode_t *encode,
369                              const lxb_encoding_data_t *encoding_data,
370                              lxb_char_t *buffer_out, size_t buffer_length);
371 
372 LXB_API lxb_status_t
373 lxb_encoding_encode_finish_noi(lxb_encoding_encode_t *encode);
374 
375 LXB_API lxb_char_t *
376 lxb_encoding_encode_buf_noi(lxb_encoding_encode_t *encode);
377 
378 LXB_API void
379 lxb_encoding_encode_buf_set_noi(lxb_encoding_encode_t *encode,
380                                 lxb_char_t *buffer_out, size_t buffer_length);
381 
382 LXB_API void
383 lxb_encoding_encode_buf_used_set_noi(lxb_encoding_encode_t *encode,
384                                      size_t buffer_used);
385 
386 LXB_API size_t
387 lxb_encoding_encode_buf_used_noi(lxb_encoding_encode_t *encode);
388 
389 LXB_API lxb_status_t
390 lxb_encoding_encode_replace_set_noi(lxb_encoding_encode_t *encode,
391                                const lxb_char_t *replace, size_t buffer_length);
392 
393 LXB_API lxb_status_t
394 lxb_encoding_encode_buf_add_to_noi(lxb_encoding_encode_t *encode,
395                                    lxb_char_t *data, size_t length);
396 
397 LXB_API lxb_status_t
398 lxb_encoding_decode_init_noi(lxb_encoding_decode_t *decode,
399                              const lxb_encoding_data_t *encoding_data,
400                              lxb_codepoint_t *buffer_out, size_t buffer_length);
401 
402 LXB_API lxb_status_t
403 lxb_encoding_decode_finish_noi(lxb_encoding_decode_t *decode);
404 
405 LXB_API lxb_codepoint_t *
406 lxb_encoding_decode_buf_noi(lxb_encoding_decode_t *decode);
407 
408 LXB_API void
409 lxb_encoding_decode_buf_set_noi(lxb_encoding_decode_t *decode,
410                              lxb_codepoint_t *buffer_out, size_t buffer_length);
411 
412 LXB_API void
413 lxb_encoding_decode_buf_used_set_noi(lxb_encoding_decode_t *decode,
414                                      size_t buffer_used);
415 
416 LXB_API size_t
417 lxb_encoding_decode_buf_used_noi(lxb_encoding_decode_t *decode);
418 
419 LXB_API lxb_status_t
420 lxb_encoding_decode_replace_set_noi(lxb_encoding_decode_t *decode,
421                                  const lxb_codepoint_t *replace, size_t length);
422 
423 LXB_API lxb_status_t
424 lxb_encoding_decode_buf_add_to_noi(lxb_encoding_decode_t *decode,
425                                    const lxb_codepoint_t *data, size_t length);
426 
427 LXB_API lxb_status_t
428 lxb_encoding_encode_init_single_noi(lxb_encoding_encode_t *encode,
429                                     const lxb_encoding_data_t *encoding_data);
430 
431 LXB_API int8_t
432 lxb_encoding_encode_finish_single_noi(lxb_encoding_encode_t *encode,
433                                       lxb_char_t **data, const lxb_char_t *end);
434 
435 LXB_API lxb_status_t
436 lxb_encoding_decode_init_single_noi(lxb_encoding_decode_t *decode,
437                                     const lxb_encoding_data_t *encoding_data);
438 
439 LXB_API lxb_status_t
440 lxb_encoding_decode_finish_single_noi(lxb_encoding_decode_t *decode);
441 
442 LXB_API const lxb_encoding_data_t *
443 lxb_encoding_data_by_name_noi(const lxb_char_t *name, size_t length);
444 
445 LXB_API const lxb_encoding_data_t *
446 lxb_encoding_data_noi(lxb_encoding_t encoding);
447 
448 LXB_API lxb_encoding_encode_f
449 lxb_encoding_encode_function_noi(lxb_encoding_t encoding);
450 
451 LXB_API lxb_encoding_decode_f
452 lxb_encoding_decode_function_noi(lxb_encoding_t encoding);
453 
454 LXB_API lxb_status_t
455 lxb_encoding_data_call_encode_noi(lxb_encoding_data_t *encoding_data, lxb_encoding_encode_t *ctx,
456                                   const lxb_codepoint_t **cp, const lxb_codepoint_t *end);
457 LXB_API lxb_status_t
458 lxb_encoding_data_call_decode_noi(lxb_encoding_data_t *encoding_data, lxb_encoding_decode_t *ctx,
459                                   const lxb_char_t **data, const lxb_char_t *end);
460 
461 LXB_API lxb_encoding_t
462 lxb_encoding_data_encoding_noi(lxb_encoding_data_t *data);
463 
464 LXB_API size_t
465 lxb_encoding_encode_t_sizeof(void);
466 
467 LXB_API size_t
468 lxb_encoding_decode_t_sizeof(void);
469 
470 
471 #ifdef __cplusplus
472 } /* extern "C" */
473 #endif
474 
475 #endif /* LEXBOR_ENCODING_ENCODING_H */
476