1 /*
2 * charset=UTF-8
3 */
4
5 /*
6 * "streamable kanji code filter and converter"
7 *
8 * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
9 *
10 * This software is released under the GNU Lesser General Public License.
11 * (Version 2.1, February 1999)
12 * Please read the following detail of the licence (in japanese).
13 *
14 * ◆使用許諾条件◆
15 *
16 * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
17 * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
18 * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
19 * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
20 * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
21 * することはできません。
22 *
23 * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
24 * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
25 * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
26 * による許諾を得る必要があります。
27 *
28 * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
29 * ます。「GNU Lesser General Public License」とは、これまでLibrary General
30 * Public Licenseと呼ばれていたものです。
31 * http://www.gnu.org/ --- GNUウェブサイト
32 * http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
33 * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
34 *
35 * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
36 * はありません。
37 *
38 * ◆保証内容◆
39 *
40 * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
41 * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
42 * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
43 * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
44 * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
45 * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
46 * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
47 * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
48 * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
49 * 契約・規定に優先します。
50 *
51 * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
52 *
53 * 〒102-0073
54 * 東京都千代田区九段北1-13-5日本地所第一ビル4F
55 * 株式会社ハッピーサイズ
56 * Phone: 03-3512-3655, Fax: 03-3512-3656
57 * Email: sales@happysize.co.jp
58 * Web: http://happysize.com/
59 *
60 * ◆著者◆
61 *
62 * 金本 茂 <sgk@happysize.co.jp>
63 *
64 * ◆履歴◆
65 *
66 * 1998/11/10 sgk implementation in C++
67 * 1999/4/25 sgk Cで書きなおし。
68 * 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
69 * 1999/6/?? Unicodeサポート。
70 * 1999/6/22 sgk ライセンスをLGPLに変更。
71 *
72 */
73
74 /*
75 * Unicode support
76 *
77 * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
78 * All rights reserved.
79 *
80 */
81
82
83 #ifdef HAVE_CONFIG_H
84 #include "config.h"
85 #endif
86
87 #include <stddef.h>
88 #include <string.h>
89 #include <limits.h>
90
91 #include "mbfilter.h"
92 #include "mbfl_filter_output.h"
93 #include "mbfilter_8bit.h"
94 #include "mbfilter_pass.h"
95 #include "mbfilter_wchar.h"
96 #include "filters/mbfilter_ascii.h"
97 #include "filters/mbfilter_base64.h"
98 #include "filters/mbfilter_qprint.h"
99 #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
100 #include "filters/mbfilter_utf8.h"
101
102 #include "eaw_table.h"
103
/*
 * Hex character table "0123456789ABCDEF".
 * Entry i holds the ASCII code of the uppercase hexadecimal digit for value i
 * (0x30..0x39 for 0-9, 0x41..0x46 for A-F).
 */
static char mbfl_hexchar_table[] = {
	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46
};
108
109
110
111 /*
112 * encoding filter
113 */
/* Evaluates `statement`; when its value is negative, the enclosing function returns -1. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)
115
116
117 /*
118 * buffering converter
119 */
120 mbfl_buffer_converter *
mbfl_buffer_converter_new(const mbfl_encoding * from,const mbfl_encoding * to,size_t buf_initsz)121 mbfl_buffer_converter_new(
122 const mbfl_encoding *from,
123 const mbfl_encoding *to,
124 size_t buf_initsz)
125 {
126 mbfl_buffer_converter *convd;
127
128 /* allocate */
129 convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof(mbfl_buffer_converter));
130 if (convd == NULL) {
131 return NULL;
132 }
133
134 /* initialize */
135 convd->from = from;
136 convd->to = to;
137
138 /* create convert filter */
139 convd->filter1 = NULL;
140 convd->filter2 = NULL;
141 if (mbfl_convert_filter_get_vtbl(convd->from, convd->to) != NULL) {
142 convd->filter1 = mbfl_convert_filter_new(convd->from, convd->to, mbfl_memory_device_output, NULL, &convd->device);
143 } else {
144 convd->filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, convd->to, mbfl_memory_device_output, NULL, &convd->device);
145 if (convd->filter2 != NULL) {
146 convd->filter1 = mbfl_convert_filter_new(convd->from,
147 &mbfl_encoding_wchar,
148 (int (*)(int, void*))convd->filter2->filter_function,
149 (int (*)(void*))convd->filter2->filter_flush,
150 convd->filter2);
151 if (convd->filter1 == NULL) {
152 mbfl_convert_filter_delete(convd->filter2);
153 }
154 }
155 }
156 if (convd->filter1 == NULL) {
157 mbfl_free(convd);
158 return NULL;
159 }
160
161 mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
162
163 return convd;
164 }
165
166
167 void
mbfl_buffer_converter_delete(mbfl_buffer_converter * convd)168 mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
169 {
170 if (convd != NULL) {
171 if (convd->filter1) {
172 mbfl_convert_filter_delete(convd->filter1);
173 }
174 if (convd->filter2) {
175 mbfl_convert_filter_delete(convd->filter2);
176 }
177 mbfl_memory_device_clear(&convd->device);
178 mbfl_free((void*)convd);
179 }
180 }
181
182 void
mbfl_buffer_converter_reset(mbfl_buffer_converter * convd)183 mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
184 {
185 mbfl_memory_device_reset(&convd->device);
186 }
187
188 int
mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter * convd,int mode)189 mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
190 {
191 if (convd != NULL) {
192 if (convd->filter2 != NULL) {
193 convd->filter2->illegal_mode = mode;
194 } else if (convd->filter1 != NULL) {
195 convd->filter1->illegal_mode = mode;
196 } else {
197 return 0;
198 }
199 }
200
201 return 1;
202 }
203
204 int
mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter * convd,int substchar)205 mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
206 {
207 if (convd != NULL) {
208 if (convd->filter2 != NULL) {
209 convd->filter2->illegal_substchar = substchar;
210 } else if (convd->filter1 != NULL) {
211 convd->filter1->illegal_substchar = substchar;
212 } else {
213 return 0;
214 }
215 }
216
217 return 1;
218 }
219
220 int
mbfl_buffer_converter_strncat(mbfl_buffer_converter * convd,const unsigned char * p,size_t n)221 mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, size_t n)
222 {
223 mbfl_convert_filter *filter;
224 int (*filter_function)(int c, mbfl_convert_filter *filter);
225
226 if (convd != NULL && p != NULL) {
227 filter = convd->filter1;
228 if (filter != NULL) {
229 filter_function = filter->filter_function;
230 while (n > 0) {
231 if ((*filter_function)(*p++, filter) < 0) {
232 break;
233 }
234 n--;
235 }
236 }
237 }
238
239 return n;
240 }
241
242 int
mbfl_buffer_converter_feed(mbfl_buffer_converter * convd,mbfl_string * string)243 mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
244 {
245 return mbfl_buffer_converter_feed2(convd, string, NULL);
246 }
247
248 int
mbfl_buffer_converter_feed2(mbfl_buffer_converter * convd,mbfl_string * string,size_t * loc)249 mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, size_t *loc)
250 {
251 size_t n;
252 unsigned char *p;
253 mbfl_convert_filter *filter;
254 int (*filter_function)(int c, mbfl_convert_filter *filter);
255
256 if (convd == NULL || string == NULL) {
257 return -1;
258 }
259 mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
260 /* feed data */
261 n = string->len;
262 p = string->val;
263
264 filter = convd->filter1;
265 if (filter != NULL) {
266 filter_function = filter->filter_function;
267 while (n > 0) {
268 if ((*filter_function)(*p++, filter) < 0) {
269 if (loc) {
270 *loc = p - string->val;
271 }
272 return -1;
273 }
274 n--;
275 }
276 }
277 if (loc) {
278 *loc = p - string->val;
279 }
280 return 0;
281 }
282
283
284 int
mbfl_buffer_converter_flush(mbfl_buffer_converter * convd)285 mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
286 {
287 if (convd == NULL) {
288 return -1;
289 }
290
291 if (convd->filter1 != NULL) {
292 mbfl_convert_filter_flush(convd->filter1);
293 }
294 if (convd->filter2 != NULL) {
295 mbfl_convert_filter_flush(convd->filter2);
296 }
297
298 return 0;
299 }
300
301 mbfl_string *
mbfl_buffer_converter_getbuffer(mbfl_buffer_converter * convd,mbfl_string * result)302 mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
303 {
304 if (convd != NULL && result != NULL && convd->device.buffer != NULL) {
305 result->encoding = convd->to;
306 result->val = convd->device.buffer;
307 result->len = convd->device.pos;
308 } else {
309 result = NULL;
310 }
311
312 return result;
313 }
314
315 mbfl_string *
mbfl_buffer_converter_result(mbfl_buffer_converter * convd,mbfl_string * result)316 mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
317 {
318 if (convd == NULL || result == NULL) {
319 return NULL;
320 }
321 result->encoding = convd->to;
322 return mbfl_memory_device_result(&convd->device, result);
323 }
324
325 mbfl_string *
mbfl_buffer_converter_feed_result(mbfl_buffer_converter * convd,mbfl_string * string,mbfl_string * result)326 mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
327 mbfl_string *result)
328 {
329 if (convd == NULL || string == NULL || result == NULL) {
330 return NULL;
331 }
332 mbfl_buffer_converter_feed(convd, string);
333 if (convd->filter1 != NULL) {
334 mbfl_convert_filter_flush(convd->filter1);
335 }
336 if (convd->filter2 != NULL) {
337 mbfl_convert_filter_flush(convd->filter2);
338 }
339 result->encoding = convd->to;
340 return mbfl_memory_device_result(&convd->device, result);
341 }
342
/*
 * Return the total illegal-character count recorded by the converter's
 * filters (sum of num_illegalchar over filter1 and filter2, whichever
 * exist). A NULL converter yields 0.
 */
size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
	size_t total = 0;

	if (convd != NULL) {
		if (convd->filter1 != NULL) {
			total += convd->filter1->num_illegalchar;
		}
		if (convd->filter2 != NULL) {
			total += convd->filter2->num_illegalchar;
		}
	}

	return total;
}
361
362 /*
363 * encoding detector
364 */
365 mbfl_encoding_detector *
mbfl_encoding_detector_new(const mbfl_encoding ** elist,int elistsz,int strict)366 mbfl_encoding_detector_new(const mbfl_encoding **elist, int elistsz, int strict)
367 {
368 mbfl_encoding_detector *identd;
369
370 int i, num;
371 mbfl_identify_filter *filter;
372
373 if (elist == NULL || elistsz <= 0) {
374 return NULL;
375 }
376
377 /* allocate */
378 identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
379 if (identd == NULL) {
380 return NULL;
381 }
382 identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
383 if (identd->filter_list == NULL) {
384 mbfl_free(identd);
385 return NULL;
386 }
387
388 /* create filters */
389 i = 0;
390 num = 0;
391 while (i < elistsz) {
392 filter = mbfl_identify_filter_new2(elist[i]);
393 if (filter != NULL) {
394 identd->filter_list[num] = filter;
395 num++;
396 }
397 i++;
398 }
399 identd->filter_list_size = num;
400
401 /* set strict flag */
402 identd->strict = strict;
403
404 return identd;
405 }
406
407
408 void
mbfl_encoding_detector_delete(mbfl_encoding_detector * identd)409 mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
410 {
411 int i;
412
413 if (identd != NULL) {
414 if (identd->filter_list != NULL) {
415 i = identd->filter_list_size;
416 while (i > 0) {
417 i--;
418 mbfl_identify_filter_delete(identd->filter_list[i]);
419 }
420 mbfl_free((void *)identd->filter_list);
421 }
422 mbfl_free((void *)identd);
423 }
424 }
425
426 int
mbfl_encoding_detector_feed(mbfl_encoding_detector * identd,mbfl_string * string)427 mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
428 {
429 int res = 0;
430 /* feed data */
431 if (identd != NULL && string != NULL && string->val != NULL) {
432 int num = identd->filter_list_size;
433 size_t n = string->len;
434 unsigned char *p = string->val;
435 int bad = 0;
436 while (n > 0) {
437 int i;
438 for (i = 0; i < num; i++) {
439 mbfl_identify_filter *filter = identd->filter_list[i];
440 if (!filter->flag) {
441 (*filter->filter_function)(*p, filter);
442 if (filter->flag) {
443 bad++;
444 }
445 }
446 }
447 if ((num - 1) <= bad) {
448 res = 1;
449 break;
450 }
451 p++;
452 n--;
453 }
454 }
455
456 return res;
457 }
458
mbfl_encoding_detector_judge(mbfl_encoding_detector * identd)459 const mbfl_encoding *mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
460 {
461 mbfl_identify_filter *filter;
462 const mbfl_encoding *encoding = NULL;
463 int n;
464
465 /* judge */
466 if (identd != NULL) {
467 n = identd->filter_list_size - 1;
468 while (n >= 0) {
469 filter = identd->filter_list[n];
470 if (!filter->flag) {
471 if (!identd->strict || !filter->status) {
472 encoding = filter->encoding;
473 }
474 }
475 n--;
476 }
477
478 /* fallback judge */
479 if (!encoding) {
480 n = identd->filter_list_size - 1;
481 while (n >= 0) {
482 filter = identd->filter_list[n];
483 if (!filter->flag) {
484 encoding = filter->encoding;
485 }
486 n--;
487 }
488 }
489 }
490
491 return encoding;
492 }
493
494 /*
495 * encoding converter
496 */
497 mbfl_string *
mbfl_convert_encoding(mbfl_string * string,mbfl_string * result,const mbfl_encoding * toenc)498 mbfl_convert_encoding(
499 mbfl_string *string,
500 mbfl_string *result,
501 const mbfl_encoding *toenc)
502 {
503 size_t n;
504 unsigned char *p;
505 mbfl_memory_device device;
506 mbfl_convert_filter *filter1;
507 mbfl_convert_filter *filter2;
508
509 /* initialize */
510 if (toenc == NULL || string == NULL || result == NULL) {
511 return NULL;
512 }
513
514 filter1 = NULL;
515 filter2 = NULL;
516 if (mbfl_convert_filter_get_vtbl(string->encoding, toenc) != NULL) {
517 filter1 = mbfl_convert_filter_new(string->encoding, toenc, mbfl_memory_device_output, 0, &device);
518 } else {
519 filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
520 if (filter2 != NULL) {
521 filter1 = mbfl_convert_filter_new(string->encoding, &mbfl_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
522 if (filter1 == NULL) {
523 mbfl_convert_filter_delete(filter2);
524 }
525 }
526 }
527 if (filter1 == NULL) {
528 return NULL;
529 }
530
531 if (filter2 != NULL) {
532 filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
533 filter2->illegal_substchar = 0x3f; /* '?' */
534 }
535
536 mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
537
538 /* feed data */
539 n = string->len;
540 p = string->val;
541 if (p != NULL) {
542 while (n > 0) {
543 if ((*filter1->filter_function)(*p++, filter1) < 0) {
544 break;
545 }
546 n--;
547 }
548 }
549
550 mbfl_convert_filter_flush(filter1);
551 mbfl_convert_filter_delete(filter1);
552 if (filter2 != NULL) {
553 mbfl_convert_filter_flush(filter2);
554 mbfl_convert_filter_delete(filter2);
555 }
556
557 return mbfl_memory_device_result(&device, result);
558 }
559
560
561 /*
562 * identify encoding
563 */
564 const mbfl_encoding *
mbfl_identify_encoding(mbfl_string * string,const mbfl_encoding ** elist,int elistsz,int strict)565 mbfl_identify_encoding(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict)
566 {
567 int i, num, bad;
568 size_t n;
569 unsigned char *p;
570 mbfl_identify_filter *flist, *filter;
571 const mbfl_encoding *encoding;
572
573 /* flist is an array of mbfl_identify_filter instances */
574 flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
575 if (flist == NULL) {
576 return NULL;
577 }
578
579 num = 0;
580 if (elist != NULL) {
581 for (i = 0; i < elistsz; i++) {
582 if (!mbfl_identify_filter_init2(&flist[num], elist[i])) {
583 num++;
584 }
585 }
586 }
587
588 /* feed data */
589 n = string->len;
590 p = string->val;
591
592 if (p != NULL) {
593 bad = 0;
594 while (n > 0) {
595 for (i = 0; i < num; i++) {
596 filter = &flist[i];
597 if (!filter->flag) {
598 (*filter->filter_function)(*p, filter);
599 if (filter->flag) {
600 bad++;
601 }
602 }
603 }
604 if ((num - 1) <= bad && !strict) {
605 break;
606 }
607 p++;
608 n--;
609 }
610 }
611
612 /* judge */
613 encoding = NULL;
614
615 for (i = 0; i < num; i++) {
616 filter = &flist[i];
617 if (!filter->flag) {
618 if (strict && filter->status) {
619 continue;
620 }
621 encoding = filter->encoding;
622 break;
623 }
624 }
625
626 /* fall-back judge */
627 if (!encoding) {
628 for (i = 0; i < num; i++) {
629 filter = &flist[i];
630 if (!filter->flag && (!strict || !filter->status)) {
631 encoding = filter->encoding;
632 break;
633 }
634 }
635 }
636
637 /* cleanup */
638 /* dtors should be called in reverse order */
639 i = num;
640 while (--i >= 0) {
641 mbfl_identify_filter_cleanup(&flist[i]);
642 }
643
644 mbfl_free((void *)flist);
645
646 return encoding;
647 }
648
649 /*
650 * strlen
651 */
/*
 * Output callback that counts characters: `data` points at a size_t counter
 * which is incremented once per call. The character is returned unchanged.
 */
static int
filter_count_output(int c, void *data)
{
	size_t *counter = (size_t *)data;

	*counter += 1;
	return c;
}
658
659 size_t
mbfl_strlen(mbfl_string * string)660 mbfl_strlen(mbfl_string *string)
661 {
662 size_t len, n, k;
663 unsigned char *p;
664 const mbfl_encoding *encoding = string->encoding;
665
666 len = 0;
667 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
668 len = string->len;
669 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
670 len = string->len/2;
671 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
672 len = string->len/4;
673 } else if (encoding->mblen_table != NULL) {
674 const unsigned char *mbtab = encoding->mblen_table;
675 n = 0;
676 p = string->val;
677 k = string->len;
678 /* count */
679 if (p != NULL) {
680 while (n < k) {
681 unsigned m = mbtab[*p];
682 n += m;
683 p += m;
684 len++;
685 }
686 }
687 } else {
688 /* wchar filter */
689 mbfl_convert_filter *filter = mbfl_convert_filter_new(
690 string->encoding,
691 &mbfl_encoding_wchar,
692 filter_count_output, 0, &len);
693 if (filter == NULL) {
694 return (size_t) -1;
695 }
696 /* count */
697 n = string->len;
698 p = string->val;
699 if (p != NULL) {
700 while (n > 0) {
701 (*filter->filter_function)(*p++, filter);
702 n--;
703 }
704 }
705 mbfl_convert_filter_delete(filter);
706 }
707
708 return len;
709 }
710
711
712 /*
713 * strpos
714 */
/* State shared between a needle search and its collector_strpos() callback. */
struct collector_strpos_data {
	mbfl_convert_filter *next_filter; /* not referenced by collector_strpos() */
	mbfl_wchar_device needle;         /* needle, converted to wchar */
	size_t needle_len;                /* number of wchars in the needle */
	size_t start;                     /* haystack positions below this are not matched */
	size_t output;                    /* number of haystack characters seen so far */
	size_t found_pos;                 /* haystack position where the current candidate match begins */
	size_t needle_pos;                /* index of the next needle character to compare */
	size_t matched_pos;               /* position of the most recent complete match */
};
725
/*
 * Output callback that matches a stream of haystack characters against the
 * needle held in the collector_strpos_data passed as `data`.
 *
 * Called once per decoded haystack character `c`. Characters at positions
 * below pc->start are only counted. When the whole needle has been matched,
 * pc->matched_pos is set to the position where that match began.
 * Always returns c.
 */
static int
collector_strpos(int c, void* data)
{
	int *p, *h, *m;
	ssize_t n;
	struct collector_strpos_data *pc = (struct collector_strpos_data*)data;

	if (pc->output >= pc->start) {
		if (c == (int)pc->needle.buffer[pc->needle_pos]) {
			if (pc->needle_pos == 0) {
				pc->found_pos = pc->output;			/* a candidate match begins here */
			}
			pc->needle_pos++;						/* advance to the next needle character */
			if (pc->needle_pos >= pc->needle_len) {
				pc->matched_pos = pc->found_pos;	/* whole needle matched */
				pc->needle_pos--;
				/* re-align so that an overlapping match can continue */
				goto retry;
			}
		} else if (pc->needle_pos != 0) {
			/*
			 * Mismatch in the middle of a candidate (or arrival from the
			 * full-match case above): move the candidate start forward one
			 * position at a time. For each shift, the needle shifted by that
			 * amount (h) is compared with the needle's own beginning (m).
			 */
retry:
			h = (int *)pc->needle.buffer;
			h++;
			for (;;) {
				pc->found_pos++;
				p = h;
				m = (int *)pc->needle.buffer;
				n = pc->needle_pos - 1;
				while (n > 0 && *p == *m) {
					n--;
					p++;
					m++;
				}
				if (n <= 0) {
					/* the compared run agreed; restart from scratch unless the
					 * needle character at m equals c */
					if (*m != c) {
						pc->needle_pos = 0;
					}
					break;
				} else {
					/* this shift does not line up; try the next one with a
					 * shorter partial match */
					h++;
					pc->needle_pos--;
				}
			}
		}
	}

	pc->output++;
	return c;
}
774
775 /*
776 * oddlen
777 */
778 size_t
mbfl_oddlen(mbfl_string * string)779 mbfl_oddlen(mbfl_string *string)
780 {
781 size_t len, n, k;
782 unsigned char *p;
783 const mbfl_encoding *encoding = string->encoding;
784
785 len = 0;
786 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
787 return 0;
788 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
789 return len % 2;
790 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
791 return len % 4;
792 } else if (encoding->mblen_table != NULL) {
793 const unsigned char *mbtab = encoding->mblen_table;
794 n = 0;
795 p = string->val;
796 k = string->len;
797 /* count */
798 if (p != NULL) {
799 while (n < k) {
800 unsigned m = mbtab[*p];
801 n += m;
802 p += m;
803 };
804 }
805 return n-k;
806 } else {
807 /* how can i do ? */
808 return 0;
809 }
810 /* NOT REACHED */
811 }
812
/*
 * Find `needle` in `haystack` and return its position counted in characters.
 *
 * Both strings are converted to UTF-8 first unless they already are UTF-8;
 * the search itself runs on the UTF-8 bytes with a byte-indexed skip table.
 *
 * offset   character offset. In the forward search it is the number of
 *          characters to skip from the start. In the reverse search a
 *          non-negative value moves the lower bound of the search forward and
 *          a negative value moves the upper bound back from the end.
 * reverse  zero searches forward; non-zero searches backward from the end
 *
 * Returns the character position of the match, or one of these error values
 * cast to size_t:
 *   -1   not found
 *   -4   conversion to UTF-8 failed
 *   -8   NULL argument / NULL data, missing UTF-8 length table, or empty needle
 *   -16  offset lies outside the haystack
 */
size_t
mbfl_strpos(
    mbfl_string *haystack,
    mbfl_string *needle,
    ssize_t offset,
    int reverse)
{
	size_t result;
	mbfl_string _haystack_u8, _needle_u8;
	const mbfl_string *haystack_u8, *needle_u8 = NULL;
	const unsigned char *u8_tbl;

	if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
		return (size_t) -8;
	}

	{
		/* the UTF-8 byte-length table is needed to step over whole characters */
		const mbfl_encoding *u8_enc = &mbfl_encoding_utf8;
		if (u8_enc->mblen_table == NULL) {
			return (size_t) -8;
		}
		u8_tbl = u8_enc->mblen_table;
	}

	/* convert the haystack only when it is not UTF-8 already */
	if (haystack->encoding->no_encoding != mbfl_no_encoding_utf8) {
		mbfl_string_init(&_haystack_u8);
		haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, &mbfl_encoding_utf8);
		if (haystack_u8 == NULL) {
			result = (size_t) -4;
			goto out;
		}
	} else {
		haystack_u8 = haystack;
	}

	/* same for the needle */
	if (needle->encoding->no_encoding != mbfl_no_encoding_utf8) {
		mbfl_string_init(&_needle_u8);
		needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, &mbfl_encoding_utf8);
		if (needle_u8 == NULL) {
			result = (size_t) -4;
			goto out;
		}
	} else {
		needle_u8 = needle;
	}

	if (needle_u8->len < 1) {
		result = (size_t) -8;
		goto out;
	}

	/* from here on the default outcome is "not found" */
	result = (size_t) -1;
	if (haystack_u8->len < needle_u8->len) {
		goto out;
	}

	if (!reverse) {
		/* forward search; jtbl[b] is the distance to jump when byte b is hit */
		size_t jtbl[1 << (sizeof(unsigned char) * 8)];
		size_t needle_u8_len = needle_u8->len;
		size_t i;
		const unsigned char *p, *q, *e;
		const unsigned char *haystack_u8_val = haystack_u8->val,
		                    *needle_u8_val = needle_u8->val;
		for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
			jtbl[i] = needle_u8_len + 1;
		}
		for (i = 0; i < needle_u8_len - 1; ++i) {
			jtbl[needle_u8_val[i]] = needle_u8_len - i;
		}
		e = haystack_u8_val + haystack_u8->len;
		p = haystack_u8_val;
		/* skip `offset` whole characters */
		while (offset-- > 0) {
			if (p >= e) {
				result = (size_t) -16;
				goto out;
			}
			p += u8_tbl[*p];
		}
		/* p now marks the END of the current comparison window */
		p += needle_u8_len;
		if (p > e) {
			goto out;
		}
		while (p <= e) {
			const unsigned char *pv = p;
			q = needle_u8_val + needle_u8_len;
			/* compare the window with the needle from back to front */
			for (;;) {
				if (q == needle_u8_val) {
					/* match: count the characters before p; a byte starts a
					 * character unless it has the form 10xxxxxx */
					result = 0;
					while (p > haystack_u8_val) {
						unsigned char c = *--p;
						if (c < 0x80) {
							++result;
						} else if ((c & 0xc0) != 0x80) {
							++result;
						}
					}
					goto out;
				}
				if (*--q != *--p) {
					break;
				}
			}
			/* mismatch: jump by the table entry, but always move forward */
			p += jtbl[*p];
			if (p <= pv) {
				p = pv + 1;
			}
		}
	} else {
		/* reverse search; jtbl[b] is the distance to jump back when byte b is hit */
		size_t jtbl[1 << (sizeof(unsigned char) * 8)];
		size_t needle_u8_len = needle_u8->len, needle_len = 0;
		size_t i;
		const unsigned char *p, *e, *q, *qe;
		const unsigned char *haystack_u8_val = haystack_u8->val,
		                    *needle_u8_val = needle_u8->val;
		for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
			jtbl[i] = needle_u8_len;
		}
		/* fill the table and count the needle's characters (needle_len) */
		for (i = needle_u8_len - 1; i > 0; --i) {
			unsigned char c = needle_u8_val[i];
			jtbl[c] = i;
			if (c < 0x80) {
				++needle_len;
			} else if ((c & 0xc0) != 0x80) {
				++needle_len;
			}
		}
		{
			/* the loop above stops before index 0; count that byte here */
			unsigned char c = needle_u8_val[0];
			if (c < 0x80) {
				++needle_len;
			} else if ((c & 0xc0) != 0x80) {
				++needle_len;
			}
		}
		/* e is the lower bound of the search, p the upper bound */
		e = haystack_u8_val;
		p = e + haystack_u8->len;
		qe = needle_u8_val + needle_u8_len;
		if (offset < 0) {
			/* negative offset: pull the upper bound back, but only by the
			 * part of |offset| that exceeds the needle's character count */
			if (-offset > needle_len) {
				offset += needle_len;
				while (offset < 0) {
					unsigned char c;
					if (p <= e) {
						result = (size_t) -16;
						goto out;
					}
					c = *(--p);
					if (c < 0x80) {
						++offset;
					} else if ((c & 0xc0) != 0x80) {
						++offset;
					}
				}
			}
		} else {
			/* non-negative offset: push the lower bound forward by whole characters */
			const unsigned char *ee = haystack_u8_val + haystack_u8->len;
			while (offset-- > 0) {
				if (e >= ee) {
					result = (size_t) -16;
					goto out;
				}
				e += u8_tbl[*e];
			}
		}
		if (p < e + needle_u8_len) {
			goto out;
		}
		/* p now marks the START of the current comparison window */
		p -= needle_u8_len;
		while (p >= e) {
			const unsigned char *pv = p;
			q = needle_u8_val;
			/* compare the window with the needle from front to back */
			for (;;) {
				if (q == qe) {
					/* match: step back to the window start, then count the
					 * characters before it */
					result = 0;
					p -= needle_u8_len;
					while (p > haystack_u8_val) {
						unsigned char c = *--p;
						if (c < 0x80) {
							++result;
						} else if ((c & 0xc0) != 0x80) {
							++result;
						}
					}
					goto out;
				}
				if (*q != *p) {
					break;
				}
				++p, ++q;
			}
			/* mismatch: jump back by the table entry, but always move backward */
			p -= jtbl[*p];
			if (p >= pv) {
				p = pv - 1;
			}
		}
	}
out:
	/* release only the temporaries that were produced by a conversion above */
	if (haystack_u8 == &_haystack_u8) {
		mbfl_string_clear(&_haystack_u8);
	}
	if (needle_u8 == &_needle_u8) {
		mbfl_string_clear(&_needle_u8);
	}
	return result;
}
1018
1019 /*
1020 * substr_count
1021 */
1022
1023 size_t
mbfl_substr_count(mbfl_string * haystack,mbfl_string * needle)1024 mbfl_substr_count(
1025 mbfl_string *haystack,
1026 mbfl_string *needle
1027 )
1028 {
1029 size_t n, result = 0;
1030 unsigned char *p;
1031 mbfl_convert_filter *filter;
1032 struct collector_strpos_data pc;
1033
1034 if (haystack == NULL || needle == NULL) {
1035 return (size_t) -8;
1036 }
1037 /* needle is converted into wchar */
1038 mbfl_wchar_device_init(&pc.needle);
1039 filter = mbfl_convert_filter_new(
1040 needle->encoding,
1041 &mbfl_encoding_wchar,
1042 mbfl_wchar_device_output, 0, &pc.needle);
1043 if (filter == NULL) {
1044 return (size_t) -4;
1045 }
1046 mbfl_convert_filter_feed_string(filter, needle->val, needle->len);
1047 mbfl_convert_filter_flush(filter);
1048 mbfl_convert_filter_delete(filter);
1049 pc.needle_len = pc.needle.pos;
1050 if (pc.needle.buffer == NULL) {
1051 return (size_t) -4;
1052 }
1053 if (pc.needle_len <= 0) {
1054 mbfl_wchar_device_clear(&pc.needle);
1055 return (size_t) -2;
1056 }
1057 /* initialize filter and collector data */
1058 filter = mbfl_convert_filter_new(
1059 haystack->encoding,
1060 &mbfl_encoding_wchar,
1061 collector_strpos, 0, &pc);
1062 if (filter == NULL) {
1063 mbfl_wchar_device_clear(&pc.needle);
1064 return (size_t) -4;
1065 }
1066 pc.start = 0;
1067 pc.output = 0;
1068 pc.needle_pos = 0;
1069 pc.found_pos = 0;
1070 pc.matched_pos = (size_t) -1;
1071
1072 /* feed data */
1073 p = haystack->val;
1074 n = haystack->len;
1075 if (p != NULL) {
1076 while (n > 0) {
1077 if ((*filter->filter_function)(*p++, filter) < 0) {
1078 pc.matched_pos = (size_t) -4;
1079 break;
1080 }
1081 if (pc.matched_pos != (size_t) -1) {
1082 ++result;
1083 pc.matched_pos = (size_t) -1;
1084 pc.needle_pos = 0;
1085 }
1086 n--;
1087 }
1088 }
1089 mbfl_convert_filter_flush(filter);
1090 mbfl_convert_filter_delete(filter);
1091 mbfl_wchar_device_clear(&pc.needle);
1092
1093 return result;
1094 }
1095
1096 /*
1097 * substr
1098 */
/* State for the collector_substr() callback. */
struct collector_substr_data {
	mbfl_convert_filter *next_filter; /* filter that receives the selected characters */
	size_t start;                     /* first character position to forward */
	size_t stop;                      /* position at which collecting ends (exclusive) */
	size_t output;                    /* number of characters seen so far */
};
1105
1106 static int
collector_substr(int c,void * data)1107 collector_substr(int c, void* data)
1108 {
1109 struct collector_substr_data *pc = (struct collector_substr_data*)data;
1110
1111 if (pc->output >= pc->stop) {
1112 return -1;
1113 }
1114
1115 if (pc->output >= pc->start) {
1116 (*pc->next_filter->filter_function)(c, pc->next_filter);
1117 }
1118
1119 pc->output++;
1120
1121 return c;
1122 }
1123
1124 mbfl_string *
mbfl_substr(mbfl_string * string,mbfl_string * result,size_t from,size_t length)1125 mbfl_substr(
1126 mbfl_string *string,
1127 mbfl_string *result,
1128 size_t from,
1129 size_t length)
1130 {
1131 const mbfl_encoding *encoding = string->encoding;
1132 size_t n, k, len, start, end;
1133 unsigned m;
1134 unsigned char *p, *w;
1135
1136 mbfl_string_init(result);
1137 result->no_language = string->no_language;
1138 result->encoding = string->encoding;
1139
1140 if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
1141 encoding->mblen_table != NULL) {
1142 len = string->len;
1143 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
1144 start = from;
1145 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1146 start = from*2;
1147 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1148 start = from*4;
1149 } else {
1150 const unsigned char *mbtab = encoding->mblen_table;
1151 start = 0;
1152 n = 0;
1153 k = 0;
1154 p = string->val;
1155 /* search start position */
1156 while (k <= from) {
1157 start = n;
1158 if (n >= len) {
1159 break;
1160 }
1161 m = mbtab[*p];
1162 n += m;
1163 p += m;
1164 k++;
1165 }
1166 }
1167
1168 if (length == MBFL_SUBSTR_UNTIL_END) {
1169 end = len;
1170 } else if (encoding->flag & MBFL_ENCTYPE_SBCS) {
1171 end = start + length;
1172 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1173 end = start + length*2;
1174 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1175 end = start + length*4;
1176 } else {
1177 const unsigned char *mbtab = encoding->mblen_table;
1178 end = start;
1179 n = start;
1180 k = 0;
1181 p = string->val + start;
1182 /* detect end position */
1183 while (k <= length) {
1184 end = n;
1185 if (n >= len) {
1186 break;
1187 }
1188 m = mbtab[*p];
1189 n += m;
1190 p += m;
1191 k++;
1192 }
1193 }
1194
1195 if (start > len) {
1196 start = len;
1197 }
1198 if (end > len) {
1199 end = len;
1200 }
1201 if (start > end) {
1202 start = end;
1203 }
1204
1205 /* allocate memory and copy */
1206 n = end - start;
1207 result->len = 0;
1208 result->val = w = (unsigned char*)mbfl_malloc(n + 1);
1209 if (w != NULL) {
1210 result->len = n;
1211 memcpy(w, string->val + start, n);
1212 w[n] = '\0';
1213 } else {
1214 result = NULL;
1215 }
1216 } else {
1217 mbfl_memory_device device;
1218 struct collector_substr_data pc;
1219 mbfl_convert_filter *decoder;
1220 mbfl_convert_filter *encoder;
1221
1222 if (length == MBFL_SUBSTR_UNTIL_END) {
1223 length = mbfl_strlen(string) - from;
1224 }
1225
1226 mbfl_memory_device_init(&device, length + 1, 0);
1227 mbfl_string_init(result);
1228 result->no_language = string->no_language;
1229 result->encoding = string->encoding;
1230 /* output code filter */
1231 decoder = mbfl_convert_filter_new(
1232 &mbfl_encoding_wchar,
1233 string->encoding,
1234 mbfl_memory_device_output, 0, &device);
1235 /* wchar filter */
1236 encoder = mbfl_convert_filter_new(
1237 string->encoding,
1238 &mbfl_encoding_wchar,
1239 collector_substr, 0, &pc);
1240 if (decoder == NULL || encoder == NULL) {
1241 mbfl_convert_filter_delete(encoder);
1242 mbfl_convert_filter_delete(decoder);
1243 return NULL;
1244 }
1245 pc.next_filter = decoder;
1246 pc.start = from;
1247 pc.stop = from + length;
1248 pc.output = 0;
1249
1250 /* feed data */
1251 p = string->val;
1252 n = string->len;
1253 if (p != NULL) {
1254 while (n > 0) {
1255 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1256 break;
1257 }
1258 n--;
1259 }
1260 }
1261
1262 mbfl_convert_filter_flush(encoder);
1263 mbfl_convert_filter_flush(decoder);
1264 result = mbfl_memory_device_result(&device, result);
1265 mbfl_convert_filter_delete(encoder);
1266 mbfl_convert_filter_delete(decoder);
1267 }
1268
1269 return result;
1270 }
1271
1272 /*
1273 * strcut
1274 */
/*
 * Cut a byte range out of `string` and store it in `result`.
 *
 * `from` and `length` are byte counts: both are compared against
 * string->len and used for pointer arithmetic on string->val.  `from` is
 * clamped to string->len.
 *
 * Two strategies are used:
 *  - encodings flagged SBCS / WCS2 / WCS4, or providing an mblen_table, are
 *    handled by pointer arithmetic and a single memcpy;
 *  - every other encoding is run through an encoder/decoder filter pair and
 *    the output is kept no longer than `length` bytes by snapshotting and
 *    restoring the filter state.
 *
 * Returns `result` (or whatever mbfl_memory_device_result() yields), or NULL
 * when a filter or buffer cannot be allocated.
 */
mbfl_string *
mbfl_strcut(
    mbfl_string *string,
    mbfl_string *result,
    size_t from,
    size_t length)
{
	const mbfl_encoding *encoding = string->encoding;
	mbfl_memory_device device;

	/* clamp the start offset to the end of the input */
	if (from >= string->len) {
		from = string->len;
	}

	mbfl_string_init(result);
	result->no_language = string->no_language;
	result->encoding = string->encoding;

	if ((encoding->flag & (MBFL_ENCTYPE_SBCS
				| MBFL_ENCTYPE_WCS2BE
				| MBFL_ENCTYPE_WCS2LE
				| MBFL_ENCTYPE_WCS4BE
				| MBFL_ENCTYPE_WCS4LE))
			|| encoding->mblen_table != NULL) {
		/* fast path: boundaries can be found without running a filter */
		const unsigned char *start = NULL;
		const unsigned char *end = NULL;
		unsigned char *w;
		size_t sz;

		if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
			/* 2-byte units: round the offset down to an even value */
			from &= -2;

			if (length >= string->len - from) {
				length = string->len - from;
			}

			start = string->val + from;
			/* and round the length down to a whole number of units */
			end = start + (length & -2);
		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
			/* 4-byte units: same as above with a multiple of four */
			from &= -4;

			if (length >= string->len - from) {
				length = string->len - from;
			}

			start = string->val + from;
			end = start + (length & -4);
		} else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
			/* single byte encoding: any offset is a valid boundary */
			if (length >= string->len - from) {
				length = string->len - from;
			}

			start = string->val + from;
			end = start + length;
		} else if (encoding->mblen_table != NULL) {
			/* mblen_table is indexed by a byte and yields the step to the next character */
			const unsigned char *mbtab = encoding->mblen_table;
			const unsigned char *p, *q;
			int m;

			/* search start position: hop character by character up to `from` */
			for (m = 0, p = string->val, q = p + from;
					p < q; p += (m = mbtab[*p]));

			/* overshot: `from` is inside a character, step back to its first byte */
			if (p > q) {
				p -= m;
			}

			start = p;

			/* search end position */
			if (length >= string->len - (start - string->val)) {
				end = string->val + string->len;
			} else {
				for (q = p + length; p < q; p += (m = mbtab[*p]));

				/* drop the last character when it would exceed `length` bytes */
				if (p > q) {
					p -= m;
				}
				end = p;
			}
		} else {
			/* never reached */
			return NULL;
		}

		/* allocate memory and copy string */
		sz = end - start;
		if ((w = (unsigned char*)mbfl_calloc(sz + 8,
				sizeof(unsigned char))) == NULL) {
			return NULL;
		}

		memcpy(w, start, sz);
		/* four terminating zero bytes are written after the copied data */
		w[sz] = '\0';
		w[sz + 1] = '\0';
		w[sz + 2] = '\0';
		w[sz + 3] = '\0';

		result->val = w;
		result->len = sz;
	} else {
		/*
		 * slow path: string->encoding -> wchar -> string->encoding.
		 * `bk` and `_bk` hold copies of both filters together with the
		 * input pointer and the output position they belong to.
		 */
		mbfl_convert_filter *encoder = NULL;
		mbfl_convert_filter *decoder = NULL;
		const unsigned char *p, *q, *r;
		struct {
			mbfl_convert_filter encoder;
			mbfl_convert_filter decoder;
			const unsigned char *p;
			size_t pos;
		} bk, _bk;

		/* output code filter */
		if (!(decoder = mbfl_convert_filter_new(
				&mbfl_encoding_wchar,
				string->encoding,
				mbfl_memory_device_output, 0, &device))) {
			return NULL;
		}

		/* wchar filter */
		if (!(encoder = mbfl_convert_filter_new(
				string->encoding,
				&mbfl_encoding_wchar,
				mbfl_filter_output_null,
				NULL, NULL))) {
			mbfl_convert_filter_delete(decoder);
			return NULL;
		}

		mbfl_memory_device_init(&device, length + 8, 0);

		p = string->val;

		/* search start position: the encoder output goes to
		 * mbfl_filter_output_null while the first `from` bytes are fed */
		for (q = string->val + from; p < q; p++) {
			(*encoder->filter_function)(*p, encoder);
		}

		/* switch the drain direction: from here on the encoder feeds the decoder */
		encoder->output_function = (int(*)(int,void *))decoder->filter_function;
		encoder->flush_function = (int(*)(void *))decoder->filter_flush;
		encoder->data = decoder;

		q = string->val + string->len;

		/* save the encoder, decoder state and the pointer */
		mbfl_convert_filter_copy(decoder, &_bk.decoder);
		mbfl_convert_filter_copy(encoder, &_bk.encoder);
		_bk.p = p;
		_bk.pos = device.pos;

		/* never ask for more than what is left of the input */
		if (length > q - p) {
			length = q - p;
		}

		if (length >= 20) {
			/* output a little shorter than "length" */
			/* XXX: the constant "20" was determined purely on the heuristics. */
			for (r = p + length - 20; p < r; p++) {
				(*encoder->filter_function)(*p, encoder);
			}

			/* if the offset of the resulting string exceeds the length,
			 * then restore the state */
			if (device.pos > length) {
				p = _bk.p;
				device.pos = _bk.pos;
				decoder->filter_dtor(decoder);
				encoder->filter_dtor(encoder);
				mbfl_convert_filter_copy(&_bk.decoder, decoder);
				mbfl_convert_filter_copy(&_bk.encoder, encoder);
				bk = _bk;
			} else {
				/* save the encoder, decoder state and the pointer */
				mbfl_convert_filter_copy(decoder, &bk.decoder);
				mbfl_convert_filter_copy(encoder, &bk.encoder);
				bk.p = p;
				bk.pos = device.pos;

				/* flush the stream */
				(*encoder->filter_flush)(encoder);

				/* if the offset of the resulting string exceeds the length,
				 * then restore the state */
				if (device.pos > length) {
					/* the bulk snapshot is unusable: discard it and go back to the start */
					bk.decoder.filter_dtor(&bk.decoder);
					bk.encoder.filter_dtor(&bk.encoder);

					p = _bk.p;
					device.pos = _bk.pos;
					decoder->filter_dtor(decoder);
					encoder->filter_dtor(encoder);
					mbfl_convert_filter_copy(&_bk.decoder, decoder);
					mbfl_convert_filter_copy(&_bk.encoder, encoder);
					bk = _bk;
				} else {
					/* the bulk snapshot fits: discard the start snapshot and
					 * resume from the pre-flush state */
					_bk.decoder.filter_dtor(&_bk.decoder);
					_bk.encoder.filter_dtor(&_bk.encoder);

					p = bk.p;
					device.pos = bk.pos;
					decoder->filter_dtor(decoder);
					encoder->filter_dtor(encoder);
					mbfl_convert_filter_copy(&bk.decoder, decoder);
					mbfl_convert_filter_copy(&bk.encoder, encoder);
				}
			}
		} else {
			bk = _bk;
		}

		/* detect end position: advance one byte at a time; `bk` is the
		 * state that is restored when the output grows past `length` */
		while (p < q) {
			(*encoder->filter_function)(*p, encoder);

			if (device.pos > length) {
				/* restore filter */
				p = bk.p;
				device.pos = bk.pos;
				decoder->filter_dtor(decoder);
				encoder->filter_dtor(encoder);
				mbfl_convert_filter_copy(&bk.decoder, decoder);
				mbfl_convert_filter_copy(&bk.encoder, encoder);
				break;
			}

			p++;

			/* backup current state */
			mbfl_convert_filter_copy(decoder, &_bk.decoder);
			mbfl_convert_filter_copy(encoder, &_bk.encoder);
			_bk.pos = device.pos;
			_bk.p = p;

			/* trial flush: check the output size if the string ended here */
			(*encoder->filter_flush)(encoder);

			if (device.pos > length) {
				_bk.decoder.filter_dtor(&_bk.decoder);
				_bk.encoder.filter_dtor(&_bk.encoder);

				/* restore filter */
				p = bk.p;
				device.pos = bk.pos;
				decoder->filter_dtor(decoder);
				encoder->filter_dtor(encoder);
				mbfl_convert_filter_copy(&bk.decoder, decoder);
				mbfl_convert_filter_copy(&bk.encoder, encoder);
				break;
			}

			/* the trial flush fits: release the previous `bk` and promote `_bk` */
			bk.decoder.filter_dtor(&bk.decoder);
			bk.encoder.filter_dtor(&bk.encoder);

			/* undo the trial flush before reading the next byte */
			p = _bk.p;
			device.pos = _bk.pos;
			decoder->filter_dtor(decoder);
			encoder->filter_dtor(encoder);
			mbfl_convert_filter_copy(&_bk.decoder, decoder);
			mbfl_convert_filter_copy(&_bk.encoder, encoder);

			bk = _bk;
		}

		/* final flush of the accepted state */
		(*encoder->filter_flush)(encoder);

		bk.decoder.filter_dtor(&bk.decoder);
		bk.encoder.filter_dtor(&bk.encoder);

		result = mbfl_memory_device_result(&device, result);

		mbfl_convert_filter_delete(encoder);
		mbfl_convert_filter_delete(decoder);
	}

	return result;
}
1551
1552
1553 /*
1554 * strwidth
1555 */
is_fullwidth(int c)1556 static size_t is_fullwidth(int c)
1557 {
1558 int i;
1559
1560 if (c < mbfl_eaw_table[0].begin) {
1561 return 0;
1562 }
1563
1564 for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1565 if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1566 return 1;
1567 }
1568 }
1569
1570 return 0;
1571 }
1572
/*
 * Filter output callback: add the display width of `c` to the size_t
 * counter that `data` points to (2 for a full-width character, 1
 * otherwise) and hand `c` back unchanged.
 */
static int
filter_count_width(int c, void* data)
{
	size_t *total = (size_t *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}

	return c;
}
1579
1580 size_t
mbfl_strwidth(mbfl_string * string)1581 mbfl_strwidth(mbfl_string *string)
1582 {
1583 size_t len, n;
1584 unsigned char *p;
1585 mbfl_convert_filter *filter;
1586
1587 len = 0;
1588 if (string->len > 0 && string->val != NULL) {
1589 /* wchar filter */
1590 filter = mbfl_convert_filter_new(
1591 string->encoding,
1592 &mbfl_encoding_wchar,
1593 filter_count_width, 0, &len);
1594 if (filter == NULL) {
1595 mbfl_convert_filter_delete(filter);
1596 return -1;
1597 }
1598
1599 /* feed data */
1600 p = string->val;
1601 n = string->len;
1602 while (n > 0) {
1603 (*filter->filter_function)(*p++, filter);
1604 n--;
1605 }
1606
1607 mbfl_convert_filter_flush(filter);
1608 mbfl_convert_filter_delete(filter);
1609 }
1610
1611 return len;
1612 }
1613
1614
1615 /*
1616 * strimwidth
1617 */
/* State shared between mbfl_strimwidth() and its collector_strimwidth() callback. */
struct collector_strimwidth_data {
	mbfl_convert_filter *decoder;        /* filter every accepted character is forwarded to */
	mbfl_convert_filter *decoder_backup; /* receives a copy of `decoder` when the width is first exceeded */
	mbfl_memory_device device;           /* output buffer; its `pos` is saved in and restored from `endpos` */
	size_t from;                         /* characters counted in `outchar` before output starts */
	size_t width;                        /* width limit that `outwidth` is compared against */
	size_t outwidth;                     /* accumulated width: 2 per full-width character, 1 otherwise */
	size_t outchar;                      /* number of characters seen so far */
	size_t endpos;                       /* device.pos recorded when the limit was first exceeded */
	int status;                          /* 0: within the limit; incremented on each character over the
	                                      * limit; 10: forward everything without measuring */
};
1629
1630 static int
collector_strimwidth(int c,void * data)1631 collector_strimwidth(int c, void* data)
1632 {
1633 struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1634
1635 switch (pc->status) {
1636 case 10:
1637 (*pc->decoder->filter_function)(c, pc->decoder);
1638 break;
1639 default:
1640 if (pc->outchar >= pc->from) {
1641 pc->outwidth += (is_fullwidth(c) ? 2: 1);
1642
1643 if (pc->outwidth > pc->width) {
1644 if (pc->status == 0) {
1645 pc->endpos = pc->device.pos;
1646 mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1647 }
1648 pc->status++;
1649 (*pc->decoder->filter_function)(c, pc->decoder);
1650 c = -1;
1651 } else {
1652 (*pc->decoder->filter_function)(c, pc->decoder);
1653 }
1654 }
1655 pc->outchar++;
1656 break;
1657 }
1658
1659 return c;
1660 }
1661
1662 mbfl_string *
mbfl_strimwidth(mbfl_string * string,mbfl_string * marker,mbfl_string * result,size_t from,size_t width)1663 mbfl_strimwidth(
1664 mbfl_string *string,
1665 mbfl_string *marker,
1666 mbfl_string *result,
1667 size_t from,
1668 size_t width)
1669 {
1670 struct collector_strimwidth_data pc;
1671 mbfl_convert_filter *encoder;
1672 size_t n, mkwidth;
1673 unsigned char *p;
1674
1675 if (string == NULL || result == NULL) {
1676 return NULL;
1677 }
1678 mbfl_string_init(result);
1679 result->no_language = string->no_language;
1680 result->encoding = string->encoding;
1681 mbfl_memory_device_init(&pc.device, MIN(string->len, width), 0);
1682
1683 /* output code filter */
1684 pc.decoder = mbfl_convert_filter_new(
1685 &mbfl_encoding_wchar,
1686 string->encoding,
1687 mbfl_memory_device_output, 0, &pc.device);
1688 pc.decoder_backup = mbfl_convert_filter_new(
1689 &mbfl_encoding_wchar,
1690 string->encoding,
1691 mbfl_memory_device_output, 0, &pc.device);
1692 /* wchar filter */
1693 encoder = mbfl_convert_filter_new(
1694 string->encoding,
1695 &mbfl_encoding_wchar,
1696 collector_strimwidth, 0, &pc);
1697 if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1698 mbfl_convert_filter_delete(encoder);
1699 mbfl_convert_filter_delete(pc.decoder);
1700 mbfl_convert_filter_delete(pc.decoder_backup);
1701 return NULL;
1702 }
1703 mkwidth = 0;
1704 if (marker) {
1705 mkwidth = mbfl_strwidth(marker);
1706 }
1707 pc.from = from;
1708 pc.width = width - mkwidth;
1709 pc.outwidth = 0;
1710 pc.outchar = 0;
1711 pc.status = 0;
1712 pc.endpos = 0;
1713
1714 /* feed data */
1715 p = string->val;
1716 n = string->len;
1717 if (p != NULL) {
1718 while (n > 0) {
1719 n--;
1720 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1721 break;
1722 }
1723 }
1724 mbfl_convert_filter_flush(encoder);
1725 if (pc.status != 0 && mkwidth > 0) {
1726 pc.width += mkwidth;
1727 if (n > 0) {
1728 while (n > 0) {
1729 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1730 break;
1731 }
1732 n--;
1733 }
1734 mbfl_convert_filter_flush(encoder);
1735 } else if (pc.outwidth > pc.width) {
1736 pc.status++;
1737 }
1738 if (pc.status != 1) {
1739 pc.status = 10;
1740 pc.device.pos = pc.endpos;
1741 mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1742 mbfl_convert_filter_reset(encoder, marker->encoding, &mbfl_encoding_wchar);
1743 p = marker->val;
1744 n = marker->len;
1745 while (n > 0) {
1746 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1747 break;
1748 }
1749 n--;
1750 }
1751 mbfl_convert_filter_flush(encoder);
1752 }
1753 } else if (pc.status != 0) {
1754 pc.device.pos = pc.endpos;
1755 mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1756 }
1757 mbfl_convert_filter_flush(pc.decoder);
1758 }
1759 result = mbfl_memory_device_result(&pc.device, result);
1760 mbfl_convert_filter_delete(encoder);
1761 mbfl_convert_filter_delete(pc.decoder);
1762 mbfl_convert_filter_delete(pc.decoder_backup);
1763
1764 return result;
1765 }
1766
1767 mbfl_string *
mbfl_ja_jp_hantozen(mbfl_string * string,mbfl_string * result,int mode)1768 mbfl_ja_jp_hantozen(
1769 mbfl_string *string,
1770 mbfl_string *result,
1771 int mode)
1772 {
1773 size_t n;
1774 unsigned char *p;
1775 mbfl_memory_device device;
1776 mbfl_convert_filter *decoder = NULL;
1777 mbfl_convert_filter *encoder = NULL;
1778 mbfl_convert_filter *tl_filter = NULL;
1779 mbfl_convert_filter *next_filter = NULL;
1780 mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1781
1782 mbfl_memory_device_init(&device, string->len, 0);
1783 mbfl_string_init(result);
1784
1785 result->no_language = string->no_language;
1786 result->encoding = string->encoding;
1787
1788 decoder = mbfl_convert_filter_new(
1789 &mbfl_encoding_wchar,
1790 string->encoding,
1791 mbfl_memory_device_output, 0, &device);
1792 if (decoder == NULL) {
1793 goto out;
1794 }
1795 next_filter = decoder;
1796
1797 param =
1798 (mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
1799 if (param == NULL) {
1800 goto out;
1801 }
1802
1803 param->mode = mode;
1804
1805 tl_filter = mbfl_convert_filter_new2(
1806 &vtbl_tl_jisx0201_jisx0208,
1807 (int(*)(int, void*))next_filter->filter_function,
1808 (int(*)(void*))next_filter->filter_flush,
1809 next_filter);
1810 if (tl_filter == NULL) {
1811 mbfl_free(param);
1812 goto out;
1813 }
1814
1815 tl_filter->opaque = param;
1816 next_filter = tl_filter;
1817
1818 encoder = mbfl_convert_filter_new(
1819 string->encoding,
1820 &mbfl_encoding_wchar,
1821 (int(*)(int, void*))next_filter->filter_function,
1822 (int(*)(void*))next_filter->filter_flush,
1823 next_filter);
1824 if (encoder == NULL) {
1825 goto out;
1826 }
1827
1828 /* feed data */
1829 p = string->val;
1830 n = string->len;
1831 if (p != NULL) {
1832 while (n > 0) {
1833 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1834 break;
1835 }
1836 n--;
1837 }
1838 }
1839
1840 mbfl_convert_filter_flush(encoder);
1841 result = mbfl_memory_device_result(&device, result);
1842 out:
1843 if (tl_filter != NULL) {
1844 if (tl_filter->opaque != NULL) {
1845 mbfl_free(tl_filter->opaque);
1846 }
1847 mbfl_convert_filter_delete(tl_filter);
1848 }
1849
1850 if (decoder != NULL) {
1851 mbfl_convert_filter_delete(decoder);
1852 }
1853
1854 if (encoder != NULL) {
1855 mbfl_convert_filter_delete(encoder);
1856 }
1857
1858 return result;
1859 }
1860
1861
1862 /*
1863 * MIME header encode
1864 */
/* Working state of the MIME header encoder. */
struct mime_header_encoder_data {
	mbfl_convert_filter *conv1_filter;        /* input charset -> wchar; feeds mime_header_encoder_collector */
	mbfl_convert_filter *block_filter;        /* wchar -> wchar; feeds mime_header_encoder_block_collector */
	mbfl_convert_filter *conv2_filter;        /* wchar -> output charset; piped into encod_filter */
	mbfl_convert_filter *conv2_filter_backup; /* scratch copy of conv2_filter used around trial encoding */
	mbfl_convert_filter *encod_filter;        /* output charset -> transfer encoding; writes to outdev */
	mbfl_convert_filter *encod_filter_backup; /* scratch copy of encod_filter used around trial encoding */
	mbfl_memory_device outdev;                /* encoded header text produced so far */
	mbfl_memory_device tmpdev;                /* characters held back until it is known how to emit them */
	int status1;                              /* collector state: 0 and 1 while buffering plain text,
	                                           * 11 once an encoded word has been opened */
	int status2;                              /* block collector state: 1 after the "=?charset?X?" prefix was written */
	size_t prevpos;                           /* outdev.pos saved before a trial encoding */
	size_t linehead;                          /* outdev.pos at which the current output line starts */
	size_t firstindent;                       /* extra columns added to the line length; reset to 0 at each fold */
	int encnamelen;                           /* length of encname in bytes */
	int lwsplen;                              /* length of lwsp in bytes */
	char encname[128];                        /* encoded-word prefix, e.g. "=?ISO-2022-JP?B?" */
	char lwsp[16];                            /* folding sequence: line break followed by a space */
};
1884
1885 static int
mime_header_encoder_block_collector(int c,void * data)1886 mime_header_encoder_block_collector(int c, void *data)
1887 {
1888 size_t n;
1889 struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1890
1891 switch (pe->status2) {
1892 case 1: /* encoded word */
1893 pe->prevpos = pe->outdev.pos;
1894 mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
1895 mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
1896 (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1897 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1898 (*pe->encod_filter->filter_flush)(pe->encod_filter);
1899 n = pe->outdev.pos - pe->linehead + pe->firstindent;
1900 pe->outdev.pos = pe->prevpos;
1901 mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
1902 mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
1903 if (n >= 74) {
1904 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1905 (*pe->encod_filter->filter_flush)(pe->encod_filter);
1906 mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
1907 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
1908 pe->linehead = pe->outdev.pos;
1909 pe->firstindent = 0;
1910 mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1911 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1912 } else {
1913 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1914 }
1915 break;
1916
1917 default:
1918 mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1919 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1920 pe->status2 = 1;
1921 break;
1922 }
1923
1924 return c;
1925 }
1926
1927 static int
mime_header_encoder_collector(int c,void * data)1928 mime_header_encoder_collector(int c, void *data)
1929 {
1930 static int qp_table[256] = {
1931 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
1932 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
1933 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
1934 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
1935 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
1936 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
1937 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
1938 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
1939 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
1940 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
1941 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
1942 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
1943 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
1944 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
1945 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
1946 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xF0 */
1947 };
1948
1949 size_t n;
1950 struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1951
1952 switch (pe->status1) {
1953 case 11: /* encoded word */
1954 (*pe->block_filter->filter_function)(c, pe->block_filter);
1955 break;
1956
1957 default: /* ASCII */
1958 if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
1959 mbfl_memory_device_output(c, &pe->tmpdev);
1960 pe->status1 = 1;
1961 } else if (pe->status1 == 0 && c == 0x20) { /* repeat SPACE */
1962 mbfl_memory_device_output(c, &pe->tmpdev);
1963 } else {
1964 if (pe->tmpdev.pos < 74 && c == 0x20) {
1965 n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
1966 if (n > 74) {
1967 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
1968 pe->linehead = pe->outdev.pos;
1969 pe->firstindent = 0;
1970 } else if (pe->outdev.pos > 0) {
1971 mbfl_memory_device_output(0x20, &pe->outdev);
1972 }
1973 mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
1974 mbfl_memory_device_reset(&pe->tmpdev);
1975 pe->status1 = 0;
1976 } else {
1977 n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
1978 if (n > 60) {
1979 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
1980 pe->linehead = pe->outdev.pos;
1981 pe->firstindent = 0;
1982 } else if (pe->outdev.pos > 0) {
1983 mbfl_memory_device_output(0x20, &pe->outdev);
1984 }
1985 mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
1986 mbfl_memory_device_reset(&pe->tmpdev);
1987 (*pe->block_filter->filter_function)(c, pe->block_filter);
1988 pe->status1 = 11;
1989 }
1990 }
1991 break;
1992 }
1993
1994 return c;
1995 }
1996
1997 mbfl_string *
mime_header_encoder_result(struct mime_header_encoder_data * pe,mbfl_string * result)1998 mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
1999 {
2000 if (pe->status1 >= 10) {
2001 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2002 (*pe->encod_filter->filter_flush)(pe->encod_filter);
2003 mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
2004 } else if (pe->tmpdev.pos > 0) {
2005 if (pe->outdev.pos > 0) {
2006 if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) {
2007 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2008 } else {
2009 mbfl_memory_device_output(0x20, &pe->outdev);
2010 }
2011 }
2012 mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2013 }
2014 mbfl_memory_device_reset(&pe->tmpdev);
2015 pe->prevpos = 0;
2016 pe->linehead = 0;
2017 pe->status1 = 0;
2018 pe->status2 = 0;
2019
2020 return mbfl_memory_device_result(&pe->outdev, result);
2021 }
2022
2023 struct mime_header_encoder_data*
mime_header_encoder_new(const mbfl_encoding * incode,const mbfl_encoding * outcode,const mbfl_encoding * transenc)2024 mime_header_encoder_new(
2025 const mbfl_encoding *incode,
2026 const mbfl_encoding *outcode,
2027 const mbfl_encoding *transenc)
2028 {
2029 size_t n;
2030 const char *s;
2031 struct mime_header_encoder_data *pe;
2032
2033 /* get output encoding and check MIME charset name */
2034 if (outcode->mime_name == NULL || outcode->mime_name[0] == '\0') {
2035 return NULL;
2036 }
2037
2038 pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
2039 if (pe == NULL) {
2040 return NULL;
2041 }
2042
2043 mbfl_memory_device_init(&pe->outdev, 0, 0);
2044 mbfl_memory_device_init(&pe->tmpdev, 0, 0);
2045 pe->prevpos = 0;
2046 pe->linehead = 0;
2047 pe->firstindent = 0;
2048 pe->status1 = 0;
2049 pe->status2 = 0;
2050
2051 /* make the encoding description string exp. "=?ISO-2022-JP?B?" */
2052 n = 0;
2053 pe->encname[n++] = 0x3d;
2054 pe->encname[n++] = 0x3f;
2055 s = outcode->mime_name;
2056 while (*s) {
2057 pe->encname[n++] = *s++;
2058 }
2059 pe->encname[n++] = 0x3f;
2060 if (transenc->no_encoding == mbfl_no_encoding_qprint) {
2061 pe->encname[n++] = 0x51;
2062 } else {
2063 pe->encname[n++] = 0x42;
2064 transenc = &mbfl_encoding_base64;
2065 }
2066 pe->encname[n++] = 0x3f;
2067 pe->encname[n] = '\0';
2068 pe->encnamelen = n;
2069
2070 n = 0;
2071 pe->lwsp[n++] = 0x0d;
2072 pe->lwsp[n++] = 0x0a;
2073 pe->lwsp[n++] = 0x20;
2074 pe->lwsp[n] = '\0';
2075 pe->lwsplen = n;
2076
2077 /* transfer encode filter */
2078 pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2079 pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2080
2081 /* Output code filter */
2082 pe->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2083 pe->conv2_filter_backup = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2084
2085 /* encoded block filter */
2086 pe->block_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, &mbfl_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
2087
2088 /* Input code filter */
2089 pe->conv1_filter = mbfl_convert_filter_new(incode, &mbfl_encoding_wchar, mime_header_encoder_collector, 0, pe);
2090
2091 if (pe->encod_filter == NULL ||
2092 pe->encod_filter_backup == NULL ||
2093 pe->conv2_filter == NULL ||
2094 pe->conv2_filter_backup == NULL ||
2095 pe->conv1_filter == NULL) {
2096 mime_header_encoder_delete(pe);
2097 return NULL;
2098 }
2099
2100 if (transenc->no_encoding == mbfl_no_encoding_qprint) {
2101 pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
2102 pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
2103 } else {
2104 pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
2105 pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
2106 }
2107
2108 return pe;
2109 }
2110
2111 void
mime_header_encoder_delete(struct mime_header_encoder_data * pe)2112 mime_header_encoder_delete(struct mime_header_encoder_data *pe)
2113 {
2114 if (pe) {
2115 mbfl_convert_filter_delete(pe->conv1_filter);
2116 mbfl_convert_filter_delete(pe->block_filter);
2117 mbfl_convert_filter_delete(pe->conv2_filter);
2118 mbfl_convert_filter_delete(pe->conv2_filter_backup);
2119 mbfl_convert_filter_delete(pe->encod_filter);
2120 mbfl_convert_filter_delete(pe->encod_filter_backup);
2121 mbfl_memory_device_clear(&pe->outdev);
2122 mbfl_memory_device_clear(&pe->tmpdev);
2123 mbfl_free((void*)pe);
2124 }
2125 }
2126
2127 int
mime_header_encoder_feed(int c,struct mime_header_encoder_data * pe)2128 mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
2129 {
2130 return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
2131 }
2132
2133 mbfl_string *
mbfl_mime_header_encode(mbfl_string * string,mbfl_string * result,const mbfl_encoding * outcode,const mbfl_encoding * encoding,const char * linefeed,int indent)2134 mbfl_mime_header_encode(
2135 mbfl_string *string,
2136 mbfl_string *result,
2137 const mbfl_encoding *outcode,
2138 const mbfl_encoding *encoding,
2139 const char *linefeed,
2140 int indent)
2141 {
2142 size_t n;
2143 unsigned char *p;
2144 struct mime_header_encoder_data *pe;
2145
2146 mbfl_string_init(result);
2147 result->no_language = string->no_language;
2148 result->encoding = &mbfl_encoding_ascii;
2149
2150 pe = mime_header_encoder_new(string->encoding, outcode, encoding);
2151 if (pe == NULL) {
2152 return NULL;
2153 }
2154
2155 if (linefeed != NULL) {
2156 n = 0;
2157 while (*linefeed && n < 8) {
2158 pe->lwsp[n++] = *linefeed++;
2159 }
2160 pe->lwsp[n++] = 0x20;
2161 pe->lwsp[n] = '\0';
2162 pe->lwsplen = n;
2163 }
2164 if (indent > 0 && indent < 74) {
2165 pe->firstindent = indent;
2166 }
2167
2168 n = string->len;
2169 p = string->val;
2170 while (n > 0) {
2171 (*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
2172 n--;
2173 }
2174
2175 result = mime_header_encoder_result(pe, result);
2176 mime_header_encoder_delete(pe);
2177
2178 return result;
2179 }
2180
2181
2182 /*
2183 * MIME header decode
2184 */
/* Working state of the MIME header decoder. */
struct mime_header_decoder_data {
	mbfl_convert_filter *deco_filter;  /* transfer decoding (base64 / quoted-printable -> 8bit); piped into conv1_filter */
	mbfl_convert_filter *conv1_filter; /* charset -> wchar; piped into conv2_filter */
	mbfl_convert_filter *conv2_filter; /* wchar -> outcode; writes to outdev */
	mbfl_memory_device outdev;         /* decoded output */
	mbfl_memory_device tmpdev;         /* raw characters held back while an encoded-word header is being recognised */
	size_t cspos;                      /* offset in tmpdev at which the charset name starts */
	int status;                        /* state of mime_header_decoder_collector() */
	const mbfl_encoding *encoding;     /* transfer encoding of the current encoded word */
	const mbfl_encoding *incode;       /* charset of the current encoded word */
	const mbfl_encoding *outcode;      /* charset of the decoded output */
};
2197
/*
 * Byte-wise state machine that recognises encoded words of the form
 * "=?charset?B-or-Q?text?=" and decodes them.
 *
 * pd->status values:
 *   0 (default)  plain text, forwarded to conv1_filter
 *   1            '=' seen, waiting for '?'
 *   2            collecting the charset name
 *   3            waiting for the encoding letter (B/b or Q/q)
 *   4            waiting for the '?' that ends the header
 *   5            inside the encoded text
 *   6            '?' seen inside the encoded text, waiting for '='
 *   7            directly after an encoded word
 *   8            line break seen after an encoded word
 *   9            line break seen elsewhere
 *
 * Bytes that may belong to an encoded-word header are kept in tmpdev and
 * sent through conv1_filter unchanged when the header turns out to be
 * invalid.  Always returns `c`.
 */
static int
mime_header_decoder_collector(int c, void* data)
{
	const mbfl_encoding *encoding;
	struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;

	switch (pd->status) {
	case 1:
		if (c == 0x3f) {		/* ? */
			mbfl_memory_device_output(c, &pd->tmpdev);
			/* the charset name starts right after "=?" */
			pd->cspos = pd->tmpdev.pos;
			pd->status = 2;
		} else {
			/* not an encoded word: release what was held back */
			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
			mbfl_memory_device_reset(&pd->tmpdev);
			if (c == 0x3d) {		/* = */
				mbfl_memory_device_output(c, &pd->tmpdev);
			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
				pd->status = 9;
			} else {
				(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
				pd->status = 0;
			}
		}
		break;
	case 2:		/* store charset string */
		if (c == 0x3f) {		/* ? */
			/* identify charset */
			mbfl_memory_device_output('\0', &pd->tmpdev);
			encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
			if (encoding != NULL) {
				pd->incode = encoding;
				pd->status = 3;
			}
			/* remove the temporary terminator again and keep the '?' */
			mbfl_memory_device_unput(&pd->tmpdev);
			mbfl_memory_device_output(c, &pd->tmpdev);
		} else {
			mbfl_memory_device_output(c, &pd->tmpdev);
			if (pd->tmpdev.pos > 100) {		/* too long charset string */
				pd->status = 0;
			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
				mbfl_memory_device_unput(&pd->tmpdev);
				pd->status = 9;
			}
			if (pd->status != 2) {
				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
				mbfl_memory_device_reset(&pd->tmpdev);
			}
		}
		break;
	case 3:		/* identify encoding */
		mbfl_memory_device_output(c, &pd->tmpdev);
		if (c == 0x42 || c == 0x62) {		/* 'B' or 'b' */
			pd->encoding = &mbfl_encoding_base64;
			pd->status = 4;
		} else if (c == 0x51 || c == 0x71) {	/* 'Q' or 'q' */
			pd->encoding = &mbfl_encoding_qprint;
			pd->status = 4;
		} else {
			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
				mbfl_memory_device_unput(&pd->tmpdev);
				pd->status = 9;
			} else {
				pd->status = 0;
			}
			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
			mbfl_memory_device_reset(&pd->tmpdev);
		}
		break;
	case 4:		/* reset filter */
		mbfl_memory_device_output(c, &pd->tmpdev);
		if (c == 0x3f) {		/* ? */
			/* charset convert filter */
			mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, &mbfl_encoding_wchar);
			/* decode filter */
			mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, &mbfl_encoding_8bit);
			pd->status = 5;
		} else {
			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
				mbfl_memory_device_unput(&pd->tmpdev);
				pd->status = 9;
			} else {
				pd->status = 0;
			}
			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
		}
		/* the header bytes are dropped on success and were forwarded on failure */
		mbfl_memory_device_reset(&pd->tmpdev);
		break;
	case 5:		/* encoded block */
		if (c == 0x3f) {		/* ? */
			pd->status = 6;
		} else {
			(*pd->deco_filter->filter_function)(c, pd->deco_filter);
		}
		break;
	case 6:		/* check end position */
		if (c == 0x3d) {		/* = */
			/* flush and reset filter */
			(*pd->deco_filter->filter_flush)(pd->deco_filter);
			(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
			mbfl_convert_filter_reset(pd->conv1_filter, &mbfl_encoding_ascii, &mbfl_encoding_wchar);
			pd->status = 7;
		} else {
			/* the '?' was part of the text: pass it on after all */
			(*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
			if (c != 0x3f) {		/* ? */
				(*pd->deco_filter->filter_function)(c, pd->deco_filter);
				pd->status = 5;
			}
		}
		break;
	case 7:		/* after encoded block */
		if (c == 0x0d || c == 0x0a) {	/* CR LF */
			pd->status = 8;
		} else {
			mbfl_memory_device_output(c, &pd->tmpdev);
			if (c == 0x3d) {		/* = */
				pd->status = 1;
			} else if (c != 0x20 && c != 0x09) {		/* not space */
				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
				mbfl_memory_device_reset(&pd->tmpdev);
				pd->status = 0;
			}
		}
		break;
	case 8:		/* folding */
	case 9:		/* folding */
		/* line breaks and blanks are skipped; the first other character ends the fold */
		if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
			if (c == 0x3d) {		/* = */
				if (pd->status == 8) {
					mbfl_memory_device_output(0x20, &pd->tmpdev);	/* SPACE */
				} else {
					(*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
				}
				mbfl_memory_device_output(c, &pd->tmpdev);
				pd->status = 1;
			} else {
				mbfl_memory_device_output(0x20, &pd->tmpdev);
				mbfl_memory_device_output(c, &pd->tmpdev);
				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
				mbfl_memory_device_reset(&pd->tmpdev);
				pd->status = 0;
			}
		}
		break;
	default:		/* non encoded block */
		if (c == 0x0d || c == 0x0a) {	/* CR LF */
			pd->status = 9;
		} else if (c == 0x3d) {		/* = */
			mbfl_memory_device_output(c, &pd->tmpdev);
			pd->status = 1;
		} else {
			(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
		}
		break;
	}

	return c;
}
2356
2357 mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data * pd,mbfl_string * result)2358 mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2359 {
2360 switch (pd->status) {
2361 case 1:
2362 case 2:
2363 case 3:
2364 case 4:
2365 case 7:
2366 case 8:
2367 case 9:
2368 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2369 break;
2370 case 5:
2371 case 6:
2372 (*pd->deco_filter->filter_flush)(pd->deco_filter);
2373 (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2374 break;
2375 }
2376 (*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2377 mbfl_memory_device_reset(&pd->tmpdev);
2378 pd->status = 0;
2379
2380 return mbfl_memory_device_result(&pd->outdev, result);
2381 }
2382
2383 struct mime_header_decoder_data*
mime_header_decoder_new(const mbfl_encoding * outcode)2384 mime_header_decoder_new(const mbfl_encoding *outcode)
2385 {
2386 struct mime_header_decoder_data *pd;
2387
2388 pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
2389 if (pd == NULL) {
2390 return NULL;
2391 }
2392
2393 mbfl_memory_device_init(&pd->outdev, 0, 0);
2394 mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2395 pd->cspos = 0;
2396 pd->status = 0;
2397 pd->encoding = &mbfl_encoding_8bit;
2398 pd->incode = &mbfl_encoding_ascii;
2399 pd->outcode = outcode;
2400 /* charset convert filter */
2401 pd->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2402 pd->conv1_filter = mbfl_convert_filter_new(pd->incode, &mbfl_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2403 /* decode filter */
2404 pd->deco_filter = mbfl_convert_filter_new(pd->encoding, &mbfl_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2405
2406 if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2407 mime_header_decoder_delete(pd);
2408 return NULL;
2409 }
2410
2411 return pd;
2412 }
2413
2414 void
mime_header_decoder_delete(struct mime_header_decoder_data * pd)2415 mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2416 {
2417 if (pd) {
2418 mbfl_convert_filter_delete(pd->conv2_filter);
2419 mbfl_convert_filter_delete(pd->conv1_filter);
2420 mbfl_convert_filter_delete(pd->deco_filter);
2421 mbfl_memory_device_clear(&pd->outdev);
2422 mbfl_memory_device_clear(&pd->tmpdev);
2423 mbfl_free((void*)pd);
2424 }
2425 }
2426
/*
 * Feed a single input byte `c` to the decoder `pd`.
 * Thin public wrapper: the work is done by mime_header_decoder_collector(),
 * whose return value is passed back unchanged.
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	int rv;

	rv = mime_header_decoder_collector(c, pd);
	return rv;
}
2432
2433 mbfl_string *
mbfl_mime_header_decode(mbfl_string * string,mbfl_string * result,const mbfl_encoding * outcode)2434 mbfl_mime_header_decode(
2435 mbfl_string *string,
2436 mbfl_string *result,
2437 const mbfl_encoding *outcode)
2438 {
2439 size_t n;
2440 unsigned char *p;
2441 struct mime_header_decoder_data *pd;
2442
2443 mbfl_string_init(result);
2444 result->no_language = string->no_language;
2445 result->encoding = outcode;
2446
2447 pd = mime_header_decoder_new(outcode);
2448 if (pd == NULL) {
2449 return NULL;
2450 }
2451
2452 /* feed data */
2453 n = string->len;
2454 p = string->val;
2455 while (n > 0) {
2456 mime_header_decoder_collector(*p++, pd);
2457 n--;
2458 }
2459
2460 result = mime_header_decoder_result(pd, result);
2461 mime_header_decoder_delete(pd);
2462
2463 return result;
2464 }
2465
2466
2467
2468 /*
2469 * convert HTML numeric entity
2470 */
/* State shared by the HTML numeric entity encode/decode collectors. */
struct collector_htmlnumericentity_data {
	mbfl_convert_filter *decoder;	/* downstream filter that receives every character the collectors emit */
	int status;	/* decode state: 0 plain text, 1 after '&', 2 after "&#", 3 in decimal digits, 4 after "&#x", 5 in hex digits */
	int cache;	/* numeric value accumulated from the digits read so far */
	int digit;	/* number of digits accumulated in cache */
	int *convmap;	/* mapsize entries of 4 ints each: [0] lowest code, [1] highest code, [2] offset (added on encode, subtracted on decode), [3] mask ANDed on encode */
	int mapsize;	/* number of 4-int entries in convmap */
};
2479
2480 static int
collector_encode_htmlnumericentity(int c,void * data)2481 collector_encode_htmlnumericentity(int c, void *data)
2482 {
2483 struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2484 int f, n, s, r, d, size, *mapelm;
2485
2486 size = pc->mapsize;
2487 f = 0;
2488 n = 0;
2489 while (n < size) {
2490 mapelm = &(pc->convmap[n*4]);
2491 if (c >= mapelm[0] && c <= mapelm[1]) {
2492 s = (c + mapelm[2]) & mapelm[3];
2493 if (s >= 0) {
2494 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2495 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2496 r = 100000000;
2497 s %= r;
2498 while (r > 0) {
2499 d = s/r;
2500 if (d || f) {
2501 f = 1;
2502 s %= r;
2503 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2504 }
2505 r /= 10;
2506 }
2507 if (!f) {
2508 f = 1;
2509 (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2510 }
2511 (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
2512 }
2513 }
2514 if (f) {
2515 break;
2516 }
2517 n++;
2518 }
2519 if (!f) {
2520 (*pc->decoder->filter_function)(c, pc->decoder);
2521 }
2522
2523 return c;
2524 }
2525
2526 static int
collector_decode_htmlnumericentity(int c,void * data)2527 collector_decode_htmlnumericentity(int c, void *data)
2528 {
2529 struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2530 int f, n, s, r, d, size, *mapelm;
2531
2532 switch (pc->status) {
2533 case 1:
2534 if (c == 0x23) { /* '#' */
2535 pc->status = 2;
2536 } else {
2537 pc->status = 0;
2538 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2539 (*pc->decoder->filter_function)(c, pc->decoder);
2540 }
2541 break;
2542 case 2:
2543 if (c == 0x78) { /* 'x' */
2544 pc->status = 4;
2545 } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2546 pc->cache = c - 0x30;
2547 pc->status = 3;
2548 pc->digit = 1;
2549 } else {
2550 pc->status = 0;
2551 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2552 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2553 (*pc->decoder->filter_function)(c, pc->decoder);
2554 }
2555 break;
2556 case 3:
2557 s = 0;
2558 f = 0;
2559 if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2560 s = pc->cache;
2561 if (pc->digit > 9 || s > INT_MAX/10) {
2562 pc->status = 0;
2563 f = 1;
2564 } else {
2565 s = s*10 + (c - 0x30);
2566 pc->cache = s;
2567 pc->digit++;
2568 }
2569 } else {
2570 pc->status = 0;
2571 s = pc->cache;
2572 f = 1;
2573 n = 0;
2574 size = pc->mapsize;
2575 while (n < size) {
2576 mapelm = &(pc->convmap[n*4]);
2577 d = s - mapelm[2];
2578 if (d >= mapelm[0] && d <= mapelm[1]) {
2579 f = 0;
2580 (*pc->decoder->filter_function)(d, pc->decoder);
2581 if (c != 0x3b) { /* ';' */
2582 (*pc->decoder->filter_function)(c, pc->decoder);
2583 }
2584 break;
2585 }
2586 n++;
2587 }
2588 }
2589 if (f) {
2590 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2591 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2592 r = 1;
2593 n = pc->digit;
2594 while (n > 1) {
2595 r *= 10;
2596 n--;
2597 }
2598 while (r > 0) {
2599 d = s/r;
2600 s %= r;
2601 r /= 10;
2602 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2603 }
2604 (*pc->decoder->filter_function)(c, pc->decoder);
2605 }
2606 break;
2607 case 4:
2608 if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2609 pc->cache = c - 0x30;
2610 pc->status = 5;
2611 pc->digit = 1;
2612 } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F' */
2613 pc->cache = c - 0x41 + 10;
2614 pc->status = 5;
2615 pc->digit = 1;
2616 } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f' */
2617 pc->cache = c - 0x61 + 10;
2618 pc->status = 5;
2619 pc->digit = 1;
2620 } else {
2621 pc->status = 0;
2622 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2623 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2624 (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2625 (*pc->decoder->filter_function)(c, pc->decoder);
2626 }
2627 break;
2628 case 5:
2629 s = 0;
2630 f = 0;
2631 if ((c >= 0x30 && c <= 0x39) ||
2632 (c >= 0x41 && c <= 0x46) ||
2633 (c >= 0x61 && c <= 0x66)) { /* '0' - '9' or 'a' - 'f' */
2634 if (pc->digit > 9) {
2635 pc->status = 0;
2636 s = pc->cache;
2637 f = 1;
2638 } else {
2639 if (c >= 0x30 && c <= 0x39) {
2640 s = pc->cache*16 + (c - 0x30);
2641 } else if (c >= 0x41 && c <= 0x46) {
2642 s = pc->cache*16 + (c - 0x41 + 10);
2643 } else {
2644 s = pc->cache*16 + (c - 0x61 + 10);
2645 }
2646 pc->cache = s;
2647 pc->digit++;
2648 }
2649 } else {
2650 pc->status = 0;
2651 s = pc->cache;
2652 f = 1;
2653 n = 0;
2654 size = pc->mapsize;
2655 while (n < size) {
2656 mapelm = &(pc->convmap[n*4]);
2657 d = s - mapelm[2];
2658 if (d >= mapelm[0] && d <= mapelm[1]) {
2659 f = 0;
2660 (*pc->decoder->filter_function)(d, pc->decoder);
2661 if (c != 0x3b) { /* ';' */
2662 (*pc->decoder->filter_function)(c, pc->decoder);
2663 }
2664 break;
2665 }
2666 n++;
2667 }
2668 }
2669 if (f) {
2670 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2671 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2672 (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2673 r = 1;
2674 n = pc->digit;
2675 while (n > 0) {
2676 r *= 16;
2677 n--;
2678 }
2679 s %= r;
2680 r /= 16;
2681 while (r > 0) {
2682 d = s/r;
2683 s %= r;
2684 r /= 16;
2685 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2686 }
2687 (*pc->decoder->filter_function)(c, pc->decoder);
2688 }
2689 break;
2690 default:
2691 if (c == 0x26) { /* '&' */
2692 pc->status = 1;
2693 } else {
2694 (*pc->decoder->filter_function)(c, pc->decoder);
2695 }
2696 break;
2697 }
2698
2699 return c;
2700 }
2701
2702 static int
collector_encode_hex_htmlnumericentity(int c,void * data)2703 collector_encode_hex_htmlnumericentity(int c, void *data)
2704 {
2705 struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2706 int f, n, s, r, d, size, *mapelm;
2707
2708 size = pc->mapsize;
2709 f = 0;
2710 n = 0;
2711 while (n < size) {
2712 mapelm = &(pc->convmap[n*4]);
2713 if (c >= mapelm[0] && c <= mapelm[1]) {
2714 s = (c + mapelm[2]) & mapelm[3];
2715 if (s >= 0) {
2716 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2717 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2718 (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2719 r = 0x1000000;
2720 s %= r;
2721 while (r > 0) {
2722 d = s/r;
2723 if (d || f) {
2724 f = 1;
2725 s %= r;
2726 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2727 }
2728 r /= 16;
2729 }
2730 if (!f) {
2731 f = 1;
2732 (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2733 }
2734 (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
2735 }
2736 }
2737 if (f) {
2738 break;
2739 }
2740 n++;
2741 }
2742 if (!f) {
2743 (*pc->decoder->filter_function)(c, pc->decoder);
2744 }
2745
2746 return c;
2747 }
2748
/*
 * Flush handler for the numeric entity decoder: re-emits, verbatim, an
 * entity that was still incomplete when the input ended, then resets the
 * decoder state.
 *
 * Although the parameter is declared as mbfl_convert_filter *, the pointer
 * is used as a struct collector_htmlnumericentity_data *;
 * mbfl_html_numeric_entity() registers this function through a cast to
 * int (*)(void*) together with its collector data.
 *
 * The hexadecimal digits are extracted by shifting rather than by building
 * 16^digit in an int, which overflowed for 8 or more digits.
 *
 * Always returns 0.
 */
int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
{
	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
	int n, s, r, d;

	switch (pc->status) {
	case 1: /* '&' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		break;
	case 2: /* '#' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		break;
	case 3: /* '0'-'9' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */

		/* print pc->digit decimal digits, most significant first */
		s = pc->cache;
		r = 1;
		n = pc->digit;
		while (n > 1) {
			r *= 10;
			n--;
		}
		while (r > 0) {
			d = s/r;
			s %= r;
			r /= 10;
			(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
		}
		break;
	case 4: /* 'x' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
		break;
	case 5: /* '0'-'9','a'-'f' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */

		/*
		 * Print pc->digit nibbles, most significant first.  Positions
		 * beyond the width of an int (two nibbles per byte, assuming
		 * 8-bit bytes) are printed as '0'.
		 */
		n = pc->digit;
		while (n > 0) {
			n--;
			if (n < (int)(sizeof(unsigned int)*2)) {
				d = (int)(((unsigned int)pc->cache >> (n*4)) & 0xf);
			} else {
				d = 0;
			}
			(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
		}
		break;
	default:
		/* status 0: nothing pending */
		break;
	}

	pc->status = 0;
	pc->cache = 0;
	pc->digit = 0;

	return 0;
}
2819
2820
2821 mbfl_string *
mbfl_html_numeric_entity(mbfl_string * string,mbfl_string * result,int * convmap,int mapsize,int type)2822 mbfl_html_numeric_entity(
2823 mbfl_string *string,
2824 mbfl_string *result,
2825 int *convmap,
2826 int mapsize,
2827 int type)
2828 {
2829 struct collector_htmlnumericentity_data pc;
2830 mbfl_memory_device device;
2831 mbfl_convert_filter *encoder;
2832 size_t n;
2833 unsigned char *p;
2834
2835 if (string == NULL || result == NULL) {
2836 return NULL;
2837 }
2838 mbfl_string_init(result);
2839 result->no_language = string->no_language;
2840 result->encoding = string->encoding;
2841 mbfl_memory_device_init(&device, string->len, 0);
2842
2843 /* output code filter */
2844 pc.decoder = mbfl_convert_filter_new(
2845 &mbfl_encoding_wchar,
2846 string->encoding,
2847 mbfl_memory_device_output, 0, &device);
2848 /* wchar filter */
2849 if (type == 0) { /* decimal output */
2850 encoder = mbfl_convert_filter_new(
2851 string->encoding,
2852 &mbfl_encoding_wchar,
2853 collector_encode_htmlnumericentity, 0, &pc);
2854 } else if (type == 2) { /* hex output */
2855 encoder = mbfl_convert_filter_new(
2856 string->encoding,
2857 &mbfl_encoding_wchar,
2858 collector_encode_hex_htmlnumericentity, 0, &pc);
2859 } else { /* type == 1: decimal/hex input */
2860 encoder = mbfl_convert_filter_new(
2861 string->encoding,
2862 &mbfl_encoding_wchar,
2863 collector_decode_htmlnumericentity,
2864 (int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
2865 }
2866 if (pc.decoder == NULL || encoder == NULL) {
2867 mbfl_convert_filter_delete(encoder);
2868 mbfl_convert_filter_delete(pc.decoder);
2869 return NULL;
2870 }
2871 pc.status = 0;
2872 pc.cache = 0;
2873 pc.digit = 0;
2874 pc.convmap = convmap;
2875 pc.mapsize = mapsize;
2876
2877 /* feed data */
2878 p = string->val;
2879 n = string->len;
2880 if (p != NULL) {
2881 while (n > 0) {
2882 if ((*encoder->filter_function)(*p++, encoder) < 0) {
2883 break;
2884 }
2885 n--;
2886 }
2887 }
2888 mbfl_convert_filter_flush(encoder);
2889 mbfl_convert_filter_flush(pc.decoder);
2890 result = mbfl_memory_device_result(&device, result);
2891 mbfl_convert_filter_delete(encoder);
2892 mbfl_convert_filter_delete(pc.decoder);
2893
2894 return result;
2895 }
2896