1 /*
2 * charset=UTF-8
3 */
4
5 /*
6 * "streamable kanji code filter and converter"
7 *
8 * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
9 *
10 * This software is released under the GNU Lesser General Public License.
11 * (Version 2.1, February 1999)
12 * Please read the following detail of the licence (in japanese).
13 *
14 * ◆使用許諾条件◆
15 *
16 * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
17 * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
18 * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
19 * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
20 * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
21 * することはできません。
22 *
23 * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
24 * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
25 * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
26 * による許諾を得る必要があります。
27 *
28 * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
29 * ます。「GNU Lesser General Public License」とは、これまでLibrary General
30 * Public Licenseと呼ばれていたものです。
31 * http://www.gnu.org/ --- GNUウェブサイト
32 * http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
33 * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
34 *
35 * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
36 * はありません。
37 *
38 * ◆保証内容◆
39 *
40 * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
41 * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
42 * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
43 * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
44 * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
45 * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
46 * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
47 * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
48 * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
49 * 契約・規定に優先します。
50 *
51 * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
52 *
53 * 〒102-0073
54 * 東京都千代田区九段北1-13-5日本地所第一ビル4F
55 * 株式会社ハッピーサイズ
56 * Phone: 03-3512-3655, Fax: 03-3512-3656
57 * Email: sales@happysize.co.jp
58 * Web: http://happysize.com/
59 *
60 * ◆著者◆
61 *
62 * 金本 茂 <sgk@happysize.co.jp>
63 *
64 * ◆履歴◆
65 *
66 * 1998/11/10 sgk implementation in C++
67 * 1999/4/25 sgk Cで書きなおし。
68 * 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
69 * 1999/6/?? Unicodeサポート。
70 * 1999/6/22 sgk ライセンスをLGPLに変更。
71 *
72 */
73
74 /*
75 * Unicode support
76 *
77 * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
78 * All rights reserved.
79 *
80 */
81
82
83 #ifdef HAVE_CONFIG_H
84 #include "config.h"
85 #endif
86
87 #include <stddef.h>
88
89 #ifdef HAVE_STRING_H
90 #include <string.h>
91 #endif
92
93 #ifdef HAVE_STRINGS_H
94 #include <strings.h>
95 #endif
96
97 #ifdef HAVE_STDDEF_H
98 #include <stddef.h>
99 #endif
100
101 #include "mbfilter.h"
102 #include "mbfl_filter_output.h"
103 #include "mbfilter_8bit.h"
104 #include "mbfilter_pass.h"
105 #include "mbfilter_wchar.h"
106 #include "filters/mbfilter_ascii.h"
107 #include "filters/mbfilter_base64.h"
108 #include "filters/mbfilter_qprint.h"
109 #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
110 #include "filters/mbfilter_utf8.h"
111
112 #include "eaw_table.h"
113
/*
 * Hexadecimal digit lookup: entry i holds the ASCII code of the i-th
 * character of "0123456789ABCDEF".
 */
static char mbfl_hexchar_table[] = {
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* '0' .. '7' */
	0x38, 0x39,					/* '8', '9'   */
	0x41, 0x42, 0x43, 0x44, 0x45, 0x46		/* 'A' .. 'F' */
};
118
119
120
121 /*
122 * encoding filter
123 */
/*
 * CK(statement): evaluate `statement` exactly once and make the enclosing
 * function return -1 if the value is negative.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
125
126
127 /*
128 * buffering converter
129 */
130 mbfl_buffer_converter *
mbfl_buffer_converter_new(const mbfl_encoding * from,const mbfl_encoding * to,size_t buf_initsz)131 mbfl_buffer_converter_new(
132 const mbfl_encoding *from,
133 const mbfl_encoding *to,
134 size_t buf_initsz)
135 {
136 mbfl_buffer_converter *convd;
137
138 /* allocate */
139 convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof(mbfl_buffer_converter));
140 if (convd == NULL) {
141 return NULL;
142 }
143
144 /* initialize */
145 convd->from = from;
146 convd->to = to;
147
148 /* create convert filter */
149 convd->filter1 = NULL;
150 convd->filter2 = NULL;
151 if (mbfl_convert_filter_get_vtbl(convd->from, convd->to) != NULL) {
152 convd->filter1 = mbfl_convert_filter_new(convd->from, convd->to, mbfl_memory_device_output, NULL, &convd->device);
153 } else {
154 convd->filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, convd->to, mbfl_memory_device_output, NULL, &convd->device);
155 if (convd->filter2 != NULL) {
156 convd->filter1 = mbfl_convert_filter_new(convd->from,
157 &mbfl_encoding_wchar,
158 (int (*)(int, void*))convd->filter2->filter_function,
159 (int (*)(void*))convd->filter2->filter_flush,
160 convd->filter2);
161 if (convd->filter1 == NULL) {
162 mbfl_convert_filter_delete(convd->filter2);
163 }
164 }
165 }
166 if (convd->filter1 == NULL) {
167 mbfl_free(convd);
168 return NULL;
169 }
170
171 mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
172
173 return convd;
174 }
175
176
177 void
mbfl_buffer_converter_delete(mbfl_buffer_converter * convd)178 mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
179 {
180 if (convd != NULL) {
181 if (convd->filter1) {
182 mbfl_convert_filter_delete(convd->filter1);
183 }
184 if (convd->filter2) {
185 mbfl_convert_filter_delete(convd->filter2);
186 }
187 mbfl_memory_device_clear(&convd->device);
188 mbfl_free((void*)convd);
189 }
190 }
191
192 void
mbfl_buffer_converter_reset(mbfl_buffer_converter * convd)193 mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
194 {
195 mbfl_memory_device_reset(&convd->device);
196 }
197
198 int
mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter * convd,int mode)199 mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
200 {
201 if (convd != NULL) {
202 if (convd->filter2 != NULL) {
203 convd->filter2->illegal_mode = mode;
204 } else if (convd->filter1 != NULL) {
205 convd->filter1->illegal_mode = mode;
206 } else {
207 return 0;
208 }
209 }
210
211 return 1;
212 }
213
214 int
mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter * convd,int substchar)215 mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
216 {
217 if (convd != NULL) {
218 if (convd->filter2 != NULL) {
219 convd->filter2->illegal_substchar = substchar;
220 } else if (convd->filter1 != NULL) {
221 convd->filter1->illegal_substchar = substchar;
222 } else {
223 return 0;
224 }
225 }
226
227 return 1;
228 }
229
230 int
mbfl_buffer_converter_strncat(mbfl_buffer_converter * convd,const unsigned char * p,size_t n)231 mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, size_t n)
232 {
233 mbfl_convert_filter *filter;
234 int (*filter_function)(int c, mbfl_convert_filter *filter);
235
236 if (convd != NULL && p != NULL) {
237 filter = convd->filter1;
238 if (filter != NULL) {
239 filter_function = filter->filter_function;
240 while (n > 0) {
241 if ((*filter_function)(*p++, filter) < 0) {
242 break;
243 }
244 n--;
245 }
246 }
247 }
248
249 return n;
250 }
251
252 int
mbfl_buffer_converter_feed(mbfl_buffer_converter * convd,mbfl_string * string)253 mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
254 {
255 return mbfl_buffer_converter_feed2(convd, string, NULL);
256 }
257
258 int
mbfl_buffer_converter_feed2(mbfl_buffer_converter * convd,mbfl_string * string,size_t * loc)259 mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, size_t *loc)
260 {
261 size_t n;
262 unsigned char *p;
263 mbfl_convert_filter *filter;
264 int (*filter_function)(int c, mbfl_convert_filter *filter);
265
266 if (convd == NULL || string == NULL) {
267 return -1;
268 }
269 mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
270 /* feed data */
271 n = string->len;
272 p = string->val;
273
274 filter = convd->filter1;
275 if (filter != NULL) {
276 filter_function = filter->filter_function;
277 while (n > 0) {
278 if ((*filter_function)(*p++, filter) < 0) {
279 if (loc) {
280 *loc = p - string->val;
281 }
282 return -1;
283 }
284 n--;
285 }
286 }
287 if (loc) {
288 *loc = p - string->val;
289 }
290 return 0;
291 }
292
293
294 int
mbfl_buffer_converter_flush(mbfl_buffer_converter * convd)295 mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
296 {
297 if (convd == NULL) {
298 return -1;
299 }
300
301 if (convd->filter1 != NULL) {
302 mbfl_convert_filter_flush(convd->filter1);
303 }
304 if (convd->filter2 != NULL) {
305 mbfl_convert_filter_flush(convd->filter2);
306 }
307
308 return 0;
309 }
310
311 mbfl_string *
mbfl_buffer_converter_getbuffer(mbfl_buffer_converter * convd,mbfl_string * result)312 mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
313 {
314 if (convd != NULL && result != NULL && convd->device.buffer != NULL) {
315 result->encoding = convd->to;
316 result->val = convd->device.buffer;
317 result->len = convd->device.pos;
318 } else {
319 result = NULL;
320 }
321
322 return result;
323 }
324
325 mbfl_string *
mbfl_buffer_converter_result(mbfl_buffer_converter * convd,mbfl_string * result)326 mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
327 {
328 if (convd == NULL || result == NULL) {
329 return NULL;
330 }
331 result->encoding = convd->to;
332 return mbfl_memory_device_result(&convd->device, result);
333 }
334
335 mbfl_string *
mbfl_buffer_converter_feed_result(mbfl_buffer_converter * convd,mbfl_string * string,mbfl_string * result)336 mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
337 mbfl_string *result)
338 {
339 if (convd == NULL || string == NULL || result == NULL) {
340 return NULL;
341 }
342 mbfl_buffer_converter_feed(convd, string);
343 if (convd->filter1 != NULL) {
344 mbfl_convert_filter_flush(convd->filter1);
345 }
346 if (convd->filter2 != NULL) {
347 mbfl_convert_filter_flush(convd->filter2);
348 }
349 result->encoding = convd->to;
350 return mbfl_memory_device_result(&convd->device, result);
351 }
352
/*
 * Return the sum of the num_illegalchar counters of the converter's
 * filters (filter1 and filter2, whichever exist).  Returns 0 for a NULL
 * converter.
 */
size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
	size_t total = 0;

	if (!convd) {
		return 0;
	}

	if (convd->filter1) {
		total += convd->filter1->num_illegalchar;
	}
	if (convd->filter2) {
		total += convd->filter2->num_illegalchar;
	}

	return total;
}
371
372 /*
373 * encoding detector
374 */
375 mbfl_encoding_detector *
mbfl_encoding_detector_new(const mbfl_encoding ** elist,int elistsz,int strict)376 mbfl_encoding_detector_new(const mbfl_encoding **elist, int elistsz, int strict)
377 {
378 mbfl_encoding_detector *identd;
379
380 int i, num;
381 mbfl_identify_filter *filter;
382
383 if (elist == NULL || elistsz <= 0) {
384 return NULL;
385 }
386
387 /* allocate */
388 identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
389 if (identd == NULL) {
390 return NULL;
391 }
392 identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
393 if (identd->filter_list == NULL) {
394 mbfl_free(identd);
395 return NULL;
396 }
397
398 /* create filters */
399 i = 0;
400 num = 0;
401 while (i < elistsz) {
402 filter = mbfl_identify_filter_new2(elist[i]);
403 if (filter != NULL) {
404 identd->filter_list[num] = filter;
405 num++;
406 }
407 i++;
408 }
409 identd->filter_list_size = num;
410
411 /* set strict flag */
412 identd->strict = strict;
413
414 return identd;
415 }
416
417
418 void
mbfl_encoding_detector_delete(mbfl_encoding_detector * identd)419 mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
420 {
421 int i;
422
423 if (identd != NULL) {
424 if (identd->filter_list != NULL) {
425 i = identd->filter_list_size;
426 while (i > 0) {
427 i--;
428 mbfl_identify_filter_delete(identd->filter_list[i]);
429 }
430 mbfl_free((void *)identd->filter_list);
431 }
432 mbfl_free((void *)identd);
433 }
434 }
435
436 int
mbfl_encoding_detector_feed(mbfl_encoding_detector * identd,mbfl_string * string)437 mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
438 {
439 int res = 0;
440 /* feed data */
441 if (identd != NULL && string != NULL && string->val != NULL) {
442 int num = identd->filter_list_size;
443 size_t n = string->len;
444 unsigned char *p = string->val;
445 int bad = 0;
446 while (n > 0) {
447 int i;
448 for (i = 0; i < num; i++) {
449 mbfl_identify_filter *filter = identd->filter_list[i];
450 if (!filter->flag) {
451 (*filter->filter_function)(*p, filter);
452 if (filter->flag) {
453 bad++;
454 }
455 }
456 }
457 if ((num - 1) <= bad) {
458 res = 1;
459 break;
460 }
461 p++;
462 n--;
463 }
464 }
465
466 return res;
467 }
468
mbfl_encoding_detector_judge(mbfl_encoding_detector * identd)469 const mbfl_encoding *mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
470 {
471 mbfl_identify_filter *filter;
472 const mbfl_encoding *encoding = NULL;
473 int n;
474
475 /* judge */
476 if (identd != NULL) {
477 n = identd->filter_list_size - 1;
478 while (n >= 0) {
479 filter = identd->filter_list[n];
480 if (!filter->flag) {
481 if (!identd->strict || !filter->status) {
482 encoding = filter->encoding;
483 }
484 }
485 n--;
486 }
487
488 /* fallback judge */
489 if (!encoding) {
490 n = identd->filter_list_size - 1;
491 while (n >= 0) {
492 filter = identd->filter_list[n];
493 if (!filter->flag) {
494 encoding = filter->encoding;
495 }
496 n--;
497 }
498 }
499 }
500
501 return encoding;
502 }
503
504 /*
505 * encoding converter
506 */
507 mbfl_string *
mbfl_convert_encoding(mbfl_string * string,mbfl_string * result,const mbfl_encoding * toenc)508 mbfl_convert_encoding(
509 mbfl_string *string,
510 mbfl_string *result,
511 const mbfl_encoding *toenc)
512 {
513 size_t n;
514 unsigned char *p;
515 mbfl_memory_device device;
516 mbfl_convert_filter *filter1;
517 mbfl_convert_filter *filter2;
518
519 /* initialize */
520 if (toenc == NULL || string == NULL || result == NULL) {
521 return NULL;
522 }
523
524 filter1 = NULL;
525 filter2 = NULL;
526 if (mbfl_convert_filter_get_vtbl(string->encoding, toenc) != NULL) {
527 filter1 = mbfl_convert_filter_new(string->encoding, toenc, mbfl_memory_device_output, 0, &device);
528 } else {
529 filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
530 if (filter2 != NULL) {
531 filter1 = mbfl_convert_filter_new(string->encoding, &mbfl_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
532 if (filter1 == NULL) {
533 mbfl_convert_filter_delete(filter2);
534 }
535 }
536 }
537 if (filter1 == NULL) {
538 return NULL;
539 }
540
541 if (filter2 != NULL) {
542 filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
543 filter2->illegal_substchar = 0x3f; /* '?' */
544 }
545
546 mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
547
548 /* feed data */
549 n = string->len;
550 p = string->val;
551 if (p != NULL) {
552 while (n > 0) {
553 if ((*filter1->filter_function)(*p++, filter1) < 0) {
554 break;
555 }
556 n--;
557 }
558 }
559
560 mbfl_convert_filter_flush(filter1);
561 mbfl_convert_filter_delete(filter1);
562 if (filter2 != NULL) {
563 mbfl_convert_filter_flush(filter2);
564 mbfl_convert_filter_delete(filter2);
565 }
566
567 return mbfl_memory_device_result(&device, result);
568 }
569
570
571 /*
572 * identify encoding
573 */
574 const mbfl_encoding *
mbfl_identify_encoding(mbfl_string * string,const mbfl_encoding ** elist,int elistsz,int strict)575 mbfl_identify_encoding(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict)
576 {
577 int i, num, bad;
578 size_t n;
579 unsigned char *p;
580 mbfl_identify_filter *flist, *filter;
581 const mbfl_encoding *encoding;
582
583 /* flist is an array of mbfl_identify_filter instances */
584 flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
585 if (flist == NULL) {
586 return NULL;
587 }
588
589 num = 0;
590 if (elist != NULL) {
591 for (i = 0; i < elistsz; i++) {
592 if (!mbfl_identify_filter_init2(&flist[num], elist[i])) {
593 num++;
594 }
595 }
596 }
597
598 /* feed data */
599 n = string->len;
600 p = string->val;
601
602 if (p != NULL) {
603 bad = 0;
604 while (n > 0) {
605 for (i = 0; i < num; i++) {
606 filter = &flist[i];
607 if (!filter->flag) {
608 (*filter->filter_function)(*p, filter);
609 if (filter->flag) {
610 bad++;
611 }
612 }
613 }
614 if ((num - 1) <= bad && !strict) {
615 break;
616 }
617 p++;
618 n--;
619 }
620 }
621
622 /* judge */
623 encoding = NULL;
624
625 for (i = 0; i < num; i++) {
626 filter = &flist[i];
627 if (!filter->flag) {
628 if (strict && filter->status) {
629 continue;
630 }
631 encoding = filter->encoding;
632 break;
633 }
634 }
635
636 /* fall-back judge */
637 if (!encoding) {
638 for (i = 0; i < num; i++) {
639 filter = &flist[i];
640 if (!filter->flag && (!strict || !filter->status)) {
641 encoding = filter->encoding;
642 break;
643 }
644 }
645 }
646
647 /* cleanup */
648 /* dtors should be called in reverse order */
649 i = num;
650 while (--i >= 0) {
651 mbfl_identify_filter_cleanup(&flist[i]);
652 }
653
654 mbfl_free((void *)flist);
655
656 return encoding;
657 }
658
659 /*
660 * strlen
661 */
/*
 * Output callback that counts characters: `data` points to a size_t
 * counter, which is incremented once per call.  Returns c unchanged.
 */
static int
filter_count_output(int c, void *data)
{
	size_t *counter = (size_t *)data;

	*counter += 1;
	return c;
}
668
669 size_t
mbfl_strlen(mbfl_string * string)670 mbfl_strlen(mbfl_string *string)
671 {
672 size_t len, n, k;
673 unsigned char *p;
674 const mbfl_encoding *encoding = string->encoding;
675
676 len = 0;
677 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
678 len = string->len;
679 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
680 len = string->len/2;
681 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
682 len = string->len/4;
683 } else if (encoding->mblen_table != NULL) {
684 const unsigned char *mbtab = encoding->mblen_table;
685 n = 0;
686 p = string->val;
687 k = string->len;
688 /* count */
689 if (p != NULL) {
690 while (n < k) {
691 unsigned m = mbtab[*p];
692 n += m;
693 p += m;
694 len++;
695 }
696 }
697 } else {
698 /* wchar filter */
699 mbfl_convert_filter *filter = mbfl_convert_filter_new(
700 string->encoding,
701 &mbfl_encoding_wchar,
702 filter_count_output, 0, &len);
703 if (filter == NULL) {
704 return (size_t) -1;
705 }
706 /* count */
707 n = string->len;
708 p = string->val;
709 if (p != NULL) {
710 while (n > 0) {
711 (*filter->filter_function)(*p++, filter);
712 n--;
713 }
714 }
715 mbfl_convert_filter_delete(filter);
716 }
717
718 return len;
719 }
720
721
722 /*
723 * strpos
724 */
725 struct collector_strpos_data {
726 mbfl_convert_filter *next_filter;
727 mbfl_wchar_device needle;
728 size_t needle_len;
729 size_t start;
730 size_t output;
731 size_t found_pos;
732 size_t needle_pos;
733 size_t matched_pos;
734 };
735
736 static int
collector_strpos(int c,void * data)737 collector_strpos(int c, void* data)
738 {
739 int *p, *h, *m;
740 ssize_t n;
741 struct collector_strpos_data *pc = (struct collector_strpos_data*)data;
742
743 if (pc->output >= pc->start) {
744 if (c == (int)pc->needle.buffer[pc->needle_pos]) {
745 if (pc->needle_pos == 0) {
746 pc->found_pos = pc->output; /* found position */
747 }
748 pc->needle_pos++; /* needle pointer */
749 if (pc->needle_pos >= pc->needle_len) {
750 pc->matched_pos = pc->found_pos; /* matched position */
751 pc->needle_pos--;
752 goto retry;
753 }
754 } else if (pc->needle_pos != 0) {
755 retry:
756 h = (int *)pc->needle.buffer;
757 h++;
758 for (;;) {
759 pc->found_pos++;
760 p = h;
761 m = (int *)pc->needle.buffer;
762 n = pc->needle_pos - 1;
763 while (n > 0 && *p == *m) {
764 n--;
765 p++;
766 m++;
767 }
768 if (n <= 0) {
769 if (*m != c) {
770 pc->needle_pos = 0;
771 }
772 break;
773 } else {
774 h++;
775 pc->needle_pos--;
776 }
777 }
778 }
779 }
780
781 pc->output++;
782 return c;
783 }
784
785 /*
786 * oddlen
787 */
788 size_t
mbfl_oddlen(mbfl_string * string)789 mbfl_oddlen(mbfl_string *string)
790 {
791 size_t len, n, k;
792 unsigned char *p;
793 const mbfl_encoding *encoding = string->encoding;
794
795 len = 0;
796 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
797 return 0;
798 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
799 return len % 2;
800 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
801 return len % 4;
802 } else if (encoding->mblen_table != NULL) {
803 const unsigned char *mbtab = encoding->mblen_table;
804 n = 0;
805 p = string->val;
806 k = string->len;
807 /* count */
808 if (p != NULL) {
809 while (n < k) {
810 unsigned m = mbtab[*p];
811 n += m;
812 p += m;
813 };
814 }
815 return n-k;
816 } else {
817 /* how can i do ? */
818 return 0;
819 }
820 /* NOT REACHED */
821 }
822
/*
 * Find `needle` in `haystack` and return the character index of the match.
 *
 * Both strings are searched as UTF-8: a string whose encoding is not UTF-8
 * is first converted with mbfl_convert_encoding() into a temporary that is
 * released at `out`.  The search itself works on bytes with a 256-entry
 * skip table; the byte position of a hit is turned into a character index
 * by counting the bytes before it that are not UTF-8 continuation bytes.
 *
 * reverse == 0: search forward, starting `offset` characters into the
 *               haystack (non-positive offsets start at the beginning).
 * reverse != 0: search backward from the end for the last occurrence;
 *               a non-negative offset raises the lower bound by that many
 *               characters, a negative offset lowers the starting point.
 *
 * Return values (as size_t):
 *   character index   match found
 *   (size_t)-1        no match, or haystack shorter than needle
 *   (size_t)-4        conversion to UTF-8 failed
 *   (size_t)-8        NULL argument/value, empty needle, or UTF-8 length
 *                     table unavailable
 *   (size_t)-16       offset lies outside the haystack
 */
823 size_t
mbfl_strpos(mbfl_string * haystack,mbfl_string * needle,ssize_t offset,int reverse)824 mbfl_strpos(
825 mbfl_string *haystack,
826 mbfl_string *needle,
827 ssize_t offset,
828 int reverse)
829 {
830 size_t result;
831 mbfl_string _haystack_u8, _needle_u8;
832 const mbfl_string *haystack_u8, *needle_u8 = NULL;
833 const unsigned char *u8_tbl;
834
835 if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
836 return (size_t) -8;
837 }
838
/* u8_tbl maps a UTF-8 lead byte to the byte length used to step over a character */
839 {
840 const mbfl_encoding *u8_enc = &mbfl_encoding_utf8;
841 if (u8_enc->mblen_table == NULL) {
842 return (size_t) -8;
843 }
844 u8_tbl = u8_enc->mblen_table;
845 }
846
/* bring the haystack into UTF-8; the temporary is recognised at `out` by its address */
847 if (haystack->encoding->no_encoding != mbfl_no_encoding_utf8) {
848 mbfl_string_init(&_haystack_u8);
849 haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, &mbfl_encoding_utf8);
850 if (haystack_u8 == NULL) {
851 result = (size_t) -4;
852 goto out;
853 }
854 } else {
855 haystack_u8 = haystack;
856 }
857
/* same for the needle */
858 if (needle->encoding->no_encoding != mbfl_no_encoding_utf8) {
859 mbfl_string_init(&_needle_u8);
860 needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, &mbfl_encoding_utf8);
861 if (needle_u8 == NULL) {
862 result = (size_t) -4;
863 goto out;
864 }
865 } else {
866 needle_u8 = needle;
867 }
868
869 if (needle_u8->len < 1) {
870 result = (size_t) -8;
871 goto out;
872 }
873
/* from here on the default answer is "not found" */
874 result = (size_t) -1;
875 if (haystack_u8->len < needle_u8->len) {
876 goto out;
877 }
878
879 if (!reverse) {
/* forward search: p marks one past the END of the current window */
880 size_t jtbl[1 << (sizeof(unsigned char) * 8)];
881 size_t needle_u8_len = needle_u8->len;
882 size_t i;
883 const unsigned char *p, *q, *e;
884 const unsigned char *haystack_u8_val = haystack_u8->val,
885 *needle_u8_val = needle_u8->val;
/* skip table: default jump is needle length + 1; bytes occurring in the
 * needle (all but its last byte) jump by needle length minus their index,
 * later occurrences overwriting earlier ones */
886 for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
887 jtbl[i] = needle_u8_len + 1;
888 }
889 for (i = 0; i < needle_u8_len - 1; ++i) {
890 jtbl[needle_u8_val[i]] = needle_u8_len - i;
891 }
892 e = haystack_u8_val + haystack_u8->len;
893 p = haystack_u8_val;
/* skip `offset` characters, stepping by the UTF-8 length of each lead byte */
894 while (offset-- > 0) {
895 if (p >= e) {
896 result = (size_t) -16;
897 goto out;
898 }
899 p += u8_tbl[*p];
900 }
901 p += needle_u8_len;
902 if (p > e) {
903 goto out;
904 }
905 while (p <= e) {
906 const unsigned char *pv = p;
907 q = needle_u8_val + needle_u8_len;
/* compare the window against the needle from its last byte backwards */
908 for (;;) {
909 if (q == needle_u8_val) {
/* full match: p is now at the match start; count the bytes before it
 * that are not continuation bytes (10xxxxxx) to get the character index */
910 result = 0;
911 while (p > haystack_u8_val) {
912 unsigned char c = *--p;
913 if (c < 0x80) {
914 ++result;
915 } else if ((c & 0xc0) != 0x80) {
916 ++result;
917 }
918 }
919 goto out;
920 }
921 if (*--q != *--p) {
922 break;
923 }
924 }
/* mismatch: jump according to the mismatching haystack byte, but always
 * advance the window end by at least one byte */
925 p += jtbl[*p];
926 if (p <= pv) {
927 p = pv + 1;
928 }
929 }
930 } else {
/* reverse search: p marks the START of the current window, e is the
 * lowest start position allowed */
931 size_t jtbl[1 << (sizeof(unsigned char) * 8)];
932 size_t needle_u8_len = needle_u8->len, needle_len = 0;
933 size_t i;
934 const unsigned char *p, *e, *q, *qe;
935 const unsigned char *haystack_u8_val = haystack_u8->val,
936 *needle_u8_val = needle_u8->val;
/* skip table: default jump is the needle length; bytes occurring in the
 * needle at index >= 1 jump by their index, earlier occurrences
 * overwriting later ones.  needle_len counts the needle's characters
 * (bytes that are not continuation bytes). */
937 for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
938 jtbl[i] = needle_u8_len;
939 }
940 for (i = needle_u8_len - 1; i > 0; --i) {
941 unsigned char c = needle_u8_val[i];
942 jtbl[c] = i;
943 if (c < 0x80) {
944 ++needle_len;
945 } else if ((c & 0xc0) != 0x80) {
946 ++needle_len;
947 }
948 }
/* the loop above stops before index 0; count the first byte here */
949 {
950 unsigned char c = needle_u8_val[0];
951 if (c < 0x80) {
952 ++needle_len;
953 } else if ((c & 0xc0) != 0x80) {
954 ++needle_len;
955 }
956 }
957 e = haystack_u8_val;
958 p = e + haystack_u8->len;
959 qe = needle_u8_val + needle_u8_len;
960 if (offset < 0) {
/* negative offset: if its magnitude exceeds the needle's character count,
 * move the starting point back from the end by (|offset| - needle_len)
 * characters */
961 if (-offset > needle_len) {
962 offset += needle_len;
963 while (offset < 0) {
964 unsigned char c;
965 if (p <= e) {
966 result = (size_t) -16;
967 goto out;
968 }
969 c = *(--p);
970 if (c < 0x80) {
971 ++offset;
972 } else if ((c & 0xc0) != 0x80) {
973 ++offset;
974 }
975 }
976 }
977 } else {
/* non-negative offset: raise the lower bound e by `offset` characters */
978 const unsigned char *ee = haystack_u8_val + haystack_u8->len;
979 while (offset-- > 0) {
980 if (e >= ee) {
981 result = (size_t) -16;
982 goto out;
983 }
984 e += u8_tbl[*e];
985 }
986 }
987 if (p < e + needle_u8_len) {
988 goto out;
989 }
990 p -= needle_u8_len;
991 while (p >= e) {
992 const unsigned char *pv = p;
993 q = needle_u8_val;
/* compare the window against the needle from its first byte forwards */
994 for (;;) {
995 if (q == qe) {
/* full match: step p back to the match start, then count the
 * non-continuation bytes before it to get the character index */
996 result = 0;
997 p -= needle_u8_len;
998 while (p > haystack_u8_val) {
999 unsigned char c = *--p;
1000 if (c < 0x80) {
1001 ++result;
1002 } else if ((c & 0xc0) != 0x80) {
1003 ++result;
1004 }
1005 }
1006 goto out;
1007 }
1008 if (*q != *p) {
1009 break;
1010 }
1011 ++p, ++q;
1012 }
/* mismatch: jump back according to the mismatching haystack byte, but
 * always move the window start back by at least one byte */
1013 p -= jtbl[*p];
1014 if (p >= pv) {
1015 p = pv - 1;
1016 }
1017 }
1018 }
1019 out:
/* release the UTF-8 temporaries, if this call created them */
1020 if (haystack_u8 == &_haystack_u8) {
1021 mbfl_string_clear(&_haystack_u8);
1022 }
1023 if (needle_u8 == &_needle_u8) {
1024 mbfl_string_clear(&_needle_u8);
1025 }
1026 return result;
1027 }
1028
1029 /*
1030 * substr_count
1031 */
1032
1033 size_t
mbfl_substr_count(mbfl_string * haystack,mbfl_string * needle)1034 mbfl_substr_count(
1035 mbfl_string *haystack,
1036 mbfl_string *needle
1037 )
1038 {
1039 size_t n, result = 0;
1040 unsigned char *p;
1041 mbfl_convert_filter *filter;
1042 struct collector_strpos_data pc;
1043
1044 if (haystack == NULL || needle == NULL) {
1045 return (size_t) -8;
1046 }
1047 /* needle is converted into wchar */
1048 mbfl_wchar_device_init(&pc.needle);
1049 filter = mbfl_convert_filter_new(
1050 needle->encoding,
1051 &mbfl_encoding_wchar,
1052 mbfl_wchar_device_output, 0, &pc.needle);
1053 if (filter == NULL) {
1054 return (size_t) -4;
1055 }
1056 mbfl_convert_filter_feed_string(filter, needle->val, needle->len);
1057 mbfl_convert_filter_flush(filter);
1058 mbfl_convert_filter_delete(filter);
1059 pc.needle_len = pc.needle.pos;
1060 if (pc.needle.buffer == NULL) {
1061 return (size_t) -4;
1062 }
1063 if (pc.needle_len <= 0) {
1064 mbfl_wchar_device_clear(&pc.needle);
1065 return (size_t) -2;
1066 }
1067 /* initialize filter and collector data */
1068 filter = mbfl_convert_filter_new(
1069 haystack->encoding,
1070 &mbfl_encoding_wchar,
1071 collector_strpos, 0, &pc);
1072 if (filter == NULL) {
1073 mbfl_wchar_device_clear(&pc.needle);
1074 return (size_t) -4;
1075 }
1076 pc.start = 0;
1077 pc.output = 0;
1078 pc.needle_pos = 0;
1079 pc.found_pos = 0;
1080 pc.matched_pos = (size_t) -1;
1081
1082 /* feed data */
1083 p = haystack->val;
1084 n = haystack->len;
1085 if (p != NULL) {
1086 while (n > 0) {
1087 if ((*filter->filter_function)(*p++, filter) < 0) {
1088 pc.matched_pos = (size_t) -4;
1089 break;
1090 }
1091 if (pc.matched_pos != (size_t) -1) {
1092 ++result;
1093 pc.matched_pos = (size_t) -1;
1094 pc.needle_pos = 0;
1095 }
1096 n--;
1097 }
1098 }
1099 mbfl_convert_filter_flush(filter);
1100 mbfl_convert_filter_delete(filter);
1101 mbfl_wchar_device_clear(&pc.needle);
1102
1103 return result;
1104 }
1105
1106 /*
1107 * substr
1108 */
1109 struct collector_substr_data {
1110 mbfl_convert_filter *next_filter;
1111 size_t start;
1112 size_t stop;
1113 size_t output;
1114 };
1115
1116 static int
collector_substr(int c,void * data)1117 collector_substr(int c, void* data)
1118 {
1119 struct collector_substr_data *pc = (struct collector_substr_data*)data;
1120
1121 if (pc->output >= pc->stop) {
1122 return -1;
1123 }
1124
1125 if (pc->output >= pc->start) {
1126 (*pc->next_filter->filter_function)(c, pc->next_filter);
1127 }
1128
1129 pc->output++;
1130
1131 return c;
1132 }
1133
1134 mbfl_string *
mbfl_substr(mbfl_string * string,mbfl_string * result,size_t from,size_t length)1135 mbfl_substr(
1136 mbfl_string *string,
1137 mbfl_string *result,
1138 size_t from,
1139 size_t length)
1140 {
1141 const mbfl_encoding *encoding = string->encoding;
1142 size_t n, k, len, start, end;
1143 unsigned m;
1144 unsigned char *p, *w;
1145
1146 mbfl_string_init(result);
1147 result->no_language = string->no_language;
1148 result->encoding = string->encoding;
1149
1150 if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
1151 encoding->mblen_table != NULL) {
1152 len = string->len;
1153 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
1154 start = from;
1155 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1156 start = from*2;
1157 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1158 start = from*4;
1159 } else {
1160 const unsigned char *mbtab = encoding->mblen_table;
1161 start = 0;
1162 n = 0;
1163 k = 0;
1164 p = string->val;
1165 /* search start position */
1166 while (k <= from) {
1167 start = n;
1168 if (n >= len) {
1169 break;
1170 }
1171 m = mbtab[*p];
1172 n += m;
1173 p += m;
1174 k++;
1175 }
1176 }
1177
1178 if (length == MBFL_SUBSTR_UNTIL_END) {
1179 end = len;
1180 } else if (encoding->flag & MBFL_ENCTYPE_SBCS) {
1181 end = start + length;
1182 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1183 end = start + length*2;
1184 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1185 end = start + length*4;
1186 } else {
1187 const unsigned char *mbtab = encoding->mblen_table;
1188 end = start;
1189 n = start;
1190 k = 0;
1191 p = string->val + start;
1192 /* detect end position */
1193 while (k <= length) {
1194 end = n;
1195 if (n >= len) {
1196 break;
1197 }
1198 m = mbtab[*p];
1199 n += m;
1200 p += m;
1201 k++;
1202 }
1203 }
1204
1205 if (start > len) {
1206 start = len;
1207 }
1208 if (end > len) {
1209 end = len;
1210 }
1211 if (start > end) {
1212 start = end;
1213 }
1214
1215 /* allocate memory and copy */
1216 n = end - start;
1217 result->len = 0;
1218 result->val = w = (unsigned char*)mbfl_malloc(n + 1);
1219 if (w != NULL) {
1220 result->len = n;
1221 memcpy(w, string->val + start, n);
1222 w[n] = '\0';
1223 } else {
1224 result = NULL;
1225 }
1226 } else {
1227 mbfl_memory_device device;
1228 struct collector_substr_data pc;
1229 mbfl_convert_filter *decoder;
1230 mbfl_convert_filter *encoder;
1231
1232 if (length == MBFL_SUBSTR_UNTIL_END) {
1233 length = mbfl_strlen(string) - from;
1234 }
1235
1236 mbfl_memory_device_init(&device, length + 1, 0);
1237 mbfl_string_init(result);
1238 result->no_language = string->no_language;
1239 result->encoding = string->encoding;
1240 /* output code filter */
1241 decoder = mbfl_convert_filter_new(
1242 &mbfl_encoding_wchar,
1243 string->encoding,
1244 mbfl_memory_device_output, 0, &device);
1245 /* wchar filter */
1246 encoder = mbfl_convert_filter_new(
1247 string->encoding,
1248 &mbfl_encoding_wchar,
1249 collector_substr, 0, &pc);
1250 if (decoder == NULL || encoder == NULL) {
1251 mbfl_convert_filter_delete(encoder);
1252 mbfl_convert_filter_delete(decoder);
1253 return NULL;
1254 }
1255 pc.next_filter = decoder;
1256 pc.start = from;
1257 pc.stop = from + length;
1258 pc.output = 0;
1259
1260 /* feed data */
1261 p = string->val;
1262 n = string->len;
1263 if (p != NULL) {
1264 while (n > 0) {
1265 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1266 break;
1267 }
1268 n--;
1269 }
1270 }
1271
1272 mbfl_convert_filter_flush(encoder);
1273 mbfl_convert_filter_flush(decoder);
1274 result = mbfl_memory_device_result(&device, result);
1275 mbfl_convert_filter_delete(encoder);
1276 mbfl_convert_filter_delete(decoder);
1277 }
1278
1279 return result;
1280 }
1281
1282 /*
1283 * strcut
1284 */
1285 mbfl_string *
mbfl_strcut(mbfl_string * string,mbfl_string * result,size_t from,size_t length)1286 mbfl_strcut(
1287 mbfl_string *string,
1288 mbfl_string *result,
1289 size_t from,
1290 size_t length)
1291 {
1292 const mbfl_encoding *encoding = string->encoding;
1293 mbfl_memory_device device;
1294
1295 if (from >= string->len) {
1296 from = string->len;
1297 }
1298
1299 mbfl_string_init(result);
1300 result->no_language = string->no_language;
1301 result->encoding = string->encoding;
1302
1303 if ((encoding->flag & (MBFL_ENCTYPE_SBCS
1304 | MBFL_ENCTYPE_WCS2BE
1305 | MBFL_ENCTYPE_WCS2LE
1306 | MBFL_ENCTYPE_WCS4BE
1307 | MBFL_ENCTYPE_WCS4LE))
1308 || encoding->mblen_table != NULL) {
1309 const unsigned char *start = NULL;
1310 const unsigned char *end = NULL;
1311 unsigned char *w;
1312 size_t sz;
1313
1314 if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1315 from &= -2;
1316
1317 if (length >= string->len - from) {
1318 length = string->len - from;
1319 }
1320
1321 start = string->val + from;
1322 end = start + (length & -2);
1323 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1324 from &= -4;
1325
1326 if (length >= string->len - from) {
1327 length = string->len - from;
1328 }
1329
1330 start = string->val + from;
1331 end = start + (length & -4);
1332 } else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
1333 if (length >= string->len - from) {
1334 length = string->len - from;
1335 }
1336
1337 start = string->val + from;
1338 end = start + length;
1339 } else if (encoding->mblen_table != NULL) {
1340 const unsigned char *mbtab = encoding->mblen_table;
1341 const unsigned char *p, *q;
1342 int m;
1343
1344 /* search start position */
1345 for (m = 0, p = string->val, q = p + from;
1346 p < q; p += (m = mbtab[*p]));
1347
1348 if (p > q) {
1349 p -= m;
1350 }
1351
1352 start = p;
1353
1354 /* search end position */
1355 if (length >= string->len - (start - string->val)) {
1356 end = string->val + string->len;
1357 } else {
1358 for (q = p + length; p < q; p += (m = mbtab[*p]));
1359
1360 if (p > q) {
1361 p -= m;
1362 }
1363 end = p;
1364 }
1365 } else {
1366 /* never reached */
1367 return NULL;
1368 }
1369
1370 /* allocate memory and copy string */
1371 sz = end - start;
1372 if ((w = (unsigned char*)mbfl_calloc(sz + 8,
1373 sizeof(unsigned char))) == NULL) {
1374 return NULL;
1375 }
1376
1377 memcpy(w, start, sz);
1378 w[sz] = '\0';
1379 w[sz + 1] = '\0';
1380 w[sz + 2] = '\0';
1381 w[sz + 3] = '\0';
1382
1383 result->val = w;
1384 result->len = sz;
1385 } else {
1386 mbfl_convert_filter *encoder = NULL;
1387 mbfl_convert_filter *decoder = NULL;
1388 const unsigned char *p, *q, *r;
1389 struct {
1390 mbfl_convert_filter encoder;
1391 mbfl_convert_filter decoder;
1392 const unsigned char *p;
1393 size_t pos;
1394 } bk, _bk;
1395
1396 /* output code filter */
1397 if (!(decoder = mbfl_convert_filter_new(
1398 &mbfl_encoding_wchar,
1399 string->encoding,
1400 mbfl_memory_device_output, 0, &device))) {
1401 return NULL;
1402 }
1403
1404 /* wchar filter */
1405 if (!(encoder = mbfl_convert_filter_new(
1406 string->encoding,
1407 &mbfl_encoding_wchar,
1408 mbfl_filter_output_null,
1409 NULL, NULL))) {
1410 mbfl_convert_filter_delete(decoder);
1411 return NULL;
1412 }
1413
1414 mbfl_memory_device_init(&device, length + 8, 0);
1415
1416 p = string->val;
1417
1418 /* search start position */
1419 for (q = string->val + from; p < q; p++) {
1420 (*encoder->filter_function)(*p, encoder);
1421 }
1422
1423 /* switch the drain direction */
1424 encoder->output_function = (int(*)(int,void *))decoder->filter_function;
1425 encoder->flush_function = (int(*)(void *))decoder->filter_flush;
1426 encoder->data = decoder;
1427
1428 q = string->val + string->len;
1429
1430 /* save the encoder, decoder state and the pointer */
1431 mbfl_convert_filter_copy(decoder, &_bk.decoder);
1432 mbfl_convert_filter_copy(encoder, &_bk.encoder);
1433 _bk.p = p;
1434 _bk.pos = device.pos;
1435
1436 if (length > q - p) {
1437 length = q - p;
1438 }
1439
1440 if (length >= 20) {
1441 /* output a little shorter than "length" */
1442 /* XXX: the constant "20" was determined purely on the heuristics. */
1443 for (r = p + length - 20; p < r; p++) {
1444 (*encoder->filter_function)(*p, encoder);
1445 }
1446
1447 /* if the offset of the resulting string exceeds the length,
1448 * then restore the state */
1449 if (device.pos > length) {
1450 p = _bk.p;
1451 device.pos = _bk.pos;
1452 decoder->filter_dtor(decoder);
1453 encoder->filter_dtor(encoder);
1454 mbfl_convert_filter_copy(&_bk.decoder, decoder);
1455 mbfl_convert_filter_copy(&_bk.encoder, encoder);
1456 bk = _bk;
1457 } else {
1458 /* save the encoder, decoder state and the pointer */
1459 mbfl_convert_filter_copy(decoder, &bk.decoder);
1460 mbfl_convert_filter_copy(encoder, &bk.encoder);
1461 bk.p = p;
1462 bk.pos = device.pos;
1463
1464 /* flush the stream */
1465 (*encoder->filter_flush)(encoder);
1466
1467 /* if the offset of the resulting string exceeds the length,
1468 * then restore the state */
1469 if (device.pos > length) {
1470 bk.decoder.filter_dtor(&bk.decoder);
1471 bk.encoder.filter_dtor(&bk.encoder);
1472
1473 p = _bk.p;
1474 device.pos = _bk.pos;
1475 decoder->filter_dtor(decoder);
1476 encoder->filter_dtor(encoder);
1477 mbfl_convert_filter_copy(&_bk.decoder, decoder);
1478 mbfl_convert_filter_copy(&_bk.encoder, encoder);
1479 bk = _bk;
1480 } else {
1481 _bk.decoder.filter_dtor(&_bk.decoder);
1482 _bk.encoder.filter_dtor(&_bk.encoder);
1483
1484 p = bk.p;
1485 device.pos = bk.pos;
1486 decoder->filter_dtor(decoder);
1487 encoder->filter_dtor(encoder);
1488 mbfl_convert_filter_copy(&bk.decoder, decoder);
1489 mbfl_convert_filter_copy(&bk.encoder, encoder);
1490 }
1491 }
1492 } else {
1493 bk = _bk;
1494 }
1495
1496 /* detect end position */
1497 while (p < q) {
1498 (*encoder->filter_function)(*p, encoder);
1499
1500 if (device.pos > length) {
1501 /* restore filter */
1502 p = bk.p;
1503 device.pos = bk.pos;
1504 decoder->filter_dtor(decoder);
1505 encoder->filter_dtor(encoder);
1506 mbfl_convert_filter_copy(&bk.decoder, decoder);
1507 mbfl_convert_filter_copy(&bk.encoder, encoder);
1508 break;
1509 }
1510
1511 p++;
1512
1513 /* backup current state */
1514 mbfl_convert_filter_copy(decoder, &_bk.decoder);
1515 mbfl_convert_filter_copy(encoder, &_bk.encoder);
1516 _bk.pos = device.pos;
1517 _bk.p = p;
1518
1519 (*encoder->filter_flush)(encoder);
1520
1521 if (device.pos > length) {
1522 _bk.decoder.filter_dtor(&_bk.decoder);
1523 _bk.encoder.filter_dtor(&_bk.encoder);
1524
1525 /* restore filter */
1526 p = bk.p;
1527 device.pos = bk.pos;
1528 decoder->filter_dtor(decoder);
1529 encoder->filter_dtor(encoder);
1530 mbfl_convert_filter_copy(&bk.decoder, decoder);
1531 mbfl_convert_filter_copy(&bk.encoder, encoder);
1532 break;
1533 }
1534
1535 bk.decoder.filter_dtor(&bk.decoder);
1536 bk.encoder.filter_dtor(&bk.encoder);
1537
1538 p = _bk.p;
1539 device.pos = _bk.pos;
1540 decoder->filter_dtor(decoder);
1541 encoder->filter_dtor(encoder);
1542 mbfl_convert_filter_copy(&_bk.decoder, decoder);
1543 mbfl_convert_filter_copy(&_bk.encoder, encoder);
1544
1545 bk = _bk;
1546 }
1547
1548 (*encoder->filter_flush)(encoder);
1549
1550 bk.decoder.filter_dtor(&bk.decoder);
1551 bk.encoder.filter_dtor(&bk.encoder);
1552
1553 result = mbfl_memory_device_result(&device, result);
1554
1555 mbfl_convert_filter_delete(encoder);
1556 mbfl_convert_filter_delete(decoder);
1557 }
1558
1559 return result;
1560 }
1561
1562
1563 /*
1564 * strwidth
1565 */
is_fullwidth(int c)1566 static size_t is_fullwidth(int c)
1567 {
1568 int i;
1569
1570 if (c < mbfl_eaw_table[0].begin) {
1571 return 0;
1572 }
1573
1574 for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1575 if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1576 return 1;
1577 }
1578 }
1579
1580 return 0;
1581 }
1582
/*
 * Filter output callback: add the display width of `c` (2 when is_fullwidth()
 * reports it, 1 otherwise) to the size_t counter that `data` points to.
 * Returns `c` unchanged.
 */
static int
filter_count_width(int c, void* data)
{
	size_t *total = (size_t *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}

	return c;
}
1589
1590 size_t
mbfl_strwidth(mbfl_string * string)1591 mbfl_strwidth(mbfl_string *string)
1592 {
1593 size_t len, n;
1594 unsigned char *p;
1595 mbfl_convert_filter *filter;
1596
1597 len = 0;
1598 if (string->len > 0 && string->val != NULL) {
1599 /* wchar filter */
1600 filter = mbfl_convert_filter_new(
1601 string->encoding,
1602 &mbfl_encoding_wchar,
1603 filter_count_width, 0, &len);
1604 if (filter == NULL) {
1605 mbfl_convert_filter_delete(filter);
1606 return -1;
1607 }
1608
1609 /* feed data */
1610 p = string->val;
1611 n = string->len;
1612 while (n > 0) {
1613 (*filter->filter_function)(*p++, filter);
1614 n--;
1615 }
1616
1617 mbfl_convert_filter_flush(filter);
1618 mbfl_convert_filter_delete(filter);
1619 }
1620
1621 return len;
1622 }
1623
1624
1625 /*
1626 * strimwidth
1627 */
1628 struct collector_strimwidth_data {
1629 mbfl_convert_filter *decoder;
1630 mbfl_convert_filter *decoder_backup;
1631 mbfl_memory_device device;
1632 size_t from;
1633 size_t width;
1634 size_t outwidth;
1635 size_t outchar;
1636 size_t endpos;
1637 int status;
1638 };
1639
1640 static int
collector_strimwidth(int c,void * data)1641 collector_strimwidth(int c, void* data)
1642 {
1643 struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1644
1645 switch (pc->status) {
1646 case 10:
1647 (*pc->decoder->filter_function)(c, pc->decoder);
1648 break;
1649 default:
1650 if (pc->outchar >= pc->from) {
1651 pc->outwidth += (is_fullwidth(c) ? 2: 1);
1652
1653 if (pc->outwidth > pc->width) {
1654 if (pc->status == 0) {
1655 pc->endpos = pc->device.pos;
1656 mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1657 }
1658 pc->status++;
1659 (*pc->decoder->filter_function)(c, pc->decoder);
1660 c = -1;
1661 } else {
1662 (*pc->decoder->filter_function)(c, pc->decoder);
1663 }
1664 }
1665 pc->outchar++;
1666 break;
1667 }
1668
1669 return c;
1670 }
1671
1672 mbfl_string *
mbfl_strimwidth(mbfl_string * string,mbfl_string * marker,mbfl_string * result,size_t from,size_t width)1673 mbfl_strimwidth(
1674 mbfl_string *string,
1675 mbfl_string *marker,
1676 mbfl_string *result,
1677 size_t from,
1678 size_t width)
1679 {
1680 struct collector_strimwidth_data pc;
1681 mbfl_convert_filter *encoder;
1682 size_t n, mkwidth;
1683 unsigned char *p;
1684
1685 if (string == NULL || result == NULL) {
1686 return NULL;
1687 }
1688 mbfl_string_init(result);
1689 result->no_language = string->no_language;
1690 result->encoding = string->encoding;
1691 mbfl_memory_device_init(&pc.device, MIN(string->len, width), 0);
1692
1693 /* output code filter */
1694 pc.decoder = mbfl_convert_filter_new(
1695 &mbfl_encoding_wchar,
1696 string->encoding,
1697 mbfl_memory_device_output, 0, &pc.device);
1698 pc.decoder_backup = mbfl_convert_filter_new(
1699 &mbfl_encoding_wchar,
1700 string->encoding,
1701 mbfl_memory_device_output, 0, &pc.device);
1702 /* wchar filter */
1703 encoder = mbfl_convert_filter_new(
1704 string->encoding,
1705 &mbfl_encoding_wchar,
1706 collector_strimwidth, 0, &pc);
1707 if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1708 mbfl_convert_filter_delete(encoder);
1709 mbfl_convert_filter_delete(pc.decoder);
1710 mbfl_convert_filter_delete(pc.decoder_backup);
1711 return NULL;
1712 }
1713 mkwidth = 0;
1714 if (marker) {
1715 mkwidth = mbfl_strwidth(marker);
1716 }
1717 pc.from = from;
1718 pc.width = width - mkwidth;
1719 pc.outwidth = 0;
1720 pc.outchar = 0;
1721 pc.status = 0;
1722 pc.endpos = 0;
1723
1724 /* feed data */
1725 p = string->val;
1726 n = string->len;
1727 if (p != NULL) {
1728 while (n > 0) {
1729 n--;
1730 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1731 break;
1732 }
1733 }
1734 mbfl_convert_filter_flush(encoder);
1735 if (pc.status != 0 && mkwidth > 0) {
1736 pc.width += mkwidth;
1737 if (n > 0) {
1738 while (n > 0) {
1739 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1740 break;
1741 }
1742 n--;
1743 }
1744 mbfl_convert_filter_flush(encoder);
1745 } else if (pc.outwidth > pc.width) {
1746 pc.status++;
1747 }
1748 if (pc.status != 1) {
1749 pc.status = 10;
1750 pc.device.pos = pc.endpos;
1751 mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1752 mbfl_convert_filter_reset(encoder, marker->encoding, &mbfl_encoding_wchar);
1753 p = marker->val;
1754 n = marker->len;
1755 while (n > 0) {
1756 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1757 break;
1758 }
1759 n--;
1760 }
1761 mbfl_convert_filter_flush(encoder);
1762 }
1763 } else if (pc.status != 0) {
1764 pc.device.pos = pc.endpos;
1765 mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1766 }
1767 mbfl_convert_filter_flush(pc.decoder);
1768 }
1769 result = mbfl_memory_device_result(&pc.device, result);
1770 mbfl_convert_filter_delete(encoder);
1771 mbfl_convert_filter_delete(pc.decoder);
1772 mbfl_convert_filter_delete(pc.decoder_backup);
1773
1774 return result;
1775 }
1776
1777 mbfl_string *
mbfl_ja_jp_hantozen(mbfl_string * string,mbfl_string * result,int mode)1778 mbfl_ja_jp_hantozen(
1779 mbfl_string *string,
1780 mbfl_string *result,
1781 int mode)
1782 {
1783 size_t n;
1784 unsigned char *p;
1785 mbfl_memory_device device;
1786 mbfl_convert_filter *decoder = NULL;
1787 mbfl_convert_filter *encoder = NULL;
1788 mbfl_convert_filter *tl_filter = NULL;
1789 mbfl_convert_filter *next_filter = NULL;
1790 mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1791
1792 mbfl_memory_device_init(&device, string->len, 0);
1793 mbfl_string_init(result);
1794
1795 result->no_language = string->no_language;
1796 result->encoding = string->encoding;
1797
1798 decoder = mbfl_convert_filter_new(
1799 &mbfl_encoding_wchar,
1800 string->encoding,
1801 mbfl_memory_device_output, 0, &device);
1802 if (decoder == NULL) {
1803 goto out;
1804 }
1805 next_filter = decoder;
1806
1807 param =
1808 (mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
1809 if (param == NULL) {
1810 goto out;
1811 }
1812
1813 param->mode = mode;
1814
1815 tl_filter = mbfl_convert_filter_new2(
1816 &vtbl_tl_jisx0201_jisx0208,
1817 (int(*)(int, void*))next_filter->filter_function,
1818 (int(*)(void*))next_filter->filter_flush,
1819 next_filter);
1820 if (tl_filter == NULL) {
1821 mbfl_free(param);
1822 goto out;
1823 }
1824
1825 tl_filter->opaque = param;
1826 next_filter = tl_filter;
1827
1828 encoder = mbfl_convert_filter_new(
1829 string->encoding,
1830 &mbfl_encoding_wchar,
1831 (int(*)(int, void*))next_filter->filter_function,
1832 (int(*)(void*))next_filter->filter_flush,
1833 next_filter);
1834 if (encoder == NULL) {
1835 goto out;
1836 }
1837
1838 /* feed data */
1839 p = string->val;
1840 n = string->len;
1841 if (p != NULL) {
1842 while (n > 0) {
1843 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1844 break;
1845 }
1846 n--;
1847 }
1848 }
1849
1850 mbfl_convert_filter_flush(encoder);
1851 result = mbfl_memory_device_result(&device, result);
1852 out:
1853 if (tl_filter != NULL) {
1854 if (tl_filter->opaque != NULL) {
1855 mbfl_free(tl_filter->opaque);
1856 }
1857 mbfl_convert_filter_delete(tl_filter);
1858 }
1859
1860 if (decoder != NULL) {
1861 mbfl_convert_filter_delete(decoder);
1862 }
1863
1864 if (encoder != NULL) {
1865 mbfl_convert_filter_delete(encoder);
1866 }
1867
1868 return result;
1869 }
1870
1871
1872 /*
1873 * MIME header encode
1874 */
1875 struct mime_header_encoder_data {
1876 mbfl_convert_filter *conv1_filter;
1877 mbfl_convert_filter *block_filter;
1878 mbfl_convert_filter *conv2_filter;
1879 mbfl_convert_filter *conv2_filter_backup;
1880 mbfl_convert_filter *encod_filter;
1881 mbfl_convert_filter *encod_filter_backup;
1882 mbfl_memory_device outdev;
1883 mbfl_memory_device tmpdev;
1884 int status1;
1885 int status2;
1886 size_t prevpos;
1887 size_t linehead;
1888 size_t firstindent;
1889 int encnamelen;
1890 int lwsplen;
1891 char encname[128];
1892 char lwsp[16];
1893 };
1894
1895 static int
mime_header_encoder_block_collector(int c,void * data)1896 mime_header_encoder_block_collector(int c, void *data)
1897 {
1898 size_t n;
1899 struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1900
1901 switch (pe->status2) {
1902 case 1: /* encoded word */
1903 pe->prevpos = pe->outdev.pos;
1904 mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
1905 mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
1906 (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1907 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1908 (*pe->encod_filter->filter_flush)(pe->encod_filter);
1909 n = pe->outdev.pos - pe->linehead + pe->firstindent;
1910 pe->outdev.pos = pe->prevpos;
1911 mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
1912 mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
1913 if (n >= 74) {
1914 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1915 (*pe->encod_filter->filter_flush)(pe->encod_filter);
1916 mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
1917 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
1918 pe->linehead = pe->outdev.pos;
1919 pe->firstindent = 0;
1920 mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1921 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1922 } else {
1923 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1924 }
1925 break;
1926
1927 default:
1928 mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1929 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1930 pe->status2 = 1;
1931 break;
1932 }
1933
1934 return c;
1935 }
1936
1937 static int
mime_header_encoder_collector(int c,void * data)1938 mime_header_encoder_collector(int c, void *data)
1939 {
1940 static int qp_table[256] = {
1941 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
1942 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
1943 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
1944 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
1945 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
1946 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
1947 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
1948 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
1949 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
1950 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
1951 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
1952 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
1953 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
1954 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
1955 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
1956 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xF0 */
1957 };
1958
1959 size_t n;
1960 struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1961
1962 switch (pe->status1) {
1963 case 11: /* encoded word */
1964 (*pe->block_filter->filter_function)(c, pe->block_filter);
1965 break;
1966
1967 default: /* ASCII */
1968 if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
1969 mbfl_memory_device_output(c, &pe->tmpdev);
1970 pe->status1 = 1;
1971 } else if (pe->status1 == 0 && c == 0x20) { /* repeat SPACE */
1972 mbfl_memory_device_output(c, &pe->tmpdev);
1973 } else {
1974 if (pe->tmpdev.pos < 74 && c == 0x20) {
1975 n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
1976 if (n > 74) {
1977 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
1978 pe->linehead = pe->outdev.pos;
1979 pe->firstindent = 0;
1980 } else if (pe->outdev.pos > 0) {
1981 mbfl_memory_device_output(0x20, &pe->outdev);
1982 }
1983 mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
1984 mbfl_memory_device_reset(&pe->tmpdev);
1985 pe->status1 = 0;
1986 } else {
1987 n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
1988 if (n > 60) {
1989 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
1990 pe->linehead = pe->outdev.pos;
1991 pe->firstindent = 0;
1992 } else if (pe->outdev.pos > 0) {
1993 mbfl_memory_device_output(0x20, &pe->outdev);
1994 }
1995 mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
1996 mbfl_memory_device_reset(&pe->tmpdev);
1997 (*pe->block_filter->filter_function)(c, pe->block_filter);
1998 pe->status1 = 11;
1999 }
2000 }
2001 break;
2002 }
2003
2004 return c;
2005 }
2006
2007 mbfl_string *
mime_header_encoder_result(struct mime_header_encoder_data * pe,mbfl_string * result)2008 mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
2009 {
2010 if (pe->status1 >= 10) {
2011 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2012 (*pe->encod_filter->filter_flush)(pe->encod_filter);
2013 mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
2014 } else if (pe->tmpdev.pos > 0) {
2015 if (pe->outdev.pos > 0) {
2016 if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) {
2017 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2018 } else {
2019 mbfl_memory_device_output(0x20, &pe->outdev);
2020 }
2021 }
2022 mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2023 }
2024 mbfl_memory_device_reset(&pe->tmpdev);
2025 pe->prevpos = 0;
2026 pe->linehead = 0;
2027 pe->status1 = 0;
2028 pe->status2 = 0;
2029
2030 return mbfl_memory_device_result(&pe->outdev, result);
2031 }
2032
2033 struct mime_header_encoder_data*
mime_header_encoder_new(const mbfl_encoding * incode,const mbfl_encoding * outcode,const mbfl_encoding * transenc)2034 mime_header_encoder_new(
2035 const mbfl_encoding *incode,
2036 const mbfl_encoding *outcode,
2037 const mbfl_encoding *transenc)
2038 {
2039 size_t n;
2040 const char *s;
2041 struct mime_header_encoder_data *pe;
2042
2043 /* get output encoding and check MIME charset name */
2044 if (outcode->mime_name == NULL || outcode->mime_name[0] == '\0') {
2045 return NULL;
2046 }
2047
2048 pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
2049 if (pe == NULL) {
2050 return NULL;
2051 }
2052
2053 mbfl_memory_device_init(&pe->outdev, 0, 0);
2054 mbfl_memory_device_init(&pe->tmpdev, 0, 0);
2055 pe->prevpos = 0;
2056 pe->linehead = 0;
2057 pe->firstindent = 0;
2058 pe->status1 = 0;
2059 pe->status2 = 0;
2060
2061 /* make the encoding description string exp. "=?ISO-2022-JP?B?" */
2062 n = 0;
2063 pe->encname[n++] = 0x3d;
2064 pe->encname[n++] = 0x3f;
2065 s = outcode->mime_name;
2066 while (*s) {
2067 pe->encname[n++] = *s++;
2068 }
2069 pe->encname[n++] = 0x3f;
2070 if (transenc->no_encoding == mbfl_no_encoding_qprint) {
2071 pe->encname[n++] = 0x51;
2072 } else {
2073 pe->encname[n++] = 0x42;
2074 transenc = &mbfl_encoding_base64;
2075 }
2076 pe->encname[n++] = 0x3f;
2077 pe->encname[n] = '\0';
2078 pe->encnamelen = n;
2079
2080 n = 0;
2081 pe->lwsp[n++] = 0x0d;
2082 pe->lwsp[n++] = 0x0a;
2083 pe->lwsp[n++] = 0x20;
2084 pe->lwsp[n] = '\0';
2085 pe->lwsplen = n;
2086
2087 /* transfer encode filter */
2088 pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2089 pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2090
2091 /* Output code filter */
2092 pe->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2093 pe->conv2_filter_backup = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2094
2095 /* encoded block filter */
2096 pe->block_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, &mbfl_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
2097
2098 /* Input code filter */
2099 pe->conv1_filter = mbfl_convert_filter_new(incode, &mbfl_encoding_wchar, mime_header_encoder_collector, 0, pe);
2100
2101 if (pe->encod_filter == NULL ||
2102 pe->encod_filter_backup == NULL ||
2103 pe->conv2_filter == NULL ||
2104 pe->conv2_filter_backup == NULL ||
2105 pe->conv1_filter == NULL) {
2106 mime_header_encoder_delete(pe);
2107 return NULL;
2108 }
2109
2110 if (transenc->no_encoding == mbfl_no_encoding_qprint) {
2111 pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
2112 pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
2113 } else {
2114 pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
2115 pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
2116 }
2117
2118 return pe;
2119 }
2120
2121 void
mime_header_encoder_delete(struct mime_header_encoder_data * pe)2122 mime_header_encoder_delete(struct mime_header_encoder_data *pe)
2123 {
2124 if (pe) {
2125 mbfl_convert_filter_delete(pe->conv1_filter);
2126 mbfl_convert_filter_delete(pe->block_filter);
2127 mbfl_convert_filter_delete(pe->conv2_filter);
2128 mbfl_convert_filter_delete(pe->conv2_filter_backup);
2129 mbfl_convert_filter_delete(pe->encod_filter);
2130 mbfl_convert_filter_delete(pe->encod_filter_backup);
2131 mbfl_memory_device_clear(&pe->outdev);
2132 mbfl_memory_device_clear(&pe->tmpdev);
2133 mbfl_free((void*)pe);
2134 }
2135 }
2136
2137 int
mime_header_encoder_feed(int c,struct mime_header_encoder_data * pe)2138 mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
2139 {
2140 return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
2141 }
2142
2143 mbfl_string *
mbfl_mime_header_encode(mbfl_string * string,mbfl_string * result,const mbfl_encoding * outcode,const mbfl_encoding * encoding,const char * linefeed,int indent)2144 mbfl_mime_header_encode(
2145 mbfl_string *string,
2146 mbfl_string *result,
2147 const mbfl_encoding *outcode,
2148 const mbfl_encoding *encoding,
2149 const char *linefeed,
2150 int indent)
2151 {
2152 size_t n;
2153 unsigned char *p;
2154 struct mime_header_encoder_data *pe;
2155
2156 mbfl_string_init(result);
2157 result->no_language = string->no_language;
2158 result->encoding = &mbfl_encoding_ascii;
2159
2160 pe = mime_header_encoder_new(string->encoding, outcode, encoding);
2161 if (pe == NULL) {
2162 return NULL;
2163 }
2164
2165 if (linefeed != NULL) {
2166 n = 0;
2167 while (*linefeed && n < 8) {
2168 pe->lwsp[n++] = *linefeed++;
2169 }
2170 pe->lwsp[n++] = 0x20;
2171 pe->lwsp[n] = '\0';
2172 pe->lwsplen = n;
2173 }
2174 if (indent > 0 && indent < 74) {
2175 pe->firstindent = indent;
2176 }
2177
2178 n = string->len;
2179 p = string->val;
2180 while (n > 0) {
2181 (*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
2182 n--;
2183 }
2184
2185 result = mime_header_encoder_result(pe, result);
2186 mime_header_encoder_delete(pe);
2187
2188 return result;
2189 }
2190
2191
2192 /*
2193 * MIME header decode
2194 */
2195 struct mime_header_decoder_data {
2196 mbfl_convert_filter *deco_filter;
2197 mbfl_convert_filter *conv1_filter;
2198 mbfl_convert_filter *conv2_filter;
2199 mbfl_memory_device outdev;
2200 mbfl_memory_device tmpdev;
2201 size_t cspos;
2202 int status;
2203 const mbfl_encoding *encoding;
2204 const mbfl_encoding *incode;
2205 const mbfl_encoding *outcode;
2206 };
2207
2208 static int
mime_header_decoder_collector(int c,void * data)2209 mime_header_decoder_collector(int c, void* data)
2210 {
2211 const mbfl_encoding *encoding;
2212 struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;
2213
2214 switch (pd->status) {
2215 case 1:
2216 if (c == 0x3f) { /* ? */
2217 mbfl_memory_device_output(c, &pd->tmpdev);
2218 pd->cspos = pd->tmpdev.pos;
2219 pd->status = 2;
2220 } else {
2221 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2222 mbfl_memory_device_reset(&pd->tmpdev);
2223 if (c == 0x3d) { /* = */
2224 mbfl_memory_device_output(c, &pd->tmpdev);
2225 } else if (c == 0x0d || c == 0x0a) { /* CR or LF */
2226 pd->status = 9;
2227 } else {
2228 (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2229 pd->status = 0;
2230 }
2231 }
2232 break;
2233 case 2: /* store charset string */
2234 if (c == 0x3f) { /* ? */
2235 /* identify charset */
2236 mbfl_memory_device_output('\0', &pd->tmpdev);
2237 encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
2238 if (encoding != NULL) {
2239 pd->incode = encoding;
2240 pd->status = 3;
2241 }
2242 mbfl_memory_device_unput(&pd->tmpdev);
2243 mbfl_memory_device_output(c, &pd->tmpdev);
2244 } else {
2245 mbfl_memory_device_output(c, &pd->tmpdev);
2246 if (pd->tmpdev.pos > 100) { /* too long charset string */
2247 pd->status = 0;
2248 } else if (c == 0x0d || c == 0x0a) { /* CR or LF */
2249 mbfl_memory_device_unput(&pd->tmpdev);
2250 pd->status = 9;
2251 }
2252 if (pd->status != 2) {
2253 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2254 mbfl_memory_device_reset(&pd->tmpdev);
2255 }
2256 }
2257 break;
2258 case 3: /* identify encoding */
2259 mbfl_memory_device_output(c, &pd->tmpdev);
2260 if (c == 0x42 || c == 0x62) { /* 'B' or 'b' */
2261 pd->encoding = &mbfl_encoding_base64;
2262 pd->status = 4;
2263 } else if (c == 0x51 || c == 0x71) { /* 'Q' or 'q' */
2264 pd->encoding = &mbfl_encoding_qprint;
2265 pd->status = 4;
2266 } else {
2267 if (c == 0x0d || c == 0x0a) { /* CR or LF */
2268 mbfl_memory_device_unput(&pd->tmpdev);
2269 pd->status = 9;
2270 } else {
2271 pd->status = 0;
2272 }
2273 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2274 mbfl_memory_device_reset(&pd->tmpdev);
2275 }
2276 break;
2277 case 4: /* reset filter */
2278 mbfl_memory_device_output(c, &pd->tmpdev);
2279 if (c == 0x3f) { /* ? */
2280 /* charset convert filter */
2281 mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, &mbfl_encoding_wchar);
2282 /* decode filter */
2283 mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, &mbfl_encoding_8bit);
2284 pd->status = 5;
2285 } else {
2286 if (c == 0x0d || c == 0x0a) { /* CR or LF */
2287 mbfl_memory_device_unput(&pd->tmpdev);
2288 pd->status = 9;
2289 } else {
2290 pd->status = 0;
2291 }
2292 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2293 }
2294 mbfl_memory_device_reset(&pd->tmpdev);
2295 break;
2296 case 5: /* encoded block */
2297 if (c == 0x3f) { /* ? */
2298 pd->status = 6;
2299 } else {
2300 (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2301 }
2302 break;
2303 case 6: /* check end position */
2304 if (c == 0x3d) { /* = */
2305 /* flush and reset filter */
2306 (*pd->deco_filter->filter_flush)(pd->deco_filter);
2307 (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2308 mbfl_convert_filter_reset(pd->conv1_filter, &mbfl_encoding_ascii, &mbfl_encoding_wchar);
2309 pd->status = 7;
2310 } else {
2311 (*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
2312 if (c != 0x3f) { /* ? */
2313 (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2314 pd->status = 5;
2315 }
2316 }
2317 break;
2318 case 7: /* after encoded block */
2319 if (c == 0x0d || c == 0x0a) { /* CR LF */
2320 pd->status = 8;
2321 } else {
2322 mbfl_memory_device_output(c, &pd->tmpdev);
2323 if (c == 0x3d) { /* = */
2324 pd->status = 1;
2325 } else if (c != 0x20 && c != 0x09) { /* not space */
2326 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2327 mbfl_memory_device_reset(&pd->tmpdev);
2328 pd->status = 0;
2329 }
2330 }
2331 break;
2332 case 8: /* folding */
2333 case 9: /* folding */
2334 if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
2335 if (c == 0x3d) { /* = */
2336 if (pd->status == 8) {
2337 mbfl_memory_device_output(0x20, &pd->tmpdev); /* SPACE */
2338 } else {
2339 (*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
2340 }
2341 mbfl_memory_device_output(c, &pd->tmpdev);
2342 pd->status = 1;
2343 } else {
2344 mbfl_memory_device_output(0x20, &pd->tmpdev);
2345 mbfl_memory_device_output(c, &pd->tmpdev);
2346 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2347 mbfl_memory_device_reset(&pd->tmpdev);
2348 pd->status = 0;
2349 }
2350 }
2351 break;
2352 default: /* non encoded block */
2353 if (c == 0x0d || c == 0x0a) { /* CR LF */
2354 pd->status = 9;
2355 } else if (c == 0x3d) { /* = */
2356 mbfl_memory_device_output(c, &pd->tmpdev);
2357 pd->status = 1;
2358 } else {
2359 (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2360 }
2361 break;
2362 }
2363
2364 return c;
2365 }
2366
2367 mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data * pd,mbfl_string * result)2368 mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2369 {
2370 switch (pd->status) {
2371 case 1:
2372 case 2:
2373 case 3:
2374 case 4:
2375 case 7:
2376 case 8:
2377 case 9:
2378 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2379 break;
2380 case 5:
2381 case 6:
2382 (*pd->deco_filter->filter_flush)(pd->deco_filter);
2383 (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2384 break;
2385 }
2386 (*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2387 mbfl_memory_device_reset(&pd->tmpdev);
2388 pd->status = 0;
2389
2390 return mbfl_memory_device_result(&pd->outdev, result);
2391 }
2392
2393 struct mime_header_decoder_data*
mime_header_decoder_new(const mbfl_encoding * outcode)2394 mime_header_decoder_new(const mbfl_encoding *outcode)
2395 {
2396 struct mime_header_decoder_data *pd;
2397
2398 pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
2399 if (pd == NULL) {
2400 return NULL;
2401 }
2402
2403 mbfl_memory_device_init(&pd->outdev, 0, 0);
2404 mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2405 pd->cspos = 0;
2406 pd->status = 0;
2407 pd->encoding = &mbfl_encoding_8bit;
2408 pd->incode = &mbfl_encoding_ascii;
2409 pd->outcode = outcode;
2410 /* charset convert filter */
2411 pd->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2412 pd->conv1_filter = mbfl_convert_filter_new(pd->incode, &mbfl_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2413 /* decode filter */
2414 pd->deco_filter = mbfl_convert_filter_new(pd->encoding, &mbfl_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2415
2416 if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2417 mime_header_decoder_delete(pd);
2418 return NULL;
2419 }
2420
2421 return pd;
2422 }
2423
2424 void
mime_header_decoder_delete(struct mime_header_decoder_data * pd)2425 mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2426 {
2427 if (pd) {
2428 mbfl_convert_filter_delete(pd->conv2_filter);
2429 mbfl_convert_filter_delete(pd->conv1_filter);
2430 mbfl_convert_filter_delete(pd->deco_filter);
2431 mbfl_memory_device_clear(&pd->outdev);
2432 mbfl_memory_device_clear(&pd->tmpdev);
2433 mbfl_free((void*)pd);
2434 }
2435 }
2436
/*
 * Feed one input byte `c` to the decoder `pd`.
 *
 * Public entry point that simply forwards to the internal collector and
 * returns the collector's result unchanged.
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	int rc;

	rc = mime_header_decoder_collector(c, pd);
	return rc;
}
2442
2443 mbfl_string *
mbfl_mime_header_decode(mbfl_string * string,mbfl_string * result,const mbfl_encoding * outcode)2444 mbfl_mime_header_decode(
2445 mbfl_string *string,
2446 mbfl_string *result,
2447 const mbfl_encoding *outcode)
2448 {
2449 size_t n;
2450 unsigned char *p;
2451 struct mime_header_decoder_data *pd;
2452
2453 mbfl_string_init(result);
2454 result->no_language = string->no_language;
2455 result->encoding = outcode;
2456
2457 pd = mime_header_decoder_new(outcode);
2458 if (pd == NULL) {
2459 return NULL;
2460 }
2461
2462 /* feed data */
2463 n = string->len;
2464 p = string->val;
2465 while (n > 0) {
2466 mime_header_decoder_collector(*p++, pd);
2467 n--;
2468 }
2469
2470 result = mime_header_decoder_result(pd, result);
2471 mime_header_decoder_delete(pd);
2472
2473 return result;
2474 }
2475
2476
2477
2478 /*
2479 * convert HTML numeric entity
2480 */
/*
 * State shared by the HTML numeric entity encode/decode collectors.
 *
 * Each convmap entry consists of four ints: [0] lowest and [1] highest
 * code point of a range, [2] an offset (added when encoding, subtracted
 * when decoding) and [3] a mask ANDed onto the value when encoding.
 */
struct collector_htmlnumericentity_data {
	mbfl_convert_filter *decoder;	/* downstream filter that receives the produced characters */
	int status;	/* decode state: 0 plain text, 1 after '&', 2 after "&#", 3 in decimal digits, 4 after "&#x", 5 in hex digits */
	int cache;	/* numeric value accumulated from the digits read so far */
	int digit;	/* number of digits accumulated into cache */
	int *convmap;	/* conversion map, mapsize entries of four ints each */
	int mapsize;	/* number of four-int entries in convmap */
};
2489
2490 static int
collector_encode_htmlnumericentity(int c,void * data)2491 collector_encode_htmlnumericentity(int c, void *data)
2492 {
2493 struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2494 int f, n, s, r, d, size, *mapelm;
2495
2496 size = pc->mapsize;
2497 f = 0;
2498 n = 0;
2499 while (n < size) {
2500 mapelm = &(pc->convmap[n*4]);
2501 if (c >= mapelm[0] && c <= mapelm[1]) {
2502 s = (c + mapelm[2]) & mapelm[3];
2503 if (s >= 0) {
2504 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2505 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2506 r = 100000000;
2507 s %= r;
2508 while (r > 0) {
2509 d = s/r;
2510 if (d || f) {
2511 f = 1;
2512 s %= r;
2513 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2514 }
2515 r /= 10;
2516 }
2517 if (!f) {
2518 f = 1;
2519 (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2520 }
2521 (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
2522 }
2523 }
2524 if (f) {
2525 break;
2526 }
2527 n++;
2528 }
2529 if (!f) {
2530 (*pc->decoder->filter_function)(c, pc->decoder);
2531 }
2532
2533 return c;
2534 }
2535
2536 static int
collector_decode_htmlnumericentity(int c,void * data)2537 collector_decode_htmlnumericentity(int c, void *data)
2538 {
2539 struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2540 int f, n, s, r, d, size, *mapelm;
2541
2542 switch (pc->status) {
2543 case 1:
2544 if (c == 0x23) { /* '#' */
2545 pc->status = 2;
2546 } else {
2547 pc->status = 0;
2548 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2549 (*pc->decoder->filter_function)(c, pc->decoder);
2550 }
2551 break;
2552 case 2:
2553 if (c == 0x78) { /* 'x' */
2554 pc->status = 4;
2555 } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2556 pc->cache = c - 0x30;
2557 pc->status = 3;
2558 pc->digit = 1;
2559 } else {
2560 pc->status = 0;
2561 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2562 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2563 (*pc->decoder->filter_function)(c, pc->decoder);
2564 }
2565 break;
2566 case 3:
2567 s = 0;
2568 f = 0;
2569 if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2570 if (pc->digit > 9) {
2571 pc->status = 0;
2572 s = pc->cache;
2573 f = 1;
2574 } else {
2575 s = pc->cache*10 + c - 0x30;
2576 pc->cache = s;
2577 pc->digit++;
2578 }
2579 } else {
2580 pc->status = 0;
2581 s = pc->cache;
2582 f = 1;
2583 n = 0;
2584 size = pc->mapsize;
2585 while (n < size) {
2586 mapelm = &(pc->convmap[n*4]);
2587 d = s - mapelm[2];
2588 if (d >= mapelm[0] && d <= mapelm[1]) {
2589 f = 0;
2590 (*pc->decoder->filter_function)(d, pc->decoder);
2591 if (c != 0x3b) { /* ';' */
2592 (*pc->decoder->filter_function)(c, pc->decoder);
2593 }
2594 break;
2595 }
2596 n++;
2597 }
2598 }
2599 if (f) {
2600 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2601 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2602 r = 1;
2603 n = pc->digit;
2604 while (n > 0) {
2605 r *= 10;
2606 n--;
2607 }
2608 s %= r;
2609 r /= 10;
2610 while (r > 0) {
2611 d = s/r;
2612 s %= r;
2613 r /= 10;
2614 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2615 }
2616 (*pc->decoder->filter_function)(c, pc->decoder);
2617 }
2618 break;
2619 case 4:
2620 if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2621 pc->cache = c - 0x30;
2622 pc->status = 5;
2623 pc->digit = 1;
2624 } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F' */
2625 pc->cache = c - 0x41 + 10;
2626 pc->status = 5;
2627 pc->digit = 1;
2628 } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f' */
2629 pc->cache = c - 0x61 + 10;
2630 pc->status = 5;
2631 pc->digit = 1;
2632 } else {
2633 pc->status = 0;
2634 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2635 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2636 (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2637 (*pc->decoder->filter_function)(c, pc->decoder);
2638 }
2639 break;
2640 case 5:
2641 s = 0;
2642 f = 0;
2643 if ((c >= 0x30 && c <= 0x39) ||
2644 (c >= 0x41 && c <= 0x46) ||
2645 (c >= 0x61 && c <= 0x66)) { /* '0' - '9' or 'a' - 'f' */
2646 if (pc->digit > 9) {
2647 pc->status = 0;
2648 s = pc->cache;
2649 f = 1;
2650 } else {
2651 if (c >= 0x30 && c <= 0x39) {
2652 s = pc->cache*16 + (c - 0x30);
2653 } else if (c >= 0x41 && c <= 0x46) {
2654 s = pc->cache*16 + (c - 0x41 + 10);
2655 } else {
2656 s = pc->cache*16 + (c - 0x61 + 10);
2657 }
2658 pc->cache = s;
2659 pc->digit++;
2660 }
2661 } else {
2662 pc->status = 0;
2663 s = pc->cache;
2664 f = 1;
2665 n = 0;
2666 size = pc->mapsize;
2667 while (n < size) {
2668 mapelm = &(pc->convmap[n*4]);
2669 d = s - mapelm[2];
2670 if (d >= mapelm[0] && d <= mapelm[1]) {
2671 f = 0;
2672 (*pc->decoder->filter_function)(d, pc->decoder);
2673 if (c != 0x3b) { /* ';' */
2674 (*pc->decoder->filter_function)(c, pc->decoder);
2675 }
2676 break;
2677 }
2678 n++;
2679 }
2680 }
2681 if (f) {
2682 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2683 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2684 (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2685 r = 1;
2686 n = pc->digit;
2687 while (n > 0) {
2688 r *= 16;
2689 n--;
2690 }
2691 s %= r;
2692 r /= 16;
2693 while (r > 0) {
2694 d = s/r;
2695 s %= r;
2696 r /= 16;
2697 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2698 }
2699 (*pc->decoder->filter_function)(c, pc->decoder);
2700 }
2701 break;
2702 default:
2703 if (c == 0x26) { /* '&' */
2704 pc->status = 1;
2705 } else {
2706 (*pc->decoder->filter_function)(c, pc->decoder);
2707 }
2708 break;
2709 }
2710
2711 return c;
2712 }
2713
2714 static int
collector_encode_hex_htmlnumericentity(int c,void * data)2715 collector_encode_hex_htmlnumericentity(int c, void *data)
2716 {
2717 struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2718 int f, n, s, r, d, size, *mapelm;
2719
2720 size = pc->mapsize;
2721 f = 0;
2722 n = 0;
2723 while (n < size) {
2724 mapelm = &(pc->convmap[n*4]);
2725 if (c >= mapelm[0] && c <= mapelm[1]) {
2726 s = (c + mapelm[2]) & mapelm[3];
2727 if (s >= 0) {
2728 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2729 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2730 (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2731 r = 0x1000000;
2732 s %= r;
2733 while (r > 0) {
2734 d = s/r;
2735 if (d || f) {
2736 f = 1;
2737 s %= r;
2738 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2739 }
2740 r /= 16;
2741 }
2742 if (!f) {
2743 f = 1;
2744 (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2745 }
2746 (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
2747 }
2748 }
2749 if (f) {
2750 break;
2751 }
2752 n++;
2753 }
2754 if (!f) {
2755 (*pc->decoder->filter_function)(c, pc->decoder);
2756 }
2757
2758 return c;
2759 }
2760
/*
 * Flush handler of the numeric entity decoder.
 *
 * When the input ends in the middle of an entity, the part consumed so far
 * ("&", "&#", "&#NNN", "&#x" or "&#xHHH") is written out literally, then
 * the state machine is reset.
 *
 * filter : reinterpreted as struct collector_htmlnumericentity_data; the
 *          function is installed through a cast to int (*)(void *)
 *          (presumably it is invoked with the collector data -- confirm
 *          against mbfl_convert_filter_flush()).
 *
 * The pending digits are extracted from the right into a small buffer using
 * unsigned arithmetic.  The previous code computed base^digit in an int,
 * which overflows for long digit runs and could end in a division by zero.
 *
 * Always returns 0.
 */
int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
{
	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
	unsigned char digits[16];
	unsigned int rest;
	unsigned int base = 0;	/* 0: no pending digits to write */
	int i, ndigits;

	switch (pc->status) {
	case 1: /* '&' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		break;
	case 2: /* '#' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		break;
	case 3: /* '0'-'9' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		base = 10;
		break;
	case 4: /* 'x' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
		break;
	case 5: /* '0'-'9','a'-'f' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
		base = 16;
		break;
	default:
		break;
	}

	if (base != 0) {
		/* reproduce exactly pc->digit digits, zero padded, most significant first */
		ndigits = pc->digit;
		if (ndigits > (int)sizeof(digits)) {
			ndigits = (int)sizeof(digits);	/* the collector stores at most 10 digits */
		}
		rest = (unsigned int)pc->cache;
		for (i = ndigits - 1; i >= 0; i--) {
			digits[i] = (unsigned char)(rest % base);
			rest /= base;
		}
		for (i = 0; i < ndigits; i++) {
			(*pc->decoder->filter_function)(mbfl_hexchar_table[digits[i]], pc->decoder);
		}
	}

	pc->status = 0;
	pc->cache = 0;
	pc->digit = 0;

	return 0;
}
2833
2834
2835 mbfl_string *
mbfl_html_numeric_entity(mbfl_string * string,mbfl_string * result,int * convmap,int mapsize,int type)2836 mbfl_html_numeric_entity(
2837 mbfl_string *string,
2838 mbfl_string *result,
2839 int *convmap,
2840 int mapsize,
2841 int type)
2842 {
2843 struct collector_htmlnumericentity_data pc;
2844 mbfl_memory_device device;
2845 mbfl_convert_filter *encoder;
2846 size_t n;
2847 unsigned char *p;
2848
2849 if (string == NULL || result == NULL) {
2850 return NULL;
2851 }
2852 mbfl_string_init(result);
2853 result->no_language = string->no_language;
2854 result->encoding = string->encoding;
2855 mbfl_memory_device_init(&device, string->len, 0);
2856
2857 /* output code filter */
2858 pc.decoder = mbfl_convert_filter_new(
2859 &mbfl_encoding_wchar,
2860 string->encoding,
2861 mbfl_memory_device_output, 0, &device);
2862 /* wchar filter */
2863 if (type == 0) { /* decimal output */
2864 encoder = mbfl_convert_filter_new(
2865 string->encoding,
2866 &mbfl_encoding_wchar,
2867 collector_encode_htmlnumericentity, 0, &pc);
2868 } else if (type == 2) { /* hex output */
2869 encoder = mbfl_convert_filter_new(
2870 string->encoding,
2871 &mbfl_encoding_wchar,
2872 collector_encode_hex_htmlnumericentity, 0, &pc);
2873 } else { /* type == 1: decimal/hex input */
2874 encoder = mbfl_convert_filter_new(
2875 string->encoding,
2876 &mbfl_encoding_wchar,
2877 collector_decode_htmlnumericentity,
2878 (int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
2879 }
2880 if (pc.decoder == NULL || encoder == NULL) {
2881 mbfl_convert_filter_delete(encoder);
2882 mbfl_convert_filter_delete(pc.decoder);
2883 return NULL;
2884 }
2885 pc.status = 0;
2886 pc.cache = 0;
2887 pc.digit = 0;
2888 pc.convmap = convmap;
2889 pc.mapsize = mapsize;
2890
2891 /* feed data */
2892 p = string->val;
2893 n = string->len;
2894 if (p != NULL) {
2895 while (n > 0) {
2896 if ((*encoder->filter_function)(*p++, encoder) < 0) {
2897 break;
2898 }
2899 n--;
2900 }
2901 }
2902 mbfl_convert_filter_flush(encoder);
2903 mbfl_convert_filter_flush(pc.decoder);
2904 result = mbfl_memory_device_result(&device, result);
2905 mbfl_convert_filter_delete(encoder);
2906 mbfl_convert_filter_delete(pc.decoder);
2907
2908 return result;
2909 }
2910
2911 /*
2912 * Local variables:
2913 * tab-width: 4
2914 * c-basic-offset: 4
2915 * End:
2916 */
2917