1 /*
2 * charset=UTF-8
3 * vim600: encoding=utf-8
4 */
5
6 /*
7 * "streamable kanji code filter and converter"
8 *
9 * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
10 *
11 * This software is released under the GNU Lesser General Public License.
12 * (Version 2.1, February 1999)
13 * Please read the following detail of the licence (in japanese).
14 *
15 * ◆使用許諾条件◆
16 *
17 * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
18 * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
19 * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
20 * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
21 * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
22 * することはできません。
23 *
24 * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
25 * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
26 * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
27 * による許諾を得る必要があります。
28 *
29 * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
30 * ます。「GNU Lesser General Public License」とは、これまでLibrary General
31 * Public Licenseと呼ばれていたものです。
32 * http://www.gnu.org/ --- GNUウェブサイト
33 * http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
34 * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
35 *
36 * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
37 * はありません。
38 *
39 * ◆保証内容◆
40 *
41 * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
42 * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
43 * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
44 * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
45 * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
46 * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
47 * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
48 * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
49 * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
50 * 契約・規定に優先します。
51 *
52 * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
53 *
54 * 〒102-0073
55 * 東京都千代田区九段北1-13-5日本地所第一ビル4F
56 * 株式会社ハッピーサイズ
57 * Phone: 03-3512-3655, Fax: 03-3512-3656
58 * Email: sales@happysize.co.jp
59 * Web: http://happysize.com/
60 *
61 * ◆著者◆
62 *
63 * 金本 茂 <sgk@happysize.co.jp>
64 *
65 * ◆履歴◆
66 *
67 * 1998/11/10 sgk implementation in C++
68 * 1999/4/25 sgk Cで書きなおし。
69 * 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
70 * 1999/6/?? Unicodeサポート。
71 * 1999/6/22 sgk ライセンスをLGPLに変更。
72 *
73 */
74
75 /*
76 * Unicode support
77 *
78 * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
79 * All rights reserved.
80 *
81 */
82
83
84 #ifdef HAVE_CONFIG_H
85 #include "config.h"
86 #endif
87
88 #include <stddef.h>
89
90 #ifdef HAVE_STRING_H
91 #include <string.h>
92 #endif
93
94 #ifdef HAVE_STRINGS_H
95 #include <strings.h>
96 #endif
97
98 #ifdef HAVE_STDDEF_H
99 #include <stddef.h>
100 #endif
101
102 #include "mbfilter.h"
103 #include "mbfl_filter_output.h"
104 #include "mbfilter_pass.h"
105 #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
106
107 #include "eaw_table.h"
108
/* hex character table "0123456789ABCDEF", indexed by nibble value */
static char mbfl_hexchar_table[] = {
	/* entries 0..9 */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
	/* entries 10..15 */
	0x41, 0x42, 0x43, 0x44, 0x45, 0x46
};
113
114
115
116 /*
117 * encoding filter
118 */
/* Evaluate `statement`; when its value is negative, return -1 from the enclosing function. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)
120
121
122 /*
123 * buffering converter
124 */
125 mbfl_buffer_converter *
mbfl_buffer_converter_new(enum mbfl_no_encoding from,enum mbfl_no_encoding to,int buf_initsz)126 mbfl_buffer_converter_new(
127 enum mbfl_no_encoding from,
128 enum mbfl_no_encoding to,
129 int buf_initsz)
130 {
131 mbfl_buffer_converter *convd;
132
133 /* allocate */
134 convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof (mbfl_buffer_converter));
135 if (convd == NULL) {
136 return NULL;
137 }
138
139 /* initialize */
140 convd->from = mbfl_no2encoding(from);
141 convd->to = mbfl_no2encoding(to);
142 if (convd->from == NULL) {
143 convd->from = &mbfl_encoding_pass;
144 }
145 if (convd->to == NULL) {
146 convd->to = &mbfl_encoding_pass;
147 }
148
149 /* create convert filter */
150 convd->filter1 = NULL;
151 convd->filter2 = NULL;
152 if (mbfl_convert_filter_get_vtbl(convd->from->no_encoding, convd->to->no_encoding) != NULL) {
153 convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
154 } else {
155 convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
156 if (convd->filter2 != NULL) {
157 convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding,
158 mbfl_no_encoding_wchar,
159 (int (*)(int, void*))convd->filter2->filter_function,
160 (int (*)(void*))convd->filter2->filter_flush,
161 convd->filter2);
162 if (convd->filter1 == NULL) {
163 mbfl_convert_filter_delete(convd->filter2);
164 }
165 }
166 }
167 if (convd->filter1 == NULL) {
168 return NULL;
169 }
170
171 mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
172
173 return convd;
174 }
175
176 void
mbfl_buffer_converter_delete(mbfl_buffer_converter * convd)177 mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
178 {
179 if (convd != NULL) {
180 if (convd->filter1) {
181 mbfl_convert_filter_delete(convd->filter1);
182 }
183 if (convd->filter2) {
184 mbfl_convert_filter_delete(convd->filter2);
185 }
186 mbfl_memory_device_clear(&convd->device);
187 mbfl_free((void*)convd);
188 }
189 }
190
191 void
mbfl_buffer_converter_reset(mbfl_buffer_converter * convd)192 mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
193 {
194 mbfl_memory_device_reset(&convd->device);
195 }
196
197 int
mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter * convd,int mode)198 mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
199 {
200 if (convd != NULL) {
201 if (convd->filter2 != NULL) {
202 convd->filter2->illegal_mode = mode;
203 } else if (convd->filter1 != NULL) {
204 convd->filter1->illegal_mode = mode;
205 } else {
206 return 0;
207 }
208 }
209
210 return 1;
211 }
212
213 int
mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter * convd,int substchar)214 mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
215 {
216 if (convd != NULL) {
217 if (convd->filter2 != NULL) {
218 convd->filter2->illegal_substchar = substchar;
219 } else if (convd->filter1 != NULL) {
220 convd->filter1->illegal_substchar = substchar;
221 } else {
222 return 0;
223 }
224 }
225
226 return 1;
227 }
228
229 int
mbfl_buffer_converter_strncat(mbfl_buffer_converter * convd,const unsigned char * p,int n)230 mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n)
231 {
232 mbfl_convert_filter *filter;
233 int (*filter_function)(int c, mbfl_convert_filter *filter);
234
235 if (convd != NULL && p != NULL) {
236 filter = convd->filter1;
237 if (filter != NULL) {
238 filter_function = filter->filter_function;
239 while (n > 0) {
240 if ((*filter_function)(*p++, filter) < 0) {
241 break;
242 }
243 n--;
244 }
245 }
246 }
247
248 return n;
249 }
250
251 int
mbfl_buffer_converter_feed(mbfl_buffer_converter * convd,mbfl_string * string)252 mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
253 {
254 int n;
255 unsigned char *p;
256 mbfl_convert_filter *filter;
257 int (*filter_function)(int c, mbfl_convert_filter *filter);
258
259 if (convd == NULL || string == NULL) {
260 return -1;
261 }
262 mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
263 /* feed data */
264 n = string->len;
265 p = string->val;
266 filter = convd->filter1;
267 if (filter != NULL) {
268 filter_function = filter->filter_function;
269 while (n > 0) {
270 if ((*filter_function)(*p++, filter) < 0) {
271 return -1;
272 }
273 n--;
274 }
275 }
276
277 return 0;
278 }
279
280 int
mbfl_buffer_converter_flush(mbfl_buffer_converter * convd)281 mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
282 {
283 if (convd == NULL) {
284 return -1;
285 }
286
287 if (convd->filter1 != NULL) {
288 mbfl_convert_filter_flush(convd->filter1);
289 }
290 if (convd->filter2 != NULL) {
291 mbfl_convert_filter_flush(convd->filter2);
292 }
293
294 return 0;
295 }
296
297 mbfl_string *
mbfl_buffer_converter_getbuffer(mbfl_buffer_converter * convd,mbfl_string * result)298 mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
299 {
300 if (convd != NULL && result != NULL && convd->device.buffer != NULL) {
301 result->no_encoding = convd->to->no_encoding;
302 result->val = convd->device.buffer;
303 result->len = convd->device.pos;
304 } else {
305 result = NULL;
306 }
307
308 return result;
309 }
310
311 mbfl_string *
mbfl_buffer_converter_result(mbfl_buffer_converter * convd,mbfl_string * result)312 mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
313 {
314 if (convd == NULL || result == NULL) {
315 return NULL;
316 }
317 result->no_encoding = convd->to->no_encoding;
318 return mbfl_memory_device_result(&convd->device, result);
319 }
320
321 mbfl_string *
mbfl_buffer_converter_feed_result(mbfl_buffer_converter * convd,mbfl_string * string,mbfl_string * result)322 mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
323 mbfl_string *result)
324 {
325 if (convd == NULL || string == NULL || result == NULL) {
326 return NULL;
327 }
328 mbfl_buffer_converter_feed(convd, string);
329 if (convd->filter1 != NULL) {
330 mbfl_convert_filter_flush(convd->filter1);
331 }
332 if (convd->filter2 != NULL) {
333 mbfl_convert_filter_flush(convd->filter2);
334 }
335 result->no_encoding = convd->to->no_encoding;
336 return mbfl_memory_device_result(&convd->device, result);
337 }
338
/*
 * Return the sum of num_illegalchar over the converter's filters
 * (filter1 and filter2), or 0 when convd is NULL.
 */
int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
	int total;

	if (convd == NULL) {
		return 0;
	}

	total = 0;
	if (convd->filter1 != NULL) {
		total += convd->filter1->num_illegalchar;
	}
	if (convd->filter2 != NULL) {
		total += convd->filter2->num_illegalchar;
	}

	return total;
}
357
358 /*
359 * encoding detector
360 */
361 mbfl_encoding_detector *
mbfl_encoding_detector_new(enum mbfl_no_encoding * elist,int elistsz,int strict)362 mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict)
363 {
364 mbfl_encoding_detector *identd;
365
366 int i, num;
367 mbfl_identify_filter *filter;
368
369 if (elist == NULL || elistsz <= 0) {
370 return NULL;
371 }
372
373 /* allocate */
374 identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
375 if (identd == NULL) {
376 return NULL;
377 }
378 identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
379 if (identd->filter_list == NULL) {
380 mbfl_free(identd);
381 return NULL;
382 }
383
384 /* create filters */
385 i = 0;
386 num = 0;
387 while (i < elistsz) {
388 filter = mbfl_identify_filter_new(elist[i]);
389 if (filter != NULL) {
390 identd->filter_list[num] = filter;
391 num++;
392 }
393 i++;
394 }
395 identd->filter_list_size = num;
396
397 /* set strict flag */
398 identd->strict = strict;
399
400 return identd;
401 }
402
403 void
mbfl_encoding_detector_delete(mbfl_encoding_detector * identd)404 mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
405 {
406 int i;
407
408 if (identd != NULL) {
409 if (identd->filter_list != NULL) {
410 i = identd->filter_list_size;
411 while (i > 0) {
412 i--;
413 mbfl_identify_filter_delete(identd->filter_list[i]);
414 }
415 mbfl_free((void *)identd->filter_list);
416 }
417 mbfl_free((void *)identd);
418 }
419 }
420
421 int
mbfl_encoding_detector_feed(mbfl_encoding_detector * identd,mbfl_string * string)422 mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
423 {
424 int i, n, num, bad, res;
425 unsigned char *p;
426 mbfl_identify_filter *filter;
427
428 res = 0;
429 /* feed data */
430 if (identd != NULL && string != NULL && string->val != NULL) {
431 num = identd->filter_list_size;
432 n = string->len;
433 p = string->val;
434 bad = 0;
435 while (n > 0) {
436 for (i = 0; i < num; i++) {
437 filter = identd->filter_list[i];
438 if (!filter->flag) {
439 (*filter->filter_function)(*p, filter);
440 if (filter->flag) {
441 bad++;
442 }
443 }
444 }
445 if ((num - 1) <= bad) {
446 res = 1;
447 break;
448 }
449 p++;
450 n--;
451 }
452 }
453
454 return res;
455 }
456
/*
 * Pick the detected encoding from the detector's filters.
 *
 * First pass: unflagged filters, which in strict mode must also have a
 * zero status.  Fallback pass (only when the first found nothing): any
 * unflagged filter.  Both passes walk the list from last to first and
 * keep overwriting the answer, so the lowest-indexed match wins.
 *
 * Returns mbfl_no_encoding_invalid when identd is NULL or nothing matches.
 */
enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
{
	mbfl_identify_filter *cand;
	enum mbfl_no_encoding answer = mbfl_no_encoding_invalid;
	int idx;

	if (identd == NULL) {
		return answer;
	}

	/* judge */
	for (idx = identd->filter_list_size - 1; idx >= 0; idx--) {
		cand = identd->filter_list[idx];
		if (cand->flag) {
			continue;
		}
		if (!identd->strict || !cand->status) {
			answer = cand->encoding->no_encoding;
		}
	}

	/* fallback judge */
	if (answer == mbfl_no_encoding_invalid) {
		for (idx = identd->filter_list_size - 1; idx >= 0; idx--) {
			cand = identd->filter_list[idx];
			if (!cand->flag) {
				answer = cand->encoding->no_encoding;
			}
		}
	}

	return answer;
}
492
493
494 /*
495 * encoding converter
496 */
497 mbfl_string *
mbfl_convert_encoding(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding toenc)498 mbfl_convert_encoding(
499 mbfl_string *string,
500 mbfl_string *result,
501 enum mbfl_no_encoding toenc)
502 {
503 int n;
504 unsigned char *p;
505 const mbfl_encoding *encoding;
506 mbfl_memory_device device;
507 mbfl_convert_filter *filter1;
508 mbfl_convert_filter *filter2;
509
510 /* initialize */
511 encoding = mbfl_no2encoding(toenc);
512 if (encoding == NULL || string == NULL || result == NULL) {
513 return NULL;
514 }
515
516 filter1 = NULL;
517 filter2 = NULL;
518 if (mbfl_convert_filter_get_vtbl(string->no_encoding, toenc) != NULL) {
519 filter1 = mbfl_convert_filter_new(string->no_encoding, toenc, mbfl_memory_device_output, 0, &device);
520 } else {
521 filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
522 if (filter2 != NULL) {
523 filter1 = mbfl_convert_filter_new(string->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
524 if (filter1 == NULL) {
525 mbfl_convert_filter_delete(filter2);
526 }
527 }
528 }
529 if (filter1 == NULL) {
530 return NULL;
531 }
532
533 if (filter2 != NULL) {
534 filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
535 filter2->illegal_substchar = 0x3f; /* '?' */
536 }
537
538 mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
539
540 /* feed data */
541 n = string->len;
542 p = string->val;
543 if (p != NULL) {
544 while (n > 0) {
545 if ((*filter1->filter_function)(*p++, filter1) < 0) {
546 break;
547 }
548 n--;
549 }
550 }
551
552 mbfl_convert_filter_flush(filter1);
553 mbfl_convert_filter_delete(filter1);
554 if (filter2 != NULL) {
555 mbfl_convert_filter_flush(filter2);
556 mbfl_convert_filter_delete(filter2);
557 }
558
559 return mbfl_memory_device_result(&device, result);
560 }
561
562
563 /*
564 * identify encoding
565 */
566 const mbfl_encoding *
mbfl_identify_encoding(mbfl_string * string,enum mbfl_no_encoding * elist,int elistsz,int strict)567 mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
568 {
569 int i, n, num, bad;
570 unsigned char *p;
571 mbfl_identify_filter *flist, *filter;
572 const mbfl_encoding *encoding;
573
574 /* flist is an array of mbfl_identify_filter instances */
575 flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
576 if (flist == NULL) {
577 return NULL;
578 }
579
580 num = 0;
581 if (elist != NULL) {
582 for (i = 0; i < elistsz; i++) {
583 if (!mbfl_identify_filter_init(&flist[num], elist[i])) {
584 num++;
585 }
586 }
587 }
588
589 /* feed data */
590 n = string->len;
591 p = string->val;
592
593 if (p != NULL) {
594 bad = 0;
595 while (n > 0) {
596 for (i = 0; i < num; i++) {
597 filter = &flist[i];
598 if (!filter->flag) {
599 (*filter->filter_function)(*p, filter);
600 if (filter->flag) {
601 bad++;
602 }
603 }
604 }
605 if ((num - 1) <= bad && !strict) {
606 break;
607 }
608 p++;
609 n--;
610 }
611 }
612
613 /* judge */
614 encoding = NULL;
615
616 for (i = 0; i < num; i++) {
617 filter = &flist[i];
618 if (!filter->flag) {
619 if (strict && filter->status) {
620 continue;
621 }
622 encoding = filter->encoding;
623 break;
624 }
625 }
626
627 /* fall-back judge */
628 if (!encoding) {
629 for (i = 0; i < num; i++) {
630 filter = &flist[i];
631 if (!filter->flag && (!strict || !filter->status)) {
632 encoding = filter->encoding;
633 break;
634 }
635 }
636 }
637
638 /* cleanup */
639 /* dtors should be called in reverse order */
640 i = num; while (--i >= 0) {
641 mbfl_identify_filter_cleanup(&flist[i]);
642 }
643
644 mbfl_free((void *)flist);
645
646 return encoding;
647 }
648
649 const char*
mbfl_identify_encoding_name(mbfl_string * string,enum mbfl_no_encoding * elist,int elistsz,int strict)650 mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
651 {
652 const mbfl_encoding *encoding;
653
654 encoding = mbfl_identify_encoding(string, elist, elistsz, strict);
655 if (encoding != NULL &&
656 encoding->no_encoding > mbfl_no_encoding_charset_min &&
657 encoding->no_encoding < mbfl_no_encoding_charset_max) {
658 return encoding->name;
659 } else {
660 return NULL;
661 }
662 }
663
664 enum mbfl_no_encoding
mbfl_identify_encoding_no(mbfl_string * string,enum mbfl_no_encoding * elist,int elistsz,int strict)665 mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
666 {
667 const mbfl_encoding *encoding;
668
669 encoding = mbfl_identify_encoding(string, elist, elistsz, strict);
670 if (encoding != NULL &&
671 encoding->no_encoding > mbfl_no_encoding_charset_min &&
672 encoding->no_encoding < mbfl_no_encoding_charset_max) {
673 return encoding->no_encoding;
674 } else {
675 return mbfl_no_encoding_invalid;
676 }
677 }
678
679
680 /*
681 * strlen
682 */
/*
 * Output callback that only counts: `data` points to an int counter that
 * is incremented once per call.  Returns c unchanged.
 */
static int
filter_count_output(int c, void *data)
{
	int *counter = (int *)data;

	*counter += 1;
	return c;
}
689
690 int
mbfl_strlen(mbfl_string * string)691 mbfl_strlen(mbfl_string *string)
692 {
693 int len, n, m, k;
694 unsigned char *p;
695 const unsigned char *mbtab;
696 const mbfl_encoding *encoding;
697
698 encoding = mbfl_no2encoding(string->no_encoding);
699 if (encoding == NULL || string == NULL) {
700 return -1;
701 }
702
703 len = 0;
704 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
705 len = string->len;
706 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
707 len = string->len/2;
708 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
709 len = string->len/4;
710 } else if (encoding->mblen_table != NULL) {
711 mbtab = encoding->mblen_table;
712 n = 0;
713 p = string->val;
714 k = string->len;
715 /* count */
716 if (p != NULL) {
717 while (n < k) {
718 m = mbtab[*p];
719 n += m;
720 p += m;
721 len++;
722 };
723 }
724 } else {
725 /* wchar filter */
726 mbfl_convert_filter *filter = mbfl_convert_filter_new(
727 string->no_encoding,
728 mbfl_no_encoding_wchar,
729 filter_count_output, 0, &len);
730 if (filter == NULL) {
731 return -1;
732 }
733 /* count */
734 n = string->len;
735 p = string->val;
736 if (p != NULL) {
737 while (n > 0) {
738 (*filter->filter_function)(*p++, filter);
739 n--;
740 }
741 }
742 mbfl_convert_filter_delete(filter);
743 }
744
745 return len;
746 }
747
748
749 /*
750 * strpos
751 */
/* Matching state shared between a search routine and collector_strpos(). */
struct collector_strpos_data {
	mbfl_convert_filter *next_filter;	/* not referenced by collector_strpos() */
	mbfl_wchar_device needle;		/* needle as a wchar buffer */
	int needle_len;				/* number of characters in the needle */
	int start;				/* matching only happens once output >= start */
	int output;				/* number of haystack characters seen so far */
	int found_pos;				/* position where the current candidate match began */
	int needle_pos;				/* index of the next needle character to compare */
	int matched_pos;			/* position of the last complete match */
};
762
/*
 * Output callback that matches the incoming character stream against the
 * needle stored in `data` (a struct collector_strpos_data).
 *
 * Called once per haystack character c.  When the whole needle has been
 * matched, matched_pos is set to the position where that match began.
 * On a mismatch after a partial match (and after a complete match) the
 * `retry` code looks for a shorter prefix of the needle that can still
 * be in progress at this point and adjusts found_pos / needle_pos.
 *
 * Always returns c.
 */
static int
collector_strpos(int c, void* data)
{
	int *p, *h, *m, n;
	struct collector_strpos_data *pc = (struct collector_strpos_data*)data;

	if (pc->output >= pc->start) {
		if (c == (int)pc->needle.buffer[pc->needle_pos]) {
			if (pc->needle_pos == 0) {
				pc->found_pos = pc->output;			/* found position */
			}
			pc->needle_pos++;						/* needle pointer */
			if (pc->needle_pos >= pc->needle_len) {
				pc->matched_pos = pc->found_pos;	/* matched position */
				pc->needle_pos--;
				goto retry;
			}
		} else if (pc->needle_pos != 0) {
retry:
			/* h: start of the needle suffix being tried as a new candidate */
			h = (int *)pc->needle.buffer;
			h++;
			for (;;) {
				pc->found_pos++;
				p = h;
				m = (int *)pc->needle.buffer;
				n = pc->needle_pos - 1;
				/* compare the candidate against the start of the needle */
				while (n > 0 && *p == *m) {
					n--;
					p++;
					m++;
				}
				if (n <= 0) {
					/* candidate exhausted: keep it only if c continues it */
					if (*m != c) {
						pc->needle_pos = 0;
					}
					break;
				} else {
					/* mismatch: try the next, shorter candidate */
					h++;
					pc->needle_pos--;
				}
			}
		}
	}

	pc->output++;
	return c;
}
810
811 /*
812 * oddlen
813 */
814 int
mbfl_oddlen(mbfl_string * string)815 mbfl_oddlen(mbfl_string *string)
816 {
817 int len, n, m, k;
818 unsigned char *p;
819 const unsigned char *mbtab;
820 const mbfl_encoding *encoding;
821
822
823 if (string == NULL) {
824 return -1;
825 }
826 encoding = mbfl_no2encoding(string->no_encoding);
827 if (encoding == NULL) {
828 return -1;
829 }
830
831 len = 0;
832 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
833 return 0;
834 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
835 return len % 2;
836 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
837 return len % 4;
838 } else if (encoding->mblen_table != NULL) {
839 mbtab = encoding->mblen_table;
840 n = 0;
841 p = string->val;
842 k = string->len;
843 /* count */
844 if (p != NULL) {
845 while (n < k) {
846 m = mbtab[*p];
847 n += m;
848 p += m;
849 };
850 }
851 return n-k;
852 } else {
853 /* how can i do ? */
854 return 0;
855 }
856 /* NOT REACHED */
857 }
858
859 int
mbfl_strpos(mbfl_string * haystack,mbfl_string * needle,int offset,int reverse)860 mbfl_strpos(
861 mbfl_string *haystack,
862 mbfl_string *needle,
863 int offset,
864 int reverse)
865 {
866 int result;
867 mbfl_string _haystack_u8, _needle_u8;
868 const mbfl_string *haystack_u8, *needle_u8;
869 const unsigned char *u8_tbl;
870
871 if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
872 return -8;
873 }
874
875 {
876 const mbfl_encoding *u8_enc;
877 u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8);
878 if (u8_enc == NULL || u8_enc->mblen_table == NULL) {
879 return -8;
880 }
881 u8_tbl = u8_enc->mblen_table;
882 }
883
884 if (haystack->no_encoding != mbfl_no_encoding_utf8) {
885 mbfl_string_init(&_haystack_u8);
886 haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8);
887 if (haystack_u8 == NULL) {
888 result = -4;
889 goto out;
890 }
891 } else {
892 haystack_u8 = haystack;
893 }
894
895 if (needle->no_encoding != mbfl_no_encoding_utf8) {
896 mbfl_string_init(&_needle_u8);
897 needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8);
898 if (needle_u8 == NULL) {
899 result = -4;
900 goto out;
901 }
902 } else {
903 needle_u8 = needle;
904 }
905
906 if (needle_u8->len < 1) {
907 result = -8;
908 goto out;
909 }
910
911 result = -1;
912 if (haystack_u8->len < needle_u8->len) {
913 goto out;
914 }
915
916 if (!reverse) {
917 unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
918 unsigned int needle_u8_len = needle_u8->len;
919 unsigned int i;
920 const unsigned char *p, *q, *e;
921 const unsigned char *haystack_u8_val = haystack_u8->val,
922 *needle_u8_val = needle_u8->val;
923 for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
924 jtbl[i] = needle_u8_len + 1;
925 }
926 for (i = 0; i < needle_u8_len - 1; ++i) {
927 jtbl[needle_u8_val[i]] = needle_u8_len - i;
928 }
929 e = haystack_u8_val + haystack_u8->len;
930 p = haystack_u8_val;
931 while (--offset >= 0) {
932 if (p >= e) {
933 result = -16;
934 goto out;
935 }
936 p += u8_tbl[*p];
937 }
938 p += needle_u8_len;
939 if (p > e) {
940 goto out;
941 }
942 while (p <= e) {
943 const unsigned char *pv = p;
944 q = needle_u8_val + needle_u8_len;
945 for (;;) {
946 if (q == needle_u8_val) {
947 result = 0;
948 while (p > haystack_u8_val) {
949 unsigned char c = *--p;
950 if (c < 0x80) {
951 ++result;
952 } else if ((c & 0xc0) != 0x80) {
953 ++result;
954 }
955 }
956 goto out;
957 }
958 if (*--q != *--p) {
959 break;
960 }
961 }
962 p += jtbl[*p];
963 if (p <= pv) {
964 p = pv + 1;
965 }
966 }
967 } else {
968 unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
969 unsigned int needle_u8_len = needle_u8->len, needle_len = 0;
970 unsigned int i;
971 const unsigned char *p, *e, *q, *qe;
972 const unsigned char *haystack_u8_val = haystack_u8->val,
973 *needle_u8_val = needle_u8->val;
974 for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
975 jtbl[i] = needle_u8_len;
976 }
977 for (i = needle_u8_len - 1; i > 0; --i) {
978 unsigned char c = needle_u8_val[i];
979 jtbl[c] = i;
980 if (c < 0x80) {
981 ++needle_len;
982 } else if ((c & 0xc0) != 0x80) {
983 ++needle_len;
984 }
985 }
986 {
987 unsigned char c = needle_u8_val[0];
988 if (c < 0x80) {
989 ++needle_len;
990 } else if ((c & 0xc0) != 0x80) {
991 ++needle_len;
992 }
993 }
994 e = haystack_u8_val;
995 p = e + haystack_u8->len;
996 qe = needle_u8_val + needle_u8_len;
997 if (offset < 0) {
998 if (-offset > needle_len) {
999 offset += needle_len;
1000 while (offset < 0) {
1001 unsigned char c;
1002 if (p <= e) {
1003 result = -16;
1004 goto out;
1005 }
1006 c = *(--p);
1007 if (c < 0x80) {
1008 ++offset;
1009 } else if ((c & 0xc0) != 0x80) {
1010 ++offset;
1011 }
1012 }
1013 }
1014 } else {
1015 const unsigned char *ee = haystack_u8_val + haystack_u8->len;
1016 while (--offset >= 0) {
1017 if (e >= ee) {
1018 result = -16;
1019 goto out;
1020 }
1021 e += u8_tbl[*e];
1022 }
1023 }
1024 if (p < e + needle_u8_len) {
1025 goto out;
1026 }
1027 p -= needle_u8_len;
1028 while (p >= e) {
1029 const unsigned char *pv = p;
1030 q = needle_u8_val;
1031 for (;;) {
1032 if (q == qe) {
1033 result = 0;
1034 p -= needle_u8_len;
1035 while (p > haystack_u8_val) {
1036 unsigned char c = *--p;
1037 if (c < 0x80) {
1038 ++result;
1039 } else if ((c & 0xc0) != 0x80) {
1040 ++result;
1041 }
1042 }
1043 goto out;
1044 }
1045 if (*q != *p) {
1046 break;
1047 }
1048 ++p, ++q;
1049 }
1050 p -= jtbl[*p];
1051 if (p >= pv) {
1052 p = pv - 1;
1053 }
1054 }
1055 }
1056 out:
1057 if (haystack_u8 == &_haystack_u8) {
1058 mbfl_string_clear(&_haystack_u8);
1059 }
1060 if (needle_u8 == &_needle_u8) {
1061 mbfl_string_clear(&_needle_u8);
1062 }
1063 return result;
1064 }
1065
1066 /*
1067 * substr_count
1068 */
1069
1070 int
mbfl_substr_count(mbfl_string * haystack,mbfl_string * needle)1071 mbfl_substr_count(
1072 mbfl_string *haystack,
1073 mbfl_string *needle
1074 )
1075 {
1076 int n, result = 0;
1077 unsigned char *p;
1078 mbfl_convert_filter *filter;
1079 struct collector_strpos_data pc;
1080
1081 if (haystack == NULL || needle == NULL) {
1082 return -8;
1083 }
1084 /* needle is converted into wchar */
1085 mbfl_wchar_device_init(&pc.needle);
1086 filter = mbfl_convert_filter_new(
1087 needle->no_encoding,
1088 mbfl_no_encoding_wchar,
1089 mbfl_wchar_device_output, 0, &pc.needle);
1090 if (filter == NULL) {
1091 return -4;
1092 }
1093 p = needle->val;
1094 n = needle->len;
1095 if (p != NULL) {
1096 while (n > 0) {
1097 if ((*filter->filter_function)(*p++, filter) < 0) {
1098 break;
1099 }
1100 n--;
1101 }
1102 }
1103 mbfl_convert_filter_flush(filter);
1104 mbfl_convert_filter_delete(filter);
1105 pc.needle_len = pc.needle.pos;
1106 if (pc.needle.buffer == NULL) {
1107 return -4;
1108 }
1109 if (pc.needle_len <= 0) {
1110 mbfl_wchar_device_clear(&pc.needle);
1111 return -2;
1112 }
1113 /* initialize filter and collector data */
1114 filter = mbfl_convert_filter_new(
1115 haystack->no_encoding,
1116 mbfl_no_encoding_wchar,
1117 collector_strpos, 0, &pc);
1118 if (filter == NULL) {
1119 mbfl_wchar_device_clear(&pc.needle);
1120 return -4;
1121 }
1122 pc.start = 0;
1123 pc.output = 0;
1124 pc.needle_pos = 0;
1125 pc.found_pos = 0;
1126 pc.matched_pos = -1;
1127
1128 /* feed data */
1129 p = haystack->val;
1130 n = haystack->len;
1131 if (p != NULL) {
1132 while (n > 0) {
1133 if ((*filter->filter_function)(*p++, filter) < 0) {
1134 pc.matched_pos = -4;
1135 break;
1136 }
1137 if (pc.matched_pos >= 0) {
1138 ++result;
1139 pc.matched_pos = -1;
1140 pc.needle_pos = 0;
1141 }
1142 n--;
1143 }
1144 }
1145 mbfl_convert_filter_flush(filter);
1146 mbfl_convert_filter_delete(filter);
1147 mbfl_wchar_device_clear(&pc.needle);
1148
1149 return result;
1150 }
1151
1152 /*
1153 * substr
1154 */
/* State for collector_substr(): forwards the characters in [start, stop). */
struct collector_substr_data {
	mbfl_convert_filter *next_filter;	/* filter that receives the selected characters */
	int start;				/* index of the first character to forward */
	int stop;				/* index one past the last character to forward */
	int output;				/* number of characters seen so far */
};
1161
1162 static int
collector_substr(int c,void * data)1163 collector_substr(int c, void* data)
1164 {
1165 struct collector_substr_data *pc = (struct collector_substr_data*)data;
1166
1167 if (pc->output >= pc->stop) {
1168 return -1;
1169 }
1170
1171 if (pc->output >= pc->start) {
1172 (*pc->next_filter->filter_function)(c, pc->next_filter);
1173 }
1174
1175 pc->output++;
1176
1177 return c;
1178 }
1179
1180 mbfl_string *
mbfl_substr(mbfl_string * string,mbfl_string * result,int from,int length)1181 mbfl_substr(
1182 mbfl_string *string,
1183 mbfl_string *result,
1184 int from,
1185 int length)
1186 {
1187 const mbfl_encoding *encoding;
1188 int n, m, k, len, start, end;
1189 unsigned char *p, *w;
1190 const unsigned char *mbtab;
1191
1192 encoding = mbfl_no2encoding(string->no_encoding);
1193 if (encoding == NULL || string == NULL || result == NULL) {
1194 return NULL;
1195 }
1196 mbfl_string_init(result);
1197 result->no_language = string->no_language;
1198 result->no_encoding = string->no_encoding;
1199
1200 if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
1201 encoding->mblen_table != NULL) {
1202 len = string->len;
1203 start = from;
1204 end = from + length;
1205 if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1206 start *= 2;
1207 end = start + length*2;
1208 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1209 start *= 4;
1210 end = start + length*4;
1211 } else if (encoding->mblen_table != NULL) {
1212 mbtab = encoding->mblen_table;
1213 start = 0;
1214 end = 0;
1215 n = 0;
1216 k = 0;
1217 p = string->val;
1218 if (p != NULL) {
1219 /* search start position */
1220 while (k <= from) {
1221 start = n;
1222 if (n >= len) {
1223 break;
1224 }
1225 m = mbtab[*p];
1226 n += m;
1227 p += m;
1228 k++;
1229 }
1230 /* detect end position */
1231 k = 0;
1232 end = start;
1233 while (k < length) {
1234 end = n;
1235 if (n >= len) {
1236 break;
1237 }
1238 m = mbtab[*p];
1239 n += m;
1240 p += m;
1241 k++;
1242 }
1243 }
1244 }
1245
1246 if (start > len) {
1247 start = len;
1248 }
1249 if (start < 0) {
1250 start = 0;
1251 }
1252 if (end > len) {
1253 end = len;
1254 }
1255 if (end < 0) {
1256 end = 0;
1257 }
1258 if (start > end) {
1259 start = end;
1260 }
1261
1262 /* allocate memory and copy */
1263 n = end - start;
1264 result->len = 0;
1265 result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char));
1266 if (w != NULL) {
1267 p = string->val;
1268 if (p != NULL) {
1269 p += start;
1270 result->len = n;
1271 while (n > 0) {
1272 *w++ = *p++;
1273 n--;
1274 }
1275 }
1276 *w++ = '\0';
1277 *w++ = '\0';
1278 *w++ = '\0';
1279 *w = '\0';
1280 } else {
1281 result = NULL;
1282 }
1283 } else {
1284 mbfl_memory_device device;
1285 struct collector_substr_data pc;
1286 mbfl_convert_filter *decoder;
1287 mbfl_convert_filter *encoder;
1288
1289 mbfl_memory_device_init(&device, length + 1, 0);
1290 mbfl_string_init(result);
1291 result->no_language = string->no_language;
1292 result->no_encoding = string->no_encoding;
1293 /* output code filter */
1294 decoder = mbfl_convert_filter_new(
1295 mbfl_no_encoding_wchar,
1296 string->no_encoding,
1297 mbfl_memory_device_output, 0, &device);
1298 /* wchar filter */
1299 encoder = mbfl_convert_filter_new(
1300 string->no_encoding,
1301 mbfl_no_encoding_wchar,
1302 collector_substr, 0, &pc);
1303 if (decoder == NULL || encoder == NULL) {
1304 mbfl_convert_filter_delete(encoder);
1305 mbfl_convert_filter_delete(decoder);
1306 return NULL;
1307 }
1308 pc.next_filter = decoder;
1309 pc.start = from;
1310 pc.stop = from + length;
1311 pc.output = 0;
1312
1313 /* feed data */
1314 p = string->val;
1315 n = string->len;
1316 if (p != NULL) {
1317 while (n > 0) {
1318 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1319 break;
1320 }
1321 n--;
1322 }
1323 }
1324
1325 mbfl_convert_filter_flush(encoder);
1326 mbfl_convert_filter_flush(decoder);
1327 result = mbfl_memory_device_result(&device, result);
1328 mbfl_convert_filter_delete(encoder);
1329 mbfl_convert_filter_delete(decoder);
1330 }
1331
1332 return result;
1333 }
1334
1335 /*
1336 * strcut
1337 */
1338 mbfl_string *
mbfl_strcut(mbfl_string * string,mbfl_string * result,int from,int length)1339 mbfl_strcut(
1340 mbfl_string *string,
1341 mbfl_string *result,
1342 int from,
1343 int length)
1344 {
1345 const mbfl_encoding *encoding;
1346 mbfl_memory_device device;
1347
1348 /* validate the parameters */
1349 if (string == NULL || string->val == NULL || result == NULL) {
1350 return NULL;
1351 }
1352
1353 if (from < 0 || length < 0) {
1354 return NULL;
1355 }
1356
1357 if (from >= string->len) {
1358 from = string->len;
1359 }
1360
1361 encoding = mbfl_no2encoding(string->no_encoding);
1362 if (encoding == NULL) {
1363 return NULL;
1364 }
1365
1366 mbfl_string_init(result);
1367 result->no_language = string->no_language;
1368 result->no_encoding = string->no_encoding;
1369
1370 if ((encoding->flag & (MBFL_ENCTYPE_SBCS
1371 | MBFL_ENCTYPE_WCS2BE
1372 | MBFL_ENCTYPE_WCS2LE
1373 | MBFL_ENCTYPE_WCS4BE
1374 | MBFL_ENCTYPE_WCS4LE))
1375 || encoding->mblen_table != NULL) {
1376 const unsigned char *start = NULL;
1377 const unsigned char *end = NULL;
1378 unsigned char *w;
1379 unsigned int sz;
1380
1381 if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1382 from &= -2;
1383
1384 if (from + length >= string->len) {
1385 length = string->len - from;
1386 }
1387
1388 start = string->val + from;
1389 end = start + (length & -2);
1390 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1391 from &= -4;
1392
1393 if (from + length >= string->len) {
1394 length = string->len - from;
1395 }
1396
1397 start = string->val + from;
1398 end = start + (length & -4);
1399 } else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
1400 if (from + length >= string->len) {
1401 length = string->len - from;
1402 }
1403
1404 start = string->val + from;
1405 end = start + length;
1406 } else if (encoding->mblen_table != NULL) {
1407 const unsigned char *mbtab = encoding->mblen_table;
1408 const unsigned char *p, *q;
1409 int m;
1410
1411 /* search start position */
1412 for (m = 0, p = string->val, q = p + from;
1413 p < q; p += (m = mbtab[*p]));
1414
1415 if (p > q) {
1416 p -= m;
1417 }
1418
1419 start = p;
1420
1421 /* search end position */
1422 if ((start - string->val) + length >= (int)string->len) {
1423 end = string->val + string->len;
1424 } else {
1425 for (q = p + length; p < q; p += (m = mbtab[*p]));
1426
1427 if (p > q) {
1428 p -= m;
1429 }
1430 end = p;
1431 }
1432 } else {
1433 /* never reached */
1434 return NULL;
1435 }
1436
1437 /* allocate memory and copy string */
1438 sz = end - start;
1439 if ((w = (unsigned char*)mbfl_calloc(sz + 8,
1440 sizeof(unsigned char))) == NULL) {
1441 return NULL;
1442 }
1443
1444 memcpy(w, start, sz);
1445 w[sz] = '\0';
1446 w[sz + 1] = '\0';
1447 w[sz + 2] = '\0';
1448 w[sz + 3] = '\0';
1449
1450 result->val = w;
1451 result->len = sz;
1452 } else {
1453 mbfl_convert_filter *encoder = NULL;
1454 mbfl_convert_filter *decoder = NULL;
1455 const unsigned char *p, *q, *r;
1456 struct {
1457 mbfl_convert_filter encoder;
1458 mbfl_convert_filter decoder;
1459 const unsigned char *p;
1460 int pos;
1461 } bk, _bk;
1462
1463 /* output code filter */
1464 if (!(decoder = mbfl_convert_filter_new(
1465 mbfl_no_encoding_wchar,
1466 string->no_encoding,
1467 mbfl_memory_device_output, 0, &device))) {
1468 return NULL;
1469 }
1470
1471 /* wchar filter */
1472 if (!(encoder = mbfl_convert_filter_new(
1473 string->no_encoding,
1474 mbfl_no_encoding_wchar,
1475 mbfl_filter_output_null,
1476 NULL, NULL))) {
1477 mbfl_convert_filter_delete(decoder);
1478 return NULL;
1479 }
1480
1481 mbfl_memory_device_init(&device, length + 8, 0);
1482
1483 p = string->val;
1484
1485 /* search start position */
1486 for (q = string->val + from; p < q; p++) {
1487 (*encoder->filter_function)(*p, encoder);
1488 }
1489
1490 /* switch the drain direction */
1491 encoder->output_function = (int(*)(int,void *))decoder->filter_function;
1492 encoder->flush_function = (int(*)(void *))decoder->filter_flush;
1493 encoder->data = decoder;
1494
1495 q = string->val + string->len;
1496
1497 /* save the encoder, decoder state and the pointer */
1498 mbfl_convert_filter_copy(decoder, &_bk.decoder);
1499 mbfl_convert_filter_copy(encoder, &_bk.encoder);
1500 _bk.p = p;
1501 _bk.pos = device.pos;
1502
1503 if (length > q - p) {
1504 length = q - p;
1505 }
1506
1507 if (length >= 20) {
1508 /* output a little shorter than "length" */
1509 /* XXX: the constant "20" was determined purely on the heuristics. */
1510 for (r = p + length - 20; p < r; p++) {
1511 (*encoder->filter_function)(*p, encoder);
1512 }
1513
1514 /* if the offset of the resulting string exceeds the length,
1515 * then restore the state */
1516 if (device.pos > length) {
1517 p = _bk.p;
1518 device.pos = _bk.pos;
1519 decoder->filter_dtor(decoder);
1520 encoder->filter_dtor(encoder);
1521 mbfl_convert_filter_copy(&_bk.decoder, decoder);
1522 mbfl_convert_filter_copy(&_bk.encoder, encoder);
1523 bk = _bk;
1524 } else {
1525 /* save the encoder, decoder state and the pointer */
1526 mbfl_convert_filter_copy(decoder, &bk.decoder);
1527 mbfl_convert_filter_copy(encoder, &bk.encoder);
1528 bk.p = p;
1529 bk.pos = device.pos;
1530
1531 /* flush the stream */
1532 (*encoder->filter_flush)(encoder);
1533
1534 /* if the offset of the resulting string exceeds the length,
1535 * then restore the state */
1536 if (device.pos > length) {
1537 bk.decoder.filter_dtor(&bk.decoder);
1538 bk.encoder.filter_dtor(&bk.encoder);
1539
1540 p = _bk.p;
1541 device.pos = _bk.pos;
1542 decoder->filter_dtor(decoder);
1543 encoder->filter_dtor(encoder);
1544 mbfl_convert_filter_copy(&_bk.decoder, decoder);
1545 mbfl_convert_filter_copy(&_bk.encoder, encoder);
1546 bk = _bk;
1547 } else {
1548 _bk.decoder.filter_dtor(&_bk.decoder);
1549 _bk.encoder.filter_dtor(&_bk.encoder);
1550
1551 p = bk.p;
1552 device.pos = bk.pos;
1553 decoder->filter_dtor(decoder);
1554 encoder->filter_dtor(encoder);
1555 mbfl_convert_filter_copy(&bk.decoder, decoder);
1556 mbfl_convert_filter_copy(&bk.encoder, encoder);
1557 }
1558 }
1559 } else {
1560 bk = _bk;
1561 }
1562
1563 /* detect end position */
1564 while (p < q) {
1565 (*encoder->filter_function)(*p, encoder);
1566
1567 if (device.pos > length) {
1568 /* restore filter */
1569 p = bk.p;
1570 device.pos = bk.pos;
1571 decoder->filter_dtor(decoder);
1572 encoder->filter_dtor(encoder);
1573 mbfl_convert_filter_copy(&bk.decoder, decoder);
1574 mbfl_convert_filter_copy(&bk.encoder, encoder);
1575 break;
1576 }
1577
1578 p++;
1579
1580 /* backup current state */
1581 mbfl_convert_filter_copy(decoder, &_bk.decoder);
1582 mbfl_convert_filter_copy(encoder, &_bk.encoder);
1583 _bk.pos = device.pos;
1584 _bk.p = p;
1585
1586 (*encoder->filter_flush)(encoder);
1587
1588 if (device.pos > length) {
1589 _bk.decoder.filter_dtor(&_bk.decoder);
1590 _bk.encoder.filter_dtor(&_bk.encoder);
1591
1592 /* restore filter */
1593 p = bk.p;
1594 device.pos = bk.pos;
1595 decoder->filter_dtor(decoder);
1596 encoder->filter_dtor(encoder);
1597 mbfl_convert_filter_copy(&bk.decoder, decoder);
1598 mbfl_convert_filter_copy(&bk.encoder, encoder);
1599 break;
1600 }
1601
1602 bk.decoder.filter_dtor(&bk.decoder);
1603 bk.encoder.filter_dtor(&bk.encoder);
1604
1605 p = _bk.p;
1606 device.pos = _bk.pos;
1607 decoder->filter_dtor(decoder);
1608 encoder->filter_dtor(encoder);
1609 mbfl_convert_filter_copy(&_bk.decoder, decoder);
1610 mbfl_convert_filter_copy(&_bk.encoder, encoder);
1611
1612 bk = _bk;
1613 }
1614
1615 (*encoder->filter_flush)(encoder);
1616
1617 bk.decoder.filter_dtor(&bk.decoder);
1618 bk.encoder.filter_dtor(&bk.encoder);
1619
1620 result = mbfl_memory_device_result(&device, result);
1621
1622 mbfl_convert_filter_delete(encoder);
1623 mbfl_convert_filter_delete(decoder);
1624 }
1625
1626 return result;
1627 }
1628
1629
1630 /*
1631 * strwidth
1632 */
is_fullwidth(int c)1633 static int is_fullwidth(int c)
1634 {
1635 int i;
1636
1637 if (c < mbfl_eaw_table[0].begin) {
1638 return 0;
1639 }
1640
1641 for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1642 if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1643 return 1;
1644 }
1645 }
1646
1647 return 0;
1648 }
1649
/*
 * Filter output callback that accumulates display width.
 *
 * `data` points at an int counter; it grows by 2 for a character that
 * is_fullwidth() accepts and by 1 for any other character.  The character is
 * returned unchanged.
 */
static int
filter_count_width(int c, void* data)
{
	int *total = (int *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}

	return c;
}
1656
1657 int
mbfl_strwidth(mbfl_string * string)1658 mbfl_strwidth(mbfl_string *string)
1659 {
1660 int len, n;
1661 unsigned char *p;
1662 mbfl_convert_filter *filter;
1663
1664 len = 0;
1665 if (string->len > 0 && string->val != NULL) {
1666 /* wchar filter */
1667 filter = mbfl_convert_filter_new(
1668 string->no_encoding,
1669 mbfl_no_encoding_wchar,
1670 filter_count_width, 0, &len);
1671 if (filter == NULL) {
1672 mbfl_convert_filter_delete(filter);
1673 return -1;
1674 }
1675
1676 /* feed data */
1677 p = string->val;
1678 n = string->len;
1679 while (n > 0) {
1680 (*filter->filter_function)(*p++, filter);
1681 n--;
1682 }
1683
1684 mbfl_convert_filter_flush(filter);
1685 mbfl_convert_filter_delete(filter);
1686 }
1687
1688 return len;
1689 }
1690
1691
1692 /*
1693 * strimwidth
1694 */
1695 struct collector_strimwidth_data {
1696 mbfl_convert_filter *decoder;
1697 mbfl_convert_filter *decoder_backup;
1698 mbfl_memory_device device;
1699 int from;
1700 int width;
1701 int outwidth;
1702 int outchar;
1703 int status;
1704 int endpos;
1705 };
1706
1707 static int
collector_strimwidth(int c,void * data)1708 collector_strimwidth(int c, void* data)
1709 {
1710 struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1711
1712 switch (pc->status) {
1713 case 10:
1714 (*pc->decoder->filter_function)(c, pc->decoder);
1715 break;
1716 default:
1717 if (pc->outchar >= pc->from) {
1718 pc->outwidth += (is_fullwidth(c) ? 2: 1);
1719
1720 if (pc->outwidth > pc->width) {
1721 if (pc->status == 0) {
1722 pc->endpos = pc->device.pos;
1723 mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1724 }
1725 pc->status++;
1726 (*pc->decoder->filter_function)(c, pc->decoder);
1727 c = -1;
1728 } else {
1729 (*pc->decoder->filter_function)(c, pc->decoder);
1730 }
1731 }
1732 pc->outchar++;
1733 break;
1734 }
1735
1736 return c;
1737 }
1738
1739 mbfl_string *
mbfl_strimwidth(mbfl_string * string,mbfl_string * marker,mbfl_string * result,int from,int width)1740 mbfl_strimwidth(
1741 mbfl_string *string,
1742 mbfl_string *marker,
1743 mbfl_string *result,
1744 int from,
1745 int width)
1746 {
1747 struct collector_strimwidth_data pc;
1748 mbfl_convert_filter *encoder;
1749 int n, mkwidth;
1750 unsigned char *p;
1751
1752 if (string == NULL || result == NULL) {
1753 return NULL;
1754 }
1755 mbfl_string_init(result);
1756 result->no_language = string->no_language;
1757 result->no_encoding = string->no_encoding;
1758 mbfl_memory_device_init(&pc.device, width, 0);
1759
1760 /* output code filter */
1761 pc.decoder = mbfl_convert_filter_new(
1762 mbfl_no_encoding_wchar,
1763 string->no_encoding,
1764 mbfl_memory_device_output, 0, &pc.device);
1765 pc.decoder_backup = mbfl_convert_filter_new(
1766 mbfl_no_encoding_wchar,
1767 string->no_encoding,
1768 mbfl_memory_device_output, 0, &pc.device);
1769 /* wchar filter */
1770 encoder = mbfl_convert_filter_new(
1771 string->no_encoding,
1772 mbfl_no_encoding_wchar,
1773 collector_strimwidth, 0, &pc);
1774 if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1775 mbfl_convert_filter_delete(encoder);
1776 mbfl_convert_filter_delete(pc.decoder);
1777 mbfl_convert_filter_delete(pc.decoder_backup);
1778 return NULL;
1779 }
1780 mkwidth = 0;
1781 if (marker) {
1782 mkwidth = mbfl_strwidth(marker);
1783 }
1784 pc.from = from;
1785 pc.width = width - mkwidth;
1786 pc.outwidth = 0;
1787 pc.outchar = 0;
1788 pc.status = 0;
1789 pc.endpos = 0;
1790
1791 /* feed data */
1792 p = string->val;
1793 n = string->len;
1794 if (p != NULL) {
1795 while (n > 0) {
1796 n--;
1797 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1798 break;
1799 }
1800 }
1801 mbfl_convert_filter_flush(encoder);
1802 if (pc.status != 0 && mkwidth > 0) {
1803 pc.width += mkwidth;
1804 while (n > 0) {
1805 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1806 break;
1807 }
1808 n--;
1809 }
1810 mbfl_convert_filter_flush(encoder);
1811 if (pc.status != 1) {
1812 pc.status = 10;
1813 pc.device.pos = pc.endpos;
1814 mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1815 mbfl_convert_filter_reset(encoder, marker->no_encoding, mbfl_no_encoding_wchar);
1816 p = marker->val;
1817 n = marker->len;
1818 while (n > 0) {
1819 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1820 break;
1821 }
1822 n--;
1823 }
1824 mbfl_convert_filter_flush(encoder);
1825 }
1826 } else if (pc.status != 0) {
1827 pc.device.pos = pc.endpos;
1828 mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1829 }
1830 mbfl_convert_filter_flush(pc.decoder);
1831 }
1832 result = mbfl_memory_device_result(&pc.device, result);
1833 mbfl_convert_filter_delete(encoder);
1834 mbfl_convert_filter_delete(pc.decoder);
1835 mbfl_convert_filter_delete(pc.decoder_backup);
1836
1837 return result;
1838 }
1839
1840 mbfl_string *
mbfl_ja_jp_hantozen(mbfl_string * string,mbfl_string * result,int mode)1841 mbfl_ja_jp_hantozen(
1842 mbfl_string *string,
1843 mbfl_string *result,
1844 int mode)
1845 {
1846 int n;
1847 unsigned char *p;
1848 const mbfl_encoding *encoding;
1849 mbfl_memory_device device;
1850 mbfl_convert_filter *decoder = NULL;
1851 mbfl_convert_filter *encoder = NULL;
1852 mbfl_convert_filter *tl_filter = NULL;
1853 mbfl_convert_filter *next_filter = NULL;
1854 mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1855
1856 /* validate parameters */
1857 if (string == NULL || result == NULL) {
1858 return NULL;
1859 }
1860
1861 encoding = mbfl_no2encoding(string->no_encoding);
1862 if (encoding == NULL) {
1863 return NULL;
1864 }
1865
1866 mbfl_memory_device_init(&device, string->len, 0);
1867 mbfl_string_init(result);
1868
1869 result->no_language = string->no_language;
1870 result->no_encoding = string->no_encoding;
1871
1872 decoder = mbfl_convert_filter_new(
1873 mbfl_no_encoding_wchar,
1874 string->no_encoding,
1875 mbfl_memory_device_output, 0, &device);
1876 if (decoder == NULL) {
1877 goto out;
1878 }
1879 next_filter = decoder;
1880
1881 param =
1882 (mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
1883 if (param == NULL) {
1884 goto out;
1885 }
1886
1887 param->mode = mode;
1888
1889 tl_filter = mbfl_convert_filter_new2(
1890 &vtbl_tl_jisx0201_jisx0208,
1891 (int(*)(int, void*))next_filter->filter_function,
1892 (int(*)(void*))next_filter->filter_flush,
1893 next_filter);
1894 if (tl_filter == NULL) {
1895 mbfl_free(param);
1896 goto out;
1897 }
1898
1899 tl_filter->opaque = param;
1900 next_filter = tl_filter;
1901
1902 encoder = mbfl_convert_filter_new(
1903 string->no_encoding,
1904 mbfl_no_encoding_wchar,
1905 (int(*)(int, void*))next_filter->filter_function,
1906 (int(*)(void*))next_filter->filter_flush,
1907 next_filter);
1908 if (encoder == NULL) {
1909 goto out;
1910 }
1911
1912 /* feed data */
1913 p = string->val;
1914 n = string->len;
1915 if (p != NULL) {
1916 while (n > 0) {
1917 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1918 break;
1919 }
1920 n--;
1921 }
1922 }
1923
1924 mbfl_convert_filter_flush(encoder);
1925 result = mbfl_memory_device_result(&device, result);
1926 out:
1927 if (tl_filter != NULL) {
1928 if (tl_filter->opaque != NULL) {
1929 mbfl_free(tl_filter->opaque);
1930 }
1931 mbfl_convert_filter_delete(tl_filter);
1932 }
1933
1934 if (decoder != NULL) {
1935 mbfl_convert_filter_delete(decoder);
1936 }
1937
1938 if (encoder != NULL) {
1939 mbfl_convert_filter_delete(encoder);
1940 }
1941
1942 return result;
1943 }
1944
1945
1946 /*
1947 * MIME header encode
1948 */
1949 struct mime_header_encoder_data {
1950 mbfl_convert_filter *conv1_filter;
1951 mbfl_convert_filter *block_filter;
1952 mbfl_convert_filter *conv2_filter;
1953 mbfl_convert_filter *conv2_filter_backup;
1954 mbfl_convert_filter *encod_filter;
1955 mbfl_convert_filter *encod_filter_backup;
1956 mbfl_memory_device outdev;
1957 mbfl_memory_device tmpdev;
1958 int status1;
1959 int status2;
1960 int prevpos;
1961 int linehead;
1962 int firstindent;
1963 int encnamelen;
1964 int lwsplen;
1965 char encname[128];
1966 char lwsp[16];
1967 };
1968
1969 static int
mime_header_encoder_block_collector(int c,void * data)1970 mime_header_encoder_block_collector(int c, void *data)
1971 {
1972 int n;
1973 struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1974
1975 switch (pe->status2) {
1976 case 1: /* encoded word */
1977 pe->prevpos = pe->outdev.pos;
1978 mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
1979 mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
1980 (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1981 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1982 (*pe->encod_filter->filter_flush)(pe->encod_filter);
1983 n = pe->outdev.pos - pe->linehead + pe->firstindent;
1984 pe->outdev.pos = pe->prevpos;
1985 mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
1986 mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
1987 if (n >= 74) {
1988 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1989 (*pe->encod_filter->filter_flush)(pe->encod_filter);
1990 mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
1991 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
1992 pe->linehead = pe->outdev.pos;
1993 pe->firstindent = 0;
1994 mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1995 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1996 } else {
1997 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1998 }
1999 break;
2000
2001 default:
2002 mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
2003 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2004 pe->status2 = 1;
2005 break;
2006 }
2007
2008 return c;
2009 }
2010
2011 static int
mime_header_encoder_collector(int c,void * data)2012 mime_header_encoder_collector(int c, void *data)
2013 {
2014 static int qp_table[256] = {
2015 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2016 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2017 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
2018 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
2019 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
2020 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
2021 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
2022 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
2023 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
2024 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
2025 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
2026 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
2027 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
2028 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
2029 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
2030 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xF0 */
2031 };
2032
2033 int n;
2034 struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
2035
2036 switch (pe->status1) {
2037 case 11: /* encoded word */
2038 (*pe->block_filter->filter_function)(c, pe->block_filter);
2039 break;
2040
2041 default: /* ASCII */
2042 if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
2043 mbfl_memory_device_output(c, &pe->tmpdev);
2044 pe->status1 = 1;
2045 } else if (pe->status1 == 0 && c == 0x20) { /* repeat SPACE */
2046 mbfl_memory_device_output(c, &pe->tmpdev);
2047 } else {
2048 if (pe->tmpdev.pos < 74 && c == 0x20) {
2049 n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
2050 if (n > 74) {
2051 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
2052 pe->linehead = pe->outdev.pos;
2053 pe->firstindent = 0;
2054 } else if (pe->outdev.pos > 0) {
2055 mbfl_memory_device_output(0x20, &pe->outdev);
2056 }
2057 mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2058 mbfl_memory_device_reset(&pe->tmpdev);
2059 pe->status1 = 0;
2060 } else {
2061 n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
2062 if (n > 60) {
2063 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
2064 pe->linehead = pe->outdev.pos;
2065 pe->firstindent = 0;
2066 } else if (pe->outdev.pos > 0) {
2067 mbfl_memory_device_output(0x20, &pe->outdev);
2068 }
2069 mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
2070 mbfl_memory_device_reset(&pe->tmpdev);
2071 (*pe->block_filter->filter_function)(c, pe->block_filter);
2072 pe->status1 = 11;
2073 }
2074 }
2075 break;
2076 }
2077
2078 return c;
2079 }
2080
2081 mbfl_string *
mime_header_encoder_result(struct mime_header_encoder_data * pe,mbfl_string * result)2082 mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
2083 {
2084 if (pe->status1 >= 10) {
2085 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2086 (*pe->encod_filter->filter_flush)(pe->encod_filter);
2087 mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
2088 } else if (pe->tmpdev.pos > 0) {
2089 if (pe->outdev.pos > 0) {
2090 if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) {
2091 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2092 } else {
2093 mbfl_memory_device_output(0x20, &pe->outdev);
2094 }
2095 }
2096 mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2097 }
2098 mbfl_memory_device_reset(&pe->tmpdev);
2099 pe->prevpos = 0;
2100 pe->linehead = 0;
2101 pe->status1 = 0;
2102 pe->status2 = 0;
2103
2104 return mbfl_memory_device_result(&pe->outdev, result);
2105 }
2106
2107 struct mime_header_encoder_data*
mime_header_encoder_new(enum mbfl_no_encoding incode,enum mbfl_no_encoding outcode,enum mbfl_no_encoding transenc)2108 mime_header_encoder_new(
2109 enum mbfl_no_encoding incode,
2110 enum mbfl_no_encoding outcode,
2111 enum mbfl_no_encoding transenc)
2112 {
2113 int n;
2114 const char *s;
2115 const mbfl_encoding *outencoding;
2116 struct mime_header_encoder_data *pe;
2117
2118 /* get output encoding and check MIME charset name */
2119 outencoding = mbfl_no2encoding(outcode);
2120 if (outencoding == NULL || outencoding->mime_name == NULL || outencoding->mime_name[0] == '\0') {
2121 return NULL;
2122 }
2123
2124 pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
2125 if (pe == NULL) {
2126 return NULL;
2127 }
2128
2129 mbfl_memory_device_init(&pe->outdev, 0, 0);
2130 mbfl_memory_device_init(&pe->tmpdev, 0, 0);
2131 pe->prevpos = 0;
2132 pe->linehead = 0;
2133 pe->firstindent = 0;
2134 pe->status1 = 0;
2135 pe->status2 = 0;
2136
2137 /* make the encoding description string exp. "=?ISO-2022-JP?B?" */
2138 n = 0;
2139 pe->encname[n++] = 0x3d;
2140 pe->encname[n++] = 0x3f;
2141 s = outencoding->mime_name;
2142 while (*s) {
2143 pe->encname[n++] = *s++;
2144 }
2145 pe->encname[n++] = 0x3f;
2146 if (transenc == mbfl_no_encoding_qprint) {
2147 pe->encname[n++] = 0x51;
2148 } else {
2149 pe->encname[n++] = 0x42;
2150 transenc = mbfl_no_encoding_base64;
2151 }
2152 pe->encname[n++] = 0x3f;
2153 pe->encname[n] = '\0';
2154 pe->encnamelen = n;
2155
2156 n = 0;
2157 pe->lwsp[n++] = 0x0d;
2158 pe->lwsp[n++] = 0x0a;
2159 pe->lwsp[n++] = 0x20;
2160 pe->lwsp[n] = '\0';
2161 pe->lwsplen = n;
2162
2163 /* transfer encode filter */
2164 pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2165 pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2166
2167 /* Output code filter */
2168 pe->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2169 pe->conv2_filter_backup = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2170
2171 /* encoded block filter */
2172 pe->block_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, mbfl_no_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
2173
2174 /* Input code filter */
2175 pe->conv1_filter = mbfl_convert_filter_new(incode, mbfl_no_encoding_wchar, mime_header_encoder_collector, 0, pe);
2176
2177 if (pe->encod_filter == NULL ||
2178 pe->encod_filter_backup == NULL ||
2179 pe->conv2_filter == NULL ||
2180 pe->conv2_filter_backup == NULL ||
2181 pe->conv1_filter == NULL) {
2182 mime_header_encoder_delete(pe);
2183 return NULL;
2184 }
2185
2186 if (transenc == mbfl_no_encoding_qprint) {
2187 pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
2188 pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
2189 } else {
2190 pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
2191 pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
2192 }
2193
2194 return pe;
2195 }
2196
2197 void
mime_header_encoder_delete(struct mime_header_encoder_data * pe)2198 mime_header_encoder_delete(struct mime_header_encoder_data *pe)
2199 {
2200 if (pe) {
2201 mbfl_convert_filter_delete(pe->conv1_filter);
2202 mbfl_convert_filter_delete(pe->block_filter);
2203 mbfl_convert_filter_delete(pe->conv2_filter);
2204 mbfl_convert_filter_delete(pe->conv2_filter_backup);
2205 mbfl_convert_filter_delete(pe->encod_filter);
2206 mbfl_convert_filter_delete(pe->encod_filter_backup);
2207 mbfl_memory_device_clear(&pe->outdev);
2208 mbfl_memory_device_clear(&pe->tmpdev);
2209 mbfl_free((void*)pe);
2210 }
2211 }
2212
2213 int
mime_header_encoder_feed(int c,struct mime_header_encoder_data * pe)2214 mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
2215 {
2216 return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
2217 }
2218
2219 mbfl_string *
mbfl_mime_header_encode(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding outcode,enum mbfl_no_encoding encoding,const char * linefeed,int indent)2220 mbfl_mime_header_encode(
2221 mbfl_string *string,
2222 mbfl_string *result,
2223 enum mbfl_no_encoding outcode,
2224 enum mbfl_no_encoding encoding,
2225 const char *linefeed,
2226 int indent)
2227 {
2228 int n;
2229 unsigned char *p;
2230 struct mime_header_encoder_data *pe;
2231
2232 mbfl_string_init(result);
2233 result->no_language = string->no_language;
2234 result->no_encoding = mbfl_no_encoding_ascii;
2235
2236 pe = mime_header_encoder_new(string->no_encoding, outcode, encoding);
2237 if (pe == NULL) {
2238 return NULL;
2239 }
2240
2241 if (linefeed != NULL) {
2242 n = 0;
2243 while (*linefeed && n < 8) {
2244 pe->lwsp[n++] = *linefeed++;
2245 }
2246 pe->lwsp[n++] = 0x20;
2247 pe->lwsp[n] = '\0';
2248 pe->lwsplen = n;
2249 }
2250 if (indent > 0 && indent < 74) {
2251 pe->firstindent = indent;
2252 }
2253
2254 n = string->len;
2255 p = string->val;
2256 while (n > 0) {
2257 (*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
2258 n--;
2259 }
2260
2261 result = mime_header_encoder_result(pe, result);
2262 mime_header_encoder_delete(pe);
2263
2264 return result;
2265 }
2266
2267
2268 /*
2269 * MIME header decode
2270 */
2271 struct mime_header_decoder_data {
2272 mbfl_convert_filter *deco_filter;
2273 mbfl_convert_filter *conv1_filter;
2274 mbfl_convert_filter *conv2_filter;
2275 mbfl_memory_device outdev;
2276 mbfl_memory_device tmpdev;
2277 int cspos;
2278 int status;
2279 enum mbfl_no_encoding encoding;
2280 enum mbfl_no_encoding incode;
2281 enum mbfl_no_encoding outcode;
2282 };
2283
2284 static int
mime_header_decoder_collector(int c,void * data)2285 mime_header_decoder_collector(int c, void* data)
2286 {
2287 const mbfl_encoding *encoding;
2288 struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;
2289
2290 switch (pd->status) {
2291 case 1:
2292 if (c == 0x3f) { /* ? */
2293 mbfl_memory_device_output(c, &pd->tmpdev);
2294 pd->cspos = pd->tmpdev.pos;
2295 pd->status = 2;
2296 } else {
2297 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2298 mbfl_memory_device_reset(&pd->tmpdev);
2299 if (c == 0x3d) { /* = */
2300 mbfl_memory_device_output(c, &pd->tmpdev);
2301 } else if (c == 0x0d || c == 0x0a) { /* CR or LF */
2302 pd->status = 9;
2303 } else {
2304 (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2305 pd->status = 0;
2306 }
2307 }
2308 break;
2309 case 2: /* store charset string */
2310 if (c == 0x3f) { /* ? */
2311 /* identify charset */
2312 mbfl_memory_device_output('\0', &pd->tmpdev);
2313 encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
2314 if (encoding != NULL) {
2315 pd->incode = encoding->no_encoding;
2316 pd->status = 3;
2317 }
2318 mbfl_memory_device_unput(&pd->tmpdev);
2319 mbfl_memory_device_output(c, &pd->tmpdev);
2320 } else {
2321 mbfl_memory_device_output(c, &pd->tmpdev);
2322 if (pd->tmpdev.pos > 100) { /* too long charset string */
2323 pd->status = 0;
2324 } else if (c == 0x0d || c == 0x0a) { /* CR or LF */
2325 mbfl_memory_device_unput(&pd->tmpdev);
2326 pd->status = 9;
2327 }
2328 if (pd->status != 2) {
2329 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2330 mbfl_memory_device_reset(&pd->tmpdev);
2331 }
2332 }
2333 break;
2334 case 3: /* identify encoding */
2335 mbfl_memory_device_output(c, &pd->tmpdev);
2336 if (c == 0x42 || c == 0x62) { /* 'B' or 'b' */
2337 pd->encoding = mbfl_no_encoding_base64;
2338 pd->status = 4;
2339 } else if (c == 0x51 || c == 0x71) { /* 'Q' or 'q' */
2340 pd->encoding = mbfl_no_encoding_qprint;
2341 pd->status = 4;
2342 } else {
2343 if (c == 0x0d || c == 0x0a) { /* CR or LF */
2344 mbfl_memory_device_unput(&pd->tmpdev);
2345 pd->status = 9;
2346 } else {
2347 pd->status = 0;
2348 }
2349 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2350 mbfl_memory_device_reset(&pd->tmpdev);
2351 }
2352 break;
2353 case 4: /* reset filter */
2354 mbfl_memory_device_output(c, &pd->tmpdev);
2355 if (c == 0x3f) { /* ? */
2356 /* charset convert filter */
2357 mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, mbfl_no_encoding_wchar);
2358 /* decode filter */
2359 mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, mbfl_no_encoding_8bit);
2360 pd->status = 5;
2361 } else {
2362 if (c == 0x0d || c == 0x0a) { /* CR or LF */
2363 mbfl_memory_device_unput(&pd->tmpdev);
2364 pd->status = 9;
2365 } else {
2366 pd->status = 0;
2367 }
2368 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2369 }
2370 mbfl_memory_device_reset(&pd->tmpdev);
2371 break;
2372 case 5: /* encoded block */
2373 if (c == 0x3f) { /* ? */
2374 pd->status = 6;
2375 } else {
2376 (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2377 }
2378 break;
2379 case 6: /* check end position */
2380 if (c == 0x3d) { /* = */
2381 /* flush and reset filter */
2382 (*pd->deco_filter->filter_flush)(pd->deco_filter);
2383 (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2384 mbfl_convert_filter_reset(pd->conv1_filter, mbfl_no_encoding_ascii, mbfl_no_encoding_wchar);
2385 pd->status = 7;
2386 } else {
2387 (*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
2388 if (c != 0x3f) { /* ? */
2389 (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2390 pd->status = 5;
2391 }
2392 }
2393 break;
2394 case 7: /* after encoded block */
2395 if (c == 0x0d || c == 0x0a) { /* CR LF */
2396 pd->status = 8;
2397 } else {
2398 mbfl_memory_device_output(c, &pd->tmpdev);
2399 if (c == 0x3d) { /* = */
2400 pd->status = 1;
2401 } else if (c != 0x20 && c != 0x09) { /* not space */
2402 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2403 mbfl_memory_device_reset(&pd->tmpdev);
2404 pd->status = 0;
2405 }
2406 }
2407 break;
2408 case 8: /* folding */
2409 case 9: /* folding */
2410 if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
2411 if (c == 0x3d) { /* = */
2412 if (pd->status == 8) {
2413 mbfl_memory_device_output(0x20, &pd->tmpdev); /* SPACE */
2414 } else {
2415 (*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
2416 }
2417 mbfl_memory_device_output(c, &pd->tmpdev);
2418 pd->status = 1;
2419 } else {
2420 mbfl_memory_device_output(0x20, &pd->tmpdev);
2421 mbfl_memory_device_output(c, &pd->tmpdev);
2422 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2423 mbfl_memory_device_reset(&pd->tmpdev);
2424 pd->status = 0;
2425 }
2426 }
2427 break;
2428 default: /* non encoded block */
2429 if (c == 0x0d || c == 0x0a) { /* CR LF */
2430 pd->status = 9;
2431 } else if (c == 0x3d) { /* = */
2432 mbfl_memory_device_output(c, &pd->tmpdev);
2433 pd->status = 1;
2434 } else {
2435 (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2436 }
2437 break;
2438 }
2439
2440 return c;
2441 }
2442
2443 mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data * pd,mbfl_string * result)2444 mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2445 {
2446 switch (pd->status) {
2447 case 1:
2448 case 2:
2449 case 3:
2450 case 4:
2451 case 7:
2452 case 8:
2453 case 9:
2454 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2455 break;
2456 case 5:
2457 case 6:
2458 (*pd->deco_filter->filter_flush)(pd->deco_filter);
2459 (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2460 break;
2461 }
2462 (*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2463 mbfl_memory_device_reset(&pd->tmpdev);
2464 pd->status = 0;
2465
2466 return mbfl_memory_device_result(&pd->outdev, result);
2467 }
2468
2469 struct mime_header_decoder_data*
mime_header_decoder_new(enum mbfl_no_encoding outcode)2470 mime_header_decoder_new(enum mbfl_no_encoding outcode)
2471 {
2472 struct mime_header_decoder_data *pd;
2473
2474 pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
2475 if (pd == NULL) {
2476 return NULL;
2477 }
2478
2479 mbfl_memory_device_init(&pd->outdev, 0, 0);
2480 mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2481 pd->cspos = 0;
2482 pd->status = 0;
2483 pd->encoding = mbfl_no_encoding_pass;
2484 pd->incode = mbfl_no_encoding_ascii;
2485 pd->outcode = outcode;
2486 /* charset convert filter */
2487 pd->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2488 pd->conv1_filter = mbfl_convert_filter_new(pd->incode, mbfl_no_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2489 /* decode filter */
2490 pd->deco_filter = mbfl_convert_filter_new(pd->encoding, mbfl_no_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2491
2492 if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2493 mime_header_decoder_delete(pd);
2494 return NULL;
2495 }
2496
2497 return pd;
2498 }
2499
2500 void
mime_header_decoder_delete(struct mime_header_decoder_data * pd)2501 mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2502 {
2503 if (pd) {
2504 mbfl_convert_filter_delete(pd->conv2_filter);
2505 mbfl_convert_filter_delete(pd->conv1_filter);
2506 mbfl_convert_filter_delete(pd->deco_filter);
2507 mbfl_memory_device_clear(&pd->outdev);
2508 mbfl_memory_device_clear(&pd->tmpdev);
2509 mbfl_free((void*)pd);
2510 }
2511 }
2512
/*
 * Feed one input byte `c` to the decoder `pd`.
 *
 * Thin public wrapper around mime_header_decoder_collector(); its return
 * value is passed back unchanged.
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	int ret;

	ret = mime_header_decoder_collector(c, pd);

	return ret;
}
2518
2519 mbfl_string *
mbfl_mime_header_decode(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding outcode)2520 mbfl_mime_header_decode(
2521 mbfl_string *string,
2522 mbfl_string *result,
2523 enum mbfl_no_encoding outcode)
2524 {
2525 int n;
2526 unsigned char *p;
2527 struct mime_header_decoder_data *pd;
2528
2529 mbfl_string_init(result);
2530 result->no_language = string->no_language;
2531 result->no_encoding = outcode;
2532
2533 pd = mime_header_decoder_new(outcode);
2534 if (pd == NULL) {
2535 return NULL;
2536 }
2537
2538 /* feed data */
2539 n = string->len;
2540 p = string->val;
2541 while (n > 0) {
2542 mime_header_decoder_collector(*p++, pd);
2543 n--;
2544 }
2545
2546 result = mime_header_decoder_result(pd, result);
2547 mime_header_decoder_delete(pd);
2548
2549 return result;
2550 }
2551
2552
2553
2554 /*
2555 * convert HTML numeric entity
2556 */
2557 struct collector_htmlnumericentity_data {
2558 mbfl_convert_filter *decoder;
2559 int status;
2560 int cache;
2561 int digit;
2562 int *convmap;
2563 int mapsize;
2564 };
2565
2566 static int
collector_encode_htmlnumericentity(int c,void * data)2567 collector_encode_htmlnumericentity(int c, void *data)
2568 {
2569 struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2570 int f, n, s, r, d, size, *mapelm;
2571
2572 size = pc->mapsize;
2573 f = 0;
2574 n = 0;
2575 while (n < size) {
2576 mapelm = &(pc->convmap[n*4]);
2577 if (c >= mapelm[0] && c <= mapelm[1]) {
2578 s = (c + mapelm[2]) & mapelm[3];
2579 if (s >= 0) {
2580 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2581 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2582 r = 100000000;
2583 s %= r;
2584 while (r > 0) {
2585 d = s/r;
2586 if (d || f) {
2587 f = 1;
2588 s %= r;
2589 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2590 }
2591 r /= 10;
2592 }
2593 if (!f) {
2594 f = 1;
2595 (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2596 }
2597 (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
2598 }
2599 }
2600 if (f) {
2601 break;
2602 }
2603 n++;
2604 }
2605 if (!f) {
2606 (*pc->decoder->filter_function)(c, pc->decoder);
2607 }
2608
2609 return c;
2610 }
2611
2612 static int
collector_decode_htmlnumericentity(int c,void * data)2613 collector_decode_htmlnumericentity(int c, void *data)
2614 {
2615 struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2616 int f, n, s, r, d, size, *mapelm;
2617
2618 switch (pc->status) {
2619 case 1:
2620 if (c == 0x23) { /* '#' */
2621 pc->status = 2;
2622 } else {
2623 pc->status = 0;
2624 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2625 (*pc->decoder->filter_function)(c, pc->decoder);
2626 }
2627 break;
2628 case 2:
2629 if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2630 pc->cache = c - 0x30;
2631 pc->status = 3;
2632 pc->digit = 1;
2633 } else {
2634 pc->status = 0;
2635 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2636 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2637 (*pc->decoder->filter_function)(c, pc->decoder);
2638 }
2639 break;
2640 case 3:
2641 s = 0;
2642 f = 0;
2643 if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2644 if (pc->digit > 9) {
2645 pc->status = 0;
2646 s = pc->cache;
2647 f = 1;
2648 } else {
2649 s = pc->cache*10 + c - 0x30;
2650 pc->cache = s;
2651 pc->digit++;
2652 }
2653 } else {
2654 pc->status = 0;
2655 s = pc->cache;
2656 f = 1;
2657 n = 0;
2658 size = pc->mapsize;
2659 while (n < size) {
2660 mapelm = &(pc->convmap[n*4]);
2661 d = s - mapelm[2];
2662 if (d >= mapelm[0] && d <= mapelm[1]) {
2663 f = 0;
2664 (*pc->decoder->filter_function)(d, pc->decoder);
2665 if (c != 0x3b) { /* ';' */
2666 (*pc->decoder->filter_function)(c, pc->decoder);
2667 }
2668 break;
2669 }
2670 n++;
2671 }
2672 }
2673 if (f) {
2674 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2675 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2676 r = 1;
2677 n = pc->digit;
2678 while (n > 0) {
2679 r *= 10;
2680 n--;
2681 }
2682 s %= r;
2683 r /= 10;
2684 while (r > 0) {
2685 d = s/r;
2686 s %= r;
2687 r /= 10;
2688 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2689 }
2690 (*pc->decoder->filter_function)(c, pc->decoder);
2691 }
2692 break;
2693 default:
2694 if (c == 0x26) { /* '&' */
2695 pc->status = 1;
2696 } else {
2697 (*pc->decoder->filter_function)(c, pc->decoder);
2698 }
2699 break;
2700 }
2701
2702 return c;
2703 }
2704
/*
 * Flush callback of the numeric entity decoder: re-emit literally whatever
 * part of an unfinished entity is still held back ("&", "&#" or "&#" plus
 * the digits read so far), then reset the decode state.
 *
 * NOTE: although the parameter is typed mbfl_convert_filter *, the pointer
 * is used as the struct collector_htmlnumericentity_data of the conversion;
 * mbfl_html_numeric_entity() registers this function through a cast and
 * passes that structure as the callback data.
 *
 * Always returns 0.
 */
int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
{
	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
	unsigned int s, r, d;
	int n;

	switch (pc->status) {
	case 1: /* '&' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		break;
	case 2: /* '#' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		break;
	case 3: /* '0'-'9' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */

		/*
		 * Print exactly pc->digit digits (leading zeros included).
		 * The divisor starts at 10^(digit-1); computing 10^digit first
		 * would overflow when 10 digits are pending.  The arithmetic is
		 * unsigned so that the table index can never become negative,
		 * whatever value the cache holds.
		 */
		s = (unsigned int)pc->cache;
		r = 1;
		n = pc->digit;
		while (n > 1) {
			r *= 10;
			n--;
		}
		while (r > 0) {
			d = s/r;
			s %= r;
			r /= 10;
			(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
		}
		break;
	default:
		break;
	}

	pc->status = 0;
	pc->cache = 0;
	pc->digit = 0;

	return 0;
}
2751
2752 mbfl_string *
mbfl_html_numeric_entity(mbfl_string * string,mbfl_string * result,int * convmap,int mapsize,int type)2753 mbfl_html_numeric_entity(
2754 mbfl_string *string,
2755 mbfl_string *result,
2756 int *convmap,
2757 int mapsize,
2758 int type)
2759 {
2760 struct collector_htmlnumericentity_data pc;
2761 mbfl_memory_device device;
2762 mbfl_convert_filter *encoder;
2763 int n;
2764 unsigned char *p;
2765
2766 if (string == NULL || result == NULL) {
2767 return NULL;
2768 }
2769 mbfl_string_init(result);
2770 result->no_language = string->no_language;
2771 result->no_encoding = string->no_encoding;
2772 mbfl_memory_device_init(&device, string->len, 0);
2773
2774 /* output code filter */
2775 pc.decoder = mbfl_convert_filter_new(
2776 mbfl_no_encoding_wchar,
2777 string->no_encoding,
2778 mbfl_memory_device_output, 0, &device);
2779 /* wchar filter */
2780 if (type == 0) {
2781 encoder = mbfl_convert_filter_new(
2782 string->no_encoding,
2783 mbfl_no_encoding_wchar,
2784 collector_encode_htmlnumericentity, 0, &pc);
2785 } else {
2786 encoder = mbfl_convert_filter_new(
2787 string->no_encoding,
2788 mbfl_no_encoding_wchar,
2789 collector_decode_htmlnumericentity,
2790 (int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
2791 }
2792 if (pc.decoder == NULL || encoder == NULL) {
2793 mbfl_convert_filter_delete(encoder);
2794 mbfl_convert_filter_delete(pc.decoder);
2795 return NULL;
2796 }
2797 pc.status = 0;
2798 pc.cache = 0;
2799 pc.digit = 0;
2800 pc.convmap = convmap;
2801 pc.mapsize = mapsize;
2802
2803 /* feed data */
2804 p = string->val;
2805 n = string->len;
2806 if (p != NULL) {
2807 while (n > 0) {
2808 if ((*encoder->filter_function)(*p++, encoder) < 0) {
2809 break;
2810 }
2811 n--;
2812 }
2813 }
2814 mbfl_convert_filter_flush(encoder);
2815 mbfl_convert_filter_flush(pc.decoder);
2816 result = mbfl_memory_device_result(&device, result);
2817 mbfl_convert_filter_delete(encoder);
2818 mbfl_convert_filter_delete(pc.decoder);
2819
2820 return result;
2821 }
2822
2823 /*
2824 * Local variables:
2825 * tab-width: 4
2826 * c-basic-offset: 4
2827 * End:
2828 */
2829