1 /*
2 * charset=UTF-8
3 * vim600: encoding=utf-8
4 */
5
6 /*
7 * "streamable kanji code filter and converter"
8 *
9 * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
10 *
11 * This software is released under the GNU Lesser General Public License.
12 * (Version 2.1, February 1999)
13 * Please read the following detail of the licence (in japanese).
14 *
15 * ◆使用許諾条件◆
16 *
17 * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
18 * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
19 * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
20 * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
21 * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
22 * することはできません。
23 *
24 * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
25 * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
26 * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
27 * による許諾を得る必要があります。
28 *
29 * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
30 * ます。「GNU Lesser General Public License」とは、これまでLibrary General
31 * Public Licenseと呼ばれていたものです。
32 * http://www.gnu.org/ --- GNUウェブサイト
33 * http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
34 * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
35 *
36 * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
37 * はありません。
38 *
39 * ◆保証内容◆
40 *
41 * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
42 * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
43 * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
44 * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
45 * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
46 * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
47 * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
48 * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
49 * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
50 * 契約・規定に優先します。
51 *
52 * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
53 *
54 * 〒102-0073
55 * 東京都千代田区九段北1-13-5日本地所第一ビル4F
56 * 株式会社ハッピーサイズ
57 * Phone: 03-3512-3655, Fax: 03-3512-3656
58 * Email: sales@happysize.co.jp
59 * Web: http://happysize.com/
60 *
61 * ◆著者◆
62 *
63 * 金本 茂 <sgk@happysize.co.jp>
64 *
65 * ◆履歴◆
66 *
67 * 1998/11/10 sgk implementation in C++
68 * 1999/4/25 sgk Cで書きなおし。
69 * 1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
70 * 1999/6/?? Unicodeサポート。
71 * 1999/6/22 sgk ライセンスをLGPLに変更。
72 *
73 */
74
75 /*
76 * Unicode support
77 *
78 * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
79 * All rights reserved.
80 *
81 */
82
83
84 #ifdef HAVE_CONFIG_H
85 #include "config.h"
86 #endif
87
88 #include <stddef.h>
89
90 #ifdef HAVE_STRING_H
91 #include <string.h>
92 #endif
93
94 #ifdef HAVE_STRINGS_H
95 #include <strings.h>
96 #endif
97
98 #ifdef HAVE_STDDEF_H
99 #include <stddef.h>
100 #endif
101
102 #include "mbfilter.h"
103 #include "mbfl_filter_output.h"
104 #include "mbfilter_pass.h"
105 #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
106
107 #include "eaw_table.h"
108
/*
 * Lookup table mapping a nibble value (0..15) to the ASCII code of its
 * upper-case hexadecimal digit, i.e. the characters "0123456789ABCDEF".
 */
static char mbfl_hexchar_table[] = {
	/* '0' .. '7' */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	/* '8', '9', 'A' .. 'F' */
	0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46
};
113
114
115
116 /*
117 * encoding filter
118 */
/*
 * CK(statement): evaluate `statement` once and make the enclosing function
 * return -1 when the result is negative.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
120
121
122 /*
123 * buffering converter
124 */
125 mbfl_buffer_converter *
mbfl_buffer_converter_new(enum mbfl_no_encoding from,enum mbfl_no_encoding to,int buf_initsz)126 mbfl_buffer_converter_new(
127 enum mbfl_no_encoding from,
128 enum mbfl_no_encoding to,
129 int buf_initsz)
130 {
131 const mbfl_encoding *_from = mbfl_no2encoding(from);
132 const mbfl_encoding *_to = mbfl_no2encoding(to);
133
134 return mbfl_buffer_converter_new2(_from ? _from: &mbfl_encoding_pass, _to ? _to: &mbfl_encoding_pass, buf_initsz);
135 }
136
137 mbfl_buffer_converter *
mbfl_buffer_converter_new2(const mbfl_encoding * from,const mbfl_encoding * to,int buf_initsz)138 mbfl_buffer_converter_new2(
139 const mbfl_encoding *from,
140 const mbfl_encoding *to,
141 int buf_initsz)
142 {
143 mbfl_buffer_converter *convd;
144
145 /* allocate */
146 convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof (mbfl_buffer_converter));
147 if (convd == NULL) {
148 return NULL;
149 }
150
151 /* initialize */
152 convd->from = from;
153 convd->to = to;
154
155 /* create convert filter */
156 convd->filter1 = NULL;
157 convd->filter2 = NULL;
158 if (mbfl_convert_filter_get_vtbl(convd->from->no_encoding, convd->to->no_encoding) != NULL) {
159 convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
160 } else {
161 convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
162 if (convd->filter2 != NULL) {
163 convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding,
164 mbfl_no_encoding_wchar,
165 (int (*)(int, void*))convd->filter2->filter_function,
166 (int (*)(void*))convd->filter2->filter_flush,
167 convd->filter2);
168 if (convd->filter1 == NULL) {
169 mbfl_convert_filter_delete(convd->filter2);
170 }
171 }
172 }
173 if (convd->filter1 == NULL) {
174 return NULL;
175 }
176
177 mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
178
179 return convd;
180 }
181
182
183 void
mbfl_buffer_converter_delete(mbfl_buffer_converter * convd)184 mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
185 {
186 if (convd != NULL) {
187 if (convd->filter1) {
188 mbfl_convert_filter_delete(convd->filter1);
189 }
190 if (convd->filter2) {
191 mbfl_convert_filter_delete(convd->filter2);
192 }
193 mbfl_memory_device_clear(&convd->device);
194 mbfl_free((void*)convd);
195 }
196 }
197
198 void
mbfl_buffer_converter_reset(mbfl_buffer_converter * convd)199 mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
200 {
201 mbfl_memory_device_reset(&convd->device);
202 }
203
204 int
mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter * convd,int mode)205 mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
206 {
207 if (convd != NULL) {
208 if (convd->filter2 != NULL) {
209 convd->filter2->illegal_mode = mode;
210 } else if (convd->filter1 != NULL) {
211 convd->filter1->illegal_mode = mode;
212 } else {
213 return 0;
214 }
215 }
216
217 return 1;
218 }
219
220 int
mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter * convd,int substchar)221 mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
222 {
223 if (convd != NULL) {
224 if (convd->filter2 != NULL) {
225 convd->filter2->illegal_substchar = substchar;
226 } else if (convd->filter1 != NULL) {
227 convd->filter1->illegal_substchar = substchar;
228 } else {
229 return 0;
230 }
231 }
232
233 return 1;
234 }
235
236 int
mbfl_buffer_converter_strncat(mbfl_buffer_converter * convd,const unsigned char * p,int n)237 mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n)
238 {
239 mbfl_convert_filter *filter;
240 int (*filter_function)(int c, mbfl_convert_filter *filter);
241
242 if (convd != NULL && p != NULL) {
243 filter = convd->filter1;
244 if (filter != NULL) {
245 filter_function = filter->filter_function;
246 while (n > 0) {
247 if ((*filter_function)(*p++, filter) < 0) {
248 break;
249 }
250 n--;
251 }
252 }
253 }
254
255 return n;
256 }
257
258 int
mbfl_buffer_converter_feed(mbfl_buffer_converter * convd,mbfl_string * string)259 mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
260 {
261 return mbfl_buffer_converter_feed2(convd, string, NULL);
262 }
263
264 int
mbfl_buffer_converter_feed2(mbfl_buffer_converter * convd,mbfl_string * string,int * loc)265 mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc)
266 {
267 int n;
268 unsigned char *p;
269 mbfl_convert_filter *filter;
270 int (*filter_function)(int c, mbfl_convert_filter *filter);
271
272 if (convd == NULL || string == NULL) {
273 return -1;
274 }
275 mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
276 /* feed data */
277 n = string->len;
278 p = string->val;
279
280 filter = convd->filter1;
281 if (filter != NULL) {
282 filter_function = filter->filter_function;
283 while (n > 0) {
284 if ((*filter_function)(*p++, filter) < 0) {
285 if (loc) {
286 *loc = p - string->val;
287 }
288 return -1;
289 }
290 n--;
291 }
292 }
293 if (loc) {
294 *loc = p - string->val;
295 }
296 return 0;
297 }
298
299
300 int
mbfl_buffer_converter_flush(mbfl_buffer_converter * convd)301 mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
302 {
303 if (convd == NULL) {
304 return -1;
305 }
306
307 if (convd->filter1 != NULL) {
308 mbfl_convert_filter_flush(convd->filter1);
309 }
310 if (convd->filter2 != NULL) {
311 mbfl_convert_filter_flush(convd->filter2);
312 }
313
314 return 0;
315 }
316
317 mbfl_string *
mbfl_buffer_converter_getbuffer(mbfl_buffer_converter * convd,mbfl_string * result)318 mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
319 {
320 if (convd != NULL && result != NULL && convd->device.buffer != NULL) {
321 result->no_encoding = convd->to->no_encoding;
322 result->val = convd->device.buffer;
323 result->len = convd->device.pos;
324 } else {
325 result = NULL;
326 }
327
328 return result;
329 }
330
331 mbfl_string *
mbfl_buffer_converter_result(mbfl_buffer_converter * convd,mbfl_string * result)332 mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
333 {
334 if (convd == NULL || result == NULL) {
335 return NULL;
336 }
337 result->no_encoding = convd->to->no_encoding;
338 return mbfl_memory_device_result(&convd->device, result);
339 }
340
341 mbfl_string *
mbfl_buffer_converter_feed_result(mbfl_buffer_converter * convd,mbfl_string * string,mbfl_string * result)342 mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
343 mbfl_string *result)
344 {
345 if (convd == NULL || string == NULL || result == NULL) {
346 return NULL;
347 }
348 mbfl_buffer_converter_feed(convd, string);
349 if (convd->filter1 != NULL) {
350 mbfl_convert_filter_flush(convd->filter1);
351 }
352 if (convd->filter2 != NULL) {
353 mbfl_convert_filter_flush(convd->filter2);
354 }
355 result->no_encoding = convd->to->no_encoding;
356 return mbfl_memory_device_result(&convd->device, result);
357 }
358
/*
 * Return the sum of the num_illegalchar counters of the converter's filters
 * (absent filters contribute nothing).  A NULL converter yields 0.
 */
int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
	int total;

	if (convd == NULL) {
		return 0;
	}

	total = (convd->filter1 != NULL) ? convd->filter1->num_illegalchar : 0;
	if (convd->filter2 != NULL) {
		total += convd->filter2->num_illegalchar;
	}

	return total;
}
377
378 /*
379 * encoding detector
380 */
381 mbfl_encoding_detector *
mbfl_encoding_detector_new(enum mbfl_no_encoding * elist,int elistsz,int strict)382 mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict)
383 {
384 mbfl_encoding_detector *identd;
385
386 int i, num;
387 mbfl_identify_filter *filter;
388
389 if (elist == NULL || elistsz <= 0) {
390 return NULL;
391 }
392
393 /* allocate */
394 identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
395 if (identd == NULL) {
396 return NULL;
397 }
398 identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
399 if (identd->filter_list == NULL) {
400 mbfl_free(identd);
401 return NULL;
402 }
403
404 /* create filters */
405 i = 0;
406 num = 0;
407 while (i < elistsz) {
408 filter = mbfl_identify_filter_new(elist[i]);
409 if (filter != NULL) {
410 identd->filter_list[num] = filter;
411 num++;
412 }
413 i++;
414 }
415 identd->filter_list_size = num;
416
417 /* set strict flag */
418 identd->strict = strict;
419
420 return identd;
421 }
422
423 mbfl_encoding_detector *
mbfl_encoding_detector_new2(const mbfl_encoding ** elist,int elistsz,int strict)424 mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict)
425 {
426 mbfl_encoding_detector *identd;
427
428 int i, num;
429 mbfl_identify_filter *filter;
430
431 if (elist == NULL || elistsz <= 0) {
432 return NULL;
433 }
434
435 /* allocate */
436 identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
437 if (identd == NULL) {
438 return NULL;
439 }
440 identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
441 if (identd->filter_list == NULL) {
442 mbfl_free(identd);
443 return NULL;
444 }
445
446 /* create filters */
447 i = 0;
448 num = 0;
449 while (i < elistsz) {
450 filter = mbfl_identify_filter_new2(elist[i]);
451 if (filter != NULL) {
452 identd->filter_list[num] = filter;
453 num++;
454 }
455 i++;
456 }
457 identd->filter_list_size = num;
458
459 /* set strict flag */
460 identd->strict = strict;
461
462 return identd;
463 }
464
465
466 void
mbfl_encoding_detector_delete(mbfl_encoding_detector * identd)467 mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
468 {
469 int i;
470
471 if (identd != NULL) {
472 if (identd->filter_list != NULL) {
473 i = identd->filter_list_size;
474 while (i > 0) {
475 i--;
476 mbfl_identify_filter_delete(identd->filter_list[i]);
477 }
478 mbfl_free((void *)identd->filter_list);
479 }
480 mbfl_free((void *)identd);
481 }
482 }
483
484 int
mbfl_encoding_detector_feed(mbfl_encoding_detector * identd,mbfl_string * string)485 mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
486 {
487 int i, n, num, bad, res;
488 unsigned char *p;
489 mbfl_identify_filter *filter;
490
491 res = 0;
492 /* feed data */
493 if (identd != NULL && string != NULL && string->val != NULL) {
494 num = identd->filter_list_size;
495 n = string->len;
496 p = string->val;
497 bad = 0;
498 while (n > 0) {
499 for (i = 0; i < num; i++) {
500 filter = identd->filter_list[i];
501 if (!filter->flag) {
502 (*filter->filter_function)(*p, filter);
503 if (filter->flag) {
504 bad++;
505 }
506 }
507 }
508 if ((num - 1) <= bad) {
509 res = 1;
510 break;
511 }
512 p++;
513 n--;
514 }
515 }
516
517 return res;
518 }
519
mbfl_encoding_detector_judge2(mbfl_encoding_detector * identd)520 const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd)
521 {
522 mbfl_identify_filter *filter;
523 const mbfl_encoding *encoding = NULL;
524 int n;
525
526 /* judge */
527 if (identd != NULL) {
528 n = identd->filter_list_size - 1;
529 while (n >= 0) {
530 filter = identd->filter_list[n];
531 if (!filter->flag) {
532 if (!identd->strict || !filter->status) {
533 encoding = filter->encoding;
534 }
535 }
536 n--;
537 }
538
539 /* fallback judge */
540 if (!encoding) {
541 n = identd->filter_list_size - 1;
542 while (n >= 0) {
543 filter = identd->filter_list[n];
544 if (!filter->flag) {
545 encoding = filter->encoding;
546 }
547 n--;
548 }
549 }
550 }
551
552 return encoding;
553 }
554
/*
 * Identifier-returning variant of mbfl_encoding_detector_judge2(): yields the
 * no_encoding of the judged encoding, or mbfl_no_encoding_invalid when no
 * encoding was judged.
 */
enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
{
	const mbfl_encoding *encoding = mbfl_encoding_detector_judge2(identd);

	if (encoding == NULL) {
		return mbfl_no_encoding_invalid;
	}
	return encoding->no_encoding;
}
560
561
562 /*
563 * encoding converter
564 */
565 mbfl_string *
mbfl_convert_encoding(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding toenc)566 mbfl_convert_encoding(
567 mbfl_string *string,
568 mbfl_string *result,
569 enum mbfl_no_encoding toenc)
570 {
571 int n;
572 unsigned char *p;
573 const mbfl_encoding *encoding;
574 mbfl_memory_device device;
575 mbfl_convert_filter *filter1;
576 mbfl_convert_filter *filter2;
577
578 /* initialize */
579 encoding = mbfl_no2encoding(toenc);
580 if (encoding == NULL || string == NULL || result == NULL) {
581 return NULL;
582 }
583
584 filter1 = NULL;
585 filter2 = NULL;
586 if (mbfl_convert_filter_get_vtbl(string->no_encoding, toenc) != NULL) {
587 filter1 = mbfl_convert_filter_new(string->no_encoding, toenc, mbfl_memory_device_output, 0, &device);
588 } else {
589 filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
590 if (filter2 != NULL) {
591 filter1 = mbfl_convert_filter_new(string->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
592 if (filter1 == NULL) {
593 mbfl_convert_filter_delete(filter2);
594 }
595 }
596 }
597 if (filter1 == NULL) {
598 return NULL;
599 }
600
601 if (filter2 != NULL) {
602 filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
603 filter2->illegal_substchar = 0x3f; /* '?' */
604 }
605
606 mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
607
608 /* feed data */
609 n = string->len;
610 p = string->val;
611 if (p != NULL) {
612 while (n > 0) {
613 if ((*filter1->filter_function)(*p++, filter1) < 0) {
614 break;
615 }
616 n--;
617 }
618 }
619
620 mbfl_convert_filter_flush(filter1);
621 mbfl_convert_filter_delete(filter1);
622 if (filter2 != NULL) {
623 mbfl_convert_filter_flush(filter2);
624 mbfl_convert_filter_delete(filter2);
625 }
626
627 return mbfl_memory_device_result(&device, result);
628 }
629
630
631 /*
632 * identify encoding
633 */
634 const mbfl_encoding *
mbfl_identify_encoding(mbfl_string * string,enum mbfl_no_encoding * elist,int elistsz,int strict)635 mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
636 {
637 int i, n, num, bad;
638 unsigned char *p;
639 mbfl_identify_filter *flist, *filter;
640 const mbfl_encoding *encoding;
641
642 /* flist is an array of mbfl_identify_filter instances */
643 flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
644 if (flist == NULL) {
645 return NULL;
646 }
647
648 num = 0;
649 if (elist != NULL) {
650 for (i = 0; i < elistsz; i++) {
651 if (!mbfl_identify_filter_init(&flist[num], elist[i])) {
652 num++;
653 }
654 }
655 }
656
657 /* feed data */
658 n = string->len;
659 p = string->val;
660
661 if (p != NULL) {
662 bad = 0;
663 while (n > 0) {
664 for (i = 0; i < num; i++) {
665 filter = &flist[i];
666 if (!filter->flag) {
667 (*filter->filter_function)(*p, filter);
668 if (filter->flag) {
669 bad++;
670 }
671 }
672 }
673 if ((num - 1) <= bad && !strict) {
674 break;
675 }
676 p++;
677 n--;
678 }
679 }
680
681 /* judge */
682 encoding = NULL;
683
684 for (i = 0; i < num; i++) {
685 filter = &flist[i];
686 if (!filter->flag) {
687 if (strict && filter->status) {
688 continue;
689 }
690 encoding = filter->encoding;
691 break;
692 }
693 }
694
695 /* fall-back judge */
696 if (!encoding) {
697 for (i = 0; i < num; i++) {
698 filter = &flist[i];
699 if (!filter->flag && (!strict || !filter->status)) {
700 encoding = filter->encoding;
701 break;
702 }
703 }
704 }
705
706 /* cleanup */
707 /* dtors should be called in reverse order */
708 i = num; while (--i >= 0) {
709 mbfl_identify_filter_cleanup(&flist[i]);
710 }
711
712 mbfl_free((void *)flist);
713
714 return encoding;
715 }
716
717 const mbfl_encoding *
mbfl_identify_encoding2(mbfl_string * string,const mbfl_encoding ** elist,int elistsz,int strict)718 mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict)
719 {
720 int i, n, num, bad;
721 unsigned char *p;
722 mbfl_identify_filter *flist, *filter;
723 const mbfl_encoding *encoding;
724
725 /* flist is an array of mbfl_identify_filter instances */
726 flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
727 if (flist == NULL) {
728 return NULL;
729 }
730
731 num = 0;
732 if (elist != NULL) {
733 for (i = 0; i < elistsz; i++) {
734 if (!mbfl_identify_filter_init2(&flist[num], elist[i])) {
735 num++;
736 }
737 }
738 }
739
740 /* feed data */
741 n = string->len;
742 p = string->val;
743
744 if (p != NULL) {
745 bad = 0;
746 while (n > 0) {
747 for (i = 0; i < num; i++) {
748 filter = &flist[i];
749 if (!filter->flag) {
750 (*filter->filter_function)(*p, filter);
751 if (filter->flag) {
752 bad++;
753 }
754 }
755 }
756 if ((num - 1) <= bad && !strict) {
757 break;
758 }
759 p++;
760 n--;
761 }
762 }
763
764 /* judge */
765 encoding = NULL;
766
767 for (i = 0; i < num; i++) {
768 filter = &flist[i];
769 if (!filter->flag) {
770 if (strict && filter->status) {
771 continue;
772 }
773 encoding = filter->encoding;
774 break;
775 }
776 }
777
778 /* fall-back judge */
779 if (!encoding) {
780 for (i = 0; i < num; i++) {
781 filter = &flist[i];
782 if (!filter->flag && (!strict || !filter->status)) {
783 encoding = filter->encoding;
784 break;
785 }
786 }
787 }
788
789 /* cleanup */
790 /* dtors should be called in reverse order */
791 i = num; while (--i >= 0) {
792 mbfl_identify_filter_cleanup(&flist[i]);
793 }
794
795 mbfl_free((void *)flist);
796
797 return encoding;
798 }
799
800 /*
801 * strlen
802 */
/*
 * Output callback that counts characters: `data` points to an int counter
 * which is incremented once per call.  The character is returned unchanged.
 */
static int
filter_count_output(int c, void *data)
{
	int *counter = (int *)data;

	*counter += 1;
	return c;
}
809
810 int
mbfl_strlen(mbfl_string * string)811 mbfl_strlen(mbfl_string *string)
812 {
813 int len, n, m, k;
814 unsigned char *p;
815 const unsigned char *mbtab;
816 const mbfl_encoding *encoding;
817
818 encoding = mbfl_no2encoding(string->no_encoding);
819 if (encoding == NULL || string == NULL) {
820 return -1;
821 }
822
823 len = 0;
824 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
825 len = string->len;
826 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
827 len = string->len/2;
828 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
829 len = string->len/4;
830 } else if (encoding->mblen_table != NULL) {
831 mbtab = encoding->mblen_table;
832 n = 0;
833 p = string->val;
834 k = string->len;
835 /* count */
836 if (p != NULL) {
837 while (n < k) {
838 m = mbtab[*p];
839 n += m;
840 p += m;
841 len++;
842 };
843 }
844 } else {
845 /* wchar filter */
846 mbfl_convert_filter *filter = mbfl_convert_filter_new(
847 string->no_encoding,
848 mbfl_no_encoding_wchar,
849 filter_count_output, 0, &len);
850 if (filter == NULL) {
851 return -1;
852 }
853 /* count */
854 n = string->len;
855 p = string->val;
856 if (p != NULL) {
857 while (n > 0) {
858 (*filter->filter_function)(*p++, filter);
859 n--;
860 }
861 }
862 mbfl_convert_filter_delete(filter);
863 }
864
865 return len;
866 }
867
868
869 /*
870 * strpos
871 */
872 struct collector_strpos_data {
873 mbfl_convert_filter *next_filter;
874 mbfl_wchar_device needle;
875 int needle_len;
876 int start;
877 int output;
878 int found_pos;
879 int needle_pos;
880 int matched_pos;
881 };
882
883 static int
collector_strpos(int c,void * data)884 collector_strpos(int c, void* data)
885 {
886 int *p, *h, *m, n;
887 struct collector_strpos_data *pc = (struct collector_strpos_data*)data;
888
889 if (pc->output >= pc->start) {
890 if (c == (int)pc->needle.buffer[pc->needle_pos]) {
891 if (pc->needle_pos == 0) {
892 pc->found_pos = pc->output; /* found position */
893 }
894 pc->needle_pos++; /* needle pointer */
895 if (pc->needle_pos >= pc->needle_len) {
896 pc->matched_pos = pc->found_pos; /* matched position */
897 pc->needle_pos--;
898 goto retry;
899 }
900 } else if (pc->needle_pos != 0) {
901 retry:
902 h = (int *)pc->needle.buffer;
903 h++;
904 for (;;) {
905 pc->found_pos++;
906 p = h;
907 m = (int *)pc->needle.buffer;
908 n = pc->needle_pos - 1;
909 while (n > 0 && *p == *m) {
910 n--;
911 p++;
912 m++;
913 }
914 if (n <= 0) {
915 if (*m != c) {
916 pc->needle_pos = 0;
917 }
918 break;
919 } else {
920 h++;
921 pc->needle_pos--;
922 }
923 }
924 }
925 }
926
927 pc->output++;
928 return c;
929 }
930
931 /*
932 * oddlen
933 */
934 int
mbfl_oddlen(mbfl_string * string)935 mbfl_oddlen(mbfl_string *string)
936 {
937 int len, n, m, k;
938 unsigned char *p;
939 const unsigned char *mbtab;
940 const mbfl_encoding *encoding;
941
942
943 if (string == NULL) {
944 return -1;
945 }
946 encoding = mbfl_no2encoding(string->no_encoding);
947 if (encoding == NULL) {
948 return -1;
949 }
950
951 len = 0;
952 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
953 return 0;
954 } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
955 return len % 2;
956 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
957 return len % 4;
958 } else if (encoding->mblen_table != NULL) {
959 mbtab = encoding->mblen_table;
960 n = 0;
961 p = string->val;
962 k = string->len;
963 /* count */
964 if (p != NULL) {
965 while (n < k) {
966 m = mbtab[*p];
967 n += m;
968 p += m;
969 };
970 }
971 return n-k;
972 } else {
973 /* how can i do ? */
974 return 0;
975 }
976 /* NOT REACHED */
977 }
978
979 int
mbfl_strpos(mbfl_string * haystack,mbfl_string * needle,int offset,int reverse)980 mbfl_strpos(
981 mbfl_string *haystack,
982 mbfl_string *needle,
983 int offset,
984 int reverse)
985 {
986 int result;
987 mbfl_string _haystack_u8, _needle_u8;
988 const mbfl_string *haystack_u8, *needle_u8 = NULL;
989 const unsigned char *u8_tbl;
990
991 if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
992 return -8;
993 }
994
995 {
996 const mbfl_encoding *u8_enc;
997 u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8);
998 if (u8_enc == NULL || u8_enc->mblen_table == NULL) {
999 return -8;
1000 }
1001 u8_tbl = u8_enc->mblen_table;
1002 }
1003
1004 if (haystack->no_encoding != mbfl_no_encoding_utf8) {
1005 mbfl_string_init(&_haystack_u8);
1006 haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8);
1007 if (haystack_u8 == NULL) {
1008 result = -4;
1009 goto out;
1010 }
1011 } else {
1012 haystack_u8 = haystack;
1013 }
1014
1015 if (needle->no_encoding != mbfl_no_encoding_utf8) {
1016 mbfl_string_init(&_needle_u8);
1017 needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8);
1018 if (needle_u8 == NULL) {
1019 result = -4;
1020 goto out;
1021 }
1022 } else {
1023 needle_u8 = needle;
1024 }
1025
1026 if (needle_u8->len < 1) {
1027 result = -8;
1028 goto out;
1029 }
1030
1031 result = -1;
1032 if (haystack_u8->len < needle_u8->len) {
1033 goto out;
1034 }
1035
1036 if (!reverse) {
1037 unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
1038 unsigned int needle_u8_len = needle_u8->len;
1039 unsigned int i;
1040 const unsigned char *p, *q, *e;
1041 const unsigned char *haystack_u8_val = haystack_u8->val,
1042 *needle_u8_val = needle_u8->val;
1043 for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
1044 jtbl[i] = needle_u8_len + 1;
1045 }
1046 for (i = 0; i < needle_u8_len - 1; ++i) {
1047 jtbl[needle_u8_val[i]] = needle_u8_len - i;
1048 }
1049 e = haystack_u8_val + haystack_u8->len;
1050 p = haystack_u8_val;
1051 while (--offset >= 0) {
1052 if (p >= e) {
1053 result = -16;
1054 goto out;
1055 }
1056 p += u8_tbl[*p];
1057 }
1058 p += needle_u8_len;
1059 if (p > e) {
1060 goto out;
1061 }
1062 while (p <= e) {
1063 const unsigned char *pv = p;
1064 q = needle_u8_val + needle_u8_len;
1065 for (;;) {
1066 if (q == needle_u8_val) {
1067 result = 0;
1068 while (p > haystack_u8_val) {
1069 unsigned char c = *--p;
1070 if (c < 0x80) {
1071 ++result;
1072 } else if ((c & 0xc0) != 0x80) {
1073 ++result;
1074 }
1075 }
1076 goto out;
1077 }
1078 if (*--q != *--p) {
1079 break;
1080 }
1081 }
1082 p += jtbl[*p];
1083 if (p <= pv) {
1084 p = pv + 1;
1085 }
1086 }
1087 } else {
1088 unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
1089 unsigned int needle_u8_len = needle_u8->len, needle_len = 0;
1090 unsigned int i;
1091 const unsigned char *p, *e, *q, *qe;
1092 const unsigned char *haystack_u8_val = haystack_u8->val,
1093 *needle_u8_val = needle_u8->val;
1094 for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
1095 jtbl[i] = needle_u8_len;
1096 }
1097 for (i = needle_u8_len - 1; i > 0; --i) {
1098 unsigned char c = needle_u8_val[i];
1099 jtbl[c] = i;
1100 if (c < 0x80) {
1101 ++needle_len;
1102 } else if ((c & 0xc0) != 0x80) {
1103 ++needle_len;
1104 }
1105 }
1106 {
1107 unsigned char c = needle_u8_val[0];
1108 if (c < 0x80) {
1109 ++needle_len;
1110 } else if ((c & 0xc0) != 0x80) {
1111 ++needle_len;
1112 }
1113 }
1114 e = haystack_u8_val;
1115 p = e + haystack_u8->len;
1116 qe = needle_u8_val + needle_u8_len;
1117 if (offset < 0) {
1118 if (-offset > needle_len) {
1119 offset += needle_len;
1120 while (offset < 0) {
1121 unsigned char c;
1122 if (p <= e) {
1123 result = -16;
1124 goto out;
1125 }
1126 c = *(--p);
1127 if (c < 0x80) {
1128 ++offset;
1129 } else if ((c & 0xc0) != 0x80) {
1130 ++offset;
1131 }
1132 }
1133 }
1134 } else {
1135 const unsigned char *ee = haystack_u8_val + haystack_u8->len;
1136 while (--offset >= 0) {
1137 if (e >= ee) {
1138 result = -16;
1139 goto out;
1140 }
1141 e += u8_tbl[*e];
1142 }
1143 }
1144 if (p < e + needle_u8_len) {
1145 goto out;
1146 }
1147 p -= needle_u8_len;
1148 while (p >= e) {
1149 const unsigned char *pv = p;
1150 q = needle_u8_val;
1151 for (;;) {
1152 if (q == qe) {
1153 result = 0;
1154 p -= needle_u8_len;
1155 while (p > haystack_u8_val) {
1156 unsigned char c = *--p;
1157 if (c < 0x80) {
1158 ++result;
1159 } else if ((c & 0xc0) != 0x80) {
1160 ++result;
1161 }
1162 }
1163 goto out;
1164 }
1165 if (*q != *p) {
1166 break;
1167 }
1168 ++p, ++q;
1169 }
1170 p -= jtbl[*p];
1171 if (p >= pv) {
1172 p = pv - 1;
1173 }
1174 }
1175 }
1176 out:
1177 if (haystack_u8 == &_haystack_u8) {
1178 mbfl_string_clear(&_haystack_u8);
1179 }
1180 if (needle_u8 == &_needle_u8) {
1181 mbfl_string_clear(&_needle_u8);
1182 }
1183 return result;
1184 }
1185
1186 /*
1187 * substr_count
1188 */
1189
1190 int
mbfl_substr_count(mbfl_string * haystack,mbfl_string * needle)1191 mbfl_substr_count(
1192 mbfl_string *haystack,
1193 mbfl_string *needle
1194 )
1195 {
1196 int n, result = 0;
1197 unsigned char *p;
1198 mbfl_convert_filter *filter;
1199 struct collector_strpos_data pc;
1200
1201 if (haystack == NULL || needle == NULL) {
1202 return -8;
1203 }
1204 /* needle is converted into wchar */
1205 mbfl_wchar_device_init(&pc.needle);
1206 filter = mbfl_convert_filter_new(
1207 needle->no_encoding,
1208 mbfl_no_encoding_wchar,
1209 mbfl_wchar_device_output, 0, &pc.needle);
1210 if (filter == NULL) {
1211 return -4;
1212 }
1213 p = needle->val;
1214 n = needle->len;
1215 if (p != NULL) {
1216 while (n > 0) {
1217 if ((*filter->filter_function)(*p++, filter) < 0) {
1218 break;
1219 }
1220 n--;
1221 }
1222 }
1223 mbfl_convert_filter_flush(filter);
1224 mbfl_convert_filter_delete(filter);
1225 pc.needle_len = pc.needle.pos;
1226 if (pc.needle.buffer == NULL) {
1227 return -4;
1228 }
1229 if (pc.needle_len <= 0) {
1230 mbfl_wchar_device_clear(&pc.needle);
1231 return -2;
1232 }
1233 /* initialize filter and collector data */
1234 filter = mbfl_convert_filter_new(
1235 haystack->no_encoding,
1236 mbfl_no_encoding_wchar,
1237 collector_strpos, 0, &pc);
1238 if (filter == NULL) {
1239 mbfl_wchar_device_clear(&pc.needle);
1240 return -4;
1241 }
1242 pc.start = 0;
1243 pc.output = 0;
1244 pc.needle_pos = 0;
1245 pc.found_pos = 0;
1246 pc.matched_pos = -1;
1247
1248 /* feed data */
1249 p = haystack->val;
1250 n = haystack->len;
1251 if (p != NULL) {
1252 while (n > 0) {
1253 if ((*filter->filter_function)(*p++, filter) < 0) {
1254 pc.matched_pos = -4;
1255 break;
1256 }
1257 if (pc.matched_pos >= 0) {
1258 ++result;
1259 pc.matched_pos = -1;
1260 pc.needle_pos = 0;
1261 }
1262 n--;
1263 }
1264 }
1265 mbfl_convert_filter_flush(filter);
1266 mbfl_convert_filter_delete(filter);
1267 mbfl_wchar_device_clear(&pc.needle);
1268
1269 return result;
1270 }
1271
1272 /*
1273 * substr
1274 */
1275 struct collector_substr_data {
1276 mbfl_convert_filter *next_filter;
1277 int start;
1278 int stop;
1279 int output;
1280 };
1281
1282 static int
collector_substr(int c,void * data)1283 collector_substr(int c, void* data)
1284 {
1285 struct collector_substr_data *pc = (struct collector_substr_data*)data;
1286
1287 if (pc->output >= pc->stop) {
1288 return -1;
1289 }
1290
1291 if (pc->output >= pc->start) {
1292 (*pc->next_filter->filter_function)(c, pc->next_filter);
1293 }
1294
1295 pc->output++;
1296
1297 return c;
1298 }
1299
1300 mbfl_string *
mbfl_substr(mbfl_string * string,mbfl_string * result,int from,int length)1301 mbfl_substr(
1302 mbfl_string *string,
1303 mbfl_string *result,
1304 int from,
1305 int length)
1306 {
1307 const mbfl_encoding *encoding;
1308 int n, m, k, len, start, end;
1309 unsigned char *p, *w;
1310 const unsigned char *mbtab;
1311
1312 encoding = mbfl_no2encoding(string->no_encoding);
1313 if (encoding == NULL || string == NULL || result == NULL) {
1314 return NULL;
1315 }
1316 mbfl_string_init(result);
1317 result->no_language = string->no_language;
1318 result->no_encoding = string->no_encoding;
1319
1320 if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
1321 encoding->mblen_table != NULL) {
1322 len = string->len;
1323 start = from;
1324 end = from + length;
1325 if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1326 start *= 2;
1327 end = start + length*2;
1328 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1329 start *= 4;
1330 end = start + length*4;
1331 } else if (encoding->mblen_table != NULL) {
1332 mbtab = encoding->mblen_table;
1333 start = 0;
1334 end = 0;
1335 n = 0;
1336 k = 0;
1337 p = string->val;
1338 if (p != NULL) {
1339 /* search start position */
1340 while (k <= from) {
1341 start = n;
1342 if (n >= len) {
1343 break;
1344 }
1345 m = mbtab[*p];
1346 n += m;
1347 p += m;
1348 k++;
1349 }
1350 /* detect end position */
1351 k = 0;
1352 end = start;
1353 while (k < length) {
1354 end = n;
1355 if (n >= len) {
1356 break;
1357 }
1358 m = mbtab[*p];
1359 n += m;
1360 p += m;
1361 k++;
1362 }
1363 }
1364 }
1365
1366 if (start > len) {
1367 start = len;
1368 }
1369 if (start < 0) {
1370 start = 0;
1371 }
1372 if (end > len) {
1373 end = len;
1374 }
1375 if (end < 0) {
1376 end = 0;
1377 }
1378 if (start > end) {
1379 start = end;
1380 }
1381
1382 /* allocate memory and copy */
1383 n = end - start;
1384 result->len = 0;
1385 result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char));
1386 if (w != NULL) {
1387 p = string->val;
1388 if (p != NULL) {
1389 p += start;
1390 result->len = n;
1391 while (n > 0) {
1392 *w++ = *p++;
1393 n--;
1394 }
1395 }
1396 *w++ = '\0';
1397 *w++ = '\0';
1398 *w++ = '\0';
1399 *w = '\0';
1400 } else {
1401 result = NULL;
1402 }
1403 } else {
1404 mbfl_memory_device device;
1405 struct collector_substr_data pc;
1406 mbfl_convert_filter *decoder;
1407 mbfl_convert_filter *encoder;
1408
1409 mbfl_memory_device_init(&device, length + 1, 0);
1410 mbfl_string_init(result);
1411 result->no_language = string->no_language;
1412 result->no_encoding = string->no_encoding;
1413 /* output code filter */
1414 decoder = mbfl_convert_filter_new(
1415 mbfl_no_encoding_wchar,
1416 string->no_encoding,
1417 mbfl_memory_device_output, 0, &device);
1418 /* wchar filter */
1419 encoder = mbfl_convert_filter_new(
1420 string->no_encoding,
1421 mbfl_no_encoding_wchar,
1422 collector_substr, 0, &pc);
1423 if (decoder == NULL || encoder == NULL) {
1424 mbfl_convert_filter_delete(encoder);
1425 mbfl_convert_filter_delete(decoder);
1426 return NULL;
1427 }
1428 pc.next_filter = decoder;
1429 pc.start = from;
1430 pc.stop = from + length;
1431 pc.output = 0;
1432
1433 /* feed data */
1434 p = string->val;
1435 n = string->len;
1436 if (p != NULL) {
1437 while (n > 0) {
1438 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1439 break;
1440 }
1441 n--;
1442 }
1443 }
1444
1445 mbfl_convert_filter_flush(encoder);
1446 mbfl_convert_filter_flush(decoder);
1447 result = mbfl_memory_device_result(&device, result);
1448 mbfl_convert_filter_delete(encoder);
1449 mbfl_convert_filter_delete(decoder);
1450 }
1451
1452 return result;
1453 }
1454
1455 /*
1456 * strcut
1457 */
/*
 * Cut a piece out of `string` by byte position without splitting a character.
 *
 * `from` is a byte offset into string->val and `length` a byte budget; both
 * are compared against string->len and used as pointer offsets below.
 *
 * Returns `result` on success. Returns NULL when string, string->val or
 * result is NULL, when from or length is negative, when the encoding is
 * unknown, or when an allocation fails.
 *
 * Encodings flagged single-byte / fixed-width, or providing mblen_table, are
 * cut directly on the byte buffer. All other encodings are streamed through
 * an encoder (to wchar) chained to a decoder (back to the original encoding)
 * that writes into a memory device; snapshots of both filters are kept so the
 * output can be rolled back to the last state that still fit into `length`.
 */
mbfl_string *
mbfl_strcut(
    mbfl_string *string,
    mbfl_string *result,
    int from,
    int length)
{
	const mbfl_encoding *encoding;
	mbfl_memory_device device;

	/* validate the parameters */
	if (string == NULL || string->val == NULL || result == NULL) {
		return NULL;
	}

	if (from < 0 || length < 0) {
		return NULL;
	}

	/* a start beyond the end is clamped to the end */
	if (from >= string->len) {
		from = string->len;
	}

	encoding = mbfl_no2encoding(string->no_encoding);
	if (encoding == NULL) {
		return NULL;
	}

	mbfl_string_init(result);
	result->no_language = string->no_language;
	result->no_encoding = string->no_encoding;

	if ((encoding->flag & (MBFL_ENCTYPE_SBCS
				| MBFL_ENCTYPE_WCS2BE
				| MBFL_ENCTYPE_WCS2LE
				| MBFL_ENCTYPE_WCS4BE
				| MBFL_ENCTYPE_WCS4LE))
			|| encoding->mblen_table != NULL) {
		const unsigned char *start = NULL;
		const unsigned char *end = NULL;
		unsigned char *w;
		unsigned int sz;

		if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
			/* round the offset down to a 2-byte boundary */
			from &= -2;

			if (from + length >= string->len) {
				length = string->len - from;
			}

			start = string->val + from;
			/* round the length down to a whole number of 2-byte units */
			end   = start + (length & -2);
		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
			/* round the offset down to a 4-byte boundary */
			from &= -4;

			if (from + length >= string->len) {
				length = string->len - from;
			}

			start = string->val + from;
			/* round the length down to a whole number of 4-byte units */
			end   = start + (length & -4);
		} else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
			if (from + length >= string->len) {
				length = string->len - from;
			}

			start = string->val + from;
			end = start + length;
		} else if (encoding->mblen_table != NULL) {
			const unsigned char *mbtab = encoding->mblen_table;
			const unsigned char *p, *q;
			int m;

			/* search start position */
			for (m = 0, p = string->val, q = p + from;
					p < q; p += (m = mbtab[*p]));

			/* stepped over `from` in the middle of a character: go back to its first byte */
			if (p > q) {
				p -= m;
			}

			start = p;

			/* search end position */
			if ((start - string->val) + length >= (int)string->len) {
				end = string->val + string->len;
			} else {
				for (q = p + length; p < q; p += (m = mbtab[*p]));

				/* the last character would exceed the budget: drop it */
				if (p > q) {
					p -= m;
				}
				end = p;
			}
		} else {
			/* never reached */
			return NULL;
		}

		/* allocate memory and copy string */
		sz = end - start;
		if ((w = (unsigned char*)mbfl_calloc(sz + 8,
				sizeof(unsigned char))) == NULL) {
			return NULL;
		}

		memcpy(w, start, sz);
		w[sz] = '\0';
		w[sz + 1] = '\0';
		w[sz + 2] = '\0';
		w[sz + 3] = '\0';

		result->val = w;
		result->len = sz;
	} else {
		mbfl_convert_filter *encoder     = NULL;
		mbfl_convert_filter *decoder     = NULL;
		const unsigned char *p, *q, *r;
		/*
		 * Snapshots of the filter pair, the input pointer and the device
		 * position. `bk` is the state that will be restored when the output
		 * grows past `length`; `_bk` is a scratch snapshot.
		 */
		struct {
			mbfl_convert_filter encoder;
			mbfl_convert_filter decoder;
			const unsigned char *p;
			int pos;
		} bk, _bk;

		/* output code filter */
		/* only the address of `device` is stored here; it is initialised below */
		if (!(decoder = mbfl_convert_filter_new(
				mbfl_no_encoding_wchar,
				string->no_encoding,
				mbfl_memory_device_output, 0, &device))) {
			return NULL;
		}

		/* wchar filter */
		/* output is discarded at first, while the bytes before `from` are consumed */
		if (!(encoder = mbfl_convert_filter_new(
				string->no_encoding,
				mbfl_no_encoding_wchar,
				mbfl_filter_output_null,
				NULL, NULL))) {
			mbfl_convert_filter_delete(decoder);
			return NULL;
		}

		mbfl_memory_device_init(&device, length + 8, 0);

		p = string->val;

		/* search start position */
		for (q = string->val + from; p < q; p++) {
			(*encoder->filter_function)(*p, encoder);
		}

		/* switch the drain direction */
		/* from here on the encoder feeds the decoder, which writes into device */
		encoder->output_function = (int(*)(int,void *))decoder->filter_function;
		encoder->flush_function = (int(*)(void *))decoder->filter_flush;
		encoder->data = decoder;

		q = string->val + string->len;

		/* save the encoder, decoder state and the pointer */
		mbfl_convert_filter_copy(decoder, &_bk.decoder);
		mbfl_convert_filter_copy(encoder, &_bk.encoder);
		_bk.p = p;
		_bk.pos = device.pos;

		/* never read more input than is left */
		if (length > q - p) {
			length = q - p;
		}

		if (length >= 20) {
			/* output a little shorter than "length" */
			/* XXX: the constant "20" was determined purely on the heuristics. */
			for (r = p + length - 20; p < r; p++) {
				(*encoder->filter_function)(*p, encoder);
			}

			/* if the offset of the resulting string exceeds the length,
			 * then restore the state */
			if (device.pos > length) {
				p = _bk.p;
				device.pos = _bk.pos;
				decoder->filter_dtor(decoder);
				encoder->filter_dtor(encoder);
				mbfl_convert_filter_copy(&_bk.decoder, decoder);
				mbfl_convert_filter_copy(&_bk.encoder, encoder);
				bk = _bk;
			} else {
				/* save the encoder, decoder state and the pointer */
				mbfl_convert_filter_copy(decoder, &bk.decoder);
				mbfl_convert_filter_copy(encoder, &bk.encoder);
				bk.p = p;
				bk.pos = device.pos;

				/* flush the stream */
				(*encoder->filter_flush)(encoder);

				/* if the offset of the resulting string exceeds the length,
				 * then restore the state */
				if (device.pos > length) {
					/* the fast-forward snapshot is unusable: discard it and fall back to the start */
					bk.decoder.filter_dtor(&bk.decoder);
					bk.encoder.filter_dtor(&bk.encoder);

					p = _bk.p;
					device.pos = _bk.pos;
					decoder->filter_dtor(decoder);
					encoder->filter_dtor(encoder);
					mbfl_convert_filter_copy(&_bk.decoder, decoder);
					mbfl_convert_filter_copy(&_bk.encoder, encoder);
					bk = _bk;
				} else {
					/* the fast-forward snapshot fits: drop the start snapshot and continue from it */
					_bk.decoder.filter_dtor(&_bk.decoder);
					_bk.encoder.filter_dtor(&_bk.encoder);

					p = bk.p;
					device.pos = bk.pos;
					decoder->filter_dtor(decoder);
					encoder->filter_dtor(encoder);
					mbfl_convert_filter_copy(&bk.decoder, decoder);
					mbfl_convert_filter_copy(&bk.encoder, encoder);
				}
			}
		} else {
			bk = _bk;
		}

		/* detect end position */
		/* advance one input byte at a time, keeping `bk` as the last state that fit */
		while (p < q) {
			(*encoder->filter_function)(*p, encoder);

			if (device.pos > length) {
				/* restore filter */
				p = bk.p;
				device.pos = bk.pos;
				decoder->filter_dtor(decoder);
				encoder->filter_dtor(encoder);
				mbfl_convert_filter_copy(&bk.decoder, decoder);
				mbfl_convert_filter_copy(&bk.encoder, encoder);
				break;
			}

			p++;

			/* backup current state */
			mbfl_convert_filter_copy(decoder, &_bk.decoder);
			mbfl_convert_filter_copy(encoder, &_bk.encoder);
			_bk.pos = device.pos;
			_bk.p = p;

			/* trial flush: check whether the output would still fit if the stream ended here */
			(*encoder->filter_flush)(encoder);

			if (device.pos > length) {
				_bk.decoder.filter_dtor(&_bk.decoder);
				_bk.encoder.filter_dtor(&_bk.encoder);

				/* restore filter */
				p = bk.p;
				device.pos = bk.pos;
				decoder->filter_dtor(decoder);
				encoder->filter_dtor(encoder);
				mbfl_convert_filter_copy(&bk.decoder, decoder);
				mbfl_convert_filter_copy(&bk.encoder, encoder);
				break;
			}

			/* the trial fit: undo the flush and promote the new snapshot to `bk` */
			bk.decoder.filter_dtor(&bk.decoder);
			bk.encoder.filter_dtor(&bk.encoder);

			p = _bk.p;
			device.pos = _bk.pos;
			decoder->filter_dtor(decoder);
			encoder->filter_dtor(encoder);
			mbfl_convert_filter_copy(&_bk.decoder, decoder);
			mbfl_convert_filter_copy(&_bk.encoder, encoder);

			bk = _bk;
		}

		/* final flush of the accepted state */
		(*encoder->filter_flush)(encoder);

		bk.decoder.filter_dtor(&bk.decoder);
		bk.encoder.filter_dtor(&bk.encoder);

		result = mbfl_memory_device_result(&device, result);

		mbfl_convert_filter_delete(encoder);
		mbfl_convert_filter_delete(decoder);
	}

	return result;
}
1748
1749
1750 /*
1751 * strwidth
1752 */
is_fullwidth(int c)1753 static int is_fullwidth(int c)
1754 {
1755 int i;
1756
1757 if (c < mbfl_eaw_table[0].begin) {
1758 return 0;
1759 }
1760
1761 for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1762 if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1763 return 1;
1764 }
1765 }
1766
1767 return 0;
1768 }
1769
/*
 * Output callback for mbfl_strwidth(): adds the width of c (2 when
 * is_fullwidth() reports a match, 1 otherwise) to the int counter that
 * `data` points to, and returns c unchanged.
 */
static int
filter_count_width(int c, void* data)
{
	int *total = (int *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}

	return c;
}
1776
1777 int
mbfl_strwidth(mbfl_string * string)1778 mbfl_strwidth(mbfl_string *string)
1779 {
1780 int len, n;
1781 unsigned char *p;
1782 mbfl_convert_filter *filter;
1783
1784 len = 0;
1785 if (string->len > 0 && string->val != NULL) {
1786 /* wchar filter */
1787 filter = mbfl_convert_filter_new(
1788 string->no_encoding,
1789 mbfl_no_encoding_wchar,
1790 filter_count_width, 0, &len);
1791 if (filter == NULL) {
1792 mbfl_convert_filter_delete(filter);
1793 return -1;
1794 }
1795
1796 /* feed data */
1797 p = string->val;
1798 n = string->len;
1799 while (n > 0) {
1800 (*filter->filter_function)(*p++, filter);
1801 n--;
1802 }
1803
1804 mbfl_convert_filter_flush(filter);
1805 mbfl_convert_filter_delete(filter);
1806 }
1807
1808 return len;
1809 }
1810
1811
1812 /*
1813 * strimwidth
1814 */
/* State shared between mbfl_strimwidth() and its collector_strimwidth() callback. */
struct collector_strimwidth_data {
	mbfl_convert_filter *decoder;	/* wchar -> string encoding, writes into `device` */
	mbfl_convert_filter *decoder_backup;	/* receives a copy of `decoder` taken at the first overflow */
	mbfl_memory_device device;	/* output buffer */
	int from;	/* index of the first character that is emitted */
	int width;	/* width budget the emitted characters are checked against */
	int outwidth;	/* accumulated width of the characters counted so far */
	int outchar;	/* number of characters seen so far */
	int status;	/* 0 until the budget is exceeded, incremented on each overflow; 10 = pass everything through */
	int endpos;	/* device.pos recorded at the first overflow */
};
1826
1827 static int
collector_strimwidth(int c,void * data)1828 collector_strimwidth(int c, void* data)
1829 {
1830 struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1831
1832 switch (pc->status) {
1833 case 10:
1834 (*pc->decoder->filter_function)(c, pc->decoder);
1835 break;
1836 default:
1837 if (pc->outchar >= pc->from) {
1838 pc->outwidth += (is_fullwidth(c) ? 2: 1);
1839
1840 if (pc->outwidth > pc->width) {
1841 if (pc->status == 0) {
1842 pc->endpos = pc->device.pos;
1843 mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1844 }
1845 pc->status++;
1846 (*pc->decoder->filter_function)(c, pc->decoder);
1847 c = -1;
1848 } else {
1849 (*pc->decoder->filter_function)(c, pc->decoder);
1850 }
1851 }
1852 pc->outchar++;
1853 break;
1854 }
1855
1856 return c;
1857 }
1858
1859 mbfl_string *
mbfl_strimwidth(mbfl_string * string,mbfl_string * marker,mbfl_string * result,int from,int width)1860 mbfl_strimwidth(
1861 mbfl_string *string,
1862 mbfl_string *marker,
1863 mbfl_string *result,
1864 int from,
1865 int width)
1866 {
1867 struct collector_strimwidth_data pc;
1868 mbfl_convert_filter *encoder;
1869 int n, mkwidth;
1870 unsigned char *p;
1871
1872 if (string == NULL || result == NULL) {
1873 return NULL;
1874 }
1875 mbfl_string_init(result);
1876 result->no_language = string->no_language;
1877 result->no_encoding = string->no_encoding;
1878 mbfl_memory_device_init(&pc.device, width, 0);
1879
1880 /* output code filter */
1881 pc.decoder = mbfl_convert_filter_new(
1882 mbfl_no_encoding_wchar,
1883 string->no_encoding,
1884 mbfl_memory_device_output, 0, &pc.device);
1885 pc.decoder_backup = mbfl_convert_filter_new(
1886 mbfl_no_encoding_wchar,
1887 string->no_encoding,
1888 mbfl_memory_device_output, 0, &pc.device);
1889 /* wchar filter */
1890 encoder = mbfl_convert_filter_new(
1891 string->no_encoding,
1892 mbfl_no_encoding_wchar,
1893 collector_strimwidth, 0, &pc);
1894 if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1895 mbfl_convert_filter_delete(encoder);
1896 mbfl_convert_filter_delete(pc.decoder);
1897 mbfl_convert_filter_delete(pc.decoder_backup);
1898 return NULL;
1899 }
1900 mkwidth = 0;
1901 if (marker) {
1902 mkwidth = mbfl_strwidth(marker);
1903 }
1904 pc.from = from;
1905 pc.width = width - mkwidth;
1906 pc.outwidth = 0;
1907 pc.outchar = 0;
1908 pc.status = 0;
1909 pc.endpos = 0;
1910
1911 /* feed data */
1912 p = string->val;
1913 n = string->len;
1914 if (p != NULL) {
1915 while (n > 0) {
1916 n--;
1917 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1918 break;
1919 }
1920 }
1921 mbfl_convert_filter_flush(encoder);
1922 if (pc.status != 0 && mkwidth > 0) {
1923 pc.width += mkwidth;
1924 while (n > 0) {
1925 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1926 break;
1927 }
1928 n--;
1929 }
1930 mbfl_convert_filter_flush(encoder);
1931 if (pc.status != 1) {
1932 pc.status = 10;
1933 pc.device.pos = pc.endpos;
1934 mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1935 mbfl_convert_filter_reset(encoder, marker->no_encoding, mbfl_no_encoding_wchar);
1936 p = marker->val;
1937 n = marker->len;
1938 while (n > 0) {
1939 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1940 break;
1941 }
1942 n--;
1943 }
1944 mbfl_convert_filter_flush(encoder);
1945 }
1946 } else if (pc.status != 0) {
1947 pc.device.pos = pc.endpos;
1948 mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1949 }
1950 mbfl_convert_filter_flush(pc.decoder);
1951 }
1952 result = mbfl_memory_device_result(&pc.device, result);
1953 mbfl_convert_filter_delete(encoder);
1954 mbfl_convert_filter_delete(pc.decoder);
1955 mbfl_convert_filter_delete(pc.decoder_backup);
1956
1957 return result;
1958 }
1959
1960 mbfl_string *
mbfl_ja_jp_hantozen(mbfl_string * string,mbfl_string * result,int mode)1961 mbfl_ja_jp_hantozen(
1962 mbfl_string *string,
1963 mbfl_string *result,
1964 int mode)
1965 {
1966 int n;
1967 unsigned char *p;
1968 const mbfl_encoding *encoding;
1969 mbfl_memory_device device;
1970 mbfl_convert_filter *decoder = NULL;
1971 mbfl_convert_filter *encoder = NULL;
1972 mbfl_convert_filter *tl_filter = NULL;
1973 mbfl_convert_filter *next_filter = NULL;
1974 mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1975
1976 /* validate parameters */
1977 if (string == NULL || result == NULL) {
1978 return NULL;
1979 }
1980
1981 encoding = mbfl_no2encoding(string->no_encoding);
1982 if (encoding == NULL) {
1983 return NULL;
1984 }
1985
1986 mbfl_memory_device_init(&device, string->len, 0);
1987 mbfl_string_init(result);
1988
1989 result->no_language = string->no_language;
1990 result->no_encoding = string->no_encoding;
1991
1992 decoder = mbfl_convert_filter_new(
1993 mbfl_no_encoding_wchar,
1994 string->no_encoding,
1995 mbfl_memory_device_output, 0, &device);
1996 if (decoder == NULL) {
1997 goto out;
1998 }
1999 next_filter = decoder;
2000
2001 param =
2002 (mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
2003 if (param == NULL) {
2004 goto out;
2005 }
2006
2007 param->mode = mode;
2008
2009 tl_filter = mbfl_convert_filter_new2(
2010 &vtbl_tl_jisx0201_jisx0208,
2011 (int(*)(int, void*))next_filter->filter_function,
2012 (int(*)(void*))next_filter->filter_flush,
2013 next_filter);
2014 if (tl_filter == NULL) {
2015 mbfl_free(param);
2016 goto out;
2017 }
2018
2019 tl_filter->opaque = param;
2020 next_filter = tl_filter;
2021
2022 encoder = mbfl_convert_filter_new(
2023 string->no_encoding,
2024 mbfl_no_encoding_wchar,
2025 (int(*)(int, void*))next_filter->filter_function,
2026 (int(*)(void*))next_filter->filter_flush,
2027 next_filter);
2028 if (encoder == NULL) {
2029 goto out;
2030 }
2031
2032 /* feed data */
2033 p = string->val;
2034 n = string->len;
2035 if (p != NULL) {
2036 while (n > 0) {
2037 if ((*encoder->filter_function)(*p++, encoder) < 0) {
2038 break;
2039 }
2040 n--;
2041 }
2042 }
2043
2044 mbfl_convert_filter_flush(encoder);
2045 result = mbfl_memory_device_result(&device, result);
2046 out:
2047 if (tl_filter != NULL) {
2048 if (tl_filter->opaque != NULL) {
2049 mbfl_free(tl_filter->opaque);
2050 }
2051 mbfl_convert_filter_delete(tl_filter);
2052 }
2053
2054 if (decoder != NULL) {
2055 mbfl_convert_filter_delete(decoder);
2056 }
2057
2058 if (encoder != NULL) {
2059 mbfl_convert_filter_delete(encoder);
2060 }
2061
2062 return result;
2063 }
2064
2065
2066 /*
2067 * MIME header encode
2068 */
/* Working state of the MIME header encoder (see mime_header_encoder_new()). */
struct mime_header_encoder_data {
	mbfl_convert_filter *conv1_filter;	/* input encoding -> wchar, drains into mime_header_encoder_collector() */
	mbfl_convert_filter *block_filter;	/* wchar -> wchar, drains into mime_header_encoder_block_collector() */
	mbfl_convert_filter *conv2_filter;	/* wchar -> output charset, piped into encod_filter */
	mbfl_convert_filter *conv2_filter_backup;	/* scratch copy of conv2_filter used for trial encoding */
	mbfl_convert_filter *encod_filter;	/* output charset -> transfer encoding, writes into outdev */
	mbfl_convert_filter *encod_filter_backup;	/* scratch copy of encod_filter used for trial encoding */
	mbfl_memory_device outdev;	/* the encoded header being built */
	mbfl_memory_device tmpdev;	/* characters of the current, not yet emitted plain word */
	int status1;	/* state of mime_header_encoder_collector(): 0, 1, or 11 while inside an encoded word */
	int status2;	/* state of the block collector: 1 once the encoded-word prefix has been written */
	int prevpos;	/* outdev.pos saved before a trial encoding */
	int linehead;	/* outdev position at which the current output line starts */
	int firstindent;	/* columns counted as already used on the first line */
	int encnamelen;	/* length of encname */
	int lwsplen;	/* length of lwsp */
	char encname[128];	/* encoded-word prefix, e.g. "=?ISO-2022-JP?B?" */
	char lwsp[16];	/* line break followed by one space, used for folding */
};
2088
2089 static int
mime_header_encoder_block_collector(int c,void * data)2090 mime_header_encoder_block_collector(int c, void *data)
2091 {
2092 int n;
2093 struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
2094
2095 switch (pe->status2) {
2096 case 1: /* encoded word */
2097 pe->prevpos = pe->outdev.pos;
2098 mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
2099 mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
2100 (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2101 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2102 (*pe->encod_filter->filter_flush)(pe->encod_filter);
2103 n = pe->outdev.pos - pe->linehead + pe->firstindent;
2104 pe->outdev.pos = pe->prevpos;
2105 mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
2106 mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
2107 if (n >= 74) {
2108 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2109 (*pe->encod_filter->filter_flush)(pe->encod_filter);
2110 mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
2111 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2112 pe->linehead = pe->outdev.pos;
2113 pe->firstindent = 0;
2114 mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
2115 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2116 } else {
2117 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2118 }
2119 break;
2120
2121 default:
2122 mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
2123 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2124 pe->status2 = 1;
2125 break;
2126 }
2127
2128 return c;
2129 }
2130
2131 static int
mime_header_encoder_collector(int c,void * data)2132 mime_header_encoder_collector(int c, void *data)
2133 {
2134 static int qp_table[256] = {
2135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2137 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
2138 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
2139 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
2140 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
2141 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
2142 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
2143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
2144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
2145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
2146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
2147 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
2148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
2149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
2150 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xF0 */
2151 };
2152
2153 int n;
2154 struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
2155
2156 switch (pe->status1) {
2157 case 11: /* encoded word */
2158 (*pe->block_filter->filter_function)(c, pe->block_filter);
2159 break;
2160
2161 default: /* ASCII */
2162 if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
2163 mbfl_memory_device_output(c, &pe->tmpdev);
2164 pe->status1 = 1;
2165 } else if (pe->status1 == 0 && c == 0x20) { /* repeat SPACE */
2166 mbfl_memory_device_output(c, &pe->tmpdev);
2167 } else {
2168 if (pe->tmpdev.pos < 74 && c == 0x20) {
2169 n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
2170 if (n > 74) {
2171 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
2172 pe->linehead = pe->outdev.pos;
2173 pe->firstindent = 0;
2174 } else if (pe->outdev.pos > 0) {
2175 mbfl_memory_device_output(0x20, &pe->outdev);
2176 }
2177 mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2178 mbfl_memory_device_reset(&pe->tmpdev);
2179 pe->status1 = 0;
2180 } else {
2181 n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
2182 if (n > 60) {
2183 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */
2184 pe->linehead = pe->outdev.pos;
2185 pe->firstindent = 0;
2186 } else if (pe->outdev.pos > 0) {
2187 mbfl_memory_device_output(0x20, &pe->outdev);
2188 }
2189 mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
2190 mbfl_memory_device_reset(&pe->tmpdev);
2191 (*pe->block_filter->filter_function)(c, pe->block_filter);
2192 pe->status1 = 11;
2193 }
2194 }
2195 break;
2196 }
2197
2198 return c;
2199 }
2200
2201 mbfl_string *
mime_header_encoder_result(struct mime_header_encoder_data * pe,mbfl_string * result)2202 mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
2203 {
2204 if (pe->status1 >= 10) {
2205 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2206 (*pe->encod_filter->filter_flush)(pe->encod_filter);
2207 mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
2208 } else if (pe->tmpdev.pos > 0) {
2209 if (pe->outdev.pos > 0) {
2210 if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) {
2211 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2212 } else {
2213 mbfl_memory_device_output(0x20, &pe->outdev);
2214 }
2215 }
2216 mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2217 }
2218 mbfl_memory_device_reset(&pe->tmpdev);
2219 pe->prevpos = 0;
2220 pe->linehead = 0;
2221 pe->status1 = 0;
2222 pe->status2 = 0;
2223
2224 return mbfl_memory_device_result(&pe->outdev, result);
2225 }
2226
2227 struct mime_header_encoder_data*
mime_header_encoder_new(enum mbfl_no_encoding incode,enum mbfl_no_encoding outcode,enum mbfl_no_encoding transenc)2228 mime_header_encoder_new(
2229 enum mbfl_no_encoding incode,
2230 enum mbfl_no_encoding outcode,
2231 enum mbfl_no_encoding transenc)
2232 {
2233 int n;
2234 const char *s;
2235 const mbfl_encoding *outencoding;
2236 struct mime_header_encoder_data *pe;
2237
2238 /* get output encoding and check MIME charset name */
2239 outencoding = mbfl_no2encoding(outcode);
2240 if (outencoding == NULL || outencoding->mime_name == NULL || outencoding->mime_name[0] == '\0') {
2241 return NULL;
2242 }
2243
2244 pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
2245 if (pe == NULL) {
2246 return NULL;
2247 }
2248
2249 mbfl_memory_device_init(&pe->outdev, 0, 0);
2250 mbfl_memory_device_init(&pe->tmpdev, 0, 0);
2251 pe->prevpos = 0;
2252 pe->linehead = 0;
2253 pe->firstindent = 0;
2254 pe->status1 = 0;
2255 pe->status2 = 0;
2256
2257 /* make the encoding description string exp. "=?ISO-2022-JP?B?" */
2258 n = 0;
2259 pe->encname[n++] = 0x3d;
2260 pe->encname[n++] = 0x3f;
2261 s = outencoding->mime_name;
2262 while (*s) {
2263 pe->encname[n++] = *s++;
2264 }
2265 pe->encname[n++] = 0x3f;
2266 if (transenc == mbfl_no_encoding_qprint) {
2267 pe->encname[n++] = 0x51;
2268 } else {
2269 pe->encname[n++] = 0x42;
2270 transenc = mbfl_no_encoding_base64;
2271 }
2272 pe->encname[n++] = 0x3f;
2273 pe->encname[n] = '\0';
2274 pe->encnamelen = n;
2275
2276 n = 0;
2277 pe->lwsp[n++] = 0x0d;
2278 pe->lwsp[n++] = 0x0a;
2279 pe->lwsp[n++] = 0x20;
2280 pe->lwsp[n] = '\0';
2281 pe->lwsplen = n;
2282
2283 /* transfer encode filter */
2284 pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2285 pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2286
2287 /* Output code filter */
2288 pe->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2289 pe->conv2_filter_backup = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2290
2291 /* encoded block filter */
2292 pe->block_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, mbfl_no_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
2293
2294 /* Input code filter */
2295 pe->conv1_filter = mbfl_convert_filter_new(incode, mbfl_no_encoding_wchar, mime_header_encoder_collector, 0, pe);
2296
2297 if (pe->encod_filter == NULL ||
2298 pe->encod_filter_backup == NULL ||
2299 pe->conv2_filter == NULL ||
2300 pe->conv2_filter_backup == NULL ||
2301 pe->conv1_filter == NULL) {
2302 mime_header_encoder_delete(pe);
2303 return NULL;
2304 }
2305
2306 if (transenc == mbfl_no_encoding_qprint) {
2307 pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
2308 pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
2309 } else {
2310 pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
2311 pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
2312 }
2313
2314 return pe;
2315 }
2316
2317 void
mime_header_encoder_delete(struct mime_header_encoder_data * pe)2318 mime_header_encoder_delete(struct mime_header_encoder_data *pe)
2319 {
2320 if (pe) {
2321 mbfl_convert_filter_delete(pe->conv1_filter);
2322 mbfl_convert_filter_delete(pe->block_filter);
2323 mbfl_convert_filter_delete(pe->conv2_filter);
2324 mbfl_convert_filter_delete(pe->conv2_filter_backup);
2325 mbfl_convert_filter_delete(pe->encod_filter);
2326 mbfl_convert_filter_delete(pe->encod_filter_backup);
2327 mbfl_memory_device_clear(&pe->outdev);
2328 mbfl_memory_device_clear(&pe->tmpdev);
2329 mbfl_free((void*)pe);
2330 }
2331 }
2332
2333 int
mime_header_encoder_feed(int c,struct mime_header_encoder_data * pe)2334 mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
2335 {
2336 return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
2337 }
2338
2339 mbfl_string *
mbfl_mime_header_encode(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding outcode,enum mbfl_no_encoding encoding,const char * linefeed,int indent)2340 mbfl_mime_header_encode(
2341 mbfl_string *string,
2342 mbfl_string *result,
2343 enum mbfl_no_encoding outcode,
2344 enum mbfl_no_encoding encoding,
2345 const char *linefeed,
2346 int indent)
2347 {
2348 int n;
2349 unsigned char *p;
2350 struct mime_header_encoder_data *pe;
2351
2352 mbfl_string_init(result);
2353 result->no_language = string->no_language;
2354 result->no_encoding = mbfl_no_encoding_ascii;
2355
2356 pe = mime_header_encoder_new(string->no_encoding, outcode, encoding);
2357 if (pe == NULL) {
2358 return NULL;
2359 }
2360
2361 if (linefeed != NULL) {
2362 n = 0;
2363 while (*linefeed && n < 8) {
2364 pe->lwsp[n++] = *linefeed++;
2365 }
2366 pe->lwsp[n++] = 0x20;
2367 pe->lwsp[n] = '\0';
2368 pe->lwsplen = n;
2369 }
2370 if (indent > 0 && indent < 74) {
2371 pe->firstindent = indent;
2372 }
2373
2374 n = string->len;
2375 p = string->val;
2376 while (n > 0) {
2377 (*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
2378 n--;
2379 }
2380
2381 result = mime_header_encoder_result(pe, result);
2382 mime_header_encoder_delete(pe);
2383
2384 return result;
2385 }
2386
2387
2388 /*
2389 * MIME header decode
2390 */
/* Working state of the MIME header decoder and its collector callback. */
struct mime_header_decoder_data {
	mbfl_convert_filter *deco_filter;	/* presumably the transfer-encoding (base64/qprint) decode filter -- TODO confirm */
	mbfl_convert_filter *conv1_filter;	/* receives text that is not part of an encoded word; reset to incode -> wchar per encoded word */
	mbfl_convert_filter *conv2_filter;	/* presumably wchar -> outcode -- TODO confirm */
	mbfl_memory_device outdev;	/* presumably the decoded output -- TODO confirm */
	mbfl_memory_device tmpdev;	/* bytes of a candidate encoded-word header collected so far */
	int cspos;	/* position in tmpdev where the charset name starts */
	int status;	/* state of the collector's state machine */
	enum mbfl_no_encoding encoding;	/* transfer encoding found in the header: base64 or qprint */
	enum mbfl_no_encoding incode;	/* charset identified from the encoded-word header */
	enum mbfl_no_encoding outcode;	/* presumably the encoding of the decoded result -- TODO confirm */
};
2403
2404 static int
mime_header_decoder_collector(int c,void * data)2405 mime_header_decoder_collector(int c, void* data)
2406 {
2407 const mbfl_encoding *encoding;
2408 struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;
2409
2410 switch (pd->status) {
2411 case 1:
2412 if (c == 0x3f) { /* ? */
2413 mbfl_memory_device_output(c, &pd->tmpdev);
2414 pd->cspos = pd->tmpdev.pos;
2415 pd->status = 2;
2416 } else {
2417 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2418 mbfl_memory_device_reset(&pd->tmpdev);
2419 if (c == 0x3d) { /* = */
2420 mbfl_memory_device_output(c, &pd->tmpdev);
2421 } else if (c == 0x0d || c == 0x0a) { /* CR or LF */
2422 pd->status = 9;
2423 } else {
2424 (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2425 pd->status = 0;
2426 }
2427 }
2428 break;
2429 case 2: /* store charset string */
2430 if (c == 0x3f) { /* ? */
2431 /* identify charset */
2432 mbfl_memory_device_output('\0', &pd->tmpdev);
2433 encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
2434 if (encoding != NULL) {
2435 pd->incode = encoding->no_encoding;
2436 pd->status = 3;
2437 }
2438 mbfl_memory_device_unput(&pd->tmpdev);
2439 mbfl_memory_device_output(c, &pd->tmpdev);
2440 } else {
2441 mbfl_memory_device_output(c, &pd->tmpdev);
2442 if (pd->tmpdev.pos > 100) { /* too long charset string */
2443 pd->status = 0;
2444 } else if (c == 0x0d || c == 0x0a) { /* CR or LF */
2445 mbfl_memory_device_unput(&pd->tmpdev);
2446 pd->status = 9;
2447 }
2448 if (pd->status != 2) {
2449 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2450 mbfl_memory_device_reset(&pd->tmpdev);
2451 }
2452 }
2453 break;
2454 case 3: /* identify encoding */
2455 mbfl_memory_device_output(c, &pd->tmpdev);
2456 if (c == 0x42 || c == 0x62) { /* 'B' or 'b' */
2457 pd->encoding = mbfl_no_encoding_base64;
2458 pd->status = 4;
2459 } else if (c == 0x51 || c == 0x71) { /* 'Q' or 'q' */
2460 pd->encoding = mbfl_no_encoding_qprint;
2461 pd->status = 4;
2462 } else {
2463 if (c == 0x0d || c == 0x0a) { /* CR or LF */
2464 mbfl_memory_device_unput(&pd->tmpdev);
2465 pd->status = 9;
2466 } else {
2467 pd->status = 0;
2468 }
2469 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2470 mbfl_memory_device_reset(&pd->tmpdev);
2471 }
2472 break;
2473 case 4: /* reset filter */
2474 mbfl_memory_device_output(c, &pd->tmpdev);
2475 if (c == 0x3f) { /* ? */
2476 /* charset convert filter */
2477 mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, mbfl_no_encoding_wchar);
2478 /* decode filter */
2479 mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, mbfl_no_encoding_8bit);
2480 pd->status = 5;
2481 } else {
2482 if (c == 0x0d || c == 0x0a) { /* CR or LF */
2483 mbfl_memory_device_unput(&pd->tmpdev);
2484 pd->status = 9;
2485 } else {
2486 pd->status = 0;
2487 }
2488 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2489 }
2490 mbfl_memory_device_reset(&pd->tmpdev);
2491 break;
2492 case 5: /* encoded block */
2493 if (c == 0x3f) { /* ? */
2494 pd->status = 6;
2495 } else {
2496 (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2497 }
2498 break;
2499 case 6: /* check end position */
2500 if (c == 0x3d) { /* = */
2501 /* flush and reset filter */
2502 (*pd->deco_filter->filter_flush)(pd->deco_filter);
2503 (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2504 mbfl_convert_filter_reset(pd->conv1_filter, mbfl_no_encoding_ascii, mbfl_no_encoding_wchar);
2505 pd->status = 7;
2506 } else {
2507 (*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
2508 if (c != 0x3f) { /* ? */
2509 (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2510 pd->status = 5;
2511 }
2512 }
2513 break;
2514 case 7: /* after encoded block */
2515 if (c == 0x0d || c == 0x0a) { /* CR LF */
2516 pd->status = 8;
2517 } else {
2518 mbfl_memory_device_output(c, &pd->tmpdev);
2519 if (c == 0x3d) { /* = */
2520 pd->status = 1;
2521 } else if (c != 0x20 && c != 0x09) { /* not space */
2522 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2523 mbfl_memory_device_reset(&pd->tmpdev);
2524 pd->status = 0;
2525 }
2526 }
2527 break;
2528 case 8: /* folding */
2529 case 9: /* folding */
2530 if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
2531 if (c == 0x3d) { /* = */
2532 if (pd->status == 8) {
2533 mbfl_memory_device_output(0x20, &pd->tmpdev); /* SPACE */
2534 } else {
2535 (*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
2536 }
2537 mbfl_memory_device_output(c, &pd->tmpdev);
2538 pd->status = 1;
2539 } else {
2540 mbfl_memory_device_output(0x20, &pd->tmpdev);
2541 mbfl_memory_device_output(c, &pd->tmpdev);
2542 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2543 mbfl_memory_device_reset(&pd->tmpdev);
2544 pd->status = 0;
2545 }
2546 }
2547 break;
2548 default: /* non encoded block */
2549 if (c == 0x0d || c == 0x0a) { /* CR LF */
2550 pd->status = 9;
2551 } else if (c == 0x3d) { /* = */
2552 mbfl_memory_device_output(c, &pd->tmpdev);
2553 pd->status = 1;
2554 } else {
2555 (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2556 }
2557 break;
2558 }
2559
2560 return c;
2561 }
2562
2563 mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data * pd,mbfl_string * result)2564 mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2565 {
2566 switch (pd->status) {
2567 case 1:
2568 case 2:
2569 case 3:
2570 case 4:
2571 case 7:
2572 case 8:
2573 case 9:
2574 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2575 break;
2576 case 5:
2577 case 6:
2578 (*pd->deco_filter->filter_flush)(pd->deco_filter);
2579 (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2580 break;
2581 }
2582 (*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2583 mbfl_memory_device_reset(&pd->tmpdev);
2584 pd->status = 0;
2585
2586 return mbfl_memory_device_result(&pd->outdev, result);
2587 }
2588
2589 struct mime_header_decoder_data*
mime_header_decoder_new(enum mbfl_no_encoding outcode)2590 mime_header_decoder_new(enum mbfl_no_encoding outcode)
2591 {
2592 struct mime_header_decoder_data *pd;
2593
2594 pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
2595 if (pd == NULL) {
2596 return NULL;
2597 }
2598
2599 mbfl_memory_device_init(&pd->outdev, 0, 0);
2600 mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2601 pd->cspos = 0;
2602 pd->status = 0;
2603 pd->encoding = mbfl_no_encoding_pass;
2604 pd->incode = mbfl_no_encoding_ascii;
2605 pd->outcode = outcode;
2606 /* charset convert filter */
2607 pd->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2608 pd->conv1_filter = mbfl_convert_filter_new(pd->incode, mbfl_no_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2609 /* decode filter */
2610 pd->deco_filter = mbfl_convert_filter_new(pd->encoding, mbfl_no_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2611
2612 if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2613 mime_header_decoder_delete(pd);
2614 return NULL;
2615 }
2616
2617 return pd;
2618 }
2619
2620 void
mime_header_decoder_delete(struct mime_header_decoder_data * pd)2621 mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2622 {
2623 if (pd) {
2624 mbfl_convert_filter_delete(pd->conv2_filter);
2625 mbfl_convert_filter_delete(pd->conv1_filter);
2626 mbfl_convert_filter_delete(pd->deco_filter);
2627 mbfl_memory_device_clear(&pd->outdev);
2628 mbfl_memory_device_clear(&pd->tmpdev);
2629 mbfl_free((void*)pd);
2630 }
2631 }
2632
/*
 * Feed one byte of header text to the decoder.  Thin typed front end for
 * mime_header_decoder_collector; returns that function's result.
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	void *state = pd;

	return mime_header_decoder_collector(c, state);
}
2638
2639 mbfl_string *
mbfl_mime_header_decode(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding outcode)2640 mbfl_mime_header_decode(
2641 mbfl_string *string,
2642 mbfl_string *result,
2643 enum mbfl_no_encoding outcode)
2644 {
2645 int n;
2646 unsigned char *p;
2647 struct mime_header_decoder_data *pd;
2648
2649 mbfl_string_init(result);
2650 result->no_language = string->no_language;
2651 result->no_encoding = outcode;
2652
2653 pd = mime_header_decoder_new(outcode);
2654 if (pd == NULL) {
2655 return NULL;
2656 }
2657
2658 /* feed data */
2659 n = string->len;
2660 p = string->val;
2661 while (n > 0) {
2662 mime_header_decoder_collector(*p++, pd);
2663 n--;
2664 }
2665
2666 result = mime_header_decoder_result(pd, result);
2667 mime_header_decoder_delete(pd);
2668
2669 return result;
2670 }
2671
2672
2673
2674 /*
2675 * convert HTML numeric entity
2676 */
2677 struct collector_htmlnumericentity_data {
2678 mbfl_convert_filter *decoder;
2679 int status;
2680 int cache;
2681 int digit;
2682 int *convmap;
2683 int mapsize;
2684 };
2685
2686 static int
collector_encode_htmlnumericentity(int c,void * data)2687 collector_encode_htmlnumericentity(int c, void *data)
2688 {
2689 struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2690 int f, n, s, r, d, size, *mapelm;
2691
2692 size = pc->mapsize;
2693 f = 0;
2694 n = 0;
2695 while (n < size) {
2696 mapelm = &(pc->convmap[n*4]);
2697 if (c >= mapelm[0] && c <= mapelm[1]) {
2698 s = (c + mapelm[2]) & mapelm[3];
2699 if (s >= 0) {
2700 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2701 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2702 r = 100000000;
2703 s %= r;
2704 while (r > 0) {
2705 d = s/r;
2706 if (d || f) {
2707 f = 1;
2708 s %= r;
2709 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2710 }
2711 r /= 10;
2712 }
2713 if (!f) {
2714 f = 1;
2715 (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2716 }
2717 (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
2718 }
2719 }
2720 if (f) {
2721 break;
2722 }
2723 n++;
2724 }
2725 if (!f) {
2726 (*pc->decoder->filter_function)(c, pc->decoder);
2727 }
2728
2729 return c;
2730 }
2731
2732 static int
collector_decode_htmlnumericentity(int c,void * data)2733 collector_decode_htmlnumericentity(int c, void *data)
2734 {
2735 struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2736 int f, n, s, r, d, size, *mapelm;
2737
2738 switch (pc->status) {
2739 case 1:
2740 if (c == 0x23) { /* '#' */
2741 pc->status = 2;
2742 } else {
2743 pc->status = 0;
2744 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2745 (*pc->decoder->filter_function)(c, pc->decoder);
2746 }
2747 break;
2748 case 2:
2749 if (c == 0x78) { /* 'x' */
2750 pc->status = 4;
2751 } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2752 pc->cache = c - 0x30;
2753 pc->status = 3;
2754 pc->digit = 1;
2755 } else {
2756 pc->status = 0;
2757 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2758 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2759 (*pc->decoder->filter_function)(c, pc->decoder);
2760 }
2761 break;
2762 case 3:
2763 s = 0;
2764 f = 0;
2765 if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2766 if (pc->digit > 9) {
2767 pc->status = 0;
2768 s = pc->cache;
2769 f = 1;
2770 } else {
2771 s = pc->cache*10 + c - 0x30;
2772 pc->cache = s;
2773 pc->digit++;
2774 }
2775 } else {
2776 pc->status = 0;
2777 s = pc->cache;
2778 f = 1;
2779 n = 0;
2780 size = pc->mapsize;
2781 while (n < size) {
2782 mapelm = &(pc->convmap[n*4]);
2783 d = s - mapelm[2];
2784 if (d >= mapelm[0] && d <= mapelm[1]) {
2785 f = 0;
2786 (*pc->decoder->filter_function)(d, pc->decoder);
2787 if (c != 0x3b) { /* ';' */
2788 (*pc->decoder->filter_function)(c, pc->decoder);
2789 }
2790 break;
2791 }
2792 n++;
2793 }
2794 }
2795 if (f) {
2796 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2797 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2798 r = 1;
2799 n = pc->digit;
2800 while (n > 0) {
2801 r *= 10;
2802 n--;
2803 }
2804 s %= r;
2805 r /= 10;
2806 while (r > 0) {
2807 d = s/r;
2808 s %= r;
2809 r /= 10;
2810 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2811 }
2812 (*pc->decoder->filter_function)(c, pc->decoder);
2813 }
2814 break;
2815 case 4:
2816 if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2817 pc->cache = c - 0x30;
2818 pc->status = 5;
2819 pc->digit = 1;
2820 } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F' */
2821 pc->cache = c - 0x41 + 10;
2822 pc->status = 5;
2823 pc->digit = 1;
2824 } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f' */
2825 pc->cache = c - 0x61 + 10;
2826 pc->status = 5;
2827 pc->digit = 1;
2828 } else {
2829 pc->status = 0;
2830 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2831 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2832 (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2833 (*pc->decoder->filter_function)(c, pc->decoder);
2834 }
2835 break;
2836 case 5:
2837 s = 0;
2838 f = 0;
2839 if ((c >= 0x30 && c <= 0x39) ||
2840 (c >= 0x41 && c <= 0x46) ||
2841 (c >= 0x61 && c <= 0x66)) { /* '0' - '9' or 'a' - 'f' */
2842 if (pc->digit > 9) {
2843 pc->status = 0;
2844 s = pc->cache;
2845 f = 1;
2846 } else {
2847 if (c >= 0x30 && c <= 0x39) {
2848 s = pc->cache*16 + (c - 0x30);
2849 } else if (c >= 0x41 && c <= 0x46) {
2850 s = pc->cache*16 + (c - 0x41 + 10);
2851 } else {
2852 s = pc->cache*16 + (c - 0x61 + 10);
2853 }
2854 pc->cache = s;
2855 pc->digit++;
2856 }
2857 } else {
2858 pc->status = 0;
2859 s = pc->cache;
2860 f = 1;
2861 n = 0;
2862 size = pc->mapsize;
2863 while (n < size) {
2864 mapelm = &(pc->convmap[n*4]);
2865 d = s - mapelm[2];
2866 if (d >= mapelm[0] && d <= mapelm[1]) {
2867 f = 0;
2868 (*pc->decoder->filter_function)(d, pc->decoder);
2869 if (c != 0x3b) { /* ';' */
2870 (*pc->decoder->filter_function)(c, pc->decoder);
2871 }
2872 break;
2873 }
2874 n++;
2875 }
2876 }
2877 if (f) {
2878 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2879 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2880 (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2881 r = 1;
2882 n = pc->digit;
2883 while (n > 0) {
2884 r *= 16;
2885 n--;
2886 }
2887 s %= r;
2888 r /= 16;
2889 while (r > 0) {
2890 d = s/r;
2891 s %= r;
2892 r /= 16;
2893 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2894 }
2895 (*pc->decoder->filter_function)(c, pc->decoder);
2896 }
2897 break;
2898 default:
2899 if (c == 0x26) { /* '&' */
2900 pc->status = 1;
2901 } else {
2902 (*pc->decoder->filter_function)(c, pc->decoder);
2903 }
2904 break;
2905 }
2906
2907 return c;
2908 }
2909
2910 static int
collector_encode_hex_htmlnumericentity(int c,void * data)2911 collector_encode_hex_htmlnumericentity(int c, void *data)
2912 {
2913 struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2914 int f, n, s, r, d, size, *mapelm;
2915
2916 size = pc->mapsize;
2917 f = 0;
2918 n = 0;
2919 while (n < size) {
2920 mapelm = &(pc->convmap[n*4]);
2921 if (c >= mapelm[0] && c <= mapelm[1]) {
2922 s = (c + mapelm[2]) & mapelm[3];
2923 if (s >= 0) {
2924 (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
2925 (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
2926 (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
2927 r = 0x1000000;
2928 s %= r;
2929 while (r > 0) {
2930 d = s/r;
2931 if (d || f) {
2932 f = 1;
2933 s %= r;
2934 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2935 }
2936 r /= 16;
2937 }
2938 if (!f) {
2939 f = 1;
2940 (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2941 }
2942 (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
2943 }
2944 }
2945 if (f) {
2946 break;
2947 }
2948 n++;
2949 }
2950 if (!f) {
2951 (*pc->decoder->filter_function)(c, pc->decoder);
2952 }
2953
2954 return c;
2955 }
2956
/*
 * Flush handler for the numeric entity decoder: writes out, as literal
 * text, a reference that was still incomplete when the input ended, then
 * resets the parse state.
 *
 *   status 1 -> "&"
 *   status 2 -> "&#"
 *   status 3 -> "&#" followed by the decimal digits read so far
 *   status 4 -> "&#x"
 *   status 5 -> "&#x" followed by the hex digits read so far
 *
 * NOTE: despite its declared type, `filter` is used as the collector state;
 * it is cast directly to struct collector_htmlnumericentity_data.
 * mbfl_html_numeric_entity registers this function through an
 * int (*)(void*) cast and passes its own state structure alongside it.
 *
 * The digits are extracted from the least significant end into a buffer.
 * The previous approach computed base^digit in an int, which overflows for
 * 10 decimal digits and becomes zero (then used as a divisor) for 8 or more
 * hex digits.
 *
 * Always returns 0.
 */
int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
{
	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
	unsigned char digits[32];
	unsigned int rest, base;
	int status = pc->status;
	int count, pad;

	if (status >= 1 && status <= 5) {
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		if (status >= 2) {
			(*pc->decoder->filter_function)(0x23, pc->decoder);	/* '#' */
		}
		if (status >= 4) {
			(*pc->decoder->filter_function)(0x78, pc->decoder);	/* 'x' */
		}
	}

	if (status == 3 || status == 5) {	/* digits are pending */
		base = (status == 3) ? 10 : 16;
		rest = (unsigned int)pc->cache;
		count = 0;
		while (count < pc->digit && count < (int)sizeof(digits)) {
			digits[count++] = (unsigned char)(rest % base);
			rest /= base;
		}
		/* positions that do not fit in the buffer can only be leading zeros */
		for (pad = pc->digit - count; pad > 0; pad--) {
			(*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
		}
		while (count > 0) {
			(*pc->decoder->filter_function)(mbfl_hexchar_table[digits[--count]], pc->decoder);
		}
	}

	pc->status = 0;
	pc->cache = 0;
	pc->digit = 0;

	return 0;
}
3029
3030
3031 mbfl_string *
mbfl_html_numeric_entity(mbfl_string * string,mbfl_string * result,int * convmap,int mapsize,int type)3032 mbfl_html_numeric_entity(
3033 mbfl_string *string,
3034 mbfl_string *result,
3035 int *convmap,
3036 int mapsize,
3037 int type)
3038 {
3039 struct collector_htmlnumericentity_data pc;
3040 mbfl_memory_device device;
3041 mbfl_convert_filter *encoder;
3042 int n;
3043 unsigned char *p;
3044
3045 if (string == NULL || result == NULL) {
3046 return NULL;
3047 }
3048 mbfl_string_init(result);
3049 result->no_language = string->no_language;
3050 result->no_encoding = string->no_encoding;
3051 mbfl_memory_device_init(&device, string->len, 0);
3052
3053 /* output code filter */
3054 pc.decoder = mbfl_convert_filter_new(
3055 mbfl_no_encoding_wchar,
3056 string->no_encoding,
3057 mbfl_memory_device_output, 0, &device);
3058 /* wchar filter */
3059 if (type == 0) { /* decimal output */
3060 encoder = mbfl_convert_filter_new(
3061 string->no_encoding,
3062 mbfl_no_encoding_wchar,
3063 collector_encode_htmlnumericentity, 0, &pc);
3064 } else if (type == 2) { /* hex output */
3065 encoder = mbfl_convert_filter_new(
3066 string->no_encoding,
3067 mbfl_no_encoding_wchar,
3068 collector_encode_hex_htmlnumericentity, 0, &pc);
3069 } else { /* type == 1: decimal/hex input */
3070 encoder = mbfl_convert_filter_new(
3071 string->no_encoding,
3072 mbfl_no_encoding_wchar,
3073 collector_decode_htmlnumericentity,
3074 (int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
3075 }
3076 if (pc.decoder == NULL || encoder == NULL) {
3077 mbfl_convert_filter_delete(encoder);
3078 mbfl_convert_filter_delete(pc.decoder);
3079 return NULL;
3080 }
3081 pc.status = 0;
3082 pc.cache = 0;
3083 pc.digit = 0;
3084 pc.convmap = convmap;
3085 pc.mapsize = mapsize;
3086
3087 /* feed data */
3088 p = string->val;
3089 n = string->len;
3090 if (p != NULL) {
3091 while (n > 0) {
3092 if ((*encoder->filter_function)(*p++, encoder) < 0) {
3093 break;
3094 }
3095 n--;
3096 }
3097 }
3098 mbfl_convert_filter_flush(encoder);
3099 mbfl_convert_filter_flush(pc.decoder);
3100 result = mbfl_memory_device_result(&device, result);
3101 mbfl_convert_filter_delete(encoder);
3102 mbfl_convert_filter_delete(pc.decoder);
3103
3104 return result;
3105 }
3106
3107 /*
3108 * Local variables:
3109 * tab-width: 4
3110 * c-basic-offset: 4
3111 * End:
3112 */
3113