xref: /PHP-7.2/ext/mbstring/libmbfl/mbfl/mbfilter.c (revision bf5a802f)
1 /*
2  * charset=UTF-8
3  * vim600: encoding=utf-8
4  */
5 
6 /*
7  * "streamable kanji code filter and converter"
8  *
9  * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
10  *
11  * This software is released under the GNU Lesser General Public License.
12  * (Version 2.1, February 1999)
13  * Please read the following detail of the licence (in japanese).
14  *
15  * ◆使用許諾条件◆
16  *
17  * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
18  * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
19  * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
20  * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
21  * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
22  * することはできません。
23  *
24  * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
25  * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
26  * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
27  * による許諾を得る必要があります。
28  *
29  * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
30  * ます。「GNU Lesser General Public License」とは、これまでLibrary General
31  * Public Licenseと呼ばれていたものです。
32  *     http://www.gnu.org/ --- GNUウェブサイト
33  *     http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
34  * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
35  *
36  * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
37  * はありません。
38  *
39  * ◆保証内容◆
40  *
41  * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
42  * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
43  * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
44  * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
45  * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
46  * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
47  * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
48  * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
49  * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
50  * 契約・規定に優先します。
51  *
52  * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
53  *
54  * 〒102-0073
55  * 東京都千代田区九段北1-13-5日本地所第一ビル4F
56  * 株式会社ハッピーサイズ
57  * Phone: 03-3512-3655, Fax: 03-3512-3656
58  * Email: sales@happysize.co.jp
59  * Web: http://happysize.com/
60  *
61  * ◆著者◆
62  *
63  * 金本 茂 <sgk@happysize.co.jp>
64  *
65  * ◆履歴◆
66  *
67  * 1998/11/10 sgk implementation in C++
68  * 1999/4/25  sgk Cで書きなおし。
69  * 1999/4/26  sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
70  * 1999/6/??      Unicodeサポート。
71  * 1999/6/22  sgk ライセンスをLGPLに変更。
72  *
73  */
74 
75 /*
76  * Unicode support
77  *
78  * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
79  * All rights reserved.
80  *
81  */
82 
83 
84 #ifdef HAVE_CONFIG_H
85 #include "config.h"
86 #endif
87 
88 #include <stddef.h>
89 
90 #ifdef HAVE_STRING_H
91 #include <string.h>
92 #endif
93 
94 #ifdef HAVE_STRINGS_H
95 #include <strings.h>
96 #endif
97 
98 #ifdef HAVE_STDDEF_H
99 #include <stddef.h>
100 #endif
101 
102 #include "mbfilter.h"
103 #include "mbfl_filter_output.h"
104 #include "mbfilter_pass.h"
105 #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
106 
107 #include "eaw_table.h"
108 
109 /* hex character table "0123456789ABCDEF" */
/*
 * ASCII codes of the sixteen upper-case hexadecimal digits, indexed by
 * nibble value (0..15).  Spelled as numeric code points rather than
 * character literals.
 */
static char mbfl_hexchar_table[] = {
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* '0' .. '7' */
	0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46	/* '8', '9', 'A' .. 'F' */
};
113 
114 
115 
116 /*
117  * encoding filter
118  */
/*
 * Evaluate `statement` exactly once; when it yields a negative value the
 * enclosing function returns -1 immediately.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
120 
121 
122 /*
123  *  buffering converter
124  */
125 mbfl_buffer_converter *
mbfl_buffer_converter_new(enum mbfl_no_encoding from,enum mbfl_no_encoding to,int buf_initsz)126 mbfl_buffer_converter_new(
127     enum mbfl_no_encoding from,
128     enum mbfl_no_encoding to,
129     int buf_initsz)
130 {
131 	const mbfl_encoding *_from = mbfl_no2encoding(from);
132 	const mbfl_encoding *_to = mbfl_no2encoding(to);
133 
134 	return mbfl_buffer_converter_new2(_from ? _from: &mbfl_encoding_pass, _to ? _to: &mbfl_encoding_pass, buf_initsz);
135 }
136 
137 mbfl_buffer_converter *
mbfl_buffer_converter_new2(const mbfl_encoding * from,const mbfl_encoding * to,int buf_initsz)138 mbfl_buffer_converter_new2(
139 	const mbfl_encoding *from,
140 	const mbfl_encoding *to,
141     int buf_initsz)
142 {
143 	mbfl_buffer_converter *convd;
144 
145 	/* allocate */
146 	convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof (mbfl_buffer_converter));
147 	if (convd == NULL) {
148 		return NULL;
149 	}
150 
151 	/* initialize */
152 	convd->from = from;
153 	convd->to = to;
154 
155 	/* create convert filter */
156 	convd->filter1 = NULL;
157 	convd->filter2 = NULL;
158 	if (mbfl_convert_filter_get_vtbl(convd->from->no_encoding, convd->to->no_encoding) != NULL) {
159 		convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
160 	} else {
161 		convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
162 		if (convd->filter2 != NULL) {
163 			convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding,
164 					mbfl_no_encoding_wchar,
165 					(int (*)(int, void*))convd->filter2->filter_function,
166 					(int (*)(void*))convd->filter2->filter_flush,
167 					convd->filter2);
168 			if (convd->filter1 == NULL) {
169 				mbfl_convert_filter_delete(convd->filter2);
170 			}
171 		}
172 	}
173 	if (convd->filter1 == NULL) {
174 		return NULL;
175 	}
176 
177 	mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
178 
179 	return convd;
180 }
181 
182 
183 void
mbfl_buffer_converter_delete(mbfl_buffer_converter * convd)184 mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
185 {
186 	if (convd != NULL) {
187 		if (convd->filter1) {
188 			mbfl_convert_filter_delete(convd->filter1);
189 		}
190 		if (convd->filter2) {
191 			mbfl_convert_filter_delete(convd->filter2);
192 		}
193 		mbfl_memory_device_clear(&convd->device);
194 		mbfl_free((void*)convd);
195 	}
196 }
197 
198 void
mbfl_buffer_converter_reset(mbfl_buffer_converter * convd)199 mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
200 {
201 	mbfl_memory_device_reset(&convd->device);
202 }
203 
204 int
mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter * convd,int mode)205 mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
206 {
207 	if (convd != NULL) {
208 		if (convd->filter2 != NULL) {
209 			convd->filter2->illegal_mode = mode;
210 		} else if (convd->filter1 != NULL) {
211 			convd->filter1->illegal_mode = mode;
212 		} else {
213 			return 0;
214 		}
215 	}
216 
217 	return 1;
218 }
219 
220 int
mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter * convd,int substchar)221 mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
222 {
223 	if (convd != NULL) {
224 		if (convd->filter2 != NULL) {
225 			convd->filter2->illegal_substchar = substchar;
226 		} else if (convd->filter1 != NULL) {
227 			convd->filter1->illegal_substchar = substchar;
228 		} else {
229 			return 0;
230 		}
231 	}
232 
233 	return 1;
234 }
235 
236 int
mbfl_buffer_converter_strncat(mbfl_buffer_converter * convd,const unsigned char * p,int n)237 mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n)
238 {
239 	mbfl_convert_filter *filter;
240 	int (*filter_function)(int c, mbfl_convert_filter *filter);
241 
242 	if (convd != NULL && p != NULL) {
243 		filter = convd->filter1;
244 		if (filter != NULL) {
245 			filter_function = filter->filter_function;
246 			while (n > 0) {
247 				if ((*filter_function)(*p++, filter) < 0) {
248 					break;
249 				}
250 				n--;
251 			}
252 		}
253 	}
254 
255 	return n;
256 }
257 
258 int
mbfl_buffer_converter_feed(mbfl_buffer_converter * convd,mbfl_string * string)259 mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
260 {
261 	return mbfl_buffer_converter_feed2(convd, string, NULL);
262 }
263 
264 int
mbfl_buffer_converter_feed2(mbfl_buffer_converter * convd,mbfl_string * string,int * loc)265 mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc)
266 {
267 	int n;
268 	unsigned char *p;
269 	mbfl_convert_filter *filter;
270 	int (*filter_function)(int c, mbfl_convert_filter *filter);
271 
272 	if (convd == NULL || string == NULL) {
273 		return -1;
274 	}
275 	mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
276 	/* feed data */
277 	n = string->len;
278 	p = string->val;
279 
280 	filter = convd->filter1;
281 	if (filter != NULL) {
282 		filter_function = filter->filter_function;
283 		while (n > 0) {
284 			if ((*filter_function)(*p++, filter) < 0) {
285 				if (loc) {
286 					*loc = p - string->val;
287 				}
288 				return -1;
289 			}
290 			n--;
291 		}
292 	}
293 	if (loc) {
294 		*loc = p - string->val;
295 	}
296 	return 0;
297 }
298 
299 
300 int
mbfl_buffer_converter_flush(mbfl_buffer_converter * convd)301 mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
302 {
303 	if (convd == NULL) {
304 		return -1;
305 	}
306 
307 	if (convd->filter1 != NULL) {
308 		mbfl_convert_filter_flush(convd->filter1);
309 	}
310 	if (convd->filter2 != NULL) {
311 		mbfl_convert_filter_flush(convd->filter2);
312 	}
313 
314 	return 0;
315 }
316 
317 mbfl_string *
mbfl_buffer_converter_getbuffer(mbfl_buffer_converter * convd,mbfl_string * result)318 mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
319 {
320 	if (convd != NULL && result != NULL && convd->device.buffer != NULL) {
321 		result->no_encoding = convd->to->no_encoding;
322 		result->val = convd->device.buffer;
323 		result->len = convd->device.pos;
324 	} else {
325 		result = NULL;
326 	}
327 
328 	return result;
329 }
330 
331 mbfl_string *
mbfl_buffer_converter_result(mbfl_buffer_converter * convd,mbfl_string * result)332 mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
333 {
334 	if (convd == NULL || result == NULL) {
335 		return NULL;
336 	}
337 	result->no_encoding = convd->to->no_encoding;
338 	return mbfl_memory_device_result(&convd->device, result);
339 }
340 
341 mbfl_string *
mbfl_buffer_converter_feed_result(mbfl_buffer_converter * convd,mbfl_string * string,mbfl_string * result)342 mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
343 				  mbfl_string *result)
344 {
345 	if (convd == NULL || string == NULL || result == NULL) {
346 		return NULL;
347 	}
348 	mbfl_buffer_converter_feed(convd, string);
349 	if (convd->filter1 != NULL) {
350 		mbfl_convert_filter_flush(convd->filter1);
351 	}
352 	if (convd->filter2 != NULL) {
353 		mbfl_convert_filter_flush(convd->filter2);
354 	}
355 	result->no_encoding = convd->to->no_encoding;
356 	return mbfl_memory_device_result(&convd->device, result);
357 }
358 
/*
 * Sum of the num_illegalchar counters of the converter's filters.
 * A NULL converter, or one without filters, yields 0.
 */
int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
	int total = 0;

	if (convd != NULL) {
		if (convd->filter1 != NULL) {
			total += convd->filter1->num_illegalchar;
		}
		if (convd->filter2 != NULL) {
			total += convd->filter2->num_illegalchar;
		}
	}

	return total;
}
377 
378 /*
379  * encoding detector
380  */
381 mbfl_encoding_detector *
mbfl_encoding_detector_new(enum mbfl_no_encoding * elist,int elistsz,int strict)382 mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict)
383 {
384 	mbfl_encoding_detector *identd;
385 
386 	int i, num;
387 	mbfl_identify_filter *filter;
388 
389 	if (elist == NULL || elistsz <= 0) {
390 		return NULL;
391 	}
392 
393 	/* allocate */
394 	identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
395 	if (identd == NULL) {
396 		return NULL;
397 	}
398 	identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
399 	if (identd->filter_list == NULL) {
400 		mbfl_free(identd);
401 		return NULL;
402 	}
403 
404 	/* create filters */
405 	i = 0;
406 	num = 0;
407 	while (i < elistsz) {
408 		filter = mbfl_identify_filter_new(elist[i]);
409 		if (filter != NULL) {
410 			identd->filter_list[num] = filter;
411 			num++;
412 		}
413 		i++;
414 	}
415 	identd->filter_list_size = num;
416 
417 	/* set strict flag */
418 	identd->strict = strict;
419 
420 	return identd;
421 }
422 
423 mbfl_encoding_detector *
mbfl_encoding_detector_new2(const mbfl_encoding ** elist,int elistsz,int strict)424 mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict)
425 {
426 	mbfl_encoding_detector *identd;
427 
428 	int i, num;
429 	mbfl_identify_filter *filter;
430 
431 	if (elist == NULL || elistsz <= 0) {
432 		return NULL;
433 	}
434 
435 	/* allocate */
436 	identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
437 	if (identd == NULL) {
438 		return NULL;
439 	}
440 	identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
441 	if (identd->filter_list == NULL) {
442 		mbfl_free(identd);
443 		return NULL;
444 	}
445 
446 	/* create filters */
447 	i = 0;
448 	num = 0;
449 	while (i < elistsz) {
450 		filter = mbfl_identify_filter_new2(elist[i]);
451 		if (filter != NULL) {
452 			identd->filter_list[num] = filter;
453 			num++;
454 		}
455 		i++;
456 	}
457 	identd->filter_list_size = num;
458 
459 	/* set strict flag */
460 	identd->strict = strict;
461 
462 	return identd;
463 }
464 
465 
466 void
mbfl_encoding_detector_delete(mbfl_encoding_detector * identd)467 mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
468 {
469 	int i;
470 
471 	if (identd != NULL) {
472 		if (identd->filter_list != NULL) {
473 			i = identd->filter_list_size;
474 			while (i > 0) {
475 				i--;
476 				mbfl_identify_filter_delete(identd->filter_list[i]);
477 			}
478 			mbfl_free((void *)identd->filter_list);
479 		}
480 		mbfl_free((void *)identd);
481 	}
482 }
483 
484 int
mbfl_encoding_detector_feed(mbfl_encoding_detector * identd,mbfl_string * string)485 mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
486 {
487 	int i, n, num, bad, res;
488 	unsigned char *p;
489 	mbfl_identify_filter *filter;
490 
491 	res = 0;
492 	/* feed data */
493 	if (identd != NULL && string != NULL && string->val != NULL) {
494 		num = identd->filter_list_size;
495 		n = string->len;
496 		p = string->val;
497 		bad = 0;
498 		while (n > 0) {
499 			for (i = 0; i < num; i++) {
500 				filter = identd->filter_list[i];
501 				if (!filter->flag) {
502 					(*filter->filter_function)(*p, filter);
503 					if (filter->flag) {
504 						bad++;
505 					}
506 				}
507 			}
508 			if ((num - 1) <= bad) {
509 				res = 1;
510 				break;
511 			}
512 			p++;
513 			n--;
514 		}
515 	}
516 
517 	return res;
518 }
519 
mbfl_encoding_detector_judge2(mbfl_encoding_detector * identd)520 const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd)
521 {
522 	mbfl_identify_filter *filter;
523 	const mbfl_encoding *encoding = NULL;
524 	int n;
525 
526 	/* judge */
527 	if (identd != NULL) {
528 		n = identd->filter_list_size - 1;
529 		while (n >= 0) {
530 			filter = identd->filter_list[n];
531 			if (!filter->flag) {
532 				if (!identd->strict || !filter->status) {
533 					encoding = filter->encoding;
534 				}
535 			}
536 			n--;
537 		}
538 
539 		/* fallback judge */
540 		if (!encoding) {
541 			n = identd->filter_list_size - 1;
542 			while (n >= 0) {
543 				filter = identd->filter_list[n];
544 				if (!filter->flag) {
545 					encoding = filter->encoding;
546 				}
547 				n--;
548  			}
549 		}
550 	}
551 
552 	return encoding;
553 }
554 
/*
 * Encoding-number flavour of mbfl_encoding_detector_judge2(): returns the
 * number of the judged encoding, or mbfl_no_encoding_invalid when no
 * encoding was judged.
 */
enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
{
	const mbfl_encoding *judged = mbfl_encoding_detector_judge2(identd);

	if (judged == NULL) {
		return mbfl_no_encoding_invalid;
	}
	return judged->no_encoding;
}
560 
561 
562 /*
563  * encoding converter
564  */
565 mbfl_string *
mbfl_convert_encoding(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding toenc)566 mbfl_convert_encoding(
567     mbfl_string *string,
568     mbfl_string *result,
569     enum mbfl_no_encoding toenc)
570 {
571 	int n;
572 	unsigned char *p;
573 	const mbfl_encoding *encoding;
574 	mbfl_memory_device device;
575 	mbfl_convert_filter *filter1;
576 	mbfl_convert_filter *filter2;
577 
578 	/* initialize */
579 	encoding = mbfl_no2encoding(toenc);
580 	if (encoding == NULL || string == NULL || result == NULL) {
581 		return NULL;
582 	}
583 
584 	filter1 = NULL;
585 	filter2 = NULL;
586 	if (mbfl_convert_filter_get_vtbl(string->no_encoding, toenc) != NULL) {
587 		filter1 = mbfl_convert_filter_new(string->no_encoding, toenc, mbfl_memory_device_output, 0, &device);
588 	} else {
589 		filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
590 		if (filter2 != NULL) {
591 			filter1 = mbfl_convert_filter_new(string->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
592 			if (filter1 == NULL) {
593 				mbfl_convert_filter_delete(filter2);
594 			}
595 		}
596 	}
597 	if (filter1 == NULL) {
598 		return NULL;
599 	}
600 
601 	if (filter2 != NULL) {
602 		filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
603 		filter2->illegal_substchar = 0x3f;		/* '?' */
604 	}
605 
606 	mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
607 
608 	/* feed data */
609 	n = string->len;
610 	p = string->val;
611 	if (p != NULL) {
612 		while (n > 0) {
613 			if ((*filter1->filter_function)(*p++, filter1) < 0) {
614 				break;
615 			}
616 			n--;
617 		}
618 	}
619 
620 	mbfl_convert_filter_flush(filter1);
621 	mbfl_convert_filter_delete(filter1);
622 	if (filter2 != NULL) {
623 		mbfl_convert_filter_flush(filter2);
624 		mbfl_convert_filter_delete(filter2);
625 	}
626 
627 	return mbfl_memory_device_result(&device, result);
628 }
629 
630 
631 /*
632  * identify encoding
633  */
634 const mbfl_encoding *
mbfl_identify_encoding(mbfl_string * string,enum mbfl_no_encoding * elist,int elistsz,int strict)635 mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
636 {
637 	int i, n, num, bad;
638 	unsigned char *p;
639 	mbfl_identify_filter *flist, *filter;
640 	const mbfl_encoding *encoding;
641 
642 	/* flist is an array of mbfl_identify_filter instances */
643 	flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
644 	if (flist == NULL) {
645 		return NULL;
646 	}
647 
648 	num = 0;
649 	if (elist != NULL) {
650 		for (i = 0; i < elistsz; i++) {
651 			if (!mbfl_identify_filter_init(&flist[num], elist[i])) {
652 				num++;
653 			}
654 		}
655 	}
656 
657 	/* feed data */
658 	n = string->len;
659 	p = string->val;
660 
661 	if (p != NULL) {
662 		bad = 0;
663 		while (n > 0) {
664 			for (i = 0; i < num; i++) {
665 				filter = &flist[i];
666 				if (!filter->flag) {
667 					(*filter->filter_function)(*p, filter);
668 					if (filter->flag) {
669 						bad++;
670 					}
671 				}
672 			}
673 			if ((num - 1) <= bad && !strict) {
674 				break;
675 			}
676 			p++;
677 			n--;
678 		}
679 	}
680 
681 	/* judge */
682 	encoding = NULL;
683 
684 	for (i = 0; i < num; i++) {
685 		filter = &flist[i];
686 		if (!filter->flag) {
687 			if (strict && filter->status) {
688  				continue;
689  			}
690 			encoding = filter->encoding;
691 			break;
692 		}
693 	}
694 
695 	/* fall-back judge */
696 	if (!encoding) {
697 		for (i = 0; i < num; i++) {
698 			filter = &flist[i];
699 			if (!filter->flag && (!strict || !filter->status)) {
700 				encoding = filter->encoding;
701 				break;
702 			}
703 		}
704 	}
705 
706 	/* cleanup */
707 	/* dtors should be called in reverse order */
708 	i = num; while (--i >= 0) {
709 		mbfl_identify_filter_cleanup(&flist[i]);
710 	}
711 
712 	mbfl_free((void *)flist);
713 
714 	return encoding;
715 }
716 
717 const mbfl_encoding *
mbfl_identify_encoding2(mbfl_string * string,const mbfl_encoding ** elist,int elistsz,int strict)718 mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict)
719 {
720 	int i, n, num, bad;
721 	unsigned char *p;
722 	mbfl_identify_filter *flist, *filter;
723 	const mbfl_encoding *encoding;
724 
725 	/* flist is an array of mbfl_identify_filter instances */
726 	flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
727 	if (flist == NULL) {
728 		return NULL;
729 	}
730 
731 	num = 0;
732 	if (elist != NULL) {
733 		for (i = 0; i < elistsz; i++) {
734 			if (!mbfl_identify_filter_init2(&flist[num], elist[i])) {
735 				num++;
736 			}
737 		}
738 	}
739 
740 	/* feed data */
741 	n = string->len;
742 	p = string->val;
743 
744 	if (p != NULL) {
745 		bad = 0;
746 		while (n > 0) {
747 			for (i = 0; i < num; i++) {
748 				filter = &flist[i];
749 				if (!filter->flag) {
750 					(*filter->filter_function)(*p, filter);
751 					if (filter->flag) {
752 						bad++;
753 					}
754 				}
755 			}
756 			if ((num - 1) <= bad && !strict) {
757 				break;
758 			}
759 			p++;
760 			n--;
761 		}
762 	}
763 
764 	/* judge */
765 	encoding = NULL;
766 
767 	for (i = 0; i < num; i++) {
768 		filter = &flist[i];
769 		if (!filter->flag) {
770 			if (strict && filter->status) {
771  				continue;
772  			}
773 			encoding = filter->encoding;
774 			break;
775 		}
776 	}
777 
778 	/* fall-back judge */
779 	if (!encoding) {
780 		for (i = 0; i < num; i++) {
781 			filter = &flist[i];
782 			if (!filter->flag && (!strict || !filter->status)) {
783 				encoding = filter->encoding;
784 				break;
785 			}
786 		}
787 	}
788 
789 	/* cleanup */
790 	/* dtors should be called in reverse order */
791 	i = num; while (--i >= 0) {
792 		mbfl_identify_filter_cleanup(&flist[i]);
793 	}
794 
795 	mbfl_free((void *)flist);
796 
797 	return encoding;
798 }
799 
800 /*
801  *  strlen
802  */
/*
 * Output callback that only counts: `data` points to an int counter which
 * is incremented once per call.  The character is returned unchanged.
 */
static int
filter_count_output(int c, void *data)
{
	int *counter = (int *)data;

	*counter += 1;
	return c;
}
809 
810 int
mbfl_strlen(mbfl_string * string)811 mbfl_strlen(mbfl_string *string)
812 {
813 	int len, n, m, k;
814 	unsigned char *p;
815 	const unsigned char *mbtab;
816 	const mbfl_encoding *encoding;
817 
818 	encoding = mbfl_no2encoding(string->no_encoding);
819 	if (encoding == NULL || string == NULL) {
820 		return -1;
821 	}
822 
823 	len = 0;
824 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
825 		len = string->len;
826 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
827 		len = string->len/2;
828 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
829 		len = string->len/4;
830 	} else if (encoding->mblen_table != NULL) {
831 		mbtab = encoding->mblen_table;
832 		n = 0;
833 		p = string->val;
834 		k = string->len;
835 		/* count */
836 		if (p != NULL) {
837 			while (n < k) {
838 				m = mbtab[*p];
839 				n += m;
840 				p += m;
841 				len++;
842 			};
843 		}
844 	} else {
845 		/* wchar filter */
846 		mbfl_convert_filter *filter = mbfl_convert_filter_new(
847 		  string->no_encoding,
848 		  mbfl_no_encoding_wchar,
849 		  filter_count_output, 0, &len);
850 		if (filter == NULL) {
851 			return -1;
852 		}
853 		/* count */
854 		n = string->len;
855 		p = string->val;
856 		if (p != NULL) {
857 			while (n > 0) {
858 				(*filter->filter_function)(*p++, filter);
859 				n--;
860 			}
861 		}
862 		mbfl_convert_filter_delete(filter);
863 	}
864 
865 	return len;
866 }
867 
868 
869 /*
870  *  strpos
871  */
872 struct collector_strpos_data {
873 	mbfl_convert_filter *next_filter;
874 	mbfl_wchar_device needle;
875 	int needle_len;
876 	int start;
877 	int output;
878 	int found_pos;
879 	int needle_pos;
880 	int matched_pos;
881 };
882 
883 static int
collector_strpos(int c,void * data)884 collector_strpos(int c, void* data)
885 {
886 	int *p, *h, *m, n;
887 	struct collector_strpos_data *pc = (struct collector_strpos_data*)data;
888 
889 	if (pc->output >= pc->start) {
890 		if (c == (int)pc->needle.buffer[pc->needle_pos]) {
891 			if (pc->needle_pos == 0) {
892 				pc->found_pos = pc->output;			/* found position */
893 			}
894 			pc->needle_pos++;						/* needle pointer */
895 			if (pc->needle_pos >= pc->needle_len) {
896 				pc->matched_pos = pc->found_pos;	/* matched position */
897 				pc->needle_pos--;
898 				goto retry;
899 			}
900 		} else if (pc->needle_pos != 0) {
901 retry:
902 			h = (int *)pc->needle.buffer;
903 			h++;
904 			for (;;) {
905 				pc->found_pos++;
906 				p = h;
907 				m = (int *)pc->needle.buffer;
908 				n = pc->needle_pos - 1;
909 				while (n > 0 && *p == *m) {
910 					n--;
911 					p++;
912 					m++;
913 				}
914 				if (n <= 0) {
915 					if (*m != c) {
916 						pc->needle_pos = 0;
917 					}
918 					break;
919 				} else {
920 					h++;
921 					pc->needle_pos--;
922 				}
923 			}
924 		}
925 	}
926 
927 	pc->output++;
928 	return c;
929 }
930 
931 /*
932  *	oddlen
933  */
934 int
mbfl_oddlen(mbfl_string * string)935 mbfl_oddlen(mbfl_string *string)
936 {
937 	int len, n, m, k;
938 	unsigned char *p;
939 	const unsigned char *mbtab;
940 	const mbfl_encoding *encoding;
941 
942 
943 	if (string == NULL) {
944 		return -1;
945 	}
946 	encoding = mbfl_no2encoding(string->no_encoding);
947 	if (encoding == NULL) {
948 		return -1;
949 	}
950 
951 	len = 0;
952 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
953 		return 0;
954 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
955 		return len % 2;
956 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
957 		return len % 4;
958 	} else if (encoding->mblen_table != NULL) {
959  		mbtab = encoding->mblen_table;
960  		n = 0;
961 		p = string->val;
962 		k = string->len;
963 		/* count */
964 		if (p != NULL) {
965 			while (n < k) {
966 				m = mbtab[*p];
967 				n += m;
968 				p += m;
969 			};
970 		}
971 		return n-k;
972 	} else {
973 		/* how can i do ? */
974 		return 0;
975 	}
976 	/* NOT REACHED */
977 }
978 
979 int
mbfl_strpos(mbfl_string * haystack,mbfl_string * needle,int offset,int reverse)980 mbfl_strpos(
981     mbfl_string *haystack,
982     mbfl_string *needle,
983     int offset,
984     int reverse)
985 {
986 	int result;
987 	mbfl_string _haystack_u8, _needle_u8;
988 	const mbfl_string *haystack_u8, *needle_u8 = NULL;
989 	const unsigned char *u8_tbl;
990 
991 	if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
992 		return -8;
993 	}
994 
995 	{
996 		const mbfl_encoding *u8_enc;
997 		u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8);
998 		if (u8_enc == NULL || u8_enc->mblen_table == NULL) {
999 			return -8;
1000 		}
1001 		u8_tbl = u8_enc->mblen_table;
1002 	}
1003 
1004 	if (haystack->no_encoding != mbfl_no_encoding_utf8) {
1005 		mbfl_string_init(&_haystack_u8);
1006 		haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8);
1007 		if (haystack_u8 == NULL) {
1008 			result = -4;
1009 			goto out;
1010 		}
1011 	} else {
1012 		haystack_u8 = haystack;
1013 	}
1014 
1015 	if (needle->no_encoding != mbfl_no_encoding_utf8) {
1016 		mbfl_string_init(&_needle_u8);
1017 		needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8);
1018 		if (needle_u8 == NULL) {
1019 			result = -4;
1020 			goto out;
1021 		}
1022 	} else {
1023 		needle_u8 = needle;
1024 	}
1025 
1026 	if (needle_u8->len < 1) {
1027 		result = -8;
1028 		goto out;
1029 	}
1030 
1031 	result = -1;
1032 	if (haystack_u8->len < needle_u8->len) {
1033 		goto out;
1034 	}
1035 
1036 	if (!reverse) {
1037 		unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
1038 		unsigned int needle_u8_len = needle_u8->len;
1039 		unsigned int i;
1040 		const unsigned char *p, *q, *e;
1041 		const unsigned char *haystack_u8_val = haystack_u8->val,
1042 		                    *needle_u8_val = needle_u8->val;
1043 		for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
1044 			jtbl[i] = needle_u8_len + 1;
1045 		}
1046 		for (i = 0; i < needle_u8_len - 1; ++i) {
1047 			jtbl[needle_u8_val[i]] = needle_u8_len - i;
1048 		}
1049 		e = haystack_u8_val + haystack_u8->len;
1050 		p = haystack_u8_val;
1051 		while (--offset >= 0) {
1052 			if (p >= e) {
1053 				result = -16;
1054 				goto out;
1055 			}
1056 			p += u8_tbl[*p];
1057 		}
1058 		p += needle_u8_len;
1059 		if (p > e) {
1060 			goto out;
1061 		}
1062 		while (p <= e) {
1063 			const unsigned char *pv = p;
1064 			q = needle_u8_val + needle_u8_len;
1065 			for (;;) {
1066 				if (q == needle_u8_val) {
1067 					result = 0;
1068 					while (p > haystack_u8_val) {
1069 						unsigned char c = *--p;
1070 						if (c < 0x80) {
1071 							++result;
1072 						} else if ((c & 0xc0) != 0x80) {
1073 							++result;
1074 						}
1075 					}
1076 					goto out;
1077 				}
1078 				if (*--q != *--p) {
1079 					break;
1080 				}
1081 			}
1082 			p += jtbl[*p];
1083 			if (p <= pv) {
1084 				p = pv + 1;
1085 			}
1086 		}
1087 	} else {
1088 		unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
1089 		unsigned int needle_u8_len = needle_u8->len, needle_len = 0;
1090 		unsigned int i;
1091 		const unsigned char *p, *e, *q, *qe;
1092 		const unsigned char *haystack_u8_val = haystack_u8->val,
1093 		                    *needle_u8_val = needle_u8->val;
1094 		for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
1095 			jtbl[i] = needle_u8_len;
1096 		}
1097 		for (i = needle_u8_len - 1; i > 0; --i) {
1098 			unsigned char c = needle_u8_val[i];
1099 			jtbl[c] = i;
1100 			if (c < 0x80) {
1101 				++needle_len;
1102 			} else if ((c & 0xc0) != 0x80) {
1103 				++needle_len;
1104 			}
1105 		}
1106 		{
1107 			unsigned char c = needle_u8_val[0];
1108 			if (c < 0x80) {
1109 				++needle_len;
1110 			} else if ((c & 0xc0) != 0x80) {
1111 				++needle_len;
1112 			}
1113 		}
1114 		e = haystack_u8_val;
1115 		p = e + haystack_u8->len;
1116 		qe = needle_u8_val + needle_u8_len;
1117 		if (offset < 0) {
1118 			if (-offset > needle_len) {
1119 				offset += needle_len;
1120 				while (offset < 0) {
1121 					unsigned char c;
1122 					if (p <= e) {
1123 						result = -16;
1124 						goto out;
1125 					}
1126 					c = *(--p);
1127 					if (c < 0x80) {
1128 						++offset;
1129 					} else if ((c & 0xc0) != 0x80) {
1130 						++offset;
1131 					}
1132 				}
1133 			}
1134 		} else {
1135 			const unsigned char *ee = haystack_u8_val + haystack_u8->len;
1136 			while (--offset >= 0) {
1137 				if (e >= ee) {
1138 					result = -16;
1139 					goto out;
1140 				}
1141 				e += u8_tbl[*e];
1142 			}
1143 		}
1144 		if (p < e + needle_u8_len) {
1145 			goto out;
1146 		}
1147 		p -= needle_u8_len;
1148 		while (p >= e) {
1149 			const unsigned char *pv = p;
1150 			q = needle_u8_val;
1151 			for (;;) {
1152 				if (q == qe) {
1153 					result = 0;
1154 					p -= needle_u8_len;
1155 					while (p > haystack_u8_val) {
1156 						unsigned char c = *--p;
1157 						if (c < 0x80) {
1158 							++result;
1159 						} else if ((c & 0xc0) != 0x80) {
1160 							++result;
1161 						}
1162 					}
1163 					goto out;
1164 				}
1165 				if (*q != *p) {
1166 					break;
1167 				}
1168 				++p, ++q;
1169 			}
1170 			p -= jtbl[*p];
1171 			if (p >= pv) {
1172 				p = pv - 1;
1173 			}
1174 		}
1175 	}
1176 out:
1177 	if (haystack_u8 == &_haystack_u8) {
1178 		mbfl_string_clear(&_haystack_u8);
1179 	}
1180 	if (needle_u8 == &_needle_u8) {
1181 		mbfl_string_clear(&_needle_u8);
1182 	}
1183 	return result;
1184 }
1185 
1186 /*
1187  *  substr_count
1188  */
1189 
1190 int
mbfl_substr_count(mbfl_string * haystack,mbfl_string * needle)1191 mbfl_substr_count(
1192     mbfl_string *haystack,
1193     mbfl_string *needle
1194    )
1195 {
1196 	int n, result = 0;
1197 	unsigned char *p;
1198 	mbfl_convert_filter *filter;
1199 	struct collector_strpos_data pc;
1200 
1201 	if (haystack == NULL || needle == NULL) {
1202 		return -8;
1203 	}
1204 	/* needle is converted into wchar */
1205 	mbfl_wchar_device_init(&pc.needle);
1206 	filter = mbfl_convert_filter_new(
1207 	  needle->no_encoding,
1208 	  mbfl_no_encoding_wchar,
1209 	  mbfl_wchar_device_output, 0, &pc.needle);
1210 	if (filter == NULL) {
1211 		return -4;
1212 	}
1213 	p = needle->val;
1214 	n = needle->len;
1215 	if (p != NULL) {
1216 		while (n > 0) {
1217 			if ((*filter->filter_function)(*p++, filter) < 0) {
1218 				break;
1219 			}
1220 			n--;
1221 		}
1222 	}
1223 	mbfl_convert_filter_flush(filter);
1224 	mbfl_convert_filter_delete(filter);
1225 	pc.needle_len = pc.needle.pos;
1226 	if (pc.needle.buffer == NULL) {
1227 		return -4;
1228 	}
1229 	if (pc.needle_len <= 0) {
1230 		mbfl_wchar_device_clear(&pc.needle);
1231 		return -2;
1232 	}
1233 	/* initialize filter and collector data */
1234 	filter = mbfl_convert_filter_new(
1235 	  haystack->no_encoding,
1236 	  mbfl_no_encoding_wchar,
1237 	  collector_strpos, 0, &pc);
1238 	if (filter == NULL) {
1239 		mbfl_wchar_device_clear(&pc.needle);
1240 		return -4;
1241 	}
1242 	pc.start = 0;
1243 	pc.output = 0;
1244 	pc.needle_pos = 0;
1245 	pc.found_pos = 0;
1246 	pc.matched_pos = -1;
1247 
1248 	/* feed data */
1249 	p = haystack->val;
1250 	n = haystack->len;
1251 	if (p != NULL) {
1252 		while (n > 0) {
1253 			if ((*filter->filter_function)(*p++, filter) < 0) {
1254 				pc.matched_pos = -4;
1255 				break;
1256 			}
1257 			if (pc.matched_pos >= 0) {
1258 				++result;
1259 				pc.matched_pos = -1;
1260 				pc.needle_pos = 0;
1261 			}
1262 			n--;
1263 		}
1264 	}
1265 	mbfl_convert_filter_flush(filter);
1266 	mbfl_convert_filter_delete(filter);
1267 	mbfl_wchar_device_clear(&pc.needle);
1268 
1269 	return result;
1270 }
1271 
1272 /*
1273  *  substr
1274  */
1275 struct collector_substr_data {
1276 	mbfl_convert_filter *next_filter;
1277 	int start;
1278 	int stop;
1279 	int output;
1280 };
1281 
1282 static int
collector_substr(int c,void * data)1283 collector_substr(int c, void* data)
1284 {
1285 	struct collector_substr_data *pc = (struct collector_substr_data*)data;
1286 
1287 	if (pc->output >= pc->stop) {
1288 		return -1;
1289 	}
1290 
1291 	if (pc->output >= pc->start) {
1292 		(*pc->next_filter->filter_function)(c, pc->next_filter);
1293 	}
1294 
1295 	pc->output++;
1296 
1297 	return c;
1298 }
1299 
1300 mbfl_string *
mbfl_substr(mbfl_string * string,mbfl_string * result,int from,int length)1301 mbfl_substr(
1302     mbfl_string *string,
1303     mbfl_string *result,
1304     int from,
1305     int length)
1306 {
1307 	const mbfl_encoding *encoding;
1308 	int n, m, k, len, start, end;
1309 	unsigned char *p, *w;
1310 	const unsigned char *mbtab;
1311 
1312 	encoding = mbfl_no2encoding(string->no_encoding);
1313 	if (encoding == NULL || string == NULL || result == NULL) {
1314 		return NULL;
1315 	}
1316 	mbfl_string_init(result);
1317 	result->no_language = string->no_language;
1318 	result->no_encoding = string->no_encoding;
1319 
1320 	if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
1321 	   encoding->mblen_table != NULL) {
1322 		len = string->len;
1323 		start = from;
1324 		end = from + length;
1325 		if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1326 			start *= 2;
1327 			end = start + length*2;
1328 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1329 			start *= 4;
1330 			end = start + length*4;
1331 		} else if (encoding->mblen_table != NULL) {
1332 			mbtab = encoding->mblen_table;
1333 			start = 0;
1334 			end = 0;
1335 			n = 0;
1336 			k = 0;
1337 			p = string->val;
1338 			if (p != NULL) {
1339 				/* search start position */
1340 				while (k <= from) {
1341 					start = n;
1342 					if (n >= len) {
1343 						break;
1344 					}
1345 					m = mbtab[*p];
1346 					n += m;
1347 					p += m;
1348 					k++;
1349 				}
1350 				/* detect end position */
1351 				k = 0;
1352 				end = start;
1353 				while (k < length) {
1354 					end = n;
1355 					if (n >= len) {
1356 						break;
1357 					}
1358 					m = mbtab[*p];
1359 					n += m;
1360 					p += m;
1361 					k++;
1362 				}
1363 			}
1364 		}
1365 
1366 		if (start > len) {
1367 			start = len;
1368 		}
1369 		if (start < 0) {
1370 			start = 0;
1371 		}
1372 		if (end > len) {
1373 			end = len;
1374 		}
1375 		if (end < 0) {
1376 			end = 0;
1377 		}
1378 		if (start > end) {
1379 			start = end;
1380 		}
1381 
1382 		/* allocate memory and copy */
1383 		n = end - start;
1384 		result->len = 0;
1385 		result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char));
1386 		if (w != NULL) {
1387 			p = string->val;
1388 			if (p != NULL) {
1389 				p += start;
1390 				result->len = n;
1391 				while (n > 0) {
1392 					*w++ = *p++;
1393 					n--;
1394 				}
1395 			}
1396 			*w++ = '\0';
1397 			*w++ = '\0';
1398 			*w++ = '\0';
1399 			*w = '\0';
1400 		} else {
1401 			result = NULL;
1402 		}
1403 	} else {
1404 		mbfl_memory_device device;
1405 		struct collector_substr_data pc;
1406 		mbfl_convert_filter *decoder;
1407 		mbfl_convert_filter *encoder;
1408 
1409 		mbfl_memory_device_init(&device, length + 1, 0);
1410 		mbfl_string_init(result);
1411 		result->no_language = string->no_language;
1412 		result->no_encoding = string->no_encoding;
1413 		/* output code filter */
1414 		decoder = mbfl_convert_filter_new(
1415 		    mbfl_no_encoding_wchar,
1416 		    string->no_encoding,
1417 		    mbfl_memory_device_output, 0, &device);
1418 		/* wchar filter */
1419 		encoder = mbfl_convert_filter_new(
1420 		    string->no_encoding,
1421 		    mbfl_no_encoding_wchar,
1422 		    collector_substr, 0, &pc);
1423 		if (decoder == NULL || encoder == NULL) {
1424 			mbfl_convert_filter_delete(encoder);
1425 			mbfl_convert_filter_delete(decoder);
1426 			return NULL;
1427 		}
1428 		pc.next_filter = decoder;
1429 		pc.start = from;
1430 		pc.stop = from + length;
1431 		pc.output = 0;
1432 
1433 		/* feed data */
1434 		p = string->val;
1435 		n = string->len;
1436 		if (p != NULL) {
1437 			while (n > 0) {
1438 				if ((*encoder->filter_function)(*p++, encoder) < 0) {
1439 					break;
1440 				}
1441 				n--;
1442 			}
1443 		}
1444 
1445 		mbfl_convert_filter_flush(encoder);
1446 		mbfl_convert_filter_flush(decoder);
1447 		result = mbfl_memory_device_result(&device, result);
1448 		mbfl_convert_filter_delete(encoder);
1449 		mbfl_convert_filter_delete(decoder);
1450 	}
1451 
1452 	return result;
1453 }
1454 
1455 /*
1456  *  strcut
1457  */
1458 mbfl_string *
mbfl_strcut(mbfl_string * string,mbfl_string * result,int from,int length)1459 mbfl_strcut(
1460     mbfl_string *string,
1461     mbfl_string *result,
1462     int from,
1463     int length)
1464 {
1465 	const mbfl_encoding *encoding;
1466 	mbfl_memory_device device;
1467 
1468 	/* validate the parameters */
1469 	if (string == NULL || string->val == NULL || result == NULL) {
1470 		return NULL;
1471 	}
1472 
1473 	if (from < 0 || length < 0) {
1474 		return NULL;
1475 	}
1476 
1477 	if (from >= string->len) {
1478 		from = string->len;
1479 	}
1480 
1481 	encoding = mbfl_no2encoding(string->no_encoding);
1482 	if (encoding == NULL) {
1483 		return NULL;
1484 	}
1485 
1486 	mbfl_string_init(result);
1487 	result->no_language = string->no_language;
1488 	result->no_encoding = string->no_encoding;
1489 
1490 	if ((encoding->flag & (MBFL_ENCTYPE_SBCS
1491 				| MBFL_ENCTYPE_WCS2BE
1492 				| MBFL_ENCTYPE_WCS2LE
1493 				| MBFL_ENCTYPE_WCS4BE
1494 				| MBFL_ENCTYPE_WCS4LE))
1495 			|| encoding->mblen_table != NULL) {
1496 		const unsigned char *start = NULL;
1497 		const unsigned char *end = NULL;
1498 		unsigned char *w;
1499 		unsigned int sz;
1500 
1501 		if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1502 			from &= -2;
1503 
1504 			if (length >= string->len - from) {
1505 				length = string->len - from;
1506 			}
1507 
1508 			start = string->val + from;
1509 			end   = start + (length & -2);
1510 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1511 			from &= -4;
1512 
1513 			if (length >= string->len - from) {
1514 				length = string->len - from;
1515 			}
1516 
1517 			start = string->val + from;
1518 			end   = start + (length & -4);
1519 		} else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
1520 			if (length >= string->len - from) {
1521 				length = string->len - from;
1522 			}
1523 
1524 			start = string->val + from;
1525 			end = start + length;
1526 		} else if (encoding->mblen_table != NULL) {
1527 			const unsigned char *mbtab = encoding->mblen_table;
1528 			const unsigned char *p, *q;
1529 			int m;
1530 
1531 			/* search start position */
1532 			for (m = 0, p = string->val, q = p + from;
1533 					p < q; p += (m = mbtab[*p]));
1534 
1535 			if (p > q) {
1536 				p -= m;
1537 			}
1538 
1539 			start = p;
1540 
1541 			/* search end position */
1542 			if (length >= (int)string->len - (start - string->val)) {
1543 				end = string->val + string->len;
1544 			} else {
1545 				for (q = p + length; p < q; p += (m = mbtab[*p]));
1546 
1547 				if (p > q) {
1548 					p -= m;
1549 				}
1550 				end = p;
1551 			}
1552 		} else {
1553 			/* never reached */
1554 			return NULL;
1555 		}
1556 
1557 		/* allocate memory and copy string */
1558 		sz = end - start;
1559 		if ((w = (unsigned char*)mbfl_calloc(sz + 8,
1560 				sizeof(unsigned char))) == NULL) {
1561 			return NULL;
1562 		}
1563 
1564 		memcpy(w, start, sz);
1565 		w[sz] = '\0';
1566 		w[sz + 1] = '\0';
1567 		w[sz + 2] = '\0';
1568 		w[sz + 3] = '\0';
1569 
1570 		result->val = w;
1571 		result->len = sz;
1572 	} else {
1573 		mbfl_convert_filter *encoder     = NULL;
1574 		mbfl_convert_filter *decoder     = NULL;
1575 		const unsigned char *p, *q, *r;
1576 		struct {
1577 			mbfl_convert_filter encoder;
1578 			mbfl_convert_filter decoder;
1579 			const unsigned char *p;
1580 			int pos;
1581 		} bk, _bk;
1582 
1583 		/* output code filter */
1584 		if (!(decoder = mbfl_convert_filter_new(
1585 				mbfl_no_encoding_wchar,
1586 				string->no_encoding,
1587 				mbfl_memory_device_output, 0, &device))) {
1588 			return NULL;
1589 		}
1590 
1591 		/* wchar filter */
1592 		if (!(encoder = mbfl_convert_filter_new(
1593 				string->no_encoding,
1594 				mbfl_no_encoding_wchar,
1595 				mbfl_filter_output_null,
1596 				NULL, NULL))) {
1597 			mbfl_convert_filter_delete(decoder);
1598 			return NULL;
1599 		}
1600 
1601 		mbfl_memory_device_init(&device, length + 8, 0);
1602 
1603 		p = string->val;
1604 
1605 		/* search start position */
1606 		for (q = string->val + from; p < q; p++) {
1607 			(*encoder->filter_function)(*p, encoder);
1608 		}
1609 
1610 		/* switch the drain direction */
1611 		encoder->output_function = (int(*)(int,void *))decoder->filter_function;
1612 		encoder->flush_function = (int(*)(void *))decoder->filter_flush;
1613 		encoder->data = decoder;
1614 
1615 		q = string->val + string->len;
1616 
1617 		/* save the encoder, decoder state and the pointer */
1618 		mbfl_convert_filter_copy(decoder, &_bk.decoder);
1619 		mbfl_convert_filter_copy(encoder, &_bk.encoder);
1620 		_bk.p = p;
1621 		_bk.pos = device.pos;
1622 
1623 		if (length > q - p) {
1624 			length = q - p;
1625 		}
1626 
1627 		if (length >= 20) {
1628 			/* output a little shorter than "length" */
1629 			/* XXX: the constant "20" was determined purely on the heuristics. */
1630 			for (r = p + length - 20; p < r; p++) {
1631 				(*encoder->filter_function)(*p, encoder);
1632 			}
1633 
1634 			/* if the offset of the resulting string exceeds the length,
1635 			 * then restore the state */
1636 			if (device.pos > length) {
1637 				p = _bk.p;
1638 				device.pos = _bk.pos;
1639 				decoder->filter_dtor(decoder);
1640 				encoder->filter_dtor(encoder);
1641 				mbfl_convert_filter_copy(&_bk.decoder, decoder);
1642 				mbfl_convert_filter_copy(&_bk.encoder, encoder);
1643 				bk = _bk;
1644 			} else {
1645 				/* save the encoder, decoder state and the pointer */
1646 				mbfl_convert_filter_copy(decoder, &bk.decoder);
1647 				mbfl_convert_filter_copy(encoder, &bk.encoder);
1648 				bk.p = p;
1649 				bk.pos = device.pos;
1650 
1651 				/* flush the stream */
1652 				(*encoder->filter_flush)(encoder);
1653 
1654 				/* if the offset of the resulting string exceeds the length,
1655 				 * then restore the state */
1656 				if (device.pos > length) {
1657 					bk.decoder.filter_dtor(&bk.decoder);
1658 					bk.encoder.filter_dtor(&bk.encoder);
1659 
1660 					p = _bk.p;
1661 					device.pos = _bk.pos;
1662 					decoder->filter_dtor(decoder);
1663 					encoder->filter_dtor(encoder);
1664 					mbfl_convert_filter_copy(&_bk.decoder, decoder);
1665 					mbfl_convert_filter_copy(&_bk.encoder, encoder);
1666 					bk = _bk;
1667 				} else {
1668 					_bk.decoder.filter_dtor(&_bk.decoder);
1669 					_bk.encoder.filter_dtor(&_bk.encoder);
1670 
1671 					p = bk.p;
1672 					device.pos = bk.pos;
1673 					decoder->filter_dtor(decoder);
1674 					encoder->filter_dtor(encoder);
1675 					mbfl_convert_filter_copy(&bk.decoder, decoder);
1676 					mbfl_convert_filter_copy(&bk.encoder, encoder);
1677 				}
1678 			}
1679 		} else {
1680 			bk = _bk;
1681 		}
1682 
1683 		/* detect end position */
1684 		while (p < q) {
1685 			(*encoder->filter_function)(*p, encoder);
1686 
1687 			if (device.pos > length) {
1688 				/* restore filter */
1689 				p = bk.p;
1690 				device.pos = bk.pos;
1691 				decoder->filter_dtor(decoder);
1692 				encoder->filter_dtor(encoder);
1693 				mbfl_convert_filter_copy(&bk.decoder, decoder);
1694 				mbfl_convert_filter_copy(&bk.encoder, encoder);
1695 				break;
1696 			}
1697 
1698 			p++;
1699 
1700 			/* backup current state */
1701 			mbfl_convert_filter_copy(decoder, &_bk.decoder);
1702 			mbfl_convert_filter_copy(encoder, &_bk.encoder);
1703 			_bk.pos = device.pos;
1704 			_bk.p = p;
1705 
1706 			(*encoder->filter_flush)(encoder);
1707 
1708 			if (device.pos > length) {
1709 				_bk.decoder.filter_dtor(&_bk.decoder);
1710 				_bk.encoder.filter_dtor(&_bk.encoder);
1711 
1712 				/* restore filter */
1713 				p = bk.p;
1714 				device.pos = bk.pos;
1715 				decoder->filter_dtor(decoder);
1716 				encoder->filter_dtor(encoder);
1717 				mbfl_convert_filter_copy(&bk.decoder, decoder);
1718 				mbfl_convert_filter_copy(&bk.encoder, encoder);
1719 				break;
1720 			}
1721 
1722 			bk.decoder.filter_dtor(&bk.decoder);
1723 			bk.encoder.filter_dtor(&bk.encoder);
1724 
1725 			p = _bk.p;
1726 			device.pos = _bk.pos;
1727 			decoder->filter_dtor(decoder);
1728 			encoder->filter_dtor(encoder);
1729 			mbfl_convert_filter_copy(&_bk.decoder, decoder);
1730 			mbfl_convert_filter_copy(&_bk.encoder, encoder);
1731 
1732 			bk = _bk;
1733 		}
1734 
1735 		(*encoder->filter_flush)(encoder);
1736 
1737 		bk.decoder.filter_dtor(&bk.decoder);
1738 		bk.encoder.filter_dtor(&bk.encoder);
1739 
1740 		result = mbfl_memory_device_result(&device, result);
1741 
1742 		mbfl_convert_filter_delete(encoder);
1743 		mbfl_convert_filter_delete(decoder);
1744 	}
1745 
1746 	return result;
1747 }
1748 
1749 
1750 /*
1751  *  strwidth
1752  */
is_fullwidth(int c)1753 static int is_fullwidth(int c)
1754 {
1755 	int i;
1756 
1757 	if (c < mbfl_eaw_table[0].begin) {
1758 		return 0;
1759 	}
1760 
1761 	for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1762 		if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1763 			return 1;
1764 		}
1765 	}
1766 
1767 	return 0;
1768 }
1769 
/*
 * Output callback for mbfl_strwidth(): adds the display width of `c` to the
 * int counter that `data` points at (2 when is_fullwidth(c), otherwise 1)
 * and returns `c` unchanged.
 */
static int
filter_count_width(int c, void* data)
{
	int *total = (int *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}
	return c;
}
1776 
1777 int
mbfl_strwidth(mbfl_string * string)1778 mbfl_strwidth(mbfl_string *string)
1779 {
1780 	int len, n;
1781 	unsigned char *p;
1782 	mbfl_convert_filter *filter;
1783 
1784 	len = 0;
1785 	if (string->len > 0 && string->val != NULL) {
1786 		/* wchar filter */
1787 		filter = mbfl_convert_filter_new(
1788 		    string->no_encoding,
1789 		    mbfl_no_encoding_wchar,
1790 		    filter_count_width, 0, &len);
1791 		if (filter == NULL) {
1792 			mbfl_convert_filter_delete(filter);
1793 			return -1;
1794 		}
1795 
1796 		/* feed data */
1797 		p = string->val;
1798 		n = string->len;
1799 		while (n > 0) {
1800 			(*filter->filter_function)(*p++, filter);
1801 			n--;
1802 		}
1803 
1804 		mbfl_convert_filter_flush(filter);
1805 		mbfl_convert_filter_delete(filter);
1806 	}
1807 
1808 	return len;
1809 }
1810 
1811 
1812 /*
1813  *  strimwidth
1814  */
1815 struct collector_strimwidth_data {
1816 	mbfl_convert_filter *decoder;
1817 	mbfl_convert_filter *decoder_backup;
1818 	mbfl_memory_device device;
1819 	int from;
1820 	int width;
1821 	int outwidth;
1822 	int outchar;
1823 	int status;
1824 	int endpos;
1825 };
1826 
1827 static int
collector_strimwidth(int c,void * data)1828 collector_strimwidth(int c, void* data)
1829 {
1830 	struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1831 
1832 	switch (pc->status) {
1833 	case 10:
1834 		(*pc->decoder->filter_function)(c, pc->decoder);
1835 		break;
1836 	default:
1837 		if (pc->outchar >= pc->from) {
1838 			pc->outwidth += (is_fullwidth(c) ? 2: 1);
1839 
1840 			if (pc->outwidth > pc->width) {
1841 				if (pc->status == 0) {
1842 					pc->endpos = pc->device.pos;
1843 					mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1844 				}
1845 				pc->status++;
1846 				(*pc->decoder->filter_function)(c, pc->decoder);
1847 				c = -1;
1848 			} else {
1849 				(*pc->decoder->filter_function)(c, pc->decoder);
1850 			}
1851 		}
1852 		pc->outchar++;
1853 		break;
1854 	}
1855 
1856 	return c;
1857 }
1858 
1859 mbfl_string *
mbfl_strimwidth(mbfl_string * string,mbfl_string * marker,mbfl_string * result,int from,int width)1860 mbfl_strimwidth(
1861     mbfl_string *string,
1862     mbfl_string *marker,
1863     mbfl_string *result,
1864     int from,
1865     int width)
1866 {
1867 	struct collector_strimwidth_data pc;
1868 	mbfl_convert_filter *encoder;
1869 	int n, mkwidth;
1870 	unsigned char *p;
1871 
1872 	if (string == NULL || result == NULL) {
1873 		return NULL;
1874 	}
1875 	mbfl_string_init(result);
1876 	result->no_language = string->no_language;
1877 	result->no_encoding = string->no_encoding;
1878 	mbfl_memory_device_init(&pc.device, MIN(string->len, width), 0);
1879 
1880 	/* output code filter */
1881 	pc.decoder = mbfl_convert_filter_new(
1882 	    mbfl_no_encoding_wchar,
1883 	    string->no_encoding,
1884 	    mbfl_memory_device_output, 0, &pc.device);
1885 	pc.decoder_backup = mbfl_convert_filter_new(
1886 	    mbfl_no_encoding_wchar,
1887 	    string->no_encoding,
1888 	    mbfl_memory_device_output, 0, &pc.device);
1889 	/* wchar filter */
1890 	encoder = mbfl_convert_filter_new(
1891 	    string->no_encoding,
1892 	    mbfl_no_encoding_wchar,
1893 	    collector_strimwidth, 0, &pc);
1894 	if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1895 		mbfl_convert_filter_delete(encoder);
1896 		mbfl_convert_filter_delete(pc.decoder);
1897 		mbfl_convert_filter_delete(pc.decoder_backup);
1898 		return NULL;
1899 	}
1900 	mkwidth = 0;
1901 	if (marker) {
1902 		mkwidth = mbfl_strwidth(marker);
1903 	}
1904 	pc.from = from;
1905 	pc.width = width - mkwidth;
1906 	pc.outwidth = 0;
1907 	pc.outchar = 0;
1908 	pc.status = 0;
1909 	pc.endpos = 0;
1910 
1911 	/* feed data */
1912 	p = string->val;
1913 	n = string->len;
1914 	if (p != NULL) {
1915 		while (n > 0) {
1916 			n--;
1917 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
1918 				break;
1919 			}
1920 		}
1921 		mbfl_convert_filter_flush(encoder);
1922 		if (pc.status != 0 && mkwidth > 0) {
1923 			pc.width += mkwidth;
1924 			while (n > 0) {
1925 				if ((*encoder->filter_function)(*p++, encoder) < 0) {
1926 					break;
1927 				}
1928 				n--;
1929 			}
1930 			mbfl_convert_filter_flush(encoder);
1931 			if (pc.status != 1) {
1932 				pc.status = 10;
1933 				pc.device.pos = pc.endpos;
1934 				mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1935 				mbfl_convert_filter_reset(encoder, marker->no_encoding, mbfl_no_encoding_wchar);
1936 				p = marker->val;
1937 				n = marker->len;
1938 				while (n > 0) {
1939 					if ((*encoder->filter_function)(*p++, encoder) < 0) {
1940 						break;
1941 					}
1942 					n--;
1943 				}
1944 				mbfl_convert_filter_flush(encoder);
1945 			}
1946 		} else if (pc.status != 0) {
1947 			pc.device.pos = pc.endpos;
1948 			mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1949 		}
1950 		mbfl_convert_filter_flush(pc.decoder);
1951 	}
1952 	result = mbfl_memory_device_result(&pc.device, result);
1953 	mbfl_convert_filter_delete(encoder);
1954 	mbfl_convert_filter_delete(pc.decoder);
1955 	mbfl_convert_filter_delete(pc.decoder_backup);
1956 
1957 	return result;
1958 }
1959 
1960 mbfl_string *
mbfl_ja_jp_hantozen(mbfl_string * string,mbfl_string * result,int mode)1961 mbfl_ja_jp_hantozen(
1962     mbfl_string *string,
1963     mbfl_string *result,
1964     int mode)
1965 {
1966 	int n;
1967 	unsigned char *p;
1968 	const mbfl_encoding *encoding;
1969 	mbfl_memory_device device;
1970 	mbfl_convert_filter *decoder = NULL;
1971 	mbfl_convert_filter *encoder = NULL;
1972 	mbfl_convert_filter *tl_filter = NULL;
1973 	mbfl_convert_filter *next_filter = NULL;
1974 	mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1975 
1976 	/* validate parameters */
1977 	if (string == NULL || result == NULL) {
1978 		return NULL;
1979 	}
1980 
1981 	encoding = mbfl_no2encoding(string->no_encoding);
1982 	if (encoding == NULL) {
1983 		return NULL;
1984 	}
1985 
1986 	mbfl_memory_device_init(&device, string->len, 0);
1987 	mbfl_string_init(result);
1988 
1989 	result->no_language = string->no_language;
1990 	result->no_encoding = string->no_encoding;
1991 
1992 	decoder = mbfl_convert_filter_new(
1993 		mbfl_no_encoding_wchar,
1994 		string->no_encoding,
1995 		mbfl_memory_device_output, 0, &device);
1996 	if (decoder == NULL) {
1997 		goto out;
1998 	}
1999 	next_filter = decoder;
2000 
2001 	param =
2002 		(mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
2003 	if (param == NULL) {
2004 		goto out;
2005 	}
2006 
2007 	param->mode = mode;
2008 
2009 	tl_filter = mbfl_convert_filter_new2(
2010 		&vtbl_tl_jisx0201_jisx0208,
2011 		(int(*)(int, void*))next_filter->filter_function,
2012 		(int(*)(void*))next_filter->filter_flush,
2013 		next_filter);
2014 	if (tl_filter == NULL) {
2015 		mbfl_free(param);
2016 		goto out;
2017 	}
2018 
2019 	tl_filter->opaque = param;
2020 	next_filter = tl_filter;
2021 
2022 	encoder = mbfl_convert_filter_new(
2023 		string->no_encoding,
2024 		mbfl_no_encoding_wchar,
2025 		(int(*)(int, void*))next_filter->filter_function,
2026 		(int(*)(void*))next_filter->filter_flush,
2027 		next_filter);
2028 	if (encoder == NULL) {
2029 		goto out;
2030 	}
2031 
2032 	/* feed data */
2033 	p = string->val;
2034 	n = string->len;
2035 	if (p != NULL) {
2036 		while (n > 0) {
2037 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
2038 				break;
2039 			}
2040 			n--;
2041 		}
2042 	}
2043 
2044 	mbfl_convert_filter_flush(encoder);
2045 	result = mbfl_memory_device_result(&device, result);
2046 out:
2047 	if (tl_filter != NULL) {
2048 		if (tl_filter->opaque != NULL) {
2049 			mbfl_free(tl_filter->opaque);
2050 		}
2051 		mbfl_convert_filter_delete(tl_filter);
2052 	}
2053 
2054 	if (decoder != NULL) {
2055 		mbfl_convert_filter_delete(decoder);
2056 	}
2057 
2058 	if (encoder != NULL) {
2059 		mbfl_convert_filter_delete(encoder);
2060 	}
2061 
2062 	return result;
2063 }
2064 
2065 
2066 /*
2067  *  MIME header encode
2068  */
2069 struct mime_header_encoder_data {
2070 	mbfl_convert_filter *conv1_filter;
2071 	mbfl_convert_filter *block_filter;
2072 	mbfl_convert_filter *conv2_filter;
2073 	mbfl_convert_filter *conv2_filter_backup;
2074 	mbfl_convert_filter *encod_filter;
2075 	mbfl_convert_filter *encod_filter_backup;
2076 	mbfl_memory_device outdev;
2077 	mbfl_memory_device tmpdev;
2078 	int status1;
2079 	int status2;
2080 	int prevpos;
2081 	int linehead;
2082 	int firstindent;
2083 	int encnamelen;
2084 	int lwsplen;
2085 	char encname[128];
2086 	char lwsp[16];
2087 };
2088 
2089 static int
mime_header_encoder_block_collector(int c,void * data)2090 mime_header_encoder_block_collector(int c, void *data)
2091 {
2092 	int n;
2093 	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
2094 
2095 	switch (pe->status2) {
2096 	case 1:	/* encoded word */
2097 		pe->prevpos = pe->outdev.pos;
2098 		mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
2099 		mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
2100 		(*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2101 		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2102 		(*pe->encod_filter->filter_flush)(pe->encod_filter);
2103 		n = pe->outdev.pos - pe->linehead + pe->firstindent;
2104 		pe->outdev.pos = pe->prevpos;
2105 		mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
2106 		mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
2107 		if (n >= 74) {
2108 			(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2109 			(*pe->encod_filter->filter_flush)(pe->encod_filter);
2110 			mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);	/* ?= */
2111 			mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2112 			pe->linehead = pe->outdev.pos;
2113 			pe->firstindent = 0;
2114 			mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
2115 			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2116 		} else {
2117 			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2118 		}
2119 		break;
2120 
2121 	default:
2122 		mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
2123 		c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2124 		pe->status2 = 1;
2125 		break;
2126 	}
2127 
2128 	return c;
2129 }
2130 
2131 static int
mime_header_encoder_collector(int c,void * data)2132 mime_header_encoder_collector(int c, void *data)
2133 {
2134 	static int qp_table[256] = {
2135 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2136 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2137 		1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
2138 		0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
2139 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
2140 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
2141 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
2142 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
2143 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
2144 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
2145 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
2146 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
2147 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
2148 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
2149 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
2150 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  /* 0xF0 */
2151 	};
2152 
2153 	int n;
2154 	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
2155 
2156 	switch (pe->status1) {
2157 	case 11:	/* encoded word */
2158 		(*pe->block_filter->filter_function)(c, pe->block_filter);
2159 		break;
2160 
2161 	default:	/* ASCII */
2162 		if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
2163 			mbfl_memory_device_output(c, &pe->tmpdev);
2164 			pe->status1 = 1;
2165 		} else if (pe->status1 == 0 && c == 0x20) {	/* repeat SPACE */
2166 			mbfl_memory_device_output(c, &pe->tmpdev);
2167 		} else {
2168 			if (pe->tmpdev.pos < 74 && c == 0x20) {
2169 				n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
2170 				if (n > 74) {
2171 					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
2172 					pe->linehead = pe->outdev.pos;
2173 					pe->firstindent = 0;
2174 				} else if (pe->outdev.pos > 0) {
2175 					mbfl_memory_device_output(0x20, &pe->outdev);
2176 				}
2177 				mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2178 				mbfl_memory_device_reset(&pe->tmpdev);
2179 				pe->status1 = 0;
2180 			} else {
2181 				n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
2182 				if (n > 60)  {
2183 					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
2184 					pe->linehead = pe->outdev.pos;
2185 					pe->firstindent = 0;
2186 				} else if (pe->outdev.pos > 0)  {
2187 					mbfl_memory_device_output(0x20, &pe->outdev);
2188 				}
2189 				mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
2190 				mbfl_memory_device_reset(&pe->tmpdev);
2191 				(*pe->block_filter->filter_function)(c, pe->block_filter);
2192 				pe->status1 = 11;
2193 			}
2194 		}
2195 		break;
2196 	}
2197 
2198 	return c;
2199 }
2200 
2201 mbfl_string *
mime_header_encoder_result(struct mime_header_encoder_data * pe,mbfl_string * result)2202 mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
2203 {
2204 	if (pe->status1 >= 10) {
2205 		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2206 		(*pe->encod_filter->filter_flush)(pe->encod_filter);
2207 		mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);		/* ?= */
2208 	} else if (pe->tmpdev.pos > 0) {
2209 		if (pe->outdev.pos > 0) {
2210 			if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) {
2211 				mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2212 			} else {
2213 				mbfl_memory_device_output(0x20, &pe->outdev);
2214 			}
2215 		}
2216 		mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2217 	}
2218 	mbfl_memory_device_reset(&pe->tmpdev);
2219 	pe->prevpos = 0;
2220 	pe->linehead = 0;
2221 	pe->status1 = 0;
2222 	pe->status2 = 0;
2223 
2224 	return mbfl_memory_device_result(&pe->outdev, result);
2225 }
2226 
2227 struct mime_header_encoder_data*
mime_header_encoder_new(enum mbfl_no_encoding incode,enum mbfl_no_encoding outcode,enum mbfl_no_encoding transenc)2228 mime_header_encoder_new(
2229     enum mbfl_no_encoding incode,
2230     enum mbfl_no_encoding outcode,
2231     enum mbfl_no_encoding transenc)
2232 {
2233 	int n;
2234 	const char *s;
2235 	const mbfl_encoding *outencoding;
2236 	struct mime_header_encoder_data *pe;
2237 
2238 	/* get output encoding and check MIME charset name */
2239 	outencoding = mbfl_no2encoding(outcode);
2240 	if (outencoding == NULL || outencoding->mime_name == NULL || outencoding->mime_name[0] == '\0') {
2241 		return NULL;
2242 	}
2243 
2244 	pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
2245 	if (pe == NULL) {
2246 		return NULL;
2247 	}
2248 
2249 	mbfl_memory_device_init(&pe->outdev, 0, 0);
2250 	mbfl_memory_device_init(&pe->tmpdev, 0, 0);
2251 	pe->prevpos = 0;
2252 	pe->linehead = 0;
2253 	pe->firstindent = 0;
2254 	pe->status1 = 0;
2255 	pe->status2 = 0;
2256 
2257 	/* make the encoding description string  exp. "=?ISO-2022-JP?B?" */
2258 	n = 0;
2259 	pe->encname[n++] = 0x3d;
2260 	pe->encname[n++] = 0x3f;
2261 	s = outencoding->mime_name;
2262 	while (*s) {
2263 		pe->encname[n++] = *s++;
2264 	}
2265 	pe->encname[n++] = 0x3f;
2266 	if (transenc == mbfl_no_encoding_qprint) {
2267 		pe->encname[n++] = 0x51;
2268 	} else {
2269 		pe->encname[n++] = 0x42;
2270 		transenc = mbfl_no_encoding_base64;
2271 	}
2272 	pe->encname[n++] = 0x3f;
2273 	pe->encname[n] = '\0';
2274 	pe->encnamelen = n;
2275 
2276 	n = 0;
2277 	pe->lwsp[n++] = 0x0d;
2278 	pe->lwsp[n++] = 0x0a;
2279 	pe->lwsp[n++] = 0x20;
2280 	pe->lwsp[n] = '\0';
2281 	pe->lwsplen = n;
2282 
2283 	/* transfer encode filter */
2284 	pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2285 	pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2286 
2287 	/* Output code filter */
2288 	pe->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2289 	pe->conv2_filter_backup = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2290 
2291 	/* encoded block filter */
2292 	pe->block_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, mbfl_no_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
2293 
2294 	/* Input code filter */
2295 	pe->conv1_filter = mbfl_convert_filter_new(incode, mbfl_no_encoding_wchar, mime_header_encoder_collector, 0, pe);
2296 
2297 	if (pe->encod_filter == NULL ||
2298 	    pe->encod_filter_backup == NULL ||
2299 	    pe->conv2_filter == NULL ||
2300 	    pe->conv2_filter_backup == NULL ||
2301 	    pe->conv1_filter == NULL) {
2302 		mime_header_encoder_delete(pe);
2303 		return NULL;
2304 	}
2305 
2306 	if (transenc == mbfl_no_encoding_qprint) {
2307 		pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
2308 		pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
2309 	} else {
2310 		pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
2311 		pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
2312 	}
2313 
2314 	return pe;
2315 }
2316 
2317 void
mime_header_encoder_delete(struct mime_header_encoder_data * pe)2318 mime_header_encoder_delete(struct mime_header_encoder_data *pe)
2319 {
2320 	if (pe) {
2321 		mbfl_convert_filter_delete(pe->conv1_filter);
2322 		mbfl_convert_filter_delete(pe->block_filter);
2323 		mbfl_convert_filter_delete(pe->conv2_filter);
2324 		mbfl_convert_filter_delete(pe->conv2_filter_backup);
2325 		mbfl_convert_filter_delete(pe->encod_filter);
2326 		mbfl_convert_filter_delete(pe->encod_filter_backup);
2327 		mbfl_memory_device_clear(&pe->outdev);
2328 		mbfl_memory_device_clear(&pe->tmpdev);
2329 		mbfl_free((void*)pe);
2330 	}
2331 }
2332 
2333 int
mime_header_encoder_feed(int c,struct mime_header_encoder_data * pe)2334 mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
2335 {
2336 	return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
2337 }
2338 
2339 mbfl_string *
mbfl_mime_header_encode(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding outcode,enum mbfl_no_encoding encoding,const char * linefeed,int indent)2340 mbfl_mime_header_encode(
2341     mbfl_string *string,
2342     mbfl_string *result,
2343     enum mbfl_no_encoding outcode,
2344     enum mbfl_no_encoding encoding,
2345     const char *linefeed,
2346     int indent)
2347 {
2348 	int n;
2349 	unsigned char *p;
2350 	struct mime_header_encoder_data *pe;
2351 
2352 	mbfl_string_init(result);
2353 	result->no_language = string->no_language;
2354 	result->no_encoding = mbfl_no_encoding_ascii;
2355 
2356 	pe = mime_header_encoder_new(string->no_encoding, outcode, encoding);
2357 	if (pe == NULL) {
2358 		return NULL;
2359 	}
2360 
2361 	if (linefeed != NULL) {
2362 		n = 0;
2363 		while (*linefeed && n < 8) {
2364 			pe->lwsp[n++] = *linefeed++;
2365 		}
2366 		pe->lwsp[n++] = 0x20;
2367 		pe->lwsp[n] = '\0';
2368 		pe->lwsplen = n;
2369 	}
2370 	if (indent > 0 && indent < 74) {
2371 		pe->firstindent = indent;
2372 	}
2373 
2374 	n = string->len;
2375 	p = string->val;
2376 	while (n > 0) {
2377 		(*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
2378 		n--;
2379 	}
2380 
2381 	result = mime_header_encoder_result(pe, result);
2382 	mime_header_encoder_delete(pe);
2383 
2384 	return result;
2385 }
2386 
2387 
2388 /*
2389  *  MIME header decode
2390  */
2391 struct mime_header_decoder_data {
2392 	mbfl_convert_filter *deco_filter;
2393 	mbfl_convert_filter *conv1_filter;
2394 	mbfl_convert_filter *conv2_filter;
2395 	mbfl_memory_device outdev;
2396 	mbfl_memory_device tmpdev;
2397 	int cspos;
2398 	int status;
2399 	enum mbfl_no_encoding encoding;
2400 	enum mbfl_no_encoding incode;
2401 	enum mbfl_no_encoding outcode;
2402 };
2403 
2404 static int
mime_header_decoder_collector(int c,void * data)2405 mime_header_decoder_collector(int c, void* data)
2406 {
2407 	const mbfl_encoding *encoding;
2408 	struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;
2409 
2410 	switch (pd->status) {
2411 	case 1:
2412 		if (c == 0x3f) {		/* ? */
2413 			mbfl_memory_device_output(c, &pd->tmpdev);
2414 			pd->cspos = pd->tmpdev.pos;
2415 			pd->status = 2;
2416 		} else {
2417 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2418 			mbfl_memory_device_reset(&pd->tmpdev);
2419 			if (c == 0x3d) {		/* = */
2420 				mbfl_memory_device_output(c, &pd->tmpdev);
2421 			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2422 				pd->status = 9;
2423 			} else {
2424 				(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2425 				pd->status = 0;
2426 			}
2427 		}
2428 		break;
2429 	case 2:		/* store charset string */
2430 		if (c == 0x3f) {		/* ? */
2431 			/* identify charset */
2432 			mbfl_memory_device_output('\0', &pd->tmpdev);
2433 			encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
2434 			if (encoding != NULL) {
2435 				pd->incode = encoding->no_encoding;
2436 				pd->status = 3;
2437 			}
2438 			mbfl_memory_device_unput(&pd->tmpdev);
2439 			mbfl_memory_device_output(c, &pd->tmpdev);
2440 		} else {
2441 			mbfl_memory_device_output(c, &pd->tmpdev);
2442 			if (pd->tmpdev.pos > 100) {		/* too long charset string */
2443 				pd->status = 0;
2444 			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2445 				mbfl_memory_device_unput(&pd->tmpdev);
2446 				pd->status = 9;
2447 			}
2448 			if (pd->status != 2) {
2449 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2450 				mbfl_memory_device_reset(&pd->tmpdev);
2451 			}
2452 		}
2453 		break;
2454 	case 3:		/* identify encoding */
2455 		mbfl_memory_device_output(c, &pd->tmpdev);
2456 		if (c == 0x42 || c == 0x62) {		/* 'B' or 'b' */
2457 			pd->encoding = mbfl_no_encoding_base64;
2458 			pd->status = 4;
2459 		} else if (c == 0x51 || c == 0x71) {	/* 'Q' or 'q' */
2460 			pd->encoding = mbfl_no_encoding_qprint;
2461 			pd->status = 4;
2462 		} else {
2463 			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2464 				mbfl_memory_device_unput(&pd->tmpdev);
2465 				pd->status = 9;
2466 			} else {
2467 				pd->status = 0;
2468 			}
2469 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2470 			mbfl_memory_device_reset(&pd->tmpdev);
2471 		}
2472 		break;
2473 	case 4:		/* reset filter */
2474 		mbfl_memory_device_output(c, &pd->tmpdev);
2475 		if (c == 0x3f) {		/* ? */
2476 			/* charset convert filter */
2477 			mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, mbfl_no_encoding_wchar);
2478 			/* decode filter */
2479 			mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, mbfl_no_encoding_8bit);
2480 			pd->status = 5;
2481 		} else {
2482 			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2483 				mbfl_memory_device_unput(&pd->tmpdev);
2484 				pd->status = 9;
2485 			} else {
2486 				pd->status = 0;
2487 			}
2488 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2489 		}
2490 		mbfl_memory_device_reset(&pd->tmpdev);
2491 		break;
2492 	case 5:		/* encoded block */
2493 		if (c == 0x3f) {		/* ? */
2494 			pd->status = 6;
2495 		} else {
2496 			(*pd->deco_filter->filter_function)(c, pd->deco_filter);
2497 		}
2498 		break;
2499 	case 6:		/* check end position */
2500 		if (c == 0x3d) {		/* = */
2501 			/* flush and reset filter */
2502 			(*pd->deco_filter->filter_flush)(pd->deco_filter);
2503 			(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2504 			mbfl_convert_filter_reset(pd->conv1_filter, mbfl_no_encoding_ascii, mbfl_no_encoding_wchar);
2505 			pd->status = 7;
2506 		} else {
2507 			(*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
2508 			if (c != 0x3f) {		/* ? */
2509 				(*pd->deco_filter->filter_function)(c, pd->deco_filter);
2510 				pd->status = 5;
2511 			}
2512 		}
2513 		break;
2514 	case 7:		/* after encoded block */
2515 		if (c == 0x0d || c == 0x0a) {	/* CR LF */
2516 			pd->status = 8;
2517 		} else {
2518 			mbfl_memory_device_output(c, &pd->tmpdev);
2519 			if (c == 0x3d) {		/* = */
2520 				pd->status = 1;
2521 			} else if (c != 0x20 && c != 0x09) {		/* not space */
2522 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2523 				mbfl_memory_device_reset(&pd->tmpdev);
2524 				pd->status = 0;
2525 			}
2526 		}
2527 		break;
2528 	case 8:		/* folding */
2529 	case 9:		/* folding */
2530 		if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
2531 			if (c == 0x3d) {		/* = */
2532 				if (pd->status == 8) {
2533 					mbfl_memory_device_output(0x20, &pd->tmpdev);	/* SPACE */
2534 				} else {
2535 					(*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
2536 				}
2537 				mbfl_memory_device_output(c, &pd->tmpdev);
2538 				pd->status = 1;
2539 			} else {
2540 				mbfl_memory_device_output(0x20, &pd->tmpdev);
2541 				mbfl_memory_device_output(c, &pd->tmpdev);
2542 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2543 				mbfl_memory_device_reset(&pd->tmpdev);
2544 				pd->status = 0;
2545 			}
2546 		}
2547 		break;
2548 	default:		/* non encoded block */
2549 		if (c == 0x0d || c == 0x0a) {	/* CR LF */
2550 			pd->status = 9;
2551 		} else if (c == 0x3d) {		/* = */
2552 			mbfl_memory_device_output(c, &pd->tmpdev);
2553 			pd->status = 1;
2554 		} else {
2555 			(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2556 		}
2557 		break;
2558 	}
2559 
2560 	return c;
2561 }
2562 
2563 mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data * pd,mbfl_string * result)2564 mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2565 {
2566 	switch (pd->status) {
2567 	case 1:
2568 	case 2:
2569 	case 3:
2570 	case 4:
2571 	case 7:
2572 	case 8:
2573 	case 9:
2574 		mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2575 		break;
2576 	case 5:
2577 	case 6:
2578 		(*pd->deco_filter->filter_flush)(pd->deco_filter);
2579 		(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2580 		break;
2581 	}
2582 	(*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2583 	mbfl_memory_device_reset(&pd->tmpdev);
2584 	pd->status = 0;
2585 
2586 	return mbfl_memory_device_result(&pd->outdev, result);
2587 }
2588 
2589 struct mime_header_decoder_data*
mime_header_decoder_new(enum mbfl_no_encoding outcode)2590 mime_header_decoder_new(enum mbfl_no_encoding outcode)
2591 {
2592 	struct mime_header_decoder_data *pd;
2593 
2594 	pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
2595 	if (pd == NULL) {
2596 		return NULL;
2597 	}
2598 
2599 	mbfl_memory_device_init(&pd->outdev, 0, 0);
2600 	mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2601 	pd->cspos = 0;
2602 	pd->status = 0;
2603 	pd->encoding = mbfl_no_encoding_pass;
2604 	pd->incode = mbfl_no_encoding_ascii;
2605 	pd->outcode = outcode;
2606 	/* charset convert filter */
2607 	pd->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2608 	pd->conv1_filter = mbfl_convert_filter_new(pd->incode, mbfl_no_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2609 	/* decode filter */
2610 	pd->deco_filter = mbfl_convert_filter_new(pd->encoding, mbfl_no_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2611 
2612 	if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2613 		mime_header_decoder_delete(pd);
2614 		return NULL;
2615 	}
2616 
2617 	return pd;
2618 }
2619 
2620 void
mime_header_decoder_delete(struct mime_header_decoder_data * pd)2621 mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2622 {
2623 	if (pd) {
2624 		mbfl_convert_filter_delete(pd->conv2_filter);
2625 		mbfl_convert_filter_delete(pd->conv1_filter);
2626 		mbfl_convert_filter_delete(pd->deco_filter);
2627 		mbfl_memory_device_clear(&pd->outdev);
2628 		mbfl_memory_device_clear(&pd->tmpdev);
2629 		mbfl_free((void*)pd);
2630 	}
2631 }
2632 
/*
 * Push one input byte into the decoder; thin public wrapper around the
 * collector state machine.  Returns the collector's return value.
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	int rc = mime_header_decoder_collector(c, pd);

	return rc;
}
2638 
2639 mbfl_string *
mbfl_mime_header_decode(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding outcode)2640 mbfl_mime_header_decode(
2641     mbfl_string *string,
2642     mbfl_string *result,
2643     enum mbfl_no_encoding outcode)
2644 {
2645 	int n;
2646 	unsigned char *p;
2647 	struct mime_header_decoder_data *pd;
2648 
2649 	mbfl_string_init(result);
2650 	result->no_language = string->no_language;
2651 	result->no_encoding = outcode;
2652 
2653 	pd = mime_header_decoder_new(outcode);
2654 	if (pd == NULL) {
2655 		return NULL;
2656 	}
2657 
2658 	/* feed data */
2659 	n = string->len;
2660 	p = string->val;
2661 	while (n > 0) {
2662 		mime_header_decoder_collector(*p++, pd);
2663 		n--;
2664 	}
2665 
2666 	result = mime_header_decoder_result(pd, result);
2667 	mime_header_decoder_delete(pd);
2668 
2669 	return result;
2670 }
2671 
2672 
2673 
2674 /*
2675  *  convert HTML numeric entity
2676  */
2677 struct collector_htmlnumericentity_data {
2678 	mbfl_convert_filter *decoder;
2679 	int status;
2680 	int cache;
2681 	int digit;
2682 	int *convmap;
2683 	int mapsize;
2684 };
2685 
2686 static int
collector_encode_htmlnumericentity(int c,void * data)2687 collector_encode_htmlnumericentity(int c, void *data)
2688 {
2689 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2690 	int f, n, s, r, d, size, *mapelm;
2691 
2692 	size = pc->mapsize;
2693 	f = 0;
2694 	n = 0;
2695 	while (n < size) {
2696 		mapelm = &(pc->convmap[n*4]);
2697 		if (c >= mapelm[0] && c <= mapelm[1]) {
2698 			s = (c + mapelm[2]) & mapelm[3];
2699 			if (s >= 0) {
2700 				(*pc->decoder->filter_function)(0x26, pc->decoder);	/* '&' */
2701 				(*pc->decoder->filter_function)(0x23, pc->decoder);	/* '#' */
2702 				r = 100000000;
2703 				s %= r;
2704 				while (r > 0) {
2705 					d = s/r;
2706 					if (d || f) {
2707 						f = 1;
2708 						s %= r;
2709 						(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2710 					}
2711 					r /= 10;
2712 				}
2713 				if (!f) {
2714 					f = 1;
2715 					(*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2716 				}
2717 				(*pc->decoder->filter_function)(0x3b, pc->decoder);		/* ';' */
2718 			}
2719 		}
2720 		if (f) {
2721 			break;
2722 		}
2723 		n++;
2724 	}
2725 	if (!f) {
2726 		(*pc->decoder->filter_function)(c, pc->decoder);
2727 	}
2728 
2729 	return c;
2730 }
2731 
2732 static int
collector_decode_htmlnumericentity(int c,void * data)2733 collector_decode_htmlnumericentity(int c, void *data)
2734 {
2735 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2736 	int f, n, s, r, d, size, *mapelm;
2737 
2738 	switch (pc->status) {
2739 	case 1:
2740 		if (c == 0x23) {	/* '#' */
2741 			pc->status = 2;
2742 		} else {
2743 			pc->status = 0;
2744 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2745 			(*pc->decoder->filter_function)(c, pc->decoder);
2746 		}
2747 		break;
2748 	case 2:
2749 		if (c == 0x78) {	/* 'x' */
2750 			pc->status = 4;
2751 		} else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2752 			pc->cache = c - 0x30;
2753 			pc->status = 3;
2754 			pc->digit = 1;
2755 		} else {
2756 			pc->status = 0;
2757 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2758 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2759 			(*pc->decoder->filter_function)(c, pc->decoder);
2760 		}
2761 		break;
2762 	case 3:
2763 		s = 0;
2764 		f = 0;
2765 		if (c >= 0x30 && c <= 0x39) {	/* '0' - '9' */
2766 			if (pc->digit > 9) {
2767 				pc->status = 0;
2768 				s = pc->cache;
2769 				f = 1;
2770 			} else {
2771 				s = pc->cache*10 + c - 0x30;
2772 				pc->cache = s;
2773 				pc->digit++;
2774 			}
2775 		} else {
2776 			pc->status = 0;
2777 			s = pc->cache;
2778 			f = 1;
2779 			n = 0;
2780 			size = pc->mapsize;
2781 			while (n < size) {
2782 				mapelm = &(pc->convmap[n*4]);
2783 				d = s - mapelm[2];
2784 				if (d >= mapelm[0] && d <= mapelm[1]) {
2785 					f = 0;
2786 					(*pc->decoder->filter_function)(d, pc->decoder);
2787 					if (c != 0x3b) {	/* ';' */
2788 						(*pc->decoder->filter_function)(c, pc->decoder);
2789 					}
2790 					break;
2791 				}
2792 				n++;
2793 			}
2794 		}
2795 		if (f) {
2796 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2797 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2798 			r = 1;
2799 			n = pc->digit;
2800 			while (n > 0) {
2801 				r *= 10;
2802 				n--;
2803 			}
2804 			s %= r;
2805 			r /= 10;
2806 			while (r > 0) {
2807 				d = s/r;
2808 				s %= r;
2809 				r /= 10;
2810 				(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2811 			}
2812 			(*pc->decoder->filter_function)(c, pc->decoder);
2813 		}
2814 		break;
2815 	case 4:
2816 		if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2817 			pc->cache = c - 0x30;
2818 			pc->status = 5;
2819 			pc->digit = 1;
2820 		} else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F'  */
2821 			pc->cache = c - 0x41 + 10;
2822 			pc->status = 5;
2823 			pc->digit = 1;
2824 		} else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f'  */
2825 			pc->cache = c - 0x61 + 10;
2826 			pc->status = 5;
2827 			pc->digit = 1;
2828 		} else {
2829 			pc->status = 0;
2830 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2831 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2832 			(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
2833 			(*pc->decoder->filter_function)(c, pc->decoder);
2834 		}
2835 		break;
2836 	case 5:
2837 		s = 0;
2838 		f = 0;
2839 		if ((c >= 0x30 && c <= 0x39) ||
2840 			(c >= 0x41 && c <= 0x46) ||
2841 			(c >= 0x61 && c <= 0x66)) {	/* '0' - '9' or 'a' - 'f'  */
2842 			if (pc->digit > 9) {
2843 				pc->status = 0;
2844 				s = pc->cache;
2845 				f = 1;
2846 			} else {
2847 				if (c >= 0x30 && c <= 0x39) {
2848 					s = pc->cache*16 + (c - 0x30);
2849 				} else if (c >= 0x41 && c <= 0x46)  {
2850 					s = pc->cache*16 + (c - 0x41 + 10);
2851 				} else {
2852 					s = pc->cache*16 + (c - 0x61 + 10);
2853 				}
2854 				pc->cache = s;
2855 				pc->digit++;
2856 			}
2857 		} else {
2858 			pc->status = 0;
2859 			s = pc->cache;
2860 			f = 1;
2861 			n = 0;
2862 			size = pc->mapsize;
2863 			while (n < size) {
2864 				mapelm = &(pc->convmap[n*4]);
2865 				d = s - mapelm[2];
2866 				if (d >= mapelm[0] && d <= mapelm[1]) {
2867 					f = 0;
2868 					(*pc->decoder->filter_function)(d, pc->decoder);
2869 					if (c != 0x3b) {	/* ';' */
2870 						(*pc->decoder->filter_function)(c, pc->decoder);
2871 					}
2872 					break;
2873 				}
2874 				n++;
2875 			}
2876 		}
2877 		if (f) {
2878 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2879 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2880 			(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
2881 			r = 1;
2882 			n = pc->digit;
2883 			while (n > 0) {
2884 				r *= 16;
2885 				n--;
2886 			}
2887 			s %= r;
2888 			r /= 16;
2889 			while (r > 0) {
2890 				d = s/r;
2891 				s %= r;
2892 				r /= 16;
2893 				(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2894 			}
2895 			(*pc->decoder->filter_function)(c, pc->decoder);
2896 		}
2897 		break;
2898 	default:
2899 		if (c == 0x26) {	/* '&' */
2900 			pc->status = 1;
2901 		} else {
2902 			(*pc->decoder->filter_function)(c, pc->decoder);
2903 		}
2904 		break;
2905 	}
2906 
2907 	return c;
2908 }
2909 
2910 static int
collector_encode_hex_htmlnumericentity(int c,void * data)2911 collector_encode_hex_htmlnumericentity(int c, void *data)
2912 {
2913 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2914 	int f, n, s, r, d, size, *mapelm;
2915 
2916 	size = pc->mapsize;
2917 	f = 0;
2918 	n = 0;
2919 	while (n < size) {
2920 		mapelm = &(pc->convmap[n*4]);
2921 		if (c >= mapelm[0] && c <= mapelm[1]) {
2922 			s = (c + mapelm[2]) & mapelm[3];
2923 			if (s >= 0) {
2924 				(*pc->decoder->filter_function)(0x26, pc->decoder);	/* '&' */
2925 				(*pc->decoder->filter_function)(0x23, pc->decoder);	/* '#' */
2926 				(*pc->decoder->filter_function)(0x78, pc->decoder);	/* 'x' */
2927 				r = 0x1000000;
2928 				s %= r;
2929 				while (r > 0) {
2930 					d = s/r;
2931 					if (d || f) {
2932 						f = 1;
2933 						s %= r;
2934 						(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2935 					}
2936 					r /= 16;
2937 				}
2938 				if (!f) {
2939 					f = 1;
2940 					(*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2941 				}
2942 				(*pc->decoder->filter_function)(0x3b, pc->decoder);		/* ';' */
2943 			}
2944 		}
2945 		if (f) {
2946 			break;
2947 		}
2948 		n++;
2949 	}
2950 	if (!f) {
2951 		(*pc->decoder->filter_function)(c, pc->decoder);
2952 	}
2953 
2954 	return c;
2955 }
2956 
mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter * filter)2957 int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
2958 {
2959 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
2960 	int n, s, r, d;
2961 
2962 	if (pc->status) {
2963 		switch (pc->status) {
2964 		case 1: /* '&' */
2965 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2966 			break;
2967 		case 2: /* '#' */
2968 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2969 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2970 			break;
2971 		case 3: /* '0'-'9' */
2972 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2973 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2974 
2975 			s = pc->cache;
2976 			r = 1;
2977 			n = pc->digit;
2978 			while (n > 0) {
2979 				r *= 10;
2980 				n--;
2981 			}
2982 			s %= r;
2983 			r /= 10;
2984 			while (r > 0) {
2985 				d = s/r;
2986 				s %= r;
2987 				r /= 10;
2988 				(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2989 			}
2990 
2991 			break;
2992 		case 4: /* 'x' */
2993 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2994 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2995 			(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
2996 			break;
2997 		case 5: /* '0'-'9','a'-'f' */
2998 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2999 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
3000 			(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
3001 
3002 			s = pc->cache;
3003 			r = 1;
3004 			n = pc->digit;
3005 			while (n > 0) {
3006 				r *= 16;
3007 				n--;
3008 			}
3009 			s %= r;
3010 			r /= 16;
3011 			while (r > 0) {
3012 				d = s/r;
3013 				s %= r;
3014 				r /= 16;
3015 				(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
3016 			}
3017 			break;
3018 		default:
3019 			break;
3020 		}
3021 	}
3022 
3023 	pc->status = 0;
3024 	pc->cache = 0;
3025 	pc->digit = 0;
3026 
3027 	return 0;
3028 }
3029 
3030 
3031 mbfl_string *
mbfl_html_numeric_entity(mbfl_string * string,mbfl_string * result,int * convmap,int mapsize,int type)3032 mbfl_html_numeric_entity(
3033     mbfl_string *string,
3034     mbfl_string *result,
3035     int *convmap,
3036     int mapsize,
3037     int type)
3038 {
3039 	struct collector_htmlnumericentity_data pc;
3040 	mbfl_memory_device device;
3041 	mbfl_convert_filter *encoder;
3042 	int n;
3043 	unsigned char *p;
3044 
3045 	if (string == NULL || result == NULL) {
3046 		return NULL;
3047 	}
3048 	mbfl_string_init(result);
3049 	result->no_language = string->no_language;
3050 	result->no_encoding = string->no_encoding;
3051 	mbfl_memory_device_init(&device, string->len, 0);
3052 
3053 	/* output code filter */
3054 	pc.decoder = mbfl_convert_filter_new(
3055 	    mbfl_no_encoding_wchar,
3056 	    string->no_encoding,
3057 	    mbfl_memory_device_output, 0, &device);
3058 	/* wchar filter */
3059 	if (type == 0) { /* decimal output */
3060 		encoder = mbfl_convert_filter_new(
3061 		    string->no_encoding,
3062 		    mbfl_no_encoding_wchar,
3063 		    collector_encode_htmlnumericentity, 0, &pc);
3064 	} else if (type == 2) { /* hex output */
3065 		encoder = mbfl_convert_filter_new(
3066 		    string->no_encoding,
3067 		    mbfl_no_encoding_wchar,
3068 		    collector_encode_hex_htmlnumericentity, 0, &pc);
3069 	} else { /* type == 1: decimal/hex input */
3070 		encoder = mbfl_convert_filter_new(
3071 		    string->no_encoding,
3072 		    mbfl_no_encoding_wchar,
3073 		    collector_decode_htmlnumericentity,
3074 			(int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
3075 	}
3076 	if (pc.decoder == NULL || encoder == NULL) {
3077 		mbfl_convert_filter_delete(encoder);
3078 		mbfl_convert_filter_delete(pc.decoder);
3079 		return NULL;
3080 	}
3081 	pc.status = 0;
3082 	pc.cache = 0;
3083 	pc.digit = 0;
3084 	pc.convmap = convmap;
3085 	pc.mapsize = mapsize;
3086 
3087 	/* feed data */
3088 	p = string->val;
3089 	n = string->len;
3090 	if (p != NULL) {
3091 		while (n > 0) {
3092 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
3093 				break;
3094 			}
3095 			n--;
3096 		}
3097 	}
3098 	mbfl_convert_filter_flush(encoder);
3099 	mbfl_convert_filter_flush(pc.decoder);
3100 	result = mbfl_memory_device_result(&device, result);
3101 	mbfl_convert_filter_delete(encoder);
3102 	mbfl_convert_filter_delete(pc.decoder);
3103 
3104 	return result;
3105 }
3106 
3107 /*
3108  * Local variables:
3109  * tab-width: 4
3110  * c-basic-offset: 4
3111  * End:
3112  */
3113