xref: /PHP-8.0/ext/mbstring/libmbfl/mbfl/mbfilter.c (revision d0417ebc)
1 /*
2  * charset=UTF-8
3  */
4 
5 /*
6  * "streamable kanji code filter and converter"
7  *
8  * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
9  *
10  * This software is released under the GNU Lesser General Public License.
11  * (Version 2.1, February 1999)
12  * Please read the following detail of the licence (in japanese).
13  *
14  * ◆使用許諾条件◆
15  *
16  * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
17  * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
18  * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
19  * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
20  * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
21  * することはできません。
22  *
23  * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
24  * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
25  * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
26  * による許諾を得る必要があります。
27  *
28  * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
29  * ます。「GNU Lesser General Public License」とは、これまでLibrary General
30  * Public Licenseと呼ばれていたものです。
31  *     http://www.gnu.org/ --- GNUウェブサイト
32  *     http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
33  * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
34  *
35  * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
36  * はありません。
37  *
38  * ◆保証内容◆
39  *
40  * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
41  * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
42  * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
43  * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
44  * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
45  * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
46  * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
47  * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
48  * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
49  * 契約・規定に優先します。
50  *
51  * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
52  *
53  * 〒102-0073
54  * 東京都千代田区九段北1-13-5日本地所第一ビル4F
55  * 株式会社ハッピーサイズ
56  * Phone: 03-3512-3655, Fax: 03-3512-3656
57  * Email: sales@happysize.co.jp
58  * Web: http://happysize.com/
59  *
60  * ◆著者◆
61  *
62  * 金本 茂 <sgk@happysize.co.jp>
63  *
64  * ◆履歴◆
65  *
66  * 1998/11/10 sgk implementation in C++
67  * 1999/4/25  sgk Cで書きなおし。
68  * 1999/4/26  sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
69  * 1999/6/??      Unicodeサポート。
70  * 1999/6/22  sgk ライセンスをLGPLに変更。
71  *
72  */
73 
74 /*
75  * Unicode support
76  *
77  * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
78  * All rights reserved.
79  *
80  */
81 
82 #include <stddef.h>
83 #include <string.h>
84 
85 #include "mbfilter.h"
86 #include "mbfl_filter_output.h"
87 #include "mbfilter_8bit.h"
88 #include "mbfilter_wchar.h"
89 #include "filters/mbfilter_ascii.h"
90 #include "filters/mbfilter_base64.h"
91 #include "filters/mbfilter_qprint.h"
92 #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
93 #include "filters/mbfilter_utf8.h"
94 
95 #include "eaw_table.h"
96 
/*
 * Hex digit lookup table: element i holds the ASCII character of hex digit i
 * ("0123456789ABCDEF").  The array has exactly 16 elements and no terminating
 * NUL.
 */
static char mbfl_hexchar_table[] = {
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
101 
102 
103 
104 /*
105  * encoding filter
106  */
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing function. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)
108 
109 
110 /*
111  *  buffering converter
112  */
113 mbfl_buffer_converter *
mbfl_buffer_converter_new(const mbfl_encoding * from,const mbfl_encoding * to,size_t buf_initsz)114 mbfl_buffer_converter_new(
115 	const mbfl_encoding *from,
116 	const mbfl_encoding *to,
117     size_t buf_initsz)
118 {
119 	mbfl_buffer_converter *convd = emalloc(sizeof(mbfl_buffer_converter));
120 	convd->to = to;
121 
122 	/* create convert filter */
123 	convd->filter1 = NULL;
124 	convd->filter2 = NULL;
125 	if (mbfl_convert_filter_get_vtbl(from, to) != NULL) {
126 		convd->filter1 = mbfl_convert_filter_new(from, to, mbfl_memory_device_output, NULL, &convd->device);
127 	} else {
128 		convd->filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, to, mbfl_memory_device_output, NULL, &convd->device);
129 		if (convd->filter2 != NULL) {
130 			convd->filter1 = mbfl_convert_filter_new(from,
131 					&mbfl_encoding_wchar,
132 					(output_function_t)convd->filter2->filter_function,
133 					(flush_function_t)convd->filter2->filter_flush,
134 					convd->filter2);
135 			if (convd->filter1 == NULL) {
136 				mbfl_convert_filter_delete(convd->filter2);
137 			}
138 		}
139 	}
140 	if (convd->filter1 == NULL) {
141 		efree(convd);
142 		return NULL;
143 	}
144 
145 	mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
146 
147 	return convd;
148 }
149 
150 
151 void
mbfl_buffer_converter_delete(mbfl_buffer_converter * convd)152 mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
153 {
154 	if (convd != NULL) {
155 		if (convd->filter1) {
156 			mbfl_convert_filter_delete(convd->filter1);
157 		}
158 		if (convd->filter2) {
159 			mbfl_convert_filter_delete(convd->filter2);
160 		}
161 		mbfl_memory_device_clear(&convd->device);
162 		efree((void*)convd);
163 	}
164 }
165 
166 int
mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter * convd,int mode)167 mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
168 {
169 	if (convd != NULL) {
170 		if (convd->filter2 != NULL) {
171 			convd->filter2->illegal_mode = mode;
172 		} else if (convd->filter1 != NULL) {
173 			convd->filter1->illegal_mode = mode;
174 		} else {
175 			return 0;
176 		}
177 	}
178 
179 	return 1;
180 }
181 
182 int
mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter * convd,int substchar)183 mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
184 {
185 	if (convd != NULL) {
186 		if (convd->filter2 != NULL) {
187 			convd->filter2->illegal_substchar = substchar;
188 		} else if (convd->filter1 != NULL) {
189 			convd->filter1->illegal_substchar = substchar;
190 		} else {
191 			return 0;
192 		}
193 	}
194 
195 	return 1;
196 }
197 
/*
 * Feed every byte of `string` through the converter's first filter.
 *
 * The memory device is resized up front using the current write position
 * plus the input length.
 *
 * Returns the number of input bytes handed to the filter.  Feeding stops as
 * soon as the filter function returns a negative value; the byte that
 * triggered the error is included in the count.
 */
size_t mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
{
	size_t remaining;
	unsigned char *p;
	mbfl_convert_filter *filter;
	int (*filter_function)(int c, mbfl_convert_filter *filter);

	ZEND_ASSERT(convd);
	ZEND_ASSERT(string);

	mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);

	p = string->val;
	filter = convd->filter1;
	if (filter != NULL) {
		filter_function = filter->filter_function;
		for (remaining = string->len; remaining > 0; remaining--) {
			if ((*filter_function)(*p++, filter) < 0) {
				break;
			}
		}
	}

	return p - string->val;
}
225 
226 
227 int
mbfl_buffer_converter_flush(mbfl_buffer_converter * convd)228 mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
229 {
230 	if (convd == NULL) {
231 		return -1;
232 	}
233 
234 	if (convd->filter1 != NULL) {
235 		mbfl_convert_filter_flush(convd->filter1);
236 	}
237 	if (convd->filter2 != NULL) {
238 		mbfl_convert_filter_flush(convd->filter2);
239 	}
240 
241 	return 0;
242 }
243 
244 mbfl_string *
mbfl_buffer_converter_result(mbfl_buffer_converter * convd,mbfl_string * result)245 mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
246 {
247 	if (convd == NULL || result == NULL) {
248 		return NULL;
249 	}
250 	result->encoding = convd->to;
251 	return mbfl_memory_device_result(&convd->device, result);
252 }
253 
254 mbfl_string *
mbfl_buffer_converter_feed_result(mbfl_buffer_converter * convd,mbfl_string * string,mbfl_string * result)255 mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
256 				  mbfl_string *result)
257 {
258 	if (convd == NULL || string == NULL || result == NULL) {
259 		return NULL;
260 	}
261 	mbfl_buffer_converter_feed(convd, string);
262 	if (convd->filter1 != NULL) {
263 		mbfl_convert_filter_flush(convd->filter1);
264 	}
265 	if (convd->filter2 != NULL) {
266 		mbfl_convert_filter_flush(convd->filter2);
267 	}
268 	result->encoding = convd->to;
269 	return mbfl_memory_device_result(&convd->device, result);
270 }
271 
/*
 * Return the sum of the num_illegalchar counters of the converter's filters
 * (0 for a NULL converter).
 */
size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
	size_t total = 0;

	if (convd == NULL) {
		return 0;
	}

	if (convd->filter1 != NULL) {
		total += convd->filter1->num_illegalchar;
	}
	if (convd->filter2 != NULL) {
		total += convd->filter2->num_illegalchar;
	}

	return total;
}
290 
291 /*
292  * encoding detector
293  */
294 mbfl_encoding_detector *
mbfl_encoding_detector_new(const mbfl_encoding ** elist,int elistsz,int strict)295 mbfl_encoding_detector_new(const mbfl_encoding **elist, int elistsz, int strict)
296 {
297 	mbfl_encoding_detector *identd;
298 
299 	int i, num;
300 	mbfl_identify_filter *filter;
301 
302 	if (elist == NULL || elistsz <= 0) {
303 		return NULL;
304 	}
305 
306 	/* allocate */
307 	identd = emalloc(sizeof(mbfl_encoding_detector));
308 	identd->filter_list = ecalloc(elistsz, sizeof(mbfl_identify_filter *));
309 
310 	/* create filters */
311 	i = 0;
312 	num = 0;
313 	while (i < elistsz) {
314 		filter = mbfl_identify_filter_new2(elist[i]);
315 		if (filter != NULL) {
316 			identd->filter_list[num] = filter;
317 			num++;
318 		}
319 		i++;
320 	}
321 	identd->filter_list_size = num;
322 
323 	/* set strict flag */
324 	identd->strict = strict;
325 
326 	return identd;
327 }
328 
329 
330 void
mbfl_encoding_detector_delete(mbfl_encoding_detector * identd)331 mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
332 {
333 	int i;
334 
335 	if (identd != NULL) {
336 		if (identd->filter_list != NULL) {
337 			i = identd->filter_list_size;
338 			while (i > 0) {
339 				i--;
340 				mbfl_identify_filter_delete(identd->filter_list[i]);
341 			}
342 			efree((void *)identd->filter_list);
343 		}
344 		efree((void *)identd);
345 	}
346 }
347 
348 int
mbfl_encoding_detector_feed(mbfl_encoding_detector * identd,mbfl_string * string)349 mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
350 {
351 	int res = 0;
352 	/* feed data */
353 	if (identd != NULL && string != NULL && string->val != NULL) {
354 		int num = identd->filter_list_size;
355 		size_t n = string->len;
356 		unsigned char *p = string->val;
357 		int bad = 0;
358 		while (n > 0) {
359 			int i;
360 			for (i = 0; i < num; i++) {
361 				mbfl_identify_filter *filter = identd->filter_list[i];
362 				if (!filter->flag) {
363 					(*filter->filter_function)(*p, filter);
364 					if (filter->flag) {
365 						bad++;
366 					}
367 				}
368 			}
369 			if ((num - 1) <= bad) {
370 				res = 1;
371 				break;
372 			}
373 			p++;
374 			n--;
375 		}
376 	}
377 
378 	return res;
379 }
380 
mbfl_encoding_detector_judge(mbfl_encoding_detector * identd)381 const mbfl_encoding *mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
382 {
383 	mbfl_identify_filter *filter;
384 	const mbfl_encoding *encoding = NULL;
385 	int n;
386 
387 	/* judge */
388 	if (identd != NULL) {
389 		n = identd->filter_list_size - 1;
390 		while (n >= 0) {
391 			filter = identd->filter_list[n];
392 			if (!filter->flag) {
393 				if (!identd->strict || !filter->status) {
394 					encoding = filter->encoding;
395 				}
396 			}
397 			n--;
398 		}
399 
400 		/* fallback judge */
401 		if (!encoding) {
402 			n = identd->filter_list_size - 1;
403 			while (n >= 0) {
404 				filter = identd->filter_list[n];
405 				if (!filter->flag) {
406 					encoding = filter->encoding;
407 				}
408 				n--;
409  			}
410 		}
411 	}
412 
413 	return encoding;
414 }
415 
416 /*
417  * encoding converter
418  */
419 mbfl_string *
mbfl_convert_encoding(mbfl_string * string,mbfl_string * result,const mbfl_encoding * toenc)420 mbfl_convert_encoding(
421     mbfl_string *string,
422     mbfl_string *result,
423     const mbfl_encoding *toenc)
424 {
425 	size_t n;
426 	unsigned char *p;
427 	mbfl_memory_device device;
428 	mbfl_convert_filter *filter1;
429 	mbfl_convert_filter *filter2;
430 
431 	/* initialize */
432 	if (toenc == NULL || string == NULL || result == NULL) {
433 		return NULL;
434 	}
435 
436 	filter1 = NULL;
437 	filter2 = NULL;
438 	if (mbfl_convert_filter_get_vtbl(string->encoding, toenc) != NULL) {
439 		filter1 = mbfl_convert_filter_new(string->encoding, toenc, mbfl_memory_device_output, 0, &device);
440 	} else {
441 		filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
442 		if (filter2 != NULL) {
443 			filter1 = mbfl_convert_filter_new(string->encoding, &mbfl_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
444 			if (filter1 == NULL) {
445 				mbfl_convert_filter_delete(filter2);
446 			}
447 		}
448 	}
449 	if (filter1 == NULL) {
450 		return NULL;
451 	}
452 
453 	if (filter2 != NULL) {
454 		filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
455 		filter2->illegal_substchar = 0x3f;		/* '?' */
456 	}
457 
458 	mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
459 
460 	/* feed data */
461 	n = string->len;
462 	p = string->val;
463 	if (p != NULL) {
464 		while (n > 0) {
465 			if ((*filter1->filter_function)(*p++, filter1) < 0) {
466 				break;
467 			}
468 			n--;
469 		}
470 	}
471 
472 	mbfl_convert_filter_flush(filter1);
473 	mbfl_convert_filter_delete(filter1);
474 	if (filter2 != NULL) {
475 		mbfl_convert_filter_flush(filter2);
476 		mbfl_convert_filter_delete(filter2);
477 	}
478 
479 	return mbfl_memory_device_result(&device, result);
480 }
481 
482 
483 /*
484  * identify encoding
485  */
486 const mbfl_encoding *
mbfl_identify_encoding(mbfl_string * string,const mbfl_encoding ** elist,int elistsz,int strict)487 mbfl_identify_encoding(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict)
488 {
489 	int i, num, bad;
490 	size_t n;
491 	unsigned char *p;
492 	mbfl_identify_filter *flist, *filter;
493 	const mbfl_encoding *encoding;
494 
495 	/* flist is an array of mbfl_identify_filter instances */
496 	flist = ecalloc(elistsz, sizeof(mbfl_identify_filter));
497 
498 	num = 0;
499 	if (elist != NULL) {
500 		for (i = 0; i < elistsz; i++) {
501 			if (!mbfl_identify_filter_init2(&flist[num], elist[i])) {
502 				num++;
503 			}
504 		}
505 	}
506 
507 	/* feed data */
508 	n = string->len;
509 	p = string->val;
510 
511 	if (p != NULL) {
512 		bad = 0;
513 		while (n > 0) {
514 			for (i = 0; i < num; i++) {
515 				filter = &flist[i];
516 				if (!filter->flag) {
517 					(*filter->filter_function)(*p, filter);
518 					if (filter->flag) {
519 						bad++;
520 					}
521 				}
522 			}
523 			if ((num - 1) <= bad && !strict) {
524 				break;
525 			}
526 			p++;
527 			n--;
528 		}
529 	}
530 
531 	/* judge */
532 	encoding = NULL;
533 
534 	for (i = 0; i < num; i++) {
535 		filter = &flist[i];
536 		if (!filter->flag) {
537 			if (strict && filter->status) {
538  				continue;
539  			}
540 			encoding = filter->encoding;
541 			break;
542 		}
543 	}
544 
545 	/* fall-back judge */
546 	if (!encoding) {
547 		for (i = 0; i < num; i++) {
548 			filter = &flist[i];
549 			if (!filter->flag && (!strict || !filter->status)) {
550 				encoding = filter->encoding;
551 				break;
552 			}
553 		}
554 	}
555 
556 	efree((void *)flist);
557 
558 	return encoding;
559 }
560 
561 /*
562  *  strlen
563  */
/*
 * Output callback that counts characters: increments the size_t counter that
 * `data` points to and returns `c` unchanged.
 */
static int
filter_count_output(int c, void *data)
{
	size_t *counter = (size_t *)data;

	(*counter)++;
	return c;
}
570 
571 size_t
mbfl_strlen(const mbfl_string * string)572 mbfl_strlen(const mbfl_string *string)
573 {
574 	size_t len, n, k;
575 	unsigned char *p;
576 	const mbfl_encoding *encoding = string->encoding;
577 
578 	len = 0;
579 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
580 		len = string->len;
581 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
582 		len = string->len/2;
583 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
584 		len = string->len/4;
585 	} else if (encoding->mblen_table != NULL) {
586 		const unsigned char *mbtab = encoding->mblen_table;
587 		n = 0;
588 		p = string->val;
589 		k = string->len;
590 		/* count */
591 		if (p != NULL) {
592 			while (n < k) {
593 				unsigned m = mbtab[*p];
594 				n += m;
595 				p += m;
596 				len++;
597 			}
598 		}
599 	} else {
600 		/* wchar filter */
601 		mbfl_convert_filter *filter = mbfl_convert_filter_new(
602 		  string->encoding,
603 		  &mbfl_encoding_wchar,
604 		  filter_count_output, 0, &len);
605 		if (filter == NULL) {
606 			return (size_t) -1;
607 		}
608 		/* count */
609 		n = string->len;
610 		p = string->val;
611 		if (p != NULL) {
612 			while (n > 0) {
613 				(*filter->filter_function)(*p++, filter);
614 				n--;
615 			}
616 		}
617 		mbfl_convert_filter_delete(filter);
618 	}
619 
620 	return len;
621 }
622 
623 
624 /*
625  *  strpos
626  */
627 struct collector_strpos_data {
628 	mbfl_convert_filter *next_filter;
629 	mbfl_wchar_device needle;
630 	size_t needle_len;
631 	size_t start;
632 	size_t output;
633 	size_t found_pos;
634 	size_t needle_pos;
635 	size_t matched_pos;
636 };
637 
638 static int
collector_strpos(int c,void * data)639 collector_strpos(int c, void* data)
640 {
641 	int *p, *h, *m;
642 	ssize_t n;
643 	struct collector_strpos_data *pc = (struct collector_strpos_data*)data;
644 
645 	if (pc->output >= pc->start) {
646 		if (c == (int)pc->needle.buffer[pc->needle_pos]) {
647 			if (pc->needle_pos == 0) {
648 				pc->found_pos = pc->output;			/* found position */
649 			}
650 			pc->needle_pos++;						/* needle pointer */
651 			if (pc->needle_pos >= pc->needle_len) {
652 				pc->matched_pos = pc->found_pos;	/* matched position */
653 				pc->needle_pos--;
654 				goto retry;
655 			}
656 		} else if (pc->needle_pos != 0) {
657 retry:
658 			h = (int *)pc->needle.buffer;
659 			h++;
660 			for (;;) {
661 				pc->found_pos++;
662 				p = h;
663 				m = (int *)pc->needle.buffer;
664 				n = pc->needle_pos - 1;
665 				while (n > 0 && *p == *m) {
666 					n--;
667 					p++;
668 					m++;
669 				}
670 				if (n <= 0) {
671 					if (*m != c) {
672 						pc->needle_pos = 0;
673 					}
674 					break;
675 				} else {
676 					h++;
677 					pc->needle_pos--;
678 				}
679 			}
680 		}
681 	}
682 
683 	pc->output++;
684 	return c;
685 }
686 
mbfl_find_offset_utf8(const unsigned char * str,const unsigned char * end,ssize_t offset)687 static const unsigned char *mbfl_find_offset_utf8(
688 		const unsigned char *str, const unsigned char *end, ssize_t offset) {
689 	if (offset < 0) {
690 		const unsigned char *pos = end;
691 		while (offset < 0) {
692 			if (pos <= str) {
693 				return NULL;
694 			}
695 
696 			unsigned char c = *(--pos);
697 			if (c < 0x80) {
698 				++offset;
699 			} else if ((c & 0xc0) != 0x80) {
700 				++offset;
701 			}
702 		}
703 		return pos;
704 	} else {
705 		const unsigned char *u8_tbl = mbfl_encoding_utf8.mblen_table;
706 		const unsigned char *pos = str;
707 		while (offset-- > 0) {
708 			if (pos >= end) {
709 				return NULL;
710 			}
711 			pos += u8_tbl[*pos];
712 		}
713 		return pos;
714 	}
715 }
716 
/*
 * Return the number of UTF-8 characters that begin in [start, pos), i.e. the
 * character offset of `pos` within the buffer starting at `start`.  Every
 * byte that is not a continuation byte (10xxxxxx) counts as one character.
 */
static size_t mbfl_pointer_to_offset_utf8(const unsigned char *start, const unsigned char *pos) {
	size_t count = 0;

	while (pos > start) {
		--pos;
		if ((*pos & 0xc0) != 0x80) {
			++count;
		}
	}
	return count;
}
729 
730 size_t
mbfl_strpos(mbfl_string * haystack,mbfl_string * needle,ssize_t offset,int reverse)731 mbfl_strpos(
732     mbfl_string *haystack,
733     mbfl_string *needle,
734     ssize_t offset,
735     int reverse)
736 {
737 	size_t result;
738 	mbfl_string _haystack_u8, _needle_u8;
739 	const mbfl_string *haystack_u8, *needle_u8 = NULL;
740 	const unsigned char *offset_pointer;
741 
742 	if (haystack->encoding->no_encoding != mbfl_no_encoding_utf8) {
743 		mbfl_string_init(&_haystack_u8);
744 		haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, &mbfl_encoding_utf8);
745 		if (haystack_u8 == NULL) {
746 			result = MBFL_ERROR_ENCODING;
747 			goto out;
748 		}
749 	} else {
750 		haystack_u8 = haystack;
751 	}
752 
753 	if (needle->encoding->no_encoding != mbfl_no_encoding_utf8) {
754 		mbfl_string_init(&_needle_u8);
755 		needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, &mbfl_encoding_utf8);
756 		if (needle_u8 == NULL) {
757 			result = MBFL_ERROR_ENCODING;
758 			goto out;
759 		}
760 	} else {
761 		needle_u8 = needle;
762 	}
763 
764 	offset_pointer = mbfl_find_offset_utf8(
765 		haystack_u8->val, haystack_u8->val + haystack_u8->len, offset);
766 	if (!offset_pointer) {
767 		result = MBFL_ERROR_OFFSET;
768 		goto out;
769 	}
770 
771 	result = MBFL_ERROR_NOT_FOUND;
772 	if (haystack_u8->len < needle_u8->len) {
773 		goto out;
774 	}
775 
776 	const char *found_pos;
777 	if (!reverse) {
778 		found_pos = zend_memnstr(
779 			(const char *) offset_pointer,
780 			(const char *) needle_u8->val, needle_u8->len,
781 			(const char *) haystack_u8->val + haystack_u8->len);
782 	} else {
783 		if (offset >= 0) {
784 			found_pos = zend_memnrstr(
785 				(const char *) offset_pointer,
786 				(const char *) needle_u8->val, needle_u8->len,
787 				(const char *) haystack_u8->val + haystack_u8->len);
788 		} else {
789 			size_t needle_len = mbfl_strlen(needle_u8);
790 			offset_pointer = mbfl_find_offset_utf8(
791 				offset_pointer, haystack_u8->val + haystack_u8->len, needle_len);
792 			if (!offset_pointer) {
793 				offset_pointer = haystack_u8->val + haystack_u8->len;
794 			}
795 
796 			found_pos = zend_memnrstr(
797 				(const char *) haystack_u8->val,
798 				(const char *) needle_u8->val, needle_u8->len,
799 				(const char *) offset_pointer);
800 		}
801 	}
802 
803 	if (found_pos) {
804 		result = mbfl_pointer_to_offset_utf8(haystack_u8->val, (const unsigned char *) found_pos);
805 	}
806 
807 out:
808 	if (haystack_u8 == &_haystack_u8) {
809 		mbfl_string_clear(&_haystack_u8);
810 	}
811 	if (needle_u8 == &_needle_u8) {
812 		mbfl_string_clear(&_needle_u8);
813 	}
814 	return result;
815 }
816 
817 /*
818  *  substr_count
819  */
820 
821 size_t
mbfl_substr_count(mbfl_string * haystack,mbfl_string * needle)822 mbfl_substr_count(
823     mbfl_string *haystack,
824     mbfl_string *needle
825    )
826 {
827 	size_t n, result = 0;
828 	unsigned char *p;
829 	mbfl_convert_filter *filter;
830 	struct collector_strpos_data pc;
831 
832 	/* needle is converted into wchar */
833 	mbfl_wchar_device_init(&pc.needle);
834 	filter = mbfl_convert_filter_new(
835 	  needle->encoding,
836 	  &mbfl_encoding_wchar,
837 	  mbfl_wchar_device_output, 0, &pc.needle);
838 	if (filter == NULL) {
839 		return MBFL_ERROR_ENCODING;
840 	}
841 	mbfl_convert_filter_feed_string(filter, needle->val, needle->len);
842 	mbfl_convert_filter_flush(filter);
843 	mbfl_convert_filter_delete(filter);
844 	pc.needle_len = pc.needle.pos;
845 	if (pc.needle.buffer == NULL) {
846 		return MBFL_ERROR_ENCODING;
847 	}
848 	if (pc.needle_len == 0) {
849 		mbfl_wchar_device_clear(&pc.needle);
850 		return MBFL_ERROR_EMPTY;
851 	}
852 	/* initialize filter and collector data */
853 	filter = mbfl_convert_filter_new(
854 	  haystack->encoding,
855 	  &mbfl_encoding_wchar,
856 	  collector_strpos, 0, &pc);
857 	if (filter == NULL) {
858 		mbfl_wchar_device_clear(&pc.needle);
859 		return MBFL_ERROR_ENCODING;
860 	}
861 	pc.start = 0;
862 	pc.output = 0;
863 	pc.needle_pos = 0;
864 	pc.found_pos = 0;
865 	pc.matched_pos = MBFL_ERROR_NOT_FOUND;
866 
867 	/* feed data */
868 	p = haystack->val;
869 	n = haystack->len;
870 	if (p != NULL) {
871 		while (n > 0) {
872 			if ((*filter->filter_function)(*p++, filter) < 0) {
873 				pc.matched_pos = MBFL_ERROR_ENCODING;
874 				break;
875 			}
876 			if (pc.matched_pos != MBFL_ERROR_NOT_FOUND) {
877 				++result;
878 				pc.matched_pos = MBFL_ERROR_NOT_FOUND;
879 				pc.needle_pos = 0;
880 			}
881 			n--;
882 		}
883 	}
884 	mbfl_convert_filter_flush(filter);
885 	mbfl_convert_filter_delete(filter);
886 	mbfl_wchar_device_clear(&pc.needle);
887 
888 	return result;
889 }
890 
891 /*
892  *  substr
893  */
894 struct collector_substr_data {
895 	mbfl_convert_filter *next_filter;
896 	size_t start;
897 	size_t stop;
898 	size_t output;
899 };
900 
901 static int
collector_substr(int c,void * data)902 collector_substr(int c, void* data)
903 {
904 	struct collector_substr_data *pc = (struct collector_substr_data*)data;
905 
906 	if (pc->output >= pc->stop) {
907 		return -1;
908 	}
909 
910 	if (pc->output >= pc->start) {
911 		(*pc->next_filter->filter_function)(c, pc->next_filter);
912 	}
913 
914 	pc->output++;
915 
916 	return c;
917 }
918 
919 mbfl_string *
mbfl_substr(mbfl_string * string,mbfl_string * result,size_t from,size_t length)920 mbfl_substr(
921     mbfl_string *string,
922     mbfl_string *result,
923     size_t from,
924     size_t length)
925 {
926 	const mbfl_encoding *encoding = string->encoding;
927 	size_t n, k, len, start, end;
928 	unsigned m;
929 	unsigned char *p, *w;
930 
931 	mbfl_string_init(result);
932 	result->encoding = string->encoding;
933 
934 	if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
935 	   encoding->mblen_table != NULL) {
936 		len = string->len;
937 		if (encoding->flag & MBFL_ENCTYPE_SBCS) {
938 			start = from;
939 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
940 			start = from*2;
941 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
942 			start = from*4;
943 		} else {
944 			const unsigned char *mbtab = encoding->mblen_table;
945 			start = 0;
946 			n = 0;
947 			k = 0;
948 			p = string->val;
949 			/* search start position */
950 			while (k <= from) {
951 				start = n;
952 				if (n >= len) {
953 					break;
954 				}
955 				m = mbtab[*p];
956 				n += m;
957 				p += m;
958 				k++;
959 			}
960 		}
961 
962 		if (length == MBFL_SUBSTR_UNTIL_END) {
963 			end = len;
964 		} else if (encoding->flag & MBFL_ENCTYPE_SBCS) {
965 			end = start + length;
966 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
967 			end = start + length*2;
968 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
969 			end = start + length*4;
970 		} else {
971 			const unsigned char *mbtab = encoding->mblen_table;
972 			end = start;
973 			n = start;
974 			k = 0;
975 			p = string->val + start;
976 			/* detect end position */
977 			while (k <= length) {
978 				end = n;
979 				if (n >= len) {
980 					break;
981 				}
982 				m = mbtab[*p];
983 				n += m;
984 				p += m;
985 				k++;
986 			}
987 		}
988 
989 		if (start > len) {
990 			start = len;
991 		}
992 		if (end > len) {
993 			end = len;
994 		}
995 		if (start > end) {
996 			start = end;
997 		}
998 
999 		/* allocate memory and copy */
1000 		n = end - start;
1001 		result->len = 0;
1002 		result->val = w = (unsigned char*)emalloc(n + 1);
1003 		result->len = n;
1004 		memcpy(w, string->val + start, n);
1005 		w[n] = '\0';
1006 	} else {
1007 		mbfl_memory_device device;
1008 		struct collector_substr_data pc;
1009 		mbfl_convert_filter *decoder;
1010 		mbfl_convert_filter *encoder;
1011 
1012 		if (length == MBFL_SUBSTR_UNTIL_END) {
1013 			length = mbfl_strlen(string) - from;
1014 		}
1015 
1016 		mbfl_memory_device_init(&device, length + 1, 0);
1017 		mbfl_string_init(result);
1018 		result->encoding = string->encoding;
1019 		/* output code filter */
1020 		decoder = mbfl_convert_filter_new(
1021 		    &mbfl_encoding_wchar,
1022 		    string->encoding,
1023 		    mbfl_memory_device_output, 0, &device);
1024 		/* wchar filter */
1025 		encoder = mbfl_convert_filter_new(
1026 		    string->encoding,
1027 		    &mbfl_encoding_wchar,
1028 		    collector_substr, 0, &pc);
1029 		if (decoder == NULL || encoder == NULL) {
1030 			mbfl_convert_filter_delete(encoder);
1031 			mbfl_convert_filter_delete(decoder);
1032 			return NULL;
1033 		}
1034 		pc.next_filter = decoder;
1035 		pc.start = from;
1036 		pc.stop = from + length;
1037 		pc.output = 0;
1038 
1039 		/* feed data */
1040 		p = string->val;
1041 		n = string->len;
1042 		if (p != NULL) {
1043 			while (n > 0) {
1044 				if ((*encoder->filter_function)(*p++, encoder) < 0) {
1045 					break;
1046 				}
1047 				n--;
1048 			}
1049 		}
1050 
1051 		mbfl_convert_filter_flush(encoder);
1052 		mbfl_convert_filter_flush(decoder);
1053 		result = mbfl_memory_device_result(&device, result);
1054 		mbfl_convert_filter_delete(encoder);
1055 		mbfl_convert_filter_delete(decoder);
1056 	}
1057 
1058 	return result;
1059 }
1060 
1061 /*
1062  *  strcut
1063  */
1064 mbfl_string *
mbfl_strcut(mbfl_string * string,mbfl_string * result,size_t from,size_t length)1065 mbfl_strcut(
1066     mbfl_string *string,
1067     mbfl_string *result,
1068     size_t from,
1069     size_t length)
1070 {
1071 	const mbfl_encoding *encoding = string->encoding;
1072 	mbfl_memory_device device;
1073 
1074 	if (from >= string->len) {
1075 		from = string->len;
1076 	}
1077 
1078 	mbfl_string_init(result);
1079 	result->encoding = string->encoding;
1080 
1081 	if ((encoding->flag & (MBFL_ENCTYPE_SBCS
1082 				| MBFL_ENCTYPE_WCS2BE
1083 				| MBFL_ENCTYPE_WCS2LE
1084 				| MBFL_ENCTYPE_WCS4BE
1085 				| MBFL_ENCTYPE_WCS4LE))
1086 			|| encoding->mblen_table != NULL) {
1087 		const unsigned char *start = NULL;
1088 		const unsigned char *end = NULL;
1089 		unsigned char *w;
1090 		size_t sz;
1091 
1092 		if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1093 			from &= -2;
1094 
1095 			if (length >= string->len - from) {
1096 				length = string->len - from;
1097 			}
1098 
1099 			start = string->val + from;
1100 			end   = start + (length & -2);
1101 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1102 			from &= -4;
1103 
1104 			if (length >= string->len - from) {
1105 				length = string->len - from;
1106 			}
1107 
1108 			start = string->val + from;
1109 			end   = start + (length & -4);
1110 		} else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
1111 			if (length >= string->len - from) {
1112 				length = string->len - from;
1113 			}
1114 
1115 			start = string->val + from;
1116 			end = start + length;
1117 		} else if (encoding->mblen_table != NULL) {
1118 			const unsigned char *mbtab = encoding->mblen_table;
1119 			const unsigned char *p, *q;
1120 			int m;
1121 
1122 			/* search start position */
1123 			for (m = 0, p = string->val, q = p + from;
1124 					p < q; p += (m = mbtab[*p]));
1125 
1126 			if (p > q) {
1127 				p -= m;
1128 			}
1129 
1130 			start = p;
1131 
1132 			/* search end position */
1133 			if (length >= string->len - (start - string->val)) {
1134 				end = string->val + string->len;
1135 			} else {
1136 				for (q = p + length; p < q; p += (m = mbtab[*p]));
1137 
1138 				if (p > q) {
1139 					p -= m;
1140 				}
1141 				end = p;
1142 			}
1143 		} else {
1144 			/* never reached */
1145 			return NULL;
1146 		}
1147 
1148 		/* allocate memory and copy string */
1149 		sz = end - start;
1150 		w = ecalloc(sz + 8, sizeof(unsigned char));
1151 
1152 		memcpy(w, start, sz);
1153 		w[sz] = '\0';
1154 		w[sz + 1] = '\0';
1155 		w[sz + 2] = '\0';
1156 		w[sz + 3] = '\0';
1157 
1158 		result->val = w;
1159 		result->len = sz;
1160 	} else {
1161 		mbfl_convert_filter *encoder     = NULL;
1162 		mbfl_convert_filter *decoder     = NULL;
1163 		const unsigned char *p, *q, *r;
1164 		struct {
1165 			mbfl_convert_filter encoder;
1166 			mbfl_convert_filter decoder;
1167 			const unsigned char *p;
1168 			size_t pos;
1169 		} bk, _bk;
1170 
1171 		/* output code filter */
1172 		if (!(decoder = mbfl_convert_filter_new(
1173 				&mbfl_encoding_wchar,
1174 				string->encoding,
1175 				mbfl_memory_device_output, 0, &device))) {
1176 			return NULL;
1177 		}
1178 
1179 		/* wchar filter */
1180 		if (!(encoder = mbfl_convert_filter_new(
1181 				string->encoding,
1182 				&mbfl_encoding_wchar,
1183 				mbfl_filter_output_null,
1184 				NULL, NULL))) {
1185 			mbfl_convert_filter_delete(decoder);
1186 			return NULL;
1187 		}
1188 
1189 		mbfl_memory_device_init(&device, length + 8, 0);
1190 
1191 		p = string->val;
1192 
1193 		/* search start position */
1194 		for (q = string->val + from; p < q; p++) {
1195 			(*encoder->filter_function)(*p, encoder);
1196 		}
1197 
1198 		/* switch the drain direction */
1199 		encoder->output_function = (output_function_t)decoder->filter_function;
1200 		encoder->flush_function = (flush_function_t)decoder->filter_flush;
1201 		encoder->data = decoder;
1202 
1203 		q = string->val + string->len;
1204 
1205 		/* save the encoder, decoder state and the pointer */
1206 		mbfl_convert_filter_copy(decoder, &_bk.decoder);
1207 		mbfl_convert_filter_copy(encoder, &_bk.encoder);
1208 		_bk.p = p;
1209 		_bk.pos = device.pos;
1210 
1211 		if (length > q - p) {
1212 			length = q - p;
1213 		}
1214 
1215 		if (length >= 20) {
1216 			/* output a little shorter than "length" */
1217 			/* XXX: the constant "20" was determined purely on the heuristics. */
1218 			for (r = p + length - 20; p < r; p++) {
1219 				(*encoder->filter_function)(*p, encoder);
1220 			}
1221 
1222 			/* if the offset of the resulting string exceeds the length,
1223 			 * then restore the state */
1224 			if (device.pos > length) {
1225 				p = _bk.p;
1226 				device.pos = _bk.pos;
1227 				if (decoder->filter_dtor)
1228 					decoder->filter_dtor(decoder);
1229 				if (encoder->filter_dtor)
1230 					encoder->filter_dtor(encoder);
1231 				mbfl_convert_filter_copy(&_bk.decoder, decoder);
1232 				mbfl_convert_filter_copy(&_bk.encoder, encoder);
1233 				bk = _bk;
1234 			} else {
1235 				/* save the encoder, decoder state and the pointer */
1236 				mbfl_convert_filter_copy(decoder, &bk.decoder);
1237 				mbfl_convert_filter_copy(encoder, &bk.encoder);
1238 				bk.p = p;
1239 				bk.pos = device.pos;
1240 
1241 				/* flush the stream */
1242 				(*encoder->filter_flush)(encoder);
1243 
1244 				/* if the offset of the resulting string exceeds the length,
1245 				 * then restore the state */
1246 				if (device.pos > length) {
1247 					if (bk.decoder.filter_dtor)
1248 						bk.decoder.filter_dtor(&bk.decoder);
1249 					if (bk.encoder.filter_dtor)
1250 						bk.encoder.filter_dtor(&bk.encoder);
1251 
1252 					p = _bk.p;
1253 					device.pos = _bk.pos;
1254 					if (decoder->filter_dtor)
1255 						decoder->filter_dtor(decoder);
1256 					if (encoder->filter_dtor)
1257 						encoder->filter_dtor(encoder);
1258 					mbfl_convert_filter_copy(&_bk.decoder, decoder);
1259 					mbfl_convert_filter_copy(&_bk.encoder, encoder);
1260 					bk = _bk;
1261 				} else {
1262 					if (_bk.decoder.filter_dtor)
1263 						_bk.decoder.filter_dtor(&_bk.decoder);
1264 					if (_bk.encoder.filter_dtor)
1265 						_bk.encoder.filter_dtor(&_bk.encoder);
1266 
1267 					p = bk.p;
1268 					device.pos = bk.pos;
1269 					if (decoder->filter_dtor)
1270 						decoder->filter_dtor(decoder);
1271 					if (encoder->filter_dtor)
1272 						encoder->filter_dtor(encoder);
1273 					mbfl_convert_filter_copy(&bk.decoder, decoder);
1274 					mbfl_convert_filter_copy(&bk.encoder, encoder);
1275 				}
1276 			}
1277 		} else {
1278 			bk = _bk;
1279 		}
1280 
1281 		/* detect end position */
1282 		while (p < q) {
1283 			(*encoder->filter_function)(*p, encoder);
1284 
1285 			if (device.pos > length) {
1286 				/* restore filter */
1287 				p = bk.p;
1288 				device.pos = bk.pos;
1289 				if (decoder->filter_dtor)
1290 					decoder->filter_dtor(decoder);
1291 				if (encoder->filter_dtor)
1292 					encoder->filter_dtor(encoder);
1293 				mbfl_convert_filter_copy(&bk.decoder, decoder);
1294 				mbfl_convert_filter_copy(&bk.encoder, encoder);
1295 				break;
1296 			}
1297 
1298 			p++;
1299 
1300 			/* backup current state */
1301 			mbfl_convert_filter_copy(decoder, &_bk.decoder);
1302 			mbfl_convert_filter_copy(encoder, &_bk.encoder);
1303 			_bk.pos = device.pos;
1304 			_bk.p = p;
1305 
1306 			(*encoder->filter_flush)(encoder);
1307 
1308 			if (device.pos > length) {
1309 				if (_bk.decoder.filter_dtor)
1310 					_bk.decoder.filter_dtor(&_bk.decoder);
1311 				if (_bk.encoder.filter_dtor)
1312 					_bk.encoder.filter_dtor(&_bk.encoder);
1313 
1314 				/* restore filter */
1315 				p = bk.p;
1316 				device.pos = bk.pos;
1317 				if (decoder->filter_dtor)
1318 					decoder->filter_dtor(decoder);
1319 				if (encoder->filter_dtor)
1320 					encoder->filter_dtor(encoder);
1321 				mbfl_convert_filter_copy(&bk.decoder, decoder);
1322 				mbfl_convert_filter_copy(&bk.encoder, encoder);
1323 				break;
1324 			}
1325 
1326 			if (bk.decoder.filter_dtor)
1327 				bk.decoder.filter_dtor(&bk.decoder);
1328 			if (bk.encoder.filter_dtor)
1329 				bk.encoder.filter_dtor(&bk.encoder);
1330 
1331 			p = _bk.p;
1332 			device.pos = _bk.pos;
1333 			if (decoder->filter_dtor)
1334 				decoder->filter_dtor(decoder);
1335 			if (encoder->filter_dtor)
1336 				encoder->filter_dtor(encoder);
1337 			mbfl_convert_filter_copy(&_bk.decoder, decoder);
1338 			mbfl_convert_filter_copy(&_bk.encoder, encoder);
1339 
1340 			bk = _bk;
1341 		}
1342 
1343 		(*encoder->filter_flush)(encoder);
1344 
1345 		if (bk.decoder.filter_dtor)
1346 			bk.decoder.filter_dtor(&bk.decoder);
1347 		if (bk.encoder.filter_dtor)
1348 			bk.encoder.filter_dtor(&bk.encoder);
1349 
1350 		result = mbfl_memory_device_result(&device, result);
1351 
1352 		mbfl_convert_filter_delete(encoder);
1353 		mbfl_convert_filter_delete(decoder);
1354 	}
1355 
1356 	return result;
1357 }
1358 
1359 
1360 /*
1361  *  strwidth
1362  */
is_fullwidth(int c)1363 static size_t is_fullwidth(int c)
1364 {
1365 	int i;
1366 
1367 	if (c < mbfl_eaw_table[0].begin) {
1368 		return 0;
1369 	}
1370 
1371 	for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1372 		if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1373 			return 1;
1374 		}
1375 	}
1376 
1377 	return 0;
1378 }
1379 
/*
 * Output callback used by mbfl_strwidth(): `data` points at a size_t running
 * total, which grows by 2 when is_fullwidth() accepts `c` and by 1 otherwise.
 * The character is returned unchanged.
 */
static int
filter_count_width(int c, void* data)
{
	size_t *total = (size_t *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}

	return c;
}
1386 
1387 size_t
mbfl_strwidth(mbfl_string * string)1388 mbfl_strwidth(mbfl_string *string)
1389 {
1390 	size_t len, n;
1391 	unsigned char *p;
1392 	mbfl_convert_filter *filter;
1393 
1394 	len = 0;
1395 	if (string->len > 0 && string->val != NULL) {
1396 		/* wchar filter */
1397 		filter = mbfl_convert_filter_new(
1398 		    string->encoding,
1399 		    &mbfl_encoding_wchar,
1400 		    filter_count_width, 0, &len);
1401 		if (filter == NULL) {
1402 			mbfl_convert_filter_delete(filter);
1403 			return -1;
1404 		}
1405 
1406 		/* feed data */
1407 		p = string->val;
1408 		n = string->len;
1409 		while (n > 0) {
1410 			(*filter->filter_function)(*p++, filter);
1411 			n--;
1412 		}
1413 
1414 		mbfl_convert_filter_flush(filter);
1415 		mbfl_convert_filter_delete(filter);
1416 	}
1417 
1418 	return len;
1419 }
1420 
1421 
1422 /*
1423  *  strimwidth
1424  */
/* Shared state between mbfl_strimwidth() and its collector_strimwidth() callback. */
struct collector_strimwidth_data {
	mbfl_convert_filter *decoder;        /* wchar -> string encoding, writes into `device` */
	mbfl_convert_filter *decoder_backup; /* receives a copy of `decoder` when the width limit is first exceeded */
	mbfl_memory_device device;           /* output buffer */
	size_t from;                         /* characters with an index below this are not emitted */
	size_t width;                        /* width limit currently in force */
	size_t outwidth;                     /* accumulated width of the characters at or past `from` */
	size_t outchar;                      /* number of characters seen so far */
	size_t endpos;                       /* device.pos recorded when the limit was first exceeded */
	int status;                          /* 0: within limit; incremented on each overflow; 10: pass characters straight through */
};
1436 
1437 static int
collector_strimwidth(int c,void * data)1438 collector_strimwidth(int c, void* data)
1439 {
1440 	struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1441 
1442 	switch (pc->status) {
1443 	case 10:
1444 		(*pc->decoder->filter_function)(c, pc->decoder);
1445 		break;
1446 	default:
1447 		if (pc->outchar >= pc->from) {
1448 			pc->outwidth += (is_fullwidth(c) ? 2: 1);
1449 
1450 			if (pc->outwidth > pc->width) {
1451 				if (pc->status == 0) {
1452 					pc->endpos = pc->device.pos;
1453 					mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1454 				}
1455 				pc->status++;
1456 				(*pc->decoder->filter_function)(c, pc->decoder);
1457 				c = -1;
1458 			} else {
1459 				(*pc->decoder->filter_function)(c, pc->decoder);
1460 			}
1461 		}
1462 		pc->outchar++;
1463 		break;
1464 	}
1465 
1466 	return c;
1467 }
1468 
1469 mbfl_string *
mbfl_strimwidth(mbfl_string * string,mbfl_string * marker,mbfl_string * result,size_t from,size_t width)1470 mbfl_strimwidth(
1471     mbfl_string *string,
1472     mbfl_string *marker,
1473     mbfl_string *result,
1474     size_t from,
1475     size_t width)
1476 {
1477 	struct collector_strimwidth_data pc;
1478 	mbfl_convert_filter *encoder;
1479 	size_t n, mkwidth;
1480 	unsigned char *p;
1481 
1482 	if (string == NULL || result == NULL) {
1483 		return NULL;
1484 	}
1485 	mbfl_string_init(result);
1486 	result->encoding = string->encoding;
1487 	mbfl_memory_device_init(&pc.device, MIN(string->len, width), 0);
1488 
1489 	/* output code filter */
1490 	pc.decoder = mbfl_convert_filter_new(
1491 	    &mbfl_encoding_wchar,
1492 	    string->encoding,
1493 	    mbfl_memory_device_output, 0, &pc.device);
1494 	pc.decoder_backup = mbfl_convert_filter_new(
1495 	    &mbfl_encoding_wchar,
1496 	    string->encoding,
1497 	    mbfl_memory_device_output, 0, &pc.device);
1498 	/* wchar filter */
1499 	encoder = mbfl_convert_filter_new(
1500 	    string->encoding,
1501 	    &mbfl_encoding_wchar,
1502 	    collector_strimwidth, 0, &pc);
1503 	if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1504 		mbfl_convert_filter_delete(encoder);
1505 		mbfl_convert_filter_delete(pc.decoder);
1506 		mbfl_convert_filter_delete(pc.decoder_backup);
1507 		return NULL;
1508 	}
1509 	mkwidth = 0;
1510 	if (marker) {
1511 		mkwidth = mbfl_strwidth(marker);
1512 	}
1513 	pc.from = from;
1514 	pc.width = width - mkwidth;
1515 	pc.outwidth = 0;
1516 	pc.outchar = 0;
1517 	pc.status = 0;
1518 	pc.endpos = 0;
1519 
1520 	/* feed data */
1521 	p = string->val;
1522 	n = string->len;
1523 	if (p != NULL) {
1524 		while (n > 0) {
1525 			n--;
1526 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
1527 				break;
1528 			}
1529 		}
1530 		mbfl_convert_filter_flush(encoder);
1531 		if (pc.status != 0 && mkwidth > 0) {
1532 			pc.width += mkwidth;
1533 			if (n > 0) {
1534 				while (n > 0) {
1535 					if ((*encoder->filter_function)(*p++, encoder) < 0) {
1536 						break;
1537 					}
1538 					n--;
1539 				}
1540 				mbfl_convert_filter_flush(encoder);
1541 			} else if (pc.outwidth > pc.width) {
1542 				pc.status++;
1543 			}
1544 			if (pc.status != 1) {
1545 				pc.status = 10;
1546 				pc.device.pos = pc.endpos;
1547 				mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1548 				mbfl_convert_filter_reset(encoder, marker->encoding, &mbfl_encoding_wchar);
1549 				p = marker->val;
1550 				n = marker->len;
1551 				while (n > 0) {
1552 					if ((*encoder->filter_function)(*p++, encoder) < 0) {
1553 						break;
1554 					}
1555 					n--;
1556 				}
1557 				mbfl_convert_filter_flush(encoder);
1558 			}
1559 		} else if (pc.status != 0) {
1560 			pc.device.pos = pc.endpos;
1561 			mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1562 		}
1563 		mbfl_convert_filter_flush(pc.decoder);
1564 	}
1565 	result = mbfl_memory_device_result(&pc.device, result);
1566 	mbfl_convert_filter_delete(encoder);
1567 	mbfl_convert_filter_delete(pc.decoder);
1568 	mbfl_convert_filter_delete(pc.decoder_backup);
1569 
1570 	return result;
1571 }
1572 
1573 mbfl_string *
mbfl_ja_jp_hantozen(mbfl_string * string,mbfl_string * result,int mode)1574 mbfl_ja_jp_hantozen(
1575     mbfl_string *string,
1576     mbfl_string *result,
1577     int mode)
1578 {
1579 	size_t n;
1580 	unsigned char *p;
1581 	mbfl_memory_device device;
1582 	mbfl_convert_filter *decoder = NULL;
1583 	mbfl_convert_filter *encoder = NULL;
1584 	mbfl_convert_filter *tl_filter = NULL;
1585 	mbfl_convert_filter *next_filter = NULL;
1586 	mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1587 
1588 	mbfl_memory_device_init(&device, string->len, 0);
1589 	mbfl_string_init(result);
1590 
1591 	result->encoding = string->encoding;
1592 
1593 	decoder = mbfl_convert_filter_new(
1594 		&mbfl_encoding_wchar,
1595 		string->encoding,
1596 		mbfl_memory_device_output, 0, &device);
1597 	if (decoder == NULL) {
1598 		goto out;
1599 	}
1600 	next_filter = decoder;
1601 
1602 	param = emalloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
1603 	param->mode = mode;
1604 
1605 	tl_filter = mbfl_convert_filter_new2(
1606 		&vtbl_tl_jisx0201_jisx0208,
1607 		(int(*)(int, void*))next_filter->filter_function,
1608 		(flush_function_t)next_filter->filter_flush,
1609 		next_filter);
1610 	if (tl_filter == NULL) {
1611 		efree(param);
1612 		goto out;
1613 	}
1614 
1615 	tl_filter->opaque = param;
1616 	next_filter = tl_filter;
1617 
1618 	encoder = mbfl_convert_filter_new(
1619 		string->encoding,
1620 		&mbfl_encoding_wchar,
1621 		(int(*)(int, void*))next_filter->filter_function,
1622 		(flush_function_t)next_filter->filter_flush,
1623 		next_filter);
1624 	if (encoder == NULL) {
1625 		goto out;
1626 	}
1627 
1628 	/* feed data */
1629 	p = string->val;
1630 	n = string->len;
1631 	if (p != NULL) {
1632 		while (n > 0) {
1633 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
1634 				break;
1635 			}
1636 			n--;
1637 		}
1638 	}
1639 
1640 	mbfl_convert_filter_flush(encoder);
1641 	result = mbfl_memory_device_result(&device, result);
1642 out:
1643 	if (tl_filter != NULL) {
1644 		if (tl_filter->opaque != NULL) {
1645 			efree(tl_filter->opaque);
1646 		}
1647 		mbfl_convert_filter_delete(tl_filter);
1648 	}
1649 
1650 	if (decoder != NULL) {
1651 		mbfl_convert_filter_delete(decoder);
1652 	}
1653 
1654 	if (encoder != NULL) {
1655 		mbfl_convert_filter_delete(encoder);
1656 	}
1657 
1658 	return result;
1659 }
1660 
1661 
1662 /*
1663  *  MIME header encode
1664  */
/* Working state of the MIME header encoder (see mime_header_encoder_new()). */
struct mime_header_encoder_data {
	mbfl_convert_filter *conv1_filter;        /* input encoding -> wchar, feeds mime_header_encoder_collector() */
	mbfl_convert_filter *block_filter;        /* wchar -> wchar, feeds mime_header_encoder_block_collector() */
	mbfl_convert_filter *conv2_filter;        /* wchar -> output charset, piped into encod_filter */
	mbfl_convert_filter *conv2_filter_backup; /* scratch copy of conv2_filter used around a trial encode */
	mbfl_convert_filter *encod_filter;        /* output charset -> transfer encoding, writes into outdev */
	mbfl_convert_filter *encod_filter_backup; /* scratch copy of encod_filter used around a trial encode */
	mbfl_memory_device outdev;                /* finished header text */
	mbfl_memory_device tmpdev;                /* characters buffered until it is known how to emit them */
	int status1;                              /* collector state: 0 initial / after a flush, 1 ordinary characters buffered, 11 inside an encoded word */
	int status2;                              /* block collector state: 1 once the encoded-word prefix has been written */
	size_t prevpos;                           /* outdev.pos saved before a trial encode */
	size_t linehead;                          /* outdev.pos at which the current output line starts */
	size_t firstindent;                       /* extra columns counted on the first line; zeroed at the first fold */
	int encnamelen;                           /* length of encname */
	int lwsplen;                              /* length of lwsp */
	char encname[128];                        /* encoded-word prefix, e.g. "=?ISO-2022-JP?B?" */
	char lwsp[16];                            /* folding sequence: line feed characters followed by a space */
};
1684 
1685 static int
mime_header_encoder_block_collector(int c,void * data)1686 mime_header_encoder_block_collector(int c, void *data)
1687 {
1688 	size_t n;
1689 	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1690 
1691 	switch (pe->status2) {
1692 	case 1:	/* encoded word */
1693 		pe->prevpos = pe->outdev.pos;
1694 		mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
1695 		mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
1696 		(*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1697 		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1698 		(*pe->encod_filter->filter_flush)(pe->encod_filter);
1699 		n = pe->outdev.pos - pe->linehead + pe->firstindent;
1700 		pe->outdev.pos = pe->prevpos;
1701 		mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
1702 		mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
1703 		if (n >= 74) {
1704 			(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1705 			(*pe->encod_filter->filter_flush)(pe->encod_filter);
1706 			mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);	/* ?= */
1707 			mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
1708 			pe->linehead = pe->outdev.pos;
1709 			pe->firstindent = 0;
1710 			mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1711 			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1712 		} else {
1713 			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1714 		}
1715 		break;
1716 
1717 	default:
1718 		mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1719 		c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1720 		pe->status2 = 1;
1721 		break;
1722 	}
1723 
1724 	return c;
1725 }
1726 
1727 static int
mime_header_encoder_collector(int c,void * data)1728 mime_header_encoder_collector(int c, void *data)
1729 {
1730 	static int qp_table[256] = {
1731 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
1732 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
1733 		1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
1734 		0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
1735 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
1736 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
1737 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
1738 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
1739 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
1740 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
1741 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
1742 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
1743 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
1744 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
1745 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
1746 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  /* 0xF0 */
1747 	};
1748 
1749 	size_t n;
1750 	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1751 
1752 	switch (pe->status1) {
1753 	case 11:	/* encoded word */
1754 		(*pe->block_filter->filter_function)(c, pe->block_filter);
1755 		break;
1756 
1757 	default:	/* ASCII */
1758 		if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
1759 			mbfl_memory_device_output(c, &pe->tmpdev);
1760 			pe->status1 = 1;
1761 		} else if (pe->status1 == 0 && c == 0x20) {	/* repeat SPACE */
1762 			mbfl_memory_device_output(c, &pe->tmpdev);
1763 		} else {
1764 			if (pe->tmpdev.pos < 74 && c == 0x20) {
1765 				n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
1766 				if (n > 74) {
1767 					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
1768 					pe->linehead = pe->outdev.pos;
1769 					pe->firstindent = 0;
1770 				} else if (pe->outdev.pos > 0) {
1771 					mbfl_memory_device_output(0x20, &pe->outdev);
1772 				}
1773 				mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
1774 				mbfl_memory_device_reset(&pe->tmpdev);
1775 				pe->status1 = 0;
1776 			} else {
1777 				n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
1778 				if (n > 60)  {
1779 					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
1780 					pe->linehead = pe->outdev.pos;
1781 					pe->firstindent = 0;
1782 				} else if (pe->outdev.pos > 0)  {
1783 					mbfl_memory_device_output(0x20, &pe->outdev);
1784 				}
1785 				mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
1786 				mbfl_memory_device_reset(&pe->tmpdev);
1787 				(*pe->block_filter->filter_function)(c, pe->block_filter);
1788 				pe->status1 = 11;
1789 			}
1790 		}
1791 		break;
1792 	}
1793 
1794 	return c;
1795 }
1796 
1797 mbfl_string *
mime_header_encoder_result(struct mime_header_encoder_data * pe,mbfl_string * result)1798 mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
1799 {
1800 	if (pe->status1 >= 10) {
1801 		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1802 		(*pe->encod_filter->filter_flush)(pe->encod_filter);
1803 		mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);		/* ?= */
1804 	} else if (pe->tmpdev.pos > 0) {
1805 		if (pe->outdev.pos > 0) {
1806 			if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent) > 74) {
1807 				mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
1808 			} else {
1809 				mbfl_memory_device_output(0x20, &pe->outdev);
1810 			}
1811 		}
1812 		mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
1813 	}
1814 	mbfl_memory_device_reset(&pe->tmpdev);
1815 	pe->prevpos = 0;
1816 	pe->linehead = 0;
1817 	pe->status1 = 0;
1818 	pe->status2 = 0;
1819 
1820 	return mbfl_memory_device_result(&pe->outdev, result);
1821 }
1822 
1823 struct mime_header_encoder_data*
mime_header_encoder_new(const mbfl_encoding * incode,const mbfl_encoding * outcode,const mbfl_encoding * transenc)1824 mime_header_encoder_new(
1825     const mbfl_encoding *incode,
1826     const mbfl_encoding *outcode,
1827     const mbfl_encoding *transenc)
1828 {
1829 	size_t n;
1830 	const char *s;
1831 	struct mime_header_encoder_data *pe;
1832 
1833 	/* get output encoding and check MIME charset name */
1834 	if (outcode->mime_name == NULL || outcode->mime_name[0] == '\0') {
1835 		return NULL;
1836 	}
1837 
1838 	pe = emalloc(sizeof(struct mime_header_encoder_data));
1839 	mbfl_memory_device_init(&pe->outdev, 0, 0);
1840 	mbfl_memory_device_init(&pe->tmpdev, 0, 0);
1841 	pe->prevpos = 0;
1842 	pe->linehead = 0;
1843 	pe->firstindent = 0;
1844 	pe->status1 = 0;
1845 	pe->status2 = 0;
1846 
1847 	/* make the encoding description string  exp. "=?ISO-2022-JP?B?" */
1848 	n = 0;
1849 	pe->encname[n++] = 0x3d;
1850 	pe->encname[n++] = 0x3f;
1851 	s = outcode->mime_name;
1852 	while (*s) {
1853 		pe->encname[n++] = *s++;
1854 	}
1855 	pe->encname[n++] = 0x3f;
1856 	if (transenc->no_encoding == mbfl_no_encoding_qprint) {
1857 		pe->encname[n++] = 0x51;
1858 	} else {
1859 		pe->encname[n++] = 0x42;
1860 		transenc = &mbfl_encoding_base64;
1861 	}
1862 	pe->encname[n++] = 0x3f;
1863 	pe->encname[n] = '\0';
1864 	pe->encnamelen = n;
1865 
1866 	n = 0;
1867 	pe->lwsp[n++] = 0x0d;
1868 	pe->lwsp[n++] = 0x0a;
1869 	pe->lwsp[n++] = 0x20;
1870 	pe->lwsp[n] = '\0';
1871 	pe->lwsplen = n;
1872 
1873 	/* transfer encode filter */
1874 	pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
1875 	pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
1876 
1877 	/* Output code filter */
1878 	pe->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
1879 	pe->conv2_filter_backup = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
1880 
1881 	/* encoded block filter */
1882 	pe->block_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, &mbfl_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
1883 
1884 	/* Input code filter */
1885 	pe->conv1_filter = mbfl_convert_filter_new(incode, &mbfl_encoding_wchar, mime_header_encoder_collector, 0, pe);
1886 
1887 	if (pe->encod_filter == NULL ||
1888 	    pe->encod_filter_backup == NULL ||
1889 	    pe->conv2_filter == NULL ||
1890 	    pe->conv2_filter_backup == NULL ||
1891 	    pe->conv1_filter == NULL) {
1892 		mime_header_encoder_delete(pe);
1893 		return NULL;
1894 	}
1895 
1896 	if (transenc->no_encoding == mbfl_no_encoding_qprint) {
1897 		pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
1898 		pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
1899 	} else {
1900 		pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
1901 		pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
1902 	}
1903 
1904 	return pe;
1905 }
1906 
1907 void
mime_header_encoder_delete(struct mime_header_encoder_data * pe)1908 mime_header_encoder_delete(struct mime_header_encoder_data *pe)
1909 {
1910 	if (pe) {
1911 		mbfl_convert_filter_delete(pe->conv1_filter);
1912 		mbfl_convert_filter_delete(pe->block_filter);
1913 		mbfl_convert_filter_delete(pe->conv2_filter);
1914 		mbfl_convert_filter_delete(pe->conv2_filter_backup);
1915 		mbfl_convert_filter_delete(pe->encod_filter);
1916 		mbfl_convert_filter_delete(pe->encod_filter_backup);
1917 		mbfl_memory_device_clear(&pe->outdev);
1918 		mbfl_memory_device_clear(&pe->tmpdev);
1919 		efree((void*)pe);
1920 	}
1921 }
1922 
1923 mbfl_string *
mbfl_mime_header_encode(mbfl_string * string,mbfl_string * result,const mbfl_encoding * outcode,const mbfl_encoding * encoding,const char * linefeed,int indent)1924 mbfl_mime_header_encode(
1925     mbfl_string *string,
1926     mbfl_string *result,
1927     const mbfl_encoding *outcode,
1928     const mbfl_encoding *encoding,
1929     const char *linefeed,
1930     int indent)
1931 {
1932 	size_t n;
1933 	unsigned char *p;
1934 	struct mime_header_encoder_data *pe;
1935 
1936 	mbfl_string_init(result);
1937 	result->encoding = &mbfl_encoding_ascii;
1938 
1939 	pe = mime_header_encoder_new(string->encoding, outcode, encoding);
1940 	if (pe == NULL) {
1941 		return NULL;
1942 	}
1943 
1944 	if (linefeed != NULL) {
1945 		n = 0;
1946 		while (*linefeed && n < 8) {
1947 			pe->lwsp[n++] = *linefeed++;
1948 		}
1949 		pe->lwsp[n++] = 0x20;
1950 		pe->lwsp[n] = '\0';
1951 		pe->lwsplen = n;
1952 	}
1953 	if (indent > 0 && indent < 74) {
1954 		pe->firstindent = indent;
1955 	}
1956 
1957 	n = string->len;
1958 	p = string->val;
1959 	while (n > 0) {
1960 		(*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
1961 		n--;
1962 	}
1963 
1964 	result = mime_header_encoder_result(pe, result);
1965 	mime_header_encoder_delete(pe);
1966 
1967 	return result;
1968 }
1969 
1970 
1971 /*
1972  *  MIME header decode
1973  */
/* Working state of the MIME header decoder (see mime_header_decoder_new()). */
struct mime_header_decoder_data {
	mbfl_convert_filter *deco_filter;  /* transfer decoding (`encoding` -> 8bit), piped into conv1_filter */
	mbfl_convert_filter *conv1_filter; /* `incode` -> wchar, piped into conv2_filter */
	mbfl_convert_filter *conv2_filter; /* wchar -> `outcode`, writes into outdev */
	mbfl_memory_device outdev;         /* decoded output */
	mbfl_memory_device tmpdev;         /* input held back until it is known whether it starts an encoded word */
	size_t cspos;                      /* offset in tmpdev at which the charset name begins */
	int status;                        /* state of mime_header_decoder_collector() */
	const mbfl_encoding *encoding;     /* transfer encoding of the current encoded word (initially 8bit) */
	const mbfl_encoding *incode;       /* charset of the current encoded word (initially ASCII) */
	const mbfl_encoding *outcode;      /* encoding of the decoded output */
};
1986 
/*
 * Per-character state machine of the MIME header decoder.
 *
 * Input that might start an encoded word ("=?charset?B?...?=" or
 * "=?charset?Q?...?=") is held in tmpdev until the prefix is either confirmed
 * or rejected; rejected text is pushed through conv1_filter unchanged.
 *
 * States (pd->status):
 *   0 (default)  plain text
 *   1            '=' seen, expecting '?'
 *   2            collecting the charset name up to the next '?'
 *   3            expecting the transfer encoding letter (B/b or Q/q)
 *   4            expecting the '?' that ends the prefix; filters are reset
 *   5            inside the encoded payload
 *   6            '?' seen inside the payload, expecting '='
 *   7            just after an encoded word
 *   8            line folding after an encoded word
 *   9            line folding after plain text
 *
 * Always returns `c`.
 */
static int
mime_header_decoder_collector(int c, void* data)
{
	const mbfl_encoding *encoding;
	struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;

	switch (pd->status) {
	case 1:
		if (c == 0x3f) {		/* ? */
			mbfl_memory_device_output(c, &pd->tmpdev);
			/* the charset name starts right after "=?" */
			pd->cspos = pd->tmpdev.pos;
			pd->status = 2;
		} else {
			/* not an encoded word: release what was held back */
			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
			mbfl_memory_device_reset(&pd->tmpdev);
			if (c == 0x3d) {		/* = */
				mbfl_memory_device_output(c, &pd->tmpdev);
			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
				pd->status = 9;
			} else {
				(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
				pd->status = 0;
			}
		}
		break;
	case 2:		/* store charset string */
		if (c == 0x3f) {		/* ? */
			/* identify charset */
			/* terminate the name temporarily so it can be looked up */
			mbfl_memory_device_output('\0', &pd->tmpdev);
			encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
			if (encoding != NULL) {
				pd->incode = encoding;
				pd->status = 3;
			}
			/* drop the terminator again and keep the '?' */
			mbfl_memory_device_unput(&pd->tmpdev);
			mbfl_memory_device_output(c, &pd->tmpdev);
		} else {
			mbfl_memory_device_output(c, &pd->tmpdev);
			if (pd->tmpdev.pos > 100) {		/* too long charset string */
				pd->status = 0;
			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
				mbfl_memory_device_unput(&pd->tmpdev);
				pd->status = 9;
			}
			if (pd->status != 2) {
				/* gave up on the encoded word: emit the held text as-is */
				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
				mbfl_memory_device_reset(&pd->tmpdev);
			}
		}
		break;
	case 3:		/* identify encoding */
		mbfl_memory_device_output(c, &pd->tmpdev);
		if (c == 0x42 || c == 0x62) {		/* 'B' or 'b' */
			pd->encoding = &mbfl_encoding_base64;
			pd->status = 4;
		} else if (c == 0x51 || c == 0x71) {	/* 'Q' or 'q' */
			pd->encoding = &mbfl_encoding_qprint;
			pd->status = 4;
		} else {
			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
				mbfl_memory_device_unput(&pd->tmpdev);
				pd->status = 9;
			} else {
				pd->status = 0;
			}
			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
			mbfl_memory_device_reset(&pd->tmpdev);
		}
		break;
	case 4:		/* reset filter */
		mbfl_memory_device_output(c, &pd->tmpdev);
		if (c == 0x3f) {		/* ? */
			/* charset convert filter */
			mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, &mbfl_encoding_wchar);
			/* decode filter */
			mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, &mbfl_encoding_8bit);
			pd->status = 5;
		} else {
			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
				mbfl_memory_device_unput(&pd->tmpdev);
				pd->status = 9;
			} else {
				pd->status = 0;
			}
			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
		}
		/* on success the buffered prefix is discarded here without being output */
		mbfl_memory_device_reset(&pd->tmpdev);
		break;
	case 5:		/* encoded block */
		if (c == 0x3f) {		/* ? */
			pd->status = 6;
		} else {
			(*pd->deco_filter->filter_function)(c, pd->deco_filter);
		}
		break;
	case 6:		/* check end position */
		if (c == 0x3d) {		/* = */
			/* flush and reset filter */
			(*pd->deco_filter->filter_flush)(pd->deco_filter);
			(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
			mbfl_convert_filter_reset(pd->conv1_filter, &mbfl_encoding_ascii, &mbfl_encoding_wchar);
			pd->status = 7;
		} else {
			/* the '?' was payload after all: pass it on to the decoder */
			(*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
			if (c != 0x3f) {		/* ? */
				(*pd->deco_filter->filter_function)(c, pd->deco_filter);
				pd->status = 5;
			}
		}
		break;
	case 7:		/* after encoded block */
		if (c == 0x0d || c == 0x0a) {	/* CR LF */
			pd->status = 8;
		} else {
			mbfl_memory_device_output(c, &pd->tmpdev);
			if (c == 0x3d) {		/* = */
				pd->status = 1;
			} else if (c != 0x20 && c != 0x09) {		/* not space */
				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
				mbfl_memory_device_reset(&pd->tmpdev);
				pd->status = 0;
			}
		}
		break;
	case 8:		/* folding */
	case 9:		/* folding */
		/* CR, LF, space and tab are skipped while in a fold */
		if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
			if (c == 0x3d) {		/* = */
				if (pd->status == 8) {
					mbfl_memory_device_output(0x20, &pd->tmpdev);	/* SPACE */
				} else {
					(*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
				}
				mbfl_memory_device_output(c, &pd->tmpdev);
				pd->status = 1;
			} else {
				/* the fold is replaced by a single space in front of c */
				mbfl_memory_device_output(0x20, &pd->tmpdev);
				mbfl_memory_device_output(c, &pd->tmpdev);
				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
				mbfl_memory_device_reset(&pd->tmpdev);
				pd->status = 0;
			}
		}
		break;
	default:		/* non encoded block */
		if (c == 0x0d || c == 0x0a) {	/* CR LF */
			pd->status = 9;
		} else if (c == 0x3d) {		/* = */
			mbfl_memory_device_output(c, &pd->tmpdev);
			pd->status = 1;
		} else {
			(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
		}
		break;
	}

	return c;
}
2145 
2146 mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data * pd,mbfl_string * result)2147 mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2148 {
2149 	switch (pd->status) {
2150 	case 1:
2151 	case 2:
2152 	case 3:
2153 	case 4:
2154 	case 7:
2155 	case 8:
2156 	case 9:
2157 		mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2158 		break;
2159 	case 5:
2160 	case 6:
2161 		(*pd->deco_filter->filter_flush)(pd->deco_filter);
2162 		(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2163 		break;
2164 	}
2165 	(*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2166 	mbfl_memory_device_reset(&pd->tmpdev);
2167 	pd->status = 0;
2168 
2169 	return mbfl_memory_device_result(&pd->outdev, result);
2170 }
2171 
2172 struct mime_header_decoder_data*
mime_header_decoder_new(const mbfl_encoding * outcode)2173 mime_header_decoder_new(const mbfl_encoding *outcode)
2174 {
2175 	struct mime_header_decoder_data *pd = emalloc(sizeof(struct mime_header_decoder_data));
2176 
2177 	mbfl_memory_device_init(&pd->outdev, 0, 0);
2178 	mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2179 	pd->cspos = 0;
2180 	pd->status = 0;
2181 	pd->encoding = &mbfl_encoding_8bit;
2182 	pd->incode = &mbfl_encoding_ascii;
2183 	pd->outcode = outcode;
2184 	/* charset convert filter */
2185 	pd->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2186 	pd->conv1_filter = mbfl_convert_filter_new(pd->incode, &mbfl_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2187 	/* decode filter */
2188 	pd->deco_filter = mbfl_convert_filter_new(pd->encoding, &mbfl_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2189 
2190 	if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2191 		mime_header_decoder_delete(pd);
2192 		return NULL;
2193 	}
2194 
2195 	return pd;
2196 }
2197 
2198 void
mime_header_decoder_delete(struct mime_header_decoder_data * pd)2199 mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2200 {
2201 	if (pd) {
2202 		mbfl_convert_filter_delete(pd->conv2_filter);
2203 		mbfl_convert_filter_delete(pd->conv1_filter);
2204 		mbfl_convert_filter_delete(pd->deco_filter);
2205 		mbfl_memory_device_clear(&pd->outdev);
2206 		mbfl_memory_device_clear(&pd->tmpdev);
2207 		efree((void*)pd);
2208 	}
2209 }
2210 
2211 mbfl_string *
mbfl_mime_header_decode(mbfl_string * string,mbfl_string * result,const mbfl_encoding * outcode)2212 mbfl_mime_header_decode(
2213     mbfl_string *string,
2214     mbfl_string *result,
2215     const mbfl_encoding *outcode)
2216 {
2217 	size_t n;
2218 	unsigned char *p;
2219 	struct mime_header_decoder_data *pd;
2220 
2221 	mbfl_string_init(result);
2222 	result->encoding = outcode;
2223 
2224 	pd = mime_header_decoder_new(outcode);
2225 	if (pd == NULL) {
2226 		return NULL;
2227 	}
2228 
2229 	/* feed data */
2230 	n = string->len;
2231 	p = string->val;
2232 	while (n > 0) {
2233 		mime_header_decoder_collector(*p++, pd);
2234 		n--;
2235 	}
2236 
2237 	result = mime_header_decoder_result(pd, result);
2238 	mime_header_decoder_delete(pd);
2239 
2240 	return result;
2241 }
2242 
2243 
2244 
2245 /*
2246  *  convert HTML numeric entity
2247  */
2248 struct collector_htmlnumericentity_data {
2249 	mbfl_convert_filter *decoder;
2250 	int status;
2251 	int cache;
2252 	int digit;
2253 	int *convmap;
2254 	int mapsize;
2255 };
2256 
2257 static int
collector_encode_htmlnumericentity(int c,void * data)2258 collector_encode_htmlnumericentity(int c, void *data)
2259 {
2260 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2261 	int f, n, s, r, d, size, *mapelm;
2262 
2263 	size = pc->mapsize;
2264 	f = 0;
2265 	n = 0;
2266 	while (n < size) {
2267 		mapelm = &(pc->convmap[n*4]);
2268 		if (c >= mapelm[0] && c <= mapelm[1]) {
2269 			s = (c + mapelm[2]) & mapelm[3];
2270 			if (s >= 0) {
2271 				(*pc->decoder->filter_function)(0x26, pc->decoder);	/* '&' */
2272 				(*pc->decoder->filter_function)(0x23, pc->decoder);	/* '#' */
2273 				r = 100000000;
2274 				s %= r;
2275 				while (r > 0) {
2276 					d = s/r;
2277 					if (d || f) {
2278 						f = 1;
2279 						s %= r;
2280 						(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2281 					}
2282 					r /= 10;
2283 				}
2284 				if (!f) {
2285 					f = 1;
2286 					(*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2287 				}
2288 				(*pc->decoder->filter_function)(0x3b, pc->decoder);		/* ';' */
2289 			}
2290 		}
2291 		if (f) {
2292 			break;
2293 		}
2294 		n++;
2295 	}
2296 	if (!f) {
2297 		(*pc->decoder->filter_function)(c, pc->decoder);
2298 	}
2299 
2300 	return c;
2301 }
2302 
2303 static int
collector_decode_htmlnumericentity(int c,void * data)2304 collector_decode_htmlnumericentity(int c, void *data)
2305 {
2306 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2307 	int f, n, s, r, d, size, *mapelm;
2308 
2309 	switch (pc->status) {
2310 	case 1:
2311 		if (c == 0x23) {	/* '#' */
2312 			pc->status = 2;
2313 		} else {
2314 			pc->status = 0;
2315 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2316 			(*pc->decoder->filter_function)(c, pc->decoder);
2317 		}
2318 		break;
2319 	case 2:
2320 		if (c == 0x78) {	/* 'x' */
2321 			pc->status = 4;
2322 		} else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2323 			pc->cache = c - 0x30;
2324 			pc->status = 3;
2325 			pc->digit = 1;
2326 		} else {
2327 			pc->status = 0;
2328 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2329 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2330 			(*pc->decoder->filter_function)(c, pc->decoder);
2331 		}
2332 		break;
2333 	case 3:
2334 		s = 0;
2335 		f = 0;
2336 		if (c >= 0x30 && c <= 0x39) {	/* '0' - '9' */
2337 			s = pc->cache;
2338 			if (pc->digit > 9 || s > INT_MAX/10) {
2339 				pc->status = 0;
2340 				f = 1;
2341 			} else {
2342 				s = s*10 + (c - 0x30);
2343 				pc->cache = s;
2344 				pc->digit++;
2345 			}
2346 		} else {
2347 			pc->status = 0;
2348 			s = pc->cache;
2349 			f = 1;
2350 			n = 0;
2351 			size = pc->mapsize;
2352 			while (n < size) {
2353 				mapelm = &(pc->convmap[n*4]);
2354 				d = s - mapelm[2];
2355 				if (d >= mapelm[0] && d <= mapelm[1]) {
2356 					f = 0;
2357 					(*pc->decoder->filter_function)(d, pc->decoder);
2358 					if (c != 0x3b) {	/* ';' */
2359 						(*pc->decoder->filter_function)(c, pc->decoder);
2360 					}
2361 					break;
2362 				}
2363 				n++;
2364 			}
2365 		}
2366 		if (f) {
2367 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2368 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2369 			r = 1;
2370 			n = pc->digit;
2371 			while (n > 1) {
2372 				r *= 10;
2373 				n--;
2374 			}
2375 			while (r > 0) {
2376 				d = s/r;
2377 				s %= r;
2378 				r /= 10;
2379 				(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2380 			}
2381 			(*pc->decoder->filter_function)(c, pc->decoder);
2382 		}
2383 		break;
2384 	case 4:
2385 		if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2386 			pc->cache = c - 0x30;
2387 			pc->status = 5;
2388 			pc->digit = 1;
2389 		} else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F'  */
2390 			pc->cache = c - 0x41 + 10;
2391 			pc->status = 5;
2392 			pc->digit = 1;
2393 		} else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f'  */
2394 			pc->cache = c - 0x61 + 10;
2395 			pc->status = 5;
2396 			pc->digit = 1;
2397 		} else {
2398 			pc->status = 0;
2399 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2400 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2401 			(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
2402 			(*pc->decoder->filter_function)(c, pc->decoder);
2403 		}
2404 		break;
2405 	case 5:
2406 		s = 0;
2407 		f = 0;
2408 		if ((c >= 0x30 && c <= 0x39) ||
2409 			(c >= 0x41 && c <= 0x46) ||
2410 			(c >= 0x61 && c <= 0x66)) {	/* '0' - '9' or 'a' - 'f'  */
2411 			if (pc->digit > 9) {
2412 				pc->status = 0;
2413 				s = pc->cache;
2414 				f = 1;
2415 			} else {
2416 				if (c >= 0x30 && c <= 0x39) {
2417 					s = pc->cache*16 + (c - 0x30);
2418 				} else if (c >= 0x41 && c <= 0x46)  {
2419 					s = pc->cache*16 + (c - 0x41 + 10);
2420 				} else {
2421 					s = pc->cache*16 + (c - 0x61 + 10);
2422 				}
2423 				pc->cache = s;
2424 				pc->digit++;
2425 			}
2426 		} else {
2427 			pc->status = 0;
2428 			s = pc->cache;
2429 			f = 1;
2430 			n = 0;
2431 			size = pc->mapsize;
2432 			while (n < size) {
2433 				mapelm = &(pc->convmap[n*4]);
2434 				d = s - mapelm[2];
2435 				if (d >= mapelm[0] && d <= mapelm[1]) {
2436 					f = 0;
2437 					(*pc->decoder->filter_function)(d, pc->decoder);
2438 					if (c != 0x3b) {	/* ';' */
2439 						(*pc->decoder->filter_function)(c, pc->decoder);
2440 					}
2441 					break;
2442 				}
2443 				n++;
2444 			}
2445 		}
2446 		if (f) {
2447 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2448 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2449 			(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
2450 			r = 1;
2451 			n = pc->digit;
2452 			while (n > 0) {
2453 				r *= 16;
2454 				n--;
2455 			}
2456 			s %= r;
2457 			r /= 16;
2458 			while (r > 0) {
2459 				d = s/r;
2460 				s %= r;
2461 				r /= 16;
2462 				(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2463 			}
2464 			(*pc->decoder->filter_function)(c, pc->decoder);
2465 		}
2466 		break;
2467 	default:
2468 		if (c == 0x26) {	/* '&' */
2469 			pc->status = 1;
2470 		} else {
2471 			(*pc->decoder->filter_function)(c, pc->decoder);
2472 		}
2473 		break;
2474 	}
2475 
2476 	return c;
2477 }
2478 
2479 static int
collector_encode_hex_htmlnumericentity(int c,void * data)2480 collector_encode_hex_htmlnumericentity(int c, void *data)
2481 {
2482 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2483 	int f, n, s, r, d, size, *mapelm;
2484 
2485 	size = pc->mapsize;
2486 	f = 0;
2487 	n = 0;
2488 	while (n < size) {
2489 		mapelm = &(pc->convmap[n*4]);
2490 		if (c >= mapelm[0] && c <= mapelm[1]) {
2491 			s = (c + mapelm[2]) & mapelm[3];
2492 			if (s >= 0) {
2493 				(*pc->decoder->filter_function)(0x26, pc->decoder);	/* '&' */
2494 				(*pc->decoder->filter_function)(0x23, pc->decoder);	/* '#' */
2495 				(*pc->decoder->filter_function)(0x78, pc->decoder);	/* 'x' */
2496 				r = 0x1000000;
2497 				s %= r;
2498 				while (r > 0) {
2499 					d = s/r;
2500 					if (d || f) {
2501 						f = 1;
2502 						s %= r;
2503 						(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2504 					}
2505 					r /= 16;
2506 				}
2507 				if (!f) {
2508 					f = 1;
2509 					(*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2510 				}
2511 				(*pc->decoder->filter_function)(0x3b, pc->decoder);		/* ';' */
2512 			}
2513 		}
2514 		if (f) {
2515 			break;
2516 		}
2517 		n++;
2518 	}
2519 	if (!f) {
2520 		(*pc->decoder->filter_function)(c, pc->decoder);
2521 	}
2522 
2523 	return c;
2524 }
2525 
/*
 * Flush routine for the numeric entity decoder.
 *
 * If the input ended in the middle of a reference, the characters consumed
 * so far ("&", "&#", "&#" + digits, "&#x", "&#x" + hex digits) are emitted
 * as text; the parser state is then cleared.
 *
 * filter: despite its declared type, this pointer is treated as the
 *         struct collector_htmlnumericentity_data of the collector
 *         (mbfl_html_numeric_entity registers this routine together with
 *         that structure as the callback data).
 *
 * Always returns 0.
 */
int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
{
	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
	int n, s, r, d;

	switch (pc->status) {
	case 1: /* '&' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		break;
	case 2: /* '#' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		break;
	case 3: /* '0'-'9' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */

		/* Replay the pending digits, most significant first. */
		s = pc->cache;
		r = 1;
		n = pc->digit;
		while (n > 1) {
			r *= 10;
			n--;
		}
		while (r > 0) {
			d = s/r;
			s %= r;
			r /= 10;
			(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
		}
		break;
	case 4: /* 'x' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
		break;
	case 5: /* '0'-'9','a'-'f' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */

		/*
		 * Replay the pending hex digits, most significant first.  The
		 * digits are taken with 4-bit shifts: a 16^digit divisor would
		 * overflow int (and could end up as 0) once 8 or more digits
		 * are pending.
		 */
		for (n = pc->digit; n > 0; n--) {
			unsigned int rest = (unsigned int)pc->cache;

			for (r = 1; r < n; r++) {
				rest >>= 4;
			}
			(*pc->decoder->filter_function)(mbfl_hexchar_table[rest & 0xf], pc->decoder);
		}
		break;
	default:
		break;
	}

	pc->status = 0;
	pc->cache = 0;
	pc->digit = 0;

	return 0;
}
2596 
2597 
2598 mbfl_string *
mbfl_html_numeric_entity(mbfl_string * string,mbfl_string * result,int * convmap,int mapsize,int type)2599 mbfl_html_numeric_entity(
2600     mbfl_string *string,
2601     mbfl_string *result,
2602     int *convmap,
2603     int mapsize,
2604     int type)
2605 {
2606 	struct collector_htmlnumericentity_data pc;
2607 	mbfl_memory_device device;
2608 	mbfl_convert_filter *encoder;
2609 	size_t n;
2610 	unsigned char *p;
2611 
2612 	if (string == NULL || result == NULL) {
2613 		return NULL;
2614 	}
2615 	mbfl_string_init(result);
2616 	result->encoding = string->encoding;
2617 	mbfl_memory_device_init(&device, string->len, 0);
2618 
2619 	/* output code filter */
2620 	pc.decoder = mbfl_convert_filter_new(
2621 	    &mbfl_encoding_wchar,
2622 	    string->encoding,
2623 	    mbfl_memory_device_output, 0, &device);
2624 	/* wchar filter */
2625 	if (type == 0) { /* decimal output */
2626 		encoder = mbfl_convert_filter_new(
2627 		    string->encoding,
2628 		    &mbfl_encoding_wchar,
2629 		    collector_encode_htmlnumericentity, 0, &pc);
2630 	} else if (type == 2) { /* hex output */
2631 		encoder = mbfl_convert_filter_new(
2632 		    string->encoding,
2633 		    &mbfl_encoding_wchar,
2634 		    collector_encode_hex_htmlnumericentity, 0, &pc);
2635 	} else { /* type == 1: decimal/hex input */
2636 		encoder = mbfl_convert_filter_new(
2637 		    string->encoding,
2638 		    &mbfl_encoding_wchar,
2639 		    collector_decode_htmlnumericentity,
2640 		    (flush_function_t)mbfl_filt_decode_htmlnumericentity_flush, &pc);
2641 	}
2642 	if (pc.decoder == NULL || encoder == NULL) {
2643 		mbfl_convert_filter_delete(encoder);
2644 		mbfl_convert_filter_delete(pc.decoder);
2645 		return NULL;
2646 	}
2647 	pc.status = 0;
2648 	pc.cache = 0;
2649 	pc.digit = 0;
2650 	pc.convmap = convmap;
2651 	pc.mapsize = mapsize;
2652 
2653 	/* feed data */
2654 	p = string->val;
2655 	n = string->len;
2656 	if (p != NULL) {
2657 		while (n > 0) {
2658 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
2659 				break;
2660 			}
2661 			n--;
2662 		}
2663 	}
2664 	mbfl_convert_filter_flush(encoder);
2665 	mbfl_convert_filter_flush(pc.decoder);
2666 	result = mbfl_memory_device_result(&device, result);
2667 	mbfl_convert_filter_delete(encoder);
2668 	mbfl_convert_filter_delete(pc.decoder);
2669 
2670 	return result;
2671 }
2672