xref: /PHP-7.4/ext/mbstring/libmbfl/mbfl/mbfilter.c (revision 3d5de7d7)
1 /*
2  * charset=UTF-8
3  */
4 
5 /*
6  * "streamable kanji code filter and converter"
7  *
8  * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
9  *
10  * This software is released under the GNU Lesser General Public License.
11  * (Version 2.1, February 1999)
12  * Please read the following detail of the licence (in japanese).
13  *
14  * ◆使用許諾条件◆
15  *
16  * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
17  * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
18  * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
19  * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
20  * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
21  * することはできません。
22  *
23  * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
24  * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
25  * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
26  * による許諾を得る必要があります。
27  *
28  * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
29  * ます。「GNU Lesser General Public License」とは、これまでLibrary General
30  * Public Licenseと呼ばれていたものです。
31  *     http://www.gnu.org/ --- GNUウェブサイト
32  *     http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
33  * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
34  *
35  * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
36  * はありません。
37  *
38  * ◆保証内容◆
39  *
40  * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
41  * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
42  * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
43  * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
44  * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
45  * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
46  * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
47  * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
48  * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
49  * 契約・規定に優先します。
50  *
51  * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
52  *
53  * 〒102-0073
54  * 東京都千代田区九段北1-13-5日本地所第一ビル4F
55  * 株式会社ハッピーサイズ
56  * Phone: 03-3512-3655, Fax: 03-3512-3656
57  * Email: sales@happysize.co.jp
58  * Web: http://happysize.com/
59  *
60  * ◆著者◆
61  *
62  * 金本 茂 <sgk@happysize.co.jp>
63  *
64  * ◆履歴◆
65  *
66  * 1998/11/10 sgk implementation in C++
67  * 1999/4/25  sgk Cで書きなおし。
68  * 1999/4/26  sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
69  * 1999/6/??      Unicodeサポート。
70  * 1999/6/22  sgk ライセンスをLGPLに変更。
71  *
72  */
73 
74 /*
75  * Unicode support
76  *
77  * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
78  * All rights reserved.
79  *
80  */
81 
82 
83 #ifdef HAVE_CONFIG_H
84 #include "config.h"
85 #endif
86 
87 #include <stddef.h>
88 #include <string.h>
89 #include <limits.h>
90 
91 #include "mbfilter.h"
92 #include "mbfl_filter_output.h"
93 #include "mbfilter_8bit.h"
94 #include "mbfilter_pass.h"
95 #include "mbfilter_wchar.h"
96 #include "filters/mbfilter_ascii.h"
97 #include "filters/mbfilter_base64.h"
98 #include "filters/mbfilter_qprint.h"
99 #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
100 #include "filters/mbfilter_utf8.h"
101 
102 #include "eaw_table.h"
103 
104 /* hex character table "0123456789ABCDEF" */
/* Indexed by a nibble value 0..15; yields the byte code of the matching uppercase hex digit. */
static char mbfl_hexchar_table[] = {
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* '0'..'7' */
	0x38, 0x39,					/* '8', '9' */
	0x41, 0x42, 0x43, 0x44, 0x45, 0x46		/* 'A'..'F' */
};
108 
109 
110 
111 /*
112  * encoding filter
113  */
/*
 * Evaluate `statement` once; when its value is negative, make the enclosing
 * function return -1. Otherwise execution continues after the macro.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
115 
116 
117 /*
118  *  buffering converter
119  */
120 mbfl_buffer_converter *
mbfl_buffer_converter_new(const mbfl_encoding * from,const mbfl_encoding * to,size_t buf_initsz)121 mbfl_buffer_converter_new(
122 	const mbfl_encoding *from,
123 	const mbfl_encoding *to,
124     size_t buf_initsz)
125 {
126 	mbfl_buffer_converter *convd;
127 
128 	/* allocate */
129 	convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof(mbfl_buffer_converter));
130 	if (convd == NULL) {
131 		return NULL;
132 	}
133 
134 	/* initialize */
135 	convd->from = from;
136 	convd->to = to;
137 
138 	/* create convert filter */
139 	convd->filter1 = NULL;
140 	convd->filter2 = NULL;
141 	if (mbfl_convert_filter_get_vtbl(convd->from, convd->to) != NULL) {
142 		convd->filter1 = mbfl_convert_filter_new(convd->from, convd->to, mbfl_memory_device_output, NULL, &convd->device);
143 	} else {
144 		convd->filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, convd->to, mbfl_memory_device_output, NULL, &convd->device);
145 		if (convd->filter2 != NULL) {
146 			convd->filter1 = mbfl_convert_filter_new(convd->from,
147 					&mbfl_encoding_wchar,
148 					(int (*)(int, void*))convd->filter2->filter_function,
149 					(int (*)(void*))convd->filter2->filter_flush,
150 					convd->filter2);
151 			if (convd->filter1 == NULL) {
152 				mbfl_convert_filter_delete(convd->filter2);
153 			}
154 		}
155 	}
156 	if (convd->filter1 == NULL) {
157 		mbfl_free(convd);
158 		return NULL;
159 	}
160 
161 	mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
162 
163 	return convd;
164 }
165 
166 
167 void
mbfl_buffer_converter_delete(mbfl_buffer_converter * convd)168 mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
169 {
170 	if (convd != NULL) {
171 		if (convd->filter1) {
172 			mbfl_convert_filter_delete(convd->filter1);
173 		}
174 		if (convd->filter2) {
175 			mbfl_convert_filter_delete(convd->filter2);
176 		}
177 		mbfl_memory_device_clear(&convd->device);
178 		mbfl_free((void*)convd);
179 	}
180 }
181 
182 void
mbfl_buffer_converter_reset(mbfl_buffer_converter * convd)183 mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
184 {
185 	mbfl_memory_device_reset(&convd->device);
186 }
187 
188 int
mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter * convd,int mode)189 mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
190 {
191 	if (convd != NULL) {
192 		if (convd->filter2 != NULL) {
193 			convd->filter2->illegal_mode = mode;
194 		} else if (convd->filter1 != NULL) {
195 			convd->filter1->illegal_mode = mode;
196 		} else {
197 			return 0;
198 		}
199 	}
200 
201 	return 1;
202 }
203 
204 int
mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter * convd,int substchar)205 mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
206 {
207 	if (convd != NULL) {
208 		if (convd->filter2 != NULL) {
209 			convd->filter2->illegal_substchar = substchar;
210 		} else if (convd->filter1 != NULL) {
211 			convd->filter1->illegal_substchar = substchar;
212 		} else {
213 			return 0;
214 		}
215 	}
216 
217 	return 1;
218 }
219 
220 int
mbfl_buffer_converter_strncat(mbfl_buffer_converter * convd,const unsigned char * p,size_t n)221 mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, size_t n)
222 {
223 	mbfl_convert_filter *filter;
224 	int (*filter_function)(int c, mbfl_convert_filter *filter);
225 
226 	if (convd != NULL && p != NULL) {
227 		filter = convd->filter1;
228 		if (filter != NULL) {
229 			filter_function = filter->filter_function;
230 			while (n > 0) {
231 				if ((*filter_function)(*p++, filter) < 0) {
232 					break;
233 				}
234 				n--;
235 			}
236 		}
237 	}
238 
239 	return n;
240 }
241 
242 int
mbfl_buffer_converter_feed(mbfl_buffer_converter * convd,mbfl_string * string)243 mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
244 {
245 	return mbfl_buffer_converter_feed2(convd, string, NULL);
246 }
247 
248 int
mbfl_buffer_converter_feed2(mbfl_buffer_converter * convd,mbfl_string * string,size_t * loc)249 mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, size_t *loc)
250 {
251 	size_t n;
252 	unsigned char *p;
253 	mbfl_convert_filter *filter;
254 	int (*filter_function)(int c, mbfl_convert_filter *filter);
255 
256 	if (convd == NULL || string == NULL) {
257 		return -1;
258 	}
259 	mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
260 	/* feed data */
261 	n = string->len;
262 	p = string->val;
263 
264 	filter = convd->filter1;
265 	if (filter != NULL) {
266 		filter_function = filter->filter_function;
267 		while (n > 0) {
268 			if ((*filter_function)(*p++, filter) < 0) {
269 				if (loc) {
270 					*loc = p - string->val;
271 				}
272 				return -1;
273 			}
274 			n--;
275 		}
276 	}
277 	if (loc) {
278 		*loc = p - string->val;
279 	}
280 	return 0;
281 }
282 
283 
284 int
mbfl_buffer_converter_flush(mbfl_buffer_converter * convd)285 mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
286 {
287 	if (convd == NULL) {
288 		return -1;
289 	}
290 
291 	if (convd->filter1 != NULL) {
292 		mbfl_convert_filter_flush(convd->filter1);
293 	}
294 	if (convd->filter2 != NULL) {
295 		mbfl_convert_filter_flush(convd->filter2);
296 	}
297 
298 	return 0;
299 }
300 
301 mbfl_string *
mbfl_buffer_converter_getbuffer(mbfl_buffer_converter * convd,mbfl_string * result)302 mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
303 {
304 	if (convd != NULL && result != NULL && convd->device.buffer != NULL) {
305 		result->encoding = convd->to;
306 		result->val = convd->device.buffer;
307 		result->len = convd->device.pos;
308 	} else {
309 		result = NULL;
310 	}
311 
312 	return result;
313 }
314 
315 mbfl_string *
mbfl_buffer_converter_result(mbfl_buffer_converter * convd,mbfl_string * result)316 mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
317 {
318 	if (convd == NULL || result == NULL) {
319 		return NULL;
320 	}
321 	result->encoding = convd->to;
322 	return mbfl_memory_device_result(&convd->device, result);
323 }
324 
325 mbfl_string *
mbfl_buffer_converter_feed_result(mbfl_buffer_converter * convd,mbfl_string * string,mbfl_string * result)326 mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
327 				  mbfl_string *result)
328 {
329 	if (convd == NULL || string == NULL || result == NULL) {
330 		return NULL;
331 	}
332 	mbfl_buffer_converter_feed(convd, string);
333 	if (convd->filter1 != NULL) {
334 		mbfl_convert_filter_flush(convd->filter1);
335 	}
336 	if (convd->filter2 != NULL) {
337 		mbfl_convert_filter_flush(convd->filter2);
338 	}
339 	result->encoding = convd->to;
340 	return mbfl_memory_device_result(&convd->device, result);
341 }
342 
/*
 * Sum of num_illegalchar over the converter's existing filters.
 * Returns 0 when convd is NULL.
 */
size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
	size_t total = 0;

	if (convd != NULL) {
		if (convd->filter1 != NULL) {
			total += convd->filter1->num_illegalchar;
		}
		if (convd->filter2 != NULL) {
			total += convd->filter2->num_illegalchar;
		}
	}

	return total;
}
361 
362 /*
363  * encoding detector
364  */
365 mbfl_encoding_detector *
mbfl_encoding_detector_new(const mbfl_encoding ** elist,int elistsz,int strict)366 mbfl_encoding_detector_new(const mbfl_encoding **elist, int elistsz, int strict)
367 {
368 	mbfl_encoding_detector *identd;
369 
370 	int i, num;
371 	mbfl_identify_filter *filter;
372 
373 	if (elist == NULL || elistsz <= 0) {
374 		return NULL;
375 	}
376 
377 	/* allocate */
378 	identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
379 	if (identd == NULL) {
380 		return NULL;
381 	}
382 	identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
383 	if (identd->filter_list == NULL) {
384 		mbfl_free(identd);
385 		return NULL;
386 	}
387 
388 	/* create filters */
389 	i = 0;
390 	num = 0;
391 	while (i < elistsz) {
392 		filter = mbfl_identify_filter_new2(elist[i]);
393 		if (filter != NULL) {
394 			identd->filter_list[num] = filter;
395 			num++;
396 		}
397 		i++;
398 	}
399 	identd->filter_list_size = num;
400 
401 	/* set strict flag */
402 	identd->strict = strict;
403 
404 	return identd;
405 }
406 
407 
408 void
mbfl_encoding_detector_delete(mbfl_encoding_detector * identd)409 mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
410 {
411 	int i;
412 
413 	if (identd != NULL) {
414 		if (identd->filter_list != NULL) {
415 			i = identd->filter_list_size;
416 			while (i > 0) {
417 				i--;
418 				mbfl_identify_filter_delete(identd->filter_list[i]);
419 			}
420 			mbfl_free((void *)identd->filter_list);
421 		}
422 		mbfl_free((void *)identd);
423 	}
424 }
425 
426 int
mbfl_encoding_detector_feed(mbfl_encoding_detector * identd,mbfl_string * string)427 mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
428 {
429 	int res = 0;
430 	/* feed data */
431 	if (identd != NULL && string != NULL && string->val != NULL) {
432 		int num = identd->filter_list_size;
433 		size_t n = string->len;
434 		unsigned char *p = string->val;
435 		int bad = 0;
436 		while (n > 0) {
437 			int i;
438 			for (i = 0; i < num; i++) {
439 				mbfl_identify_filter *filter = identd->filter_list[i];
440 				if (!filter->flag) {
441 					(*filter->filter_function)(*p, filter);
442 					if (filter->flag) {
443 						bad++;
444 					}
445 				}
446 			}
447 			if ((num - 1) <= bad) {
448 				res = 1;
449 				break;
450 			}
451 			p++;
452 			n--;
453 		}
454 	}
455 
456 	return res;
457 }
458 
mbfl_encoding_detector_judge(mbfl_encoding_detector * identd)459 const mbfl_encoding *mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
460 {
461 	mbfl_identify_filter *filter;
462 	const mbfl_encoding *encoding = NULL;
463 	int n;
464 
465 	/* judge */
466 	if (identd != NULL) {
467 		n = identd->filter_list_size - 1;
468 		while (n >= 0) {
469 			filter = identd->filter_list[n];
470 			if (!filter->flag) {
471 				if (!identd->strict || !filter->status) {
472 					encoding = filter->encoding;
473 				}
474 			}
475 			n--;
476 		}
477 
478 		/* fallback judge */
479 		if (!encoding) {
480 			n = identd->filter_list_size - 1;
481 			while (n >= 0) {
482 				filter = identd->filter_list[n];
483 				if (!filter->flag) {
484 					encoding = filter->encoding;
485 				}
486 				n--;
487  			}
488 		}
489 	}
490 
491 	return encoding;
492 }
493 
494 /*
495  * encoding converter
496  */
497 mbfl_string *
mbfl_convert_encoding(mbfl_string * string,mbfl_string * result,const mbfl_encoding * toenc)498 mbfl_convert_encoding(
499     mbfl_string *string,
500     mbfl_string *result,
501     const mbfl_encoding *toenc)
502 {
503 	size_t n;
504 	unsigned char *p;
505 	mbfl_memory_device device;
506 	mbfl_convert_filter *filter1;
507 	mbfl_convert_filter *filter2;
508 
509 	/* initialize */
510 	if (toenc == NULL || string == NULL || result == NULL) {
511 		return NULL;
512 	}
513 
514 	filter1 = NULL;
515 	filter2 = NULL;
516 	if (mbfl_convert_filter_get_vtbl(string->encoding, toenc) != NULL) {
517 		filter1 = mbfl_convert_filter_new(string->encoding, toenc, mbfl_memory_device_output, 0, &device);
518 	} else {
519 		filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
520 		if (filter2 != NULL) {
521 			filter1 = mbfl_convert_filter_new(string->encoding, &mbfl_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
522 			if (filter1 == NULL) {
523 				mbfl_convert_filter_delete(filter2);
524 			}
525 		}
526 	}
527 	if (filter1 == NULL) {
528 		return NULL;
529 	}
530 
531 	if (filter2 != NULL) {
532 		filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
533 		filter2->illegal_substchar = 0x3f;		/* '?' */
534 	}
535 
536 	mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
537 
538 	/* feed data */
539 	n = string->len;
540 	p = string->val;
541 	if (p != NULL) {
542 		while (n > 0) {
543 			if ((*filter1->filter_function)(*p++, filter1) < 0) {
544 				break;
545 			}
546 			n--;
547 		}
548 	}
549 
550 	mbfl_convert_filter_flush(filter1);
551 	mbfl_convert_filter_delete(filter1);
552 	if (filter2 != NULL) {
553 		mbfl_convert_filter_flush(filter2);
554 		mbfl_convert_filter_delete(filter2);
555 	}
556 
557 	return mbfl_memory_device_result(&device, result);
558 }
559 
560 
561 /*
562  * identify encoding
563  */
564 const mbfl_encoding *
mbfl_identify_encoding(mbfl_string * string,const mbfl_encoding ** elist,int elistsz,int strict)565 mbfl_identify_encoding(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict)
566 {
567 	int i, num, bad;
568 	size_t n;
569 	unsigned char *p;
570 	mbfl_identify_filter *flist, *filter;
571 	const mbfl_encoding *encoding;
572 
573 	/* flist is an array of mbfl_identify_filter instances */
574 	flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
575 	if (flist == NULL) {
576 		return NULL;
577 	}
578 
579 	num = 0;
580 	if (elist != NULL) {
581 		for (i = 0; i < elistsz; i++) {
582 			if (!mbfl_identify_filter_init2(&flist[num], elist[i])) {
583 				num++;
584 			}
585 		}
586 	}
587 
588 	/* feed data */
589 	n = string->len;
590 	p = string->val;
591 
592 	if (p != NULL) {
593 		bad = 0;
594 		while (n > 0) {
595 			for (i = 0; i < num; i++) {
596 				filter = &flist[i];
597 				if (!filter->flag) {
598 					(*filter->filter_function)(*p, filter);
599 					if (filter->flag) {
600 						bad++;
601 					}
602 				}
603 			}
604 			if ((num - 1) <= bad && !strict) {
605 				break;
606 			}
607 			p++;
608 			n--;
609 		}
610 	}
611 
612 	/* judge */
613 	encoding = NULL;
614 
615 	for (i = 0; i < num; i++) {
616 		filter = &flist[i];
617 		if (!filter->flag) {
618 			if (strict && filter->status) {
619  				continue;
620  			}
621 			encoding = filter->encoding;
622 			break;
623 		}
624 	}
625 
626 	/* fall-back judge */
627 	if (!encoding) {
628 		for (i = 0; i < num; i++) {
629 			filter = &flist[i];
630 			if (!filter->flag && (!strict || !filter->status)) {
631 				encoding = filter->encoding;
632 				break;
633 			}
634 		}
635 	}
636 
637 	/* cleanup */
638 	/* dtors should be called in reverse order */
639 	i = num;
640 	while (--i >= 0) {
641 		mbfl_identify_filter_cleanup(&flist[i]);
642 	}
643 
644 	mbfl_free((void *)flist);
645 
646 	return encoding;
647 }
648 
649 /*
650  *  strlen
651  */
/*
 * Output callback that only counts: `data` points to a size_t which is
 * incremented once per call. The character is returned unchanged.
 */
static int
filter_count_output(int c, void *data)
{
	size_t *counter = (size_t *)data;

	*counter += 1;
	return c;
}
658 
659 size_t
mbfl_strlen(mbfl_string * string)660 mbfl_strlen(mbfl_string *string)
661 {
662 	size_t len, n, k;
663 	unsigned char *p;
664 	const mbfl_encoding *encoding = string->encoding;
665 
666 	len = 0;
667 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
668 		len = string->len;
669 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
670 		len = string->len/2;
671 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
672 		len = string->len/4;
673 	} else if (encoding->mblen_table != NULL) {
674 		const unsigned char *mbtab = encoding->mblen_table;
675 		n = 0;
676 		p = string->val;
677 		k = string->len;
678 		/* count */
679 		if (p != NULL) {
680 			while (n < k) {
681 				unsigned m = mbtab[*p];
682 				n += m;
683 				p += m;
684 				len++;
685 			}
686 		}
687 	} else {
688 		/* wchar filter */
689 		mbfl_convert_filter *filter = mbfl_convert_filter_new(
690 		  string->encoding,
691 		  &mbfl_encoding_wchar,
692 		  filter_count_output, 0, &len);
693 		if (filter == NULL) {
694 			return (size_t) -1;
695 		}
696 		/* count */
697 		n = string->len;
698 		p = string->val;
699 		if (p != NULL) {
700 			while (n > 0) {
701 				(*filter->filter_function)(*p++, filter);
702 				n--;
703 			}
704 		}
705 		mbfl_convert_filter_delete(filter);
706 	}
707 
708 	return len;
709 }
710 
711 
712 /*
713  *  strpos
714  */
715 struct collector_strpos_data {
716 	mbfl_convert_filter *next_filter;
717 	mbfl_wchar_device needle;
718 	size_t needle_len;
719 	size_t start;
720 	size_t output;
721 	size_t found_pos;
722 	size_t needle_pos;
723 	size_t matched_pos;
724 };
725 
726 static int
collector_strpos(int c,void * data)727 collector_strpos(int c, void* data)
728 {
729 	int *p, *h, *m;
730 	ssize_t n;
731 	struct collector_strpos_data *pc = (struct collector_strpos_data*)data;
732 
733 	if (pc->output >= pc->start) {
734 		if (c == (int)pc->needle.buffer[pc->needle_pos]) {
735 			if (pc->needle_pos == 0) {
736 				pc->found_pos = pc->output;			/* found position */
737 			}
738 			pc->needle_pos++;						/* needle pointer */
739 			if (pc->needle_pos >= pc->needle_len) {
740 				pc->matched_pos = pc->found_pos;	/* matched position */
741 				pc->needle_pos--;
742 				goto retry;
743 			}
744 		} else if (pc->needle_pos != 0) {
745 retry:
746 			h = (int *)pc->needle.buffer;
747 			h++;
748 			for (;;) {
749 				pc->found_pos++;
750 				p = h;
751 				m = (int *)pc->needle.buffer;
752 				n = pc->needle_pos - 1;
753 				while (n > 0 && *p == *m) {
754 					n--;
755 					p++;
756 					m++;
757 				}
758 				if (n <= 0) {
759 					if (*m != c) {
760 						pc->needle_pos = 0;
761 					}
762 					break;
763 				} else {
764 					h++;
765 					pc->needle_pos--;
766 				}
767 			}
768 		}
769 	}
770 
771 	pc->output++;
772 	return c;
773 }
774 
775 /*
776  *	oddlen
777  */
778 size_t
mbfl_oddlen(mbfl_string * string)779 mbfl_oddlen(mbfl_string *string)
780 {
781 	size_t len, n, k;
782 	unsigned char *p;
783 	const mbfl_encoding *encoding = string->encoding;
784 
785 	len = 0;
786 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
787 		return 0;
788 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
789 		return len % 2;
790 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
791 		return len % 4;
792 	} else if (encoding->mblen_table != NULL) {
793 		const unsigned char *mbtab = encoding->mblen_table;
794  		n = 0;
795 		p = string->val;
796 		k = string->len;
797 		/* count */
798 		if (p != NULL) {
799 			while (n < k) {
800 				unsigned m = mbtab[*p];
801 				n += m;
802 				p += m;
803 			};
804 		}
805 		return n-k;
806 	} else {
807 		/* how can i do ? */
808 		return 0;
809 	}
810 	/* NOT REACHED */
811 }
812 
813 size_t
mbfl_strpos(mbfl_string * haystack,mbfl_string * needle,ssize_t offset,int reverse)814 mbfl_strpos(
815     mbfl_string *haystack,
816     mbfl_string *needle,
817     ssize_t offset,
818     int reverse)
819 {
820 	size_t result;
821 	mbfl_string _haystack_u8, _needle_u8;
822 	const mbfl_string *haystack_u8, *needle_u8 = NULL;
823 	const unsigned char *u8_tbl;
824 
825 	if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
826 		return (size_t) -8;
827 	}
828 
829 	{
830 		const mbfl_encoding *u8_enc = &mbfl_encoding_utf8;
831 		if (u8_enc->mblen_table == NULL) {
832 			return (size_t) -8;
833 		}
834 		u8_tbl = u8_enc->mblen_table;
835 	}
836 
837 	if (haystack->encoding->no_encoding != mbfl_no_encoding_utf8) {
838 		mbfl_string_init(&_haystack_u8);
839 		haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, &mbfl_encoding_utf8);
840 		if (haystack_u8 == NULL) {
841 			result = (size_t) -4;
842 			goto out;
843 		}
844 	} else {
845 		haystack_u8 = haystack;
846 	}
847 
848 	if (needle->encoding->no_encoding != mbfl_no_encoding_utf8) {
849 		mbfl_string_init(&_needle_u8);
850 		needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, &mbfl_encoding_utf8);
851 		if (needle_u8 == NULL) {
852 			result = (size_t) -4;
853 			goto out;
854 		}
855 	} else {
856 		needle_u8 = needle;
857 	}
858 
859 	if (needle_u8->len < 1) {
860 		result = (size_t) -8;
861 		goto out;
862 	}
863 
864 	result = (size_t) -1;
865 	if (haystack_u8->len < needle_u8->len) {
866 		goto out;
867 	}
868 
869 	if (!reverse) {
870 		size_t jtbl[1 << (sizeof(unsigned char) * 8)];
871 		size_t needle_u8_len = needle_u8->len;
872 		size_t i;
873 		const unsigned char *p, *q, *e;
874 		const unsigned char *haystack_u8_val = haystack_u8->val,
875 		                    *needle_u8_val = needle_u8->val;
876 		for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
877 			jtbl[i] = needle_u8_len + 1;
878 		}
879 		for (i = 0; i < needle_u8_len - 1; ++i) {
880 			jtbl[needle_u8_val[i]] = needle_u8_len - i;
881 		}
882 		e = haystack_u8_val + haystack_u8->len;
883 		p = haystack_u8_val;
884 		while (offset-- > 0) {
885 			if (p >= e) {
886 				result = (size_t) -16;
887 				goto out;
888 			}
889 			p += u8_tbl[*p];
890 		}
891 		p += needle_u8_len;
892 		if (p > e) {
893 			goto out;
894 		}
895 		while (p <= e) {
896 			const unsigned char *pv = p;
897 			q = needle_u8_val + needle_u8_len;
898 			for (;;) {
899 				if (q == needle_u8_val) {
900 					result = 0;
901 					while (p > haystack_u8_val) {
902 						unsigned char c = *--p;
903 						if (c < 0x80) {
904 							++result;
905 						} else if ((c & 0xc0) != 0x80) {
906 							++result;
907 						}
908 					}
909 					goto out;
910 				}
911 				if (*--q != *--p) {
912 					break;
913 				}
914 			}
915 			p += jtbl[*p];
916 			if (p <= pv) {
917 				p = pv + 1;
918 			}
919 		}
920 	} else {
921 		size_t jtbl[1 << (sizeof(unsigned char) * 8)];
922 		size_t needle_u8_len = needle_u8->len, needle_len = 0;
923 		size_t i;
924 		const unsigned char *p, *e, *q, *qe;
925 		const unsigned char *haystack_u8_val = haystack_u8->val,
926 		                    *needle_u8_val = needle_u8->val;
927 		for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
928 			jtbl[i] = needle_u8_len;
929 		}
930 		for (i = needle_u8_len - 1; i > 0; --i) {
931 			unsigned char c = needle_u8_val[i];
932 			jtbl[c] = i;
933 			if (c < 0x80) {
934 				++needle_len;
935 			} else if ((c & 0xc0) != 0x80) {
936 				++needle_len;
937 			}
938 		}
939 		{
940 			unsigned char c = needle_u8_val[0];
941 			if (c < 0x80) {
942 				++needle_len;
943 			} else if ((c & 0xc0) != 0x80) {
944 				++needle_len;
945 			}
946 		}
947 		e = haystack_u8_val;
948 		p = e + haystack_u8->len;
949 		qe = needle_u8_val + needle_u8_len;
950 		if (offset < 0) {
951 			if (-offset > needle_len) {
952 				offset += needle_len;
953 				while (offset < 0) {
954 					unsigned char c;
955 					if (p <= e) {
956 						result = (size_t) -16;
957 						goto out;
958 					}
959 					c = *(--p);
960 					if (c < 0x80) {
961 						++offset;
962 					} else if ((c & 0xc0) != 0x80) {
963 						++offset;
964 					}
965 				}
966 			}
967 		} else {
968 			const unsigned char *ee = haystack_u8_val + haystack_u8->len;
969 			while (offset-- > 0) {
970 				if (e >= ee) {
971 					result = (size_t) -16;
972 					goto out;
973 				}
974 				e += u8_tbl[*e];
975 			}
976 		}
977 		if (p < e + needle_u8_len) {
978 			goto out;
979 		}
980 		p -= needle_u8_len;
981 		while (p >= e) {
982 			const unsigned char *pv = p;
983 			q = needle_u8_val;
984 			for (;;) {
985 				if (q == qe) {
986 					result = 0;
987 					p -= needle_u8_len;
988 					while (p > haystack_u8_val) {
989 						unsigned char c = *--p;
990 						if (c < 0x80) {
991 							++result;
992 						} else if ((c & 0xc0) != 0x80) {
993 							++result;
994 						}
995 					}
996 					goto out;
997 				}
998 				if (*q != *p) {
999 					break;
1000 				}
1001 				++p, ++q;
1002 			}
1003 			p -= jtbl[*p];
1004 			if (p >= pv) {
1005 				p = pv - 1;
1006 			}
1007 		}
1008 	}
1009 out:
1010 	if (haystack_u8 == &_haystack_u8) {
1011 		mbfl_string_clear(&_haystack_u8);
1012 	}
1013 	if (needle_u8 == &_needle_u8) {
1014 		mbfl_string_clear(&_needle_u8);
1015 	}
1016 	return result;
1017 }
1018 
1019 /*
1020  *  substr_count
1021  */
1022 
1023 size_t
mbfl_substr_count(mbfl_string * haystack,mbfl_string * needle)1024 mbfl_substr_count(
1025     mbfl_string *haystack,
1026     mbfl_string *needle
1027    )
1028 {
1029 	size_t n, result = 0;
1030 	unsigned char *p;
1031 	mbfl_convert_filter *filter;
1032 	struct collector_strpos_data pc;
1033 
1034 	if (haystack == NULL || needle == NULL) {
1035 		return (size_t) -8;
1036 	}
1037 	/* needle is converted into wchar */
1038 	mbfl_wchar_device_init(&pc.needle);
1039 	filter = mbfl_convert_filter_new(
1040 	  needle->encoding,
1041 	  &mbfl_encoding_wchar,
1042 	  mbfl_wchar_device_output, 0, &pc.needle);
1043 	if (filter == NULL) {
1044 		return (size_t) -4;
1045 	}
1046 	mbfl_convert_filter_feed_string(filter, needle->val, needle->len);
1047 	mbfl_convert_filter_flush(filter);
1048 	mbfl_convert_filter_delete(filter);
1049 	pc.needle_len = pc.needle.pos;
1050 	if (pc.needle.buffer == NULL) {
1051 		return (size_t) -4;
1052 	}
1053 	if (pc.needle_len <= 0) {
1054 		mbfl_wchar_device_clear(&pc.needle);
1055 		return (size_t) -2;
1056 	}
1057 	/* initialize filter and collector data */
1058 	filter = mbfl_convert_filter_new(
1059 	  haystack->encoding,
1060 	  &mbfl_encoding_wchar,
1061 	  collector_strpos, 0, &pc);
1062 	if (filter == NULL) {
1063 		mbfl_wchar_device_clear(&pc.needle);
1064 		return (size_t) -4;
1065 	}
1066 	pc.start = 0;
1067 	pc.output = 0;
1068 	pc.needle_pos = 0;
1069 	pc.found_pos = 0;
1070 	pc.matched_pos = (size_t) -1;
1071 
1072 	/* feed data */
1073 	p = haystack->val;
1074 	n = haystack->len;
1075 	if (p != NULL) {
1076 		while (n > 0) {
1077 			if ((*filter->filter_function)(*p++, filter) < 0) {
1078 				pc.matched_pos = (size_t) -4;
1079 				break;
1080 			}
1081 			if (pc.matched_pos != (size_t) -1) {
1082 				++result;
1083 				pc.matched_pos = (size_t) -1;
1084 				pc.needle_pos = 0;
1085 			}
1086 			n--;
1087 		}
1088 	}
1089 	mbfl_convert_filter_flush(filter);
1090 	mbfl_convert_filter_delete(filter);
1091 	mbfl_wchar_device_clear(&pc.needle);
1092 
1093 	return result;
1094 }
1095 
1096 /*
1097  *  substr
1098  */
1099 struct collector_substr_data {
1100 	mbfl_convert_filter *next_filter;
1101 	size_t start;
1102 	size_t stop;
1103 	size_t output;
1104 };
1105 
1106 static int
collector_substr(int c,void * data)1107 collector_substr(int c, void* data)
1108 {
1109 	struct collector_substr_data *pc = (struct collector_substr_data*)data;
1110 
1111 	if (pc->output >= pc->stop) {
1112 		return -1;
1113 	}
1114 
1115 	if (pc->output >= pc->start) {
1116 		(*pc->next_filter->filter_function)(c, pc->next_filter);
1117 	}
1118 
1119 	pc->output++;
1120 
1121 	return c;
1122 }
1123 
1124 mbfl_string *
mbfl_substr(mbfl_string * string,mbfl_string * result,size_t from,size_t length)1125 mbfl_substr(
1126     mbfl_string *string,
1127     mbfl_string *result,
1128     size_t from,
1129     size_t length)
1130 {
1131 	const mbfl_encoding *encoding = string->encoding;
1132 	size_t n, k, len, start, end;
1133 	unsigned m;
1134 	unsigned char *p, *w;
1135 
1136 	mbfl_string_init(result);
1137 	result->no_language = string->no_language;
1138 	result->encoding = string->encoding;
1139 
1140 	if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
1141 	   encoding->mblen_table != NULL) {
1142 		len = string->len;
1143 		if (encoding->flag & MBFL_ENCTYPE_SBCS) {
1144 			start = from;
1145 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1146 			start = from*2;
1147 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1148 			start = from*4;
1149 		} else {
1150 			const unsigned char *mbtab = encoding->mblen_table;
1151 			start = 0;
1152 			n = 0;
1153 			k = 0;
1154 			p = string->val;
1155 			/* search start position */
1156 			while (k <= from) {
1157 				start = n;
1158 				if (n >= len) {
1159 					break;
1160 				}
1161 				m = mbtab[*p];
1162 				n += m;
1163 				p += m;
1164 				k++;
1165 			}
1166 		}
1167 
1168 		if (length == MBFL_SUBSTR_UNTIL_END) {
1169 			end = len;
1170 		} else if (encoding->flag & MBFL_ENCTYPE_SBCS) {
1171 			end = start + length;
1172 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1173 			end = start + length*2;
1174 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1175 			end = start + length*4;
1176 		} else {
1177 			const unsigned char *mbtab = encoding->mblen_table;
1178 			end = start;
1179 			n = start;
1180 			k = 0;
1181 			p = string->val + start;
1182 			/* detect end position */
1183 			while (k <= length) {
1184 				end = n;
1185 				if (n >= len) {
1186 					break;
1187 				}
1188 				m = mbtab[*p];
1189 				n += m;
1190 				p += m;
1191 				k++;
1192 			}
1193 		}
1194 
1195 		if (start > len) {
1196 			start = len;
1197 		}
1198 		if (end > len) {
1199 			end = len;
1200 		}
1201 		if (start > end) {
1202 			start = end;
1203 		}
1204 
1205 		/* allocate memory and copy */
1206 		n = end - start;
1207 		result->len = 0;
1208 		result->val = w = (unsigned char*)mbfl_malloc(n + 1);
1209 		if (w != NULL) {
1210 			result->len = n;
1211 			memcpy(w, string->val + start, n);
1212 			w[n] = '\0';
1213 		} else {
1214 			result = NULL;
1215 		}
1216 	} else {
1217 		mbfl_memory_device device;
1218 		struct collector_substr_data pc;
1219 		mbfl_convert_filter *decoder;
1220 		mbfl_convert_filter *encoder;
1221 
1222 		if (length == MBFL_SUBSTR_UNTIL_END) {
1223 			length = mbfl_strlen(string) - from;
1224 		}
1225 
1226 		mbfl_memory_device_init(&device, length + 1, 0);
1227 		mbfl_string_init(result);
1228 		result->no_language = string->no_language;
1229 		result->encoding = string->encoding;
1230 		/* output code filter */
1231 		decoder = mbfl_convert_filter_new(
1232 		    &mbfl_encoding_wchar,
1233 		    string->encoding,
1234 		    mbfl_memory_device_output, 0, &device);
1235 		/* wchar filter */
1236 		encoder = mbfl_convert_filter_new(
1237 		    string->encoding,
1238 		    &mbfl_encoding_wchar,
1239 		    collector_substr, 0, &pc);
1240 		if (decoder == NULL || encoder == NULL) {
1241 			mbfl_convert_filter_delete(encoder);
1242 			mbfl_convert_filter_delete(decoder);
1243 			return NULL;
1244 		}
1245 		pc.next_filter = decoder;
1246 		pc.start = from;
1247 		pc.stop = from + length;
1248 		pc.output = 0;
1249 
1250 		/* feed data */
1251 		p = string->val;
1252 		n = string->len;
1253 		if (p != NULL) {
1254 			while (n > 0) {
1255 				if ((*encoder->filter_function)(*p++, encoder) < 0) {
1256 					break;
1257 				}
1258 				n--;
1259 			}
1260 		}
1261 
1262 		mbfl_convert_filter_flush(encoder);
1263 		mbfl_convert_filter_flush(decoder);
1264 		result = mbfl_memory_device_result(&device, result);
1265 		mbfl_convert_filter_delete(encoder);
1266 		mbfl_convert_filter_delete(decoder);
1267 	}
1268 
1269 	return result;
1270 }
1271 
1272 /*
1273  *  strcut
1274  */
/*
 * Cut a piece out of `string` by byte offset and byte length.
 *
 * `from` is a byte offset into string->val and `length` a byte count; both
 * are clamped to the input size. For fixed-width and mblen_table encodings
 * the start and end are moved back so they land on a unit/character
 * boundary. For all other encodings the data is run through a pair of
 * conversion filters and as many input bytes as possible are emitted
 * without the output exceeding `length` bytes.
 *
 * On success `result` is filled in and returned; NULL is returned when an
 * allocation fails. result->val is freshly allocated here.
 */
mbfl_string *
mbfl_strcut(
    mbfl_string *string,
    mbfl_string *result,
    size_t from,
    size_t length)
{
	const mbfl_encoding *encoding = string->encoding;
	mbfl_memory_device device;

	/* a start offset beyond the input is clamped to the end of the input */
	if (from >= string->len) {
		from = string->len;
	}

	mbfl_string_init(result);
	result->no_language = string->no_language;
	result->encoding = string->encoding;

	/* fast path: boundaries can be computed directly on the raw bytes */
	if ((encoding->flag & (MBFL_ENCTYPE_SBCS
				| MBFL_ENCTYPE_WCS2BE
				| MBFL_ENCTYPE_WCS2LE
				| MBFL_ENCTYPE_WCS4BE
				| MBFL_ENCTYPE_WCS4LE))
			|| encoding->mblen_table != NULL) {
		const unsigned char *start = NULL;
		const unsigned char *end = NULL;
		unsigned char *w;
		size_t sz;

		if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
			/* round the offset down to a multiple of 2 bytes */
			from &= -2;

			if (length >= string->len - from) {
				length = string->len - from;
			}

			start = string->val + from;
			/* round the length down to a multiple of 2 bytes */
			end   = start + (length & -2);
		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
			/* round the offset down to a multiple of 4 bytes */
			from &= -4;

			if (length >= string->len - from) {
				length = string->len - from;
			}

			start = string->val + from;
			/* round the length down to a multiple of 4 bytes */
			end   = start + (length & -4);
		} else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
			/* one byte per character: any offset is a valid boundary */
			if (length >= string->len - from) {
				length = string->len - from;
			}

			start = string->val + from;
			end = start + length;
		} else if (encoding->mblen_table != NULL) {
			/* mbtab maps a lead byte to the byte length of its character */
			const unsigned char *mbtab = encoding->mblen_table;
			const unsigned char *p, *q;
			int m;

			/* search start position */
			/* hop character by character until the requested offset is
			 * reached or passed; m remembers the size of the last hop */
			for (m = 0, p = string->val, q = p + from;
					p < q; p += (m = mbtab[*p]));

			/* overshot: the offset fell inside a character, so step back
			 * to the beginning of that character */
			if (p > q) {
				p -= m;
			}

			start = p;

			/* search end position */
			if (length >= string->len - (start - string->val)) {
				/* the request covers the whole remainder of the input */
				end = string->val + string->len;
			} else {
				for (q = p + length; p < q; p += (m = mbtab[*p]));

				/* drop the last character if it would exceed `length` */
				if (p > q) {
					p -= m;
				}
				end = p;
			}
		} else {
			/* never reached */
			return NULL;
		}

		/* allocate memory and copy string */
		sz = end - start;
		/* 8 spare zeroed bytes are allocated after the copied data */
		if ((w = (unsigned char*)mbfl_calloc(sz + 8,
				sizeof(unsigned char))) == NULL) {
			return NULL;
		}

		memcpy(w, start, sz);
		w[sz] = '\0';
		w[sz + 1] = '\0';
		w[sz + 2] = '\0';
		w[sz + 3] = '\0';

		result->val = w;
		result->len = sz;
	} else {
		/*
		 * Slow path: feed the bytes through encoder (string->encoding to
		 * wchar) chained to decoder (wchar to string->encoding, writing
		 * into `device`). `bk` holds the last filter state known to fit
		 * within `length` output bytes; `_bk` is a scratch snapshot.
		 */
		mbfl_convert_filter *encoder     = NULL;
		mbfl_convert_filter *decoder     = NULL;
		const unsigned char *p, *q, *r;
		struct {
			mbfl_convert_filter encoder;
			mbfl_convert_filter decoder;
			const unsigned char *p;
			size_t pos;
		} bk, _bk;

		/* output code filter */
		if (!(decoder = mbfl_convert_filter_new(
				&mbfl_encoding_wchar,
				string->encoding,
				mbfl_memory_device_output, 0, &device))) {
			return NULL;
		}

		/* wchar filter */
		/* output is discarded at first, while skipping up to `from` */
		if (!(encoder = mbfl_convert_filter_new(
				string->encoding,
				&mbfl_encoding_wchar,
				mbfl_filter_output_null,
				NULL, NULL))) {
			mbfl_convert_filter_delete(decoder);
			return NULL;
		}

		mbfl_memory_device_init(&device, length + 8, 0);

		p = string->val;

		/* search start position */
		/* the skipped bytes still pass through the encoder so that its
		 * internal state is up to date when real output begins */
		for (q = string->val + from; p < q; p++) {
			(*encoder->filter_function)(*p, encoder);
		}

		/* switch the drain direction */
		/* from here on the encoder's output is fed into the decoder */
		encoder->output_function = (int(*)(int,void *))decoder->filter_function;
		encoder->flush_function = (int(*)(void *))decoder->filter_flush;
		encoder->data = decoder;

		q = string->val + string->len;

		/* save the encoder, decoder state and the pointer */
		mbfl_convert_filter_copy(decoder, &_bk.decoder);
		mbfl_convert_filter_copy(encoder, &_bk.encoder);
		_bk.p = p;
		_bk.pos = device.pos;

		/* never ask for more than the bytes that remain */
		if (length > q - p) {
			length = q - p;
		}

		if (length >= 20) {
			/* output a little shorter than "length" */
			/* XXX: the constant "20" was determined purely on the heuristics. */
			for (r = p + length - 20; p < r; p++) {
				(*encoder->filter_function)(*p, encoder);
			}

			/* if the offset of the resulting string exceeds the length,
			 * then restore the state */
			if (device.pos > length) {
				p = _bk.p;
				device.pos = _bk.pos;
				decoder->filter_dtor(decoder);
				encoder->filter_dtor(encoder);
				mbfl_convert_filter_copy(&_bk.decoder, decoder);
				mbfl_convert_filter_copy(&_bk.encoder, encoder);
				bk = _bk;
			} else {
				/* save the encoder, decoder state and the pointer */
				mbfl_convert_filter_copy(decoder, &bk.decoder);
				mbfl_convert_filter_copy(encoder, &bk.encoder);
				bk.p = p;
				bk.pos = device.pos;

				/* flush the stream */
				/* trial flush: shows how long the output would be if
				 * the string ended at this point */
				(*encoder->filter_flush)(encoder);

				/* if the offset of the resulting string exceeds the length,
				 * then restore the state */
				if (device.pos > length) {
					bk.decoder.filter_dtor(&bk.decoder);
					bk.encoder.filter_dtor(&bk.encoder);

					p = _bk.p;
					device.pos = _bk.pos;
					decoder->filter_dtor(decoder);
					encoder->filter_dtor(encoder);
					mbfl_convert_filter_copy(&_bk.decoder, decoder);
					mbfl_convert_filter_copy(&_bk.encoder, encoder);
					bk = _bk;
				} else {
					/* the bulk step fits: continue from the pre-flush
					 * snapshot and discard the initial one */
					_bk.decoder.filter_dtor(&_bk.decoder);
					_bk.encoder.filter_dtor(&_bk.encoder);

					p = bk.p;
					device.pos = bk.pos;
					decoder->filter_dtor(decoder);
					encoder->filter_dtor(encoder);
					mbfl_convert_filter_copy(&bk.decoder, decoder);
					mbfl_convert_filter_copy(&bk.encoder, encoder);
				}
			}
		} else {
			/* short request: start the byte-wise search from the
			 * initial snapshot */
			bk = _bk;
		}

		/* detect end position */
		/* advance one input byte at a time; after each byte a trial flush
		 * checks whether the output would still fit in `length` bytes */
		while (p < q) {
			(*encoder->filter_function)(*p, encoder);

			if (device.pos > length) {
				/* restore filter */
				p = bk.p;
				device.pos = bk.pos;
				decoder->filter_dtor(decoder);
				encoder->filter_dtor(encoder);
				mbfl_convert_filter_copy(&bk.decoder, decoder);
				mbfl_convert_filter_copy(&bk.encoder, encoder);
				break;
			}

			p++;

			/* backup current state */
			mbfl_convert_filter_copy(decoder, &_bk.decoder);
			mbfl_convert_filter_copy(encoder, &_bk.encoder);
			_bk.pos = device.pos;
			_bk.p = p;

			(*encoder->filter_flush)(encoder);

			if (device.pos > length) {
				_bk.decoder.filter_dtor(&_bk.decoder);
				_bk.encoder.filter_dtor(&_bk.encoder);

				/* restore filter */
				p = bk.p;
				device.pos = bk.pos;
				decoder->filter_dtor(decoder);
				encoder->filter_dtor(encoder);
				mbfl_convert_filter_copy(&bk.decoder, decoder);
				mbfl_convert_filter_copy(&bk.encoder, encoder);
				break;
			}

			/* this byte fits: undo the trial flush and promote the
			 * pre-flush snapshot to be the new known-good state */
			bk.decoder.filter_dtor(&bk.decoder);
			bk.encoder.filter_dtor(&bk.encoder);

			p = _bk.p;
			device.pos = _bk.pos;
			decoder->filter_dtor(decoder);
			encoder->filter_dtor(encoder);
			mbfl_convert_filter_copy(&_bk.decoder, decoder);
			mbfl_convert_filter_copy(&_bk.encoder, encoder);

			bk = _bk;
		}

		/* final flush of the accepted state */
		(*encoder->filter_flush)(encoder);

		bk.decoder.filter_dtor(&bk.decoder);
		bk.encoder.filter_dtor(&bk.encoder);

		/* hand the device contents over to `result` */
		result = mbfl_memory_device_result(&device, result);

		mbfl_convert_filter_delete(encoder);
		mbfl_convert_filter_delete(decoder);
	}

	return result;
}
1551 
1552 
1553 /*
1554  *  strwidth
1555  */
is_fullwidth(int c)1556 static size_t is_fullwidth(int c)
1557 {
1558 	int i;
1559 
1560 	if (c < mbfl_eaw_table[0].begin) {
1561 		return 0;
1562 	}
1563 
1564 	for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1565 		if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1566 			return 1;
1567 		}
1568 	}
1569 
1570 	return 0;
1571 }
1572 
/*
 * Filter output callback that accumulates display width.
 *
 * `data` points at a size_t counter; it grows by 2 for a character that
 * is_fullwidth() accepts and by 1 for any other. The character is returned
 * unchanged.
 */
static int
filter_count_width(int c, void* data)
{
	size_t *total = (size_t *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}

	return c;
}
1579 
1580 size_t
mbfl_strwidth(mbfl_string * string)1581 mbfl_strwidth(mbfl_string *string)
1582 {
1583 	size_t len, n;
1584 	unsigned char *p;
1585 	mbfl_convert_filter *filter;
1586 
1587 	len = 0;
1588 	if (string->len > 0 && string->val != NULL) {
1589 		/* wchar filter */
1590 		filter = mbfl_convert_filter_new(
1591 		    string->encoding,
1592 		    &mbfl_encoding_wchar,
1593 		    filter_count_width, 0, &len);
1594 		if (filter == NULL) {
1595 			mbfl_convert_filter_delete(filter);
1596 			return -1;
1597 		}
1598 
1599 		/* feed data */
1600 		p = string->val;
1601 		n = string->len;
1602 		while (n > 0) {
1603 			(*filter->filter_function)(*p++, filter);
1604 			n--;
1605 		}
1606 
1607 		mbfl_convert_filter_flush(filter);
1608 		mbfl_convert_filter_delete(filter);
1609 	}
1610 
1611 	return len;
1612 }
1613 
1614 
1615 /*
1616  *  strimwidth
1617  */
/* Working state shared between mbfl_strimwidth() and collector_strimwidth(). */
struct collector_strimwidth_data {
	/* filter that every kept character is forwarded to */
	mbfl_convert_filter *decoder;
	/* receives a copy of decoder's state when the width limit is first exceeded */
	mbfl_convert_filter *decoder_backup;
	/* memory device the decoders write into */
	mbfl_memory_device device;
	/* characters are only output once outchar has reached this value */
	size_t from;
	/* width limit that outwidth is compared against */
	size_t width;
	/* width accumulated so far (2 per full-width character, 1 otherwise) */
	size_t outwidth;
	/* number of characters seen so far */
	size_t outchar;
	/* device.pos recorded when the width limit is first exceeded */
	size_t endpos;
	/* 0 while within the limit; incremented for each character past the
	 * limit; 10 makes the collector forward characters unconditionally */
	int status;
};
1629 
1630 static int
collector_strimwidth(int c,void * data)1631 collector_strimwidth(int c, void* data)
1632 {
1633 	struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1634 
1635 	switch (pc->status) {
1636 	case 10:
1637 		(*pc->decoder->filter_function)(c, pc->decoder);
1638 		break;
1639 	default:
1640 		if (pc->outchar >= pc->from) {
1641 			pc->outwidth += (is_fullwidth(c) ? 2: 1);
1642 
1643 			if (pc->outwidth > pc->width) {
1644 				if (pc->status == 0) {
1645 					pc->endpos = pc->device.pos;
1646 					mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1647 				}
1648 				pc->status++;
1649 				(*pc->decoder->filter_function)(c, pc->decoder);
1650 				c = -1;
1651 			} else {
1652 				(*pc->decoder->filter_function)(c, pc->decoder);
1653 			}
1654 		}
1655 		pc->outchar++;
1656 		break;
1657 	}
1658 
1659 	return c;
1660 }
1661 
1662 mbfl_string *
mbfl_strimwidth(mbfl_string * string,mbfl_string * marker,mbfl_string * result,size_t from,size_t width)1663 mbfl_strimwidth(
1664     mbfl_string *string,
1665     mbfl_string *marker,
1666     mbfl_string *result,
1667     size_t from,
1668     size_t width)
1669 {
1670 	struct collector_strimwidth_data pc;
1671 	mbfl_convert_filter *encoder;
1672 	size_t n, mkwidth;
1673 	unsigned char *p;
1674 
1675 	if (string == NULL || result == NULL) {
1676 		return NULL;
1677 	}
1678 	mbfl_string_init(result);
1679 	result->no_language = string->no_language;
1680 	result->encoding = string->encoding;
1681 	mbfl_memory_device_init(&pc.device, MIN(string->len, width), 0);
1682 
1683 	/* output code filter */
1684 	pc.decoder = mbfl_convert_filter_new(
1685 	    &mbfl_encoding_wchar,
1686 	    string->encoding,
1687 	    mbfl_memory_device_output, 0, &pc.device);
1688 	pc.decoder_backup = mbfl_convert_filter_new(
1689 	    &mbfl_encoding_wchar,
1690 	    string->encoding,
1691 	    mbfl_memory_device_output, 0, &pc.device);
1692 	/* wchar filter */
1693 	encoder = mbfl_convert_filter_new(
1694 	    string->encoding,
1695 	    &mbfl_encoding_wchar,
1696 	    collector_strimwidth, 0, &pc);
1697 	if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1698 		mbfl_convert_filter_delete(encoder);
1699 		mbfl_convert_filter_delete(pc.decoder);
1700 		mbfl_convert_filter_delete(pc.decoder_backup);
1701 		return NULL;
1702 	}
1703 	mkwidth = 0;
1704 	if (marker) {
1705 		mkwidth = mbfl_strwidth(marker);
1706 	}
1707 	pc.from = from;
1708 	pc.width = width - mkwidth;
1709 	pc.outwidth = 0;
1710 	pc.outchar = 0;
1711 	pc.status = 0;
1712 	pc.endpos = 0;
1713 
1714 	/* feed data */
1715 	p = string->val;
1716 	n = string->len;
1717 	if (p != NULL) {
1718 		while (n > 0) {
1719 			n--;
1720 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
1721 				break;
1722 			}
1723 		}
1724 		mbfl_convert_filter_flush(encoder);
1725 		if (pc.status != 0 && mkwidth > 0) {
1726 			pc.width += mkwidth;
1727 			if (n > 0) {
1728 				while (n > 0) {
1729 					if ((*encoder->filter_function)(*p++, encoder) < 0) {
1730 						break;
1731 					}
1732 					n--;
1733 				}
1734 				mbfl_convert_filter_flush(encoder);
1735 			} else if (pc.outwidth > pc.width) {
1736 				pc.status++;
1737 			}
1738 			if (pc.status != 1) {
1739 				pc.status = 10;
1740 				pc.device.pos = pc.endpos;
1741 				mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1742 				mbfl_convert_filter_reset(encoder, marker->encoding, &mbfl_encoding_wchar);
1743 				p = marker->val;
1744 				n = marker->len;
1745 				while (n > 0) {
1746 					if ((*encoder->filter_function)(*p++, encoder) < 0) {
1747 						break;
1748 					}
1749 					n--;
1750 				}
1751 				mbfl_convert_filter_flush(encoder);
1752 			}
1753 		} else if (pc.status != 0) {
1754 			pc.device.pos = pc.endpos;
1755 			mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1756 		}
1757 		mbfl_convert_filter_flush(pc.decoder);
1758 	}
1759 	result = mbfl_memory_device_result(&pc.device, result);
1760 	mbfl_convert_filter_delete(encoder);
1761 	mbfl_convert_filter_delete(pc.decoder);
1762 	mbfl_convert_filter_delete(pc.decoder_backup);
1763 
1764 	return result;
1765 }
1766 
1767 mbfl_string *
mbfl_ja_jp_hantozen(mbfl_string * string,mbfl_string * result,int mode)1768 mbfl_ja_jp_hantozen(
1769     mbfl_string *string,
1770     mbfl_string *result,
1771     int mode)
1772 {
1773 	size_t n;
1774 	unsigned char *p;
1775 	mbfl_memory_device device;
1776 	mbfl_convert_filter *decoder = NULL;
1777 	mbfl_convert_filter *encoder = NULL;
1778 	mbfl_convert_filter *tl_filter = NULL;
1779 	mbfl_convert_filter *next_filter = NULL;
1780 	mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1781 
1782 	mbfl_memory_device_init(&device, string->len, 0);
1783 	mbfl_string_init(result);
1784 
1785 	result->no_language = string->no_language;
1786 	result->encoding = string->encoding;
1787 
1788 	decoder = mbfl_convert_filter_new(
1789 		&mbfl_encoding_wchar,
1790 		string->encoding,
1791 		mbfl_memory_device_output, 0, &device);
1792 	if (decoder == NULL) {
1793 		goto out;
1794 	}
1795 	next_filter = decoder;
1796 
1797 	param =
1798 		(mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
1799 	if (param == NULL) {
1800 		goto out;
1801 	}
1802 
1803 	param->mode = mode;
1804 
1805 	tl_filter = mbfl_convert_filter_new2(
1806 		&vtbl_tl_jisx0201_jisx0208,
1807 		(int(*)(int, void*))next_filter->filter_function,
1808 		(int(*)(void*))next_filter->filter_flush,
1809 		next_filter);
1810 	if (tl_filter == NULL) {
1811 		mbfl_free(param);
1812 		goto out;
1813 	}
1814 
1815 	tl_filter->opaque = param;
1816 	next_filter = tl_filter;
1817 
1818 	encoder = mbfl_convert_filter_new(
1819 		string->encoding,
1820 		&mbfl_encoding_wchar,
1821 		(int(*)(int, void*))next_filter->filter_function,
1822 		(int(*)(void*))next_filter->filter_flush,
1823 		next_filter);
1824 	if (encoder == NULL) {
1825 		goto out;
1826 	}
1827 
1828 	/* feed data */
1829 	p = string->val;
1830 	n = string->len;
1831 	if (p != NULL) {
1832 		while (n > 0) {
1833 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
1834 				break;
1835 			}
1836 			n--;
1837 		}
1838 	}
1839 
1840 	mbfl_convert_filter_flush(encoder);
1841 	result = mbfl_memory_device_result(&device, result);
1842 out:
1843 	if (tl_filter != NULL) {
1844 		if (tl_filter->opaque != NULL) {
1845 			mbfl_free(tl_filter->opaque);
1846 		}
1847 		mbfl_convert_filter_delete(tl_filter);
1848 	}
1849 
1850 	if (decoder != NULL) {
1851 		mbfl_convert_filter_delete(decoder);
1852 	}
1853 
1854 	if (encoder != NULL) {
1855 		mbfl_convert_filter_delete(encoder);
1856 	}
1857 
1858 	return result;
1859 }
1860 
1861 
1862 /*
1863  *  MIME header encode
1864  */
/* State of one MIME header encoding run (see mime_header_encoder_new()). */
struct mime_header_encoder_data {
	/* input encoding -> wchar; its output goes to mime_header_encoder_collector() */
	mbfl_convert_filter *conv1_filter;
	/* wchar -> wchar; its output goes to mime_header_encoder_block_collector() */
	mbfl_convert_filter *block_filter;
	/* wchar -> output charset, piped into encod_filter */
	mbfl_convert_filter *conv2_filter;
	/* scratch copy of conv2_filter used to roll back a trial encoding */
	mbfl_convert_filter *conv2_filter_backup;
	/* output charset -> transfer encoding, writing into outdev */
	mbfl_convert_filter *encod_filter;
	/* scratch copy of encod_filter used to roll back a trial encoding */
	mbfl_convert_filter *encod_filter_backup;
	/* finished header text */
	mbfl_memory_device outdev;
	/* characters collected but not yet moved to outdev */
	mbfl_memory_device tmpdev;
	/* state of mime_header_encoder_collector(): 0, 1 or 11 (inside an encoded word) */
	int status1;
	/* state of the block collector: 1 once the encoded-word prefix has been written */
	int status2;
	/* outdev.pos saved before a trial encoding */
	size_t prevpos;
	/* outdev.pos at which the current output line begins */
	size_t linehead;
	/* extra columns added to the line length until the first line break */
	size_t firstindent;
	/* number of bytes used in encname */
	int encnamelen;
	/* number of bytes used in lwsp */
	int lwsplen;
	/* encoded-word prefix, e.g. "=?ISO-2022-JP?B?" */
	char encname[128];
	/* line-folding sequence: line feed followed by a space */
	char lwsp[16];
};
1884 
1885 static int
mime_header_encoder_block_collector(int c,void * data)1886 mime_header_encoder_block_collector(int c, void *data)
1887 {
1888 	size_t n;
1889 	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1890 
1891 	switch (pe->status2) {
1892 	case 1:	/* encoded word */
1893 		pe->prevpos = pe->outdev.pos;
1894 		mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
1895 		mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
1896 		(*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1897 		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1898 		(*pe->encod_filter->filter_flush)(pe->encod_filter);
1899 		n = pe->outdev.pos - pe->linehead + pe->firstindent;
1900 		pe->outdev.pos = pe->prevpos;
1901 		mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
1902 		mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
1903 		if (n >= 74) {
1904 			(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1905 			(*pe->encod_filter->filter_flush)(pe->encod_filter);
1906 			mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);	/* ?= */
1907 			mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
1908 			pe->linehead = pe->outdev.pos;
1909 			pe->firstindent = 0;
1910 			mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1911 			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1912 		} else {
1913 			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1914 		}
1915 		break;
1916 
1917 	default:
1918 		mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1919 		c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1920 		pe->status2 = 1;
1921 		break;
1922 	}
1923 
1924 	return c;
1925 }
1926 
1927 static int
mime_header_encoder_collector(int c,void * data)1928 mime_header_encoder_collector(int c, void *data)
1929 {
1930 	static int qp_table[256] = {
1931 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
1932 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
1933 		1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
1934 		0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
1935 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
1936 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
1937 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
1938 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
1939 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
1940 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
1941 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
1942 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
1943 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
1944 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
1945 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
1946 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  /* 0xF0 */
1947 	};
1948 
1949 	size_t n;
1950 	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1951 
1952 	switch (pe->status1) {
1953 	case 11:	/* encoded word */
1954 		(*pe->block_filter->filter_function)(c, pe->block_filter);
1955 		break;
1956 
1957 	default:	/* ASCII */
1958 		if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
1959 			mbfl_memory_device_output(c, &pe->tmpdev);
1960 			pe->status1 = 1;
1961 		} else if (pe->status1 == 0 && c == 0x20) {	/* repeat SPACE */
1962 			mbfl_memory_device_output(c, &pe->tmpdev);
1963 		} else {
1964 			if (pe->tmpdev.pos < 74 && c == 0x20) {
1965 				n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
1966 				if (n > 74) {
1967 					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
1968 					pe->linehead = pe->outdev.pos;
1969 					pe->firstindent = 0;
1970 				} else if (pe->outdev.pos > 0) {
1971 					mbfl_memory_device_output(0x20, &pe->outdev);
1972 				}
1973 				mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
1974 				mbfl_memory_device_reset(&pe->tmpdev);
1975 				pe->status1 = 0;
1976 			} else {
1977 				n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
1978 				if (n > 60)  {
1979 					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
1980 					pe->linehead = pe->outdev.pos;
1981 					pe->firstindent = 0;
1982 				} else if (pe->outdev.pos > 0)  {
1983 					mbfl_memory_device_output(0x20, &pe->outdev);
1984 				}
1985 				mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
1986 				mbfl_memory_device_reset(&pe->tmpdev);
1987 				(*pe->block_filter->filter_function)(c, pe->block_filter);
1988 				pe->status1 = 11;
1989 			}
1990 		}
1991 		break;
1992 	}
1993 
1994 	return c;
1995 }
1996 
1997 mbfl_string *
mime_header_encoder_result(struct mime_header_encoder_data * pe,mbfl_string * result)1998 mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
1999 {
2000 	if (pe->status1 >= 10) {
2001 		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2002 		(*pe->encod_filter->filter_flush)(pe->encod_filter);
2003 		mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);		/* ?= */
2004 	} else if (pe->tmpdev.pos > 0) {
2005 		if (pe->outdev.pos > 0) {
2006 			if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) {
2007 				mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2008 			} else {
2009 				mbfl_memory_device_output(0x20, &pe->outdev);
2010 			}
2011 		}
2012 		mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2013 	}
2014 	mbfl_memory_device_reset(&pe->tmpdev);
2015 	pe->prevpos = 0;
2016 	pe->linehead = 0;
2017 	pe->status1 = 0;
2018 	pe->status2 = 0;
2019 
2020 	return mbfl_memory_device_result(&pe->outdev, result);
2021 }
2022 
2023 struct mime_header_encoder_data*
mime_header_encoder_new(const mbfl_encoding * incode,const mbfl_encoding * outcode,const mbfl_encoding * transenc)2024 mime_header_encoder_new(
2025     const mbfl_encoding *incode,
2026     const mbfl_encoding *outcode,
2027     const mbfl_encoding *transenc)
2028 {
2029 	size_t n;
2030 	const char *s;
2031 	struct mime_header_encoder_data *pe;
2032 
2033 	/* get output encoding and check MIME charset name */
2034 	if (outcode->mime_name == NULL || outcode->mime_name[0] == '\0') {
2035 		return NULL;
2036 	}
2037 
2038 	pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
2039 	if (pe == NULL) {
2040 		return NULL;
2041 	}
2042 
2043 	mbfl_memory_device_init(&pe->outdev, 0, 0);
2044 	mbfl_memory_device_init(&pe->tmpdev, 0, 0);
2045 	pe->prevpos = 0;
2046 	pe->linehead = 0;
2047 	pe->firstindent = 0;
2048 	pe->status1 = 0;
2049 	pe->status2 = 0;
2050 
2051 	/* make the encoding description string  exp. "=?ISO-2022-JP?B?" */
2052 	n = 0;
2053 	pe->encname[n++] = 0x3d;
2054 	pe->encname[n++] = 0x3f;
2055 	s = outcode->mime_name;
2056 	while (*s) {
2057 		pe->encname[n++] = *s++;
2058 	}
2059 	pe->encname[n++] = 0x3f;
2060 	if (transenc->no_encoding == mbfl_no_encoding_qprint) {
2061 		pe->encname[n++] = 0x51;
2062 	} else {
2063 		pe->encname[n++] = 0x42;
2064 		transenc = &mbfl_encoding_base64;
2065 	}
2066 	pe->encname[n++] = 0x3f;
2067 	pe->encname[n] = '\0';
2068 	pe->encnamelen = n;
2069 
2070 	n = 0;
2071 	pe->lwsp[n++] = 0x0d;
2072 	pe->lwsp[n++] = 0x0a;
2073 	pe->lwsp[n++] = 0x20;
2074 	pe->lwsp[n] = '\0';
2075 	pe->lwsplen = n;
2076 
2077 	/* transfer encode filter */
2078 	pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2079 	pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2080 
2081 	/* Output code filter */
2082 	pe->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2083 	pe->conv2_filter_backup = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2084 
2085 	/* encoded block filter */
2086 	pe->block_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, &mbfl_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
2087 
2088 	/* Input code filter */
2089 	pe->conv1_filter = mbfl_convert_filter_new(incode, &mbfl_encoding_wchar, mime_header_encoder_collector, 0, pe);
2090 
2091 	if (pe->encod_filter == NULL ||
2092 	    pe->encod_filter_backup == NULL ||
2093 	    pe->conv2_filter == NULL ||
2094 	    pe->conv2_filter_backup == NULL ||
2095 	    pe->conv1_filter == NULL) {
2096 		mime_header_encoder_delete(pe);
2097 		return NULL;
2098 	}
2099 
2100 	if (transenc->no_encoding == mbfl_no_encoding_qprint) {
2101 		pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
2102 		pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
2103 	} else {
2104 		pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
2105 		pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
2106 	}
2107 
2108 	return pe;
2109 }
2110 
2111 void
mime_header_encoder_delete(struct mime_header_encoder_data * pe)2112 mime_header_encoder_delete(struct mime_header_encoder_data *pe)
2113 {
2114 	if (pe) {
2115 		mbfl_convert_filter_delete(pe->conv1_filter);
2116 		mbfl_convert_filter_delete(pe->block_filter);
2117 		mbfl_convert_filter_delete(pe->conv2_filter);
2118 		mbfl_convert_filter_delete(pe->conv2_filter_backup);
2119 		mbfl_convert_filter_delete(pe->encod_filter);
2120 		mbfl_convert_filter_delete(pe->encod_filter_backup);
2121 		mbfl_memory_device_clear(&pe->outdev);
2122 		mbfl_memory_device_clear(&pe->tmpdev);
2123 		mbfl_free((void*)pe);
2124 	}
2125 }
2126 
2127 int
mime_header_encoder_feed(int c,struct mime_header_encoder_data * pe)2128 mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
2129 {
2130 	return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
2131 }
2132 
2133 mbfl_string *
mbfl_mime_header_encode(mbfl_string * string,mbfl_string * result,const mbfl_encoding * outcode,const mbfl_encoding * encoding,const char * linefeed,int indent)2134 mbfl_mime_header_encode(
2135     mbfl_string *string,
2136     mbfl_string *result,
2137     const mbfl_encoding *outcode,
2138     const mbfl_encoding *encoding,
2139     const char *linefeed,
2140     int indent)
2141 {
2142 	size_t n;
2143 	unsigned char *p;
2144 	struct mime_header_encoder_data *pe;
2145 
2146 	mbfl_string_init(result);
2147 	result->no_language = string->no_language;
2148 	result->encoding = &mbfl_encoding_ascii;
2149 
2150 	pe = mime_header_encoder_new(string->encoding, outcode, encoding);
2151 	if (pe == NULL) {
2152 		return NULL;
2153 	}
2154 
2155 	if (linefeed != NULL) {
2156 		n = 0;
2157 		while (*linefeed && n < 8) {
2158 			pe->lwsp[n++] = *linefeed++;
2159 		}
2160 		pe->lwsp[n++] = 0x20;
2161 		pe->lwsp[n] = '\0';
2162 		pe->lwsplen = n;
2163 	}
2164 	if (indent > 0 && indent < 74) {
2165 		pe->firstindent = indent;
2166 	}
2167 
2168 	n = string->len;
2169 	p = string->val;
2170 	while (n > 0) {
2171 		(*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
2172 		n--;
2173 	}
2174 
2175 	result = mime_header_encoder_result(pe, result);
2176 	mime_header_encoder_delete(pe);
2177 
2178 	return result;
2179 }
2180 
2181 
2182 /*
2183  *  MIME header decode
2184  */
/* State of one MIME header decoding run (see mime_header_decoder_collector()). */
struct mime_header_decoder_data {
	/* presumably the transfer-decoding (base64 / quoted-printable) filter -- TODO confirm */
	mbfl_convert_filter *deco_filter;
	/* filter that text which is not part of an encoded word is passed to;
	 * it is reset to convert from incode once an encoded word is recognised */
	mbfl_convert_filter *conv1_filter;
	/* presumably the final conversion to outcode -- TODO confirm */
	mbfl_convert_filter *conv2_filter;
	/* presumably the decoded output buffer -- TODO confirm */
	mbfl_memory_device outdev;
	/* bytes of a possible encoded word collected so far */
	mbfl_memory_device tmpdev;
	/* position in tmpdev at which the charset name begins */
	size_t cspos;
	/* state of the collector's state machine */
	int status;
	/* transfer encoding of the current encoded word (base64 or quoted-printable) */
	const mbfl_encoding *encoding;
	/* charset named in the current encoded word */
	const mbfl_encoding *incode;
	/* presumably the encoding requested for the decoded output -- TODO confirm */
	const mbfl_encoding *outcode;
};
2197 
2198 static int
mime_header_decoder_collector(int c,void * data)2199 mime_header_decoder_collector(int c, void* data)
2200 {
2201 	const mbfl_encoding *encoding;
2202 	struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;
2203 
2204 	switch (pd->status) {
2205 	case 1:
2206 		if (c == 0x3f) {		/* ? */
2207 			mbfl_memory_device_output(c, &pd->tmpdev);
2208 			pd->cspos = pd->tmpdev.pos;
2209 			pd->status = 2;
2210 		} else {
2211 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2212 			mbfl_memory_device_reset(&pd->tmpdev);
2213 			if (c == 0x3d) {		/* = */
2214 				mbfl_memory_device_output(c, &pd->tmpdev);
2215 			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2216 				pd->status = 9;
2217 			} else {
2218 				(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2219 				pd->status = 0;
2220 			}
2221 		}
2222 		break;
2223 	case 2:		/* store charset string */
2224 		if (c == 0x3f) {		/* ? */
2225 			/* identify charset */
2226 			mbfl_memory_device_output('\0', &pd->tmpdev);
2227 			encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
2228 			if (encoding != NULL) {
2229 				pd->incode = encoding;
2230 				pd->status = 3;
2231 			}
2232 			mbfl_memory_device_unput(&pd->tmpdev);
2233 			mbfl_memory_device_output(c, &pd->tmpdev);
2234 		} else {
2235 			mbfl_memory_device_output(c, &pd->tmpdev);
2236 			if (pd->tmpdev.pos > 100) {		/* too long charset string */
2237 				pd->status = 0;
2238 			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2239 				mbfl_memory_device_unput(&pd->tmpdev);
2240 				pd->status = 9;
2241 			}
2242 			if (pd->status != 2) {
2243 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2244 				mbfl_memory_device_reset(&pd->tmpdev);
2245 			}
2246 		}
2247 		break;
2248 	case 3:		/* identify encoding */
2249 		mbfl_memory_device_output(c, &pd->tmpdev);
2250 		if (c == 0x42 || c == 0x62) {		/* 'B' or 'b' */
2251 			pd->encoding = &mbfl_encoding_base64;
2252 			pd->status = 4;
2253 		} else if (c == 0x51 || c == 0x71) {	/* 'Q' or 'q' */
2254 			pd->encoding = &mbfl_encoding_qprint;
2255 			pd->status = 4;
2256 		} else {
2257 			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2258 				mbfl_memory_device_unput(&pd->tmpdev);
2259 				pd->status = 9;
2260 			} else {
2261 				pd->status = 0;
2262 			}
2263 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2264 			mbfl_memory_device_reset(&pd->tmpdev);
2265 		}
2266 		break;
2267 	case 4:		/* reset filter */
2268 		mbfl_memory_device_output(c, &pd->tmpdev);
2269 		if (c == 0x3f) {		/* ? */
2270 			/* charset convert filter */
2271 			mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, &mbfl_encoding_wchar);
2272 			/* decode filter */
2273 			mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, &mbfl_encoding_8bit);
2274 			pd->status = 5;
2275 		} else {
2276 			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2277 				mbfl_memory_device_unput(&pd->tmpdev);
2278 				pd->status = 9;
2279 			} else {
2280 				pd->status = 0;
2281 			}
2282 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2283 		}
2284 		mbfl_memory_device_reset(&pd->tmpdev);
2285 		break;
2286 	case 5:		/* encoded block */
2287 		if (c == 0x3f) {		/* ? */
2288 			pd->status = 6;
2289 		} else {
2290 			(*pd->deco_filter->filter_function)(c, pd->deco_filter);
2291 		}
2292 		break;
2293 	case 6:		/* check end position */
2294 		if (c == 0x3d) {		/* = */
2295 			/* flush and reset filter */
2296 			(*pd->deco_filter->filter_flush)(pd->deco_filter);
2297 			(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2298 			mbfl_convert_filter_reset(pd->conv1_filter, &mbfl_encoding_ascii, &mbfl_encoding_wchar);
2299 			pd->status = 7;
2300 		} else {
2301 			(*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
2302 			if (c != 0x3f) {		/* ? */
2303 				(*pd->deco_filter->filter_function)(c, pd->deco_filter);
2304 				pd->status = 5;
2305 			}
2306 		}
2307 		break;
2308 	case 7:		/* after encoded block */
2309 		if (c == 0x0d || c == 0x0a) {	/* CR LF */
2310 			pd->status = 8;
2311 		} else {
2312 			mbfl_memory_device_output(c, &pd->tmpdev);
2313 			if (c == 0x3d) {		/* = */
2314 				pd->status = 1;
2315 			} else if (c != 0x20 && c != 0x09) {		/* not space */
2316 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2317 				mbfl_memory_device_reset(&pd->tmpdev);
2318 				pd->status = 0;
2319 			}
2320 		}
2321 		break;
2322 	case 8:		/* folding */
2323 	case 9:		/* folding */
2324 		if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
2325 			if (c == 0x3d) {		/* = */
2326 				if (pd->status == 8) {
2327 					mbfl_memory_device_output(0x20, &pd->tmpdev);	/* SPACE */
2328 				} else {
2329 					(*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
2330 				}
2331 				mbfl_memory_device_output(c, &pd->tmpdev);
2332 				pd->status = 1;
2333 			} else {
2334 				mbfl_memory_device_output(0x20, &pd->tmpdev);
2335 				mbfl_memory_device_output(c, &pd->tmpdev);
2336 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2337 				mbfl_memory_device_reset(&pd->tmpdev);
2338 				pd->status = 0;
2339 			}
2340 		}
2341 		break;
2342 	default:		/* non encoded block */
2343 		if (c == 0x0d || c == 0x0a) {	/* CR LF */
2344 			pd->status = 9;
2345 		} else if (c == 0x3d) {		/* = */
2346 			mbfl_memory_device_output(c, &pd->tmpdev);
2347 			pd->status = 1;
2348 		} else {
2349 			(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2350 		}
2351 		break;
2352 	}
2353 
2354 	return c;
2355 }
2356 
2357 mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data * pd,mbfl_string * result)2358 mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2359 {
2360 	switch (pd->status) {
2361 	case 1:
2362 	case 2:
2363 	case 3:
2364 	case 4:
2365 	case 7:
2366 	case 8:
2367 	case 9:
2368 		mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2369 		break;
2370 	case 5:
2371 	case 6:
2372 		(*pd->deco_filter->filter_flush)(pd->deco_filter);
2373 		(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2374 		break;
2375 	}
2376 	(*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2377 	mbfl_memory_device_reset(&pd->tmpdev);
2378 	pd->status = 0;
2379 
2380 	return mbfl_memory_device_result(&pd->outdev, result);
2381 }
2382 
2383 struct mime_header_decoder_data*
mime_header_decoder_new(const mbfl_encoding * outcode)2384 mime_header_decoder_new(const mbfl_encoding *outcode)
2385 {
2386 	struct mime_header_decoder_data *pd;
2387 
2388 	pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
2389 	if (pd == NULL) {
2390 		return NULL;
2391 	}
2392 
2393 	mbfl_memory_device_init(&pd->outdev, 0, 0);
2394 	mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2395 	pd->cspos = 0;
2396 	pd->status = 0;
2397 	pd->encoding = &mbfl_encoding_8bit;
2398 	pd->incode = &mbfl_encoding_ascii;
2399 	pd->outcode = outcode;
2400 	/* charset convert filter */
2401 	pd->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2402 	pd->conv1_filter = mbfl_convert_filter_new(pd->incode, &mbfl_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2403 	/* decode filter */
2404 	pd->deco_filter = mbfl_convert_filter_new(pd->encoding, &mbfl_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2405 
2406 	if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2407 		mime_header_decoder_delete(pd);
2408 		return NULL;
2409 	}
2410 
2411 	return pd;
2412 }
2413 
2414 void
mime_header_decoder_delete(struct mime_header_decoder_data * pd)2415 mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2416 {
2417 	if (pd) {
2418 		mbfl_convert_filter_delete(pd->conv2_filter);
2419 		mbfl_convert_filter_delete(pd->conv1_filter);
2420 		mbfl_convert_filter_delete(pd->deco_filter);
2421 		mbfl_memory_device_clear(&pd->outdev);
2422 		mbfl_memory_device_clear(&pd->tmpdev);
2423 		mbfl_free((void*)pd);
2424 	}
2425 }
2426 
/*
 * Public entry point for streaming use: pass one input byte `c` to the
 * decoder `pd`. Forwards to mime_header_decoder_collector() and returns
 * its result.
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	void *state = pd;

	return mime_header_decoder_collector(c, state);
}
2432 
2433 mbfl_string *
mbfl_mime_header_decode(mbfl_string * string,mbfl_string * result,const mbfl_encoding * outcode)2434 mbfl_mime_header_decode(
2435     mbfl_string *string,
2436     mbfl_string *result,
2437     const mbfl_encoding *outcode)
2438 {
2439 	size_t n;
2440 	unsigned char *p;
2441 	struct mime_header_decoder_data *pd;
2442 
2443 	mbfl_string_init(result);
2444 	result->no_language = string->no_language;
2445 	result->encoding = outcode;
2446 
2447 	pd = mime_header_decoder_new(outcode);
2448 	if (pd == NULL) {
2449 		return NULL;
2450 	}
2451 
2452 	/* feed data */
2453 	n = string->len;
2454 	p = string->val;
2455 	while (n > 0) {
2456 		mime_header_decoder_collector(*p++, pd);
2457 		n--;
2458 	}
2459 
2460 	result = mime_header_decoder_result(pd, result);
2461 	mime_header_decoder_delete(pd);
2462 
2463 	return result;
2464 }
2465 
2466 
2467 
2468 /*
2469  *  convert HTML numeric entity
2470  */
2471 struct collector_htmlnumericentity_data {
2472 	mbfl_convert_filter *decoder;
2473 	int status;
2474 	int cache;
2475 	int digit;
2476 	int *convmap;
2477 	int mapsize;
2478 };
2479 
2480 static int
collector_encode_htmlnumericentity(int c,void * data)2481 collector_encode_htmlnumericentity(int c, void *data)
2482 {
2483 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2484 	int f, n, s, r, d, size, *mapelm;
2485 
2486 	size = pc->mapsize;
2487 	f = 0;
2488 	n = 0;
2489 	while (n < size) {
2490 		mapelm = &(pc->convmap[n*4]);
2491 		if (c >= mapelm[0] && c <= mapelm[1]) {
2492 			s = (c + mapelm[2]) & mapelm[3];
2493 			if (s >= 0) {
2494 				(*pc->decoder->filter_function)(0x26, pc->decoder);	/* '&' */
2495 				(*pc->decoder->filter_function)(0x23, pc->decoder);	/* '#' */
2496 				r = 100000000;
2497 				s %= r;
2498 				while (r > 0) {
2499 					d = s/r;
2500 					if (d || f) {
2501 						f = 1;
2502 						s %= r;
2503 						(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2504 					}
2505 					r /= 10;
2506 				}
2507 				if (!f) {
2508 					f = 1;
2509 					(*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2510 				}
2511 				(*pc->decoder->filter_function)(0x3b, pc->decoder);		/* ';' */
2512 			}
2513 		}
2514 		if (f) {
2515 			break;
2516 		}
2517 		n++;
2518 	}
2519 	if (!f) {
2520 		(*pc->decoder->filter_function)(c, pc->decoder);
2521 	}
2522 
2523 	return c;
2524 }
2525 
2526 static int
collector_decode_htmlnumericentity(int c,void * data)2527 collector_decode_htmlnumericentity(int c, void *data)
2528 {
2529 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2530 	int f, n, s, r, d, size, *mapelm;
2531 
2532 	switch (pc->status) {
2533 	case 1:
2534 		if (c == 0x23) {	/* '#' */
2535 			pc->status = 2;
2536 		} else {
2537 			pc->status = 0;
2538 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2539 			(*pc->decoder->filter_function)(c, pc->decoder);
2540 		}
2541 		break;
2542 	case 2:
2543 		if (c == 0x78) {	/* 'x' */
2544 			pc->status = 4;
2545 		} else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2546 			pc->cache = c - 0x30;
2547 			pc->status = 3;
2548 			pc->digit = 1;
2549 		} else {
2550 			pc->status = 0;
2551 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2552 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2553 			(*pc->decoder->filter_function)(c, pc->decoder);
2554 		}
2555 		break;
2556 	case 3:
2557 		s = 0;
2558 		f = 0;
2559 		if (c >= 0x30 && c <= 0x39) {	/* '0' - '9' */
2560 			s = pc->cache;
2561 			if (pc->digit > 9 || s > INT_MAX/10) {
2562 				pc->status = 0;
2563 				f = 1;
2564 			} else {
2565 				s = s*10 + (c - 0x30);
2566 				pc->cache = s;
2567 				pc->digit++;
2568 			}
2569 		} else {
2570 			pc->status = 0;
2571 			s = pc->cache;
2572 			f = 1;
2573 			n = 0;
2574 			size = pc->mapsize;
2575 			while (n < size) {
2576 				mapelm = &(pc->convmap[n*4]);
2577 				d = s - mapelm[2];
2578 				if (d >= mapelm[0] && d <= mapelm[1]) {
2579 					f = 0;
2580 					(*pc->decoder->filter_function)(d, pc->decoder);
2581 					if (c != 0x3b) {	/* ';' */
2582 						(*pc->decoder->filter_function)(c, pc->decoder);
2583 					}
2584 					break;
2585 				}
2586 				n++;
2587 			}
2588 		}
2589 		if (f) {
2590 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2591 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2592 			r = 1;
2593 			n = pc->digit;
2594 			while (n > 1) {
2595 				r *= 10;
2596 				n--;
2597 			}
2598 			while (r > 0) {
2599 				d = s/r;
2600 				s %= r;
2601 				r /= 10;
2602 				(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2603 			}
2604 			(*pc->decoder->filter_function)(c, pc->decoder);
2605 		}
2606 		break;
2607 	case 4:
2608 		if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2609 			pc->cache = c - 0x30;
2610 			pc->status = 5;
2611 			pc->digit = 1;
2612 		} else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F'  */
2613 			pc->cache = c - 0x41 + 10;
2614 			pc->status = 5;
2615 			pc->digit = 1;
2616 		} else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f'  */
2617 			pc->cache = c - 0x61 + 10;
2618 			pc->status = 5;
2619 			pc->digit = 1;
2620 		} else {
2621 			pc->status = 0;
2622 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2623 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2624 			(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
2625 			(*pc->decoder->filter_function)(c, pc->decoder);
2626 		}
2627 		break;
2628 	case 5:
2629 		s = 0;
2630 		f = 0;
2631 		if ((c >= 0x30 && c <= 0x39) ||
2632 			(c >= 0x41 && c <= 0x46) ||
2633 			(c >= 0x61 && c <= 0x66)) {	/* '0' - '9' or 'a' - 'f'  */
2634 			if (pc->digit > 9) {
2635 				pc->status = 0;
2636 				s = pc->cache;
2637 				f = 1;
2638 			} else {
2639 				if (c >= 0x30 && c <= 0x39) {
2640 					s = pc->cache*16 + (c - 0x30);
2641 				} else if (c >= 0x41 && c <= 0x46)  {
2642 					s = pc->cache*16 + (c - 0x41 + 10);
2643 				} else {
2644 					s = pc->cache*16 + (c - 0x61 + 10);
2645 				}
2646 				pc->cache = s;
2647 				pc->digit++;
2648 			}
2649 		} else {
2650 			pc->status = 0;
2651 			s = pc->cache;
2652 			f = 1;
2653 			n = 0;
2654 			size = pc->mapsize;
2655 			while (n < size) {
2656 				mapelm = &(pc->convmap[n*4]);
2657 				d = s - mapelm[2];
2658 				if (d >= mapelm[0] && d <= mapelm[1]) {
2659 					f = 0;
2660 					(*pc->decoder->filter_function)(d, pc->decoder);
2661 					if (c != 0x3b) {	/* ';' */
2662 						(*pc->decoder->filter_function)(c, pc->decoder);
2663 					}
2664 					break;
2665 				}
2666 				n++;
2667 			}
2668 		}
2669 		if (f) {
2670 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2671 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2672 			(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
2673 			r = 1;
2674 			n = pc->digit;
2675 			while (n > 0) {
2676 				r *= 16;
2677 				n--;
2678 			}
2679 			s %= r;
2680 			r /= 16;
2681 			while (r > 0) {
2682 				d = s/r;
2683 				s %= r;
2684 				r /= 16;
2685 				(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2686 			}
2687 			(*pc->decoder->filter_function)(c, pc->decoder);
2688 		}
2689 		break;
2690 	default:
2691 		if (c == 0x26) {	/* '&' */
2692 			pc->status = 1;
2693 		} else {
2694 			(*pc->decoder->filter_function)(c, pc->decoder);
2695 		}
2696 		break;
2697 	}
2698 
2699 	return c;
2700 }
2701 
2702 static int
collector_encode_hex_htmlnumericentity(int c,void * data)2703 collector_encode_hex_htmlnumericentity(int c, void *data)
2704 {
2705 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2706 	int f, n, s, r, d, size, *mapelm;
2707 
2708 	size = pc->mapsize;
2709 	f = 0;
2710 	n = 0;
2711 	while (n < size) {
2712 		mapelm = &(pc->convmap[n*4]);
2713 		if (c >= mapelm[0] && c <= mapelm[1]) {
2714 			s = (c + mapelm[2]) & mapelm[3];
2715 			if (s >= 0) {
2716 				(*pc->decoder->filter_function)(0x26, pc->decoder);	/* '&' */
2717 				(*pc->decoder->filter_function)(0x23, pc->decoder);	/* '#' */
2718 				(*pc->decoder->filter_function)(0x78, pc->decoder);	/* 'x' */
2719 				r = 0x1000000;
2720 				s %= r;
2721 				while (r > 0) {
2722 					d = s/r;
2723 					if (d || f) {
2724 						f = 1;
2725 						s %= r;
2726 						(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2727 					}
2728 					r /= 16;
2729 				}
2730 				if (!f) {
2731 					f = 1;
2732 					(*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2733 				}
2734 				(*pc->decoder->filter_function)(0x3b, pc->decoder);		/* ';' */
2735 			}
2736 		}
2737 		if (f) {
2738 			break;
2739 		}
2740 		n++;
2741 	}
2742 	if (!f) {
2743 		(*pc->decoder->filter_function)(c, pc->decoder);
2744 	}
2745 
2746 	return c;
2747 }
2748 
/*
 * Flush callback of the numeric entity decoder: writes out, as plain text,
 * whatever part of an unfinished entity is still held in the state
 * ("&", "&#", "&#" + decimal digits, "&#x", "&#x" + hex digits) and then
 * clears the state.
 *
 * Despite its declared type, the argument is used as a
 * struct collector_htmlnumericentity_data *; mbfl_html_numeric_entity()
 * registers this function as flush callback together with that structure
 * as callback data.
 *
 * The hex digits are extracted by shifting rather than by computing
 * 16^digit, which overflows an int (and can lead to a division by zero)
 * once eight or more digits have been read.
 *
 * Always returns 0.
 */
int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
{
	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
	const int int_bits = (int)(sizeof(unsigned int) * CHAR_BIT);
	int n, s, r, d, shift;

	switch (pc->status) {
	case 1: /* '&' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		break;
	case 2: /* '#' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		break;
	case 3: /* '0'-'9' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */

		/* r = 10^(digit-1) selects the most significant digit first */
		s = pc->cache;
		r = 1;
		n = pc->digit;
		while (n > 1) {
			r *= 10;
			n--;
		}
		while (r > 0) {
			d = s/r;
			s %= r;
			r /= 10;
			(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
		}
		break;
	case 4: /* 'x' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
		break;
	case 5: /* '0'-'9','a'-'f' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */

		/* one nibble per digit read, most significant first; positions
		 * beyond the width of an int are written as zero */
		s = pc->cache;
		for (n = pc->digit; n > 0; n--) {
			shift = (n - 1) * 4;
			d = (shift < int_bits) ? (int)(((unsigned int)s >> shift) & 0xf) : 0;
			(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
		}
		break;
	default:
		/* state 0: nothing is pending */
		break;
	}

	pc->status = 0;
	pc->cache = 0;
	pc->digit = 0;

	return 0;
}
2819 
2820 
2821 mbfl_string *
mbfl_html_numeric_entity(mbfl_string * string,mbfl_string * result,int * convmap,int mapsize,int type)2822 mbfl_html_numeric_entity(
2823     mbfl_string *string,
2824     mbfl_string *result,
2825     int *convmap,
2826     int mapsize,
2827     int type)
2828 {
2829 	struct collector_htmlnumericentity_data pc;
2830 	mbfl_memory_device device;
2831 	mbfl_convert_filter *encoder;
2832 	size_t n;
2833 	unsigned char *p;
2834 
2835 	if (string == NULL || result == NULL) {
2836 		return NULL;
2837 	}
2838 	mbfl_string_init(result);
2839 	result->no_language = string->no_language;
2840 	result->encoding = string->encoding;
2841 	mbfl_memory_device_init(&device, string->len, 0);
2842 
2843 	/* output code filter */
2844 	pc.decoder = mbfl_convert_filter_new(
2845 	    &mbfl_encoding_wchar,
2846 	    string->encoding,
2847 	    mbfl_memory_device_output, 0, &device);
2848 	/* wchar filter */
2849 	if (type == 0) { /* decimal output */
2850 		encoder = mbfl_convert_filter_new(
2851 		    string->encoding,
2852 		    &mbfl_encoding_wchar,
2853 		    collector_encode_htmlnumericentity, 0, &pc);
2854 	} else if (type == 2) { /* hex output */
2855 		encoder = mbfl_convert_filter_new(
2856 		    string->encoding,
2857 		    &mbfl_encoding_wchar,
2858 		    collector_encode_hex_htmlnumericentity, 0, &pc);
2859 	} else { /* type == 1: decimal/hex input */
2860 		encoder = mbfl_convert_filter_new(
2861 		    string->encoding,
2862 		    &mbfl_encoding_wchar,
2863 		    collector_decode_htmlnumericentity,
2864 			(int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
2865 	}
2866 	if (pc.decoder == NULL || encoder == NULL) {
2867 		mbfl_convert_filter_delete(encoder);
2868 		mbfl_convert_filter_delete(pc.decoder);
2869 		return NULL;
2870 	}
2871 	pc.status = 0;
2872 	pc.cache = 0;
2873 	pc.digit = 0;
2874 	pc.convmap = convmap;
2875 	pc.mapsize = mapsize;
2876 
2877 	/* feed data */
2878 	p = string->val;
2879 	n = string->len;
2880 	if (p != NULL) {
2881 		while (n > 0) {
2882 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
2883 				break;
2884 			}
2885 			n--;
2886 		}
2887 	}
2888 	mbfl_convert_filter_flush(encoder);
2889 	mbfl_convert_filter_flush(pc.decoder);
2890 	result = mbfl_memory_device_result(&device, result);
2891 	mbfl_convert_filter_delete(encoder);
2892 	mbfl_convert_filter_delete(pc.decoder);
2893 
2894 	return result;
2895 }
2896