xref: /PHP-7.3/ext/mbstring/libmbfl/mbfl/mbfilter.c (revision 3d5de7d7)
1 /*
2  * charset=UTF-8
3  */
4 
5 /*
6  * "streamable kanji code filter and converter"
7  *
8  * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
9  *
10  * This software is released under the GNU Lesser General Public License.
11  * (Version 2.1, February 1999)
12  * Please read the following detail of the licence (in japanese).
13  *
14  * ◆使用許諾条件◆
15  *
16  * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
17  * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
18  * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
19  * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
20  * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
21  * することはできません。
22  *
23  * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
24  * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
25  * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
26  * による許諾を得る必要があります。
27  *
28  * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
29  * ます。「GNU Lesser General Public License」とは、これまでLibrary General
30  * Public Licenseと呼ばれていたものです。
31  *     http://www.gnu.org/ --- GNUウェブサイト
32  *     http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
33  * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
34  *
35  * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
36  * はありません。
37  *
38  * ◆保証内容◆
39  *
40  * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
41  * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
42  * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
43  * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
44  * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
45  * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
46  * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
47  * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
48  * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
49  * 契約・規定に優先します。
50  *
51  * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
52  *
53  * 〒102-0073
54  * 東京都千代田区九段北1-13-5日本地所第一ビル4F
55  * 株式会社ハッピーサイズ
56  * Phone: 03-3512-3655, Fax: 03-3512-3656
57  * Email: sales@happysize.co.jp
58  * Web: http://happysize.com/
59  *
60  * ◆著者◆
61  *
62  * 金本 茂 <sgk@happysize.co.jp>
63  *
64  * ◆履歴◆
65  *
66  * 1998/11/10 sgk implementation in C++
67  * 1999/4/25  sgk Cで書きなおし。
68  * 1999/4/26  sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
69  * 1999/6/??      Unicodeサポート。
70  * 1999/6/22  sgk ライセンスをLGPLに変更。
71  *
72  */
73 
74 /*
75  * Unicode support
76  *
77  * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
78  * All rights reserved.
79  *
80  */
81 
82 
83 #ifdef HAVE_CONFIG_H
84 #include "config.h"
85 #endif
86 
87 #include <stddef.h>
88 
89 #ifdef HAVE_STRING_H
90 #include <string.h>
91 #endif
92 
93 #ifdef HAVE_STRINGS_H
94 #include <strings.h>
95 #endif
96 
97 #ifdef HAVE_STDDEF_H
98 #include <stddef.h>
99 #endif
100 
101 #include "mbfilter.h"
102 #include "mbfl_filter_output.h"
103 #include "mbfilter_8bit.h"
104 #include "mbfilter_pass.h"
105 #include "mbfilter_wchar.h"
106 #include "filters/mbfilter_ascii.h"
107 #include "filters/mbfilter_base64.h"
108 #include "filters/mbfilter_qprint.h"
109 #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
110 #include "filters/mbfilter_utf8.h"
111 
112 #include "eaw_table.h"
113 
/*
 * Hexadecimal digit lookup: entry i holds the character code of the
 * uppercase hex digit for the value i (0..15), i.e. "0123456789ABCDEF".
 */
static char mbfl_hexchar_table[] = {
	0x30, 0x31, 0x32, 0x33,		/* 0 1 2 3 */
	0x34, 0x35, 0x36, 0x37,		/* 4 5 6 7 */
	0x38, 0x39, 0x41, 0x42,		/* 8 9 A B */
	0x43, 0x44, 0x45, 0x46		/* C D E F */
};
118 
119 
120 
121 /*
122  * encoding filter
123  */
/*
 * Evaluate `statement`; when its value is negative, return -1 from the
 * enclosing function. Wrapped in do/while(0) so it acts as one statement.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
125 
126 
127 /*
128  *  buffering converter
129  */
130 mbfl_buffer_converter *
mbfl_buffer_converter_new(const mbfl_encoding * from,const mbfl_encoding * to,size_t buf_initsz)131 mbfl_buffer_converter_new(
132 	const mbfl_encoding *from,
133 	const mbfl_encoding *to,
134     size_t buf_initsz)
135 {
136 	mbfl_buffer_converter *convd;
137 
138 	/* allocate */
139 	convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof(mbfl_buffer_converter));
140 	if (convd == NULL) {
141 		return NULL;
142 	}
143 
144 	/* initialize */
145 	convd->from = from;
146 	convd->to = to;
147 
148 	/* create convert filter */
149 	convd->filter1 = NULL;
150 	convd->filter2 = NULL;
151 	if (mbfl_convert_filter_get_vtbl(convd->from, convd->to) != NULL) {
152 		convd->filter1 = mbfl_convert_filter_new(convd->from, convd->to, mbfl_memory_device_output, NULL, &convd->device);
153 	} else {
154 		convd->filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, convd->to, mbfl_memory_device_output, NULL, &convd->device);
155 		if (convd->filter2 != NULL) {
156 			convd->filter1 = mbfl_convert_filter_new(convd->from,
157 					&mbfl_encoding_wchar,
158 					(int (*)(int, void*))convd->filter2->filter_function,
159 					(int (*)(void*))convd->filter2->filter_flush,
160 					convd->filter2);
161 			if (convd->filter1 == NULL) {
162 				mbfl_convert_filter_delete(convd->filter2);
163 			}
164 		}
165 	}
166 	if (convd->filter1 == NULL) {
167 		mbfl_free(convd);
168 		return NULL;
169 	}
170 
171 	mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
172 
173 	return convd;
174 }
175 
176 
177 void
mbfl_buffer_converter_delete(mbfl_buffer_converter * convd)178 mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
179 {
180 	if (convd != NULL) {
181 		if (convd->filter1) {
182 			mbfl_convert_filter_delete(convd->filter1);
183 		}
184 		if (convd->filter2) {
185 			mbfl_convert_filter_delete(convd->filter2);
186 		}
187 		mbfl_memory_device_clear(&convd->device);
188 		mbfl_free((void*)convd);
189 	}
190 }
191 
192 void
mbfl_buffer_converter_reset(mbfl_buffer_converter * convd)193 mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
194 {
195 	mbfl_memory_device_reset(&convd->device);
196 }
197 
198 int
mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter * convd,int mode)199 mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
200 {
201 	if (convd != NULL) {
202 		if (convd->filter2 != NULL) {
203 			convd->filter2->illegal_mode = mode;
204 		} else if (convd->filter1 != NULL) {
205 			convd->filter1->illegal_mode = mode;
206 		} else {
207 			return 0;
208 		}
209 	}
210 
211 	return 1;
212 }
213 
214 int
mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter * convd,int substchar)215 mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
216 {
217 	if (convd != NULL) {
218 		if (convd->filter2 != NULL) {
219 			convd->filter2->illegal_substchar = substchar;
220 		} else if (convd->filter1 != NULL) {
221 			convd->filter1->illegal_substchar = substchar;
222 		} else {
223 			return 0;
224 		}
225 	}
226 
227 	return 1;
228 }
229 
230 int
mbfl_buffer_converter_strncat(mbfl_buffer_converter * convd,const unsigned char * p,size_t n)231 mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, size_t n)
232 {
233 	mbfl_convert_filter *filter;
234 	int (*filter_function)(int c, mbfl_convert_filter *filter);
235 
236 	if (convd != NULL && p != NULL) {
237 		filter = convd->filter1;
238 		if (filter != NULL) {
239 			filter_function = filter->filter_function;
240 			while (n > 0) {
241 				if ((*filter_function)(*p++, filter) < 0) {
242 					break;
243 				}
244 				n--;
245 			}
246 		}
247 	}
248 
249 	return n;
250 }
251 
252 int
mbfl_buffer_converter_feed(mbfl_buffer_converter * convd,mbfl_string * string)253 mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
254 {
255 	return mbfl_buffer_converter_feed2(convd, string, NULL);
256 }
257 
258 int
mbfl_buffer_converter_feed2(mbfl_buffer_converter * convd,mbfl_string * string,size_t * loc)259 mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, size_t *loc)
260 {
261 	size_t n;
262 	unsigned char *p;
263 	mbfl_convert_filter *filter;
264 	int (*filter_function)(int c, mbfl_convert_filter *filter);
265 
266 	if (convd == NULL || string == NULL) {
267 		return -1;
268 	}
269 	mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
270 	/* feed data */
271 	n = string->len;
272 	p = string->val;
273 
274 	filter = convd->filter1;
275 	if (filter != NULL) {
276 		filter_function = filter->filter_function;
277 		while (n > 0) {
278 			if ((*filter_function)(*p++, filter) < 0) {
279 				if (loc) {
280 					*loc = p - string->val;
281 				}
282 				return -1;
283 			}
284 			n--;
285 		}
286 	}
287 	if (loc) {
288 		*loc = p - string->val;
289 	}
290 	return 0;
291 }
292 
293 
294 int
mbfl_buffer_converter_flush(mbfl_buffer_converter * convd)295 mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
296 {
297 	if (convd == NULL) {
298 		return -1;
299 	}
300 
301 	if (convd->filter1 != NULL) {
302 		mbfl_convert_filter_flush(convd->filter1);
303 	}
304 	if (convd->filter2 != NULL) {
305 		mbfl_convert_filter_flush(convd->filter2);
306 	}
307 
308 	return 0;
309 }
310 
311 mbfl_string *
mbfl_buffer_converter_getbuffer(mbfl_buffer_converter * convd,mbfl_string * result)312 mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
313 {
314 	if (convd != NULL && result != NULL && convd->device.buffer != NULL) {
315 		result->encoding = convd->to;
316 		result->val = convd->device.buffer;
317 		result->len = convd->device.pos;
318 	} else {
319 		result = NULL;
320 	}
321 
322 	return result;
323 }
324 
325 mbfl_string *
mbfl_buffer_converter_result(mbfl_buffer_converter * convd,mbfl_string * result)326 mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
327 {
328 	if (convd == NULL || result == NULL) {
329 		return NULL;
330 	}
331 	result->encoding = convd->to;
332 	return mbfl_memory_device_result(&convd->device, result);
333 }
334 
335 mbfl_string *
mbfl_buffer_converter_feed_result(mbfl_buffer_converter * convd,mbfl_string * string,mbfl_string * result)336 mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
337 				  mbfl_string *result)
338 {
339 	if (convd == NULL || string == NULL || result == NULL) {
340 		return NULL;
341 	}
342 	mbfl_buffer_converter_feed(convd, string);
343 	if (convd->filter1 != NULL) {
344 		mbfl_convert_filter_flush(convd->filter1);
345 	}
346 	if (convd->filter2 != NULL) {
347 		mbfl_convert_filter_flush(convd->filter2);
348 	}
349 	result->encoding = convd->to;
350 	return mbfl_memory_device_result(&convd->device, result);
351 }
352 
/*
 * Sum the illegal-character counters of the converter's filters.
 * Returns 0 for a NULL converter.
 */
size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
	mbfl_convert_filter *stages[2];
	size_t total = 0;
	int i;

	if (convd == NULL) {
		return 0;
	}

	stages[0] = convd->filter1;
	stages[1] = convd->filter2;
	for (i = 0; i < 2; i++) {
		if (stages[i] != NULL) {
			total += stages[i]->num_illegalchar;
		}
	}

	return total;
}
371 
372 /*
373  * encoding detector
374  */
375 mbfl_encoding_detector *
mbfl_encoding_detector_new(const mbfl_encoding ** elist,int elistsz,int strict)376 mbfl_encoding_detector_new(const mbfl_encoding **elist, int elistsz, int strict)
377 {
378 	mbfl_encoding_detector *identd;
379 
380 	int i, num;
381 	mbfl_identify_filter *filter;
382 
383 	if (elist == NULL || elistsz <= 0) {
384 		return NULL;
385 	}
386 
387 	/* allocate */
388 	identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
389 	if (identd == NULL) {
390 		return NULL;
391 	}
392 	identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
393 	if (identd->filter_list == NULL) {
394 		mbfl_free(identd);
395 		return NULL;
396 	}
397 
398 	/* create filters */
399 	i = 0;
400 	num = 0;
401 	while (i < elistsz) {
402 		filter = mbfl_identify_filter_new2(elist[i]);
403 		if (filter != NULL) {
404 			identd->filter_list[num] = filter;
405 			num++;
406 		}
407 		i++;
408 	}
409 	identd->filter_list_size = num;
410 
411 	/* set strict flag */
412 	identd->strict = strict;
413 
414 	return identd;
415 }
416 
417 
418 void
mbfl_encoding_detector_delete(mbfl_encoding_detector * identd)419 mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
420 {
421 	int i;
422 
423 	if (identd != NULL) {
424 		if (identd->filter_list != NULL) {
425 			i = identd->filter_list_size;
426 			while (i > 0) {
427 				i--;
428 				mbfl_identify_filter_delete(identd->filter_list[i]);
429 			}
430 			mbfl_free((void *)identd->filter_list);
431 		}
432 		mbfl_free((void *)identd);
433 	}
434 }
435 
436 int
mbfl_encoding_detector_feed(mbfl_encoding_detector * identd,mbfl_string * string)437 mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
438 {
439 	int res = 0;
440 	/* feed data */
441 	if (identd != NULL && string != NULL && string->val != NULL) {
442 		int num = identd->filter_list_size;
443 		size_t n = string->len;
444 		unsigned char *p = string->val;
445 		int bad = 0;
446 		while (n > 0) {
447 			int i;
448 			for (i = 0; i < num; i++) {
449 				mbfl_identify_filter *filter = identd->filter_list[i];
450 				if (!filter->flag) {
451 					(*filter->filter_function)(*p, filter);
452 					if (filter->flag) {
453 						bad++;
454 					}
455 				}
456 			}
457 			if ((num - 1) <= bad) {
458 				res = 1;
459 				break;
460 			}
461 			p++;
462 			n--;
463 		}
464 	}
465 
466 	return res;
467 }
468 
mbfl_encoding_detector_judge(mbfl_encoding_detector * identd)469 const mbfl_encoding *mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
470 {
471 	mbfl_identify_filter *filter;
472 	const mbfl_encoding *encoding = NULL;
473 	int n;
474 
475 	/* judge */
476 	if (identd != NULL) {
477 		n = identd->filter_list_size - 1;
478 		while (n >= 0) {
479 			filter = identd->filter_list[n];
480 			if (!filter->flag) {
481 				if (!identd->strict || !filter->status) {
482 					encoding = filter->encoding;
483 				}
484 			}
485 			n--;
486 		}
487 
488 		/* fallback judge */
489 		if (!encoding) {
490 			n = identd->filter_list_size - 1;
491 			while (n >= 0) {
492 				filter = identd->filter_list[n];
493 				if (!filter->flag) {
494 					encoding = filter->encoding;
495 				}
496 				n--;
497  			}
498 		}
499 	}
500 
501 	return encoding;
502 }
503 
504 /*
505  * encoding converter
506  */
507 mbfl_string *
mbfl_convert_encoding(mbfl_string * string,mbfl_string * result,const mbfl_encoding * toenc)508 mbfl_convert_encoding(
509     mbfl_string *string,
510     mbfl_string *result,
511     const mbfl_encoding *toenc)
512 {
513 	size_t n;
514 	unsigned char *p;
515 	mbfl_memory_device device;
516 	mbfl_convert_filter *filter1;
517 	mbfl_convert_filter *filter2;
518 
519 	/* initialize */
520 	if (toenc == NULL || string == NULL || result == NULL) {
521 		return NULL;
522 	}
523 
524 	filter1 = NULL;
525 	filter2 = NULL;
526 	if (mbfl_convert_filter_get_vtbl(string->encoding, toenc) != NULL) {
527 		filter1 = mbfl_convert_filter_new(string->encoding, toenc, mbfl_memory_device_output, 0, &device);
528 	} else {
529 		filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
530 		if (filter2 != NULL) {
531 			filter1 = mbfl_convert_filter_new(string->encoding, &mbfl_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
532 			if (filter1 == NULL) {
533 				mbfl_convert_filter_delete(filter2);
534 			}
535 		}
536 	}
537 	if (filter1 == NULL) {
538 		return NULL;
539 	}
540 
541 	if (filter2 != NULL) {
542 		filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
543 		filter2->illegal_substchar = 0x3f;		/* '?' */
544 	}
545 
546 	mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
547 
548 	/* feed data */
549 	n = string->len;
550 	p = string->val;
551 	if (p != NULL) {
552 		while (n > 0) {
553 			if ((*filter1->filter_function)(*p++, filter1) < 0) {
554 				break;
555 			}
556 			n--;
557 		}
558 	}
559 
560 	mbfl_convert_filter_flush(filter1);
561 	mbfl_convert_filter_delete(filter1);
562 	if (filter2 != NULL) {
563 		mbfl_convert_filter_flush(filter2);
564 		mbfl_convert_filter_delete(filter2);
565 	}
566 
567 	return mbfl_memory_device_result(&device, result);
568 }
569 
570 
571 /*
572  * identify encoding
573  */
574 const mbfl_encoding *
mbfl_identify_encoding(mbfl_string * string,const mbfl_encoding ** elist,int elistsz,int strict)575 mbfl_identify_encoding(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict)
576 {
577 	int i, num, bad;
578 	size_t n;
579 	unsigned char *p;
580 	mbfl_identify_filter *flist, *filter;
581 	const mbfl_encoding *encoding;
582 
583 	/* flist is an array of mbfl_identify_filter instances */
584 	flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
585 	if (flist == NULL) {
586 		return NULL;
587 	}
588 
589 	num = 0;
590 	if (elist != NULL) {
591 		for (i = 0; i < elistsz; i++) {
592 			if (!mbfl_identify_filter_init2(&flist[num], elist[i])) {
593 				num++;
594 			}
595 		}
596 	}
597 
598 	/* feed data */
599 	n = string->len;
600 	p = string->val;
601 
602 	if (p != NULL) {
603 		bad = 0;
604 		while (n > 0) {
605 			for (i = 0; i < num; i++) {
606 				filter = &flist[i];
607 				if (!filter->flag) {
608 					(*filter->filter_function)(*p, filter);
609 					if (filter->flag) {
610 						bad++;
611 					}
612 				}
613 			}
614 			if ((num - 1) <= bad && !strict) {
615 				break;
616 			}
617 			p++;
618 			n--;
619 		}
620 	}
621 
622 	/* judge */
623 	encoding = NULL;
624 
625 	for (i = 0; i < num; i++) {
626 		filter = &flist[i];
627 		if (!filter->flag) {
628 			if (strict && filter->status) {
629  				continue;
630  			}
631 			encoding = filter->encoding;
632 			break;
633 		}
634 	}
635 
636 	/* fall-back judge */
637 	if (!encoding) {
638 		for (i = 0; i < num; i++) {
639 			filter = &flist[i];
640 			if (!filter->flag && (!strict || !filter->status)) {
641 				encoding = filter->encoding;
642 				break;
643 			}
644 		}
645 	}
646 
647 	/* cleanup */
648 	/* dtors should be called in reverse order */
649 	i = num;
650 	while (--i >= 0) {
651 		mbfl_identify_filter_cleanup(&flist[i]);
652 	}
653 
654 	mbfl_free((void *)flist);
655 
656 	return encoding;
657 }
658 
659 /*
660  *  strlen
661  */
/*
 * Output callback that counts characters: increments the size_t that
 * `data` points to and hands the character back unchanged.
 */
static int
filter_count_output(int c, void *data)
{
	size_t *counter = (size_t *)data;

	*counter += 1;
	return c;
}
668 
669 size_t
mbfl_strlen(mbfl_string * string)670 mbfl_strlen(mbfl_string *string)
671 {
672 	size_t len, n, k;
673 	unsigned char *p;
674 	const mbfl_encoding *encoding = string->encoding;
675 
676 	len = 0;
677 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
678 		len = string->len;
679 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
680 		len = string->len/2;
681 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
682 		len = string->len/4;
683 	} else if (encoding->mblen_table != NULL) {
684 		const unsigned char *mbtab = encoding->mblen_table;
685 		n = 0;
686 		p = string->val;
687 		k = string->len;
688 		/* count */
689 		if (p != NULL) {
690 			while (n < k) {
691 				unsigned m = mbtab[*p];
692 				n += m;
693 				p += m;
694 				len++;
695 			}
696 		}
697 	} else {
698 		/* wchar filter */
699 		mbfl_convert_filter *filter = mbfl_convert_filter_new(
700 		  string->encoding,
701 		  &mbfl_encoding_wchar,
702 		  filter_count_output, 0, &len);
703 		if (filter == NULL) {
704 			return (size_t) -1;
705 		}
706 		/* count */
707 		n = string->len;
708 		p = string->val;
709 		if (p != NULL) {
710 			while (n > 0) {
711 				(*filter->filter_function)(*p++, filter);
712 				n--;
713 			}
714 		}
715 		mbfl_convert_filter_delete(filter);
716 	}
717 
718 	return len;
719 }
720 
721 
722 /*
723  *  strpos
724  */
/*
 * State shared between a needle search driver and collector_strpos(),
 * which receives the haystack one wchar at a time.
 */
struct collector_strpos_data {
	mbfl_convert_filter *next_filter;	/* not referenced by collector_strpos() */
	mbfl_wchar_device needle;		/* needle converted to wchar */
	size_t needle_len;			/* number of wchars in needle */
	size_t start;				/* characters before this index are not matched */
	size_t output;				/* number of haystack characters seen so far */
	size_t found_pos;			/* index where the current candidate match began */
	size_t needle_pos;			/* index of the next needle character to compare */
	size_t matched_pos;			/* start index of the latest complete match */
};
735 
/*
 * Output callback for a haystack -> wchar filter: incrementally matches
 * the incoming characters against pc->needle.
 *
 * c    - the next haystack character
 * data - a struct collector_strpos_data
 *
 * When a complete match is seen, pc->matched_pos is set to the index at
 * which it started. Always returns c.
 */
static int
collector_strpos(int c, void* data)
{
	int *p, *h, *m;
	ssize_t n;
	struct collector_strpos_data *pc = (struct collector_strpos_data*)data;

	/* characters before pc->start are only counted, never matched */
	if (pc->output >= pc->start) {
		if (c == (int)pc->needle.buffer[pc->needle_pos]) {
			if (pc->needle_pos == 0) {
				pc->found_pos = pc->output;			/* found position */
			}
			pc->needle_pos++;						/* needle pointer */
			if (pc->needle_pos >= pc->needle_len) {
				pc->matched_pos = pc->found_pos;	/* matched position */
				/* step back one and re-align so an overlapping match can continue */
				pc->needle_pos--;
				goto retry;
			}
		} else if (pc->needle_pos != 0) {
			/*
			 * Mismatch after a partial match (or jump from a full match):
			 * slide the candidate start forward until the already-seen
			 * needle characters line up with a needle prefix again.
			 */
retry:
			h = (int *)pc->needle.buffer;
			h++;
			for (;;) {
				pc->found_pos++;
				p = h;
				m = (int *)pc->needle.buffer;
				n = pc->needle_pos - 1;
				/* compare the needle shifted by (h - buffer) against its own prefix */
				while (n > 0 && *p == *m) {
					n--;
					p++;
					m++;
				}
				if (n <= 0) {
					/* prefix re-aligned; restart from scratch if c does not extend it */
					if (*m != c) {
						pc->needle_pos = 0;
					}
					break;
				} else {
					/* this shift does not fit: try the next one with a shorter prefix */
					h++;
					pc->needle_pos--;
				}
			}
		}
	}

	pc->output++;
	return c;
}
784 
785 /*
786  *	oddlen
787  */
788 size_t
mbfl_oddlen(mbfl_string * string)789 mbfl_oddlen(mbfl_string *string)
790 {
791 	size_t len, n, k;
792 	unsigned char *p;
793 	const mbfl_encoding *encoding = string->encoding;
794 
795 	len = 0;
796 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
797 		return 0;
798 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
799 		return len % 2;
800 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
801 		return len % 4;
802 	} else if (encoding->mblen_table != NULL) {
803 		const unsigned char *mbtab = encoding->mblen_table;
804  		n = 0;
805 		p = string->val;
806 		k = string->len;
807 		/* count */
808 		if (p != NULL) {
809 			while (n < k) {
810 				unsigned m = mbtab[*p];
811 				n += m;
812 				p += m;
813 			};
814 		}
815 		return n-k;
816 	} else {
817 		/* how can i do ? */
818 		return 0;
819 	}
820 	/* NOT REACHED */
821 }
822 
/*
 * Find `needle` in `haystack` and return the character index of the
 * match.
 *
 * Both strings are converted to UTF-8 first unless they already are
 * UTF-8; the search itself works on UTF-8 bytes and the resulting byte
 * position is turned into a character index by counting the
 * non-continuation bytes in front of it.
 *
 * offset  - forward search: number of characters to skip from the start.
 *           reverse search: a non-negative value moves the lower bound of
 *           the search forward by that many characters; a negative value
 *           moves the upper bound backwards from the end.
 * reverse - zero searches from the front, non-zero from the back.
 *
 * Returns the character index, or one of
 *   (size_t)-1   not found
 *   (size_t)-4   conversion to UTF-8 failed
 *   (size_t)-8   NULL argument/value, missing UTF-8 length table, or
 *                empty needle
 *   (size_t)-16  offset lies outside the haystack
 */
size_t
mbfl_strpos(
    mbfl_string *haystack,
    mbfl_string *needle,
    ssize_t offset,
    int reverse)
{
	size_t result;
	mbfl_string _haystack_u8, _needle_u8;
	const mbfl_string *haystack_u8, *needle_u8 = NULL;
	const unsigned char *u8_tbl;

	if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
		return (size_t) -8;
	}

	{
		/* the UTF-8 byte-length table is needed to step over characters */
		const mbfl_encoding *u8_enc = &mbfl_encoding_utf8;
		if (u8_enc->mblen_table == NULL) {
			return (size_t) -8;
		}
		u8_tbl = u8_enc->mblen_table;
	}

	/* obtain UTF-8 views of both strings; converted copies are released at `out` */
	if (haystack->encoding->no_encoding != mbfl_no_encoding_utf8) {
		mbfl_string_init(&_haystack_u8);
		haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, &mbfl_encoding_utf8);
		if (haystack_u8 == NULL) {
			result = (size_t) -4;
			goto out;
		}
	} else {
		haystack_u8 = haystack;
	}

	if (needle->encoding->no_encoding != mbfl_no_encoding_utf8) {
		mbfl_string_init(&_needle_u8);
		needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, &mbfl_encoding_utf8);
		if (needle_u8 == NULL) {
			result = (size_t) -4;
			goto out;
		}
	} else {
		needle_u8 = needle;
	}

	if (needle_u8->len < 1) {
		result = (size_t) -8;
		goto out;
	}

	/* from here on the default answer is "not found" */
	result = (size_t) -1;
	if (haystack_u8->len < needle_u8->len) {
		goto out;
	}

	if (!reverse) {
		/* forward search; jtbl is a skip table indexed by byte value */
		size_t jtbl[1 << (sizeof(unsigned char) * 8)];
		size_t needle_u8_len = needle_u8->len;
		size_t i;
		const unsigned char *p, *q, *e;
		const unsigned char *haystack_u8_val = haystack_u8->val,
		                    *needle_u8_val = needle_u8->val;
		for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
			jtbl[i] = needle_u8_len + 1;
		}
		for (i = 0; i < needle_u8_len - 1; ++i) {
			jtbl[needle_u8_val[i]] = needle_u8_len - i;
		}
		e = haystack_u8_val + haystack_u8->len;
		p = haystack_u8_val;
		/* skip `offset` characters, stepping by the UTF-8 length of each lead byte */
		while (offset-- > 0) {
			if (p >= e) {
				result = (size_t) -16;
				goto out;
			}
			p += u8_tbl[*p];
		}
		/* p marks the END of the current comparison window */
		p += needle_u8_len;
		if (p > e) {
			goto out;
		}
		while (p <= e) {
			const unsigned char *pv = p;
			q = needle_u8_val + needle_u8_len;
			/* compare the window against the needle from the last byte backwards */
			for (;;) {
				if (q == needle_u8_val) {
					/* full match; p is at its first byte: count the characters before it */
					result = 0;
					while (p > haystack_u8_val) {
						unsigned char c = *--p;
						if (c < 0x80) {
							++result;
						} else if ((c & 0xc0) != 0x80) {
							++result;
						}
					}
					goto out;
				}
				if (*--q != *--p) {
					break;
				}
			}
			/* advance by the skip for the mismatching byte, but always make progress */
			p += jtbl[*p];
			if (p <= pv) {
				p = pv + 1;
			}
		}
	} else {
		/* reverse search; needle_len counts the needle's characters */
		size_t jtbl[1 << (sizeof(unsigned char) * 8)];
		size_t needle_u8_len = needle_u8->len, needle_len = 0;
		size_t i;
		const unsigned char *p, *e, *q, *qe;
		const unsigned char *haystack_u8_val = haystack_u8->val,
		                    *needle_u8_val = needle_u8->val;
		for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
			jtbl[i] = needle_u8_len;
		}
		for (i = needle_u8_len - 1; i > 0; --i) {
			unsigned char c = needle_u8_val[i];
			jtbl[c] = i;
			/* every byte that is not a continuation byte starts a character */
			if (c < 0x80) {
				++needle_len;
			} else if ((c & 0xc0) != 0x80) {
				++needle_len;
			}
		}
		{
			/* the loop above stops before index 0, so count the first byte here */
			unsigned char c = needle_u8_val[0];
			if (c < 0x80) {
				++needle_len;
			} else if ((c & 0xc0) != 0x80) {
				++needle_len;
			}
		}
		/* e is the lower bound of the search, p the upper bound */
		e = haystack_u8_val;
		p = e + haystack_u8->len;
		qe = needle_u8_val + needle_u8_len;
		if (offset < 0) {
			/* negative offset: pull the upper bound back, leaving room for the needle */
			if (-offset > needle_len) {
				offset += needle_len;
				while (offset < 0) {
					unsigned char c;
					if (p <= e) {
						result = (size_t) -16;
						goto out;
					}
					c = *(--p);
					if (c < 0x80) {
						++offset;
					} else if ((c & 0xc0) != 0x80) {
						++offset;
					}
				}
			}
		} else {
			/* non-negative offset: push the lower bound forward by `offset` characters */
			const unsigned char *ee = haystack_u8_val + haystack_u8->len;
			while (offset-- > 0) {
				if (e >= ee) {
					result = (size_t) -16;
					goto out;
				}
				e += u8_tbl[*e];
			}
		}
		if (p < e + needle_u8_len) {
			goto out;
		}
		/* p now marks the START of the current comparison window */
		p -= needle_u8_len;
		while (p >= e) {
			const unsigned char *pv = p;
			q = needle_u8_val;
			/* compare the window against the needle from the first byte forwards */
			for (;;) {
				if (q == qe) {
					/* full match; p ran past it, so step back to its first byte */
					result = 0;
					p -= needle_u8_len;
					while (p > haystack_u8_val) {
						unsigned char c = *--p;
						if (c < 0x80) {
							++result;
						} else if ((c & 0xc0) != 0x80) {
							++result;
						}
					}
					goto out;
				}
				if (*q != *p) {
					break;
				}
				++p, ++q;
			}
			/* retreat by the skip for the mismatching byte, but always make progress */
			p -= jtbl[*p];
			if (p >= pv) {
				p = pv - 1;
			}
		}
	}
out:
	/* release only the buffers produced by the conversions above */
	if (haystack_u8 == &_haystack_u8) {
		mbfl_string_clear(&_haystack_u8);
	}
	if (needle_u8 == &_needle_u8) {
		mbfl_string_clear(&_needle_u8);
	}
	return result;
}
1028 
1029 /*
1030  *  substr_count
1031  */
1032 
1033 size_t
mbfl_substr_count(mbfl_string * haystack,mbfl_string * needle)1034 mbfl_substr_count(
1035     mbfl_string *haystack,
1036     mbfl_string *needle
1037    )
1038 {
1039 	size_t n, result = 0;
1040 	unsigned char *p;
1041 	mbfl_convert_filter *filter;
1042 	struct collector_strpos_data pc;
1043 
1044 	if (haystack == NULL || needle == NULL) {
1045 		return (size_t) -8;
1046 	}
1047 	/* needle is converted into wchar */
1048 	mbfl_wchar_device_init(&pc.needle);
1049 	filter = mbfl_convert_filter_new(
1050 	  needle->encoding,
1051 	  &mbfl_encoding_wchar,
1052 	  mbfl_wchar_device_output, 0, &pc.needle);
1053 	if (filter == NULL) {
1054 		return (size_t) -4;
1055 	}
1056 	mbfl_convert_filter_feed_string(filter, needle->val, needle->len);
1057 	mbfl_convert_filter_flush(filter);
1058 	mbfl_convert_filter_delete(filter);
1059 	pc.needle_len = pc.needle.pos;
1060 	if (pc.needle.buffer == NULL) {
1061 		return (size_t) -4;
1062 	}
1063 	if (pc.needle_len <= 0) {
1064 		mbfl_wchar_device_clear(&pc.needle);
1065 		return (size_t) -2;
1066 	}
1067 	/* initialize filter and collector data */
1068 	filter = mbfl_convert_filter_new(
1069 	  haystack->encoding,
1070 	  &mbfl_encoding_wchar,
1071 	  collector_strpos, 0, &pc);
1072 	if (filter == NULL) {
1073 		mbfl_wchar_device_clear(&pc.needle);
1074 		return (size_t) -4;
1075 	}
1076 	pc.start = 0;
1077 	pc.output = 0;
1078 	pc.needle_pos = 0;
1079 	pc.found_pos = 0;
1080 	pc.matched_pos = (size_t) -1;
1081 
1082 	/* feed data */
1083 	p = haystack->val;
1084 	n = haystack->len;
1085 	if (p != NULL) {
1086 		while (n > 0) {
1087 			if ((*filter->filter_function)(*p++, filter) < 0) {
1088 				pc.matched_pos = (size_t) -4;
1089 				break;
1090 			}
1091 			if (pc.matched_pos != (size_t) -1) {
1092 				++result;
1093 				pc.matched_pos = (size_t) -1;
1094 				pc.needle_pos = 0;
1095 			}
1096 			n--;
1097 		}
1098 	}
1099 	mbfl_convert_filter_flush(filter);
1100 	mbfl_convert_filter_delete(filter);
1101 	mbfl_wchar_device_clear(&pc.needle);
1102 
1103 	return result;
1104 }
1105 
1106 /*
1107  *  substr
1108  */
/*
 * State for collector_substr(): selects the characters whose index lies
 * in [start, stop) and forwards them to next_filter.
 */
struct collector_substr_data {
	mbfl_convert_filter *next_filter;	/* receives the selected characters */
	size_t start;				/* index of the first character to forward */
	size_t stop;				/* index at which collection ends (exclusive) */
	size_t output;				/* number of characters seen so far */
};
1115 
1116 static int
collector_substr(int c,void * data)1117 collector_substr(int c, void* data)
1118 {
1119 	struct collector_substr_data *pc = (struct collector_substr_data*)data;
1120 
1121 	if (pc->output >= pc->stop) {
1122 		return -1;
1123 	}
1124 
1125 	if (pc->output >= pc->start) {
1126 		(*pc->next_filter->filter_function)(c, pc->next_filter);
1127 	}
1128 
1129 	pc->output++;
1130 
1131 	return c;
1132 }
1133 
1134 mbfl_string *
mbfl_substr(mbfl_string * string,mbfl_string * result,size_t from,size_t length)1135 mbfl_substr(
1136     mbfl_string *string,
1137     mbfl_string *result,
1138     size_t from,
1139     size_t length)
1140 {
1141 	const mbfl_encoding *encoding = string->encoding;
1142 	size_t n, k, len, start, end;
1143 	unsigned m;
1144 	unsigned char *p, *w;
1145 
1146 	mbfl_string_init(result);
1147 	result->no_language = string->no_language;
1148 	result->encoding = string->encoding;
1149 
1150 	if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
1151 	   encoding->mblen_table != NULL) {
1152 		len = string->len;
1153 		if (encoding->flag & MBFL_ENCTYPE_SBCS) {
1154 			start = from;
1155 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1156 			start = from*2;
1157 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1158 			start = from*4;
1159 		} else {
1160 			const unsigned char *mbtab = encoding->mblen_table;
1161 			start = 0;
1162 			n = 0;
1163 			k = 0;
1164 			p = string->val;
1165 			/* search start position */
1166 			while (k <= from) {
1167 				start = n;
1168 				if (n >= len) {
1169 					break;
1170 				}
1171 				m = mbtab[*p];
1172 				n += m;
1173 				p += m;
1174 				k++;
1175 			}
1176 		}
1177 
1178 		if (length == MBFL_SUBSTR_UNTIL_END) {
1179 			end = len;
1180 		} else if (encoding->flag & MBFL_ENCTYPE_SBCS) {
1181 			end = start + length;
1182 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1183 			end = start + length*2;
1184 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1185 			end = start + length*4;
1186 		} else {
1187 			const unsigned char *mbtab = encoding->mblen_table;
1188 			end = start;
1189 			n = start;
1190 			k = 0;
1191 			p = string->val + start;
1192 			/* detect end position */
1193 			while (k <= length) {
1194 				end = n;
1195 				if (n >= len) {
1196 					break;
1197 				}
1198 				m = mbtab[*p];
1199 				n += m;
1200 				p += m;
1201 				k++;
1202 			}
1203 		}
1204 
1205 		if (start > len) {
1206 			start = len;
1207 		}
1208 		if (end > len) {
1209 			end = len;
1210 		}
1211 		if (start > end) {
1212 			start = end;
1213 		}
1214 
1215 		/* allocate memory and copy */
1216 		n = end - start;
1217 		result->len = 0;
1218 		result->val = w = (unsigned char*)mbfl_malloc(n + 1);
1219 		if (w != NULL) {
1220 			result->len = n;
1221 			memcpy(w, string->val + start, n);
1222 			w[n] = '\0';
1223 		} else {
1224 			result = NULL;
1225 		}
1226 	} else {
1227 		mbfl_memory_device device;
1228 		struct collector_substr_data pc;
1229 		mbfl_convert_filter *decoder;
1230 		mbfl_convert_filter *encoder;
1231 
1232 		if (length == MBFL_SUBSTR_UNTIL_END) {
1233 			length = mbfl_strlen(string) - from;
1234 		}
1235 
1236 		mbfl_memory_device_init(&device, length + 1, 0);
1237 		mbfl_string_init(result);
1238 		result->no_language = string->no_language;
1239 		result->encoding = string->encoding;
1240 		/* output code filter */
1241 		decoder = mbfl_convert_filter_new(
1242 		    &mbfl_encoding_wchar,
1243 		    string->encoding,
1244 		    mbfl_memory_device_output, 0, &device);
1245 		/* wchar filter */
1246 		encoder = mbfl_convert_filter_new(
1247 		    string->encoding,
1248 		    &mbfl_encoding_wchar,
1249 		    collector_substr, 0, &pc);
1250 		if (decoder == NULL || encoder == NULL) {
1251 			mbfl_convert_filter_delete(encoder);
1252 			mbfl_convert_filter_delete(decoder);
1253 			return NULL;
1254 		}
1255 		pc.next_filter = decoder;
1256 		pc.start = from;
1257 		pc.stop = from + length;
1258 		pc.output = 0;
1259 
1260 		/* feed data */
1261 		p = string->val;
1262 		n = string->len;
1263 		if (p != NULL) {
1264 			while (n > 0) {
1265 				if ((*encoder->filter_function)(*p++, encoder) < 0) {
1266 					break;
1267 				}
1268 				n--;
1269 			}
1270 		}
1271 
1272 		mbfl_convert_filter_flush(encoder);
1273 		mbfl_convert_filter_flush(decoder);
1274 		result = mbfl_memory_device_result(&device, result);
1275 		mbfl_convert_filter_delete(encoder);
1276 		mbfl_convert_filter_delete(decoder);
1277 	}
1278 
1279 	return result;
1280 }
1281 
1282 /*
1283  *  strcut
1284  */
1285 mbfl_string *
mbfl_strcut(mbfl_string * string,mbfl_string * result,size_t from,size_t length)1286 mbfl_strcut(
1287     mbfl_string *string,
1288     mbfl_string *result,
1289     size_t from,
1290     size_t length)
1291 {
1292 	const mbfl_encoding *encoding = string->encoding;
1293 	mbfl_memory_device device;
1294 
1295 	if (from >= string->len) {
1296 		from = string->len;
1297 	}
1298 
1299 	mbfl_string_init(result);
1300 	result->no_language = string->no_language;
1301 	result->encoding = string->encoding;
1302 
1303 	if ((encoding->flag & (MBFL_ENCTYPE_SBCS
1304 				| MBFL_ENCTYPE_WCS2BE
1305 				| MBFL_ENCTYPE_WCS2LE
1306 				| MBFL_ENCTYPE_WCS4BE
1307 				| MBFL_ENCTYPE_WCS4LE))
1308 			|| encoding->mblen_table != NULL) {
1309 		const unsigned char *start = NULL;
1310 		const unsigned char *end = NULL;
1311 		unsigned char *w;
1312 		size_t sz;
1313 
1314 		if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1315 			from &= -2;
1316 
1317 			if (length >= string->len - from) {
1318 				length = string->len - from;
1319 			}
1320 
1321 			start = string->val + from;
1322 			end   = start + (length & -2);
1323 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1324 			from &= -4;
1325 
1326 			if (length >= string->len - from) {
1327 				length = string->len - from;
1328 			}
1329 
1330 			start = string->val + from;
1331 			end   = start + (length & -4);
1332 		} else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
1333 			if (length >= string->len - from) {
1334 				length = string->len - from;
1335 			}
1336 
1337 			start = string->val + from;
1338 			end = start + length;
1339 		} else if (encoding->mblen_table != NULL) {
1340 			const unsigned char *mbtab = encoding->mblen_table;
1341 			const unsigned char *p, *q;
1342 			int m;
1343 
1344 			/* search start position */
1345 			for (m = 0, p = string->val, q = p + from;
1346 					p < q; p += (m = mbtab[*p]));
1347 
1348 			if (p > q) {
1349 				p -= m;
1350 			}
1351 
1352 			start = p;
1353 
1354 			/* search end position */
1355 			if (length >= string->len - (start - string->val)) {
1356 				end = string->val + string->len;
1357 			} else {
1358 				for (q = p + length; p < q; p += (m = mbtab[*p]));
1359 
1360 				if (p > q) {
1361 					p -= m;
1362 				}
1363 				end = p;
1364 			}
1365 		} else {
1366 			/* never reached */
1367 			return NULL;
1368 		}
1369 
1370 		/* allocate memory and copy string */
1371 		sz = end - start;
1372 		if ((w = (unsigned char*)mbfl_calloc(sz + 8,
1373 				sizeof(unsigned char))) == NULL) {
1374 			return NULL;
1375 		}
1376 
1377 		memcpy(w, start, sz);
1378 		w[sz] = '\0';
1379 		w[sz + 1] = '\0';
1380 		w[sz + 2] = '\0';
1381 		w[sz + 3] = '\0';
1382 
1383 		result->val = w;
1384 		result->len = sz;
1385 	} else {
1386 		mbfl_convert_filter *encoder     = NULL;
1387 		mbfl_convert_filter *decoder     = NULL;
1388 		const unsigned char *p, *q, *r;
1389 		struct {
1390 			mbfl_convert_filter encoder;
1391 			mbfl_convert_filter decoder;
1392 			const unsigned char *p;
1393 			size_t pos;
1394 		} bk, _bk;
1395 
1396 		/* output code filter */
1397 		if (!(decoder = mbfl_convert_filter_new(
1398 				&mbfl_encoding_wchar,
1399 				string->encoding,
1400 				mbfl_memory_device_output, 0, &device))) {
1401 			return NULL;
1402 		}
1403 
1404 		/* wchar filter */
1405 		if (!(encoder = mbfl_convert_filter_new(
1406 				string->encoding,
1407 				&mbfl_encoding_wchar,
1408 				mbfl_filter_output_null,
1409 				NULL, NULL))) {
1410 			mbfl_convert_filter_delete(decoder);
1411 			return NULL;
1412 		}
1413 
1414 		mbfl_memory_device_init(&device, length + 8, 0);
1415 
1416 		p = string->val;
1417 
1418 		/* search start position */
1419 		for (q = string->val + from; p < q; p++) {
1420 			(*encoder->filter_function)(*p, encoder);
1421 		}
1422 
1423 		/* switch the drain direction */
1424 		encoder->output_function = (int(*)(int,void *))decoder->filter_function;
1425 		encoder->flush_function = (int(*)(void *))decoder->filter_flush;
1426 		encoder->data = decoder;
1427 
1428 		q = string->val + string->len;
1429 
1430 		/* save the encoder, decoder state and the pointer */
1431 		mbfl_convert_filter_copy(decoder, &_bk.decoder);
1432 		mbfl_convert_filter_copy(encoder, &_bk.encoder);
1433 		_bk.p = p;
1434 		_bk.pos = device.pos;
1435 
1436 		if (length > q - p) {
1437 			length = q - p;
1438 		}
1439 
1440 		if (length >= 20) {
1441 			/* output a little shorter than "length" */
1442 			/* XXX: the constant "20" was determined purely on the heuristics. */
1443 			for (r = p + length - 20; p < r; p++) {
1444 				(*encoder->filter_function)(*p, encoder);
1445 			}
1446 
1447 			/* if the offset of the resulting string exceeds the length,
1448 			 * then restore the state */
1449 			if (device.pos > length) {
1450 				p = _bk.p;
1451 				device.pos = _bk.pos;
1452 				decoder->filter_dtor(decoder);
1453 				encoder->filter_dtor(encoder);
1454 				mbfl_convert_filter_copy(&_bk.decoder, decoder);
1455 				mbfl_convert_filter_copy(&_bk.encoder, encoder);
1456 				bk = _bk;
1457 			} else {
1458 				/* save the encoder, decoder state and the pointer */
1459 				mbfl_convert_filter_copy(decoder, &bk.decoder);
1460 				mbfl_convert_filter_copy(encoder, &bk.encoder);
1461 				bk.p = p;
1462 				bk.pos = device.pos;
1463 
1464 				/* flush the stream */
1465 				(*encoder->filter_flush)(encoder);
1466 
1467 				/* if the offset of the resulting string exceeds the length,
1468 				 * then restore the state */
1469 				if (device.pos > length) {
1470 					bk.decoder.filter_dtor(&bk.decoder);
1471 					bk.encoder.filter_dtor(&bk.encoder);
1472 
1473 					p = _bk.p;
1474 					device.pos = _bk.pos;
1475 					decoder->filter_dtor(decoder);
1476 					encoder->filter_dtor(encoder);
1477 					mbfl_convert_filter_copy(&_bk.decoder, decoder);
1478 					mbfl_convert_filter_copy(&_bk.encoder, encoder);
1479 					bk = _bk;
1480 				} else {
1481 					_bk.decoder.filter_dtor(&_bk.decoder);
1482 					_bk.encoder.filter_dtor(&_bk.encoder);
1483 
1484 					p = bk.p;
1485 					device.pos = bk.pos;
1486 					decoder->filter_dtor(decoder);
1487 					encoder->filter_dtor(encoder);
1488 					mbfl_convert_filter_copy(&bk.decoder, decoder);
1489 					mbfl_convert_filter_copy(&bk.encoder, encoder);
1490 				}
1491 			}
1492 		} else {
1493 			bk = _bk;
1494 		}
1495 
1496 		/* detect end position */
1497 		while (p < q) {
1498 			(*encoder->filter_function)(*p, encoder);
1499 
1500 			if (device.pos > length) {
1501 				/* restore filter */
1502 				p = bk.p;
1503 				device.pos = bk.pos;
1504 				decoder->filter_dtor(decoder);
1505 				encoder->filter_dtor(encoder);
1506 				mbfl_convert_filter_copy(&bk.decoder, decoder);
1507 				mbfl_convert_filter_copy(&bk.encoder, encoder);
1508 				break;
1509 			}
1510 
1511 			p++;
1512 
1513 			/* backup current state */
1514 			mbfl_convert_filter_copy(decoder, &_bk.decoder);
1515 			mbfl_convert_filter_copy(encoder, &_bk.encoder);
1516 			_bk.pos = device.pos;
1517 			_bk.p = p;
1518 
1519 			(*encoder->filter_flush)(encoder);
1520 
1521 			if (device.pos > length) {
1522 				_bk.decoder.filter_dtor(&_bk.decoder);
1523 				_bk.encoder.filter_dtor(&_bk.encoder);
1524 
1525 				/* restore filter */
1526 				p = bk.p;
1527 				device.pos = bk.pos;
1528 				decoder->filter_dtor(decoder);
1529 				encoder->filter_dtor(encoder);
1530 				mbfl_convert_filter_copy(&bk.decoder, decoder);
1531 				mbfl_convert_filter_copy(&bk.encoder, encoder);
1532 				break;
1533 			}
1534 
1535 			bk.decoder.filter_dtor(&bk.decoder);
1536 			bk.encoder.filter_dtor(&bk.encoder);
1537 
1538 			p = _bk.p;
1539 			device.pos = _bk.pos;
1540 			decoder->filter_dtor(decoder);
1541 			encoder->filter_dtor(encoder);
1542 			mbfl_convert_filter_copy(&_bk.decoder, decoder);
1543 			mbfl_convert_filter_copy(&_bk.encoder, encoder);
1544 
1545 			bk = _bk;
1546 		}
1547 
1548 		(*encoder->filter_flush)(encoder);
1549 
1550 		bk.decoder.filter_dtor(&bk.decoder);
1551 		bk.encoder.filter_dtor(&bk.encoder);
1552 
1553 		result = mbfl_memory_device_result(&device, result);
1554 
1555 		mbfl_convert_filter_delete(encoder);
1556 		mbfl_convert_filter_delete(decoder);
1557 	}
1558 
1559 	return result;
1560 }
1561 
1562 
1563 /*
1564  *  strwidth
1565  */
is_fullwidth(int c)1566 static size_t is_fullwidth(int c)
1567 {
1568 	int i;
1569 
1570 	if (c < mbfl_eaw_table[0].begin) {
1571 		return 0;
1572 	}
1573 
1574 	for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1575 		if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1576 			return 1;
1577 		}
1578 	}
1579 
1580 	return 0;
1581 }
1582 
/*
 * Filter output callback: adds the width of `c` (2 when is_fullwidth()
 * reports it, otherwise 1) to the size_t counter that `data` points to.
 * Returns `c` unchanged.
 */
static int
filter_count_width(int c, void* data)
{
	size_t *total = (size_t *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}

	return c;
}
1589 
1590 size_t
mbfl_strwidth(mbfl_string * string)1591 mbfl_strwidth(mbfl_string *string)
1592 {
1593 	size_t len, n;
1594 	unsigned char *p;
1595 	mbfl_convert_filter *filter;
1596 
1597 	len = 0;
1598 	if (string->len > 0 && string->val != NULL) {
1599 		/* wchar filter */
1600 		filter = mbfl_convert_filter_new(
1601 		    string->encoding,
1602 		    &mbfl_encoding_wchar,
1603 		    filter_count_width, 0, &len);
1604 		if (filter == NULL) {
1605 			mbfl_convert_filter_delete(filter);
1606 			return -1;
1607 		}
1608 
1609 		/* feed data */
1610 		p = string->val;
1611 		n = string->len;
1612 		while (n > 0) {
1613 			(*filter->filter_function)(*p++, filter);
1614 			n--;
1615 		}
1616 
1617 		mbfl_convert_filter_flush(filter);
1618 		mbfl_convert_filter_delete(filter);
1619 	}
1620 
1621 	return len;
1622 }
1623 
1624 
1625 /*
1626  *  strimwidth
1627  */
1628 struct collector_strimwidth_data {
1629 	mbfl_convert_filter *decoder;
1630 	mbfl_convert_filter *decoder_backup;
1631 	mbfl_memory_device device;
1632 	size_t from;
1633 	size_t width;
1634 	size_t outwidth;
1635 	size_t outchar;
1636 	size_t endpos;
1637 	int status;
1638 };
1639 
1640 static int
collector_strimwidth(int c,void * data)1641 collector_strimwidth(int c, void* data)
1642 {
1643 	struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1644 
1645 	switch (pc->status) {
1646 	case 10:
1647 		(*pc->decoder->filter_function)(c, pc->decoder);
1648 		break;
1649 	default:
1650 		if (pc->outchar >= pc->from) {
1651 			pc->outwidth += (is_fullwidth(c) ? 2: 1);
1652 
1653 			if (pc->outwidth > pc->width) {
1654 				if (pc->status == 0) {
1655 					pc->endpos = pc->device.pos;
1656 					mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1657 				}
1658 				pc->status++;
1659 				(*pc->decoder->filter_function)(c, pc->decoder);
1660 				c = -1;
1661 			} else {
1662 				(*pc->decoder->filter_function)(c, pc->decoder);
1663 			}
1664 		}
1665 		pc->outchar++;
1666 		break;
1667 	}
1668 
1669 	return c;
1670 }
1671 
1672 mbfl_string *
mbfl_strimwidth(mbfl_string * string,mbfl_string * marker,mbfl_string * result,size_t from,size_t width)1673 mbfl_strimwidth(
1674     mbfl_string *string,
1675     mbfl_string *marker,
1676     mbfl_string *result,
1677     size_t from,
1678     size_t width)
1679 {
1680 	struct collector_strimwidth_data pc;
1681 	mbfl_convert_filter *encoder;
1682 	size_t n, mkwidth;
1683 	unsigned char *p;
1684 
1685 	if (string == NULL || result == NULL) {
1686 		return NULL;
1687 	}
1688 	mbfl_string_init(result);
1689 	result->no_language = string->no_language;
1690 	result->encoding = string->encoding;
1691 	mbfl_memory_device_init(&pc.device, MIN(string->len, width), 0);
1692 
1693 	/* output code filter */
1694 	pc.decoder = mbfl_convert_filter_new(
1695 	    &mbfl_encoding_wchar,
1696 	    string->encoding,
1697 	    mbfl_memory_device_output, 0, &pc.device);
1698 	pc.decoder_backup = mbfl_convert_filter_new(
1699 	    &mbfl_encoding_wchar,
1700 	    string->encoding,
1701 	    mbfl_memory_device_output, 0, &pc.device);
1702 	/* wchar filter */
1703 	encoder = mbfl_convert_filter_new(
1704 	    string->encoding,
1705 	    &mbfl_encoding_wchar,
1706 	    collector_strimwidth, 0, &pc);
1707 	if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1708 		mbfl_convert_filter_delete(encoder);
1709 		mbfl_convert_filter_delete(pc.decoder);
1710 		mbfl_convert_filter_delete(pc.decoder_backup);
1711 		return NULL;
1712 	}
1713 	mkwidth = 0;
1714 	if (marker) {
1715 		mkwidth = mbfl_strwidth(marker);
1716 	}
1717 	pc.from = from;
1718 	pc.width = width - mkwidth;
1719 	pc.outwidth = 0;
1720 	pc.outchar = 0;
1721 	pc.status = 0;
1722 	pc.endpos = 0;
1723 
1724 	/* feed data */
1725 	p = string->val;
1726 	n = string->len;
1727 	if (p != NULL) {
1728 		while (n > 0) {
1729 			n--;
1730 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
1731 				break;
1732 			}
1733 		}
1734 		mbfl_convert_filter_flush(encoder);
1735 		if (pc.status != 0 && mkwidth > 0) {
1736 			pc.width += mkwidth;
1737 			if (n > 0) {
1738 				while (n > 0) {
1739 					if ((*encoder->filter_function)(*p++, encoder) < 0) {
1740 						break;
1741 					}
1742 					n--;
1743 				}
1744 				mbfl_convert_filter_flush(encoder);
1745 			} else if (pc.outwidth > pc.width) {
1746 				pc.status++;
1747 			}
1748 			if (pc.status != 1) {
1749 				pc.status = 10;
1750 				pc.device.pos = pc.endpos;
1751 				mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1752 				mbfl_convert_filter_reset(encoder, marker->encoding, &mbfl_encoding_wchar);
1753 				p = marker->val;
1754 				n = marker->len;
1755 				while (n > 0) {
1756 					if ((*encoder->filter_function)(*p++, encoder) < 0) {
1757 						break;
1758 					}
1759 					n--;
1760 				}
1761 				mbfl_convert_filter_flush(encoder);
1762 			}
1763 		} else if (pc.status != 0) {
1764 			pc.device.pos = pc.endpos;
1765 			mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1766 		}
1767 		mbfl_convert_filter_flush(pc.decoder);
1768 	}
1769 	result = mbfl_memory_device_result(&pc.device, result);
1770 	mbfl_convert_filter_delete(encoder);
1771 	mbfl_convert_filter_delete(pc.decoder);
1772 	mbfl_convert_filter_delete(pc.decoder_backup);
1773 
1774 	return result;
1775 }
1776 
1777 mbfl_string *
mbfl_ja_jp_hantozen(mbfl_string * string,mbfl_string * result,int mode)1778 mbfl_ja_jp_hantozen(
1779     mbfl_string *string,
1780     mbfl_string *result,
1781     int mode)
1782 {
1783 	size_t n;
1784 	unsigned char *p;
1785 	mbfl_memory_device device;
1786 	mbfl_convert_filter *decoder = NULL;
1787 	mbfl_convert_filter *encoder = NULL;
1788 	mbfl_convert_filter *tl_filter = NULL;
1789 	mbfl_convert_filter *next_filter = NULL;
1790 	mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1791 
1792 	mbfl_memory_device_init(&device, string->len, 0);
1793 	mbfl_string_init(result);
1794 
1795 	result->no_language = string->no_language;
1796 	result->encoding = string->encoding;
1797 
1798 	decoder = mbfl_convert_filter_new(
1799 		&mbfl_encoding_wchar,
1800 		string->encoding,
1801 		mbfl_memory_device_output, 0, &device);
1802 	if (decoder == NULL) {
1803 		goto out;
1804 	}
1805 	next_filter = decoder;
1806 
1807 	param =
1808 		(mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
1809 	if (param == NULL) {
1810 		goto out;
1811 	}
1812 
1813 	param->mode = mode;
1814 
1815 	tl_filter = mbfl_convert_filter_new2(
1816 		&vtbl_tl_jisx0201_jisx0208,
1817 		(int(*)(int, void*))next_filter->filter_function,
1818 		(int(*)(void*))next_filter->filter_flush,
1819 		next_filter);
1820 	if (tl_filter == NULL) {
1821 		mbfl_free(param);
1822 		goto out;
1823 	}
1824 
1825 	tl_filter->opaque = param;
1826 	next_filter = tl_filter;
1827 
1828 	encoder = mbfl_convert_filter_new(
1829 		string->encoding,
1830 		&mbfl_encoding_wchar,
1831 		(int(*)(int, void*))next_filter->filter_function,
1832 		(int(*)(void*))next_filter->filter_flush,
1833 		next_filter);
1834 	if (encoder == NULL) {
1835 		goto out;
1836 	}
1837 
1838 	/* feed data */
1839 	p = string->val;
1840 	n = string->len;
1841 	if (p != NULL) {
1842 		while (n > 0) {
1843 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
1844 				break;
1845 			}
1846 			n--;
1847 		}
1848 	}
1849 
1850 	mbfl_convert_filter_flush(encoder);
1851 	result = mbfl_memory_device_result(&device, result);
1852 out:
1853 	if (tl_filter != NULL) {
1854 		if (tl_filter->opaque != NULL) {
1855 			mbfl_free(tl_filter->opaque);
1856 		}
1857 		mbfl_convert_filter_delete(tl_filter);
1858 	}
1859 
1860 	if (decoder != NULL) {
1861 		mbfl_convert_filter_delete(decoder);
1862 	}
1863 
1864 	if (encoder != NULL) {
1865 		mbfl_convert_filter_delete(encoder);
1866 	}
1867 
1868 	return result;
1869 }
1870 
1871 
1872 /*
1873  *  MIME header encode
1874  */
1875 struct mime_header_encoder_data {
1876 	mbfl_convert_filter *conv1_filter;
1877 	mbfl_convert_filter *block_filter;
1878 	mbfl_convert_filter *conv2_filter;
1879 	mbfl_convert_filter *conv2_filter_backup;
1880 	mbfl_convert_filter *encod_filter;
1881 	mbfl_convert_filter *encod_filter_backup;
1882 	mbfl_memory_device outdev;
1883 	mbfl_memory_device tmpdev;
1884 	int status1;
1885 	int status2;
1886 	size_t prevpos;
1887 	size_t linehead;
1888 	size_t firstindent;
1889 	int encnamelen;
1890 	int lwsplen;
1891 	char encname[128];
1892 	char lwsp[16];
1893 };
1894 
1895 static int
mime_header_encoder_block_collector(int c,void * data)1896 mime_header_encoder_block_collector(int c, void *data)
1897 {
1898 	size_t n;
1899 	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1900 
1901 	switch (pe->status2) {
1902 	case 1:	/* encoded word */
1903 		pe->prevpos = pe->outdev.pos;
1904 		mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
1905 		mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
1906 		(*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1907 		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1908 		(*pe->encod_filter->filter_flush)(pe->encod_filter);
1909 		n = pe->outdev.pos - pe->linehead + pe->firstindent;
1910 		pe->outdev.pos = pe->prevpos;
1911 		mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
1912 		mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
1913 		if (n >= 74) {
1914 			(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1915 			(*pe->encod_filter->filter_flush)(pe->encod_filter);
1916 			mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);	/* ?= */
1917 			mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
1918 			pe->linehead = pe->outdev.pos;
1919 			pe->firstindent = 0;
1920 			mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1921 			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1922 		} else {
1923 			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1924 		}
1925 		break;
1926 
1927 	default:
1928 		mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1929 		c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1930 		pe->status2 = 1;
1931 		break;
1932 	}
1933 
1934 	return c;
1935 }
1936 
1937 static int
mime_header_encoder_collector(int c,void * data)1938 mime_header_encoder_collector(int c, void *data)
1939 {
1940 	static int qp_table[256] = {
1941 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
1942 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
1943 		1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
1944 		0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
1945 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
1946 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
1947 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
1948 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
1949 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
1950 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
1951 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
1952 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
1953 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
1954 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
1955 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
1956 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  /* 0xF0 */
1957 	};
1958 
1959 	size_t n;
1960 	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1961 
1962 	switch (pe->status1) {
1963 	case 11:	/* encoded word */
1964 		(*pe->block_filter->filter_function)(c, pe->block_filter);
1965 		break;
1966 
1967 	default:	/* ASCII */
1968 		if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
1969 			mbfl_memory_device_output(c, &pe->tmpdev);
1970 			pe->status1 = 1;
1971 		} else if (pe->status1 == 0 && c == 0x20) {	/* repeat SPACE */
1972 			mbfl_memory_device_output(c, &pe->tmpdev);
1973 		} else {
1974 			if (pe->tmpdev.pos < 74 && c == 0x20) {
1975 				n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
1976 				if (n > 74) {
1977 					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
1978 					pe->linehead = pe->outdev.pos;
1979 					pe->firstindent = 0;
1980 				} else if (pe->outdev.pos > 0) {
1981 					mbfl_memory_device_output(0x20, &pe->outdev);
1982 				}
1983 				mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
1984 				mbfl_memory_device_reset(&pe->tmpdev);
1985 				pe->status1 = 0;
1986 			} else {
1987 				n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
1988 				if (n > 60)  {
1989 					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
1990 					pe->linehead = pe->outdev.pos;
1991 					pe->firstindent = 0;
1992 				} else if (pe->outdev.pos > 0)  {
1993 					mbfl_memory_device_output(0x20, &pe->outdev);
1994 				}
1995 				mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
1996 				mbfl_memory_device_reset(&pe->tmpdev);
1997 				(*pe->block_filter->filter_function)(c, pe->block_filter);
1998 				pe->status1 = 11;
1999 			}
2000 		}
2001 		break;
2002 	}
2003 
2004 	return c;
2005 }
2006 
2007 mbfl_string *
mime_header_encoder_result(struct mime_header_encoder_data * pe,mbfl_string * result)2008 mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
2009 {
2010 	if (pe->status1 >= 10) {
2011 		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2012 		(*pe->encod_filter->filter_flush)(pe->encod_filter);
2013 		mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);		/* ?= */
2014 	} else if (pe->tmpdev.pos > 0) {
2015 		if (pe->outdev.pos > 0) {
2016 			if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) {
2017 				mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2018 			} else {
2019 				mbfl_memory_device_output(0x20, &pe->outdev);
2020 			}
2021 		}
2022 		mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2023 	}
2024 	mbfl_memory_device_reset(&pe->tmpdev);
2025 	pe->prevpos = 0;
2026 	pe->linehead = 0;
2027 	pe->status1 = 0;
2028 	pe->status2 = 0;
2029 
2030 	return mbfl_memory_device_result(&pe->outdev, result);
2031 }
2032 
2033 struct mime_header_encoder_data*
mime_header_encoder_new(const mbfl_encoding * incode,const mbfl_encoding * outcode,const mbfl_encoding * transenc)2034 mime_header_encoder_new(
2035     const mbfl_encoding *incode,
2036     const mbfl_encoding *outcode,
2037     const mbfl_encoding *transenc)
2038 {
2039 	size_t n;
2040 	const char *s;
2041 	struct mime_header_encoder_data *pe;
2042 
2043 	/* get output encoding and check MIME charset name */
2044 	if (outcode->mime_name == NULL || outcode->mime_name[0] == '\0') {
2045 		return NULL;
2046 	}
2047 
2048 	pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
2049 	if (pe == NULL) {
2050 		return NULL;
2051 	}
2052 
2053 	mbfl_memory_device_init(&pe->outdev, 0, 0);
2054 	mbfl_memory_device_init(&pe->tmpdev, 0, 0);
2055 	pe->prevpos = 0;
2056 	pe->linehead = 0;
2057 	pe->firstindent = 0;
2058 	pe->status1 = 0;
2059 	pe->status2 = 0;
2060 
2061 	/* make the encoding description string  exp. "=?ISO-2022-JP?B?" */
2062 	n = 0;
2063 	pe->encname[n++] = 0x3d;
2064 	pe->encname[n++] = 0x3f;
2065 	s = outcode->mime_name;
2066 	while (*s) {
2067 		pe->encname[n++] = *s++;
2068 	}
2069 	pe->encname[n++] = 0x3f;
2070 	if (transenc->no_encoding == mbfl_no_encoding_qprint) {
2071 		pe->encname[n++] = 0x51;
2072 	} else {
2073 		pe->encname[n++] = 0x42;
2074 		transenc = &mbfl_encoding_base64;
2075 	}
2076 	pe->encname[n++] = 0x3f;
2077 	pe->encname[n] = '\0';
2078 	pe->encnamelen = n;
2079 
2080 	n = 0;
2081 	pe->lwsp[n++] = 0x0d;
2082 	pe->lwsp[n++] = 0x0a;
2083 	pe->lwsp[n++] = 0x20;
2084 	pe->lwsp[n] = '\0';
2085 	pe->lwsplen = n;
2086 
2087 	/* transfer encode filter */
2088 	pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2089 	pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2090 
2091 	/* Output code filter */
2092 	pe->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2093 	pe->conv2_filter_backup = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2094 
2095 	/* encoded block filter */
2096 	pe->block_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, &mbfl_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
2097 
2098 	/* Input code filter */
2099 	pe->conv1_filter = mbfl_convert_filter_new(incode, &mbfl_encoding_wchar, mime_header_encoder_collector, 0, pe);
2100 
2101 	if (pe->encod_filter == NULL ||
2102 	    pe->encod_filter_backup == NULL ||
2103 	    pe->conv2_filter == NULL ||
2104 	    pe->conv2_filter_backup == NULL ||
2105 	    pe->conv1_filter == NULL) {
2106 		mime_header_encoder_delete(pe);
2107 		return NULL;
2108 	}
2109 
2110 	if (transenc->no_encoding == mbfl_no_encoding_qprint) {
2111 		pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
2112 		pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
2113 	} else {
2114 		pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
2115 		pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
2116 	}
2117 
2118 	return pe;
2119 }
2120 
2121 void
mime_header_encoder_delete(struct mime_header_encoder_data * pe)2122 mime_header_encoder_delete(struct mime_header_encoder_data *pe)
2123 {
2124 	if (pe) {
2125 		mbfl_convert_filter_delete(pe->conv1_filter);
2126 		mbfl_convert_filter_delete(pe->block_filter);
2127 		mbfl_convert_filter_delete(pe->conv2_filter);
2128 		mbfl_convert_filter_delete(pe->conv2_filter_backup);
2129 		mbfl_convert_filter_delete(pe->encod_filter);
2130 		mbfl_convert_filter_delete(pe->encod_filter_backup);
2131 		mbfl_memory_device_clear(&pe->outdev);
2132 		mbfl_memory_device_clear(&pe->tmpdev);
2133 		mbfl_free((void*)pe);
2134 	}
2135 }
2136 
2137 int
mime_header_encoder_feed(int c,struct mime_header_encoder_data * pe)2138 mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
2139 {
2140 	return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
2141 }
2142 
2143 mbfl_string *
mbfl_mime_header_encode(mbfl_string * string,mbfl_string * result,const mbfl_encoding * outcode,const mbfl_encoding * encoding,const char * linefeed,int indent)2144 mbfl_mime_header_encode(
2145     mbfl_string *string,
2146     mbfl_string *result,
2147     const mbfl_encoding *outcode,
2148     const mbfl_encoding *encoding,
2149     const char *linefeed,
2150     int indent)
2151 {
2152 	size_t n;
2153 	unsigned char *p;
2154 	struct mime_header_encoder_data *pe;
2155 
2156 	mbfl_string_init(result);
2157 	result->no_language = string->no_language;
2158 	result->encoding = &mbfl_encoding_ascii;
2159 
2160 	pe = mime_header_encoder_new(string->encoding, outcode, encoding);
2161 	if (pe == NULL) {
2162 		return NULL;
2163 	}
2164 
2165 	if (linefeed != NULL) {
2166 		n = 0;
2167 		while (*linefeed && n < 8) {
2168 			pe->lwsp[n++] = *linefeed++;
2169 		}
2170 		pe->lwsp[n++] = 0x20;
2171 		pe->lwsp[n] = '\0';
2172 		pe->lwsplen = n;
2173 	}
2174 	if (indent > 0 && indent < 74) {
2175 		pe->firstindent = indent;
2176 	}
2177 
2178 	n = string->len;
2179 	p = string->val;
2180 	while (n > 0) {
2181 		(*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
2182 		n--;
2183 	}
2184 
2185 	result = mime_header_encoder_result(pe, result);
2186 	mime_header_encoder_delete(pe);
2187 
2188 	return result;
2189 }
2190 
2191 
2192 /*
2193  *  MIME header decode
2194  */
2195 struct mime_header_decoder_data {
2196 	mbfl_convert_filter *deco_filter;
2197 	mbfl_convert_filter *conv1_filter;
2198 	mbfl_convert_filter *conv2_filter;
2199 	mbfl_memory_device outdev;
2200 	mbfl_memory_device tmpdev;
2201 	size_t cspos;
2202 	int status;
2203 	const mbfl_encoding *encoding;
2204 	const mbfl_encoding *incode;
2205 	const mbfl_encoding *outcode;
2206 };
2207 
2208 static int
mime_header_decoder_collector(int c,void * data)2209 mime_header_decoder_collector(int c, void* data)
2210 {
2211 	const mbfl_encoding *encoding;
2212 	struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;
2213 
2214 	switch (pd->status) {
2215 	case 1:
2216 		if (c == 0x3f) {		/* ? */
2217 			mbfl_memory_device_output(c, &pd->tmpdev);
2218 			pd->cspos = pd->tmpdev.pos;
2219 			pd->status = 2;
2220 		} else {
2221 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2222 			mbfl_memory_device_reset(&pd->tmpdev);
2223 			if (c == 0x3d) {		/* = */
2224 				mbfl_memory_device_output(c, &pd->tmpdev);
2225 			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2226 				pd->status = 9;
2227 			} else {
2228 				(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2229 				pd->status = 0;
2230 			}
2231 		}
2232 		break;
2233 	case 2:		/* store charset string */
2234 		if (c == 0x3f) {		/* ? */
2235 			/* identify charset */
2236 			mbfl_memory_device_output('\0', &pd->tmpdev);
2237 			encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
2238 			if (encoding != NULL) {
2239 				pd->incode = encoding;
2240 				pd->status = 3;
2241 			}
2242 			mbfl_memory_device_unput(&pd->tmpdev);
2243 			mbfl_memory_device_output(c, &pd->tmpdev);
2244 		} else {
2245 			mbfl_memory_device_output(c, &pd->tmpdev);
2246 			if (pd->tmpdev.pos > 100) {		/* too long charset string */
2247 				pd->status = 0;
2248 			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2249 				mbfl_memory_device_unput(&pd->tmpdev);
2250 				pd->status = 9;
2251 			}
2252 			if (pd->status != 2) {
2253 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2254 				mbfl_memory_device_reset(&pd->tmpdev);
2255 			}
2256 		}
2257 		break;
2258 	case 3:		/* identify encoding */
2259 		mbfl_memory_device_output(c, &pd->tmpdev);
2260 		if (c == 0x42 || c == 0x62) {		/* 'B' or 'b' */
2261 			pd->encoding = &mbfl_encoding_base64;
2262 			pd->status = 4;
2263 		} else if (c == 0x51 || c == 0x71) {	/* 'Q' or 'q' */
2264 			pd->encoding = &mbfl_encoding_qprint;
2265 			pd->status = 4;
2266 		} else {
2267 			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2268 				mbfl_memory_device_unput(&pd->tmpdev);
2269 				pd->status = 9;
2270 			} else {
2271 				pd->status = 0;
2272 			}
2273 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2274 			mbfl_memory_device_reset(&pd->tmpdev);
2275 		}
2276 		break;
2277 	case 4:		/* reset filter */
2278 		mbfl_memory_device_output(c, &pd->tmpdev);
2279 		if (c == 0x3f) {		/* ? */
2280 			/* charset convert filter */
2281 			mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, &mbfl_encoding_wchar);
2282 			/* decode filter */
2283 			mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, &mbfl_encoding_8bit);
2284 			pd->status = 5;
2285 		} else {
2286 			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2287 				mbfl_memory_device_unput(&pd->tmpdev);
2288 				pd->status = 9;
2289 			} else {
2290 				pd->status = 0;
2291 			}
2292 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2293 		}
2294 		mbfl_memory_device_reset(&pd->tmpdev);
2295 		break;
2296 	case 5:		/* encoded block */
2297 		if (c == 0x3f) {		/* ? */
2298 			pd->status = 6;
2299 		} else {
2300 			(*pd->deco_filter->filter_function)(c, pd->deco_filter);
2301 		}
2302 		break;
2303 	case 6:		/* check end position */
2304 		if (c == 0x3d) {		/* = */
2305 			/* flush and reset filter */
2306 			(*pd->deco_filter->filter_flush)(pd->deco_filter);
2307 			(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2308 			mbfl_convert_filter_reset(pd->conv1_filter, &mbfl_encoding_ascii, &mbfl_encoding_wchar);
2309 			pd->status = 7;
2310 		} else {
2311 			(*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
2312 			if (c != 0x3f) {		/* ? */
2313 				(*pd->deco_filter->filter_function)(c, pd->deco_filter);
2314 				pd->status = 5;
2315 			}
2316 		}
2317 		break;
2318 	case 7:		/* after encoded block */
2319 		if (c == 0x0d || c == 0x0a) {	/* CR LF */
2320 			pd->status = 8;
2321 		} else {
2322 			mbfl_memory_device_output(c, &pd->tmpdev);
2323 			if (c == 0x3d) {		/* = */
2324 				pd->status = 1;
2325 			} else if (c != 0x20 && c != 0x09) {		/* not space */
2326 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2327 				mbfl_memory_device_reset(&pd->tmpdev);
2328 				pd->status = 0;
2329 			}
2330 		}
2331 		break;
2332 	case 8:		/* folding */
2333 	case 9:		/* folding */
2334 		if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
2335 			if (c == 0x3d) {		/* = */
2336 				if (pd->status == 8) {
2337 					mbfl_memory_device_output(0x20, &pd->tmpdev);	/* SPACE */
2338 				} else {
2339 					(*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
2340 				}
2341 				mbfl_memory_device_output(c, &pd->tmpdev);
2342 				pd->status = 1;
2343 			} else {
2344 				mbfl_memory_device_output(0x20, &pd->tmpdev);
2345 				mbfl_memory_device_output(c, &pd->tmpdev);
2346 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2347 				mbfl_memory_device_reset(&pd->tmpdev);
2348 				pd->status = 0;
2349 			}
2350 		}
2351 		break;
2352 	default:		/* non encoded block */
2353 		if (c == 0x0d || c == 0x0a) {	/* CR LF */
2354 			pd->status = 9;
2355 		} else if (c == 0x3d) {		/* = */
2356 			mbfl_memory_device_output(c, &pd->tmpdev);
2357 			pd->status = 1;
2358 		} else {
2359 			(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2360 		}
2361 		break;
2362 	}
2363 
2364 	return c;
2365 }
2366 
2367 mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data * pd,mbfl_string * result)2368 mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2369 {
2370 	switch (pd->status) {
2371 	case 1:
2372 	case 2:
2373 	case 3:
2374 	case 4:
2375 	case 7:
2376 	case 8:
2377 	case 9:
2378 		mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2379 		break;
2380 	case 5:
2381 	case 6:
2382 		(*pd->deco_filter->filter_flush)(pd->deco_filter);
2383 		(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2384 		break;
2385 	}
2386 	(*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2387 	mbfl_memory_device_reset(&pd->tmpdev);
2388 	pd->status = 0;
2389 
2390 	return mbfl_memory_device_result(&pd->outdev, result);
2391 }
2392 
2393 struct mime_header_decoder_data*
mime_header_decoder_new(const mbfl_encoding * outcode)2394 mime_header_decoder_new(const mbfl_encoding *outcode)
2395 {
2396 	struct mime_header_decoder_data *pd;
2397 
2398 	pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
2399 	if (pd == NULL) {
2400 		return NULL;
2401 	}
2402 
2403 	mbfl_memory_device_init(&pd->outdev, 0, 0);
2404 	mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2405 	pd->cspos = 0;
2406 	pd->status = 0;
2407 	pd->encoding = &mbfl_encoding_8bit;
2408 	pd->incode = &mbfl_encoding_ascii;
2409 	pd->outcode = outcode;
2410 	/* charset convert filter */
2411 	pd->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2412 	pd->conv1_filter = mbfl_convert_filter_new(pd->incode, &mbfl_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2413 	/* decode filter */
2414 	pd->deco_filter = mbfl_convert_filter_new(pd->encoding, &mbfl_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2415 
2416 	if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2417 		mime_header_decoder_delete(pd);
2418 		return NULL;
2419 	}
2420 
2421 	return pd;
2422 }
2423 
2424 void
mime_header_decoder_delete(struct mime_header_decoder_data * pd)2425 mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2426 {
2427 	if (pd) {
2428 		mbfl_convert_filter_delete(pd->conv2_filter);
2429 		mbfl_convert_filter_delete(pd->conv1_filter);
2430 		mbfl_convert_filter_delete(pd->deco_filter);
2431 		mbfl_memory_device_clear(&pd->outdev);
2432 		mbfl_memory_device_clear(&pd->tmpdev);
2433 		mbfl_free((void*)pd);
2434 	}
2435 }
2436 
/*
 * Public entry point for streaming use: pass one input byte `c` to the
 * decoder `pd`.  Returns the value of mime_header_decoder_collector().
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	void *state = pd;

	return mime_header_decoder_collector(c, state);
}
2442 
2443 mbfl_string *
mbfl_mime_header_decode(mbfl_string * string,mbfl_string * result,const mbfl_encoding * outcode)2444 mbfl_mime_header_decode(
2445     mbfl_string *string,
2446     mbfl_string *result,
2447     const mbfl_encoding *outcode)
2448 {
2449 	size_t n;
2450 	unsigned char *p;
2451 	struct mime_header_decoder_data *pd;
2452 
2453 	mbfl_string_init(result);
2454 	result->no_language = string->no_language;
2455 	result->encoding = outcode;
2456 
2457 	pd = mime_header_decoder_new(outcode);
2458 	if (pd == NULL) {
2459 		return NULL;
2460 	}
2461 
2462 	/* feed data */
2463 	n = string->len;
2464 	p = string->val;
2465 	while (n > 0) {
2466 		mime_header_decoder_collector(*p++, pd);
2467 		n--;
2468 	}
2469 
2470 	result = mime_header_decoder_result(pd, result);
2471 	mime_header_decoder_delete(pd);
2472 
2473 	return result;
2474 }
2475 
2476 
2477 
2478 /*
2479  *  convert HTML numeric entity
2480  */
2481 struct collector_htmlnumericentity_data {
2482 	mbfl_convert_filter *decoder;
2483 	int status;
2484 	int cache;
2485 	int digit;
2486 	int *convmap;
2487 	int mapsize;
2488 };
2489 
2490 static int
collector_encode_htmlnumericentity(int c,void * data)2491 collector_encode_htmlnumericentity(int c, void *data)
2492 {
2493 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2494 	int f, n, s, r, d, size, *mapelm;
2495 
2496 	size = pc->mapsize;
2497 	f = 0;
2498 	n = 0;
2499 	while (n < size) {
2500 		mapelm = &(pc->convmap[n*4]);
2501 		if (c >= mapelm[0] && c <= mapelm[1]) {
2502 			s = (c + mapelm[2]) & mapelm[3];
2503 			if (s >= 0) {
2504 				(*pc->decoder->filter_function)(0x26, pc->decoder);	/* '&' */
2505 				(*pc->decoder->filter_function)(0x23, pc->decoder);	/* '#' */
2506 				r = 100000000;
2507 				s %= r;
2508 				while (r > 0) {
2509 					d = s/r;
2510 					if (d || f) {
2511 						f = 1;
2512 						s %= r;
2513 						(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2514 					}
2515 					r /= 10;
2516 				}
2517 				if (!f) {
2518 					f = 1;
2519 					(*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2520 				}
2521 				(*pc->decoder->filter_function)(0x3b, pc->decoder);		/* ';' */
2522 			}
2523 		}
2524 		if (f) {
2525 			break;
2526 		}
2527 		n++;
2528 	}
2529 	if (!f) {
2530 		(*pc->decoder->filter_function)(c, pc->decoder);
2531 	}
2532 
2533 	return c;
2534 }
2535 
2536 static int
collector_decode_htmlnumericentity(int c,void * data)2537 collector_decode_htmlnumericentity(int c, void *data)
2538 {
2539 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2540 	int f, n, s, r, d, size, *mapelm;
2541 
2542 	switch (pc->status) {
2543 	case 1:
2544 		if (c == 0x23) {	/* '#' */
2545 			pc->status = 2;
2546 		} else {
2547 			pc->status = 0;
2548 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2549 			(*pc->decoder->filter_function)(c, pc->decoder);
2550 		}
2551 		break;
2552 	case 2:
2553 		if (c == 0x78) {	/* 'x' */
2554 			pc->status = 4;
2555 		} else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2556 			pc->cache = c - 0x30;
2557 			pc->status = 3;
2558 			pc->digit = 1;
2559 		} else {
2560 			pc->status = 0;
2561 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2562 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2563 			(*pc->decoder->filter_function)(c, pc->decoder);
2564 		}
2565 		break;
2566 	case 3:
2567 		s = 0;
2568 		f = 0;
2569 		if (c >= 0x30 && c <= 0x39) {	/* '0' - '9' */
2570 			if (pc->digit > 9) {
2571 				pc->status = 0;
2572 				s = pc->cache;
2573 				f = 1;
2574 			} else {
2575 				s = pc->cache*10 + c - 0x30;
2576 				pc->cache = s;
2577 				pc->digit++;
2578 			}
2579 		} else {
2580 			pc->status = 0;
2581 			s = pc->cache;
2582 			f = 1;
2583 			n = 0;
2584 			size = pc->mapsize;
2585 			while (n < size) {
2586 				mapelm = &(pc->convmap[n*4]);
2587 				d = s - mapelm[2];
2588 				if (d >= mapelm[0] && d <= mapelm[1]) {
2589 					f = 0;
2590 					(*pc->decoder->filter_function)(d, pc->decoder);
2591 					if (c != 0x3b) {	/* ';' */
2592 						(*pc->decoder->filter_function)(c, pc->decoder);
2593 					}
2594 					break;
2595 				}
2596 				n++;
2597 			}
2598 		}
2599 		if (f) {
2600 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2601 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2602 			r = 1;
2603 			n = pc->digit;
2604 			while (n > 0) {
2605 				r *= 10;
2606 				n--;
2607 			}
2608 			s %= r;
2609 			r /= 10;
2610 			while (r > 0) {
2611 				d = s/r;
2612 				s %= r;
2613 				r /= 10;
2614 				(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2615 			}
2616 			(*pc->decoder->filter_function)(c, pc->decoder);
2617 		}
2618 		break;
2619 	case 4:
2620 		if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2621 			pc->cache = c - 0x30;
2622 			pc->status = 5;
2623 			pc->digit = 1;
2624 		} else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F'  */
2625 			pc->cache = c - 0x41 + 10;
2626 			pc->status = 5;
2627 			pc->digit = 1;
2628 		} else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f'  */
2629 			pc->cache = c - 0x61 + 10;
2630 			pc->status = 5;
2631 			pc->digit = 1;
2632 		} else {
2633 			pc->status = 0;
2634 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2635 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2636 			(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
2637 			(*pc->decoder->filter_function)(c, pc->decoder);
2638 		}
2639 		break;
2640 	case 5:
2641 		s = 0;
2642 		f = 0;
2643 		if ((c >= 0x30 && c <= 0x39) ||
2644 			(c >= 0x41 && c <= 0x46) ||
2645 			(c >= 0x61 && c <= 0x66)) {	/* '0' - '9' or 'a' - 'f'  */
2646 			if (pc->digit > 9) {
2647 				pc->status = 0;
2648 				s = pc->cache;
2649 				f = 1;
2650 			} else {
2651 				if (c >= 0x30 && c <= 0x39) {
2652 					s = pc->cache*16 + (c - 0x30);
2653 				} else if (c >= 0x41 && c <= 0x46)  {
2654 					s = pc->cache*16 + (c - 0x41 + 10);
2655 				} else {
2656 					s = pc->cache*16 + (c - 0x61 + 10);
2657 				}
2658 				pc->cache = s;
2659 				pc->digit++;
2660 			}
2661 		} else {
2662 			pc->status = 0;
2663 			s = pc->cache;
2664 			f = 1;
2665 			n = 0;
2666 			size = pc->mapsize;
2667 			while (n < size) {
2668 				mapelm = &(pc->convmap[n*4]);
2669 				d = s - mapelm[2];
2670 				if (d >= mapelm[0] && d <= mapelm[1]) {
2671 					f = 0;
2672 					(*pc->decoder->filter_function)(d, pc->decoder);
2673 					if (c != 0x3b) {	/* ';' */
2674 						(*pc->decoder->filter_function)(c, pc->decoder);
2675 					}
2676 					break;
2677 				}
2678 				n++;
2679 			}
2680 		}
2681 		if (f) {
2682 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2683 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2684 			(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
2685 			r = 1;
2686 			n = pc->digit;
2687 			while (n > 0) {
2688 				r *= 16;
2689 				n--;
2690 			}
2691 			s %= r;
2692 			r /= 16;
2693 			while (r > 0) {
2694 				d = s/r;
2695 				s %= r;
2696 				r /= 16;
2697 				(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2698 			}
2699 			(*pc->decoder->filter_function)(c, pc->decoder);
2700 		}
2701 		break;
2702 	default:
2703 		if (c == 0x26) {	/* '&' */
2704 			pc->status = 1;
2705 		} else {
2706 			(*pc->decoder->filter_function)(c, pc->decoder);
2707 		}
2708 		break;
2709 	}
2710 
2711 	return c;
2712 }
2713 
2714 static int
collector_encode_hex_htmlnumericentity(int c,void * data)2715 collector_encode_hex_htmlnumericentity(int c, void *data)
2716 {
2717 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2718 	int f, n, s, r, d, size, *mapelm;
2719 
2720 	size = pc->mapsize;
2721 	f = 0;
2722 	n = 0;
2723 	while (n < size) {
2724 		mapelm = &(pc->convmap[n*4]);
2725 		if (c >= mapelm[0] && c <= mapelm[1]) {
2726 			s = (c + mapelm[2]) & mapelm[3];
2727 			if (s >= 0) {
2728 				(*pc->decoder->filter_function)(0x26, pc->decoder);	/* '&' */
2729 				(*pc->decoder->filter_function)(0x23, pc->decoder);	/* '#' */
2730 				(*pc->decoder->filter_function)(0x78, pc->decoder);	/* 'x' */
2731 				r = 0x1000000;
2732 				s %= r;
2733 				while (r > 0) {
2734 					d = s/r;
2735 					if (d || f) {
2736 						f = 1;
2737 						s %= r;
2738 						(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2739 					}
2740 					r /= 16;
2741 				}
2742 				if (!f) {
2743 					f = 1;
2744 					(*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2745 				}
2746 				(*pc->decoder->filter_function)(0x3b, pc->decoder);		/* ';' */
2747 			}
2748 		}
2749 		if (f) {
2750 			break;
2751 		}
2752 		n++;
2753 	}
2754 	if (!f) {
2755 		(*pc->decoder->filter_function)(c, pc->decoder);
2756 	}
2757 
2758 	return c;
2759 }
2760 
/*
 * Write the `ndigits` least significant base-`base` digits of `value`,
 * most significant first and including leading zeros, to `out`.
 *
 * Repeated division on an unsigned copy is used so that base^ndigits is
 * never computed (it overflows an int for long digit runs) and so that no
 * negative table index can be produced.
 */
static void
decode_htmlnumericentity_flush_digits(mbfl_convert_filter *out, int value, int ndigits, unsigned int base)
{
	int digits[16];
	unsigned int rest = (unsigned int)value;
	int i;

	if (ndigits > (int)(sizeof(digits)/sizeof(digits[0]))) {
		ndigits = (int)(sizeof(digits)/sizeof(digits[0]));
	}
	for (i = ndigits - 1; i >= 0; i--) {
		digits[i] = mbfl_hexchar_table[rest % base];
		rest /= base;
	}
	for (i = 0; i < ndigits; i++) {
		(*out->filter_function)(digits[i], out);
	}
}

/*
 * Flush handler of the numeric entity decoder.
 *
 * If the input ended in the middle of an entity, the part consumed so far
 * ("&", "&#", "&#x" and any digits) is written to the output literally.
 * The decoder state is then cleared.
 *
 * Although declared as mbfl_convert_filter *, `filter` is treated as a
 * pointer to struct collector_htmlnumericentity_data.
 *
 * Always returns 0.
 */
int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
{
	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;

	switch (pc->status) {
	case 1: /* '&' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		break;
	case 2: /* '#' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		break;
	case 3: /* '0'-'9' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		decode_htmlnumericentity_flush_digits(pc->decoder, pc->cache, pc->digit, 10);
		break;
	case 4: /* 'x' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
		break;
	case 5: /* '0'-'9','a'-'f' */
		(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
		(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
		(*pc->decoder->filter_function)(0x78, pc->decoder);		/* 'x' */
		decode_htmlnumericentity_flush_digits(pc->decoder, pc->cache, pc->digit, 16);
		break;
	default:
		/* not inside an entity: nothing is pending */
		break;
	}

	pc->status = 0;
	pc->cache = 0;
	pc->digit = 0;

	return 0;
}
2833 
2834 
2835 mbfl_string *
mbfl_html_numeric_entity(mbfl_string * string,mbfl_string * result,int * convmap,int mapsize,int type)2836 mbfl_html_numeric_entity(
2837     mbfl_string *string,
2838     mbfl_string *result,
2839     int *convmap,
2840     int mapsize,
2841     int type)
2842 {
2843 	struct collector_htmlnumericentity_data pc;
2844 	mbfl_memory_device device;
2845 	mbfl_convert_filter *encoder;
2846 	size_t n;
2847 	unsigned char *p;
2848 
2849 	if (string == NULL || result == NULL) {
2850 		return NULL;
2851 	}
2852 	mbfl_string_init(result);
2853 	result->no_language = string->no_language;
2854 	result->encoding = string->encoding;
2855 	mbfl_memory_device_init(&device, string->len, 0);
2856 
2857 	/* output code filter */
2858 	pc.decoder = mbfl_convert_filter_new(
2859 	    &mbfl_encoding_wchar,
2860 	    string->encoding,
2861 	    mbfl_memory_device_output, 0, &device);
2862 	/* wchar filter */
2863 	if (type == 0) { /* decimal output */
2864 		encoder = mbfl_convert_filter_new(
2865 		    string->encoding,
2866 		    &mbfl_encoding_wchar,
2867 		    collector_encode_htmlnumericentity, 0, &pc);
2868 	} else if (type == 2) { /* hex output */
2869 		encoder = mbfl_convert_filter_new(
2870 		    string->encoding,
2871 		    &mbfl_encoding_wchar,
2872 		    collector_encode_hex_htmlnumericentity, 0, &pc);
2873 	} else { /* type == 1: decimal/hex input */
2874 		encoder = mbfl_convert_filter_new(
2875 		    string->encoding,
2876 		    &mbfl_encoding_wchar,
2877 		    collector_decode_htmlnumericentity,
2878 			(int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
2879 	}
2880 	if (pc.decoder == NULL || encoder == NULL) {
2881 		mbfl_convert_filter_delete(encoder);
2882 		mbfl_convert_filter_delete(pc.decoder);
2883 		return NULL;
2884 	}
2885 	pc.status = 0;
2886 	pc.cache = 0;
2887 	pc.digit = 0;
2888 	pc.convmap = convmap;
2889 	pc.mapsize = mapsize;
2890 
2891 	/* feed data */
2892 	p = string->val;
2893 	n = string->len;
2894 	if (p != NULL) {
2895 		while (n > 0) {
2896 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
2897 				break;
2898 			}
2899 			n--;
2900 		}
2901 	}
2902 	mbfl_convert_filter_flush(encoder);
2903 	mbfl_convert_filter_flush(pc.decoder);
2904 	result = mbfl_memory_device_result(&device, result);
2905 	mbfl_convert_filter_delete(encoder);
2906 	mbfl_convert_filter_delete(pc.decoder);
2907 
2908 	return result;
2909 }
2910 
2911 /*
2912  * Local variables:
2913  * tab-width: 4
2914  * c-basic-offset: 4
2915  * End:
2916  */
2917