xref: /PHP-5.3/ext/mbstring/libmbfl/mbfl/mbfilter.c (revision 3d888412)
1 /*
2  * charset=UTF-8
3  * vim600: encoding=utf-8
4  */
5 
6 /*
7  * "streamable kanji code filter and converter"
8  *
9  * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
10  *
11  * This software is released under the GNU Lesser General Public License.
12  * (Version 2.1, February 1999)
13  * Please read the following detail of the licence (in japanese).
14  *
15  * ◆使用許諾条件◆
16  *
17  * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
18  * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
19  * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
20  * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
21  * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
22  * することはできません。
23  *
24  * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
25  * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
26  * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
27  * による許諾を得る必要があります。
28  *
29  * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
30  * ます。「GNU Lesser General Public License」とは、これまでLibrary General
31  * Public Licenseと呼ばれていたものです。
32  *     http://www.gnu.org/ --- GNUウェブサイト
33  *     http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
34  * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
35  *
36  * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
37  * はありません。
38  *
39  * ◆保証内容◆
40  *
41  * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
42  * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
43  * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
44  * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
45  * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
46  * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
47  * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
48  * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
49  * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
50  * 契約・規定に優先します。
51  *
52  * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
53  *
54  * 〒102-0073
55  * 東京都千代田区九段北1-13-5日本地所第一ビル4F
56  * 株式会社ハッピーサイズ
57  * Phone: 03-3512-3655, Fax: 03-3512-3656
58  * Email: sales@happysize.co.jp
59  * Web: http://happysize.com/
60  *
61  * ◆著者◆
62  *
63  * 金本 茂 <sgk@happysize.co.jp>
64  *
65  * ◆履歴◆
66  *
67  * 1998/11/10 sgk implementation in C++
68  * 1999/4/25  sgk Cで書きなおし。
69  * 1999/4/26  sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
70  * 1999/6/??      Unicodeサポート。
71  * 1999/6/22  sgk ライセンスをLGPLに変更。
72  *
73  */
74 
75 /*
76  * Unicode support
77  *
78  * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
79  * All rights reserved.
80  *
81  */
82 
83 
84 #ifdef HAVE_CONFIG_H
85 #include "config.h"
86 #endif
87 
88 #include <stddef.h>
89 
90 #ifdef HAVE_STRING_H
91 #include <string.h>
92 #endif
93 
94 #ifdef HAVE_STRINGS_H
95 #include <strings.h>
96 #endif
97 
98 #ifdef HAVE_STDDEF_H
99 #include <stddef.h>
100 #endif
101 
102 #include "mbfilter.h"
103 #include "mbfl_filter_output.h"
104 #include "mbfilter_pass.h"
105 #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
106 
107 #include "eaw_table.h"
108 
/*
 * Hexadecimal digit table: entry i holds the ASCII code of the upper-case
 * hex digit for the value i, i.e. the characters "0123456789ABCDEF".
 * The array has exactly 16 entries and no terminating NUL.
 */
static char mbfl_hexchar_table[] = {
	/* '0' .. '7' */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	/* '8', '9', 'A' .. 'F' */
	0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46
};
113 
114 
115 
116 /*
117  * encoding filter
118  */
/*
 * Evaluate `statement`; when its value is negative, make the enclosing
 * function return -1 immediately.  The do/while(0) wrapper lets the macro be
 * used as a single statement.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
120 
121 
122 /*
123  *  buffering converter
124  */
125 mbfl_buffer_converter *
mbfl_buffer_converter_new(enum mbfl_no_encoding from,enum mbfl_no_encoding to,int buf_initsz)126 mbfl_buffer_converter_new(
127     enum mbfl_no_encoding from,
128     enum mbfl_no_encoding to,
129     int buf_initsz)
130 {
131 	mbfl_buffer_converter *convd;
132 
133 	/* allocate */
134 	convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof (mbfl_buffer_converter));
135 	if (convd == NULL) {
136 		return NULL;
137 	}
138 
139 	/* initialize */
140 	convd->from = mbfl_no2encoding(from);
141 	convd->to = mbfl_no2encoding(to);
142 	if (convd->from == NULL) {
143 		convd->from = &mbfl_encoding_pass;
144 	}
145 	if (convd->to == NULL) {
146 		convd->to = &mbfl_encoding_pass;
147 	}
148 
149 	/* create convert filter */
150 	convd->filter1 = NULL;
151 	convd->filter2 = NULL;
152 	if (mbfl_convert_filter_get_vtbl(convd->from->no_encoding, convd->to->no_encoding) != NULL) {
153 		convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
154 	} else {
155 		convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
156 		if (convd->filter2 != NULL) {
157 			convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding,
158 					mbfl_no_encoding_wchar,
159 					(int (*)(int, void*))convd->filter2->filter_function,
160 					(int (*)(void*))convd->filter2->filter_flush,
161 					convd->filter2);
162 			if (convd->filter1 == NULL) {
163 				mbfl_convert_filter_delete(convd->filter2);
164 			}
165 		}
166 	}
167 	if (convd->filter1 == NULL) {
168 		return NULL;
169 	}
170 
171 	mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
172 
173 	return convd;
174 }
175 
176 void
mbfl_buffer_converter_delete(mbfl_buffer_converter * convd)177 mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
178 {
179 	if (convd != NULL) {
180 		if (convd->filter1) {
181 			mbfl_convert_filter_delete(convd->filter1);
182 		}
183 		if (convd->filter2) {
184 			mbfl_convert_filter_delete(convd->filter2);
185 		}
186 		mbfl_memory_device_clear(&convd->device);
187 		mbfl_free((void*)convd);
188 	}
189 }
190 
191 void
mbfl_buffer_converter_reset(mbfl_buffer_converter * convd)192 mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
193 {
194 	mbfl_memory_device_reset(&convd->device);
195 }
196 
197 int
mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter * convd,int mode)198 mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
199 {
200 	if (convd != NULL) {
201 		if (convd->filter2 != NULL) {
202 			convd->filter2->illegal_mode = mode;
203 		} else if (convd->filter1 != NULL) {
204 			convd->filter1->illegal_mode = mode;
205 		} else {
206 			return 0;
207 		}
208 	}
209 
210 	return 1;
211 }
212 
213 int
mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter * convd,int substchar)214 mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
215 {
216 	if (convd != NULL) {
217 		if (convd->filter2 != NULL) {
218 			convd->filter2->illegal_substchar = substchar;
219 		} else if (convd->filter1 != NULL) {
220 			convd->filter1->illegal_substchar = substchar;
221 		} else {
222 			return 0;
223 		}
224 	}
225 
226 	return 1;
227 }
228 
229 int
mbfl_buffer_converter_strncat(mbfl_buffer_converter * convd,const unsigned char * p,int n)230 mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n)
231 {
232 	mbfl_convert_filter *filter;
233 	int (*filter_function)(int c, mbfl_convert_filter *filter);
234 
235 	if (convd != NULL && p != NULL) {
236 		filter = convd->filter1;
237 		if (filter != NULL) {
238 			filter_function = filter->filter_function;
239 			while (n > 0) {
240 				if ((*filter_function)(*p++, filter) < 0) {
241 					break;
242 				}
243 				n--;
244 			}
245 		}
246 	}
247 
248 	return n;
249 }
250 
251 int
mbfl_buffer_converter_feed(mbfl_buffer_converter * convd,mbfl_string * string)252 mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
253 {
254 	int n;
255 	unsigned char *p;
256 	mbfl_convert_filter *filter;
257 	int (*filter_function)(int c, mbfl_convert_filter *filter);
258 
259 	if (convd == NULL || string == NULL) {
260 		return -1;
261 	}
262 	mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
263 	/* feed data */
264 	n = string->len;
265 	p = string->val;
266 	filter = convd->filter1;
267 	if (filter != NULL) {
268 		filter_function = filter->filter_function;
269 		while (n > 0) {
270 			if ((*filter_function)(*p++, filter) < 0) {
271 				return -1;
272 			}
273 			n--;
274 		}
275 	}
276 
277 	return 0;
278 }
279 
280 int
mbfl_buffer_converter_flush(mbfl_buffer_converter * convd)281 mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
282 {
283 	if (convd == NULL) {
284 		return -1;
285 	}
286 
287 	if (convd->filter1 != NULL) {
288 		mbfl_convert_filter_flush(convd->filter1);
289 	}
290 	if (convd->filter2 != NULL) {
291 		mbfl_convert_filter_flush(convd->filter2);
292 	}
293 
294 	return 0;
295 }
296 
297 mbfl_string *
mbfl_buffer_converter_getbuffer(mbfl_buffer_converter * convd,mbfl_string * result)298 mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
299 {
300 	if (convd != NULL && result != NULL && convd->device.buffer != NULL) {
301 		result->no_encoding = convd->to->no_encoding;
302 		result->val = convd->device.buffer;
303 		result->len = convd->device.pos;
304 	} else {
305 		result = NULL;
306 	}
307 
308 	return result;
309 }
310 
311 mbfl_string *
mbfl_buffer_converter_result(mbfl_buffer_converter * convd,mbfl_string * result)312 mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
313 {
314 	if (convd == NULL || result == NULL) {
315 		return NULL;
316 	}
317 	result->no_encoding = convd->to->no_encoding;
318 	return mbfl_memory_device_result(&convd->device, result);
319 }
320 
321 mbfl_string *
mbfl_buffer_converter_feed_result(mbfl_buffer_converter * convd,mbfl_string * string,mbfl_string * result)322 mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
323 				  mbfl_string *result)
324 {
325 	if (convd == NULL || string == NULL || result == NULL) {
326 		return NULL;
327 	}
328 	mbfl_buffer_converter_feed(convd, string);
329 	if (convd->filter1 != NULL) {
330 		mbfl_convert_filter_flush(convd->filter1);
331 	}
332 	if (convd->filter2 != NULL) {
333 		mbfl_convert_filter_flush(convd->filter2);
334 	}
335 	result->no_encoding = convd->to->no_encoding;
336 	return mbfl_memory_device_result(&convd->device, result);
337 }
338 
/*
 * Return the sum of the num_illegalchar counters of the converter's filters
 * (filter1 and filter2, whichever exist).  A NULL convd yields 0.
 */
int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
	int total;

	if (convd == NULL) {
		return 0;
	}

	total = 0;
	if (convd->filter1 != NULL) {
		total += convd->filter1->num_illegalchar;
	}
	if (convd->filter2 != NULL) {
		total += convd->filter2->num_illegalchar;
	}

	return total;
}
357 
358 /*
359  * encoding detector
360  */
361 mbfl_encoding_detector *
mbfl_encoding_detector_new(enum mbfl_no_encoding * elist,int elistsz,int strict)362 mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict)
363 {
364 	mbfl_encoding_detector *identd;
365 
366 	int i, num;
367 	mbfl_identify_filter *filter;
368 
369 	if (elist == NULL || elistsz <= 0) {
370 		return NULL;
371 	}
372 
373 	/* allocate */
374 	identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
375 	if (identd == NULL) {
376 		return NULL;
377 	}
378 	identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
379 	if (identd->filter_list == NULL) {
380 		mbfl_free(identd);
381 		return NULL;
382 	}
383 
384 	/* create filters */
385 	i = 0;
386 	num = 0;
387 	while (i < elistsz) {
388 		filter = mbfl_identify_filter_new(elist[i]);
389 		if (filter != NULL) {
390 			identd->filter_list[num] = filter;
391 			num++;
392 		}
393 		i++;
394 	}
395 	identd->filter_list_size = num;
396 
397 	/* set strict flag */
398 	identd->strict = strict;
399 
400 	return identd;
401 }
402 
403 void
mbfl_encoding_detector_delete(mbfl_encoding_detector * identd)404 mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
405 {
406 	int i;
407 
408 	if (identd != NULL) {
409 		if (identd->filter_list != NULL) {
410 			i = identd->filter_list_size;
411 			while (i > 0) {
412 				i--;
413 				mbfl_identify_filter_delete(identd->filter_list[i]);
414 			}
415 			mbfl_free((void *)identd->filter_list);
416 		}
417 		mbfl_free((void *)identd);
418 	}
419 }
420 
421 int
mbfl_encoding_detector_feed(mbfl_encoding_detector * identd,mbfl_string * string)422 mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
423 {
424 	int i, n, num, bad, res;
425 	unsigned char *p;
426 	mbfl_identify_filter *filter;
427 
428 	res = 0;
429 	/* feed data */
430 	if (identd != NULL && string != NULL && string->val != NULL) {
431 		num = identd->filter_list_size;
432 		n = string->len;
433 		p = string->val;
434 		bad = 0;
435 		while (n > 0) {
436 			for (i = 0; i < num; i++) {
437 				filter = identd->filter_list[i];
438 				if (!filter->flag) {
439 					(*filter->filter_function)(*p, filter);
440 					if (filter->flag) {
441 						bad++;
442 					}
443 				}
444 			}
445 			if ((num - 1) <= bad) {
446 				res = 1;
447 				break;
448 			}
449 			p++;
450 			n--;
451 		}
452 	}
453 
454 	return res;
455 }
456 
/*
 * Pick the detected encoding from the detector's filters.
 *
 * The winner is the filter with the lowest index whose flag is clear and,
 * when the detector is strict, whose status is zero.  If that yields
 * mbfl_no_encoding_invalid, a fallback pass takes the lowest-indexed filter
 * whose flag is clear, regardless of status.
 *
 * Returns mbfl_no_encoding_invalid when identd is NULL or no filter
 * qualifies.
 */
enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
{
	mbfl_identify_filter *candidate;
	enum mbfl_no_encoding verdict;
	int idx, count;

	verdict = mbfl_no_encoding_invalid;
	if (identd == NULL) {
		return verdict;
	}
	count = identd->filter_list_size;

	/* judge */
	for (idx = 0; idx < count; idx++) {
		candidate = identd->filter_list[idx];
		if (!candidate->flag && (!identd->strict || !candidate->status)) {
			verdict = candidate->encoding->no_encoding;
			break;
		}
	}

	/* fallback judge */
	if (verdict == mbfl_no_encoding_invalid) {
		for (idx = 0; idx < count; idx++) {
			candidate = identd->filter_list[idx];
			if (!candidate->flag) {
				verdict = candidate->encoding->no_encoding;
				break;
			}
		}
	}

	return verdict;
}
492 
493 
494 /*
495  * encoding converter
496  */
497 mbfl_string *
mbfl_convert_encoding(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding toenc)498 mbfl_convert_encoding(
499     mbfl_string *string,
500     mbfl_string *result,
501     enum mbfl_no_encoding toenc)
502 {
503 	int n;
504 	unsigned char *p;
505 	const mbfl_encoding *encoding;
506 	mbfl_memory_device device;
507 	mbfl_convert_filter *filter1;
508 	mbfl_convert_filter *filter2;
509 
510 	/* initialize */
511 	encoding = mbfl_no2encoding(toenc);
512 	if (encoding == NULL || string == NULL || result == NULL) {
513 		return NULL;
514 	}
515 
516 	filter1 = NULL;
517 	filter2 = NULL;
518 	if (mbfl_convert_filter_get_vtbl(string->no_encoding, toenc) != NULL) {
519 		filter1 = mbfl_convert_filter_new(string->no_encoding, toenc, mbfl_memory_device_output, 0, &device);
520 	} else {
521 		filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
522 		if (filter2 != NULL) {
523 			filter1 = mbfl_convert_filter_new(string->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
524 			if (filter1 == NULL) {
525 				mbfl_convert_filter_delete(filter2);
526 			}
527 		}
528 	}
529 	if (filter1 == NULL) {
530 		return NULL;
531 	}
532 
533 	if (filter2 != NULL) {
534 		filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
535 		filter2->illegal_substchar = 0x3f;		/* '?' */
536 	}
537 
538 	mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
539 
540 	/* feed data */
541 	n = string->len;
542 	p = string->val;
543 	if (p != NULL) {
544 		while (n > 0) {
545 			if ((*filter1->filter_function)(*p++, filter1) < 0) {
546 				break;
547 			}
548 			n--;
549 		}
550 	}
551 
552 	mbfl_convert_filter_flush(filter1);
553 	mbfl_convert_filter_delete(filter1);
554 	if (filter2 != NULL) {
555 		mbfl_convert_filter_flush(filter2);
556 		mbfl_convert_filter_delete(filter2);
557 	}
558 
559 	return mbfl_memory_device_result(&device, result);
560 }
561 
562 
563 /*
564  * identify encoding
565  */
566 const mbfl_encoding *
mbfl_identify_encoding(mbfl_string * string,enum mbfl_no_encoding * elist,int elistsz,int strict)567 mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
568 {
569 	int i, n, num, bad;
570 	unsigned char *p;
571 	mbfl_identify_filter *flist, *filter;
572 	const mbfl_encoding *encoding;
573 
574 	/* flist is an array of mbfl_identify_filter instances */
575 	flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
576 	if (flist == NULL) {
577 		return NULL;
578 	}
579 
580 	num = 0;
581 	if (elist != NULL) {
582 		for (i = 0; i < elistsz; i++) {
583 			if (!mbfl_identify_filter_init(&flist[num], elist[i])) {
584 				num++;
585 			}
586 		}
587 	}
588 
589 	/* feed data */
590 	n = string->len;
591 	p = string->val;
592 
593 	if (p != NULL) {
594 		bad = 0;
595 		while (n > 0) {
596 			for (i = 0; i < num; i++) {
597 				filter = &flist[i];
598 				if (!filter->flag) {
599 					(*filter->filter_function)(*p, filter);
600 					if (filter->flag) {
601 						bad++;
602 					}
603 				}
604 			}
605 			if ((num - 1) <= bad && !strict) {
606 				break;
607 			}
608 			p++;
609 			n--;
610 		}
611 	}
612 
613 	/* judge */
614 	encoding = NULL;
615 
616 	for (i = 0; i < num; i++) {
617 		filter = &flist[i];
618 		if (!filter->flag) {
619 			if (strict && filter->status) {
620  				continue;
621  			}
622 			encoding = filter->encoding;
623 			break;
624 		}
625 	}
626 
627 	/* fall-back judge */
628 	if (!encoding) {
629 		for (i = 0; i < num; i++) {
630 			filter = &flist[i];
631 			if (!filter->flag && (!strict || !filter->status)) {
632 				encoding = filter->encoding;
633 				break;
634 			}
635 		}
636 	}
637 
638 	/* cleanup */
639 	/* dtors should be called in reverse order */
640 	i = num; while (--i >= 0) {
641 		mbfl_identify_filter_cleanup(&flist[i]);
642 	}
643 
644 	mbfl_free((void *)flist);
645 
646 	return encoding;
647 }
648 
649 const char*
mbfl_identify_encoding_name(mbfl_string * string,enum mbfl_no_encoding * elist,int elistsz,int strict)650 mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
651 {
652 	const mbfl_encoding *encoding;
653 
654 	encoding = mbfl_identify_encoding(string, elist, elistsz, strict);
655 	if (encoding != NULL &&
656 	    encoding->no_encoding > mbfl_no_encoding_charset_min &&
657 	    encoding->no_encoding < mbfl_no_encoding_charset_max) {
658 		return encoding->name;
659 	} else {
660 		return NULL;
661 	}
662 }
663 
664 enum mbfl_no_encoding
mbfl_identify_encoding_no(mbfl_string * string,enum mbfl_no_encoding * elist,int elistsz,int strict)665 mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
666 {
667 	const mbfl_encoding *encoding;
668 
669 	encoding = mbfl_identify_encoding(string, elist, elistsz, strict);
670 	if (encoding != NULL &&
671 	    encoding->no_encoding > mbfl_no_encoding_charset_min &&
672 	    encoding->no_encoding < mbfl_no_encoding_charset_max) {
673 		return encoding->no_encoding;
674 	} else {
675 		return mbfl_no_encoding_invalid;
676 	}
677 }
678 
679 
680 /*
681  *  strlen
682  */
/*
 * Output callback that only counts: `data` points to an int counter which is
 * incremented once per call.  The character c is returned unchanged.
 */
static int
filter_count_output(int c, void *data)
{
	int *counter = (int *)data;

	*counter += 1;

	return c;
}
689 
690 int
mbfl_strlen(mbfl_string * string)691 mbfl_strlen(mbfl_string *string)
692 {
693 	int len, n, m, k;
694 	unsigned char *p;
695 	const unsigned char *mbtab;
696 	const mbfl_encoding *encoding;
697 
698 	encoding = mbfl_no2encoding(string->no_encoding);
699 	if (encoding == NULL || string == NULL) {
700 		return -1;
701 	}
702 
703 	len = 0;
704 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
705 		len = string->len;
706 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
707 		len = string->len/2;
708 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
709 		len = string->len/4;
710 	} else if (encoding->mblen_table != NULL) {
711 		mbtab = encoding->mblen_table;
712 		n = 0;
713 		p = string->val;
714 		k = string->len;
715 		/* count */
716 		if (p != NULL) {
717 			while (n < k) {
718 				m = mbtab[*p];
719 				n += m;
720 				p += m;
721 				len++;
722 			};
723 		}
724 	} else {
725 		/* wchar filter */
726 		mbfl_convert_filter *filter = mbfl_convert_filter_new(
727 		  string->no_encoding,
728 		  mbfl_no_encoding_wchar,
729 		  filter_count_output, 0, &len);
730 		if (filter == NULL) {
731 			return -1;
732 		}
733 		/* count */
734 		n = string->len;
735 		p = string->val;
736 		if (p != NULL) {
737 			while (n > 0) {
738 				(*filter->filter_function)(*p++, filter);
739 				n--;
740 			}
741 		}
742 		mbfl_convert_filter_delete(filter);
743 	}
744 
745 	return len;
746 }
747 
748 
749 /*
750  *  strpos
751  */
752 struct collector_strpos_data {
753 	mbfl_convert_filter *next_filter;
754 	mbfl_wchar_device needle;
755 	int needle_len;
756 	int start;
757 	int output;
758 	int found_pos;
759 	int needle_pos;
760 	int matched_pos;
761 };
762 
763 static int
collector_strpos(int c,void * data)764 collector_strpos(int c, void* data)
765 {
766 	int *p, *h, *m, n;
767 	struct collector_strpos_data *pc = (struct collector_strpos_data*)data;
768 
769 	if (pc->output >= pc->start) {
770 		if (c == (int)pc->needle.buffer[pc->needle_pos]) {
771 			if (pc->needle_pos == 0) {
772 				pc->found_pos = pc->output;			/* found position */
773 			}
774 			pc->needle_pos++;						/* needle pointer */
775 			if (pc->needle_pos >= pc->needle_len) {
776 				pc->matched_pos = pc->found_pos;	/* matched position */
777 				pc->needle_pos--;
778 				goto retry;
779 			}
780 		} else if (pc->needle_pos != 0) {
781 retry:
782 			h = (int *)pc->needle.buffer;
783 			h++;
784 			for (;;) {
785 				pc->found_pos++;
786 				p = h;
787 				m = (int *)pc->needle.buffer;
788 				n = pc->needle_pos - 1;
789 				while (n > 0 && *p == *m) {
790 					n--;
791 					p++;
792 					m++;
793 				}
794 				if (n <= 0) {
795 					if (*m != c) {
796 						pc->needle_pos = 0;
797 					}
798 					break;
799 				} else {
800 					h++;
801 					pc->needle_pos--;
802 				}
803 			}
804 		}
805 	}
806 
807 	pc->output++;
808 	return c;
809 }
810 
811 /*
812  *	oddlen
813  */
814 int
mbfl_oddlen(mbfl_string * string)815 mbfl_oddlen(mbfl_string *string)
816 {
817 	int len, n, m, k;
818 	unsigned char *p;
819 	const unsigned char *mbtab;
820 	const mbfl_encoding *encoding;
821 
822 
823 	if (string == NULL) {
824 		return -1;
825 	}
826 	encoding = mbfl_no2encoding(string->no_encoding);
827 	if (encoding == NULL) {
828 		return -1;
829 	}
830 
831 	len = 0;
832 	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
833 		return 0;
834 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
835 		return len % 2;
836 	} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
837 		return len % 4;
838 	} else if (encoding->mblen_table != NULL) {
839  		mbtab = encoding->mblen_table;
840  		n = 0;
841 		p = string->val;
842 		k = string->len;
843 		/* count */
844 		if (p != NULL) {
845 			while (n < k) {
846 				m = mbtab[*p];
847 				n += m;
848 				p += m;
849 			};
850 		}
851 		return n-k;
852 	} else {
853 		/* how can i do ? */
854 		return 0;
855 	}
856 	/* NOT REACHED */
857 }
858 
859 int
mbfl_strpos(mbfl_string * haystack,mbfl_string * needle,int offset,int reverse)860 mbfl_strpos(
861     mbfl_string *haystack,
862     mbfl_string *needle,
863     int offset,
864     int reverse)
865 {
866 	int result;
867 	mbfl_string _haystack_u8, _needle_u8;
868 	const mbfl_string *haystack_u8, *needle_u8;
869 	const unsigned char *u8_tbl;
870 
871 	if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
872 		return -8;
873 	}
874 
875 	{
876 		const mbfl_encoding *u8_enc;
877 		u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8);
878 		if (u8_enc == NULL || u8_enc->mblen_table == NULL) {
879 			return -8;
880 		}
881 		u8_tbl = u8_enc->mblen_table;
882 	}
883 
884 	if (haystack->no_encoding != mbfl_no_encoding_utf8) {
885 		mbfl_string_init(&_haystack_u8);
886 		haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8);
887 		if (haystack_u8 == NULL) {
888 			result = -4;
889 			goto out;
890 		}
891 	} else {
892 		haystack_u8 = haystack;
893 	}
894 
895 	if (needle->no_encoding != mbfl_no_encoding_utf8) {
896 		mbfl_string_init(&_needle_u8);
897 		needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8);
898 		if (needle_u8 == NULL) {
899 			result = -4;
900 			goto out;
901 		}
902 	} else {
903 		needle_u8 = needle;
904 	}
905 
906 	if (needle_u8->len < 1) {
907 		result = -8;
908 		goto out;
909 	}
910 
911 	result = -1;
912 	if (haystack_u8->len < needle_u8->len) {
913 		goto out;
914 	}
915 
916 	if (!reverse) {
917 		unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
918 		unsigned int needle_u8_len = needle_u8->len;
919 		unsigned int i;
920 		const unsigned char *p, *q, *e;
921 		const unsigned char *haystack_u8_val = haystack_u8->val,
922 		                    *needle_u8_val = needle_u8->val;
923 		for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
924 			jtbl[i] = needle_u8_len + 1;
925 		}
926 		for (i = 0; i < needle_u8_len - 1; ++i) {
927 			jtbl[needle_u8_val[i]] = needle_u8_len - i;
928 		}
929 		e = haystack_u8_val + haystack_u8->len;
930 		p = haystack_u8_val;
931 		while (--offset >= 0) {
932 			if (p >= e) {
933 				result = -16;
934 				goto out;
935 			}
936 			p += u8_tbl[*p];
937 		}
938 		p += needle_u8_len;
939 		if (p > e) {
940 			goto out;
941 		}
942 		while (p <= e) {
943 			const unsigned char *pv = p;
944 			q = needle_u8_val + needle_u8_len;
945 			for (;;) {
946 				if (q == needle_u8_val) {
947 					result = 0;
948 					while (p > haystack_u8_val) {
949 						unsigned char c = *--p;
950 						if (c < 0x80) {
951 							++result;
952 						} else if ((c & 0xc0) != 0x80) {
953 							++result;
954 						}
955 					}
956 					goto out;
957 				}
958 				if (*--q != *--p) {
959 					break;
960 				}
961 			}
962 			p += jtbl[*p];
963 			if (p <= pv) {
964 				p = pv + 1;
965 			}
966 		}
967 	} else {
968 		unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
969 		unsigned int needle_u8_len = needle_u8->len, needle_len = 0;
970 		unsigned int i;
971 		const unsigned char *p, *e, *q, *qe;
972 		const unsigned char *haystack_u8_val = haystack_u8->val,
973 		                    *needle_u8_val = needle_u8->val;
974 		for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
975 			jtbl[i] = needle_u8_len;
976 		}
977 		for (i = needle_u8_len - 1; i > 0; --i) {
978 			unsigned char c = needle_u8_val[i];
979 			jtbl[c] = i;
980 			if (c < 0x80) {
981 				++needle_len;
982 			} else if ((c & 0xc0) != 0x80) {
983 				++needle_len;
984 			}
985 		}
986 		{
987 			unsigned char c = needle_u8_val[0];
988 			if (c < 0x80) {
989 				++needle_len;
990 			} else if ((c & 0xc0) != 0x80) {
991 				++needle_len;
992 			}
993 		}
994 		e = haystack_u8_val;
995 		p = e + haystack_u8->len;
996 		qe = needle_u8_val + needle_u8_len;
997 		if (offset < 0) {
998 			if (-offset > needle_len) {
999 				offset += needle_len;
1000 				while (offset < 0) {
1001 					unsigned char c;
1002 					if (p <= e) {
1003 						result = -16;
1004 						goto out;
1005 					}
1006 					c = *(--p);
1007 					if (c < 0x80) {
1008 						++offset;
1009 					} else if ((c & 0xc0) != 0x80) {
1010 						++offset;
1011 					}
1012 				}
1013 			}
1014 		} else {
1015 			const unsigned char *ee = haystack_u8_val + haystack_u8->len;
1016 			while (--offset >= 0) {
1017 				if (e >= ee) {
1018 					result = -16;
1019 					goto out;
1020 				}
1021 				e += u8_tbl[*e];
1022 			}
1023 		}
1024 		if (p < e + needle_u8_len) {
1025 			goto out;
1026 		}
1027 		p -= needle_u8_len;
1028 		while (p >= e) {
1029 			const unsigned char *pv = p;
1030 			q = needle_u8_val;
1031 			for (;;) {
1032 				if (q == qe) {
1033 					result = 0;
1034 					p -= needle_u8_len;
1035 					while (p > haystack_u8_val) {
1036 						unsigned char c = *--p;
1037 						if (c < 0x80) {
1038 							++result;
1039 						} else if ((c & 0xc0) != 0x80) {
1040 							++result;
1041 						}
1042 					}
1043 					goto out;
1044 				}
1045 				if (*q != *p) {
1046 					break;
1047 				}
1048 				++p, ++q;
1049 			}
1050 			p -= jtbl[*p];
1051 			if (p >= pv) {
1052 				p = pv - 1;
1053 			}
1054 		}
1055 	}
1056 out:
1057 	if (haystack_u8 == &_haystack_u8) {
1058 		mbfl_string_clear(&_haystack_u8);
1059 	}
1060 	if (needle_u8 == &_needle_u8) {
1061 		mbfl_string_clear(&_needle_u8);
1062 	}
1063 	return result;
1064 }
1065 
1066 /*
1067  *  substr_count
1068  */
1069 
1070 int
mbfl_substr_count(mbfl_string * haystack,mbfl_string * needle)1071 mbfl_substr_count(
1072     mbfl_string *haystack,
1073     mbfl_string *needle
1074    )
1075 {
1076 	int n, result = 0;
1077 	unsigned char *p;
1078 	mbfl_convert_filter *filter;
1079 	struct collector_strpos_data pc;
1080 
1081 	if (haystack == NULL || needle == NULL) {
1082 		return -8;
1083 	}
1084 	/* needle is converted into wchar */
1085 	mbfl_wchar_device_init(&pc.needle);
1086 	filter = mbfl_convert_filter_new(
1087 	  needle->no_encoding,
1088 	  mbfl_no_encoding_wchar,
1089 	  mbfl_wchar_device_output, 0, &pc.needle);
1090 	if (filter == NULL) {
1091 		return -4;
1092 	}
1093 	p = needle->val;
1094 	n = needle->len;
1095 	if (p != NULL) {
1096 		while (n > 0) {
1097 			if ((*filter->filter_function)(*p++, filter) < 0) {
1098 				break;
1099 			}
1100 			n--;
1101 		}
1102 	}
1103 	mbfl_convert_filter_flush(filter);
1104 	mbfl_convert_filter_delete(filter);
1105 	pc.needle_len = pc.needle.pos;
1106 	if (pc.needle.buffer == NULL) {
1107 		return -4;
1108 	}
1109 	if (pc.needle_len <= 0) {
1110 		mbfl_wchar_device_clear(&pc.needle);
1111 		return -2;
1112 	}
1113 	/* initialize filter and collector data */
1114 	filter = mbfl_convert_filter_new(
1115 	  haystack->no_encoding,
1116 	  mbfl_no_encoding_wchar,
1117 	  collector_strpos, 0, &pc);
1118 	if (filter == NULL) {
1119 		mbfl_wchar_device_clear(&pc.needle);
1120 		return -4;
1121 	}
1122 	pc.start = 0;
1123 	pc.output = 0;
1124 	pc.needle_pos = 0;
1125 	pc.found_pos = 0;
1126 	pc.matched_pos = -1;
1127 
1128 	/* feed data */
1129 	p = haystack->val;
1130 	n = haystack->len;
1131 	if (p != NULL) {
1132 		while (n > 0) {
1133 			if ((*filter->filter_function)(*p++, filter) < 0) {
1134 				pc.matched_pos = -4;
1135 				break;
1136 			}
1137 			if (pc.matched_pos >= 0) {
1138 				++result;
1139 				pc.matched_pos = -1;
1140 				pc.needle_pos = 0;
1141 			}
1142 			n--;
1143 		}
1144 	}
1145 	mbfl_convert_filter_flush(filter);
1146 	mbfl_convert_filter_delete(filter);
1147 	mbfl_wchar_device_clear(&pc.needle);
1148 
1149 	return result;
1150 }
1151 
1152 /*
1153  *  substr
1154  */
1155 struct collector_substr_data {
1156 	mbfl_convert_filter *next_filter;
1157 	int start;
1158 	int stop;
1159 	int output;
1160 };
1161 
1162 static int
collector_substr(int c,void * data)1163 collector_substr(int c, void* data)
1164 {
1165 	struct collector_substr_data *pc = (struct collector_substr_data*)data;
1166 
1167 	if (pc->output >= pc->stop) {
1168 		return -1;
1169 	}
1170 
1171 	if (pc->output >= pc->start) {
1172 		(*pc->next_filter->filter_function)(c, pc->next_filter);
1173 	}
1174 
1175 	pc->output++;
1176 
1177 	return c;
1178 }
1179 
1180 mbfl_string *
mbfl_substr(mbfl_string * string,mbfl_string * result,int from,int length)1181 mbfl_substr(
1182     mbfl_string *string,
1183     mbfl_string *result,
1184     int from,
1185     int length)
1186 {
1187 	const mbfl_encoding *encoding;
1188 	int n, m, k, len, start, end;
1189 	unsigned char *p, *w;
1190 	const unsigned char *mbtab;
1191 
1192 	encoding = mbfl_no2encoding(string->no_encoding);
1193 	if (encoding == NULL || string == NULL || result == NULL) {
1194 		return NULL;
1195 	}
1196 	mbfl_string_init(result);
1197 	result->no_language = string->no_language;
1198 	result->no_encoding = string->no_encoding;
1199 
1200 	if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
1201 	   encoding->mblen_table != NULL) {
1202 		len = string->len;
1203 		start = from;
1204 		end = from + length;
1205 		if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1206 			start *= 2;
1207 			end = start + length*2;
1208 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1209 			start *= 4;
1210 			end = start + length*4;
1211 		} else if (encoding->mblen_table != NULL) {
1212 			mbtab = encoding->mblen_table;
1213 			start = 0;
1214 			end = 0;
1215 			n = 0;
1216 			k = 0;
1217 			p = string->val;
1218 			if (p != NULL) {
1219 				/* search start position */
1220 				while (k <= from) {
1221 					start = n;
1222 					if (n >= len) {
1223 						break;
1224 					}
1225 					m = mbtab[*p];
1226 					n += m;
1227 					p += m;
1228 					k++;
1229 				}
1230 				/* detect end position */
1231 				k = 0;
1232 				end = start;
1233 				while (k < length) {
1234 					end = n;
1235 					if (n >= len) {
1236 						break;
1237 					}
1238 					m = mbtab[*p];
1239 					n += m;
1240 					p += m;
1241 					k++;
1242 				}
1243 			}
1244 		}
1245 
1246 		if (start > len) {
1247 			start = len;
1248 		}
1249 		if (start < 0) {
1250 			start = 0;
1251 		}
1252 		if (end > len) {
1253 			end = len;
1254 		}
1255 		if (end < 0) {
1256 			end = 0;
1257 		}
1258 		if (start > end) {
1259 			start = end;
1260 		}
1261 
1262 		/* allocate memory and copy */
1263 		n = end - start;
1264 		result->len = 0;
1265 		result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char));
1266 		if (w != NULL) {
1267 			p = string->val;
1268 			if (p != NULL) {
1269 				p += start;
1270 				result->len = n;
1271 				while (n > 0) {
1272 					*w++ = *p++;
1273 					n--;
1274 				}
1275 			}
1276 			*w++ = '\0';
1277 			*w++ = '\0';
1278 			*w++ = '\0';
1279 			*w = '\0';
1280 		} else {
1281 			result = NULL;
1282 		}
1283 	} else {
1284 		mbfl_memory_device device;
1285 		struct collector_substr_data pc;
1286 		mbfl_convert_filter *decoder;
1287 		mbfl_convert_filter *encoder;
1288 
1289 		mbfl_memory_device_init(&device, length + 1, 0);
1290 		mbfl_string_init(result);
1291 		result->no_language = string->no_language;
1292 		result->no_encoding = string->no_encoding;
1293 		/* output code filter */
1294 		decoder = mbfl_convert_filter_new(
1295 		    mbfl_no_encoding_wchar,
1296 		    string->no_encoding,
1297 		    mbfl_memory_device_output, 0, &device);
1298 		/* wchar filter */
1299 		encoder = mbfl_convert_filter_new(
1300 		    string->no_encoding,
1301 		    mbfl_no_encoding_wchar,
1302 		    collector_substr, 0, &pc);
1303 		if (decoder == NULL || encoder == NULL) {
1304 			mbfl_convert_filter_delete(encoder);
1305 			mbfl_convert_filter_delete(decoder);
1306 			return NULL;
1307 		}
1308 		pc.next_filter = decoder;
1309 		pc.start = from;
1310 		pc.stop = from + length;
1311 		pc.output = 0;
1312 
1313 		/* feed data */
1314 		p = string->val;
1315 		n = string->len;
1316 		if (p != NULL) {
1317 			while (n > 0) {
1318 				if ((*encoder->filter_function)(*p++, encoder) < 0) {
1319 					break;
1320 				}
1321 				n--;
1322 			}
1323 		}
1324 
1325 		mbfl_convert_filter_flush(encoder);
1326 		mbfl_convert_filter_flush(decoder);
1327 		result = mbfl_memory_device_result(&device, result);
1328 		mbfl_convert_filter_delete(encoder);
1329 		mbfl_convert_filter_delete(decoder);
1330 	}
1331 
1332 	return result;
1333 }
1334 
1335 /*
1336  *  strcut
1337  */
1338 mbfl_string *
mbfl_strcut(mbfl_string * string,mbfl_string * result,int from,int length)1339 mbfl_strcut(
1340     mbfl_string *string,
1341     mbfl_string *result,
1342     int from,
1343     int length)
1344 {
1345 	const mbfl_encoding *encoding;
1346 	mbfl_memory_device device;
1347 
1348 	/* validate the parameters */
1349 	if (string == NULL || string->val == NULL || result == NULL) {
1350 		return NULL;
1351 	}
1352 
1353 	if (from < 0 || length < 0) {
1354 		return NULL;
1355 	}
1356 
1357 	if (from >= string->len) {
1358 		from = string->len;
1359 	}
1360 
1361 	encoding = mbfl_no2encoding(string->no_encoding);
1362 	if (encoding == NULL) {
1363 		return NULL;
1364 	}
1365 
1366 	mbfl_string_init(result);
1367 	result->no_language = string->no_language;
1368 	result->no_encoding = string->no_encoding;
1369 
1370 	if ((encoding->flag & (MBFL_ENCTYPE_SBCS
1371 				| MBFL_ENCTYPE_WCS2BE
1372 				| MBFL_ENCTYPE_WCS2LE
1373 				| MBFL_ENCTYPE_WCS4BE
1374 				| MBFL_ENCTYPE_WCS4LE))
1375 			|| encoding->mblen_table != NULL) {
1376 		const unsigned char *start = NULL;
1377 		const unsigned char *end = NULL;
1378 		unsigned char *w;
1379 		unsigned int sz;
1380 
1381 		if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1382 			from &= -2;
1383 
1384 			if (from + length >= string->len) {
1385 				length = string->len - from;
1386 			}
1387 
1388 			start = string->val + from;
1389 			end   = start + (length & -2);
1390 		} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1391 			from &= -4;
1392 
1393 			if (from + length >= string->len) {
1394 				length = string->len - from;
1395 			}
1396 
1397 			start = string->val + from;
1398 			end   = start + (length & -4);
1399 		} else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
1400 			if (from + length >= string->len) {
1401 				length = string->len - from;
1402 			}
1403 
1404 			start = string->val + from;
1405 			end = start + length;
1406 		} else if (encoding->mblen_table != NULL) {
1407 			const unsigned char *mbtab = encoding->mblen_table;
1408 			const unsigned char *p, *q;
1409 			int m;
1410 
1411 			/* search start position */
1412 			for (m = 0, p = string->val, q = p + from;
1413 					p < q; p += (m = mbtab[*p]));
1414 
1415 			if (p > q) {
1416 				p -= m;
1417 			}
1418 
1419 			start = p;
1420 
1421 			/* search end position */
1422 			if ((start - string->val) + length >= (int)string->len) {
1423 				end = string->val + string->len;
1424 			} else {
1425 				for (q = p + length; p < q; p += (m = mbtab[*p]));
1426 
1427 				if (p > q) {
1428 					p -= m;
1429 				}
1430 				end = p;
1431 			}
1432 		} else {
1433 			/* never reached */
1434 			return NULL;
1435 		}
1436 
1437 		/* allocate memory and copy string */
1438 		sz = end - start;
1439 		if ((w = (unsigned char*)mbfl_calloc(sz + 8,
1440 				sizeof(unsigned char))) == NULL) {
1441 			return NULL;
1442 		}
1443 
1444 		memcpy(w, start, sz);
1445 		w[sz] = '\0';
1446 		w[sz + 1] = '\0';
1447 		w[sz + 2] = '\0';
1448 		w[sz + 3] = '\0';
1449 
1450 		result->val = w;
1451 		result->len = sz;
1452 	} else {
1453 		mbfl_convert_filter *encoder     = NULL;
1454 		mbfl_convert_filter *decoder     = NULL;
1455 		const unsigned char *p, *q, *r;
1456 		struct {
1457 			mbfl_convert_filter encoder;
1458 			mbfl_convert_filter decoder;
1459 			const unsigned char *p;
1460 			int pos;
1461 		} bk, _bk;
1462 
1463 		/* output code filter */
1464 		if (!(decoder = mbfl_convert_filter_new(
1465 				mbfl_no_encoding_wchar,
1466 				string->no_encoding,
1467 				mbfl_memory_device_output, 0, &device))) {
1468 			return NULL;
1469 		}
1470 
1471 		/* wchar filter */
1472 		if (!(encoder = mbfl_convert_filter_new(
1473 				string->no_encoding,
1474 				mbfl_no_encoding_wchar,
1475 				mbfl_filter_output_null,
1476 				NULL, NULL))) {
1477 			mbfl_convert_filter_delete(decoder);
1478 			return NULL;
1479 		}
1480 
1481 		mbfl_memory_device_init(&device, length + 8, 0);
1482 
1483 		p = string->val;
1484 
1485 		/* search start position */
1486 		for (q = string->val + from; p < q; p++) {
1487 			(*encoder->filter_function)(*p, encoder);
1488 		}
1489 
1490 		/* switch the drain direction */
1491 		encoder->output_function = (int(*)(int,void *))decoder->filter_function;
1492 		encoder->flush_function = (int(*)(void *))decoder->filter_flush;
1493 		encoder->data = decoder;
1494 
1495 		q = string->val + string->len;
1496 
1497 		/* save the encoder, decoder state and the pointer */
1498 		mbfl_convert_filter_copy(decoder, &_bk.decoder);
1499 		mbfl_convert_filter_copy(encoder, &_bk.encoder);
1500 		_bk.p = p;
1501 		_bk.pos = device.pos;
1502 
1503 		if (length > q - p) {
1504 			length = q - p;
1505 		}
1506 
1507 		if (length >= 20) {
1508 			/* output a little shorter than "length" */
1509 			/* XXX: the constant "20" was determined purely on the heuristics. */
1510 			for (r = p + length - 20; p < r; p++) {
1511 				(*encoder->filter_function)(*p, encoder);
1512 			}
1513 
1514 			/* if the offset of the resulting string exceeds the length,
1515 			 * then restore the state */
1516 			if (device.pos > length) {
1517 				p = _bk.p;
1518 				device.pos = _bk.pos;
1519 				decoder->filter_dtor(decoder);
1520 				encoder->filter_dtor(encoder);
1521 				mbfl_convert_filter_copy(&_bk.decoder, decoder);
1522 				mbfl_convert_filter_copy(&_bk.encoder, encoder);
1523 				bk = _bk;
1524 			} else {
1525 				/* save the encoder, decoder state and the pointer */
1526 				mbfl_convert_filter_copy(decoder, &bk.decoder);
1527 				mbfl_convert_filter_copy(encoder, &bk.encoder);
1528 				bk.p = p;
1529 				bk.pos = device.pos;
1530 
1531 				/* flush the stream */
1532 				(*encoder->filter_flush)(encoder);
1533 
1534 				/* if the offset of the resulting string exceeds the length,
1535 				 * then restore the state */
1536 				if (device.pos > length) {
1537 					bk.decoder.filter_dtor(&bk.decoder);
1538 					bk.encoder.filter_dtor(&bk.encoder);
1539 
1540 					p = _bk.p;
1541 					device.pos = _bk.pos;
1542 					decoder->filter_dtor(decoder);
1543 					encoder->filter_dtor(encoder);
1544 					mbfl_convert_filter_copy(&_bk.decoder, decoder);
1545 					mbfl_convert_filter_copy(&_bk.encoder, encoder);
1546 					bk = _bk;
1547 				} else {
1548 					_bk.decoder.filter_dtor(&_bk.decoder);
1549 					_bk.encoder.filter_dtor(&_bk.encoder);
1550 
1551 					p = bk.p;
1552 					device.pos = bk.pos;
1553 					decoder->filter_dtor(decoder);
1554 					encoder->filter_dtor(encoder);
1555 					mbfl_convert_filter_copy(&bk.decoder, decoder);
1556 					mbfl_convert_filter_copy(&bk.encoder, encoder);
1557 				}
1558 			}
1559 		} else {
1560 			bk = _bk;
1561 		}
1562 
1563 		/* detect end position */
1564 		while (p < q) {
1565 			(*encoder->filter_function)(*p, encoder);
1566 
1567 			if (device.pos > length) {
1568 				/* restore filter */
1569 				p = bk.p;
1570 				device.pos = bk.pos;
1571 				decoder->filter_dtor(decoder);
1572 				encoder->filter_dtor(encoder);
1573 				mbfl_convert_filter_copy(&bk.decoder, decoder);
1574 				mbfl_convert_filter_copy(&bk.encoder, encoder);
1575 				break;
1576 			}
1577 
1578 			p++;
1579 
1580 			/* backup current state */
1581 			mbfl_convert_filter_copy(decoder, &_bk.decoder);
1582 			mbfl_convert_filter_copy(encoder, &_bk.encoder);
1583 			_bk.pos = device.pos;
1584 			_bk.p = p;
1585 
1586 			(*encoder->filter_flush)(encoder);
1587 
1588 			if (device.pos > length) {
1589 				_bk.decoder.filter_dtor(&_bk.decoder);
1590 				_bk.encoder.filter_dtor(&_bk.encoder);
1591 
1592 				/* restore filter */
1593 				p = bk.p;
1594 				device.pos = bk.pos;
1595 				decoder->filter_dtor(decoder);
1596 				encoder->filter_dtor(encoder);
1597 				mbfl_convert_filter_copy(&bk.decoder, decoder);
1598 				mbfl_convert_filter_copy(&bk.encoder, encoder);
1599 				break;
1600 			}
1601 
1602 			bk.decoder.filter_dtor(&bk.decoder);
1603 			bk.encoder.filter_dtor(&bk.encoder);
1604 
1605 			p = _bk.p;
1606 			device.pos = _bk.pos;
1607 			decoder->filter_dtor(decoder);
1608 			encoder->filter_dtor(encoder);
1609 			mbfl_convert_filter_copy(&_bk.decoder, decoder);
1610 			mbfl_convert_filter_copy(&_bk.encoder, encoder);
1611 
1612 			bk = _bk;
1613 		}
1614 
1615 		(*encoder->filter_flush)(encoder);
1616 
1617 		bk.decoder.filter_dtor(&bk.decoder);
1618 		bk.encoder.filter_dtor(&bk.encoder);
1619 
1620 		result = mbfl_memory_device_result(&device, result);
1621 
1622 		mbfl_convert_filter_delete(encoder);
1623 		mbfl_convert_filter_delete(decoder);
1624 	}
1625 
1626 	return result;
1627 }
1628 
1629 
1630 /*
1631  *  strwidth
1632  */
is_fullwidth(int c)1633 static int is_fullwidth(int c)
1634 {
1635 	int i;
1636 
1637 	if (c < mbfl_eaw_table[0].begin) {
1638 		return 0;
1639 	}
1640 
1641 	for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1642 		if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1643 			return 1;
1644 		}
1645 	}
1646 
1647 	return 0;
1648 }
1649 
/*
 * Output callback used by mbfl_strwidth(): adds the display width of `c`
 * (2 when is_fullwidth() reports it as wide, 1 otherwise) to the int
 * counter that `data` points to, and passes `c` through.
 */
static int
filter_count_width(int c, void *data)
{
	int *total = (int *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}

	return c;
}
1656 
1657 int
mbfl_strwidth(mbfl_string * string)1658 mbfl_strwidth(mbfl_string *string)
1659 {
1660 	int len, n;
1661 	unsigned char *p;
1662 	mbfl_convert_filter *filter;
1663 
1664 	len = 0;
1665 	if (string->len > 0 && string->val != NULL) {
1666 		/* wchar filter */
1667 		filter = mbfl_convert_filter_new(
1668 		    string->no_encoding,
1669 		    mbfl_no_encoding_wchar,
1670 		    filter_count_width, 0, &len);
1671 		if (filter == NULL) {
1672 			mbfl_convert_filter_delete(filter);
1673 			return -1;
1674 		}
1675 
1676 		/* feed data */
1677 		p = string->val;
1678 		n = string->len;
1679 		while (n > 0) {
1680 			(*filter->filter_function)(*p++, filter);
1681 			n--;
1682 		}
1683 
1684 		mbfl_convert_filter_flush(filter);
1685 		mbfl_convert_filter_delete(filter);
1686 	}
1687 
1688 	return len;
1689 }
1690 
1691 
1692 /*
1693  *  strimwidth
1694  */
/*
 * State shared between mbfl_strimwidth() and its collector_strimwidth()
 * callback.
 */
1695 struct collector_strimwidth_data {
1696 	mbfl_convert_filter *decoder;	/* receives every character that is kept */
1697 	mbfl_convert_filter *decoder_backup;	/* copy of `decoder` taken when the width is first exceeded */
1698 	mbfl_memory_device device;	/* output buffer; its `pos` is what `endpos` records */
1699 	int from;	/* characters with index below this are skipped */
1700 	int width;	/* maximum accumulated width before truncation starts */
1701 	int outwidth;	/* width accumulated so far (2 per wide character, 1 otherwise) */
1702 	int outchar;	/* number of characters seen while status is not 10 */
1703 	int status;	/* 0 at start, incremented on each over-width character; 10 = pass everything through */
1704 	int endpos;	/* device.pos saved at the first over-width character */
1705 };
1706 
1707 static int
collector_strimwidth(int c,void * data)1708 collector_strimwidth(int c, void* data)
1709 {
1710 	struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1711 
1712 	switch (pc->status) {
1713 	case 10:
1714 		(*pc->decoder->filter_function)(c, pc->decoder);
1715 		break;
1716 	default:
1717 		if (pc->outchar >= pc->from) {
1718 			pc->outwidth += (is_fullwidth(c) ? 2: 1);
1719 
1720 			if (pc->outwidth > pc->width) {
1721 				if (pc->status == 0) {
1722 					pc->endpos = pc->device.pos;
1723 					mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1724 				}
1725 				pc->status++;
1726 				(*pc->decoder->filter_function)(c, pc->decoder);
1727 				c = -1;
1728 			} else {
1729 				(*pc->decoder->filter_function)(c, pc->decoder);
1730 			}
1731 		}
1732 		pc->outchar++;
1733 		break;
1734 	}
1735 
1736 	return c;
1737 }
1738 
1739 mbfl_string *
mbfl_strimwidth(mbfl_string * string,mbfl_string * marker,mbfl_string * result,int from,int width)1740 mbfl_strimwidth(
1741     mbfl_string *string,
1742     mbfl_string *marker,
1743     mbfl_string *result,
1744     int from,
1745     int width)
1746 {
1747 	struct collector_strimwidth_data pc;
1748 	mbfl_convert_filter *encoder;
1749 	int n, mkwidth;
1750 	unsigned char *p;
1751 
1752 	if (string == NULL || result == NULL) {
1753 		return NULL;
1754 	}
1755 	mbfl_string_init(result);
1756 	result->no_language = string->no_language;
1757 	result->no_encoding = string->no_encoding;
1758 	mbfl_memory_device_init(&pc.device, width, 0);
1759 
1760 	/* output code filter */
1761 	pc.decoder = mbfl_convert_filter_new(
1762 	    mbfl_no_encoding_wchar,
1763 	    string->no_encoding,
1764 	    mbfl_memory_device_output, 0, &pc.device);
1765 	pc.decoder_backup = mbfl_convert_filter_new(
1766 	    mbfl_no_encoding_wchar,
1767 	    string->no_encoding,
1768 	    mbfl_memory_device_output, 0, &pc.device);
1769 	/* wchar filter */
1770 	encoder = mbfl_convert_filter_new(
1771 	    string->no_encoding,
1772 	    mbfl_no_encoding_wchar,
1773 	    collector_strimwidth, 0, &pc);
1774 	if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1775 		mbfl_convert_filter_delete(encoder);
1776 		mbfl_convert_filter_delete(pc.decoder);
1777 		mbfl_convert_filter_delete(pc.decoder_backup);
1778 		return NULL;
1779 	}
1780 	mkwidth = 0;
1781 	if (marker) {
1782 		mkwidth = mbfl_strwidth(marker);
1783 	}
1784 	pc.from = from;
1785 	pc.width = width - mkwidth;
1786 	pc.outwidth = 0;
1787 	pc.outchar = 0;
1788 	pc.status = 0;
1789 	pc.endpos = 0;
1790 
1791 	/* feed data */
1792 	p = string->val;
1793 	n = string->len;
1794 	if (p != NULL) {
1795 		while (n > 0) {
1796 			n--;
1797 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
1798 				break;
1799 			}
1800 		}
1801 		mbfl_convert_filter_flush(encoder);
1802 		if (pc.status != 0 && mkwidth > 0) {
1803 			pc.width += mkwidth;
1804 			while (n > 0) {
1805 				if ((*encoder->filter_function)(*p++, encoder) < 0) {
1806 					break;
1807 				}
1808 				n--;
1809 			}
1810 			mbfl_convert_filter_flush(encoder);
1811 			if (pc.status != 1) {
1812 				pc.status = 10;
1813 				pc.device.pos = pc.endpos;
1814 				mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1815 				mbfl_convert_filter_reset(encoder, marker->no_encoding, mbfl_no_encoding_wchar);
1816 				p = marker->val;
1817 				n = marker->len;
1818 				while (n > 0) {
1819 					if ((*encoder->filter_function)(*p++, encoder) < 0) {
1820 						break;
1821 					}
1822 					n--;
1823 				}
1824 				mbfl_convert_filter_flush(encoder);
1825 			}
1826 		} else if (pc.status != 0) {
1827 			pc.device.pos = pc.endpos;
1828 			mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1829 		}
1830 		mbfl_convert_filter_flush(pc.decoder);
1831 	}
1832 	result = mbfl_memory_device_result(&pc.device, result);
1833 	mbfl_convert_filter_delete(encoder);
1834 	mbfl_convert_filter_delete(pc.decoder);
1835 	mbfl_convert_filter_delete(pc.decoder_backup);
1836 
1837 	return result;
1838 }
1839 
1840 mbfl_string *
mbfl_ja_jp_hantozen(mbfl_string * string,mbfl_string * result,int mode)1841 mbfl_ja_jp_hantozen(
1842     mbfl_string *string,
1843     mbfl_string *result,
1844     int mode)
1845 {
1846 	int n;
1847 	unsigned char *p;
1848 	const mbfl_encoding *encoding;
1849 	mbfl_memory_device device;
1850 	mbfl_convert_filter *decoder = NULL;
1851 	mbfl_convert_filter *encoder = NULL;
1852 	mbfl_convert_filter *tl_filter = NULL;
1853 	mbfl_convert_filter *next_filter = NULL;
1854 	mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1855 
1856 	/* validate parameters */
1857 	if (string == NULL || result == NULL) {
1858 		return NULL;
1859 	}
1860 
1861 	encoding = mbfl_no2encoding(string->no_encoding);
1862 	if (encoding == NULL) {
1863 		return NULL;
1864 	}
1865 
1866 	mbfl_memory_device_init(&device, string->len, 0);
1867 	mbfl_string_init(result);
1868 
1869 	result->no_language = string->no_language;
1870 	result->no_encoding = string->no_encoding;
1871 
1872 	decoder = mbfl_convert_filter_new(
1873 		mbfl_no_encoding_wchar,
1874 		string->no_encoding,
1875 		mbfl_memory_device_output, 0, &device);
1876 	if (decoder == NULL) {
1877 		goto out;
1878 	}
1879 	next_filter = decoder;
1880 
1881 	param =
1882 		(mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
1883 	if (param == NULL) {
1884 		goto out;
1885 	}
1886 
1887 	param->mode = mode;
1888 
1889 	tl_filter = mbfl_convert_filter_new2(
1890 		&vtbl_tl_jisx0201_jisx0208,
1891 		(int(*)(int, void*))next_filter->filter_function,
1892 		(int(*)(void*))next_filter->filter_flush,
1893 		next_filter);
1894 	if (tl_filter == NULL) {
1895 		mbfl_free(param);
1896 		goto out;
1897 	}
1898 
1899 	tl_filter->opaque = param;
1900 	next_filter = tl_filter;
1901 
1902 	encoder = mbfl_convert_filter_new(
1903 		string->no_encoding,
1904 		mbfl_no_encoding_wchar,
1905 		(int(*)(int, void*))next_filter->filter_function,
1906 		(int(*)(void*))next_filter->filter_flush,
1907 		next_filter);
1908 	if (encoder == NULL) {
1909 		goto out;
1910 	}
1911 
1912 	/* feed data */
1913 	p = string->val;
1914 	n = string->len;
1915 	if (p != NULL) {
1916 		while (n > 0) {
1917 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
1918 				break;
1919 			}
1920 			n--;
1921 		}
1922 	}
1923 
1924 	mbfl_convert_filter_flush(encoder);
1925 	result = mbfl_memory_device_result(&device, result);
1926 out:
1927 	if (tl_filter != NULL) {
1928 		if (tl_filter->opaque != NULL) {
1929 			mbfl_free(tl_filter->opaque);
1930 		}
1931 		mbfl_convert_filter_delete(tl_filter);
1932 	}
1933 
1934 	if (decoder != NULL) {
1935 		mbfl_convert_filter_delete(decoder);
1936 	}
1937 
1938 	if (encoder != NULL) {
1939 		mbfl_convert_filter_delete(encoder);
1940 	}
1941 
1942 	return result;
1943 }
1944 
1945 
1946 /*
1947  *  MIME header encode
1948  */
/*
 * Working state of the MIME header encoder: the filter chain, two output
 * buffers and the bookkeeping used for line folding.
 */
1949 struct mime_header_encoder_data {
1950 	mbfl_convert_filter *conv1_filter;	/* input encoding -> wchar; drains into mime_header_encoder_collector() */
1951 	mbfl_convert_filter *block_filter;	/* wchar -> wchar; drains into mime_header_encoder_block_collector() */
1952 	mbfl_convert_filter *conv2_filter;	/* wchar -> output charset; piped into encod_filter */
1953 	mbfl_convert_filter *conv2_filter_backup;	/* saved copy of conv2_filter used around trial encoding */
1954 	mbfl_convert_filter *encod_filter;	/* output charset -> transfer encoding; writes to outdev */
1955 	mbfl_convert_filter *encod_filter_backup;	/* saved copy of encod_filter used around trial encoding */
1956 	mbfl_memory_device outdev;	/* the encoded header being built */
1957 	mbfl_memory_device tmpdev;	/* plain characters collected but not yet written to outdev */
1958 	int status1;	/* collector state: 0 and 1 while in plain text, 11 once an encoded word has started */
1959 	int status2;	/* block collector state: 1 after the encoded-word prefix has been written */
1960 	int prevpos;	/* outdev.pos saved before a trial encoding */
1961 	int linehead;	/* outdev.pos at the start of the current output line */
1962 	int firstindent;	/* extra columns counted against the line limit until the first fold */
1963 	int encnamelen;	/* length of encname */
1964 	int lwsplen;	/* length of lwsp */
1965 	char encname[128];	/* encoded-word prefix, e.g. "=?ISO-2022-JP?B?" */
1966 	char lwsp[16];	/* folding sequence: line feed followed by a space */
1967 };
1968 
1969 static int
mime_header_encoder_block_collector(int c,void * data)1970 mime_header_encoder_block_collector(int c, void *data)
1971 {
1972 	int n;
1973 	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
1974 
1975 	switch (pe->status2) {
1976 	case 1:	/* encoded word */
1977 		pe->prevpos = pe->outdev.pos;
1978 		mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
1979 		mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
1980 		(*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1981 		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1982 		(*pe->encod_filter->filter_flush)(pe->encod_filter);
1983 		n = pe->outdev.pos - pe->linehead + pe->firstindent;
1984 		pe->outdev.pos = pe->prevpos;
1985 		mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
1986 		mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
1987 		if (n >= 74) {
1988 			(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
1989 			(*pe->encod_filter->filter_flush)(pe->encod_filter);
1990 			mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);	/* ?= */
1991 			mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
1992 			pe->linehead = pe->outdev.pos;
1993 			pe->firstindent = 0;
1994 			mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
1995 			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1996 		} else {
1997 			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
1998 		}
1999 		break;
2000 
2001 	default:
2002 		mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
2003 		c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2004 		pe->status2 = 1;
2005 		break;
2006 	}
2007 
2008 	return c;
2009 }
2010 
2011 static int
mime_header_encoder_collector(int c,void * data)2012 mime_header_encoder_collector(int c, void *data)
2013 {
2014 	static int qp_table[256] = {
2015 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2016 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2017 		1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
2018 		0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
2019 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
2020 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
2021 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
2022 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
2023 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
2024 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
2025 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
2026 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
2027 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
2028 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
2029 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
2030 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  /* 0xF0 */
2031 	};
2032 
2033 	int n;
2034 	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
2035 
2036 	switch (pe->status1) {
2037 	case 11:	/* encoded word */
2038 		(*pe->block_filter->filter_function)(c, pe->block_filter);
2039 		break;
2040 
2041 	default:	/* ASCII */
2042 		if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
2043 			mbfl_memory_device_output(c, &pe->tmpdev);
2044 			pe->status1 = 1;
2045 		} else if (pe->status1 == 0 && c == 0x20) {	/* repeat SPACE */
2046 			mbfl_memory_device_output(c, &pe->tmpdev);
2047 		} else {
2048 			if (pe->tmpdev.pos < 74 && c == 0x20) {
2049 				n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
2050 				if (n > 74) {
2051 					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
2052 					pe->linehead = pe->outdev.pos;
2053 					pe->firstindent = 0;
2054 				} else if (pe->outdev.pos > 0) {
2055 					mbfl_memory_device_output(0x20, &pe->outdev);
2056 				}
2057 				mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2058 				mbfl_memory_device_reset(&pe->tmpdev);
2059 				pe->status1 = 0;
2060 			} else {
2061 				n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
2062 				if (n > 60)  {
2063 					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
2064 					pe->linehead = pe->outdev.pos;
2065 					pe->firstindent = 0;
2066 				} else if (pe->outdev.pos > 0)  {
2067 					mbfl_memory_device_output(0x20, &pe->outdev);
2068 				}
2069 				mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
2070 				mbfl_memory_device_reset(&pe->tmpdev);
2071 				(*pe->block_filter->filter_function)(c, pe->block_filter);
2072 				pe->status1 = 11;
2073 			}
2074 		}
2075 		break;
2076 	}
2077 
2078 	return c;
2079 }
2080 
2081 mbfl_string *
mime_header_encoder_result(struct mime_header_encoder_data * pe,mbfl_string * result)2082 mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
2083 {
2084 	if (pe->status1 >= 10) {
2085 		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2086 		(*pe->encod_filter->filter_flush)(pe->encod_filter);
2087 		mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);		/* ?= */
2088 	} else if (pe->tmpdev.pos > 0) {
2089 		if (pe->outdev.pos > 0) {
2090 			if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) {
2091 				mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2092 			} else {
2093 				mbfl_memory_device_output(0x20, &pe->outdev);
2094 			}
2095 		}
2096 		mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2097 	}
2098 	mbfl_memory_device_reset(&pe->tmpdev);
2099 	pe->prevpos = 0;
2100 	pe->linehead = 0;
2101 	pe->status1 = 0;
2102 	pe->status2 = 0;
2103 
2104 	return mbfl_memory_device_result(&pe->outdev, result);
2105 }
2106 
2107 struct mime_header_encoder_data*
mime_header_encoder_new(enum mbfl_no_encoding incode,enum mbfl_no_encoding outcode,enum mbfl_no_encoding transenc)2108 mime_header_encoder_new(
2109     enum mbfl_no_encoding incode,
2110     enum mbfl_no_encoding outcode,
2111     enum mbfl_no_encoding transenc)
2112 {
2113 	int n;
2114 	const char *s;
2115 	const mbfl_encoding *outencoding;
2116 	struct mime_header_encoder_data *pe;
2117 
2118 	/* get output encoding and check MIME charset name */
2119 	outencoding = mbfl_no2encoding(outcode);
2120 	if (outencoding == NULL || outencoding->mime_name == NULL || outencoding->mime_name[0] == '\0') {
2121 		return NULL;
2122 	}
2123 
2124 	pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
2125 	if (pe == NULL) {
2126 		return NULL;
2127 	}
2128 
2129 	mbfl_memory_device_init(&pe->outdev, 0, 0);
2130 	mbfl_memory_device_init(&pe->tmpdev, 0, 0);
2131 	pe->prevpos = 0;
2132 	pe->linehead = 0;
2133 	pe->firstindent = 0;
2134 	pe->status1 = 0;
2135 	pe->status2 = 0;
2136 
2137 	/* make the encoding description string  exp. "=?ISO-2022-JP?B?" */
2138 	n = 0;
2139 	pe->encname[n++] = 0x3d;
2140 	pe->encname[n++] = 0x3f;
2141 	s = outencoding->mime_name;
2142 	while (*s) {
2143 		pe->encname[n++] = *s++;
2144 	}
2145 	pe->encname[n++] = 0x3f;
2146 	if (transenc == mbfl_no_encoding_qprint) {
2147 		pe->encname[n++] = 0x51;
2148 	} else {
2149 		pe->encname[n++] = 0x42;
2150 		transenc = mbfl_no_encoding_base64;
2151 	}
2152 	pe->encname[n++] = 0x3f;
2153 	pe->encname[n] = '\0';
2154 	pe->encnamelen = n;
2155 
2156 	n = 0;
2157 	pe->lwsp[n++] = 0x0d;
2158 	pe->lwsp[n++] = 0x0a;
2159 	pe->lwsp[n++] = 0x20;
2160 	pe->lwsp[n] = '\0';
2161 	pe->lwsplen = n;
2162 
2163 	/* transfer encode filter */
2164 	pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2165 	pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2166 
2167 	/* Output code filter */
2168 	pe->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2169 	pe->conv2_filter_backup = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2170 
2171 	/* encoded block filter */
2172 	pe->block_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, mbfl_no_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
2173 
2174 	/* Input code filter */
2175 	pe->conv1_filter = mbfl_convert_filter_new(incode, mbfl_no_encoding_wchar, mime_header_encoder_collector, 0, pe);
2176 
2177 	if (pe->encod_filter == NULL ||
2178 	    pe->encod_filter_backup == NULL ||
2179 	    pe->conv2_filter == NULL ||
2180 	    pe->conv2_filter_backup == NULL ||
2181 	    pe->conv1_filter == NULL) {
2182 		mime_header_encoder_delete(pe);
2183 		return NULL;
2184 	}
2185 
2186 	if (transenc == mbfl_no_encoding_qprint) {
2187 		pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
2188 		pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
2189 	} else {
2190 		pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
2191 		pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
2192 	}
2193 
2194 	return pe;
2195 }
2196 
2197 void
mime_header_encoder_delete(struct mime_header_encoder_data * pe)2198 mime_header_encoder_delete(struct mime_header_encoder_data *pe)
2199 {
2200 	if (pe) {
2201 		mbfl_convert_filter_delete(pe->conv1_filter);
2202 		mbfl_convert_filter_delete(pe->block_filter);
2203 		mbfl_convert_filter_delete(pe->conv2_filter);
2204 		mbfl_convert_filter_delete(pe->conv2_filter_backup);
2205 		mbfl_convert_filter_delete(pe->encod_filter);
2206 		mbfl_convert_filter_delete(pe->encod_filter_backup);
2207 		mbfl_memory_device_clear(&pe->outdev);
2208 		mbfl_memory_device_clear(&pe->tmpdev);
2209 		mbfl_free((void*)pe);
2210 	}
2211 }
2212 
2213 int
mime_header_encoder_feed(int c,struct mime_header_encoder_data * pe)2214 mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
2215 {
2216 	return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
2217 }
2218 
2219 mbfl_string *
mbfl_mime_header_encode(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding outcode,enum mbfl_no_encoding encoding,const char * linefeed,int indent)2220 mbfl_mime_header_encode(
2221     mbfl_string *string,
2222     mbfl_string *result,
2223     enum mbfl_no_encoding outcode,
2224     enum mbfl_no_encoding encoding,
2225     const char *linefeed,
2226     int indent)
2227 {
2228 	int n;
2229 	unsigned char *p;
2230 	struct mime_header_encoder_data *pe;
2231 
2232 	mbfl_string_init(result);
2233 	result->no_language = string->no_language;
2234 	result->no_encoding = mbfl_no_encoding_ascii;
2235 
2236 	pe = mime_header_encoder_new(string->no_encoding, outcode, encoding);
2237 	if (pe == NULL) {
2238 		return NULL;
2239 	}
2240 
2241 	if (linefeed != NULL) {
2242 		n = 0;
2243 		while (*linefeed && n < 8) {
2244 			pe->lwsp[n++] = *linefeed++;
2245 		}
2246 		pe->lwsp[n++] = 0x20;
2247 		pe->lwsp[n] = '\0';
2248 		pe->lwsplen = n;
2249 	}
2250 	if (indent > 0 && indent < 74) {
2251 		pe->firstindent = indent;
2252 	}
2253 
2254 	n = string->len;
2255 	p = string->val;
2256 	while (n > 0) {
2257 		(*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
2258 		n--;
2259 	}
2260 
2261 	result = mime_header_encoder_result(pe, result);
2262 	mime_header_encoder_delete(pe);
2263 
2264 	return result;
2265 }
2266 
2267 
/*
 *  MIME header decode
 */
2271 struct mime_header_decoder_data {
2272 	mbfl_convert_filter *deco_filter;
2273 	mbfl_convert_filter *conv1_filter;
2274 	mbfl_convert_filter *conv2_filter;
2275 	mbfl_memory_device outdev;
2276 	mbfl_memory_device tmpdev;
2277 	int cspos;
2278 	int status;
2279 	enum mbfl_no_encoding encoding;
2280 	enum mbfl_no_encoding incode;
2281 	enum mbfl_no_encoding outcode;
2282 };
2283 
2284 static int
mime_header_decoder_collector(int c,void * data)2285 mime_header_decoder_collector(int c, void* data)
2286 {
2287 	const mbfl_encoding *encoding;
2288 	struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;
2289 
2290 	switch (pd->status) {
2291 	case 1:
2292 		if (c == 0x3f) {		/* ? */
2293 			mbfl_memory_device_output(c, &pd->tmpdev);
2294 			pd->cspos = pd->tmpdev.pos;
2295 			pd->status = 2;
2296 		} else {
2297 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2298 			mbfl_memory_device_reset(&pd->tmpdev);
2299 			if (c == 0x3d) {		/* = */
2300 				mbfl_memory_device_output(c, &pd->tmpdev);
2301 			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2302 				pd->status = 9;
2303 			} else {
2304 				(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2305 				pd->status = 0;
2306 			}
2307 		}
2308 		break;
2309 	case 2:		/* store charset string */
2310 		if (c == 0x3f) {		/* ? */
2311 			/* identify charset */
2312 			mbfl_memory_device_output('\0', &pd->tmpdev);
2313 			encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
2314 			if (encoding != NULL) {
2315 				pd->incode = encoding->no_encoding;
2316 				pd->status = 3;
2317 			}
2318 			mbfl_memory_device_unput(&pd->tmpdev);
2319 			mbfl_memory_device_output(c, &pd->tmpdev);
2320 		} else {
2321 			mbfl_memory_device_output(c, &pd->tmpdev);
2322 			if (pd->tmpdev.pos > 100) {		/* too long charset string */
2323 				pd->status = 0;
2324 			} else if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2325 				mbfl_memory_device_unput(&pd->tmpdev);
2326 				pd->status = 9;
2327 			}
2328 			if (pd->status != 2) {
2329 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2330 				mbfl_memory_device_reset(&pd->tmpdev);
2331 			}
2332 		}
2333 		break;
2334 	case 3:		/* identify encoding */
2335 		mbfl_memory_device_output(c, &pd->tmpdev);
2336 		if (c == 0x42 || c == 0x62) {		/* 'B' or 'b' */
2337 			pd->encoding = mbfl_no_encoding_base64;
2338 			pd->status = 4;
2339 		} else if (c == 0x51 || c == 0x71) {	/* 'Q' or 'q' */
2340 			pd->encoding = mbfl_no_encoding_qprint;
2341 			pd->status = 4;
2342 		} else {
2343 			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2344 				mbfl_memory_device_unput(&pd->tmpdev);
2345 				pd->status = 9;
2346 			} else {
2347 				pd->status = 0;
2348 			}
2349 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2350 			mbfl_memory_device_reset(&pd->tmpdev);
2351 		}
2352 		break;
2353 	case 4:		/* reset filter */
2354 		mbfl_memory_device_output(c, &pd->tmpdev);
2355 		if (c == 0x3f) {		/* ? */
2356 			/* charset convert filter */
2357 			mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, mbfl_no_encoding_wchar);
2358 			/* decode filter */
2359 			mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, mbfl_no_encoding_8bit);
2360 			pd->status = 5;
2361 		} else {
2362 			if (c == 0x0d || c == 0x0a) {	/* CR or LF */
2363 				mbfl_memory_device_unput(&pd->tmpdev);
2364 				pd->status = 9;
2365 			} else {
2366 				pd->status = 0;
2367 			}
2368 			mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2369 		}
2370 		mbfl_memory_device_reset(&pd->tmpdev);
2371 		break;
2372 	case 5:		/* encoded block */
2373 		if (c == 0x3f) {		/* ? */
2374 			pd->status = 6;
2375 		} else {
2376 			(*pd->deco_filter->filter_function)(c, pd->deco_filter);
2377 		}
2378 		break;
2379 	case 6:		/* check end position */
2380 		if (c == 0x3d) {		/* = */
2381 			/* flush and reset filter */
2382 			(*pd->deco_filter->filter_flush)(pd->deco_filter);
2383 			(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2384 			mbfl_convert_filter_reset(pd->conv1_filter, mbfl_no_encoding_ascii, mbfl_no_encoding_wchar);
2385 			pd->status = 7;
2386 		} else {
2387 			(*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
2388 			if (c != 0x3f) {		/* ? */
2389 				(*pd->deco_filter->filter_function)(c, pd->deco_filter);
2390 				pd->status = 5;
2391 			}
2392 		}
2393 		break;
2394 	case 7:		/* after encoded block */
2395 		if (c == 0x0d || c == 0x0a) {	/* CR LF */
2396 			pd->status = 8;
2397 		} else {
2398 			mbfl_memory_device_output(c, &pd->tmpdev);
2399 			if (c == 0x3d) {		/* = */
2400 				pd->status = 1;
2401 			} else if (c != 0x20 && c != 0x09) {		/* not space */
2402 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2403 				mbfl_memory_device_reset(&pd->tmpdev);
2404 				pd->status = 0;
2405 			}
2406 		}
2407 		break;
2408 	case 8:		/* folding */
2409 	case 9:		/* folding */
2410 		if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
2411 			if (c == 0x3d) {		/* = */
2412 				if (pd->status == 8) {
2413 					mbfl_memory_device_output(0x20, &pd->tmpdev);	/* SPACE */
2414 				} else {
2415 					(*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
2416 				}
2417 				mbfl_memory_device_output(c, &pd->tmpdev);
2418 				pd->status = 1;
2419 			} else {
2420 				mbfl_memory_device_output(0x20, &pd->tmpdev);
2421 				mbfl_memory_device_output(c, &pd->tmpdev);
2422 				mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2423 				mbfl_memory_device_reset(&pd->tmpdev);
2424 				pd->status = 0;
2425 			}
2426 		}
2427 		break;
2428 	default:		/* non encoded block */
2429 		if (c == 0x0d || c == 0x0a) {	/* CR LF */
2430 			pd->status = 9;
2431 		} else if (c == 0x3d) {		/* = */
2432 			mbfl_memory_device_output(c, &pd->tmpdev);
2433 			pd->status = 1;
2434 		} else {
2435 			(*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2436 		}
2437 		break;
2438 	}
2439 
2440 	return c;
2441 }
2442 
2443 mbfl_string *
mime_header_decoder_result(struct mime_header_decoder_data * pd,mbfl_string * result)2444 mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2445 {
2446 	switch (pd->status) {
2447 	case 1:
2448 	case 2:
2449 	case 3:
2450 	case 4:
2451 	case 7:
2452 	case 8:
2453 	case 9:
2454 		mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2455 		break;
2456 	case 5:
2457 	case 6:
2458 		(*pd->deco_filter->filter_flush)(pd->deco_filter);
2459 		(*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2460 		break;
2461 	}
2462 	(*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2463 	mbfl_memory_device_reset(&pd->tmpdev);
2464 	pd->status = 0;
2465 
2466 	return mbfl_memory_device_result(&pd->outdev, result);
2467 }
2468 
2469 struct mime_header_decoder_data*
mime_header_decoder_new(enum mbfl_no_encoding outcode)2470 mime_header_decoder_new(enum mbfl_no_encoding outcode)
2471 {
2472 	struct mime_header_decoder_data *pd;
2473 
2474 	pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
2475 	if (pd == NULL) {
2476 		return NULL;
2477 	}
2478 
2479 	mbfl_memory_device_init(&pd->outdev, 0, 0);
2480 	mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2481 	pd->cspos = 0;
2482 	pd->status = 0;
2483 	pd->encoding = mbfl_no_encoding_pass;
2484 	pd->incode = mbfl_no_encoding_ascii;
2485 	pd->outcode = outcode;
2486 	/* charset convert filter */
2487 	pd->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2488 	pd->conv1_filter = mbfl_convert_filter_new(pd->incode, mbfl_no_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2489 	/* decode filter */
2490 	pd->deco_filter = mbfl_convert_filter_new(pd->encoding, mbfl_no_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2491 
2492 	if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2493 		mime_header_decoder_delete(pd);
2494 		return NULL;
2495 	}
2496 
2497 	return pd;
2498 }
2499 
2500 void
mime_header_decoder_delete(struct mime_header_decoder_data * pd)2501 mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2502 {
2503 	if (pd) {
2504 		mbfl_convert_filter_delete(pd->conv2_filter);
2505 		mbfl_convert_filter_delete(pd->conv1_filter);
2506 		mbfl_convert_filter_delete(pd->deco_filter);
2507 		mbfl_memory_device_clear(&pd->outdev);
2508 		mbfl_memory_device_clear(&pd->tmpdev);
2509 		mbfl_free((void*)pd);
2510 	}
2511 }
2512 
/*
 * Feed one byte of header text to the decoder.
 *
 * Thin public wrapper around mime_header_decoder_collector(); returns what
 * the collector returns.
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	void *state = pd;

	return mime_header_decoder_collector(c, state);
}
2518 
2519 mbfl_string *
mbfl_mime_header_decode(mbfl_string * string,mbfl_string * result,enum mbfl_no_encoding outcode)2520 mbfl_mime_header_decode(
2521     mbfl_string *string,
2522     mbfl_string *result,
2523     enum mbfl_no_encoding outcode)
2524 {
2525 	int n;
2526 	unsigned char *p;
2527 	struct mime_header_decoder_data *pd;
2528 
2529 	mbfl_string_init(result);
2530 	result->no_language = string->no_language;
2531 	result->no_encoding = outcode;
2532 
2533 	pd = mime_header_decoder_new(outcode);
2534 	if (pd == NULL) {
2535 		return NULL;
2536 	}
2537 
2538 	/* feed data */
2539 	n = string->len;
2540 	p = string->val;
2541 	while (n > 0) {
2542 		mime_header_decoder_collector(*p++, pd);
2543 		n--;
2544 	}
2545 
2546 	result = mime_header_decoder_result(pd, result);
2547 	mime_header_decoder_delete(pd);
2548 
2549 	return result;
2550 }
2551 
2552 
2553 
/*
 *  convert HTML numeric entity
 */
2557 struct collector_htmlnumericentity_data {
2558 	mbfl_convert_filter *decoder;
2559 	int status;
2560 	int cache;
2561 	int digit;
2562 	int *convmap;
2563 	int mapsize;
2564 };
2565 
2566 static int
collector_encode_htmlnumericentity(int c,void * data)2567 collector_encode_htmlnumericentity(int c, void *data)
2568 {
2569 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2570 	int f, n, s, r, d, size, *mapelm;
2571 
2572 	size = pc->mapsize;
2573 	f = 0;
2574 	n = 0;
2575 	while (n < size) {
2576 		mapelm = &(pc->convmap[n*4]);
2577 		if (c >= mapelm[0] && c <= mapelm[1]) {
2578 			s = (c + mapelm[2]) & mapelm[3];
2579 			if (s >= 0) {
2580 				(*pc->decoder->filter_function)(0x26, pc->decoder);	/* '&' */
2581 				(*pc->decoder->filter_function)(0x23, pc->decoder);	/* '#' */
2582 				r = 100000000;
2583 				s %= r;
2584 				while (r > 0) {
2585 					d = s/r;
2586 					if (d || f) {
2587 						f = 1;
2588 						s %= r;
2589 						(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2590 					}
2591 					r /= 10;
2592 				}
2593 				if (!f) {
2594 					f = 1;
2595 					(*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2596 				}
2597 				(*pc->decoder->filter_function)(0x3b, pc->decoder);		/* ';' */
2598 			}
2599 		}
2600 		if (f) {
2601 			break;
2602 		}
2603 		n++;
2604 	}
2605 	if (!f) {
2606 		(*pc->decoder->filter_function)(c, pc->decoder);
2607 	}
2608 
2609 	return c;
2610 }
2611 
2612 static int
collector_decode_htmlnumericentity(int c,void * data)2613 collector_decode_htmlnumericentity(int c, void *data)
2614 {
2615 	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2616 	int f, n, s, r, d, size, *mapelm;
2617 
2618 	switch (pc->status) {
2619 	case 1:
2620 		if (c == 0x23) {	/* '#' */
2621 			pc->status = 2;
2622 		} else {
2623 			pc->status = 0;
2624 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2625 			(*pc->decoder->filter_function)(c, pc->decoder);
2626 		}
2627 		break;
2628 	case 2:
2629 		if (c >= 0x30 && c <= 0x39) {	/* '0' - '9' */
2630 			pc->cache = c - 0x30;
2631 			pc->status = 3;
2632 			pc->digit = 1;
2633 		} else {
2634 			pc->status = 0;
2635 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2636 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2637 			(*pc->decoder->filter_function)(c, pc->decoder);
2638 		}
2639 		break;
2640 	case 3:
2641 		s = 0;
2642 		f = 0;
2643 		if (c >= 0x30 && c <= 0x39) {	/* '0' - '9' */
2644 			if (pc->digit > 9) {
2645 				pc->status = 0;
2646 				s = pc->cache;
2647 				f = 1;
2648 			} else {
2649 				s = pc->cache*10 + c - 0x30;
2650 				pc->cache = s;
2651 				pc->digit++;
2652 			}
2653 		} else {
2654 			pc->status = 0;
2655 			s = pc->cache;
2656 			f = 1;
2657 			n = 0;
2658 			size = pc->mapsize;
2659 			while (n < size) {
2660 				mapelm = &(pc->convmap[n*4]);
2661 				d = s - mapelm[2];
2662 				if (d >= mapelm[0] && d <= mapelm[1]) {
2663 					f = 0;
2664 					(*pc->decoder->filter_function)(d, pc->decoder);
2665 					if (c != 0x3b) {	/* ';' */
2666 						(*pc->decoder->filter_function)(c, pc->decoder);
2667 					}
2668 					break;
2669 				}
2670 				n++;
2671 			}
2672 		}
2673 		if (f) {
2674 			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
2675 			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
2676 			r = 1;
2677 			n = pc->digit;
2678 			while (n > 0) {
2679 				r *= 10;
2680 				n--;
2681 			}
2682 			s %= r;
2683 			r /= 10;
2684 			while (r > 0) {
2685 				d = s/r;
2686 				s %= r;
2687 				r /= 10;
2688 				(*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2689 			}
2690 			(*pc->decoder->filter_function)(c, pc->decoder);
2691 		}
2692 		break;
2693 	default:
2694 		if (c == 0x26) {	/* '&' */
2695 			pc->status = 1;
2696 		} else {
2697 			(*pc->decoder->filter_function)(c, pc->decoder);
2698 		}
2699 		break;
2700 	}
2701 
2702 	return c;
2703 }
2704 
/*
 * Flush handler for the numeric entity decoder.
 *
 * Writes out, literally, any partial reference that was still pending when
 * the input ended ("&", "&#" or "&#" followed by the digits read so far),
 * then clears the decoder state.
 *
 * NOTE: although the parameter is declared as a convert filter, the body
 * treats it as the struct collector_htmlnumericentity_data; the function is
 * registered through a cast to int (*)(void*) in mbfl_html_numeric_entity().
 *
 * Always returns 0.
 */
int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
{
	struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
	unsigned int s, r;
	int n;

	if (pc->status) {
		switch (pc->status) {
		case 1: /* '&' */
			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
			break;
		case 2: /* '#' */
			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */
			break;
		case 3: /* '0'-'9' */
			(*pc->decoder->filter_function)(0x26, pc->decoder);		/* '&' */
			(*pc->decoder->filter_function)(0x23, pc->decoder);		/* '#' */

			/*
			 * Emit exactly pc->digit digits, most significant first.
			 * r starts at 10^(digit-1) rather than 10^digit so that a
			 * 10-digit reference does not overflow, and unsigned
			 * arithmetic keeps every table index within 0..9.
			 */
			n = pc->digit;
			if (n > 0) {
				s = (unsigned int)pc->cache;
				r = 1;
				while (--n > 0) {
					r *= 10;
				}
				while (r > 0) {
					(*pc->decoder->filter_function)(mbfl_hexchar_table[(s/r) % 10], pc->decoder);
					r /= 10;
				}
			}

			break;
		default:
			break;
		}
	}

	pc->status = 0;
	pc->cache = 0;
	pc->digit = 0;

	return 0;
}
2751 
2752 mbfl_string *
mbfl_html_numeric_entity(mbfl_string * string,mbfl_string * result,int * convmap,int mapsize,int type)2753 mbfl_html_numeric_entity(
2754     mbfl_string *string,
2755     mbfl_string *result,
2756     int *convmap,
2757     int mapsize,
2758     int type)
2759 {
2760 	struct collector_htmlnumericentity_data pc;
2761 	mbfl_memory_device device;
2762 	mbfl_convert_filter *encoder;
2763 	int n;
2764 	unsigned char *p;
2765 
2766 	if (string == NULL || result == NULL) {
2767 		return NULL;
2768 	}
2769 	mbfl_string_init(result);
2770 	result->no_language = string->no_language;
2771 	result->no_encoding = string->no_encoding;
2772 	mbfl_memory_device_init(&device, string->len, 0);
2773 
2774 	/* output code filter */
2775 	pc.decoder = mbfl_convert_filter_new(
2776 	    mbfl_no_encoding_wchar,
2777 	    string->no_encoding,
2778 	    mbfl_memory_device_output, 0, &device);
2779 	/* wchar filter */
2780 	if (type == 0) {
2781 		encoder = mbfl_convert_filter_new(
2782 		    string->no_encoding,
2783 		    mbfl_no_encoding_wchar,
2784 		    collector_encode_htmlnumericentity, 0, &pc);
2785 	} else {
2786 		encoder = mbfl_convert_filter_new(
2787 		    string->no_encoding,
2788 		    mbfl_no_encoding_wchar,
2789 		    collector_decode_htmlnumericentity,
2790 			(int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
2791 	}
2792 	if (pc.decoder == NULL || encoder == NULL) {
2793 		mbfl_convert_filter_delete(encoder);
2794 		mbfl_convert_filter_delete(pc.decoder);
2795 		return NULL;
2796 	}
2797 	pc.status = 0;
2798 	pc.cache = 0;
2799 	pc.digit = 0;
2800 	pc.convmap = convmap;
2801 	pc.mapsize = mapsize;
2802 
2803 	/* feed data */
2804 	p = string->val;
2805 	n = string->len;
2806 	if (p != NULL) {
2807 		while (n > 0) {
2808 			if ((*encoder->filter_function)(*p++, encoder) < 0) {
2809 				break;
2810 			}
2811 			n--;
2812 		}
2813 	}
2814 	mbfl_convert_filter_flush(encoder);
2815 	mbfl_convert_filter_flush(pc.decoder);
2816 	result = mbfl_memory_device_result(&device, result);
2817 	mbfl_convert_filter_delete(encoder);
2818 	mbfl_convert_filter_delete(pc.decoder);
2819 
2820 	return result;
2821 }
2822 
/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 */