1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13 */
14 
15 #include "mbfilter_singlebyte.h"
16 
/* Evaluate `statement` once; if its result is negative, return -1 from the
 * function in which the macro is used. */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
18 
/* Return `a` unless it is zero, in which case return the fallback `b`. */
static inline uint32_t coalesce(uint32_t a, uint32_t b)
{
	if (a != 0) {
		return a;
	}
	return b;
}
23 
24 /* Helper for single-byte encodings which use a conversion table */
mbfl_conv_singlebyte_table(int c,mbfl_convert_filter * filter,int tbl_min,const unsigned short tbl[])25 static int mbfl_conv_singlebyte_table(int c, mbfl_convert_filter *filter, int tbl_min, const unsigned short tbl[])
26 {
27 	if (c >= 0 && c < tbl_min) {
28 		CK((*filter->output_function)(c, filter->data));
29 	} else if (c < 0) {
30 		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
31 	} else {
32 		CK((*filter->output_function)(coalesce(tbl[c - tbl_min], MBFL_BAD_INPUT), filter->data));
33 	}
34 	return 0;
35 }
36 
/* Helper for single-byte encodings which use a conversion table: maps the
 * wide character `c` back to a byte.
 *
 * Values in [0, tbl_min) are forwarded unchanged. Other non-negative values
 * (except MBFL_BAD_INPUT) are searched for linearly in the 256 - tbl_min
 * entries of `tbl`; on a match the byte tbl_min + index is emitted. Everything
 * else is handed to mbfl_filt_conv_illegal_output().
 *
 * Returns 0 on success, -1 if a callee reports failure. */
static int mbfl_conv_reverselookup_table(int c, mbfl_convert_filter *filter, int tbl_min, const unsigned short tbl[])
{
	if (c >= 0 && c < tbl_min) {
		CK((*filter->output_function)(c, filter->data));
		return 0;
	}

	if (c >= 0 && c != MBFL_BAD_INPUT) {
		const int entries = 256 - tbl_min;
		int idx = 0;

		while (idx < entries) {
			if (c == tbl[idx]) {
				CK((*filter->output_function)(tbl_min + idx, filter->data));
				return 0;
			}
			idx++;
		}
	}

	/* Negative, bad-input marker, or not present in the table */
	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
54 
/* Initialize data structures for a single-byte encoding.
 *
 * Expands to:
 *  - forward declarations of the four static conversion functions for `id`
 *    (mbfl_filt_conv_<id>_wchar, mbfl_filt_conv_wchar_<id>,
 *    mb_<id>_to_wchar and mb_wchar_to_<id>);
 *  - two static mbfl_convert_vtbl objects, vtbl_<id>_wchar and
 *    vtbl_wchar_<id>, each referencing one of the per-character filter
 *    functions together with the common ctor and flush functions;
 *  - the non-static mbfl_encoding object mbfl_encoding_<id>, which carries
 *    `name`, `mime_name`, `aliases`, MBFL_ENCTYPE_SBCS and pointers to both
 *    vtbls and both buffer conversion functions.
 *
 * The expansion ends without a semicolon, so the invocation supplies it.
 * NOTE(review): the meaning of the positional NULL members depends on the
 * struct definitions, which are not visible here — confirm against the
 * header before reordering anything. */
#define DEF_SB(id, name, mime_name, aliases) \
	static int mbfl_filt_conv_##id##_wchar(int c, mbfl_convert_filter *filter); \
	static int mbfl_filt_conv_wchar_##id(int c, mbfl_convert_filter *filter); \
	static size_t mb_##id##_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); \
	static void mb_wchar_to_##id(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); \
	static const struct mbfl_convert_vtbl vtbl_##id##_wchar = { \
		mbfl_no_encoding_##id, \
		mbfl_no_encoding_wchar, \
		mbfl_filt_conv_common_ctor, \
		NULL, \
		mbfl_filt_conv_##id##_wchar, \
		mbfl_filt_conv_common_flush, \
		NULL \
	}; \
	static const struct mbfl_convert_vtbl vtbl_wchar_##id = { \
		mbfl_no_encoding_wchar, \
		mbfl_no_encoding_##id, \
		mbfl_filt_conv_common_ctor, \
		NULL, \
		mbfl_filt_conv_wchar_##id, \
		mbfl_filt_conv_common_flush, \
		NULL \
	}; \
	const mbfl_encoding mbfl_encoding_##id = { \
		mbfl_no_encoding_##id, \
		name, \
		mime_name, \
		aliases, \
		NULL, \
		MBFL_ENCTYPE_SBCS, \
		&vtbl_##id##_wchar, \
		&vtbl_wchar_##id, \
		mb_##id##_to_wchar, \
		mb_wchar_to_##id, \
		NULL, \
		NULL \
	}
93 
/* For single-byte encodings which use a conversion table.
 *
 * Generates the four conversion functions for encoding `id` and then invokes
 * DEF_SB() to emit the vtbls and the mbfl_encoding object.
 *
 *   tbl_min  first byte value that is translated through `tbl`; smaller
 *            values are passed through unchanged in both directions
 *   tbl      array of 256 - tbl_min code points, indexed by byte - tbl_min;
 *            a zero entry is decoded as MBFL_BAD_INPUT
 *
 * When encoding, a code point >= tbl_min is searched for linearly in `tbl`;
 * if it is not found it is reported through MB_CONVERT_ERROR.
 *
 * `tbl_min` and `tbl` are parenthesized wherever they are expanded so that
 * expression arguments (e.g. `0x80 + 0x20`) keep their intended meaning. */
#define DEF_SB_TBL(id, name, mime_name, aliases, tbl_min, tbl) \
	static int mbfl_filt_conv_##id##_wchar(int c, mbfl_convert_filter *filter) { \
		return mbfl_conv_singlebyte_table(c, filter, (tbl_min), (tbl)); \
	} \
	static int mbfl_filt_conv_wchar_##id(int c, mbfl_convert_filter *filter) { \
		return mbfl_conv_reverselookup_table(c, filter, (tbl_min), (tbl)); \
	} \
	static size_t mb_##id##_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) \
	{ \
		unsigned char *p = *in, *e = p + *in_len; \
		uint32_t *out = buf, *limit = buf + bufsize; \
		while (p < e && out < limit) { \
			unsigned char c = *p++; \
			*out++ = (c < (tbl_min)) ? c : coalesce((tbl)[c - (tbl_min)], MBFL_BAD_INPUT); \
		} \
		*in_len = e - p; \
		*in = p; \
		return out - buf; \
	} \
	static void mb_wchar_to_##id(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) \
	{ \
		unsigned char *out, *limit; \
		MB_CONVERT_BUF_LOAD(buf, out, limit); \
		MB_CONVERT_BUF_ENSURE(buf, out, limit, len); \
		while (len--) { \
			uint32_t w = *in++; \
			if (w < (tbl_min)) { \
				out = mb_convert_buf_add(out, w & 0xFF); \
			} else { \
				for (int i = 0; i < 256 - (tbl_min); i++) { \
					if (w == (tbl)[i]) { \
						out = mb_convert_buf_add(out, i + (tbl_min)); \
						goto next_iteration; \
					} \
				} \
				MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_##id); \
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len); \
	next_iteration: ; \
			} \
		} \
		MB_CONVERT_BUF_STORE(buf, out, limit); \
	} \
	DEF_SB(id, name, mime_name, aliases)
138 
/* The grand-daddy of them all: ASCII */
/* NULL-terminated list of alternative names for ASCII, passed as the
 * `aliases` member of mbfl_encoding_ascii. The conversion functions that
 * DEF_SB() forward-declares are written out by hand below. */
static const char *ascii_aliases[] = {"ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "US-ASCII", "ISO646-US", "us", "IBM367", "IBM-367", "cp367", "csASCII", NULL};
DEF_SB(ascii, "ASCII", "US-ASCII", ascii_aliases);
142 
/* ASCII -> wchar filter: values below 0x80 are forwarded unchanged, anything
 * else is emitted as MBFL_BAD_INPUT.
 * Returns 0 on success, -1 if the output function reports failure. */
static int mbfl_filt_conv_ascii_wchar(int c, mbfl_convert_filter *filter)
{
	if (c < 0x80) {
		CK((*filter->output_function)(c, filter->data));
	} else {
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}
	return 0;
}
148 
/* wchar -> ASCII filter: code points in [0, 0x80) are forwarded unchanged;
 * everything else (including MBFL_BAD_INPUT) goes to
 * mbfl_filt_conv_illegal_output().
 * Returns 0 on success, -1 if a callee reports failure. */
static int mbfl_filt_conv_wchar_ascii(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= 0x80 || c == MBFL_BAD_INPUT) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	CK((*filter->output_function)(c, filter->data));
	return 0;
}
158 
mb_ascii_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)159 static size_t mb_ascii_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
160 {
161 	unsigned char *p = *in, *e = p + *in_len;
162 	uint32_t *out = buf, *limit = buf + bufsize;
163 
164 	while (p < e && out < limit) {
165 		unsigned char c = *p++;
166 		*out++ = (c < 0x80) ? c : MBFL_BAD_INPUT;
167 	}
168 
169 	*in_len = e - p;
170 	*in = p;
171 	return out - buf;
172 }
173 
/* Encode `len` wide characters from `in` as ASCII into `buf`. Code points
 * below 0x80 are written as one byte each; any other value is reported via
 * MB_CONVERT_ERROR, after which room for the remaining input is re-ensured.
 * `end` is not used. */
static void mb_wchar_to_ascii(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;

		if (w < 0x80) {
			out = mb_convert_buf_add(out, w & 0xFF);
			continue;
		}

		/* Not representable in ASCII */
		MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_ascii);
		MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
192 
193 /* ISO-8859-X */
194 
/* NULL-terminated list of alternative names for ISO-8859-1, passed as the
 * `aliases` member of mbfl_encoding_8859_1. No table is needed: the
 * hand-written functions below copy values directly. */
static const char *iso8859_1_aliases[] = {"ISO8859-1", "latin1", NULL};
DEF_SB(8859_1, "ISO-8859-1", "ISO-8859-1", iso8859_1_aliases);
197 
/* ISO-8859-1 -> wchar filter: forwards `c` unchanged to the output function
 * and returns that function's result as-is. */
static int mbfl_filt_conv_8859_1_wchar(int c, mbfl_convert_filter *filter)
{
	int status = (*filter->output_function)(c, filter->data);
	return status;
}
202 
/* wchar -> ISO-8859-1 filter: code points in [0, 0x100) are forwarded
 * unchanged; everything else (including MBFL_BAD_INPUT) goes to
 * mbfl_filt_conv_illegal_output().
 * Returns 0 on success, -1 if a callee reports failure. */
static int mbfl_filt_conv_wchar_8859_1(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= 0x100 || c == MBFL_BAD_INPUT) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	CK((*filter->output_function)(c, filter->data));
	return 0;
}
212 
/* Decode ISO-8859-1 bytes from *in into `buf`: each byte is stored as the
 * wide character with the same value. Stops when either the input or the
 * `bufsize` output slots are exhausted.
 *
 * On return *in points past the consumed bytes and *in_len holds the number
 * of bytes left. Returns the number of wide characters written. `state` is
 * not used. */
static size_t mb_8859_1_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *src = *in;
	size_t available = *in_len;
	/* One output slot per input byte, so the shorter side bounds the copy */
	size_t count = (available < bufsize) ? available : bufsize;

	for (size_t i = 0; i < count; i++) {
		buf[i] = src[i];
	}

	*in_len = available - count;
	*in = src + count;
	return count;
}
226 
/* Encode `len` wide characters from `in` as ISO-8859-1 into `buf`. Code
 * points below 0x100 are written as one byte each; any other value is
 * reported via MB_CONVERT_ERROR, after which room for the remaining input is
 * re-ensured. `end` is not used. */
static void mb_wchar_to_8859_1(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;

		if (w < 0x100) {
			out = mb_convert_buf_add(out, w);
			continue;
		}

		/* Not representable in ISO-8859-1 */
		MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_8859_1);
		MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
245 
/* ISO-8859-2: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). DEF_SB_TBL passes
 * bytes below 0xA0 through unchanged. */
static const char *iso8859_2_aliases[] = {"ISO8859-2", "latin2", NULL};
static const unsigned short iso8859_2_ucs_table[] = {
	0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
	0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
	0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
	0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
	0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
	0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
	0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
	0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
	0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
	0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
	0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
	0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
};
DEF_SB_TBL(8859_2, "ISO-8859-2", "ISO-8859-2", iso8859_2_aliases, 0xA0, iso8859_2_ucs_table);
262 
/* ISO-8859-3: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). A 0x0000 entry
 * marks a byte with no mapping; the decoding helpers turn it into
 * MBFL_BAD_INPUT. */
static const char *iso8859_3_aliases[] = {"ISO8859-3", "latin3", NULL};
static const unsigned short iso8859_3_ucs_table[] = {
	0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0x0000, 0x0124, 0x00A7,
	0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0x0000, 0x017B,
	0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
	0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0x0000, 0x017C,
	0x00C0, 0x00C1, 0x00C2, 0x0000, 0x00C4, 0x010A, 0x0108, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	0x0000, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
	0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
	0x00E0, 0x00E1, 0x00E2, 0x0000, 0x00E4, 0x010B, 0x0109, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	0x0000, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
	0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9
};
DEF_SB_TBL(8859_3, "ISO-8859-3", "ISO-8859-3", iso8859_3_aliases, 0xA0, iso8859_3_ucs_table);
279 
/* ISO-8859-4: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). DEF_SB_TBL passes
 * bytes below 0xA0 through unchanged. */
static const char *iso8859_4_aliases[] = {"ISO8859-4", "latin4", NULL};
static const unsigned short iso8859_4_ucs_table[] = {
	0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
	0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
	0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
	0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
	0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
	0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
	0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
	0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
	0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
	0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
	0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
	0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9
};
DEF_SB_TBL(8859_4, "ISO-8859-4", "ISO-8859-4", iso8859_4_aliases, 0xA0, iso8859_4_ucs_table);
296 
/* ISO-8859-5: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). DEF_SB_TBL passes
 * bytes below 0xA0 through unchanged. */
static const char *iso8859_5_aliases[] = {"ISO8859-5", "cyrillic", NULL};
static const unsigned short iso8859_5_ucs_table[] = {
	0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
	0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
	0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
	0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
	0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
	0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
	0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
	0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
	0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
	0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
	0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
	0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F
};
DEF_SB_TBL(8859_5, "ISO-8859-5", "ISO-8859-5", iso8859_5_aliases, 0xA0, iso8859_5_ucs_table);
313 
/* ISO-8859-6: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). A 0x0000 entry
 * marks a byte with no mapping; the decoding helpers turn it into
 * MBFL_BAD_INPUT. */
static const char *iso8859_6_aliases[] = {"ISO8859-6", "arabic", NULL};
static const unsigned short iso8859_6_ucs_table[] = {
	0x00A0, 0x0000, 0x0000, 0x0000, 0x00A4, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x060C, 0x00AD, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x061B, 0x0000, 0x0000, 0x0000, 0x061F,
	0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
	0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
	0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
	0x0638, 0x0639, 0x063A, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
	0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
	0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
DEF_SB_TBL(8859_6, "ISO-8859-6", "ISO-8859-6", iso8859_6_aliases, 0xA0, iso8859_6_ucs_table);
330 
/* ISO-8859-7: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). A 0x0000 entry
 * marks a byte with no mapping; the decoding helpers turn it into
 * MBFL_BAD_INPUT. */
static const char *iso8859_7_aliases[] = {"ISO8859-7", "greek", NULL};
static const unsigned short iso8859_7_ucs_table[] = {
	0x00A0, 0x2018, 0x2019, 0x00A3, 0x20AC, 0x20AF, 0x00A6, 0x00A7,
	0x00A8, 0x00A9, 0x037A, 0x00AB, 0x00AC, 0x00AD, 0x0000, 0x2015,
	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
	0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
	0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
	0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
	0x03A0, 0x03A1, 0x0000, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
	0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
	0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
	0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
	0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
	0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x0000
};
DEF_SB_TBL(8859_7, "ISO-8859-7", "ISO-8859-7", iso8859_7_aliases, 0xA0, iso8859_7_ucs_table);
347 
/* ISO-8859-8: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). A 0x0000 entry
 * marks a byte with no mapping; the decoding helpers turn it into
 * MBFL_BAD_INPUT. */
static const char *iso8859_8_aliases[] = {"ISO8859-8", "hebrew", NULL};
static const unsigned short iso8859_8_ucs_table[] = {
	0x00A0, 0x0000, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
	0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
	0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
	0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
	0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
	0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
	0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x200E, 0x200F, 0x0000
};
DEF_SB_TBL(8859_8, "ISO-8859-8", "ISO-8859-8", iso8859_8_aliases, 0xA0, iso8859_8_ucs_table);
364 
/* ISO-8859-9: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). DEF_SB_TBL passes
 * bytes below 0xA0 through unchanged. */
static const char *iso8859_9_aliases[] = {"ISO8859-9", "latin5", NULL};
static const unsigned short iso8859_9_ucs_table[] = {
	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
	0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
	0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF
};
DEF_SB_TBL(8859_9, "ISO-8859-9", "ISO-8859-9", iso8859_9_aliases, 0xA0, iso8859_9_ucs_table);
381 
/* ISO-8859-10: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). DEF_SB_TBL passes
 * bytes below 0xA0 through unchanged. */
static const char *iso8859_10_aliases[] = {"ISO8859-10", "latin6", NULL};
static const unsigned short iso8859_10_ucs_table[] = {
	0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
	0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
	0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
	0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
	0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
	0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
	0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
	0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
	0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
	0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
	0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
	0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138
};
DEF_SB_TBL(8859_10, "ISO-8859-10", "ISO-8859-10", iso8859_10_aliases, 0xA0, iso8859_10_ucs_table);
398 
/* ISO-8859-13: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). DEF_SB_TBL passes
 * bytes below 0xA0 through unchanged. */
static const char *iso8859_13_aliases[] = {"ISO8859-13", NULL};
static const unsigned short iso8859_13_ucs_table[] = {
	0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
	0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
	0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
	0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
	0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
	0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
	0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
	0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
	0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
	0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
	0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019
};
DEF_SB_TBL(8859_13, "ISO-8859-13", "ISO-8859-13", iso8859_13_aliases, 0xA0, iso8859_13_ucs_table);
415 
/* ISO-8859-14: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). DEF_SB_TBL passes
 * bytes below 0xA0 through unchanged. */
static const char *iso8859_14_aliases[] = {"ISO8859-14", "latin8", NULL};
static const unsigned short iso8859_14_ucs_table[] = {
	0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
	0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
	0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
	0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF
};
DEF_SB_TBL(8859_14, "ISO-8859-14", "ISO-8859-14", iso8859_14_aliases, 0xA0, iso8859_14_ucs_table);
432 
/* ISO-8859-15: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). DEF_SB_TBL passes
 * bytes below 0xA0 through unchanged. */
static const char *iso8859_15_aliases[] = {"ISO8859-15", NULL};
static const unsigned short iso8859_15_ucs_table[] = {
	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
	0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
	0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};
DEF_SB_TBL(8859_15, "ISO-8859-15", "ISO-8859-15", iso8859_15_aliases, 0xA0, iso8859_15_ucs_table);
449 
/* ISO-8859-16: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0xA0..0xFF (entry i belongs to byte 0xA0 + i). DEF_SB_TBL passes
 * bytes below 0xA0 through unchanged. */
static const char *iso8859_16_aliases[] = {"ISO8859-16", NULL};
static const unsigned short iso8859_16_ucs_table[] = {
	0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7,
	0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
	0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
	0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
	0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
	0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
	0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B,
	0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF
};
DEF_SB_TBL(8859_16, "ISO-8859-16", "ISO-8859-16", iso8859_16_aliases, 0xA0, iso8859_16_ucs_table);
466 
/* Windows-1251: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0x80..0xFF (entry i belongs to byte 0x80 + i). A 0x0000 entry
 * marks a byte with no mapping; the decoding helpers turn it into
 * MBFL_BAD_INPUT. */
static const char *cp1251_aliases[] = {"CP1251", "CP-1251", "WINDOWS-1251", NULL};
static const unsigned short cp1251_ucs_table[] = {
	0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
	0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
	0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
	0x0000, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
	0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
	0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
	0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
	0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
	0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
	0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
	0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
	0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
	0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
	0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
	0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
	0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F
};
DEF_SB_TBL(cp1251, "Windows-1251", "Windows-1251", cp1251_aliases, 0x80, cp1251_ucs_table);
487 
/* Windows-1252: NULL-terminated alias list, then a 32-entry byte -> code
 * point table covering only bytes 0x80..0x9F (entry i belongs to byte
 * 0x80 + i). The entries 0x0081, 0x008D, 0x008F, 0x0090 and 0x009D map those
 * bytes to themselves. Because the table is partial, DEF_SB (not DEF_SB_TBL)
 * is used and the conversion functions are written out by hand below. */
static const char *cp1252_aliases[] = {"cp1252", NULL};
static const unsigned short cp1252_ucs_table[] = {
	0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
	0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
	0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
	0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
};
DEF_SB(cp1252, "Windows-1252", "Windows-1252", cp1252_aliases);
496 
/* wchar -> Windows-1252 filter.
 *
 *  - negative values and MBFL_BAD_INPUT: illegal output;
 *  - code points >= 0x100: searched for in cp1252_ucs_table, emitting
 *    0x80 + index on a match;
 *  - code points <= 0x7F or >= 0xA0, and 0x81/0x8D/0x8F/0x90/0x9D: forwarded
 *    unchanged;
 *  - everything else: illegal output.
 *
 * Returns 0 on success, -1 if a callee reports failure. */
static int mbfl_filt_conv_wchar_cp1252(int c, mbfl_convert_filter *filter)
{
	if (c >= 0 && c != MBFL_BAD_INPUT) {
		if (c >= 0x100) {
			/* Only the 32 bytes 0x80..0x9F can stand for such code points */
			int idx = 0;
			while (idx < 32) {
				if (cp1252_ucs_table[idx] == c) {
					CK((*filter->output_function)(0x80 + idx, filter->data));
					return 0;
				}
				idx++;
			}
		} else if (c <= 0x7F || c >= 0xA0 || c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D) {
			CK((*filter->output_function)(c, filter->data));
			return 0;
		}
	}

	/* Nothing matched: not representable in Windows-1252 */
	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
516 
/* Windows-1252 -> wchar filter: values in [0x80, 0xA0) are translated through
 * cp1252_ucs_table (a zero entry becomes MBFL_BAD_INPUT); every other value
 * is forwarded unchanged.
 * Returns 0 on success, -1 if the output function reports failure. */
static int mbfl_filt_conv_cp1252_wchar(int c, mbfl_convert_filter *filter)
{
	int s = c;

	if (c >= 0x80 && c < 0xA0) {
		s = coalesce(cp1252_ucs_table[c - 0x80], MBFL_BAD_INPUT);
	}

	CK((*filter->output_function)(s, filter->data));
	return 0;
}
528 
mb_cp1252_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)529 static size_t mb_cp1252_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
530 {
531 	unsigned char *p = *in, *e = p + *in_len;
532 	uint32_t *out = buf, *limit = buf + bufsize;
533 
534 	while (p < e && out < limit) {
535 		unsigned char c = *p++;
536 
537 		if (c >= 0x80 && c < 0xA0) {
538 			*out++ = coalesce(cp1252_ucs_table[c - 0x80], MBFL_BAD_INPUT);
539 		} else {
540 			*out++ = c;
541 		}
542 	}
543 
544 	*in_len = e - p;
545 	*in = p;
546 	return out - buf;
547 }
548 
/* Encode `len` wide characters from `in` as Windows-1252 into `buf`.
 *
 * Code points >= 0x100 are searched for in cp1252_ucs_table and written as
 * 0x80 + index; code points <= 0x7F or >= 0xA0, and 0x81/0x8D/0x8F/0x90/0x9D,
 * are written as their own value. Anything else is reported via
 * MB_CONVERT_ERROR, after which room for the remaining input is re-ensured.
 * `end` is not used. */
static void mb_wchar_to_cp1252(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;
		int byte = -1; /* stays negative when w has no Windows-1252 byte */

		if (w >= 0x100) {
			for (int i = 0; i < 32; i++) {
				if (w == cp1252_ucs_table[i]) {
					byte = i + 0x80;
					break;
				}
			}
		} else if (w <= 0x7F || w >= 0xA0 || w == 0x81 || w == 0x8D || w == 0x8F || w == 0x90 || w == 0x9D) {
			byte = (int)w;
		}

		if (byte >= 0) {
			out = mb_convert_buf_add(out, byte);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp1252);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
578 
/* Windows-1254: NULL-terminated alias list, then the byte -> code point table
 * for bytes 0x80..0xFF (entry i belongs to byte 0x80 + i). A 0x0000 entry
 * marks a byte with no mapping; the decoding helpers turn it into
 * MBFL_BAD_INPUT. */
static const char *cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL};
static const unsigned short cp1254_ucs_table[] = {
	0x20AC, 0X0000, 0X201A, 0X0192, 0X201E, 0X2026, 0X2020, 0X2021,
	0X02C6, 0X2030, 0X0160, 0X2039, 0X0152, 0X0000, 0X0000, 0X0000,
	0X0000, 0X2018, 0X2019, 0X201C, 0X201D, 0X2022, 0X2013, 0X2014,
	0X02DC, 0X2122, 0X0161, 0X203A, 0X0153, 0X0000, 0X0000, 0X0178,
	0X00A0, 0X00A1, 0X00A2, 0X00A3, 0X00A4, 0X00A5, 0X00A6, 0X00A7,
	0X00A8, 0X00A9, 0X00AA, 0X00AB, 0X00AC, 0X00AD, 0X00AE, 0X00AF,
	0X00B0, 0X00B1, 0X00B2, 0X00B3, 0X00B4, 0X00B5, 0X00B6, 0X00B7,
	0X00B8, 0X00B9, 0X00BA, 0X00BB, 0X00BC, 0X00BD, 0X00BE, 0X00BF,
	0X00C0, 0X00C1, 0X00C2, 0X00C3, 0X00C4, 0X00C5, 0X00C6, 0X00C7,
	0X00C8, 0X00C9, 0X00CA, 0X00CB, 0X00CC, 0X00CD, 0X00CE, 0X00CF,
	0X011E, 0X00D1, 0X00D2, 0X00D3, 0X00D4, 0X00D5, 0X00D6, 0X00D7,
	0X00D8, 0X00D9, 0X00DA, 0X00DB, 0X00DC, 0X0130, 0X015E, 0X00DF,
	0X00E0, 0X00E1, 0X00E2, 0X00E3, 0X00E4, 0X00E5, 0X00E6, 0X00E7,
	0X00E8, 0X00E9, 0X00EA, 0X00EB, 0X00EC, 0X00ED, 0X00EE, 0X00EF,
	0X011F, 0X00F1, 0X00F2, 0X00F3, 0X00F4, 0X00F5, 0X00F6, 0X00F7,
	0X00F8, 0X00F9, 0X00FA, 0X00FB, 0X00FC, 0X0131, 0X015F, 0X00FF
};
DEF_SB_TBL(cp1254, "Windows-1254", "Windows-1254", cp1254_aliases, 0x80, cp1254_ucs_table);
599 
/* CP866: NULL-terminated alias list, then the byte -> code point table for
 * bytes 0x80..0xFF (entry i belongs to byte 0x80 + i). DEF_SB_TBL passes
 * bytes below 0x80 through unchanged. */
static const char *cp866_aliases[] = {"CP-866", "IBM866", "IBM-866", NULL};
static const unsigned short cp866_ucs_table[] = {
	0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
	0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
	0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
	0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
	0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
	0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
	0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
	0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
	0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
	0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
	0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
	0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
	0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
	0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E,
	0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0
};
DEF_SB_TBL(cp866, "CP866", "CP866", cp866_aliases, 0x80, cp866_ucs_table);
620 
/* CP850: NULL-terminated alias list, then the byte -> code point table for
 * bytes 0x80..0xFF (entry i belongs to byte 0x80 + i). DEF_SB_TBL passes
 * bytes below 0x80 through unchanged. */
static const char *cp850_aliases[] = {"CP-850", "IBM850", "IBM-850", NULL};
static const unsigned short cp850_ucs_table[] = {
	0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
	0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
	0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
	0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
	0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
	0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
	0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
	0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
	0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
	0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
	0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
	0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
	0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
	0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
	0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
};
DEF_SB_TBL(cp850, "CP850", "CP850", cp850_aliases, 0x80, cp850_ucs_table);
641 
642 static const char *koi8r_aliases[] = {"KOI8R", NULL};
643 static const unsigned short koi8r_ucs_table[] = {
644 	0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
645 	0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
646 	0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
647 	0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
648 	0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
649 	0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
650 	0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
651 	0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
652 	0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
653 	0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
654 	0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
655 	0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
656 	0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
657 	0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
658 	0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
659 	0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A
660 };
661 DEF_SB_TBL(koi8r, "KOI8-R", "KOI8-R", koi8r_aliases, 0x80, koi8r_ucs_table);
662 
663 static const char *koi8u_aliases[] = {"KOI8U", NULL};
664 static const unsigned short koi8u_ucs_table[] = {
665 	0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
666 	0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
667 	0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
668 	0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
669 	0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
670 	0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
671 	0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
672 	0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
673 	0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
674 	0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
675 	0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
676 	0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
677 	0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
678 	0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
679 	0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
680 	0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A
681 };
682 DEF_SB_TBL(koi8u, "KOI8-U", "KOI8-U", koi8u_aliases, 0x80, koi8u_ucs_table);
683 
684 static const char *armscii8_aliases[] = {"ArmSCII8", "ARMSCII-8", "ARMSCII8", NULL};
685 static const unsigned short armscii8_ucs_table[] = {
686 	0x00A0, 0x0000, 0x0587, 0x0589, 0x0029, 0x0028, 0x00BB, 0x00AB,
687 	0x2014, 0x002E, 0x055D, 0x002C, 0x002D, 0x058A, 0x2026, 0x055C,
688 	0x055B, 0x055E, 0x0531, 0x0561, 0x0532, 0x0562, 0x0533, 0x0563,
689 	0x0534, 0x0564, 0x0535, 0x0565, 0x0536, 0x0566, 0x0537, 0x0567,
690 	0x0538, 0x0568, 0x0539, 0x0569, 0x053A, 0x056A, 0x053B, 0x056B,
691 	0x053C, 0x056C, 0x053D, 0x056D, 0x053E, 0x056E, 0x053F, 0x056F,
692 	0x0540, 0x0570, 0x0541, 0x0571, 0x0542, 0x0572, 0x0543, 0x0573,
693 	0x0544, 0x0574, 0x0545, 0x0575, 0x0546, 0x0576, 0x0547, 0x0577,
694 	0x0548, 0x0578, 0x0549, 0x0579, 0x054A, 0x057A, 0x054B, 0x057B,
695 	0x054C, 0x057C, 0x054D, 0x057D, 0x054E, 0x057E, 0x054F, 0x057F,
696 	0x0550, 0x0580, 0x0551, 0x0581, 0x0552, 0x0582, 0x0553, 0x0583,
697 	0x0554, 0x0584, 0x0555, 0x0585, 0x0556, 0x0586, 0x055A, 0x0000
698 };
699 static const unsigned char ucs_armscii8_table[] = {
700 	0xA5, 0xA4, 0x2A, 0x2B, 0xAB, 0xAC, 0xA9, 0x2F
701 };
702 DEF_SB(armscii8, "ArmSCII-8", "ArmSCII-8", armscii8_aliases);
703 
/*
 * Filter step: convert one ArmSCII-8 value to a code point and hand it to
 * the filter's output function.
 *
 * Values below 0xA0 are forwarded unchanged. Anything else is looked up in
 * armscii8_ucs_table at (c - 0xA0); a zero entry becomes MBFL_BAD_INPUT.
 *
 * Returns 0, or -1 when the output function returns a negative value.
 */
static int mbfl_filt_conv_armscii8_wchar(int c, mbfl_convert_filter *filter)
{
	uint32_t wchar;

	if (c < 0xA0) {
		wchar = c;
	} else {
		wchar = coalesce(armscii8_ucs_table[c - 0xA0], MBFL_BAD_INPUT);
	}

	CK((*filter->output_function)(wchar, filter->data));
	return 0;
}
709 
/*
 * Filter step: convert one code point to ArmSCII-8 and hand the resulting
 * byte to the filter's output function.
 *
 * The cases are tried in this order:
 *   - 0x28..0x2F are translated through ucs_armscii8_table;
 *   - negative values and MBFL_BAD_INPUT go to mbfl_filt_conv_illegal_output;
 *   - remaining values below 0xA0 are forwarded unchanged;
 *   - everything else is searched for in armscii8_ucs_table, and the byte
 *     0xA0 + index is emitted on a match, otherwise the value goes to
 *     mbfl_filt_conv_illegal_output.
 *
 * Returns 0, or -1 when any of the calls above returns a negative value.
 */
static int mbfl_filt_conv_wchar_armscii8(int c, mbfl_convert_filter *filter)
{
	if (c >= 0x28 && c <= 0x2F) {
		CK((*filter->output_function)(ucs_armscii8_table[c - 0x28], filter->data));
		return 0;
	}

	if (c < 0 || c == MBFL_BAD_INPUT) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	if (c < 0xA0) {
		CK((*filter->output_function)(c, filter->data));
		return 0;
	}

	/* Reverse lookup: find the first table slot holding this code point */
	int slot = 0;
	while (slot < 0x60 && c != armscii8_ucs_table[slot]) {
		slot++;
	}

	if (slot < 0x60) {
		CK((*filter->output_function)(0xA0 + slot, filter->data));
	} else {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}
	return 0;
}
729 
mb_armscii8_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)730 static size_t mb_armscii8_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
731 {
732 	unsigned char *p = *in, *e = p + *in_len;
733 	uint32_t *out = buf, *limit = buf + bufsize;
734 
735 	while (p < e && out < limit) {
736 		unsigned char c = *p++;
737 		*out++ = (c < 0xA0) ? c : coalesce(armscii8_ucs_table[c - 0xA0], MBFL_BAD_INPUT);
738 	}
739 
740 	*in_len = e - p;
741 	*in = p;
742 	return out - buf;
743 }
744 
/*
 * Bulk encoder: convert len code points from in to ArmSCII-8 bytes appended
 * to buf.
 *
 * For each code point, in this order:
 *   - 0x28..0x2F are translated through ucs_armscii8_table;
 *   - other values below 0xA0 are written unchanged;
 *   - everything else is searched for in armscii8_ucs_table, and the byte
 *     0xA0 + index is written on a match, otherwise the code point is
 *     reported through MB_CONVERT_ERROR.
 *
 * The end parameter is not referenced directly in this function.
 */
static void mb_wchar_to_armscii8(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;

		if (w >= 0x28 && w <= 0x2F) {
			out = mb_convert_buf_add(out, ucs_armscii8_table[w - 0x28]);
			continue;
		}

		if (w < 0xA0) {
			out = mb_convert_buf_add(out, w);
			continue;
		}

		/* Reverse lookup: find the first table slot holding this code point */
		int slot = 0;
		while (slot < 0x60 && w != armscii8_ucs_table[slot]) {
			slot++;
		}

		if (slot < 0x60) {
			out = mb_convert_buf_add(out, 0xA0 + slot);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_armscii8);
			/* Re-check capacity for the code points still to be processed */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
773