1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13 */
14 
15 #include "mbfilter_singlebyte.h"
16 
/* Evaluate `statement`; when it yields a negative value, make the enclosing
 * function return -1 right away. Otherwise execution simply continues. */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
18 
/* Return `a` unless it is zero, in which case return the fallback `b`. */
static inline uint32_t coalesce(uint32_t a, uint32_t b)
{
	if (a != 0) {
		return a;
	}
	return b;
}
23 
24 /* Helper for single-byte encodings which use a conversion table */
mbfl_conv_singlebyte_table(int c,mbfl_convert_filter * filter,int tbl_min,const unsigned short tbl[])25 static int mbfl_conv_singlebyte_table(int c, mbfl_convert_filter *filter, int tbl_min, const unsigned short tbl[])
26 {
27 	if (c >= 0 && c < tbl_min) {
28 		CK((*filter->output_function)(c, filter->data));
29 	} else if (c < 0) {
30 		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
31 	} else {
32 		CK((*filter->output_function)(coalesce(tbl[c - tbl_min], MBFL_BAD_INPUT), filter->data));
33 	}
34 	return 0;
35 }
36 
/* Helper for converting a wide character back to a table-driven single-byte
 * encoding.
 *
 * Values in [0, tbl_min) are emitted unchanged. Any other non-negative value
 * (except MBFL_BAD_INPUT) is searched for linearly in the 256 - tbl_min
 * entries of `tbl`; on a match, the corresponding byte (index + tbl_min) is
 * emitted. Everything else is handed to mbfl_filt_conv_illegal_output().
 * Returns 0, or -1 as soon as a callee reports a negative result. */
static int mbfl_conv_reverselookup_table(int c, mbfl_convert_filter *filter, int tbl_min, const unsigned short tbl[])
{
	if (c >= 0 && c < tbl_min) {
		CK((*filter->output_function)(c, filter->data));
		return 0;
	}

	if (c >= 0 && c != MBFL_BAD_INPUT) {
		const int n_entries = 256 - tbl_min;
		int idx = 0;

		while (idx < n_entries) {
			if (tbl[idx] == c) {
				CK((*filter->output_function)(idx + tbl_min, filter->data));
				return 0;
			}
			idx++;
		}
	}

	/* Negative, MBFL_BAD_INPUT, or not present in the table */
	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
54 
/* Initialize data structures for a single-byte encoding.
 *
 * Expands to:
 *   - forward declarations of the four conversion functions for `id`
 *     (their bodies must be provided separately in this file),
 *   - one mbfl_convert_vtbl per conversion direction,
 *   - the non-static `mbfl_encoding_<id>` object which references the
 *     vtables, the two buffer-based converters, and the supplied `name`,
 *     `mime_name` and `aliases`.
 * The expansion ends without a semicolon; the invocation supplies it. */
#define DEF_SB(id, name, mime_name, aliases) \
	/* Buffer-based converters */ \
	static void mb_wchar_to_##id(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); \
	static size_t mb_##id##_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); \
	/* Per-character filter callbacks */ \
	static int mbfl_filt_conv_wchar_##id(int c, mbfl_convert_filter *filter); \
	static int mbfl_filt_conv_##id##_wchar(int c, mbfl_convert_filter *filter); \
	/* wchar -> id */ \
	static const struct mbfl_convert_vtbl vtbl_wchar_##id = { \
		mbfl_no_encoding_wchar, \
		mbfl_no_encoding_##id, \
		mbfl_filt_conv_common_ctor, \
		NULL, \
		mbfl_filt_conv_wchar_##id, \
		mbfl_filt_conv_common_flush, \
		NULL \
	}; \
	/* id -> wchar */ \
	static const struct mbfl_convert_vtbl vtbl_##id##_wchar = { \
		mbfl_no_encoding_##id, \
		mbfl_no_encoding_wchar, \
		mbfl_filt_conv_common_ctor, \
		NULL, \
		mbfl_filt_conv_##id##_wchar, \
		mbfl_filt_conv_common_flush, \
		NULL \
	}; \
	const mbfl_encoding mbfl_encoding_##id = { \
		mbfl_no_encoding_##id, \
		name, \
		mime_name, \
		aliases, \
		NULL, \
		MBFL_ENCTYPE_SBCS, \
		&vtbl_##id##_wchar, \
		&vtbl_wchar_##id, \
		mb_##id##_to_wchar, \
		mb_wchar_to_##id, \
		NULL \
	}
92 
/* For single-byte encodings which use a conversion table.
 *
 * Generates the four conversion functions for `id` and then invokes DEF_SB.
 * Bytes below `tbl_min` map to themselves; byte b >= tbl_min maps to
 * tbl[b - tbl_min], where a zero entry is decoded as MBFL_BAD_INPUT.
 * Encoding searches the 256 - tbl_min table entries linearly and reports
 * values that are not found through MB_CONVERT_ERROR. */
#define DEF_SB_TBL(id, name, mime_name, aliases, tbl_min, tbl) \
	static int mbfl_filt_conv_##id##_wchar(int c, mbfl_convert_filter *filter) \
	{ \
		return mbfl_conv_singlebyte_table(c, filter, tbl_min, tbl); \
	} \
	static int mbfl_filt_conv_wchar_##id(int c, mbfl_convert_filter *filter) \
	{ \
		return mbfl_conv_reverselookup_table(c, filter, tbl_min, tbl); \
	} \
	static size_t mb_##id##_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) \
	{ \
		unsigned char *src = *in; \
		unsigned char *src_end = src + *in_len; \
		uint32_t *dst = buf; \
		uint32_t *dst_end = buf + bufsize; \
		/* Stop when either the input or the output buffer is exhausted */ \
		for (; src < src_end && dst < dst_end; src++, dst++) { \
			unsigned char byte = *src; \
			if (byte < tbl_min) { \
				*dst = byte; \
			} else { \
				*dst = coalesce(tbl[byte - tbl_min], MBFL_BAD_INPUT); \
			} \
		} \
		*in_len = src_end - src; \
		*in = src; \
		return dst - buf; \
	} \
	static void mb_wchar_to_##id(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) \
	{ \
		unsigned char *out, *limit; \
		MB_CONVERT_BUF_LOAD(buf, out, limit); \
		MB_CONVERT_BUF_ENSURE(buf, out, limit, len); \
		while (len--) { \
			uint32_t w = *in++; \
			if (w < tbl_min) { \
				out = mb_convert_buf_add(out, w & 0xFF); \
				continue; \
			} \
			/* Linear search for `w`; slot ends up past the table if absent */ \
			int slot = 0; \
			while (slot < 256 - tbl_min && w != tbl[slot]) { \
				slot++; \
			} \
			if (slot < 256 - tbl_min) { \
				out = mb_convert_buf_add(out, slot + tbl_min); \
			} else { \
				MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_##id); \
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len); \
			} \
		} \
		MB_CONVERT_BUF_STORE(buf, out, limit); \
	} \
	DEF_SB(id, name, mime_name, aliases)
137 
138 /* The grand-daddy of them all: ASCII */
139 static const char *ascii_aliases[] = {"ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "US-ASCII", "ISO646-US", "us", "IBM367", "IBM-367", "cp367", "csASCII", NULL};
140 DEF_SB(ascii, "ASCII", "US-ASCII", ascii_aliases);
141 
/* ASCII -> wchar filter: values below 0x80 are forwarded unchanged, anything
 * else is forwarded as MBFL_BAD_INPUT. Returns 0, or -1 if the output
 * function reports a negative result. */
static int mbfl_filt_conv_ascii_wchar(int c, mbfl_convert_filter *filter)
{
	if (c < 0x80) {
		CK((*filter->output_function)(c, filter->data));
	} else {
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}
	return 0;
}
147 
/* wchar -> ASCII filter: only values in [0, 0x80) other than MBFL_BAD_INPUT
 * are emitted; everything else goes to mbfl_filt_conv_illegal_output().
 * Returns 0, or -1 if a callee reports a negative result. */
static int mbfl_filt_conv_wchar_ascii(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= 0x80 || c == MBFL_BAD_INPUT) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	} else {
		CK((*filter->output_function)(c, filter->data));
	}
	return 0;
}
157 
mb_ascii_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)158 static size_t mb_ascii_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
159 {
160 	unsigned char *p = *in, *e = p + *in_len;
161 	uint32_t *out = buf, *limit = buf + bufsize;
162 
163 	while (p < e && out < limit) {
164 		unsigned char c = *p++;
165 		*out++ = (c < 0x80) ? c : MBFL_BAD_INPUT;
166 	}
167 
168 	*in_len = e - p;
169 	*in = p;
170 	return out - buf;
171 }
172 
/* Encode `len` wide characters from `in` as ASCII into `buf`.
 * Values below 0x80 are appended as a single byte; all others are reported
 * through MB_CONVERT_ERROR, after which room for the remaining input is
 * ensured again. `end` is not used. */
static void mb_wchar_to_ascii(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;

		if (w >= 0x80) {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_ascii);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			continue;
		}
		out = mb_convert_buf_add(out, w & 0xFF);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
191 
192 /* ISO-8859-X */
193 
194 static const char *iso8859_1_aliases[] = {"ISO8859-1", "latin1", NULL};
195 DEF_SB(8859_1, "ISO-8859-1", "ISO-8859-1", iso8859_1_aliases);
196 
/* ISO-8859-1 -> wchar filter: the value is forwarded unchanged and the output
 * function's result is returned as-is. */
static int mbfl_filt_conv_8859_1_wchar(int c, mbfl_convert_filter *filter)
{
	int result = filter->output_function(c, filter->data);
	return result;
}
201 
/* wchar -> ISO-8859-1 filter: values in [0, 0x100) other than MBFL_BAD_INPUT
 * are emitted unchanged; everything else goes to
 * mbfl_filt_conv_illegal_output(). Returns 0, or -1 if a callee reports a
 * negative result. */
static int mbfl_filt_conv_wchar_8859_1(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= 0x100 || c == MBFL_BAD_INPUT) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	} else {
		CK((*filter->output_function)(c, filter->data));
	}
	return 0;
}
211 
/* Decode ISO-8859-1 bytes from *in into `buf`: every byte is copied to one
 * output entry with the same numeric value. At most `bufsize` entries are
 * written. On return *in and *in_len describe the unconsumed input; the
 * return value is the number of entries written. `state` is not used. */
static size_t mb_8859_1_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *src = *in;
	size_t available = *in_len;
	/* Number of characters converted: limited by input and by output space */
	size_t count = (available < bufsize) ? available : bufsize;

	for (size_t i = 0; i < count; i++) {
		buf[i] = src[i];
	}

	*in_len = available - count;
	*in = src + count;
	return count;
}
225 
/* Encode `len` wide characters from `in` as ISO-8859-1 into `buf`.
 * Values below 0x100 are appended as a single byte; all others are reported
 * through MB_CONVERT_ERROR, after which room for the remaining input is
 * ensured again. `end` is not used. */
static void mb_wchar_to_8859_1(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;

		if (w >= 0x100) {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_8859_1);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			continue;
		}
		out = mb_convert_buf_add(out, w);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
244 
245 static const char *iso8859_2_aliases[] = {"ISO8859-2", "latin2", NULL};
246 static const unsigned short iso8859_2_ucs_table[] = {
247 	0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
248 	0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
249 	0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
250 	0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
251 	0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
252 	0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
253 	0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
254 	0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
255 	0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
256 	0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
257 	0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
258 	0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
259 };
260 DEF_SB_TBL(8859_2, "ISO-8859-2", "ISO-8859-2", iso8859_2_aliases, 0xA0, iso8859_2_ucs_table);
261 
262 static const char *iso8859_3_aliases[] = {"ISO8859-3", "latin3", NULL};
263 static const unsigned short iso8859_3_ucs_table[] = {
264 	0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0x0000, 0x0124, 0x00A7,
265 	0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0x0000, 0x017B,
266 	0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
267 	0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0x0000, 0x017C,
268 	0x00C0, 0x00C1, 0x00C2, 0x0000, 0x00C4, 0x010A, 0x0108, 0x00C7,
269 	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
270 	0x0000, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
271 	0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
272 	0x00E0, 0x00E1, 0x00E2, 0x0000, 0x00E4, 0x010B, 0x0109, 0x00E7,
273 	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
274 	0x0000, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
275 	0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9
276 };
277 DEF_SB_TBL(8859_3, "ISO-8859-3", "ISO-8859-3", iso8859_3_aliases, 0xA0, iso8859_3_ucs_table);
278 
279 static const char *iso8859_4_aliases[] = {"ISO8859-4", "latin4", NULL};
280 static const unsigned short iso8859_4_ucs_table[] = {
281 	0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
282 	0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
283 	0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
284 	0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
285 	0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
286 	0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
287 	0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
288 	0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
289 	0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
290 	0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
291 	0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
292 	0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9
293 };
294 DEF_SB_TBL(8859_4, "ISO-8859-4", "ISO-8859-4", iso8859_4_aliases, 0xA0, iso8859_4_ucs_table);
295 
296 static const char *iso8859_5_aliases[] = {"ISO8859-5", "cyrillic", NULL};
297 static const unsigned short iso8859_5_ucs_table[] = {
298 	0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
299 	0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
300 	0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
301 	0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
302 	0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
303 	0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
304 	0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
305 	0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
306 	0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
307 	0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
308 	0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
309 	0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F
310 };
311 DEF_SB_TBL(8859_5, "ISO-8859-5", "ISO-8859-5", iso8859_5_aliases, 0xA0, iso8859_5_ucs_table);
312 
313 static const char *iso8859_6_aliases[] = {"ISO8859-6", "arabic", NULL};
314 static const unsigned short iso8859_6_ucs_table[] = {
315 	0x00A0, 0x0000, 0x0000, 0x0000, 0x00A4, 0x0000, 0x0000, 0x0000,
316 	0x0000, 0x0000, 0x0000, 0x0000, 0x060C, 0x00AD, 0x0000, 0x0000,
317 	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
318 	0x0000, 0x0000, 0x0000, 0x061B, 0x0000, 0x0000, 0x0000, 0x061F,
319 	0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
320 	0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
321 	0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
322 	0x0638, 0x0639, 0x063A, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
323 	0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
324 	0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
325 	0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
326 	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
327 };
328 DEF_SB_TBL(8859_6, "ISO-8859-6", "ISO-8859-6", iso8859_6_aliases, 0xA0, iso8859_6_ucs_table);
329 
330 static const char *iso8859_7_aliases[] = {"ISO8859-7", "greek", NULL};
331 static const unsigned short iso8859_7_ucs_table[] = {
332 	0x00A0, 0x2018, 0x2019, 0x00A3, 0x20AC, 0x20AF, 0x00A6, 0x00A7,
333 	0x00A8, 0x00A9, 0x037A, 0x00AB, 0x00AC, 0x00AD, 0x0000, 0x2015,
334 	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
335 	0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
336 	0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
337 	0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
338 	0x03A0, 0x03A1, 0x0000, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
339 	0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
340 	0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
341 	0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
342 	0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
343 	0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x0000
344 };
345 DEF_SB_TBL(8859_7, "ISO-8859-7", "ISO-8859-7", iso8859_7_aliases, 0xA0, iso8859_7_ucs_table);
346 
347 static const char *iso8859_8_aliases[] = {"ISO8859-8", "hebrew", NULL};
348 static const unsigned short iso8859_8_ucs_table[] = {
349 	0x00A0, 0x0000, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
350 	0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
351 	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
352 	0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x0000,
353 	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
354 	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
355 	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
356 	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
357 	0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
358 	0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
359 	0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
360 	0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x200E, 0x200F, 0x0000
361 };
362 DEF_SB_TBL(8859_8, "ISO-8859-8", "ISO-8859-8", iso8859_8_aliases, 0xA0, iso8859_8_ucs_table);
363 
364 static const char *iso8859_9_aliases[] = {"ISO8859-9", "latin5", NULL};
365 static const unsigned short iso8859_9_ucs_table[] = {
366 	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
367 	0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
368 	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
369 	0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
370 	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
371 	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
372 	0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
373 	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
374 	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
375 	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
376 	0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
377 	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF
378 };
379 DEF_SB_TBL(8859_9, "ISO-8859-9", "ISO-8859-9", iso8859_9_aliases, 0xA0, iso8859_9_ucs_table);
380 
381 static const char *iso8859_10_aliases[] = {"ISO8859-10", "latin6", NULL};
382 static const unsigned short iso8859_10_ucs_table[] = {
383 	0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
384 	0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
385 	0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
386 	0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
387 	0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
388 	0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
389 	0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
390 	0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
391 	0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
392 	0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
393 	0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
394 	0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138
395 };
396 DEF_SB_TBL(8859_10, "ISO-8859-10", "ISO-8859-10", iso8859_10_aliases, 0xA0, iso8859_10_ucs_table);
397 
398 static const char *iso8859_13_aliases[] = {"ISO8859-13", NULL};
399 static const unsigned short iso8859_13_ucs_table[] = {
400 	0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
401 	0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
402 	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
403 	0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
404 	0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
405 	0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
406 	0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
407 	0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
408 	0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
409 	0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
410 	0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
411 	0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019
412 };
413 DEF_SB_TBL(8859_13, "ISO-8859-13", "ISO-8859-13", iso8859_13_aliases, 0xA0, iso8859_13_ucs_table);
414 
415 static const char *iso8859_14_aliases[] = {"ISO8859-14", "latin8", NULL};
416 static const unsigned short iso8859_14_ucs_table[] = {
417 	0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
418 	0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
419 	0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
420 	0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
421 	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
422 	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
423 	0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
424 	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
425 	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
426 	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
427 	0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
428 	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF
429 };
430 DEF_SB_TBL(8859_14, "ISO-8859-14", "ISO-8859-14", iso8859_14_aliases, 0xA0, iso8859_14_ucs_table);
431 
432 static const char *iso8859_15_aliases[] = {"ISO8859-15", NULL};
433 static const unsigned short iso8859_15_ucs_table[] = {
434 	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
435 	0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
436 	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
437 	0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
438 	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
439 	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
440 	0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
441 	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
442 	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
443 	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
444 	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
445 	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
446 };
447 DEF_SB_TBL(8859_15, "ISO-8859-15", "ISO-8859-15", iso8859_15_aliases, 0xA0, iso8859_15_ucs_table);
448 
449 static const char *iso8859_16_aliases[] = {"ISO8859-16", NULL};
450 static const unsigned short iso8859_16_ucs_table[] = {
451 	0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7,
452 	0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
453 	0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
454 	0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
455 	0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7,
456 	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
457 	0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
458 	0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
459 	0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
460 	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
461 	0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B,
462 	0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF
463 };
464 DEF_SB_TBL(8859_16, "ISO-8859-16", "ISO-8859-16", iso8859_16_aliases, 0xA0, iso8859_16_ucs_table);
465 
466 static const char *cp1251_aliases[] = {"CP1251", "CP-1251", "WINDOWS-1251", NULL};
467 static const unsigned short cp1251_ucs_table[] = {
468 	0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
469 	0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
470 	0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
471 	0x0000, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
472 	0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
473 	0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
474 	0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
475 	0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
476 	0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
477 	0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
478 	0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
479 	0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
480 	0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
481 	0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
482 	0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
483 	0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F
484 };
485 DEF_SB_TBL(cp1251, "Windows-1251", "Windows-1251", cp1251_aliases, 0x80, cp1251_ucs_table);
486 
487 static const char *cp1252_aliases[] = {"cp1252", NULL};
488 static const unsigned short cp1252_ucs_table[] = {
489 	0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
490 	0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
491 	0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
492 	0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
493 };
494 DEF_SB(cp1252, "Windows-1252", "Windows-1252", cp1252_aliases);
495 
/* wchar -> Windows-1252 filter.
 *
 *   - negative values and MBFL_BAD_INPUT are illegal;
 *   - values below 0x100 are emitted unchanged when they are <= 0x7F,
 *     >= 0xA0, or one of 0x81, 0x8D, 0x8F, 0x90, 0x9D; otherwise illegal;
 *   - values >= 0x100 are looked up in cp1252_ucs_table and emitted as
 *     0x80 + index when found; otherwise illegal.
 * Illegal values go to mbfl_filt_conv_illegal_output(). Returns 0, or -1 if
 * a callee reports a negative result. */
static int mbfl_filt_conv_wchar_cp1252(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c == MBFL_BAD_INPUT) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	if (c < 0x100) {
		if (c <= 0x7F || c >= 0xA0 || c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D) {
			CK((*filter->output_function)(c, filter->data));
		} else {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return 0;
	}

	/* c >= 0x100: search the 32 table entries covering bytes 0x80..0x9F */
	int idx = 0;
	while (idx < 32) {
		if (cp1252_ucs_table[idx] == c) {
			CK((*filter->output_function)(0x80 + idx, filter->data));
			return 0;
		}
		idx++;
	}
	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
515 
/* Windows-1252 -> wchar filter: values in 0x80..0x9F are translated through
 * cp1252_ucs_table (a zero entry would become MBFL_BAD_INPUT); every other
 * value is forwarded unchanged. Returns 0, or -1 if the output function
 * reports a negative result. */
static int mbfl_filt_conv_cp1252_wchar(int c, mbfl_convert_filter *filter)
{
	int s = c;

	if (0x80 <= c && c < 0xA0) {
		s = coalesce(cp1252_ucs_table[c - 0x80], MBFL_BAD_INPUT);
	}

	CK((*filter->output_function)(s, filter->data));
	return 0;
}
527 
mb_cp1252_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)528 static size_t mb_cp1252_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
529 {
530 	unsigned char *p = *in, *e = p + *in_len;
531 	uint32_t *out = buf, *limit = buf + bufsize;
532 
533 	while (p < e && out < limit) {
534 		unsigned char c = *p++;
535 
536 		if (c >= 0x80 && c < 0xA0) {
537 			*out++ = coalesce(cp1252_ucs_table[c - 0x80], MBFL_BAD_INPUT);
538 		} else {
539 			*out++ = c;
540 		}
541 	}
542 
543 	*in_len = e - p;
544 	*in = p;
545 	return out - buf;
546 }
547 
/* Encode `len` wide characters from `in` as Windows-1252 into `buf`.
 *
 * Values below 0x100 are appended unchanged when they are <= 0x7F, >= 0xA0,
 * or one of 0x81, 0x8D, 0x8F, 0x90, 0x9D. Values >= 0x100 are searched for
 * in cp1252_ucs_table and appended as 0x80 + index when found. Everything
 * else is reported through MB_CONVERT_ERROR, after which room for the
 * remaining input is ensured again. `end` is not used. */
static void mb_wchar_to_cp1252(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;

		if (w < 0x100) {
			if (w <= 0x7F || w >= 0xA0 || w == 0x81 || w == 0x8D || w == 0x8F || w == 0x90 || w == 0x9D) {
				out = mb_convert_buf_add(out, w);
			} else {
				MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp1252);
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			}
			continue;
		}

		/* Linear search; slot == 32 afterwards means `w` is not in the table */
		int slot = 0;
		while (slot < 32 && cp1252_ucs_table[slot] != w) {
			slot++;
		}

		if (slot < 32) {
			out = mb_convert_buf_add(out, slot + 0x80);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp1252);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
577 
578 static const char *cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL};
579 static const unsigned short cp1254_ucs_table[] = {
580 	0x20AC, 0X0000, 0X201A, 0X0192, 0X201E, 0X2026, 0X2020, 0X2021,
581 	0X02C6, 0X2030, 0X0160, 0X2039, 0X0152, 0X0000, 0X0000, 0X0000,
582 	0X0000, 0X2018, 0X2019, 0X201C, 0X201D, 0X2022, 0X2013, 0X2014,
583 	0X02DC, 0X2122, 0X0161, 0X203A, 0X0153, 0X0000, 0X0000, 0X0178,
584 	0X00A0, 0X00A1, 0X00A2, 0X00A3, 0X00A4, 0X00A5, 0X00A6, 0X00A7,
585 	0X00A8, 0X00A9, 0X00AA, 0X00AB, 0X00AC, 0X00AD, 0X00AE, 0X00AF,
586 	0X00B0, 0X00B1, 0X00B2, 0X00B3, 0X00B4, 0X00B5, 0X00B6, 0X00B7,
587 	0X00B8, 0X00B9, 0X00BA, 0X00BB, 0X00BC, 0X00BD, 0X00BE, 0X00BF,
588 	0X00C0, 0X00C1, 0X00C2, 0X00C3, 0X00C4, 0X00C5, 0X00C6, 0X00C7,
589 	0X00C8, 0X00C9, 0X00CA, 0X00CB, 0X00CC, 0X00CD, 0X00CE, 0X00CF,
590 	0X011E, 0X00D1, 0X00D2, 0X00D3, 0X00D4, 0X00D5, 0X00D6, 0X00D7,
591 	0X00D8, 0X00D9, 0X00DA, 0X00DB, 0X00DC, 0X0130, 0X015E, 0X00DF,
592 	0X00E0, 0X00E1, 0X00E2, 0X00E3, 0X00E4, 0X00E5, 0X00E6, 0X00E7,
593 	0X00E8, 0X00E9, 0X00EA, 0X00EB, 0X00EC, 0X00ED, 0X00EE, 0X00EF,
594 	0X011F, 0X00F1, 0X00F2, 0X00F3, 0X00F4, 0X00F5, 0X00F6, 0X00F7,
595 	0X00F8, 0X00F9, 0X00FA, 0X00FB, 0X00FC, 0X0131, 0X015F, 0X00FF
596 };
597 DEF_SB_TBL(cp1254, "Windows-1254", "Windows-1254", cp1254_aliases, 0x80, cp1254_ucs_table);
598 
599 static const char *cp866_aliases[] = {"CP-866", "IBM866", "IBM-866", NULL};
600 static const unsigned short cp866_ucs_table[] = {
601 	0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
602 	0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
603 	0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
604 	0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
605 	0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
606 	0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
607 	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
608 	0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
609 	0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
610 	0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
611 	0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
612 	0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
613 	0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
614 	0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
615 	0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E,
616 	0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0
617 };
618 DEF_SB_TBL(cp866, "CP866", "CP866", cp866_aliases, 0x80, cp866_ucs_table);
619 
620 static const char *cp850_aliases[] = {"CP-850", "IBM850", "IBM-850", NULL};
621 static const unsigned short cp850_ucs_table[] = {
622 	0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
623 	0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
624 	0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
625 	0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
626 	0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
627 	0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
628 	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
629 	0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
630 	0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
631 	0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
632 	0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
633 	0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
634 	0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
635 	0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
636 	0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
637 	0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
638 };
639 DEF_SB_TBL(cp850, "CP850", "CP850", cp850_aliases, 0x80, cp850_ucs_table);
640 
/* NULL-terminated list of alias strings for KOI8-R (handed to DEF_SB_TBL). */
static const char *koi8r_aliases[] = {
	"KOI8R",
	NULL
};

/*
 * KOI8-R -> Unicode lookup table: 128 code points, one per byte 0x80..0xFF
 * (index = byte - 0x80, the base value given to DEF_SB_TBL for this table).
 * Every row holds eight consecutive bytes; the leading comment is the byte
 * value of the row's first entry.
 */
static const unsigned short koi8r_ucs_table[] = {
	/* 0x80 */ 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
	/* 0x88 */ 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
	/* 0x90 */ 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
	/* 0x98 */ 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
	/* 0xA0 */ 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
	/* 0xA8 */ 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
	/* 0xB0 */ 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
	/* 0xB8 */ 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
	/* 0xC0 */ 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
	/* 0xC8 */ 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
	/* 0xD0 */ 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
	/* 0xD8 */ 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
	/* 0xE0 */ 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
	/* 0xE8 */ 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
	/* 0xF0 */ 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
	/* 0xF8 */ 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A
};
/*
 * Instantiate the KOI8-R encoding from its name strings, alias list and
 * koi8r_ucs_table.
 * NOTE(review): DEF_SB_TBL is defined outside this excerpt; the 0x80 argument
 * is presumably the first byte value that is looked up in the table (lower
 * values passing through unchanged) -- confirm against the macro definition.
 */
DEF_SB_TBL(koi8r, "KOI8-R", "KOI8-R", koi8r_aliases, 0x80, koi8r_ucs_table);
661 
/* NULL-terminated list of alias strings for KOI8-U (handed to DEF_SB_TBL). */
static const char *koi8u_aliases[] = {
	"KOI8U",
	NULL
};

/*
 * KOI8-U -> Unicode lookup table: 128 code points, one per byte 0x80..0xFF
 * (index = byte - 0x80, the base value given to DEF_SB_TBL for this table).
 * Every row holds eight consecutive bytes; the leading comment is the byte
 * value of the row's first entry.
 *
 * The contents equal koi8r_ucs_table except for eight slots in the
 * 0xA0..0xBF rows (0xA4, 0xA6, 0xA7, 0xAD, 0xB4, 0xB6, 0xB7, 0xBD), which
 * hold Cyrillic letters U+0454/0456/0457/0491 and U+0404/0406/0407/0490
 * instead of box-drawing characters.
 */
static const unsigned short koi8u_ucs_table[] = {
	/* 0x80 */ 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
	/* 0x88 */ 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
	/* 0x90 */ 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
	/* 0x98 */ 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
	/* 0xA0 */ 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
	/* 0xA8 */ 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
	/* 0xB0 */ 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
	/* 0xB8 */ 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
	/* 0xC0 */ 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
	/* 0xC8 */ 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
	/* 0xD0 */ 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
	/* 0xD8 */ 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
	/* 0xE0 */ 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
	/* 0xE8 */ 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
	/* 0xF0 */ 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
	/* 0xF8 */ 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A
};
/*
 * Instantiate the KOI8-U encoding from its name strings, alias list and
 * koi8u_ucs_table.
 * NOTE(review): DEF_SB_TBL is defined outside this excerpt; the 0x80 argument
 * is presumably the first byte value that is looked up in the table (lower
 * values passing through unchanged) -- confirm against the macro definition.
 */
DEF_SB_TBL(koi8u, "KOI8-U", "KOI8-U", koi8u_aliases, 0x80, koi8u_ucs_table);
682 
/* NULL-terminated list of alias strings for ArmSCII-8 (handed to DEF_SB). */
static const char *armscii8_aliases[] = {
	"ArmSCII8",
	"ARMSCII-8",
	"ARMSCII8",
	NULL
};

/*
 * ArmSCII-8 -> Unicode lookup table: 96 code points, one per byte 0xA0..0xFF
 * (index = byte - 0xA0). An entry of 0x0000 marks a byte without a mapping;
 * the decoders in this file turn such entries into MBFL_BAD_INPUT.
 * Every row holds eight consecutive bytes; the leading comment is the byte
 * value of the row's first entry.
 */
static const unsigned short armscii8_ucs_table[] = {
	/* 0xA0 */ 0x00A0, 0x0000, 0x0587, 0x0589, 0x0029, 0x0028, 0x00BB, 0x00AB,
	/* 0xA8 */ 0x2014, 0x002E, 0x055D, 0x002C, 0x002D, 0x058A, 0x2026, 0x055C,
	/* 0xB0 */ 0x055B, 0x055E, 0x0531, 0x0561, 0x0532, 0x0562, 0x0533, 0x0563,
	/* 0xB8 */ 0x0534, 0x0564, 0x0535, 0x0565, 0x0536, 0x0566, 0x0537, 0x0567,
	/* 0xC0 */ 0x0538, 0x0568, 0x0539, 0x0569, 0x053A, 0x056A, 0x053B, 0x056B,
	/* 0xC8 */ 0x053C, 0x056C, 0x053D, 0x056D, 0x053E, 0x056E, 0x053F, 0x056F,
	/* 0xD0 */ 0x0540, 0x0570, 0x0541, 0x0571, 0x0542, 0x0572, 0x0543, 0x0573,
	/* 0xD8 */ 0x0544, 0x0574, 0x0545, 0x0575, 0x0546, 0x0576, 0x0547, 0x0577,
	/* 0xE0 */ 0x0548, 0x0578, 0x0549, 0x0579, 0x054A, 0x057A, 0x054B, 0x057B,
	/* 0xE8 */ 0x054C, 0x057C, 0x054D, 0x057D, 0x054E, 0x057E, 0x054F, 0x057F,
	/* 0xF0 */ 0x0550, 0x0580, 0x0551, 0x0581, 0x0552, 0x0582, 0x0553, 0x0583,
	/* 0xF8 */ 0x0554, 0x0584, 0x0555, 0x0585, 0x0556, 0x0586, 0x055A, 0x0000
};

/*
 * Unicode -> ArmSCII-8 bytes for the code points 0x28..0x2F
 * (index = code point - 0x28). Five of these (0x28, 0x29, 0x2C, 0x2D, 0x2E)
 * are emitted as bytes in the 0xA0+ range; 0x2A, 0x2B and 0x2F map to
 * themselves.
 */
static const unsigned char ucs_armscii8_table[] = {
	/* 0x28 */ 0xA5,
	/* 0x29 */ 0xA4,
	/* 0x2A */ 0x2A,
	/* 0x2B */ 0x2B,
	/* 0x2C */ 0xAB,
	/* 0x2D */ 0xAC,
	/* 0x2E */ 0xA9,
	/* 0x2F */ 0x2F
};
/*
 * Instantiate the ArmSCII-8 encoding from its name strings and alias list.
 * NOTE(review): DEF_SB is defined outside this excerpt. Unlike the DEF_SB_TBL
 * invocations it receives no table; the armscii8 conversion functions are
 * written out by hand after this line, so the macro presumably declares and
 * references them by name -- confirm against the macro definition.
 */
DEF_SB(armscii8, "ArmSCII-8", "ArmSCII-8", armscii8_aliases);
702 
/*
 * Decode a single ArmSCII-8 value `c` and hand the resulting code point to
 * the filter's output function.
 *
 * Values below 0xA0 are forwarded unchanged. Anything else is looked up in
 * armscii8_ucs_table at index (c - 0xA0); a zero table entry is replaced by
 * MBFL_BAD_INPUT.
 *
 * Returns 0 on success, or -1 as soon as the output function returns a
 * negative value (see CK).
 *
 * NOTE(review): no upper bound is applied to `c` before the table lookup, so
 * this presumably relies on callers passing byte values only -- confirm.
 */
static int mbfl_filt_conv_armscii8_wchar(int c, mbfl_convert_filter *filter)
{
	uint32_t wchar = c;

	if (c >= 0xA0) {
		wchar = coalesce(armscii8_ucs_table[c - 0xA0], MBFL_BAD_INPUT);
	}

	CK((*filter->output_function)(wchar, filter->data));
	return 0;
}
708 
/*
 * Encode a single code point `c` as ArmSCII-8 and hand the resulting byte to
 * the filter's output function.
 *
 * Cases, checked in this order:
 *   - 0x28..0x2F: translated through ucs_armscii8_table;
 *   - negative or equal to MBFL_BAD_INPUT: passed to
 *     mbfl_filt_conv_illegal_output;
 *   - below 0xA0: emitted unchanged;
 *   - otherwise: armscii8_ucs_table is scanned front to back and the first
 *     matching index i yields byte 0xA0 + i; without a match the code point
 *     goes to mbfl_filt_conv_illegal_output.
 *
 * Returns 0 on success, or -1 as soon as a callee returns a negative value
 * (see CK).
 */
static int mbfl_filt_conv_wchar_armscii8(int c, mbfl_convert_filter *filter)
{
	if (c >= 0x28 && c <= 0x2F) {
		CK((*filter->output_function)(ucs_armscii8_table[c - 0x28], filter->data));
		return 0;
	}

	if (c < 0 || c == MBFL_BAD_INPUT) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	if (c < 0xA0) {
		CK((*filter->output_function)(c, filter->data));
		return 0;
	}

	/* Reverse lookup: find the first table slot holding this code point. */
	int slot = 0;
	while (slot < 0x60 && c != armscii8_ucs_table[slot]) {
		slot++;
	}

	if (slot < 0x60) {
		CK((*filter->output_function)(0xA0 + slot, filter->data));
	} else {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}
	return 0;
}
728 
mb_armscii8_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)729 static size_t mb_armscii8_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
730 {
731 	unsigned char *p = *in, *e = p + *in_len;
732 	uint32_t *out = buf, *limit = buf + bufsize;
733 
734 	while (p < e && out < limit) {
735 		unsigned char c = *p++;
736 		*out++ = (c < 0xA0) ? c : coalesce(armscii8_ucs_table[c - 0xA0], MBFL_BAD_INPUT);
737 	}
738 
739 	*in_len = e - p;
740 	*in = p;
741 	return out - buf;
742 }
743 
/*
 * Encode `len` code points from `in` as ArmSCII-8 bytes appended to `buf`.
 *
 * Per code point, checked in this order:
 *   - 0x28..0x2F: translated through ucs_armscii8_table;
 *   - below 0xA0: emitted unchanged;
 *   - otherwise: armscii8_ucs_table is scanned front to back and the first
 *     matching index i yields byte 0xA0 + i; without a match the code point
 *     is reported through MB_CONVERT_ERROR.
 *
 * The output cursor is loaded from `buf` on entry and stored back on exit.
 * `end` is not referenced directly in this function body.
 *
 * NOTE(review): the MB_CONVERT_* macros and mb_convert_buf_add are defined
 * outside this excerpt. Space for `len` bytes is requested up front, and
 * again for the remaining count after every MB_CONVERT_ERROR -- presumably
 * because error handling may itself write to the buffer; confirm against
 * the macro definitions.
 */
static void mb_wchar_to_armscii8(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t cp = *in++;

		if (cp >= 0x28 && cp <= 0x2F) {
			out = mb_convert_buf_add(out, ucs_armscii8_table[cp - 0x28]);
			continue;
		}

		if (cp < 0xA0) {
			out = mb_convert_buf_add(out, cp);
			continue;
		}

		/* Reverse lookup: find the first table slot holding this code point. */
		int slot = 0;
		while (slot < 0x60 && cp != armscii8_ucs_table[slot]) {
			slot++;
		}

		if (slot < 0x60) {
			out = mb_convert_buf_add(out, 0xA0 + slot);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_armscii8);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
772