1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 */
14
15 #include "mbfilter_singlebyte.h"
16
/* Evaluate `statement` exactly once. If its value is negative, make the
 * enclosing function return -1; otherwise fall through to the next statement. */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
18
/* Return `a` unless it is zero, in which case return the fallback `b`. */
static inline uint32_t coalesce(uint32_t a, uint32_t b)
{
	if (a != 0) {
		return a;
	}
	return b;
}
23
24 /* Helper for single-byte encodings which use a conversion table */
mbfl_conv_singlebyte_table(int c,mbfl_convert_filter * filter,int tbl_min,const unsigned short tbl[])25 static int mbfl_conv_singlebyte_table(int c, mbfl_convert_filter *filter, int tbl_min, const unsigned short tbl[])
26 {
27 if (c >= 0 && c < tbl_min) {
28 CK((*filter->output_function)(c, filter->data));
29 } else if (c < 0) {
30 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
31 } else {
32 CK((*filter->output_function)(coalesce(tbl[c - tbl_min], MBFL_BAD_INPUT), filter->data));
33 }
34 return 0;
35 }
36
/* Helper for converting a wide character back to a table-based single-byte
 * encoding.
 *
 * Values in [0, tbl_min) are emitted unchanged. Any other non-negative value
 * which is not MBFL_BAD_INPUT is searched for linearly in the first
 * (256 - tbl_min) entries of tbl; on a match, the matching index plus tbl_min
 * is emitted. Everything else is handed to mbfl_filt_conv_illegal_output.
 * Returns -1 if the invoked output routine yields a negative value, else 0. */
static int mbfl_conv_reverselookup_table(int c, mbfl_convert_filter *filter, int tbl_min, const unsigned short tbl[])
{
	if (c >= 0 && c < tbl_min) {
		CK((*filter->output_function)(c, filter->data));
		return 0;
	}

	if (!(c < 0 || c == MBFL_BAD_INPUT)) {
		const int tbl_len = 256 - tbl_min;
		for (int pos = 0; pos < tbl_len; pos++) {
			if (tbl[pos] == c) {
				CK((*filter->output_function)(pos + tbl_min, filter->data));
				return 0;
			}
		}
	}

	/* Negative, MBFL_BAD_INPUT, or not present in the table */
	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
54
/* Initialize data structures for a single-byte encoding.
 *
 * Expands to prototypes for the four conversion routines of encoding `id`
 * (their bodies must be provided separately), the two static filter vtables,
 * and the non-static `mbfl_encoding_<id>` descriptor which refers to all of
 * them. The expansion does not end in a semicolon; the invocation supplies it. */
#define DEF_SB(id, name, mime_name, aliases) \
	/* Prototypes of the conversion routines */ \
	static size_t mb_##id##_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); \
	static void mb_wchar_to_##id(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); \
	static int mbfl_filt_conv_##id##_wchar(int c, mbfl_convert_filter *filter); \
	static int mbfl_filt_conv_wchar_##id(int c, mbfl_convert_filter *filter); \
	/* Filter vtable using mbfl_filt_conv_wchar_<id> */ \
	static const struct mbfl_convert_vtbl vtbl_wchar_##id = { \
		mbfl_no_encoding_wchar, \
		mbfl_no_encoding_##id, \
		mbfl_filt_conv_common_ctor, \
		NULL, \
		mbfl_filt_conv_wchar_##id, \
		mbfl_filt_conv_common_flush, \
		NULL \
	}; \
	/* Filter vtable using mbfl_filt_conv_<id>_wchar */ \
	static const struct mbfl_convert_vtbl vtbl_##id##_wchar = { \
		mbfl_no_encoding_##id, \
		mbfl_no_encoding_wchar, \
		mbfl_filt_conv_common_ctor, \
		NULL, \
		mbfl_filt_conv_##id##_wchar, \
		mbfl_filt_conv_common_flush, \
		NULL \
	}; \
	/* Encoding descriptor tying the names, vtables and converters together */ \
	const mbfl_encoding mbfl_encoding_##id = { \
		mbfl_no_encoding_##id, \
		name, \
		mime_name, \
		aliases, \
		NULL, \
		MBFL_ENCTYPE_SBCS, \
		&vtbl_##id##_wchar, \
		&vtbl_wchar_##id, \
		mb_##id##_to_wchar, \
		mb_wchar_to_##id, \
		NULL, \
		NULL \
	}
93
/* For single-byte encodings which use a conversion table.
 *
 * `tbl` supplies the wide-character value for every byte from `tbl_min` up to
 * 0xFF, indexed by (byte - tbl_min); the converters read 256 - tbl_min
 * entries, so the table must be at least that long. A zero entry marks a byte
 * without a mapping (decoded as MBFL_BAD_INPUT). Bytes below `tbl_min` map to
 * themselves.
 *
 * The macro defines all four conversion routines for `id` and then expands
 * DEF_SB to create the vtables and the encoding descriptor.
 *
 * `tbl_min` and `tbl` are parenthesized wherever they appear inside an
 * expression, so passing an expression (e.g. `0x80 + 0x20`) cannot change
 * operator precedence in `256 - (tbl_min)` or `c - (tbl_min)`. */
#define DEF_SB_TBL(id, name, mime_name, aliases, tbl_min, tbl) \
	static int mbfl_filt_conv_##id##_wchar(int c, mbfl_convert_filter *filter) { \
		return mbfl_conv_singlebyte_table(c, filter, tbl_min, tbl); \
	} \
	static int mbfl_filt_conv_wchar_##id(int c, mbfl_convert_filter *filter) { \
		return mbfl_conv_reverselookup_table(c, filter, tbl_min, tbl); \
	} \
	/* Decode bytes from *in into buf until input or buffer space runs out; */ \
	/* advances *in, updates *in_len and returns the number of values written */ \
	static size_t mb_##id##_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) \
	{ \
		unsigned char *p = *in, *e = p + *in_len; \
		uint32_t *out = buf, *limit = buf + bufsize; \
		while (p < e && out < limit) { \
			unsigned char c = *p++; \
			*out++ = (c < (tbl_min)) ? c : coalesce((tbl)[c - (tbl_min)], MBFL_BAD_INPUT); \
		} \
		*in_len = e - p; \
		*in = p; \
		return out - buf; \
	} \
	/* Encode `len` wide characters; values not found in the table go through MB_CONVERT_ERROR */ \
	static void mb_wchar_to_##id(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) \
	{ \
		unsigned char *out, *limit; \
		MB_CONVERT_BUF_LOAD(buf, out, limit); \
		MB_CONVERT_BUF_ENSURE(buf, out, limit, len); \
		while (len--) { \
			uint32_t w = *in++; \
			if (w < (tbl_min)) { \
				out = mb_convert_buf_add(out, w & 0xFF); \
			} else { \
				for (int i = 0; i < 256 - (tbl_min); i++) { \
					if (w == (tbl)[i]) { \
						out = mb_convert_buf_add(out, i + (tbl_min)); \
						goto next_iteration; \
					} \
				} \
				MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_##id); \
				/* Re-check buffer space for the `len` characters still to come */ \
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len); \
next_iteration: ; \
			} \
		} \
		MB_CONVERT_BUF_STORE(buf, out, limit); \
	} \
	DEF_SB(id, name, mime_name, aliases)
138
139 /* The grand-daddy of them all: ASCII */
140 static const char *ascii_aliases[] = {"ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "US-ASCII", "ISO646-US", "us", "IBM367", "IBM-367", "cp367", "csASCII", NULL};
141 DEF_SB(ascii, "ASCII", "US-ASCII", ascii_aliases);
142
/* Filter step for ASCII input: values below 0x80 are forwarded unchanged,
 * anything else is replaced with MBFL_BAD_INPUT. Returns -1 if the output
 * function yields a negative value, otherwise 0. */
static int mbfl_filt_conv_ascii_wchar(int c, mbfl_convert_filter *filter)
{
	if (c < 0x80) {
		CK((*filter->output_function)(c, filter->data));
	} else {
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}
	return 0;
}
148
/* Filter step for ASCII output: values in [0, 0x80) other than MBFL_BAD_INPUT
 * are emitted as-is; everything else goes to mbfl_filt_conv_illegal_output.
 * Returns -1 if the invoked routine yields a negative value, otherwise 0. */
static int mbfl_filt_conv_wchar_ascii(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= 0x80 || c == MBFL_BAD_INPUT) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	CK((*filter->output_function)(c, filter->data));
	return 0;
}
158
mb_ascii_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)159 static size_t mb_ascii_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
160 {
161 unsigned char *p = *in, *e = p + *in_len;
162 uint32_t *out = buf, *limit = buf + bufsize;
163
164 while (p < e && out < limit) {
165 unsigned char c = *p++;
166 *out++ = (c < 0x80) ? c : MBFL_BAD_INPUT;
167 }
168
169 *in_len = e - p;
170 *in = p;
171 return out - buf;
172 }
173
/* Encode `len` wide characters from `in` as ASCII into `buf`.
 *
 * Values below 0x80 are appended as a single byte. Any other value is passed
 * to MB_CONVERT_ERROR, after which buffer space for the remaining characters
 * is re-checked. `end` is not used. */
static void mb_wchar_to_ascii(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len > 0) {
		/* From here on, `len` counts the characters still to come after this one */
		len--;
		uint32_t w = *in++;

		if (w >= 0x80) {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_ascii);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			continue;
		}
		out = mb_convert_buf_add(out, w & 0xFF);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
192
193 /* ISO-8859-X */
194
195 static const char *iso8859_1_aliases[] = {"ISO8859-1", "latin1", NULL};
196 DEF_SB(8859_1, "ISO-8859-1", "ISO-8859-1", iso8859_1_aliases);
197
/* Filter step for ISO-8859-1 input: forwards `c` unchanged to the output
 * function and returns that function's result directly (it is not
 * normalized to 0/-1 as in the CK-based converters). */
static int mbfl_filt_conv_8859_1_wchar(int c, mbfl_convert_filter *filter)
{
	int result = (*filter->output_function)(c, filter->data);
	return result;
}
202
/* Filter step for ISO-8859-1 output: values in [0, 0x100) other than
 * MBFL_BAD_INPUT are emitted as-is; everything else goes to
 * mbfl_filt_conv_illegal_output. Returns -1 if the invoked routine yields a
 * negative value, otherwise 0. */
static int mbfl_filt_conv_wchar_8859_1(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= 0x100 || c == MBFL_BAD_INPUT) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	CK((*filter->output_function)(c, filter->data));
	return 0;
}
212
/* Decode ISO-8859-1 bytes from *in into buf.
 *
 * Copies min(*in_len, bufsize) bytes, each widened to a uint32_t of the same
 * value. On return *in points past the consumed bytes and *in_len holds the
 * number of bytes left. Returns the number of values written to buf.
 * `state` is not used. */
static size_t mb_8859_1_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *src = *in;
	size_t avail = *in_len;
	size_t count = (avail < bufsize) ? avail : bufsize;

	for (size_t k = 0; k < count; k++) {
		buf[k] = src[k];
	}

	*in_len = avail - count;
	*in = src + count;
	return count;
}
226
/* Encode `len` wide characters from `in` as ISO-8859-1 into `buf`.
 *
 * Values below 0x100 are appended as a single byte. Any other value is passed
 * to MB_CONVERT_ERROR, after which buffer space for the remaining characters
 * is re-checked. `end` is not used. */
static void mb_wchar_to_8859_1(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len > 0) {
		/* From here on, `len` counts the characters still to come after this one */
		len--;
		uint32_t w = *in++;

		if (w >= 0x100) {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_8859_1);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			continue;
		}
		out = mb_convert_buf_add(out, w);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
245
246 static const char *iso8859_2_aliases[] = {"ISO8859-2", "latin2", NULL};
247 static const unsigned short iso8859_2_ucs_table[] = {
248 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
249 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
250 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
251 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
252 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
253 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
254 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
255 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
256 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
257 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
258 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
259 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
260 };
261 DEF_SB_TBL(8859_2, "ISO-8859-2", "ISO-8859-2", iso8859_2_aliases, 0xA0, iso8859_2_ucs_table);
262
263 static const char *iso8859_3_aliases[] = {"ISO8859-3", "latin3", NULL};
264 static const unsigned short iso8859_3_ucs_table[] = {
265 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0x0000, 0x0124, 0x00A7,
266 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0x0000, 0x017B,
267 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
268 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0x0000, 0x017C,
269 0x00C0, 0x00C1, 0x00C2, 0x0000, 0x00C4, 0x010A, 0x0108, 0x00C7,
270 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
271 0x0000, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
272 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
273 0x00E0, 0x00E1, 0x00E2, 0x0000, 0x00E4, 0x010B, 0x0109, 0x00E7,
274 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
275 0x0000, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
276 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9
277 };
278 DEF_SB_TBL(8859_3, "ISO-8859-3", "ISO-8859-3", iso8859_3_aliases, 0xA0, iso8859_3_ucs_table);
279
280 static const char *iso8859_4_aliases[] = {"ISO8859-4", "latin4", NULL};
281 static const unsigned short iso8859_4_ucs_table[] = {
282 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
283 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
284 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
285 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
286 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
287 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
288 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
289 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
290 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
291 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
292 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
293 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9
294 };
295 DEF_SB_TBL(8859_4, "ISO-8859-4", "ISO-8859-4", iso8859_4_aliases, 0xA0, iso8859_4_ucs_table);
296
297 static const char *iso8859_5_aliases[] = {"ISO8859-5", "cyrillic", NULL};
298 static const unsigned short iso8859_5_ucs_table[] = {
299 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
300 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
301 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
302 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
303 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
304 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
305 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
306 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
307 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
308 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
309 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
310 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F
311 };
312 DEF_SB_TBL(8859_5, "ISO-8859-5", "ISO-8859-5", iso8859_5_aliases, 0xA0, iso8859_5_ucs_table);
313
314 static const char *iso8859_6_aliases[] = {"ISO8859-6", "arabic", NULL};
315 static const unsigned short iso8859_6_ucs_table[] = {
316 0x00A0, 0x0000, 0x0000, 0x0000, 0x00A4, 0x0000, 0x0000, 0x0000,
317 0x0000, 0x0000, 0x0000, 0x0000, 0x060C, 0x00AD, 0x0000, 0x0000,
318 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
319 0x0000, 0x0000, 0x0000, 0x061B, 0x0000, 0x0000, 0x0000, 0x061F,
320 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
321 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
322 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
323 0x0638, 0x0639, 0x063A, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
324 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
325 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
326 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
327 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
328 };
329 DEF_SB_TBL(8859_6, "ISO-8859-6", "ISO-8859-6", iso8859_6_aliases, 0xA0, iso8859_6_ucs_table);
330
331 static const char *iso8859_7_aliases[] = {"ISO8859-7", "greek", NULL};
332 static const unsigned short iso8859_7_ucs_table[] = {
333 0x00A0, 0x2018, 0x2019, 0x00A3, 0x20AC, 0x20AF, 0x00A6, 0x00A7,
334 0x00A8, 0x00A9, 0x037A, 0x00AB, 0x00AC, 0x00AD, 0x0000, 0x2015,
335 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
336 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
337 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
338 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
339 0x03A0, 0x03A1, 0x0000, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
340 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
341 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
342 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
343 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
344 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x0000
345 };
346 DEF_SB_TBL(8859_7, "ISO-8859-7", "ISO-8859-7", iso8859_7_aliases, 0xA0, iso8859_7_ucs_table);
347
348 static const char *iso8859_8_aliases[] = {"ISO8859-8", "hebrew", NULL};
349 static const unsigned short iso8859_8_ucs_table[] = {
350 0x00A0, 0x0000, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
351 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
352 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
353 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x0000,
354 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
355 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
356 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
357 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
358 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
359 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
360 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
361 0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x200E, 0x200F, 0x0000
362 };
363 DEF_SB_TBL(8859_8, "ISO-8859-8", "ISO-8859-8", iso8859_8_aliases, 0xA0, iso8859_8_ucs_table);
364
365 static const char *iso8859_9_aliases[] = {"ISO8859-9", "latin5", NULL};
366 static const unsigned short iso8859_9_ucs_table[] = {
367 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
368 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
369 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
370 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
371 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
372 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
373 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
374 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
375 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
376 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
377 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
378 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF
379 };
380 DEF_SB_TBL(8859_9, "ISO-8859-9", "ISO-8859-9", iso8859_9_aliases, 0xA0, iso8859_9_ucs_table);
381
382 static const char *iso8859_10_aliases[] = {"ISO8859-10", "latin6", NULL};
383 static const unsigned short iso8859_10_ucs_table[] = {
384 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
385 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
386 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
387 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
388 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
389 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
390 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
391 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
392 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
393 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
394 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
395 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138
396 };
397 DEF_SB_TBL(8859_10, "ISO-8859-10", "ISO-8859-10", iso8859_10_aliases, 0xA0, iso8859_10_ucs_table);
398
399 static const char *iso8859_13_aliases[] = {"ISO8859-13", NULL};
400 static const unsigned short iso8859_13_ucs_table[] = {
401 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
402 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
403 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
404 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
405 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
406 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
407 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
408 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
409 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
410 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
411 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
412 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019
413 };
414 DEF_SB_TBL(8859_13, "ISO-8859-13", "ISO-8859-13", iso8859_13_aliases, 0xA0, iso8859_13_ucs_table);
415
416 static const char *iso8859_14_aliases[] = {"ISO8859-14", "latin8", NULL};
417 static const unsigned short iso8859_14_ucs_table[] = {
418 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
419 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
420 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
421 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
422 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
423 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
424 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
425 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
426 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
427 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
428 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
429 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF
430 };
431 DEF_SB_TBL(8859_14, "ISO-8859-14", "ISO-8859-14", iso8859_14_aliases, 0xA0, iso8859_14_ucs_table);
432
433 static const char *iso8859_15_aliases[] = {"ISO8859-15", NULL};
434 static const unsigned short iso8859_15_ucs_table[] = {
435 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
436 0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
437 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
438 0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
439 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
440 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
441 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
442 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
443 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
444 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
445 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
446 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
447 };
448 DEF_SB_TBL(8859_15, "ISO-8859-15", "ISO-8859-15", iso8859_15_aliases, 0xA0, iso8859_15_ucs_table);
449
450 static const char *iso8859_16_aliases[] = {"ISO8859-16", NULL};
451 static const unsigned short iso8859_16_ucs_table[] = {
452 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7,
453 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
454 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
455 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
456 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7,
457 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
458 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
459 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
460 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
461 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
462 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B,
463 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF
464 };
465 DEF_SB_TBL(8859_16, "ISO-8859-16", "ISO-8859-16", iso8859_16_aliases, 0xA0, iso8859_16_ucs_table);
466
467 static const char *cp1251_aliases[] = {"CP1251", "CP-1251", "WINDOWS-1251", NULL};
468 static const unsigned short cp1251_ucs_table[] = {
469 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
470 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
471 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
472 0x0000, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
473 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
474 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
475 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
476 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
477 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
478 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
479 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
480 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
481 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
482 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
483 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
484 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F
485 };
486 DEF_SB_TBL(cp1251, "Windows-1251", "Windows-1251", cp1251_aliases, 0x80, cp1251_ucs_table);
487
488 static const char *cp1252_aliases[] = {"cp1252", NULL};
489 static const unsigned short cp1252_ucs_table[] = {
490 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
491 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
492 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
493 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
494 };
495 DEF_SB(cp1252, "Windows-1252", "Windows-1252", cp1252_aliases);
496
/* Filter step for Windows-1252 output.
 *
 * - Negative values and MBFL_BAD_INPUT are illegal.
 * - Values >= 0x100 are looked up in cp1252_ucs_table; a match at index n is
 *   emitted as byte 0x80 + n.
 * - Values below 0x100 are emitted unchanged if they are <= 0x7F, >= 0xA0, or
 *   one of 0x81, 0x8D, 0x8F, 0x90, 0x9D.
 * Anything else is passed to mbfl_filt_conv_illegal_output. Returns -1 if the
 * invoked routine yields a negative value, otherwise 0. */
static int mbfl_filt_conv_wchar_cp1252(int c, mbfl_convert_filter *filter)
{
	if (c >= 0 && c != MBFL_BAD_INPUT) {
		if (c >= 0x100) {
			for (int idx = 0; idx < 32; idx++) {
				if (cp1252_ucs_table[idx] == c) {
					CK((*filter->output_function)(0x80 + idx, filter->data));
					return 0;
				}
			}
		} else {
			bool passthrough;
			switch (c) {
			case 0x81:
			case 0x8D:
			case 0x8F:
			case 0x90:
			case 0x9D:
				passthrough = true;
				break;
			default:
				passthrough = (c <= 0x7F || c >= 0xA0);
				break;
			}
			if (passthrough) {
				CK((*filter->output_function)(c, filter->data));
				return 0;
			}
		}
	}

	/* Every path that did not emit a byte above ends up here */
	CK(mbfl_filt_conv_illegal_output(c, filter));
	return 0;
}
516
/* Filter step for Windows-1252 input: values in [0x80, 0xA0) are replaced by
 * their cp1252_ucs_table entry (a zero entry would become MBFL_BAD_INPUT);
 * every other value is forwarded unchanged. Returns -1 if the output function
 * yields a negative value, otherwise 0. */
static int mbfl_filt_conv_cp1252_wchar(int c, mbfl_convert_filter *filter)
{
	int wide = c;

	if (c >= 0x80 && c < 0xA0) {
		wide = coalesce(cp1252_ucs_table[c - 0x80], MBFL_BAD_INPUT);
	}

	CK((*filter->output_function)(wide, filter->data));
	return 0;
}
528
mb_cp1252_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)529 static size_t mb_cp1252_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
530 {
531 unsigned char *p = *in, *e = p + *in_len;
532 uint32_t *out = buf, *limit = buf + bufsize;
533
534 while (p < e && out < limit) {
535 unsigned char c = *p++;
536
537 if (c >= 0x80 && c < 0xA0) {
538 *out++ = coalesce(cp1252_ucs_table[c - 0x80], MBFL_BAD_INPUT);
539 } else {
540 *out++ = c;
541 }
542 }
543
544 *in_len = e - p;
545 *in = p;
546 return out - buf;
547 }
548
/* Encode `len` wide characters from `in` as Windows-1252 into `buf`.
 *
 * - Values >= 0x100 are searched for in cp1252_ucs_table; a match at index i
 *   is written as byte 0x80 + i.
 * - Values below 0x100 are written unchanged if they are <= 0x7F, >= 0xA0, or
 *   one of 0x81, 0x8D, 0x8F, 0x90, 0x9D.
 * Any other value is passed to MB_CONVERT_ERROR, after which buffer space for
 * the remaining characters is re-checked. `end` is not used. */
static void mb_wchar_to_cp1252(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len > 0) {
		/* From here on, `len` counts the characters still to come after this one */
		len--;
		uint32_t w = *in++;

		if (w >= 0x100) {
			/* Linear search; idx == 32 afterwards means "not found" */
			int idx = 0;
			while (idx < 32 && w != cp1252_ucs_table[idx]) {
				idx++;
			}
			if (idx < 32) {
				out = mb_convert_buf_add(out, idx + 0x80);
			} else {
				MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp1252);
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			}
		} else if (w <= 0x7F || w >= 0xA0 || w == 0x81 || w == 0x8D || w == 0x8F || w == 0x90 || w == 0x9D) {
			out = mb_convert_buf_add(out, w);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp1252);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
578
579 static const char *cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL};
580 static const unsigned short cp1254_ucs_table[] = {
581 0x20AC, 0X0000, 0X201A, 0X0192, 0X201E, 0X2026, 0X2020, 0X2021,
582 0X02C6, 0X2030, 0X0160, 0X2039, 0X0152, 0X0000, 0X0000, 0X0000,
583 0X0000, 0X2018, 0X2019, 0X201C, 0X201D, 0X2022, 0X2013, 0X2014,
584 0X02DC, 0X2122, 0X0161, 0X203A, 0X0153, 0X0000, 0X0000, 0X0178,
585 0X00A0, 0X00A1, 0X00A2, 0X00A3, 0X00A4, 0X00A5, 0X00A6, 0X00A7,
586 0X00A8, 0X00A9, 0X00AA, 0X00AB, 0X00AC, 0X00AD, 0X00AE, 0X00AF,
587 0X00B0, 0X00B1, 0X00B2, 0X00B3, 0X00B4, 0X00B5, 0X00B6, 0X00B7,
588 0X00B8, 0X00B9, 0X00BA, 0X00BB, 0X00BC, 0X00BD, 0X00BE, 0X00BF,
589 0X00C0, 0X00C1, 0X00C2, 0X00C3, 0X00C4, 0X00C5, 0X00C6, 0X00C7,
590 0X00C8, 0X00C9, 0X00CA, 0X00CB, 0X00CC, 0X00CD, 0X00CE, 0X00CF,
591 0X011E, 0X00D1, 0X00D2, 0X00D3, 0X00D4, 0X00D5, 0X00D6, 0X00D7,
592 0X00D8, 0X00D9, 0X00DA, 0X00DB, 0X00DC, 0X0130, 0X015E, 0X00DF,
593 0X00E0, 0X00E1, 0X00E2, 0X00E3, 0X00E4, 0X00E5, 0X00E6, 0X00E7,
594 0X00E8, 0X00E9, 0X00EA, 0X00EB, 0X00EC, 0X00ED, 0X00EE, 0X00EF,
595 0X011F, 0X00F1, 0X00F2, 0X00F3, 0X00F4, 0X00F5, 0X00F6, 0X00F7,
596 0X00F8, 0X00F9, 0X00FA, 0X00FB, 0X00FC, 0X0131, 0X015F, 0X00FF
597 };
598 DEF_SB_TBL(cp1254, "Windows-1254", "Windows-1254", cp1254_aliases, 0x80, cp1254_ucs_table);
599
600 static const char *cp866_aliases[] = {"CP-866", "IBM866", "IBM-866", NULL};
601 static const unsigned short cp866_ucs_table[] = {
602 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
603 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
604 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
605 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
606 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
607 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
608 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
609 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
610 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
611 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
612 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
613 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
614 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
615 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
616 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E,
617 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0
618 };
619 DEF_SB_TBL(cp866, "CP866", "CP866", cp866_aliases, 0x80, cp866_ucs_table);
620
621 static const char *cp850_aliases[] = {"CP-850", "IBM850", "IBM-850", NULL};
622 static const unsigned short cp850_ucs_table[] = {
623 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
624 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
625 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
626 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
627 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
628 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
629 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
630 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
631 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
632 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
633 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
634 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
635 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
636 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
637 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
638 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
639 };
640 DEF_SB_TBL(cp850, "CP850", "CP850", cp850_aliases, 0x80, cp850_ucs_table);
641
642 static const char *koi8r_aliases[] = {"KOI8R", NULL};
643 static const unsigned short koi8r_ucs_table[] = {
644 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
645 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
646 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
647 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
648 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
649 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
650 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
651 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
652 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
653 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
654 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
655 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
656 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
657 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
658 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
659 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A
660 };
661 DEF_SB_TBL(koi8r, "KOI8-R", "KOI8-R", koi8r_aliases, 0x80, koi8r_ucs_table);
662
663 static const char *koi8u_aliases[] = {"KOI8U", NULL};
664 static const unsigned short koi8u_ucs_table[] = {
665 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
666 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
667 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
668 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
669 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
670 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
671 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
672 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
673 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
674 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
675 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
676 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
677 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
678 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
679 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
680 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A
681 };
682 DEF_SB_TBL(koi8u, "KOI8-U", "KOI8-U", koi8u_aliases, 0x80, koi8u_ucs_table);
683
684 static const char *armscii8_aliases[] = {"ArmSCII8", "ARMSCII-8", "ARMSCII8", NULL};
685 static const unsigned short armscii8_ucs_table[] = {
686 0x00A0, 0x0000, 0x0587, 0x0589, 0x0029, 0x0028, 0x00BB, 0x00AB,
687 0x2014, 0x002E, 0x055D, 0x002C, 0x002D, 0x058A, 0x2026, 0x055C,
688 0x055B, 0x055E, 0x0531, 0x0561, 0x0532, 0x0562, 0x0533, 0x0563,
689 0x0534, 0x0564, 0x0535, 0x0565, 0x0536, 0x0566, 0x0537, 0x0567,
690 0x0538, 0x0568, 0x0539, 0x0569, 0x053A, 0x056A, 0x053B, 0x056B,
691 0x053C, 0x056C, 0x053D, 0x056D, 0x053E, 0x056E, 0x053F, 0x056F,
692 0x0540, 0x0570, 0x0541, 0x0571, 0x0542, 0x0572, 0x0543, 0x0573,
693 0x0544, 0x0574, 0x0545, 0x0575, 0x0546, 0x0576, 0x0547, 0x0577,
694 0x0548, 0x0578, 0x0549, 0x0579, 0x054A, 0x057A, 0x054B, 0x057B,
695 0x054C, 0x057C, 0x054D, 0x057D, 0x054E, 0x057E, 0x054F, 0x057F,
696 0x0550, 0x0580, 0x0551, 0x0581, 0x0552, 0x0582, 0x0553, 0x0583,
697 0x0554, 0x0584, 0x0555, 0x0585, 0x0556, 0x0586, 0x055A, 0x0000
698 };
699 static const unsigned char ucs_armscii8_table[] = {
700 0xA5, 0xA4, 0x2A, 0x2B, 0xAB, 0xAC, 0xA9, 0x2F
701 };
702 DEF_SB(armscii8, "ArmSCII-8", "ArmSCII-8", armscii8_aliases);
703
/* ArmSCII-8 => wchar, filter-callback variant.
 * Values below 0xA0 are forwarded to the output function unchanged. Anything
 * else is looked up in armscii8_ucs_table at (c - 0xA0); a zero table entry is
 * forwarded as MBFL_BAD_INPUT instead.
 * Returns 0 on success, or -1 (via CK) when the output function reports a
 * negative result. */
static int mbfl_filt_conv_armscii8_wchar(int c, mbfl_convert_filter *filter)
{
	if (c < 0xA0) {
		CK((*filter->output_function)(c, filter->data));
	} else {
		CK((*filter->output_function)(coalesce(armscii8_ucs_table[c - 0xA0], MBFL_BAD_INPUT), filter->data));
	}
	return 0;
}
709
/* wchar => ArmSCII-8, filter-callback variant.
 * The cases are tried in this order:
 *   1. U+0028..U+002F are remapped through ucs_armscii8_table.
 *   2. Negative values and MBFL_BAD_INPUT go to mbfl_filt_conv_illegal_output.
 *   3. Remaining values below 0xA0 are emitted unchanged.
 *   4. Everything else is searched for in armscii8_ucs_table; a hit at index i
 *      is emitted as byte 0xA0 + i, a miss goes to
 *      mbfl_filt_conv_illegal_output.
 * Returns 0 on success, or -1 (via CK) when a callee reports a negative
 * result. */
static int mbfl_filt_conv_wchar_armscii8(int c, mbfl_convert_filter *filter)
{
	if (c >= 0x28 && c <= 0x2F) {
		CK((*filter->output_function)(ucs_armscii8_table[c - 0x28], filter->data));
		return 0;
	}

	if (c < 0 || c == MBFL_BAD_INPUT) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	if (c < 0xA0) {
		CK((*filter->output_function)(c, filter->data));
		return 0;
	}

	/* Linear scan for the first table slot holding this code point */
	int idx = 0;
	while (idx < 0x60 && c != armscii8_ucs_table[idx]) {
		idx++;
	}

	if (idx < 0x60) {
		CK((*filter->output_function)(0xA0 + idx, filter->data));
	} else {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}
	return 0;
}
729
mb_armscii8_to_wchar(unsigned char ** in,size_t * in_len,uint32_t * buf,size_t bufsize,unsigned int * state)730 static size_t mb_armscii8_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
731 {
732 unsigned char *p = *in, *e = p + *in_len;
733 uint32_t *out = buf, *limit = buf + bufsize;
734
735 while (p < e && out < limit) {
736 unsigned char c = *p++;
737 *out++ = (c < 0xA0) ? c : coalesce(armscii8_ucs_table[c - 0xA0], MBFL_BAD_INPUT);
738 }
739
740 *in_len = e - p;
741 *in = p;
742 return out - buf;
743 }
744
/* wchar => ArmSCII-8, buffer variant.
 * Encodes len code points from in, appending one byte per code point to buf:
 *   - U+0028..U+002F are remapped through ucs_armscii8_table;
 *   - other code points below 0xA0 are written unchanged;
 *   - anything else is searched for in armscii8_ucs_table and written as
 *     0xA0 + index; if absent, it is handed to MB_CONVERT_ERROR.
 * The end argument is not used by this encoder.
 * NOTE(review): the MB_CONVERT_* macros are defined elsewhere; they appear to
 * load/store the output cursor and reserve output space -- confirm against
 * their definitions. */
static void mb_wchar_to_armscii8(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;

		if (w >= 0x28 && w <= 0x2F) {
			out = mb_convert_buf_add(out, ucs_armscii8_table[w - 0x28]);
			continue;
		}

		if (w < 0xA0) {
			out = mb_convert_buf_add(out, w);
			continue;
		}

		/* Linear scan for the first table slot holding this code point */
		int pos = 0;
		while (pos < 0x60 && w != armscii8_ucs_table[pos]) {
			pos++;
		}

		if (pos < 0x60) {
			out = mb_convert_buf_add(out, 0xA0 + pos);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_armscii8);
			/* Re-reserve room for the code points still to be processed */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
773