xref: /PHP-8.1/ext/mysqlnd/mysqlnd_charset.c (revision df4dd82e)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | https://www.php.net/license/3_01.txt                                 |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Andrey Hristov <andrey@php.net>                             |
14   |          Ulf Wendel <uw@php.net>                                     |
15   |          Georg Richter <georg@php.net>                               |
16   +----------------------------------------------------------------------+
17 */
18 
19 #include "php.h"
20 #include "mysqlnd.h"
21 #include "mysqlnd_priv.h"
22 #include "mysqlnd_debug.h"
23 #include "mysqlnd_charset.h"
24 
25 /* {{{ utf8 functions */
/*
 * Validate the UTF-8 sequence that begins at `start`, accepting only the
 * one-, two- and three-byte forms.
 *
 * start - first byte of the candidate sequence
 * end   - one past the last readable byte
 *
 * Returns the length of the sequence in bytes (1, 2 or 3). Returns 0 when
 * nothing is readable, the lead byte is not accepted, a continuation byte is
 * outside 80..BF, or the sequence would extend past `end`.
 */
static unsigned int check_mb_utf8mb3_sequence(const char * const start, const char * const end)
{
	zend_uchar lead, second, third;

	if (start >= end) {
		return 0;
	}

	lead = (zend_uchar) start[0];

	if (lead < 0x80) {
		return 1;	/* plain 7-bit byte */
	}
	if (lead < 0xC2 || lead >= 0xF0) {
		/* 80..C1 cannot start a sequence; F0 and above need more than 3 bytes */
		return 0;
	}

	if (lead < 0xE0) {
		/* two-byte form: C2..DF followed by one byte in 80..BF */
		if (end - start < 2) {
			return 0;
		}
		second = (zend_uchar) start[1];
		return ((second & 0xC0) == 0x80) ? 2 : 0;
	}

	/* three-byte form: E0..EF followed by two bytes in 80..BF */
	if (end - start < 3) {
		return 0;
	}
	second = (zend_uchar) start[1];
	third = (zend_uchar) start[2];
	if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80) {
		return 0;
	}
	if (lead == 0xE0 && second < 0xA0) {
		return 0;	/* E0 must be followed by A0..BF */
	}
	return 3;
}
63 
64 
/*
 * Validate the UTF-8 sequence that begins at `start`, accepting the one-
 * to four-byte forms.
 *
 * start - first byte of the candidate sequence
 * end   - one past the last readable byte
 *
 * Returns the length of the sequence in bytes (1..4). Returns 0 when nothing
 * is readable, the lead byte is not accepted, a continuation byte is outside
 * 80..BF, or the sequence would extend past `end`.
 */
static unsigned int check_mb_utf8_sequence(const char * const start, const char * const end)
{
	zend_uchar lead, second, third, fourth;

	if (start >= end) {
		return 0;
	}

	lead = (zend_uchar) start[0];

	if (lead < 0x80) {
		return 1;	/* plain 7-bit byte */
	}
	if (lead < 0xC2 || lead >= 0xF5) {
		/* 80..C1 cannot start a sequence; F5 and above are never accepted */
		return 0;
	}

	if (lead < 0xE0) {
		/* two-byte form: C2..DF followed by one byte in 80..BF */
		if (end - start < 2) {
			return 0;
		}
		second = (zend_uchar) start[1];
		return ((second & 0xC0) == 0x80) ? 2 : 0;
	}

	if (lead < 0xF0) {
		/* three-byte form: E0..EF followed by two bytes in 80..BF */
		if (end - start < 3) {
			return 0;
		}
		second = (zend_uchar) start[1];
		third = (zend_uchar) start[2];
		if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80) {
			return 0;
		}
		if (lead == 0xE0 && second < 0xA0) {
			return 0;	/* E0 must be followed by A0..BF */
		}
		return 3;
	}

	/*
	  Four-byte form, lead in F0..F4:
	  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

	  The bit layout could encode U+00010000..U+001FFFFF, but the highest
	  character defined by Unicode is U+0010FFFF, so the accepted range is:

	  [F0][90..BF][80..BF][80..BF]   (F0.90.80.80 == U+00010000, the minimum)
	  [F1][80..BF][80..BF][80..BF]
	  [F2][80..BF][80..BF][80..BF]
	  [F3][80..BF][80..BF][80..BF]
	  [F4][80..8F][80..BF][80..BF]   (F4.8F.BF.BF == U+0010FFFF, the maximum)
	*/
	if (end - start < 4) {
		return 0;
	}
	second = (zend_uchar) start[1];
	third = (zend_uchar) start[2];
	fourth = (zend_uchar) start[3];
	if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80 || (fourth & 0xC0) != 0x80) {
		return 0;
	}
	if (lead == 0xF0 && second < 0x90) {
		return 0;	/* below U+00010000 */
	}
	if (lead == 0xF4 && second > 0x8F) {
		return 0;	/* above U+0010FFFF */
	}
	return 4;
}
136 
/*
 * Report the length of a genuinely multi-byte sequence at `start`, as
 * measured by check_mb_utf8mb3_sequence(). Single-byte characters and
 * rejected sequences both yield 0.
 */
static unsigned int check_mb_utf8mb3_valid(const char * const start, const char * const end)
{
	const unsigned int seq_len = check_mb_utf8mb3_sequence(start, end);

	if (seq_len <= 1) {
		return 0;
	}
	return seq_len;
}
142 
/*
 * Report the length of a genuinely multi-byte sequence at `start`, as
 * measured by check_mb_utf8_sequence(). Single-byte characters and
 * rejected sequences both yield 0.
 */
static unsigned int check_mb_utf8_valid(const char * const start, const char * const end)
{
	const unsigned int seq_len = check_mb_utf8_sequence(start, end);

	if (seq_len <= 1) {
		return 0;
	}
	return seq_len;
}
148 
149 
/*
 * Map a lead value to the length of the sequence it introduces, for the
 * three-byte-limited UTF-8 variant.
 *
 * Returns 1 for values below 0x80, 2 for C2..DF, 3 for E0..EF and 0 for
 * everything else.
 */
static unsigned int mysqlnd_mbcharlen_utf8mb3(const unsigned int utf8)
{
	if (utf8 < 0x80) {
		return 1;
	}
	if (utf8 >= 0xC2 && utf8 < 0xE0) {
		return 2;
	}
	if (utf8 >= 0xE0 && utf8 < 0xF0) {
		return 3;
	}
	/* 80..C1 are not valid lead bytes; F0 and above exceed three bytes */
	return 0;
}
166 
167 
/*
 * Map a lead value to the length of the sequence it introduces, for the
 * four-byte UTF-8 variant.
 *
 * Returns 1 for values below 0x80, 2 for C2..DF, 3 for E0..EF, 4 for
 * F0..F7 and 0 for everything else.
 *
 * NOTE(review): F5..F7 are reported as 4 here although
 * check_mb_utf8_sequence() rejects those lead bytes - confirm whether the
 * difference is intentional.
 */
static unsigned int mysqlnd_mbcharlen_utf8(const unsigned int utf8)
{
	if (utf8 < 0x80) {
		return 1;
	}
	if (utf8 >= 0xC2 && utf8 < 0xE0) {
		return 2;
	}
	if (utf8 >= 0xE0 && utf8 < 0xF0) {
		return 3;
	}
	if (utf8 >= 0xF0 && utf8 < 0xF8) {
		return 4;
	}
	/* 80..C1 and F8 and above are not accepted as lead bytes */
	return 0;
}
187 /* }}} */
188 
189 
190 /* {{{ big5 functions */
/* Lead byte of a two-byte Big5 character: A1..F9. */
#define valid_big5head(c)	(0xA1 <= (zend_uchar)(c) && (zend_uchar)(c) <= 0xF9)
/* Trail byte of a two-byte Big5 character: 40..7E or A1..FE. */
#define valid_big5tail(c)	((0x40 <= (zend_uchar)(c) && (zend_uchar)(c) <= 0x7E) || \
							(0xA1 <= (zend_uchar)(c) && (zend_uchar)(c) <= 0xFE))

/*
 * True when (c, d) form a lead/trail pair. Expressed through the valid_*
 * macros above; the former expansion named isbig5head/isbig5tail, which are
 * not defined in this file, so any use of the macro failed to compile.
 */
#define isbig5code(c,d) (valid_big5head(c) && valid_big5tail(d))

/*
 * Check for a two-byte Big5 character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 when at least two bytes are readable and they form a valid
 * lead/trail pair, otherwise 0.
 */
static unsigned int check_mb_big5(const char * const start, const char * const end)
{
	/* Test the length first so that no byte is read from an empty range. */
	if (end - start < 2) {
		return 0;
	}
	return (valid_big5head(start[0]) && valid_big5tail(start[1])) ? 2 : 0;
}


/*
 * Length implied by a lead value: 2 when its low byte is a Big5 lead byte,
 * otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_big5(const unsigned int big5)
{
	return (valid_big5head(big5)) ? 2 : 1;
}
207 /* }}} */
208 
209 
210 /* {{{ cp932 functions */
/*
 * Lead byte of a two-byte cp932 character: 81..9F or E0..FC.
 * Every use of the argument is parenthesized so that an expression such as
 * `v & 0xFF` is compared as a whole.
 */
#define valid_cp932head(c) ((0x81 <= (c) && (c) <= 0x9F) || (0xE0 <= (c) && (c) <= 0xFC))
/* Trail byte of a two-byte cp932 character: 40..7E or 80..FC. */
#define valid_cp932tail(c) ((0x40 <= (c) && (c) <= 0x7E) || (0x80 <= (c) && (c) <= 0xFC))


/*
 * Check for a two-byte cp932 character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 when at least two bytes are readable and they form a valid
 * lead/trail pair, otherwise 0.
 */
static unsigned int check_mb_cp932(const char * const start, const char * const end)
{
	/* Test the length first so that no byte is read from an empty range. */
	if (end - start < 2) {
		return 0;
	}
	return (valid_cp932head((zend_uchar)start[0]) &&
			valid_cp932tail((zend_uchar)start[1])) ? 2 : 0;
}


/*
 * Length implied by a lead value: 2 when its low byte is a cp932 lead byte,
 * otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_cp932(const unsigned int cp932)
{
	return (valid_cp932head((zend_uchar)cp932)) ? 2 : 1;
}
226 /* }}} */
227 
228 
229 /* {{{ euckr functions */
/* Byte range A1..FE, used for euckr trail bytes and for the length lookup. */
#define valid_euckr(c)	(0xA1 <= (zend_uchar)(c) && (zend_uchar)(c) <= 0xFE)

/*
 * Check for a two-byte euckr character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 when at least two bytes are readable, the first byte is 0x80 or
 * above and the second byte lies in A1..FE; otherwise 0.
 */
static unsigned int check_mb_euckr(const char * const start, const char * const end)
{
	if (end - start > 1 && (zend_uchar) start[0] >= 0x80 && valid_euckr(start[1])) {
		return 2;
	}
	return 0;
}


/*
 * Length implied by a lead value: 2 when its low byte lies in A1..FE,
 * otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_euckr(const unsigned int kr)
{
	if (valid_euckr(kr)) {
		return 2;
	}
	return 1;
}
251 /* }}} */
252 
253 
254 /* {{{ eucjpms functions */
/* Byte range A1..FE. */
#define valid_eucjpms(c) 		(((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xFE)
/* Byte range A1..DF, the byte accepted after the 8E prefix. */
#define valid_eucjpms_kata(c)	(((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xDF)
/* Prefix byte 8E, which introduces a two-byte sequence. */
#define valid_eucjpms_ss2(c)	(((c) & 0xFF) == 0x8E)
/* Prefix byte 8F, which introduces a three-byte sequence. */
#define valid_eucjpms_ss3(c)	(((c) & 0xFF) == 0x8F)

/*
 * Check for a multi-byte eucjpms character at `start`; `end` points one past
 * the last readable byte.
 *
 * Returns
 *   2 for a pair of bytes that both lie in A1..FE,
 *   2 for 8E followed by a byte in A1..DF,
 *   3 for 8F followed by two bytes in A1..FE,
 *   0 for anything else, including sequences cut short by `end`.
 *
 * The 8F form is three bytes long and is reported as such, matching
 * mysqlnd_mbcharlen_eucjpms(). It used to be reported as 2, which made
 * callers treat the third byte as the start of a new character.
 */
static unsigned int check_mb_eucjpms(const char * const start, const char * const end)
{
	if (start >= end || (zend_uchar) start[0] < 0x80) {
		return 0;	/* nothing to read, or a single-byte character */
	}

	if (end - start > 1) {
		if (valid_eucjpms(start[0]) && valid_eucjpms(start[1])) {
			return 2;
		}
		if (valid_eucjpms_ss2(start[0]) && valid_eucjpms_kata(start[1])) {
			return 2;
		}
	}

	if (end - start > 2 && valid_eucjpms_ss3(start[0]) &&
		valid_eucjpms(start[1]) && valid_eucjpms(start[2])) {
		return 3;
	}
	return 0;
}


/*
 * Length implied by a lead value: 2 for A1..FE and for 8E, 3 for 8F,
 * otherwise 1. Only the low byte of `jpms` is examined.
 */
static unsigned int mysqlnd_mbcharlen_eucjpms(const unsigned int jpms)
{
	if (valid_eucjpms(jpms) || valid_eucjpms_ss2(jpms)) {
		return 2;
	}
	if (valid_eucjpms_ss3(jpms)) {
		return 3;
	}
	return 1;
}
289 /* }}} */
290 
291 
292 /* {{{ gb2312 functions */
/* Lead byte of a two-byte gb2312 character: A1..F7. */
#define valid_gb2312_head(c)	(0xA1 <= (zend_uchar)(c) && (zend_uchar)(c) <= 0xF7)
/* Trail byte of a two-byte gb2312 character: A1..FE. */
#define valid_gb2312_tail(c)	(0xA1 <= (zend_uchar)(c) && (zend_uchar)(c) <= 0xFE)


/*
 * Check for a two-byte gb2312 character at `start`; `end` points one past
 * the last readable byte.
 *
 * Returns 2 for a valid lead byte followed, within the readable range, by a
 * valid trail byte; otherwise 0.
 */
static unsigned int check_mb_gb2312(const char * const start, const char * const end)
{
	if (!valid_gb2312_head(start[0])) {
		return 0;
	}
	if (end - start <= 1) {
		return 0;
	}
	if (!valid_gb2312_tail(start[1])) {
		return 0;
	}
	return 2;
}


/*
 * Length implied by a lead value: 2 when its low byte is a gb2312 lead byte,
 * otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_gb2312(const unsigned int gb)
{
	if (valid_gb2312_head(gb)) {
		return 2;
	}
	return 1;
}
308 /* }}} */
309 
310 
311 /* {{{ gbk functions */
/* Lead byte of a two-byte gbk character: 81..FE. */
#define valid_gbk_head(c)	(0x81<=(zend_uchar)(c) && (zend_uchar)(c)<=0xFE)
/* Trail byte of a two-byte gbk character: 40..7E or 80..FE. */
#define valid_gbk_tail(c)	((0x40<=(zend_uchar)(c) && (zend_uchar)(c)<=0x7E) || (0x80<=(zend_uchar)(c) && (zend_uchar)(c)<=0xFE))

/*
 * Check for a two-byte gbk character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 for a valid lead byte followed, within the readable range, by a
 * valid trail byte; otherwise 0.
 */
static unsigned int check_mb_gbk(const char * const start, const char * const end)
{
	if (!valid_gbk_head(start[0])) {
		return 0;
	}
	if (end - start <= 1) {
		return 0;
	}
	if (!valid_gbk_tail(start[1])) {
		return 0;
	}
	return 2;
}

/*
 * Length implied by a lead value: 2 when its low byte is a gbk lead byte,
 * otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_gbk(const unsigned int gbk)
{
	if (valid_gbk_head(gbk)) {
		return 2;
	}
	return 1;
}
324 /* }}} */
325 
326 
327 /* {{{ sjis functions */
/* Lead byte of a two-byte sjis character: 81..9F or E0..FC. */
#define valid_sjis_head(c)	((0x81 <= (c) && (c) <= 0x9F) || (0xE0 <= (c) && (c) <= 0xFC))
/* Trail byte of a two-byte sjis character: 40..7E or 80..FC. */
#define valid_sjis_tail(c)	((0x40 <= (c) && (c) <= 0x7E) || (0x80 <= (c) && (c) <= 0xFC))


/*
 * Check for a two-byte sjis character at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 for a valid lead byte followed, within the readable range, by a
 * valid trail byte; otherwise 0.
 */
static unsigned int check_mb_sjis(const char * const start, const char * const end)
{
	const zend_uchar lead = (zend_uchar) start[0];

	if (!valid_sjis_head(lead)) {
		return 0;
	}
	if ((end - start) <= 1) {
		return 0;
	}
	if (!valid_sjis_tail((zend_uchar) start[1])) {
		return 0;
	}
	return 2;
}


/*
 * Length implied by a lead value: 2 when its low byte is an sjis lead byte,
 * otherwise 1.
 */
static unsigned int mysqlnd_mbcharlen_sjis(const unsigned int sjis)
{
	const zend_uchar lead = (zend_uchar) sjis;

	if (valid_sjis_head(lead)) {
		return 2;
	}
	return 1;
}
342 /* }}} */
343 
344 
345 /* {{{ ucs2 functions */
/*
 * Report the size of the ucs2 unit at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 2 when at least two bytes are readable. Returns 0 when fewer
 * remain, so that the reported length never reaches past `end`; the byte
 * values themselves are not inspected.
 */
static unsigned int check_mb_ucs2(const char * const start, const char * const end)
{
	return (end - start >= 2) ? 2 : 0;
}
350 
/*
 * Length implied by a ucs2 lead value. The argument is ignored: the result
 * is 2 for every input.
 */
static unsigned int mysqlnd_mbcharlen_ucs2(const unsigned int ucs2 __attribute((unused)))
{
	const unsigned int unit_bytes = 2;

	return unit_bytes;
}
355 /* }}} */
356 
357 
358 /* {{{ ujis functions */
/* Byte range A1..FE. */
#define valid_ujis(c)     	((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xFE))
/* Byte range A1..DF, the byte accepted after the 8E prefix. */
#define valid_ujis_kata(c)  ((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xDF))
/* Prefix byte 8E, which introduces a two-byte sequence. */
#define valid_ujis_ss2(c) 	(((c)&0xFF) == 0x8E)
/* Prefix byte 8F, which introduces a three-byte sequence. */
#define valid_ujis_ss3(c) 	(((c)&0xFF) == 0x8F)

/*
 * Check for a multi-byte ujis character at `start`; `end` points one past
 * the last readable byte.
 *
 * Returns
 *   2 for a pair of bytes that both lie in A1..FE,
 *   2 for 8E followed by a byte in A1..DF,
 *   3 for 8F followed by two bytes in A1..FE,
 *   0 for anything else, including sequences cut short by `end`.
 *
 * Every form is length-checked before its trailing bytes are read. The
 * two-byte forms used to read start[1] unconditionally, which reached past
 * `end` when a lead byte was the last byte of the buffer.
 */
static unsigned int check_mb_ujis(const char * const start, const char * const end)
{
	if (start >= end || (zend_uchar) start[0] < 0x80) {
		return 0;	/* nothing to read, or a single-byte character */
	}

	if (end - start > 1) {
		if (valid_ujis(start[0]) && valid_ujis(start[1])) {
			return 2;
		}
		if (valid_ujis_ss2(start[0]) && valid_ujis_kata(start[1])) {
			return 2;
		}
	}

	if (end - start > 2 && valid_ujis_ss3(start[0]) &&
		valid_ujis(start[1]) && valid_ujis(start[2])) {
		return 3;
	}
	return 0;
}


/*
 * Length implied by a lead value: 2 for A1..FE and for 8E, 3 for 8F,
 * otherwise 1. Only the low byte of `ujis` is examined.
 */
static unsigned int mysqlnd_mbcharlen_ujis(const unsigned int ujis)
{
	if (valid_ujis(ujis) || valid_ujis_ss2(ujis)) {
		return 2;
	}
	if (valid_ujis_ss3(ujis)) {
		return 3;
	}
	return 1;
}
386 /* }}} */
387 
388 
389 
390 /* {{{ utf16 functions */
/* Byte D8..DB: first byte of a high-surrogate unit. */
#define UTF16_HIGH_HEAD(x)  ((((zend_uchar) (x)) & 0xFC) == 0xD8)
/* Byte DC..DF: first byte of a low-surrogate unit. */
#define UTF16_LOW_HEAD(x)   ((((zend_uchar) (x)) & 0xFC) == 0xDC)

/*
 * Check for a UTF-16 character at `start`; `end` points one past the last
 * readable byte. The surrogate tests look at the first byte of each 16-bit
 * unit.
 *
 * Returns
 *   2 for a unit whose first byte is not a surrogate marker,
 *   4 for a high-surrogate unit followed by a low-surrogate unit,
 *   0 when fewer than two bytes are readable, when a low surrogate comes
 *     first, or when a high surrogate is not followed by a complete low
 *     surrogate unit.
 *
 * NOTE(review): the charset table in this file also registers this function
 * for utf16le, where the surrogate marker would be the second byte of each
 * unit - confirm whether that is intended.
 */
static unsigned int check_mb_utf16(const char * const start, const char * const end)
{
	/* Lengths are compared as differences so no pointer past `end` is formed. */
	if (end - start < 2) {
		return 0;
	}

	if (UTF16_LOW_HEAD(start[0])) {
		return 0;	/* unpaired low surrogate */
	}
	if (!UTF16_HIGH_HEAD(start[0])) {
		return 2;	/* ordinary single unit */
	}

	if (end - start < 4 || !UTF16_LOW_HEAD(start[2])) {
		return 0;	/* high surrogate without its partner */
	}
	return 4;
}


/*
 * Length implied by a lead value: 4 when its low byte is the first byte of a
 * high surrogate (D8..DB), otherwise 2.
 *
 * Returns unsigned int like every other mbcharlen callback in this file.
 */
static unsigned int mysqlnd_mbcharlen_utf16(const unsigned int utf16)
{
	return UTF16_HIGH_HEAD(utf16) ? 4 : 2;
}
415 /* }}} */
416 
417 
418 /* {{{ utf32 functions */
/*
 * Report the size of the utf32 unit at `start`; `end` points one past the
 * last readable byte.
 *
 * Returns 4 when at least four bytes are readable. Returns 0 when fewer
 * remain, so that the reported length never reaches past `end`; the byte
 * values themselves are not inspected.
 */
static unsigned int check_mb_utf32(const char * const start, const char * const end)
{
	return (end - start >= 4) ? 4 : 0;
}
423 
424 
/*
 * Length implied by a utf32 lead value. The argument is ignored: the result
 * is 4 for every input.
 */
static unsigned int mysqlnd_mbcharlen_utf32(const unsigned int utf32 __attribute((unused)))
{
	const unsigned int unit_bytes = 4;

	return unit_bytes;
}
429 /* }}} */
430 
431 
432 /* {{{ gb18030 functions */
/* First or third byte of a gb18030 sequence: 81..FE. */
#define is_gb18030_odd(c)          (0x81 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0xFE)
/* Second byte of a two-byte gb18030 sequence: 40..7E or 80..FE. */
#define is_gb18030_even_2(c)       ((0x40 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0x7E) || (0x80 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0xFE))
/* Second or fourth byte of a four-byte gb18030 sequence: 30..39. */
#define is_gb18030_even_4(c)       (0x30 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0x39)


/*
 * Derive a sequence length from the leading one or two bytes packed into
 * `c` (first byte in bits 8..15 when two bytes are supplied).
 *
 * For c <= 0xFF: returns 1 when the byte is not in 81..FE, otherwise 0.
 * For larger values: returns 2 or 4 according to the class of the second
 * byte, or 0 when `c` exceeds 0xFFFF, the first byte is not in 81..FE, or
 * the second byte fits neither class.
 */
static unsigned int mysqlnd_mbcharlen_gb18030(const unsigned int c)
{
	unsigned int first, second;

	if (c <= 0xFF) {
		return is_gb18030_odd(c) ? 0 : 1;
	}
	if (c > 0xFFFF) {
		return 0;
	}

	first = (c >> 8) & 0xFF;
	second = c & 0xFF;

	if (!is_gb18030_odd(first)) {
		return 0;
	}
	if (is_gb18030_even_2(second)) {
		return 2;
	}
	return is_gb18030_even_4(second) ? 4 : 0;
}


/*
 * Check for a multi-byte gb18030 character at `start`; `end` points one past
 * the last readable byte.
 *
 * Returns 2 for a two-byte sequence, 4 for a four-byte sequence, and 0 when
 * fewer than two bytes are readable, the first byte is not in 81..FE, or the
 * following bytes match neither form.
 */
static unsigned int my_ismbchar_gb18030(const char * start, const char * end)
{
	if (end - start < 2 || !is_gb18030_odd(start[0])) {
		return 0;
	}

	if (is_gb18030_even_2(start[1])) {
		return 2;
	}

	if (end - start < 4) {
		return 0;	/* too short for the four-byte form */
	}
	if (is_gb18030_even_4(start[1]) && is_gb18030_odd(start[2]) && is_gb18030_even_4(start[3])) {
		return 4;
	}
	return 0;
}
471 /* }}} */
472 
/*
  For BC reasons the server is sometimes compiled so that full UTF-8 (the mb4 variant)
  is named utf8mb4 and the old variant is named utf8. Sometimes utf8mb4 is just called
  utf8 while the old charsets are called utf8mb3. The names are kept in macros so they
  are easy to change now; they could later be made compilation-dependent.
*/
478 
479 #define UTF8_MB4 "utf8mb4"
480 #define UTF8_MB3 "utf8"
481 
482 /* {{{ mysqlnd_charsets */
483 const MYSQLND_CHARSET mysqlnd_charsets[] =
484 {
485 	{   1, "big5","big5_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_big5, check_mb_big5},
486 	{   3, "dec8", "dec8_swedish_ci", 1, 1, "", NULL, NULL},
487 	{   4, "cp850", "cp850_general_ci", 1, 1, "", NULL, NULL},
488 	{   6, "hp8", "hp8_english_ci", 1, 1, "", NULL, NULL},
489 	{   7, "koi8r", "koi8r_general_ci", 1, 1, "", NULL, NULL},
490 	{   8, "latin1", "latin1_swedish_ci", 1, 1, "", NULL, NULL},
491 	{   5, "latin1", "latin1_german1_ci", 1, 1, "", NULL, NULL}, /* should be after 0x8 because swedish_ci is the default collation */
492 	{   9, "latin2", "latin2_general_ci", 1, 1, "", NULL, NULL},
493 	{   2, "latin2", "latin2_czech_cs", 1, 1, "", NULL, NULL}, /* should be after 0x9 because general_ci is the default collation */
494 	{  10, "swe7", "swe7_swedish_ci", 1, 1, "", NULL, NULL},
495 	{  11, "ascii", "ascii_general_ci", 1, 1, "", NULL, NULL},
496 	{  12, "ujis", "ujis_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_ujis, check_mb_ujis},
497 	{  13, "sjis", "sjis_japanese_ci", 1, 2, "", mysqlnd_mbcharlen_sjis, check_mb_sjis},
498 	{  16, "hebrew", "hebrew_general_ci", 1, 1, "", NULL, NULL},
499 	{  17, "filename", "filename", 1, 5, "", NULL, NULL},
500 	{  18, "tis620", "tis620_thai_ci", 1, 1, "", NULL, NULL},
501 	{  19, "euckr", "euckr_korean_ci", 1, 2, "", mysqlnd_mbcharlen_euckr, check_mb_euckr},
502 	{  21, "latin2", "latin2_hungarian_ci", 1, 1, "", NULL, NULL},
503 	{  27, "latin2", "latin2_croatian_ci", 1, 1, "", NULL, NULL},
504 	{  22, "koi8u", "koi8u_general_ci", 1, 1, "", NULL, NULL},
505 	{  24, "gb2312", "gb2312_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_gb2312, check_mb_gb2312},
506 	{  25, "greek", "greek_general_ci", 1, 1, "", NULL, NULL},
507 	{  26, "cp1250", "cp1250_general_ci", 1, 1, "", NULL, NULL},
508 	{  28, "gbk", "gbk_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_gbk, check_mb_gbk},
509 	{  30, "latin5", "latin5_turkish_ci", 1, 1, "", NULL, NULL},
510 	{  31, "latin1", "latin1_german2_ci", 1, 1, "", NULL, NULL},
511 	{  15, "latin1", "latin1_danish_ci", 1, 1, "", NULL, NULL},
512 	{  32, "armscii8", "armscii8_general_ci", 1, 1, "", NULL, NULL},
513 	{  33, UTF8_MB3, UTF8_MB3"_general_ci", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
514 	{  35, "ucs2", "ucs2_general_ci", 2, 2, "UCS-2 Unicode", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
515 	{  36, "cp866", "cp866_general_ci", 1, 1, "", NULL, NULL},
516 	{  37, "keybcs2", "keybcs2_general_ci", 1, 1, "", NULL, NULL},
517 	{  38, "macce", "macce_general_ci", 1, 1, "", NULL, NULL},
518 	{  39, "macroman", "macroman_general_ci", 1, 1, "", NULL, NULL},
519 	{  40, "cp852", "cp852_general_ci", 1, 1, "", NULL, NULL},
520 	{  41, "latin7", "latin7_general_ci", 1, 1, "", NULL, NULL},
521 	{  20, "latin7", "latin7_estonian_cs", 1, 1, "", NULL, NULL},
522 	{  57, "cp1256", "cp1256_general_ci", 1, 1, "", NULL, NULL},
523 	{  59, "cp1257", "cp1257_general_ci", 1, 1, "", NULL, NULL},
524 	{  63, "binary", "binary", 1, 1, "", NULL, NULL},
525 	{  97, "eucjpms", "eucjpms_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_eucjpms, check_mb_eucjpms},
526 	{  29, "cp1257", "cp1257_lithuanian_ci", 1, 1, "", NULL, NULL},
527 	{  31, "latin1", "latin1_german2_ci", 1, 1, "", NULL, NULL},
528 	{  34, "cp1250", "cp1250_czech_cs", 1, 1, "", NULL, NULL},
529 	{  42, "latin7", "latin7_general_cs", 1, 1, "", NULL, NULL},
530 	{  43, "macce", "macce_bin", 1, 1, "", NULL, NULL},
531 	{  44, "cp1250", "cp1250_croatian_ci", 1, 1, "", NULL, NULL},
532 	{  45, UTF8_MB4, UTF8_MB4"_general_ci", 1, 4, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8,  check_mb_utf8_valid},
533 	{  46, UTF8_MB4, UTF8_MB4"_bin", 1, 4, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8,  check_mb_utf8_valid},
534 	{  47, "latin1", "latin1_bin", 1, 1, "", NULL, NULL},
535 	{  48, "latin1", "latin1_general_ci", 1, 1, "", NULL, NULL},
536 	{  49, "latin1", "latin1_general_cs", 1, 1, "", NULL, NULL},
537 	{  51, "cp1251", "cp1251_general_ci", 1, 1, "", NULL, NULL},
538 	{  14, "cp1251", "cp1251_bulgarian_ci", 1, 1, "", NULL, NULL},
539 	{  23, "cp1251", "cp1251_ukrainian_ci", 1, 1, "", NULL, NULL},
540 	{  50, "cp1251", "cp1251_bin", 1, 1, "", NULL, NULL},
541 	{  52, "cp1251", "cp1251_general_cs", 1, 1, "", NULL, NULL},
542 	{  53, "macroman", "macroman_bin", 1, 1, "", NULL, NULL},
543 	{  54, "utf16", "utf16_general_ci", 2, 4, "UTF-16 Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
544 	{  55, "utf16", "utf16_bin", 2, 4, "UTF-16 Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
545 	{  56, "utf16le", "utf16le_general_ci", 2, 4, "UTF-16LE Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
546 	{  58, "cp1257", "cp1257_bin", 1, 1, "", NULL, NULL},
547 /*55*/{  60, "utf32", "utf32_general_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
548 /*55*/{  61, "utf32", "utf32_bin", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
549 	{  62, "utf16le", "utf16le_bin", 2, 4, "UTF-16LE Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
550 	{  64, "armscii8", "armscii8_bin", 1, 1, "", NULL, NULL},
551 	{  65, "ascii", "ascii_bin", 1, 1, "", NULL, NULL},
552 	{  66, "cp1250", "cp1250_bin", 1, 1, "", NULL, NULL},
553 	{  67, "cp1256", "cp1256_bin", 1, 1, "", NULL, NULL},
554 	{  68, "cp866", "cp866_bin", 1, 1, "", NULL, NULL},
555 	{  69, "dec8", "dec8_bin", 1, 1, "", NULL, NULL},
556 	{  70, "greek", "greek_bin", 1, 1, "", NULL, NULL},
557 	{  71, "hebrew", "hebrew_bin", 1, 1, "", NULL, NULL},
558 	{  72, "hp8", "hp8_bin", 1, 1, "", NULL, NULL},
559 	{  73, "keybcs2", "keybcs2_bin", 1, 1, "", NULL, NULL},
560 	{  74, "koi8r", "koi8r_bin", 1, 1, "", NULL, NULL},
561 	{  75, "koi8u", "koi8u_bin", 1, 1, "", NULL, NULL},
562 	{  77, "latin2", "latin2_bin", 1, 1, "", NULL, NULL},
563 	{  78, "latin5", "latin5_bin", 1, 1, "", NULL, NULL},
564 	{  79, "latin7", "latin7_bin", 1, 1, "", NULL, NULL},
565 	{  80, "cp850", "cp850_bin", 1, 1, "", NULL, NULL},
566 	{  81, "cp852", "cp852_bin", 1, 1, "", NULL, NULL},
567 	{  82, "swe7", "swe7_bin", 1, 1, "", NULL, NULL},
568 	{  83, UTF8_MB3, UTF8_MB3"_bin", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
569 	{  84, "big5", "big5_bin", 1, 2, "", mysqlnd_mbcharlen_big5, check_mb_big5},
570 	{  85, "euckr", "euckr_bin", 1, 2, "", mysqlnd_mbcharlen_euckr, check_mb_euckr},
571 	{  86, "gb2312", "gb2312_bin", 1, 2, "", mysqlnd_mbcharlen_gb2312, check_mb_gb2312},
572 	{  87, "gbk", "gbk_bin", 1, 2, "", mysqlnd_mbcharlen_gbk, check_mb_gbk},
573 	{  88, "sjis", "sjis_bin", 1, 2, "", mysqlnd_mbcharlen_sjis, check_mb_sjis},
574 	{  89, "tis620", "tis620_bin", 1, 1, "", NULL, NULL},
575 	{  90, "ucs2", "ucs2_bin", 2, 2, "UCS-2 Unicode", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
576 	{  91, "ujis", "ujis_bin", 1, 3, "", mysqlnd_mbcharlen_ujis, check_mb_ujis},
577 	{  92, "geostd8", "geostd8_general_ci", 1, 1, "", NULL, NULL},
578 	{  93, "geostd8", "geostd8_bin", 1, 1, "", NULL, NULL},
579 	{  94, "latin1", "latin1_spanish_ci", 1, 1, "", NULL, NULL},
580 	{  95, "cp932", "cp932_japanese_ci", 1, 2, "", mysqlnd_mbcharlen_cp932, check_mb_cp932},
581 	{  96, "cp932", "cp932_bin", 1, 2, "", mysqlnd_mbcharlen_cp932, check_mb_cp932},
582 	{  97, "eucjpms", "eucjpms_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_eucjpms, check_mb_eucjpms},
583 	{  98, "eucjpms", "eucjpms_bin", 1, 3, "", mysqlnd_mbcharlen_eucjpms, check_mb_eucjpms},
584 	{  99, "cp1250", "cp1250_polish_ci", 1, 1, "", NULL, NULL},
585 	{ 128, "ucs2", "ucs2_unicode_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
586 	{ 129, "ucs2", "ucs2_icelandic_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
587 	{ 130, "ucs2", "ucs2_latvian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
588 	{ 131, "ucs2", "ucs2_romanian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
589 	{ 132, "ucs2", "ucs2_slovenian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
590 	{ 133, "ucs2", "ucs2_polish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
591 	{ 134, "ucs2", "ucs2_estonian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
592 	{ 135, "ucs2", "ucs2_spanish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
593 	{ 136, "ucs2", "ucs2_swedish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
594 	{ 137, "ucs2", "ucs2_turkish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
595 	{ 138, "ucs2", "ucs2_czech_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
596 	{ 139, "ucs2", "ucs2_danish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
597 	{ 140, "ucs2", "ucs2_lithuanian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
598 	{ 141, "ucs2", "ucs2_slovak_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
599 	{ 142, "ucs2", "ucs2_spanish2_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
600 	{ 143, "ucs2", "ucs2_roman_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
601 	{ 144, "ucs2", "ucs2_persian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
602 	{ 145, "ucs2", "ucs2_esperanto_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
603 	{ 146, "ucs2", "ucs2_hungarian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
604 	{ 147, "ucs2", "ucs2_sinhala_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
605 	{ 148, "ucs2", "ucs2_german2_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
606 	{ 149, "ucs2", "ucs2_croatian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
607 	{ 150, "ucs2", "ucs2_unicode_520_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
608 	{ 151, "ucs2", "ucs2_vietnamese_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
609 
610 /*56*/{160, "utf32", "utf32_unicode_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
611 /*56*/{161, "utf32", "utf32_icelandic_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
612 /*56*/{162, "utf32", "utf32_latvian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
613 /*56*/{163, "utf32", "utf32_romanian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
614 /*56*/{164, "utf32", "utf32_slovenian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
615 /*56*/{165, "utf32", "utf32_polish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
616 /*56*/{166, "utf32", "utf32_estonian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
617 /*56*/{167, "utf32", "utf32_spanish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
618 /*56*/{168, "utf32", "utf32_swedish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
619 /*56*/{169, "utf32", "utf32_turkish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
620 /*56*/{170, "utf32", "utf32_czech_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
621 /*56*/{171, "utf32", "utf32_danish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
622 /*56*/{172, "utf32", "utf32_lithuanian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
623 /*56*/{173, "utf32", "utf32_slovak_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
624 /*56*/{174, "utf32", "utf32_spanish2_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
625 /*56*/{175, "utf32", "utf32_roman_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
626 /*56*/{176, "utf32", "utf32_persian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
627 /*56*/{177, "utf32", "utf32_esperanto_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
628 /*56*/{178, "utf32", "utf32_hungarian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
629 /*56*/{179, "utf32", "utf32_sinhala_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
630 /*56*/{180, "utf32", "utf32_german2_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
631 /*56*/{181, "utf32", "utf32_croatian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
632 /*56*/{182, "utf32", "utf32_unicode_520_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
633 /*56*/{183, "utf32", "utf32_vietnamese_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
634 
635 	{ 192, UTF8_MB3, UTF8_MB3"_unicode_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
636 	{ 193, UTF8_MB3, UTF8_MB3"_icelandic_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
637 	{ 194, UTF8_MB3, UTF8_MB3"_latvian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3,  check_mb_utf8mb3_valid},
638 	{ 195, UTF8_MB3, UTF8_MB3"_romanian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
639 	{ 196, UTF8_MB3, UTF8_MB3"_slovenian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
640 	{ 197, UTF8_MB3, UTF8_MB3"_polish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
641 	{ 198, UTF8_MB3, UTF8_MB3"_estonian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
642 	{ 199, UTF8_MB3, UTF8_MB3"_spanish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
643 	{ 200, UTF8_MB3, UTF8_MB3"_swedish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
644 	{ 201, UTF8_MB3, UTF8_MB3"_turkish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
645 	{ 202, UTF8_MB3, UTF8_MB3"_czech_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
646 	{ 203, UTF8_MB3, UTF8_MB3"_danish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
647 	{ 204, UTF8_MB3, UTF8_MB3"_lithuanian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
648 	{ 205, UTF8_MB3, UTF8_MB3"_slovak_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
649 	{ 206, UTF8_MB3, UTF8_MB3"_spanish2_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
650 	{ 207, UTF8_MB3, UTF8_MB3"_roman_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
651 	{ 208, UTF8_MB3, UTF8_MB3"_persian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
652 	{ 209, UTF8_MB3, UTF8_MB3"_esperanto_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
653 	{ 210, UTF8_MB3, UTF8_MB3"_hungarian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
654 	{ 211, UTF8_MB3, UTF8_MB3"_sinhala_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
655 	{ 212, UTF8_MB3, UTF8_MB3"_german2_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
656 	{ 213, UTF8_MB3, UTF8_MB3"_croatian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
657 	{ 214, UTF8_MB3, UTF8_MB3"_unicode_520_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
658 	{ 215, UTF8_MB3, UTF8_MB3"_vietnamese_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
659 
660 	{ 224, UTF8_MB4, UTF8_MB4"_unicode_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
661 	{ 225, UTF8_MB4, UTF8_MB4"_icelandic_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
662 	{ 226, UTF8_MB4, UTF8_MB4"_latvian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
663 	{ 227, UTF8_MB4, UTF8_MB4"_romanian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
664 	{ 228, UTF8_MB4, UTF8_MB4"_slovenian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
665 	{ 229, UTF8_MB4, UTF8_MB4"_polish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
666 	{ 230, UTF8_MB4, UTF8_MB4"_estonian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
667 	{ 231, UTF8_MB4, UTF8_MB4"_spanish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
668 	{ 232, UTF8_MB4, UTF8_MB4"_swedish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
669 	{ 233, UTF8_MB4, UTF8_MB4"_turkish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
670 	{ 234, UTF8_MB4, UTF8_MB4"_czech_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
671 	{ 235, UTF8_MB4, UTF8_MB4"_danish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
672 	{ 236, UTF8_MB4, UTF8_MB4"_lithuanian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
673 	{ 237, UTF8_MB4, UTF8_MB4"_slovak_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
674 	{ 238, UTF8_MB4, UTF8_MB4"_spanish2_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
675 	{ 239, UTF8_MB4, UTF8_MB4"_roman_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
676 	{ 240, UTF8_MB4, UTF8_MB4"_persian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
677 	{ 241, UTF8_MB4, UTF8_MB4"_esperanto_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
678 	{ 242, UTF8_MB4, UTF8_MB4"_hungarian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
679 	{ 243, UTF8_MB4, UTF8_MB4"_sinhala_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
680 	{ 244, UTF8_MB4, UTF8_MB4"_german2_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
681 	{ 245, UTF8_MB4, UTF8_MB4"_croatian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
682 	{ 246, UTF8_MB4, UTF8_MB4"_unicode_520_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
683 	{ 247, UTF8_MB4, UTF8_MB4"_vietnamese_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
684 	{ 248, "gb18030", "gb18030_chinese_ci", 1, 4, "", mysqlnd_mbcharlen_gb18030, my_ismbchar_gb18030},
685 	{ 249, "gb18030", "gb18030_bin", 1, 4, "", mysqlnd_mbcharlen_gb18030, my_ismbchar_gb18030},
686 
687 	{ 254, UTF8_MB3, UTF8_MB3"_general_cs", 1, 3, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
688 	{ 255, UTF8_MB4, UTF8_MB4"_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
689 	{ 256, UTF8_MB4, UTF8_MB4"_de_pb_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
690 	{ 257, UTF8_MB4, UTF8_MB4"_is_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
691 	{ 258, UTF8_MB4, UTF8_MB4"_lv_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
692 	{ 259, UTF8_MB4, UTF8_MB4"_ro_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
693 	{ 260, UTF8_MB4, UTF8_MB4"_sl_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
694 	{ 261, UTF8_MB4, UTF8_MB4"_pl_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
695 	{ 262, UTF8_MB4, UTF8_MB4"_et_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
696 	{ 263, UTF8_MB4, UTF8_MB4"_es_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
697 	{ 264, UTF8_MB4, UTF8_MB4"_sv_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
698 	{ 265, UTF8_MB4, UTF8_MB4"_tr_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
699 	{ 266, UTF8_MB4, UTF8_MB4"_cs_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
700 	{ 267, UTF8_MB4, UTF8_MB4"_da_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
701 	{ 268, UTF8_MB4, UTF8_MB4"_lt_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
702 	{ 269, UTF8_MB4, UTF8_MB4"_sk_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
703 	{ 270, UTF8_MB4, UTF8_MB4"_es_trad_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
704 	{ 271, UTF8_MB4, UTF8_MB4"_la_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
705 	{ 272, UTF8_MB4, UTF8_MB4"_fa_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
706 	{ 273, UTF8_MB4, UTF8_MB4"_eo_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
707 	{ 274, UTF8_MB4, UTF8_MB4"_hu_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
708 	{ 275, UTF8_MB4, UTF8_MB4"_hr_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
709 	{ 276, UTF8_MB4, UTF8_MB4"_si_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
710 	{ 277, UTF8_MB4, UTF8_MB4"_vi_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
711 	{ 278, UTF8_MB4, UTF8_MB4"_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
712 	{ 279, UTF8_MB4, UTF8_MB4"_de_pb_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
713 	{ 280, UTF8_MB4, UTF8_MB4"_is_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
714 	{ 281, UTF8_MB4, UTF8_MB4"_lv_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
715 	{ 282, UTF8_MB4, UTF8_MB4"_ro_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
716 	{ 283, UTF8_MB4, UTF8_MB4"_sl_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
717 	{ 284, UTF8_MB4, UTF8_MB4"_pl_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
718 	{ 285, UTF8_MB4, UTF8_MB4"_et_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
719 	{ 286, UTF8_MB4, UTF8_MB4"_es_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
720 	{ 287, UTF8_MB4, UTF8_MB4"_sv_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
721 	{ 288, UTF8_MB4, UTF8_MB4"_tr_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
722 	{ 289, UTF8_MB4, UTF8_MB4"_cs_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
723 	{ 290, UTF8_MB4, UTF8_MB4"_da_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
724 	{ 291, UTF8_MB4, UTF8_MB4"_lt_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
725 	{ 292, UTF8_MB4, UTF8_MB4"_sk_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
726 	{ 293, UTF8_MB4, UTF8_MB4"_es_trad_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
727 	{ 294, UTF8_MB4, UTF8_MB4"_la_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
728 	{ 295, UTF8_MB4, UTF8_MB4"_fa_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
729 	{ 296, UTF8_MB4, UTF8_MB4"_eo_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
730 	{ 297, UTF8_MB4, UTF8_MB4"_hu_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
731 	{ 298, UTF8_MB4, UTF8_MB4"_hr_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
732 	{ 299, UTF8_MB4, UTF8_MB4"_si_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
733 	{ 300, UTF8_MB4, UTF8_MB4"_vi_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
734 	{ 303, UTF8_MB4, UTF8_MB4"_ja_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
735 	{   0, NULL, NULL, 0, 0, NULL, NULL, NULL}
736 };
737 /* }}} */
738 
739 
740 /* {{{ mysqlnd_find_charset_nr */
mysqlnd_find_charset_nr(const unsigned int charsetnr)741 PHPAPI const MYSQLND_CHARSET * mysqlnd_find_charset_nr(const unsigned int charsetnr)
742 {
743 	const MYSQLND_CHARSET * c = mysqlnd_charsets;
744 
745 	do {
746 		if (c->nr == charsetnr) {
747 			return c;
748 		}
749 		++c;
750 	} while (c[0].nr != 0);
751 	return NULL;
752 }
753 /* }}} */
754 
755 
756 /* {{{ mysqlnd_find_charset_name */
mysqlnd_find_charset_name(const char * const name)757 PHPAPI const MYSQLND_CHARSET * mysqlnd_find_charset_name(const char * const name)
758 {
759 	if (name) {
760 		const MYSQLND_CHARSET * c = mysqlnd_charsets;
761 		do {
762 			if (!strcasecmp(c->name, name)) {
763 				return c;
764 			}
765 			++c;
766 		} while (c[0].nr != 0);
767 	}
768 	return NULL;
769 }
770 /* }}} */
771 
772 
773 /* {{{ mysqlnd_cset_escape_quotes */
mysqlnd_cset_escape_quotes(const MYSQLND_CHARSET * const cset,char * newstr,const char * escapestr,const size_t escapestr_len)774 PHPAPI zend_ulong mysqlnd_cset_escape_quotes(const MYSQLND_CHARSET * const cset, char * newstr,
775 											 const char * escapestr, const size_t escapestr_len)
776 {
777 	const char 	*newstr_s = newstr;
778 	const char 	*newstr_e = newstr + 2 * escapestr_len;
779 	const char 	*end = escapestr + escapestr_len;
780 	bool	escape_overflow = FALSE;
781 
782 	DBG_ENTER("mysqlnd_cset_escape_quotes");
783 
784 	for (;escapestr < end; escapestr++) {
785 		unsigned int len = 0;
786 		/* check unicode characters */
787 
788 		if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
789 
790 			/* check possible overflow */
791 			if ((newstr + len) > newstr_e) {
792 				escape_overflow = TRUE;
793 				break;
794 			}
795 			/* copy mb char without escaping it */
796 			while (len--) {
797 				*newstr++ = *escapestr++;
798 			}
799 			escapestr--;
800 			continue;
801 		}
802 		if (*escapestr == '\'') {
803 			if (newstr + 2 > newstr_e) {
804 				escape_overflow = TRUE;
805 				break;
806 			}
807 			*newstr++ = '\'';
808 			*newstr++ = '\'';
809 		} else {
810 			if (newstr + 1 > newstr_e) {
811 				escape_overflow = TRUE;
812 				break;
813 			}
814 			*newstr++ = *escapestr;
815 		}
816 	}
817 	*newstr = '\0';
818 
819 	if (escape_overflow) {
820 		DBG_RETURN((zend_ulong)~0);
821 	}
822 	DBG_RETURN((zend_ulong)(newstr - newstr_s));
823 }
824 /* }}} */
825 
826 
827 /* {{{ mysqlnd_cset_escape_slashes */
mysqlnd_cset_escape_slashes(const MYSQLND_CHARSET * const cset,char * newstr,const char * escapestr,const size_t escapestr_len)828 PHPAPI zend_ulong mysqlnd_cset_escape_slashes(const MYSQLND_CHARSET * const cset, char *newstr,
829 											  const char * escapestr, const size_t escapestr_len)
830 {
831 	const char 	*newstr_s = newstr;
832 	const char 	*newstr_e = newstr + 2 * escapestr_len;
833 	const char 	*end = escapestr + escapestr_len;
834 	bool	escape_overflow = FALSE;
835 
836 	DBG_ENTER("mysqlnd_cset_escape_slashes");
837 	DBG_INF_FMT("charset=%s", cset->name);
838 
839 	for (;escapestr < end; escapestr++) {
840 		char esc = '\0';
841 		unsigned int len = 0;
842 
843 		/* check unicode characters */
844 		if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
845 			/* check possible overflow */
846 			if ((newstr + len) > newstr_e) {
847 				escape_overflow = TRUE;
848 				break;
849 			}
850 			/* copy mb char without escaping it */
851 			while (len--) {
852 				*newstr++ = *escapestr++;
853 			}
854 			escapestr--;
855 			continue;
856 		}
857 		if (cset->char_maxlen > 1 && cset->mb_charlen(*escapestr) > 1) {
858 			esc = *escapestr;
859 		} else {
860 			switch (*escapestr) {
861 				case 0:
862 					esc = '0';
863 					break;
864 				case '\n':
865 					esc = 'n';
866 					break;
867 				case '\r':
868 					esc = 'r';
869 					break;
870 				case '\\':
871 				case '\'':
872 				case '"':
873 					esc = *escapestr;
874 					break;
875 				case '\032':
876 					esc = 'Z';
877 					break;
878 			}
879 		}
880 		if (esc) {
881 			if (newstr + 2 > newstr_e) {
882 				escape_overflow = TRUE;
883 				break;
884 			}
885 			/* copy escaped character */
886 			*newstr++ = '\\';
887 			*newstr++ = esc;
888 		} else {
889 			if (newstr + 1 > newstr_e) {
890 				escape_overflow = TRUE;
891 				break;
892 			}
893 			/* copy non escaped character */
894 			*newstr++ = *escapestr;
895 		}
896 	}
897 	*newstr = '\0';
898 
899 	if (escape_overflow) {
900 		DBG_RETURN((zend_ulong)~0);
901 	}
902 	DBG_RETURN((zend_ulong)(newstr - newstr_s));
903 }
904 /* }}} */
905 
906 
907 static struct st_mysqlnd_plugin_charsets mysqlnd_plugin_charsets_plugin =
908 {
909 	{
910 		MYSQLND_PLUGIN_API_VERSION,
911 		"charsets",
912 		MYSQLND_VERSION_ID,
913 		PHP_MYSQLND_VERSION,
914 		"PHP License 3.01",
915 		"Andrey Hristov <andrey@php.net>,  Ulf Wendel <uw@php.net>, Georg Richter <georg@php.net>",
916 		{
917 			NULL, /* no statistics , will be filled later if there are some */
918 			NULL, /* no statistics */
919 		},
920 		{
921 			NULL /* plugin shutdown */
922 		}
923 	},
924 	{/* methods */
925 		mysqlnd_find_charset_nr,
926 		mysqlnd_find_charset_name,
927 		mysqlnd_cset_escape_quotes,
928 		mysqlnd_cset_escape_slashes
929 	}
930 };
931 
932 
933 /* {{{ mysqlnd_charsets_plugin_register */
934 void
mysqlnd_charsets_plugin_register(void)935 mysqlnd_charsets_plugin_register(void)
936 {
937 	mysqlnd_plugin_register_ex((struct st_mysqlnd_plugin_header *) &mysqlnd_plugin_charsets_plugin);
938 }
939 /* }}} */
940