xref: /php-src/ext/mysqlnd/mysqlnd_charset.c (revision 8ffac997)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | https://www.php.net/license/3_01.txt                                 |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Andrey Hristov <andrey@php.net>                             |
14   |          Ulf Wendel <uw@php.net>                                     |
15   |          Georg Richter <georg@php.net>                               |
16   +----------------------------------------------------------------------+
17 */
18 
19 #include "php.h"
20 #include "mysqlnd.h"
21 #include "mysqlnd_priv.h"
22 #include "mysqlnd_debug.h"
23 #include "mysqlnd_charset.h"
24 
/*
  X-macro listing every "character length" callback known to this file.
  The list order defines the numeric value of the <callback>_id constants
  generated from it, starting at 0 with mysqlnd_mbcharlen_null.
*/
#define ENUMERATE_ENCODINGS_CHARLEN(X) \
	X(mysqlnd_mbcharlen_null) \
	X(mysqlnd_mbcharlen_big5) \
	X(mysqlnd_mbcharlen_ujis) \
	X(mysqlnd_mbcharlen_sjis) \
	X(mysqlnd_mbcharlen_euckr) \
	X(mysqlnd_mbcharlen_gb2312) \
	X(mysqlnd_mbcharlen_gbk) \
	X(mysqlnd_mbcharlen_utf8mb3) \
	X(mysqlnd_mbcharlen_ucs2) \
	X(mysqlnd_mbcharlen_eucjpms) \
	X(mysqlnd_mbcharlen_utf8) \
	X(mysqlnd_mbcharlen_utf16) \
	X(mysqlnd_mbcharlen_utf32) \
	X(mysqlnd_mbcharlen_cp932) \
	X(mysqlnd_mbcharlen_gb18030)
41 
/*
  X-macro listing every "validate one multibyte character" callback.
  The list order defines the numeric value of the <callback>_id constants
  generated from it, starting at 0 with check_null.
*/
#define ENUMERATE_ENCODINGS_VALID(X) \
	X(check_null) \
	X(check_mb_big5) \
	X(check_mb_ujis) \
	X(check_mb_sjis) \
	X(check_mb_euckr) \
	X(check_mb_gb2312) \
	X(check_mb_gbk) \
	X(check_mb_utf8mb3_valid) \
	X(check_mb_ucs2) \
	X(check_mb_eucjpms) \
	X(check_mb_utf8_valid) \
	X(check_mb_utf16) \
	X(check_mb_utf32) \
	X(check_mb_cp932) \
	X(my_ismbchar_gb18030)
58 
/*
  Per-charset byte thresholds; each is used as the last member of the
  matching mysqlnd_charsets[] entries (single-byte charsets use 0x100 there).
  NOTE(review): presumably bytes at or below the threshold are never treated
  as the start of a multibyte character - confirm against the callers.
*/
#define LOWEST_MB_BIG5		0xA1
#define LOWEST_MB_UJIS		0x80
#define LOWEST_MB_SJIS		0x80
#define LOWEST_MB_EUCKR		0x80
#define LOWEST_MB_GB2312	0xA1
#define LOWEST_MB_GBK		0x80
#define LOWEST_MB_UTF8MB3	0x80
#define LOWEST_MB_UCS2		0x00
#define LOWEST_MB_EUCJPMS	0x80
#define LOWEST_MB_UTF8		0x80
#define LOWEST_MB_UTF16		0x00
#define LOWEST_MB_UTF32		0x00
#define LOWEST_MB_CP932		0x80
#define LOWEST_MB_GB18030	0x00
73 
74 #define ENUMERATOR_ENUM(x) x##_id,
75 enum mysqlnd_encoding_charlen {
76 	ENUMERATE_ENCODINGS_CHARLEN(ENUMERATOR_ENUM)
77 };
78 enum mysqlnd_encoding_valid {
79 	ENUMERATE_ENCODINGS_VALID(ENUMERATOR_ENUM)
80 };
81 #undef ENUMERATOR_ENUM
82 
/* Forward declarations of the id-based dispatchers defined further down. */
static unsigned int mysqlnd_mbcharlen_dispatch(
	enum mysqlnd_encoding_charlen encoding,
	const unsigned int c);
static unsigned int mysqlnd_mbvalid_dispatch(
	enum mysqlnd_encoding_valid encoding,
	const char * const start,
	const char * const end);
85 
86 /* {{{ utf8 functions */
check_mb_utf8mb3_sequence(const char * const start,const char * const end)87 static unsigned int check_mb_utf8mb3_sequence(const char * const start, const char * const end)
88 {
89 	zend_uchar	c;
90 
91 	if (UNEXPECTED(start >= end)) {
92 		return 0;
93 	}
94 
95 	c = (zend_uchar) start[0];
96 
97 	if (c < 0x80) {
98 		return 1;		/* single byte character */
99 	}
100 	if (c < 0xC2) {
101 		return 0;		/* invalid mb character */
102 	}
103 	if (c < 0xE0) {
104 		if (start + 2 > end) {
105 			return 0;	/* too small */
106 		}
107 		if (!(((zend_uchar)start[1] ^ 0x80) < 0x40)) {
108 			return 0;
109 		}
110 		return 2;
111 	}
112 	if (c < 0xF0) {
113 		if (start + 3 > end) {
114 			return 0;	/* too small */
115 		}
116 		if (!(((zend_uchar)start[1] ^ 0x80) < 0x40 && ((zend_uchar)start[2] ^ 0x80) < 0x40 &&
117 			(c >= 0xE1 || (zend_uchar)start[1] >= 0xA0))) {
118 			return 0;	/* invalid utf8 character */
119 		}
120 		return 3;
121 	}
122 	return 0;
123 }
124 
125 
check_mb_utf8_sequence(const char * const start,const char * const end)126 static unsigned int check_mb_utf8_sequence(const char * const start, const char * const end)
127 {
128 	zend_uchar	c;
129 
130 	if (UNEXPECTED(start >= end)) {
131 		return 0;
132 	}
133 
134 	c = (zend_uchar) start[0];
135 
136 	if (c < 0x80) {
137 		return 1;		/* single byte character */
138 	}
139 	if (c < 0xC2) {
140 		return 0;		/* invalid mb character */
141 	}
142 	if (c < 0xE0) {
143 		if (start + 2 > end) {
144 			return 0;	/* too small */
145 		}
146 		if (!(((zend_uchar)start[1] ^ 0x80) < 0x40)) {
147 			return 0;
148 		}
149 		return 2;
150 	}
151 	if (c < 0xF0) {
152 		if (start + 3 > end) {
153 			return 0;	/* too small */
154 		}
155 		if (!(((zend_uchar)start[1] ^ 0x80) < 0x40 && ((zend_uchar)start[2] ^ 0x80) < 0x40 &&
156 			(c >= 0xE1 || (zend_uchar)start[1] >= 0xA0))) {
157 			return 0;	/* invalid utf8 character */
158 		}
159 		return 3;
160 	}
161 	if (c < 0xF5) {
162 		if (start + 4 > end) { /* We need 4 characters */
163 			return 0;	/* too small */
164 		}
165 
166 		/*
167 		  UTF-8 quick four-byte mask:
168 		  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
169 		  Encoding allows to encode U+00010000..U+001FFFFF
170 
171 		  The maximum character defined in the Unicode standard is U+0010FFFF.
172 		  Higher characters U+00110000..U+001FFFFF are not used.
173 
174 		  11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
175 		  11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
176 
177 		  Valid codes:
178 		  [F0][90..BF][80..BF][80..BF]
179 		  [F1][80..BF][80..BF][80..BF]
180 		  [F2][80..BF][80..BF][80..BF]
181 		  [F3][80..BF][80..BF][80..BF]
182 		  [F4][80..8F][80..BF][80..BF]
183 		*/
184 
185 		if (!(((zend_uchar)start[1] ^ 0x80) < 0x40 &&
186 			((zend_uchar)start[2] ^ 0x80) < 0x40 &&
187 			((zend_uchar)start[3] ^ 0x80) < 0x40 &&
188 				(c >= 0xf1 || (zend_uchar)start[1] >= 0x90) &&
189 				(c <= 0xf3 || (zend_uchar)start[1] <= 0x8F)))
190 		{
191 			return 0;	/* invalid utf8 character */
192 		}
193 		return 4;
194 	}
195 	return 0;
196 }
197 
/*
  Multibyte-only wrapper around check_mb_utf8mb3_sequence(): returns the
  sequence length when it is 2 or 3, and 0 for single bytes and for
  anything the sequence checker rejects.
*/
static unsigned int check_mb_utf8mb3_valid(const char * const start, const char * const end)
{
	const unsigned int seq_len = check_mb_utf8mb3_sequence(start, end);

	if (seq_len <= 1) {
		return 0;
	}
	return seq_len;
}
203 
/*
  Multibyte-only wrapper around check_mb_utf8_sequence(): returns the
  sequence length when it is 2, 3 or 4, and 0 for single bytes and for
  anything the sequence checker rejects.
*/
static unsigned int check_mb_utf8_valid(const char * const start, const char * const end)
{
	const unsigned int seq_len = check_mb_utf8_sequence(start, end);

	if (seq_len <= 1) {
		return 0;
	}
	return seq_len;
}
209 
210 
/*
  Map a lead byte value to the length of the (up to three byte) UTF-8
  sequence it introduces. Returns 0 for values that cannot start such a
  sequence.
*/
static unsigned int mysqlnd_mbcharlen_utf8mb3(const unsigned int utf8)
{
	if (utf8 < 0x80) {
		return 1;		/* single byte character */
	}
	if (utf8 >= 0xC2 && utf8 < 0xE0) {
		return 2;		/* double byte character */
	}
	if (utf8 >= 0xE0 && utf8 < 0xF0) {
		return 3;		/* triple byte character */
	}
	/* 0x80..0xC1 and everything from 0xF0 up: invalid multibyte header */
	return 0;
}
227 
228 
/*
  Map a lead byte value to the length of the (up to four byte) UTF-8
  sequence it introduces. Returns 0 for values that cannot start a
  sequence.
*/
static unsigned int mysqlnd_mbcharlen_utf8(const unsigned int utf8)
{
	if (utf8 < 0x80) {
		return 1;		/* single byte character */
	}
	if (utf8 >= 0xC2 && utf8 < 0xE0) {
		return 2;		/* double byte character */
	}
	if (utf8 >= 0xE0 && utf8 < 0xF0) {
		return 3;		/* triple byte character */
	}
	if (utf8 >= 0xF0 && utf8 < 0xF8) {
		return 4;		/* four byte character */
	}
	/* 0x80..0xC1 and everything from 0xF8 up: invalid multibyte header */
	return 0;
}
248 /* }}} */
249 
250 
251 /* {{{ big5 functions */
/* Big5 lead byte: 0xA1..0xF9 (the argument is reduced to one unsigned byte). */
#define valid_big5head(c)	(0xA1 <= (zend_uchar)(c) && (zend_uchar)(c) <= 0xF9)
/* Big5 trail byte: 0x40..0x7E or 0xA1..0xFE. */
#define valid_big5tail(c)	((0x40 <= (zend_uchar)(c) && (zend_uchar)(c) <= 0x7E) || \
							(0xA1 <= (zend_uchar)(c) && (zend_uchar)(c) <= 0xFE))

/* True when (c, d) is a valid lead/trail pair; built on the two macros above. */
#define isbig5code(c,d) (valid_big5head(c) && valid_big5tail(d))
257 
/*
  Returns 2 when `start` points at a valid Big5 lead byte that is followed,
  inside [start, end), by a valid trail byte; returns 0 otherwise.
*/
static unsigned int check_mb_big5(const char * const start, const char * const end)
{
	if (!valid_big5head(start[0])) {
		return 0;
	}
	if ((end - start) <= 1) {
		return 0;	/* no room for the trail byte */
	}
	return valid_big5tail(start[1]) ? 2 : 0;
}
262 
263 
/* Returns 2 when `big5` is a Big5 lead byte, 1 otherwise. */
static unsigned int mysqlnd_mbcharlen_big5(const unsigned int big5)
{
	if (valid_big5head(big5)) {
		return 2;
	}
	return 1;
}
268 /* }}} */
269 
270 
271 /* {{{ cp932 functions */
/*
  cp932 lead byte: 0x81..0x9F or 0xE0..0xFC; trail byte: 0x40..0x7E or
  0x80..0xFC. Every use of the argument is parenthesized so that operator
  expressions can be passed safely. The argument is evaluated several
  times and is not reduced to a byte here - callers cast it themselves.
*/
#define valid_cp932head(c) ((0x81 <= (c) && (c) <= 0x9F) || (0xE0 <= (c) && (c) <= 0xFC))
#define valid_cp932tail(c) ((0x40 <= (c) && (c) <= 0x7E) || (0x80 <= (c) && (c) <= 0xFC))
274 
275 
check_mb_cp932(const char * const start,const char * const end)276 static unsigned int check_mb_cp932(const char * const start, const char * const end)
277 {
278 	return (valid_cp932head((zend_uchar)start[0]) && (end - start >  1) &&
279 			valid_cp932tail((zend_uchar)start[1])) ? 2 : 0;
280 }
281 
282 
mysqlnd_mbcharlen_cp932(const unsigned int cp932)283 static unsigned int mysqlnd_mbcharlen_cp932(const unsigned int cp932)
284 {
285 	return (valid_cp932head((zend_uchar)cp932)) ? 2 : 1;
286 }
287 /* }}} */
288 
289 
290 /* {{{ euckr functions */
/* euckr byte in the range 0xA1..0xFE (the argument is reduced to one unsigned byte). */
#define valid_euckr(c)	(((zend_uchar)(c) >= 0xA1 && (zend_uchar)(c) <= 0xFE))
292 
check_mb_euckr(const char * const start,const char * const end)293 static unsigned int check_mb_euckr(const char * const start, const char * const end)
294 {
295 	if (end - start <= 1) {
296 		return 0;	/* invalid length */
297 	}
298 	if (*(zend_uchar *)start < 0x80) {
299 		return 0;	/* invalid euckr character */
300 	}
301 	if (valid_euckr(start[1])) {
302 		return 2;
303 	}
304 	return 0;
305 }
306 
307 
/* Returns 2 when `kr` satisfies valid_euckr(), 1 otherwise. */
static unsigned int mysqlnd_mbcharlen_euckr(const unsigned int kr)
{
	if (valid_euckr(kr)) {
		return 2;
	}
	return 1;
}
312 /* }}} */
313 
314 
315 /* {{{ eucjpms functions */
/*
  eucjpms byte classes; only the low 8 bits of the argument are examined.
    valid_eucjpms       0xA1..0xFE
    valid_eucjpms_kata  0xA1..0xDF
    valid_eucjpms_ss2   0x8E
    valid_eucjpms_ss3   0x8F
*/
#define valid_eucjpms(c) 		(0xA1 <= ((c) & 0xFF) && ((c) & 0xFF) <= 0xFE)
#define valid_eucjpms_kata(c)	(0xA1 <= ((c) & 0xFF) && ((c) & 0xFF) <= 0xDF)
#define valid_eucjpms_ss2(c)	(0x8E == ((c) & 0xFF))
#define valid_eucjpms_ss3(c)	(0x8F == ((c) & 0xFF))
320 
check_mb_eucjpms(const char * const start,const char * const end)321 static unsigned int check_mb_eucjpms(const char * const start, const char * const end)
322 {
323 	if (*((zend_uchar *)start) < 0x80) {
324 		return 0;	/* invalid eucjpms character */
325 	}
326 	if (valid_eucjpms(start[0]) && (end - start) > 1 && valid_eucjpms(start[1])) {
327 		return 2;
328 	}
329 	if (valid_eucjpms_ss2(start[0]) && (end - start) > 1 && valid_eucjpms_kata(start[1])) {
330 		return 2;
331 	}
332 	if (valid_eucjpms_ss3(start[0]) && (end - start) > 2 && valid_eucjpms(start[1]) &&
333 		valid_eucjpms(start[2])) {
334 		return 3;
335 	}
336 	return 0;
337 }
338 
339 
/*
  Returns 3 when the low byte of `jpms` is 0x8F, 2 when it is 0x8E or in
  0xA1..0xFE, and 1 otherwise.
*/
static unsigned int mysqlnd_mbcharlen_eucjpms(const unsigned int jpms)
{
	if (valid_eucjpms_ss3(jpms)) {
		return 3;
	}
	if (valid_eucjpms_ss2(jpms) || valid_eucjpms(jpms)) {
		return 2;
	}
	return 1;
}
350 /* }}} */
351 
352 
353 /* {{{ gb2312 functions */
/* gb2312 lead byte 0xA1..0xF7, trail byte 0xA1..0xFE (argument reduced to one unsigned byte). */
#define valid_gb2312_head(c)	((zend_uchar)(c) >= 0xA1 && (zend_uchar)(c) <= 0xF7)
#define valid_gb2312_tail(c)	((zend_uchar)(c) >= 0xA1 && (zend_uchar)(c) <= 0xFE)
356 
357 
/*
  Returns 2 when `start` points at a valid gb2312 lead byte that is
  followed, inside [start, end), by a valid trail byte; returns 0 otherwise.
*/
static unsigned int check_mb_gb2312(const char * const start, const char * const end)
{
	/* The macros reduce their argument to one unsigned byte themselves. */
	if (!valid_gb2312_head(start[0])) {
		return 0;
	}
	if (end - start <= 1) {
		return 0;	/* no room for the trail byte */
	}
	return valid_gb2312_tail(start[1]) ? 2 : 0;
}
363 
364 
/* Returns 2 when `gb` is a gb2312 lead byte, 1 otherwise. */
static unsigned int mysqlnd_mbcharlen_gb2312(const unsigned int gb)
{
	if (valid_gb2312_head(gb)) {
		return 2;
	}
	return 1;
}
369 /* }}} */
370 
371 
372 /* {{{ gbk functions */
/* gbk lead byte 0x81..0xFE; trail byte 0x40..0x7E or 0x80..0xFE (argument reduced to one unsigned byte). */
#define valid_gbk_head(c)	((zend_uchar)(c) >= 0x81 && (zend_uchar)(c) <= 0xFE)
#define valid_gbk_tail(c)	(((zend_uchar)(c) >= 0x40 && (zend_uchar)(c) <= 0x7E) || \
							((zend_uchar)(c) >= 0x80 && (zend_uchar)(c) <= 0xFE))
375 
/*
  Returns 2 when `start` points at a valid gbk lead byte that is followed,
  inside [start, end), by a valid trail byte; returns 0 otherwise.
*/
static unsigned int check_mb_gbk(const char * const start, const char * const end)
{
	if (!valid_gbk_head(start[0])) {
		return 0;
	}
	if (end - start <= 1) {
		return 0;	/* no room for the trail byte */
	}
	return valid_gbk_tail(start[1]) ? 2 : 0;
}
380 
/* Returns 2 when `gbk` is a gbk lead byte, 1 otherwise. */
static unsigned int mysqlnd_mbcharlen_gbk(const unsigned int gbk)
{
	if (valid_gbk_head(gbk)) {
		return 2;
	}
	return 1;
}
385 /* }}} */
386 
387 
388 /* {{{ sjis functions */
/*
  sjis lead byte: 0x81..0x9F or 0xE0..0xFC; trail byte: 0x40..0x7E or
  0x80..0xFC. The argument is not reduced to a byte here - callers cast it.
*/
#define valid_sjis_head(c)	(((c) >= 0x81 && (c) <= 0x9F) || ((c) >= 0xE0 && (c) <= 0xFC))
#define valid_sjis_tail(c)	(((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0x80 && (c) <= 0xFC))
391 
392 
check_mb_sjis(const char * const start,const char * const end)393 static unsigned int check_mb_sjis(const char * const start, const char * const end)
394 {
395 	return (valid_sjis_head((zend_uchar)start[0]) && (end - start) > 1 && valid_sjis_tail((zend_uchar)start[1])) ? 2 : 0;
396 }
397 
398 
mysqlnd_mbcharlen_sjis(const unsigned int sjis)399 static unsigned int mysqlnd_mbcharlen_sjis(const unsigned int sjis)
400 {
401 	return (valid_sjis_head((zend_uchar)sjis)) ? 2 : 1;
402 }
403 /* }}} */
404 
405 
406 /* {{{ ucs2 functions */
/*
  Every ucs2 character occupies exactly two bytes.

  Returns 2 when at least two bytes are available in [start, end), and 0
  otherwise, so that a caller never consumes bytes beyond `end` when the
  input ends in the middle of a character.
*/
static unsigned int check_mb_ucs2(const char * const start, const char * const end)
{
	if (end - start < 2) {
		return 0;	/* truncated character */
	}
	return 2;
}
411 
/* ucs2 is fixed width: the length is 2 regardless of the argument. */
static unsigned int mysqlnd_mbcharlen_ucs2(const unsigned int ucs2 __attribute((unused)))
{
	const unsigned int fixed_width = 2;

	return fixed_width;
}
416 /* }}} */
417 
418 
419 /* {{{ ujis functions */
/*
  ujis byte classes; only the low 8 bits of the argument are examined.
    valid_ujis       0xA1..0xFE
    valid_ujis_kata  0xA1..0xDF
    valid_ujis_ss2   0x8E
    valid_ujis_ss3   0x8F
*/
#define valid_ujis(c)     	((((c)&0xFF) >= 0xA1 && ((c)&0xFF) <= 0xFE))
#define valid_ujis_kata(c)  ((((c)&0xFF) >= 0xA1 && ((c)&0xFF) <= 0xDF))
#define valid_ujis_ss2(c) 	(0x8E == ((c)&0xFF))
#define valid_ujis_ss3(c) 	(0x8F == ((c)&0xFF))
424 
check_mb_ujis(const char * const start,const char * const end)425 static unsigned int check_mb_ujis(const char * const start, const char * const end)
426 {
427 	if (*(zend_uchar*)start < 0x80) {
428 		return 0;	/* invalid ujis character */
429 	}
430 	if (valid_ujis(*(start)) && valid_ujis(*((start)+1))) {
431 		return 2;
432 	}
433 	if (valid_ujis_ss2(*(start)) && valid_ujis_kata(*((start)+1))) {
434 		return 2;
435 	}
436 	if (valid_ujis_ss3(*(start)) && (end-start) > 2 && valid_ujis(*((start)+1)) && valid_ujis(*((start)+2))) {
437 		return 3;
438 	}
439 	return 0;
440 }
441 
442 
/*
  Returns 2 when the low byte of `ujis` is in 0xA1..0xFE or equals 0x8E,
  3 when it equals 0x8F, and 1 otherwise.
*/
static unsigned int mysqlnd_mbcharlen_ujis(const unsigned int ujis)
{
	if (valid_ujis(ujis)) {
		return 2;
	}
	if (valid_ujis_ss2(ujis)) {
		return 2;
	}
	if (valid_ujis_ss3(ujis)) {
		return 3;
	}
	return 1;
}
447 /* }}} */
448 
449 
450 
451 /* {{{ utf16 functions */
/*
  Classify a byte by its top six bits (the argument is reduced to one
  unsigned byte): 0xD8..0xDB matches UTF16_HIGH_HEAD, 0xDC..0xDF matches
  UTF16_LOW_HEAD.
*/
#define UTF16_HIGH_HEAD(x)  ((((zend_uchar) (x)) >> 2) == (0xD8 >> 2))
#define UTF16_LOW_HEAD(x)   ((((zend_uchar) (x)) >> 2) == (0xDC >> 2))
454 
/*
  Validate one utf16 character at `start`, classifying units by their
  first byte.

  Returns 2 for a single two-byte unit, 4 for a high-surrogate unit that is
  followed by a low-surrogate unit, and 0 when fewer than two bytes are
  available, the pair is truncated or mismatched, or the first unit is a
  lone low surrogate.
*/
static unsigned int check_mb_utf16(const char * const start, const char * const end)
{
	if (end - start < 2) {
		return 0;	/* not even one code unit */
	}

	if (UTF16_LOW_HEAD(start[0])) {
		return 0;	/* a low surrogate cannot come first */
	}
	if (!UTF16_HIGH_HEAD(start[0])) {
		return 2;	/* ordinary two-byte unit */
	}

	/* High surrogate: a second unit starting with a low-surrogate byte must follow. */
	if (end - start < 4) {
		return 0;
	}
	return UTF16_LOW_HEAD(start[2]) ? 4 : 0;
}
470 
471 
/* Returns 4 when `utf16` matches UTF16_HIGH_HEAD (0xD8..0xDB), 2 otherwise. */
static uint32_t mysqlnd_mbcharlen_utf16(const unsigned int utf16)
{
	if (UTF16_HIGH_HEAD(utf16)) {
		return 4;
	}
	return 2;
}
476 /* }}} */
477 
478 
479 /* {{{ utf32 functions */
/*
  Every utf32 character occupies exactly four bytes.

  Returns 4 when at least four bytes are available in [start, end), and 0
  otherwise, so that a caller never consumes bytes beyond `end` when the
  input ends in the middle of a character.
*/
static unsigned int check_mb_utf32(const char * const start, const char * const end)
{
	if (end - start < 4) {
		return 0;	/* truncated character */
	}
	return 4;
}
484 
485 
/* utf32 is fixed width: the length is 4 regardless of the argument. */
static unsigned int mysqlnd_mbcharlen_utf32(const unsigned int utf32 __attribute((unused)))
{
	const unsigned int fixed_width = 4;

	return fixed_width;
}
490 /* }}} */
491 
492 
493 /* {{{ gb18030 functions */
/*
  gb18030 byte classes (argument reduced to one unsigned byte):
    is_gb18030_odd     0x81..0xFE
    is_gb18030_even_2  0x40..0x7E or 0x80..0xFE
    is_gb18030_even_4  0x30..0x39
*/
#define is_gb18030_odd(c)          ((zend_uchar) (c) >= 0x81 && (zend_uchar) (c) <= 0xFE)
#define is_gb18030_even_2(c)       (((zend_uchar) (c) >= 0x40 && (zend_uchar) (c) <= 0x7E) || ((zend_uchar) (c) >= 0x80 && (zend_uchar) (c) <= 0xFE))
#define is_gb18030_even_4(c)       ((zend_uchar) (c) >= 0x30 && (zend_uchar) (c) <= 0x39)
497 
498 
/*
  Length lookup for gb18030 from a one- or two-byte value.

  For c <= 0xFF: returns 1 unless the byte is in 0x81..0xFE, in which case
  it returns 0. For 0x100..0xFFFF the high byte must be in 0x81..0xFE and
  the low byte selects 2 (0x40..0x7E, 0x80..0xFE) or 4 (0x30..0x39).
  Everything else yields 0.
*/
static unsigned int mysqlnd_mbcharlen_gb18030(const unsigned int c)
{
	unsigned int high, low;

	if (c <= 0xFF) {
		return is_gb18030_odd(c) ? 0 : 1;
	}
	if (c > 0xFFFF) {
		return 0;
	}

	high = (c >> 8) & 0xFF;
	low = c & 0xFF;

	if (!is_gb18030_odd(high)) {
		return 0;
	}
	if (is_gb18030_even_2(low)) {
		return 2;
	}
	if (is_gb18030_even_4(low)) {
		return 4;
	}

	return 0;
}
516 
517 
/*
  Validate one multibyte gb18030 character at `start`.

  Returns 2 when the first byte is in 0x81..0xFE and the second is a valid
  two-byte trail byte; 4 when four bytes are available and they follow the
  pattern [0x81..0xFE][0x30..0x39][0x81..0xFE][0x30..0x39]; 0 otherwise,
  including when fewer than two bytes are available.
*/
static unsigned int my_ismbchar_gb18030(const char * start, const char * end)
{
	if (end - start > 1 && is_gb18030_odd(start[0])) {
		if (is_gb18030_even_2(start[1])) {
			return 2;
		}
		if (end - start > 3 && is_gb18030_even_4(start[1]) && is_gb18030_odd(start[2]) && is_gb18030_even_4(start[3])) {
			return 4;
		}
	}

	return 0;
}
532 /* }}} */
533 
/*
  For BC reasons the server sometimes names the full UTF-8 charset (up to
  four bytes per character) "utf8mb4" and the old three-byte one "utf8".
  In other cases "utf8mb4" is simply called "utf8" and the old charset is
  "utf8mb3". The names are kept in macros so they are easy to change and
  could be made compile-time dependent.
*/
539 
/* Charset names used for the 4-byte and the 3-byte UTF-8 entries of mysqlnd_charsets[]. */
#define UTF8_MB4	"utf8mb4"
#define UTF8_MB3	"utf8"
542 
mysqlnd_mbcharlen_null(const unsigned int c)543 static zend_always_inline unsigned int mysqlnd_mbcharlen_null(const unsigned int c)
544 {
545 	ZEND_UNREACHABLE();
546 	return 0;
547 }
548 
check_null(const char * const start,const char * const end)549 static zend_always_inline unsigned int check_null(const char * const start, const char * const end)
550 {
551 	ZEND_UNREACHABLE();
552 	return 0;
553 }
554 
mysqlnd_mbcharlen_dispatch(enum mysqlnd_encoding_charlen encoding,const unsigned int c)555 static unsigned int mysqlnd_mbcharlen_dispatch(enum mysqlnd_encoding_charlen encoding, const unsigned int c)
556 {
557 	switch (encoding) {
558 #define ENUMERATOR_DISPATCH(x) case x##_id: return x(c);
559 		ENUMERATE_ENCODINGS_CHARLEN(ENUMERATOR_DISPATCH)
560 #undef ENUMERATOR_DISPATCH
561 		default: return mysqlnd_mbcharlen_null(c);
562 	}
563 }
564 
mysqlnd_mbvalid_dispatch(enum mysqlnd_encoding_valid encoding,const char * const start,const char * const end)565 static unsigned int mysqlnd_mbvalid_dispatch(enum mysqlnd_encoding_valid encoding, const char * const start, const char * const end)
566 {
567 	switch (encoding) {
568 #define ENUMERATOR_DISPATCH(x) case x##_id: return x(start, end);
569 		ENUMERATE_ENCODINGS_VALID(ENUMERATOR_DISPATCH)
570 #undef ENUMERATOR_DISPATCH
571 		default: return check_null(start, end);
572 	}
573 }
574 
575 /* {{{ mysqlnd_charsets */
576 const MYSQLND_CHARSET mysqlnd_charsets[] =
577 {
578 	{   1, "big5","big5_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_big5_id, check_mb_big5_id, LOWEST_MB_BIG5},
579 	{   3, "dec8", "dec8_swedish_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
580 	{   4, "cp850", "cp850_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
581 	{   6, "hp8", "hp8_english_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
582 	{   7, "koi8r", "koi8r_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
583 	{   8, "latin1", "latin1_swedish_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
584 	{   5, "latin1", "latin1_german1_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100}, /* should be after 0x8 because swedish_ci is the default collation */
585 	{   9, "latin2", "latin2_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
586 	{   2, "latin2", "latin2_czech_cs", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100}, /* should be after 0x9 because general_ci is the default collation */
587 	{  10, "swe7", "swe7_swedish_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
588 	{  11, "ascii", "ascii_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
589 	{  12, "ujis", "ujis_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_ujis_id, check_mb_ujis_id, LOWEST_MB_UJIS},
590 	{  13, "sjis", "sjis_japanese_ci", 1, 2, "", mysqlnd_mbcharlen_sjis_id, check_mb_sjis_id, LOWEST_MB_SJIS},
591 	{  16, "hebrew", "hebrew_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
592 	{  17, "filename", "filename", 1, 5, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
593 	{  18, "tis620", "tis620_thai_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
594 	{  19, "euckr", "euckr_korean_ci", 1, 2, "", mysqlnd_mbcharlen_euckr_id, check_mb_euckr_id, LOWEST_MB_EUCKR},
595 	{  21, "latin2", "latin2_hungarian_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
596 	{  27, "latin2", "latin2_croatian_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
597 	{  22, "koi8u", "koi8u_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
598 	{  24, "gb2312", "gb2312_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_gb2312_id, check_mb_gb2312_id, LOWEST_MB_GB2312},
599 	{  25, "greek", "greek_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
600 	{  26, "cp1250", "cp1250_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
601 	{  28, "gbk", "gbk_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_gbk_id, check_mb_gbk_id, LOWEST_MB_GBK},
602 	{  30, "latin5", "latin5_turkish_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
603 	{  31, "latin1", "latin1_german2_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
604 	{  15, "latin1", "latin1_danish_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
605 	{  32, "armscii8", "armscii8_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
606 	{  33, UTF8_MB3, UTF8_MB3"_general_ci", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8mb3_id,  check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
607 	{  35, "ucs2", "ucs2_general_ci", 2, 2, "UCS-2 Unicode", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
608 	{  36, "cp866", "cp866_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
609 	{  37, "keybcs2", "keybcs2_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
610 	{  38, "macce", "macce_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
611 	{  39, "macroman", "macroman_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
612 	{  40, "cp852", "cp852_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
613 	{  41, "latin7", "latin7_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
614 	{  20, "latin7", "latin7_estonian_cs", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
615 	{  57, "cp1256", "cp1256_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
616 	{  59, "cp1257", "cp1257_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
617 	{  63, "binary", "binary", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
618 	{  97, "eucjpms", "eucjpms_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_eucjpms_id, check_mb_eucjpms_id, LOWEST_MB_EUCJPMS},
619 	{  29, "cp1257", "cp1257_lithuanian_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
620 	{  31, "latin1", "latin1_german2_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
621 	{  34, "cp1250", "cp1250_czech_cs", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
622 	{  42, "latin7", "latin7_general_cs", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
623 	{  43, "macce", "macce_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
624 	{  44, "cp1250", "cp1250_croatian_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
625 	{  45, UTF8_MB4, UTF8_MB4"_general_ci", 1, 4, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8_id,  check_mb_utf8_valid_id, LOWEST_MB_UTF8},
626 	{  46, UTF8_MB4, UTF8_MB4"_bin", 1, 4, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8_id,  check_mb_utf8_valid_id, LOWEST_MB_UTF8},
627 	{  47, "latin1", "latin1_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
628 	{  48, "latin1", "latin1_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
629 	{  49, "latin1", "latin1_general_cs", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
630 	{  51, "cp1251", "cp1251_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
631 	{  14, "cp1251", "cp1251_bulgarian_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
632 	{  23, "cp1251", "cp1251_ukrainian_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
633 	{  50, "cp1251", "cp1251_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
634 	{  52, "cp1251", "cp1251_general_cs", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
635 	{  53, "macroman", "macroman_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
636 	{  54, "utf16", "utf16_general_ci", 2, 4, "UTF-16 Unicode", mysqlnd_mbcharlen_utf16_id, check_mb_utf16_id, LOWEST_MB_UTF16},
637 	{  55, "utf16", "utf16_bin", 2, 4, "UTF-16 Unicode", mysqlnd_mbcharlen_utf16_id, check_mb_utf16_id, LOWEST_MB_UTF16},
638 	{  56, "utf16le", "utf16le_general_ci", 2, 4, "UTF-16LE Unicode", mysqlnd_mbcharlen_utf16_id, check_mb_utf16_id, LOWEST_MB_UTF16},
639 	{  58, "cp1257", "cp1257_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
640 /*55*/{  60, "utf32", "utf32_general_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
641 /*55*/{  61, "utf32", "utf32_bin", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
642 	{  62, "utf16le", "utf16le_bin", 2, 4, "UTF-16LE Unicode", mysqlnd_mbcharlen_utf16_id, check_mb_utf16_id, LOWEST_MB_UTF16},
643 	{  64, "armscii8", "armscii8_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
644 	{  65, "ascii", "ascii_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
645 	{  66, "cp1250", "cp1250_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
646 	{  67, "cp1256", "cp1256_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
647 	{  68, "cp866", "cp866_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
648 	{  69, "dec8", "dec8_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
649 	{  70, "greek", "greek_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
650 	{  71, "hebrew", "hebrew_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
651 	{  72, "hp8", "hp8_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
652 	{  73, "keybcs2", "keybcs2_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
653 	{  74, "koi8r", "koi8r_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
654 	{  75, "koi8u", "koi8u_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
655 	{  77, "latin2", "latin2_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
656 	{  78, "latin5", "latin5_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
657 	{  79, "latin7", "latin7_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
658 	{  80, "cp850", "cp850_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
659 	{  81, "cp852", "cp852_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
660 	{  82, "swe7", "swe7_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
661 	{  83, UTF8_MB3, UTF8_MB3"_bin", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8mb3_id,  check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
662 	{  84, "big5", "big5_bin", 1, 2, "", mysqlnd_mbcharlen_big5_id, check_mb_big5_id, LOWEST_MB_BIG5},
663 	{  85, "euckr", "euckr_bin", 1, 2, "", mysqlnd_mbcharlen_euckr_id, check_mb_euckr_id, LOWEST_MB_EUCKR},
664 	{  86, "gb2312", "gb2312_bin", 1, 2, "", mysqlnd_mbcharlen_gb2312_id, check_mb_gb2312_id, LOWEST_MB_GB2312},
665 	{  87, "gbk", "gbk_bin", 1, 2, "", mysqlnd_mbcharlen_gbk_id, check_mb_gbk_id, LOWEST_MB_GBK},
666 	{  88, "sjis", "sjis_bin", 1, 2, "", mysqlnd_mbcharlen_sjis_id, check_mb_sjis_id, LOWEST_MB_SJIS},
667 	{  89, "tis620", "tis620_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
668 	{  90, "ucs2", "ucs2_bin", 2, 2, "UCS-2 Unicode", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
669 	{  91, "ujis", "ujis_bin", 1, 3, "", mysqlnd_mbcharlen_ujis_id, check_mb_ujis_id, LOWEST_MB_UJIS},
670 	{  92, "geostd8", "geostd8_general_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
671 	{  93, "geostd8", "geostd8_bin", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
672 	{  94, "latin1", "latin1_spanish_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
673 	{  95, "cp932", "cp932_japanese_ci", 1, 2, "", mysqlnd_mbcharlen_cp932_id, check_mb_cp932_id, LOWEST_MB_CP932},
674 	{  96, "cp932", "cp932_bin", 1, 2, "", mysqlnd_mbcharlen_cp932_id, check_mb_cp932_id, LOWEST_MB_CP932},
675 	{  97, "eucjpms", "eucjpms_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_eucjpms_id, check_mb_eucjpms_id, LOWEST_MB_EUCJPMS},
676 	{  98, "eucjpms", "eucjpms_bin", 1, 3, "", mysqlnd_mbcharlen_eucjpms_id, check_mb_eucjpms_id, LOWEST_MB_EUCJPMS},
677 	{  99, "cp1250", "cp1250_polish_ci", 1, 1, "", mysqlnd_mbcharlen_null_id, check_null_id, 0x100},
678 	{ 128, "ucs2", "ucs2_unicode_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
679 	{ 129, "ucs2", "ucs2_icelandic_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
680 	{ 130, "ucs2", "ucs2_latvian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
681 	{ 131, "ucs2", "ucs2_romanian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
682 	{ 132, "ucs2", "ucs2_slovenian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
683 	{ 133, "ucs2", "ucs2_polish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
684 	{ 134, "ucs2", "ucs2_estonian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
685 	{ 135, "ucs2", "ucs2_spanish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
686 	{ 136, "ucs2", "ucs2_swedish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
687 	{ 137, "ucs2", "ucs2_turkish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
688 	{ 138, "ucs2", "ucs2_czech_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
689 	{ 139, "ucs2", "ucs2_danish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
690 	{ 140, "ucs2", "ucs2_lithuanian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
691 	{ 141, "ucs2", "ucs2_slovak_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
692 	{ 142, "ucs2", "ucs2_spanish2_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
693 	{ 143, "ucs2", "ucs2_roman_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
694 	{ 144, "ucs2", "ucs2_persian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
695 	{ 145, "ucs2", "ucs2_esperanto_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
696 	{ 146, "ucs2", "ucs2_hungarian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
697 	{ 147, "ucs2", "ucs2_sinhala_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
698 	{ 148, "ucs2", "ucs2_german2_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
699 	{ 149, "ucs2", "ucs2_croatian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
700 	{ 150, "ucs2", "ucs2_unicode_520_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
701 	{ 151, "ucs2", "ucs2_vietnamese_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2_id, check_mb_ucs2_id, LOWEST_MB_UCS2},
702 
703 /*56*/{160, "utf32", "utf32_unicode_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
704 /*56*/{161, "utf32", "utf32_icelandic_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
705 /*56*/{162, "utf32", "utf32_latvian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
706 /*56*/{163, "utf32", "utf32_romanian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
707 /*56*/{164, "utf32", "utf32_slovenian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
708 /*56*/{165, "utf32", "utf32_polish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
709 /*56*/{166, "utf32", "utf32_estonian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
710 /*56*/{167, "utf32", "utf32_spanish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
711 /*56*/{168, "utf32", "utf32_swedish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
712 /*56*/{169, "utf32", "utf32_turkish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
713 /*56*/{170, "utf32", "utf32_czech_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
714 /*56*/{171, "utf32", "utf32_danish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
715 /*56*/{172, "utf32", "utf32_lithuanian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
716 /*56*/{173, "utf32", "utf32_slovak_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
717 /*56*/{174, "utf32", "utf32_spanish2_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
718 /*56*/{175, "utf32", "utf32_roman_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
719 /*56*/{176, "utf32", "utf32_persian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
720 /*56*/{177, "utf32", "utf32_esperanto_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
721 /*56*/{178, "utf32", "utf32_hungarian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
722 /*56*/{179, "utf32", "utf32_sinhala_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
723 /*56*/{180, "utf32", "utf32_german2_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
724 /*56*/{181, "utf32", "utf32_croatian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
725 /*56*/{182, "utf32", "utf32_unicode_520_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
726 /*56*/{183, "utf32", "utf32_vietnamese_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32_id, check_mb_utf32_id, LOWEST_MB_UTF32},
727 
728 	{ 192, UTF8_MB3, UTF8_MB3"_unicode_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
729 	{ 193, UTF8_MB3, UTF8_MB3"_icelandic_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
730 	{ 194, UTF8_MB3, UTF8_MB3"_latvian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id,  check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
731 	{ 195, UTF8_MB3, UTF8_MB3"_romanian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
732 	{ 196, UTF8_MB3, UTF8_MB3"_slovenian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
733 	{ 197, UTF8_MB3, UTF8_MB3"_polish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
734 	{ 198, UTF8_MB3, UTF8_MB3"_estonian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
735 	{ 199, UTF8_MB3, UTF8_MB3"_spanish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
736 	{ 200, UTF8_MB3, UTF8_MB3"_swedish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
737 	{ 201, UTF8_MB3, UTF8_MB3"_turkish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
738 	{ 202, UTF8_MB3, UTF8_MB3"_czech_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
739 	{ 203, UTF8_MB3, UTF8_MB3"_danish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
740 	{ 204, UTF8_MB3, UTF8_MB3"_lithuanian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
741 	{ 205, UTF8_MB3, UTF8_MB3"_slovak_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
742 	{ 206, UTF8_MB3, UTF8_MB3"_spanish2_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
743 	{ 207, UTF8_MB3, UTF8_MB3"_roman_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
744 	{ 208, UTF8_MB3, UTF8_MB3"_persian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
745 	{ 209, UTF8_MB3, UTF8_MB3"_esperanto_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
746 	{ 210, UTF8_MB3, UTF8_MB3"_hungarian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
747 	{ 211, UTF8_MB3, UTF8_MB3"_sinhala_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
748 	{ 212, UTF8_MB3, UTF8_MB3"_german2_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
749 	{ 213, UTF8_MB3, UTF8_MB3"_croatian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
750 	{ 214, UTF8_MB3, UTF8_MB3"_unicode_520_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
751 	{ 215, UTF8_MB3, UTF8_MB3"_vietnamese_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3_id, check_mb_utf8mb3_valid_id, LOWEST_MB_UTF8MB3},
752 
753 	{ 224, UTF8_MB4, UTF8_MB4"_unicode_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
754 	{ 225, UTF8_MB4, UTF8_MB4"_icelandic_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
755 	{ 226, UTF8_MB4, UTF8_MB4"_latvian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
756 	{ 227, UTF8_MB4, UTF8_MB4"_romanian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
757 	{ 228, UTF8_MB4, UTF8_MB4"_slovenian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
758 	{ 229, UTF8_MB4, UTF8_MB4"_polish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
759 	{ 230, UTF8_MB4, UTF8_MB4"_estonian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
760 	{ 231, UTF8_MB4, UTF8_MB4"_spanish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
761 	{ 232, UTF8_MB4, UTF8_MB4"_swedish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
762 	{ 233, UTF8_MB4, UTF8_MB4"_turkish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
763 	{ 234, UTF8_MB4, UTF8_MB4"_czech_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
764 	{ 235, UTF8_MB4, UTF8_MB4"_danish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
765 	{ 236, UTF8_MB4, UTF8_MB4"_lithuanian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
766 	{ 237, UTF8_MB4, UTF8_MB4"_slovak_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
767 	{ 238, UTF8_MB4, UTF8_MB4"_spanish2_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
768 	{ 239, UTF8_MB4, UTF8_MB4"_roman_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
769 	{ 240, UTF8_MB4, UTF8_MB4"_persian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
770 	{ 241, UTF8_MB4, UTF8_MB4"_esperanto_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
771 	{ 242, UTF8_MB4, UTF8_MB4"_hungarian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
772 	{ 243, UTF8_MB4, UTF8_MB4"_sinhala_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
773 	{ 244, UTF8_MB4, UTF8_MB4"_german2_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
774 	{ 245, UTF8_MB4, UTF8_MB4"_croatian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
775 	{ 246, UTF8_MB4, UTF8_MB4"_unicode_520_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
776 	{ 247, UTF8_MB4, UTF8_MB4"_vietnamese_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
777 	{ 248, "gb18030", "gb18030_chinese_ci", 1, 4, "", mysqlnd_mbcharlen_gb18030_id, my_ismbchar_gb18030_id, LOWEST_MB_GB18030},
778 	{ 249, "gb18030", "gb18030_bin", 1, 4, "", mysqlnd_mbcharlen_gb18030_id, my_ismbchar_gb18030_id, LOWEST_MB_GB18030},
779 
780 	{ 254, UTF8_MB3, UTF8_MB3"_general_cs", 1, 3, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
781 	{ 255, UTF8_MB4, UTF8_MB4"_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
782 	{ 256, UTF8_MB4, UTF8_MB4"_de_pb_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
783 	{ 257, UTF8_MB4, UTF8_MB4"_is_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
784 	{ 258, UTF8_MB4, UTF8_MB4"_lv_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
785 	{ 259, UTF8_MB4, UTF8_MB4"_ro_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
786 	{ 260, UTF8_MB4, UTF8_MB4"_sl_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
787 	{ 261, UTF8_MB4, UTF8_MB4"_pl_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
788 	{ 262, UTF8_MB4, UTF8_MB4"_et_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
789 	{ 263, UTF8_MB4, UTF8_MB4"_es_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
790 	{ 264, UTF8_MB4, UTF8_MB4"_sv_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
791 	{ 265, UTF8_MB4, UTF8_MB4"_tr_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
792 	{ 266, UTF8_MB4, UTF8_MB4"_cs_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
793 	{ 267, UTF8_MB4, UTF8_MB4"_da_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
794 	{ 268, UTF8_MB4, UTF8_MB4"_lt_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
795 	{ 269, UTF8_MB4, UTF8_MB4"_sk_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
796 	{ 270, UTF8_MB4, UTF8_MB4"_es_trad_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
797 	{ 271, UTF8_MB4, UTF8_MB4"_la_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
798 	{ 272, UTF8_MB4, UTF8_MB4"_fa_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
799 	{ 273, UTF8_MB4, UTF8_MB4"_eo_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
800 	{ 274, UTF8_MB4, UTF8_MB4"_hu_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
801 	{ 275, UTF8_MB4, UTF8_MB4"_hr_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
802 	{ 276, UTF8_MB4, UTF8_MB4"_si_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
803 	{ 277, UTF8_MB4, UTF8_MB4"_vi_0900_ai_ci", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
804 	{ 278, UTF8_MB4, UTF8_MB4"_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
805 	{ 279, UTF8_MB4, UTF8_MB4"_de_pb_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
806 	{ 280, UTF8_MB4, UTF8_MB4"_is_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
807 	{ 281, UTF8_MB4, UTF8_MB4"_lv_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
808 	{ 282, UTF8_MB4, UTF8_MB4"_ro_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
809 	{ 283, UTF8_MB4, UTF8_MB4"_sl_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
810 	{ 284, UTF8_MB4, UTF8_MB4"_pl_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
811 	{ 285, UTF8_MB4, UTF8_MB4"_et_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
812 	{ 286, UTF8_MB4, UTF8_MB4"_es_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
813 	{ 287, UTF8_MB4, UTF8_MB4"_sv_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
814 	{ 288, UTF8_MB4, UTF8_MB4"_tr_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
815 	{ 289, UTF8_MB4, UTF8_MB4"_cs_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
816 	{ 290, UTF8_MB4, UTF8_MB4"_da_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
817 	{ 291, UTF8_MB4, UTF8_MB4"_lt_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
818 	{ 292, UTF8_MB4, UTF8_MB4"_sk_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
819 	{ 293, UTF8_MB4, UTF8_MB4"_es_trad_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
820 	{ 294, UTF8_MB4, UTF8_MB4"_la_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
821 	{ 295, UTF8_MB4, UTF8_MB4"_fa_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
822 	{ 296, UTF8_MB4, UTF8_MB4"_eo_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
823 	{ 297, UTF8_MB4, UTF8_MB4"_hu_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
824 	{ 298, UTF8_MB4, UTF8_MB4"_hr_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
825 	{ 299, UTF8_MB4, UTF8_MB4"_si_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
826 	{ 300, UTF8_MB4, UTF8_MB4"_vi_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
827 	{ 303, UTF8_MB4, UTF8_MB4"_ja_0900_as_cs", 1, 4, "", mysqlnd_mbcharlen_utf8_id, check_mb_utf8_valid_id, LOWEST_MB_UTF8},
828 	{   0, NULL, NULL, 0, 0, NULL, mysqlnd_mbcharlen_null_id, check_null_id, 0x100}
829 };
830 /* }}} */
831 
832 
833 /* {{{ mysqlnd_find_charset_nr */
mysqlnd_find_charset_nr(const unsigned int charsetnr)834 PHPAPI const MYSQLND_CHARSET * mysqlnd_find_charset_nr(const unsigned int charsetnr)
835 {
836 	const MYSQLND_CHARSET * c = mysqlnd_charsets;
837 
838 	do {
839 		if (c->nr == charsetnr) {
840 			return c;
841 		}
842 		++c;
843 	} while (c[0].nr != 0);
844 	return NULL;
845 }
846 /* }}} */
847 
848 
849 /* {{{ mysqlnd_find_charset_name */
mysqlnd_find_charset_name(const char * const name)850 PHPAPI const MYSQLND_CHARSET * mysqlnd_find_charset_name(const char * const name)
851 {
852 	if (name) {
853 		const MYSQLND_CHARSET * c = mysqlnd_charsets;
854 		do {
855 			if (!strcasecmp(c->name, name)) {
856 				return c;
857 			}
858 			++c;
859 		} while (c[0].nr != 0);
860 	}
861 	return NULL;
862 }
863 /* }}} */
864 
865 /* {{{ mysqlnd_cset_escape_quotes */
mysqlnd_cset_escape_quotes(const MYSQLND_CHARSET * const cset,char * newstr,const char * escapestr,const size_t escapestr_len)866 PHPAPI zend_ulong mysqlnd_cset_escape_quotes(const MYSQLND_CHARSET * const cset, char * newstr,
867 											 const char * escapestr, const size_t escapestr_len)
868 {
869 	const char 	*newstr_s = newstr;
870 	const char 	*newstr_e = newstr + 2 * escapestr_len;
871 	const char 	*end = escapestr + escapestr_len;
872 
873 	DBG_ENTER("mysqlnd_cset_escape_quotes");
874 
875 	for (;escapestr < end; escapestr++) {
876 		unsigned int len = 0;
877 		/* check unicode characters */
878 
879 		if (*((zend_uchar *) escapestr) >= cset->lowest_mb_byte && (len = mysqlnd_mbvalid_dispatch(cset->mb_valid, escapestr, end))) {
880 			ZEND_ASSERT(newstr + len <= newstr_e);
881 			/* copy mb char without escaping it */
882 			while (len--) {
883 				*newstr++ = *escapestr++;
884 			}
885 			escapestr--;
886 			continue;
887 		}
888 		if (*escapestr == '\'') {
889 			ZEND_ASSERT(newstr + 2 <= newstr_e);
890 			*newstr++ = '\'';
891 			*newstr++ = '\'';
892 		} else {
893 			ZEND_ASSERT(newstr + 1 <= newstr_e);
894 			*newstr++ = *escapestr;
895 		}
896 	}
897 	*newstr = '\0';
898 
899 	DBG_RETURN((zend_ulong)(newstr - newstr_s));
900 }
901 /* }}} */
902 
903 
904 /* {{{ mysqlnd_cset_escape_slashes */
mysqlnd_cset_escape_slashes(const MYSQLND_CHARSET * const cset,char * newstr,const char * escapestr,const size_t escapestr_len)905 PHPAPI zend_ulong mysqlnd_cset_escape_slashes(const MYSQLND_CHARSET * const cset, char *newstr,
906 											  const char * escapestr, const size_t escapestr_len)
907 {
908 	const char 	*newstr_s = newstr;
909 	const char 	*newstr_e = newstr + 2 * escapestr_len;
910 	const char 	*end = escapestr + escapestr_len;
911 
912 	DBG_ENTER("mysqlnd_cset_escape_slashes");
913 	DBG_INF_FMT("charset=%s", cset->name);
914 
915 	for (;escapestr < end; escapestr++) {
916 		char esc = '\0';
917 
918 		/* check unicode characters */
919 		if (*((zend_uchar *) escapestr) >= cset->lowest_mb_byte) {
920 			unsigned int len = mysqlnd_mbvalid_dispatch(cset->mb_valid, escapestr, end);
921 			if (len) {
922 				ZEND_ASSERT(newstr + len <= newstr_e);
923 				/* copy mb char without escaping it */
924 				while (len--) {
925 					*newstr++ = *escapestr++;
926 				}
927 				escapestr--;
928 				continue;
929 			} else if (mysqlnd_mbcharlen_dispatch(cset->mb_charlen, *escapestr) > 1) {
930 				esc = *escapestr;
931 			}
932 		}
933 		if (!esc) {
934 			switch (*escapestr) {
935 				case 0:
936 					esc = '0';
937 					break;
938 				case '\n':
939 					esc = 'n';
940 					break;
941 				case '\r':
942 					esc = 'r';
943 					break;
944 				case '\\':
945 				case '\'':
946 				case '"':
947 					esc = *escapestr;
948 					break;
949 				case '\032':
950 					esc = 'Z';
951 					break;
952 			}
953 		}
954 		if (esc) {
955 			ZEND_ASSERT(newstr + 2 <= newstr_e);
956 			/* copy escaped character */
957 			*newstr++ = '\\';
958 			*newstr++ = esc;
959 		} else {
960 			ZEND_ASSERT(newstr + 1 <= newstr_e);
961 			/* copy non escaped character */
962 			*newstr++ = *escapestr;
963 		}
964 	}
965 	*newstr = '\0';
966 
967 	DBG_RETURN((zend_ulong)(newstr - newstr_s));
968 }
969 /* }}} */
970 
971 
972 static struct st_mysqlnd_plugin_charsets mysqlnd_plugin_charsets_plugin =
973 {
974 	{
975 		MYSQLND_PLUGIN_API_VERSION,
976 		"charsets",
977 		MYSQLND_VERSION_ID,
978 		PHP_MYSQLND_VERSION,
979 		"PHP License 3.01",
980 		"Andrey Hristov <andrey@php.net>,  Ulf Wendel <uw@php.net>, Georg Richter <georg@php.net>",
981 		{
982 			NULL, /* no statistics , will be filled later if there are some */
983 			NULL, /* no statistics */
984 		},
985 		{
986 			NULL /* plugin shutdown */
987 		}
988 	},
989 	{/* methods */
990 		mysqlnd_find_charset_nr,
991 		mysqlnd_find_charset_name,
992 		mysqlnd_cset_escape_quotes,
993 		mysqlnd_cset_escape_slashes
994 	}
995 };
996 
997 
998 /* {{{ mysqlnd_charsets_plugin_register */
999 void
mysqlnd_charsets_plugin_register(void)1000 mysqlnd_charsets_plugin_register(void)
1001 {
1002 	mysqlnd_plugin_register_ex((struct st_mysqlnd_plugin_header *) &mysqlnd_plugin_charsets_plugin);
1003 }
1004 /* }}} */
1005 
1006 #if MYSQLND_CHARSETS_SANITY_CHECK
mysqlnd_charsets_sanity_check(void)1007 void mysqlnd_charsets_sanity_check(void)
1008 {
1009 	/* Sanity check fast path checks for encoding handling. */
1010 	for (size_t i = 0; i < sizeof(mysqlnd_charsets) / sizeof(*mysqlnd_charsets); i++) {
1011 		const MYSQLND_CHARSET *c = &mysqlnd_charsets[i];
1012 		printf("charset nr %u\n", c->nr);
1013 
1014 		/* Charset configuration consistency. */
1015 		if (c->mb_valid != check_null_id) {
1016 			ZEND_ASSERT(c->mb_valid != mysqlnd_mbcharlen_null_id);
1017 		}
1018 		if (c->mb_charlen != mysqlnd_mbcharlen_null_id) {
1019 			ZEND_ASSERT(c->mb_charlen != check_null_id);
1020 		}
1021 		ZEND_ASSERT(c->char_minlen <= c->char_maxlen);
1022 		if (c->char_maxlen == 1) {
1023 			ZEND_ASSERT(c->lowest_mb_byte == 0x100);
1024 		}
1025 		if (c->char_minlen > 1) {
1026 			ZEND_ASSERT(c->mb_valid != check_null_id);
1027 			ZEND_ASSERT(c->mb_charlen != mysqlnd_mbcharlen_null_id);
1028 			ZEND_ASSERT(c->lowest_mb_byte == 0x00);
1029 		} else if (c->mb_valid != check_null_id) {
1030 			/* Charset behavioural consistency w.r.t. charlen. */
1031 			for (unsigned int ch = 0; ch < 255; ch++) {
1032 				if (mysqlnd_mbcharlen_dispatch(c->mb_charlen, ch) > 1) {
1033 					ZEND_ASSERT(ch >= c->lowest_mb_byte);
1034 				}
1035 			}
1036 
1037 			/* Charset behavioural consistency w.r.t. validity. */
1038 			union {
1039 				zend_uchar c[4];
1040 				uint32_t l;
1041 			} input;
1042 			for (uint32_t i = 0; i < 0xFFFFFFFF; i++) {
1043 				input.l = i;
1044 				if (mysqlnd_mbvalid_dispatch(c->mb_valid, (char *)input.c, (char *)input.c + 4) > 1) {
1045 					if (input.c[0] < c->lowest_mb_byte) {
1046 						printf("Input %u (%u %u)\n", input.l, input.c[0], c->lowest_mb_byte);
1047 						ZEND_ASSERT(false);
1048 					}
1049 				}
1050 			}
1051 		}
1052 	}
1053 }
1054 #endif
1055