xref: /PHP-8.0/ext/intl/converter/converter.c (revision 7c3dfbb8)
1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | http://www.php.net/license/3_01.txt                                  |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Sara Golemon <pollita@php.net>                              |
12    +----------------------------------------------------------------------+
13  */
14 
15 #include "converter.h"
16 #include "converter_arginfo.h"
17 #include "zend_exceptions.h"
18 
19 #include <unicode/utypes.h>
20 #include <unicode/utf8.h>
21 #include <unicode/utf16.h>
22 #include <unicode/ucnv.h>
23 #include <unicode/ustring.h>
24 
25 #include "../intl_error.h"
26 #include "../intl_common.h"
27 
28 typedef struct _php_converter_object {
29 	UConverter *src, *dest;
30 	zend_fcall_info to_cb, from_cb;
31 	zend_fcall_info_cache to_cache, from_cache;
32 	intl_error error;
33 	zend_object obj;
34 } php_converter_object;
35 
36 
php_converter_fetch_object(zend_object * obj)37 static inline php_converter_object *php_converter_fetch_object(zend_object *obj) {
38 	return (php_converter_object *)((char*)(obj) - XtOffsetOf(php_converter_object, obj));
39 }
40 #define Z_INTL_CONVERTER_P(zv) php_converter_fetch_object(Z_OBJ_P(zv))
41 
42 static zend_class_entry     *php_converter_ce;
43 static zend_object_handlers  php_converter_object_handlers;
44 
45 #define CONV_GET(pzv)  (Z_INTL_CONVERTER_P((pzv)))
46 #define THROW_UFAILURE(obj, fname, error) php_converter_throw_failure(obj, error, \
47                                           fname "() returned error " ZEND_LONG_FMT ": %s", (zend_long)error, u_errorName(error))
48 
49 /* {{{ php_converter_throw_failure */
php_converter_throw_failure(php_converter_object * objval,UErrorCode error,const char * format,...)50 static inline void php_converter_throw_failure(php_converter_object *objval, UErrorCode error, const char *format, ...) {
51 	intl_error *err = objval ? &(objval->error) : NULL;
52 	char message[1024];
53 	va_list vargs;
54 
55 	va_start(vargs, format);
56 	vsnprintf(message, sizeof(message), format, vargs);
57 	va_end(vargs);
58 
59 	intl_errors_set(err, error, message, 1);
60 }
61 /* }}} */
62 
63 /* {{{ php_converter_default_callback */
php_converter_default_callback(zval * return_value,zval * zobj,zend_long reason,zval * error)64 static void php_converter_default_callback(zval *return_value, zval *zobj, zend_long reason, zval *error) {
65 	/* Basic functionality so children can call parent::toUCallback() */
66 	switch (reason) {
67 		case UCNV_UNASSIGNED:
68 		case UCNV_ILLEGAL:
69 		case UCNV_IRREGULAR:
70 		{
71 			php_converter_object *objval = (php_converter_object*)CONV_GET(zobj);
72 			char chars[127];
73 			int8_t chars_len = sizeof(chars);
74 			UErrorCode uerror = U_ZERO_ERROR;
75             if(!objval->src) {
76                 php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Source Converter has not been initialized yet");
77 				chars[0] = 0x1A;
78 				chars[1] = 0;
79 				chars_len = 1;
80 				ZEND_TRY_ASSIGN_REF_LONG(error, U_INVALID_STATE_ERROR);
81                 RETVAL_STRINGL(chars, chars_len);
82                 return;
83             }
84 
85 			/* Yes, this is fairly wasteful at first glance,
86 			 * but considering that the alternative is to store
87 			 * what's sent into setSubstChars() and the fact
88 			 * that this is an extremely unlikely codepath
89 			 * I'd rather take the CPU hit here, than waste time
90 			 * storing a value I'm unlikely to use.
91 			 */
92 			ucnv_getSubstChars(objval->src, chars, &chars_len, &uerror);
93 			if (U_FAILURE(uerror)) {
94 				THROW_UFAILURE(objval, "ucnv_getSubstChars", uerror);
95 				chars[0] = 0x1A;
96 				chars[1] = 0;
97 				chars_len = 1;
98 			}
99 			ZEND_TRY_ASSIGN_REF_LONG(error, uerror);
100 			RETVAL_STRINGL(chars, chars_len);
101 		}
102 	}
103 }
104 /* }}} */
105 
106 /* {{{ */
PHP_METHOD(UConverter,toUCallback)107 PHP_METHOD(UConverter, toUCallback) {
108 	zend_long reason;
109 	zend_string *source, *codeUnits;
110 	zval *error;
111 
112 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "lSSz",
113 		&reason, &source, &codeUnits, &error) == FAILURE) {
114 		RETURN_THROWS();
115 	}
116 
117 	php_converter_default_callback(return_value, ZEND_THIS, reason, error);
118 }
119 /* }}} */
120 
121 /* {{{ */
PHP_METHOD(UConverter,fromUCallback)122 PHP_METHOD(UConverter, fromUCallback) {
123 	zend_long reason;
124 	zval *source, *error;
125 	zend_long codePoint;
126 
127 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "lalz",
128 		&reason, &source, &codePoint, &error) == FAILURE) {
129 		RETURN_THROWS();
130 	}
131 
132 	php_converter_default_callback(return_value, ZEND_THIS, reason, error);
133 }
134 /* }}} */
135 
136 /* {{{ php_converter_check_limits */
php_converter_check_limits(php_converter_object * objval,zend_long available,zend_long needed)137 static inline zend_bool php_converter_check_limits(php_converter_object *objval, zend_long available, zend_long needed) {
138 	if (available < needed) {
139 		php_converter_throw_failure(objval, U_BUFFER_OVERFLOW_ERROR, "Buffer overrun " ZEND_LONG_FMT " bytes needed, " ZEND_LONG_FMT " available", needed, available);
140 		return 0;
141 	}
142 	return 1;
143 }
144 /* }}} */
145 
146 #define TARGET_CHECK(cnvargs, needed) php_converter_check_limits(objval, cnvargs->targetLimit - cnvargs->target, needed)
147 
148 /* {{{ php_converter_append_toUnicode_target */
/*
 * Append the value returned by a toUCallback() to the UTF-16 target buffer of
 * an in-flight to-Unicode conversion.
 *
 * val     callback result: NULL (skip), int (one code point), string (UTF-8
 *         text) or array (each element handled recursively).
 * args    ICU callback arguments; args->target is advanced past what is written.
 * objval  converter that receives any error raised here.
 *
 * Code points above U+FFFF are written as a surrogate pair. Malformed UTF-8 in
 * a string result is replaced by U+FFFD. Writing stops (with a buffer overflow
 * error recorded by TARGET_CHECK) as soon as the target buffer is full.
 */
static void php_converter_append_toUnicode_target(zval *val, UConverterToUnicodeArgs *args, php_converter_object *objval) {
	switch (Z_TYPE_P(val)) {
		case IS_NULL:
			/* Code unit is being skipped */
			return;
		case IS_LONG:
		{
			zend_long lval = Z_LVAL_P(val);
			if ((lval < 0) || (lval > 0x10FFFF)) {
				/* Cast so the argument type matches the %lx conversion on every platform */
				php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR, "Invalid codepoint U+%04lx", (unsigned long)lval);
				return;
			}
			if (lval > 0xFFFF) {
				/* Supplemental planes U+010000 - U+10FFFF */
				if (TARGET_CHECK(args, 2)) {
					*(args->target++) = U16_LEAD(lval);
					*(args->target++) = U16_TRAIL(lval);
				}
				return;
			}
			/* Non-surrogate BMP codepoint */
			if (TARGET_CHECK(args, 1)) {
				*(args->target++) = (UChar)lval;
			}
			return;
		}
		case IS_STRING:
		{
			const char *strval = Z_STRVAL_P(val);
			int32_t i = 0;
			int32_t len;

			/* U8_NEXT works with 32-bit offsets; refuse anything it cannot index */
			if (Z_STRLEN_P(val) > INT32_MAX) {
				php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR,
					"toUCallback() returned a substitution string that is too long");
				return;
			}
			len = (int32_t)Z_STRLEN_P(val);

			while (i < len) {
				UChar32 c;
				int32_t units;

				/* Decode into a full UChar32: a 16-bit UChar would truncate
				 * supplementary code points and the negative error marker. */
				U8_NEXT(strval, i, len, c);
				if (c < 0) {
					c = 0xFFFD; /* REPLACEMENT CHARACTER for malformed UTF-8 */
				}

				units = U16_LENGTH(c);
				if (!TARGET_CHECK(args, units)) {
					return;
				}
				if (units == 1) {
					*(args->target++) = (UChar)c;
				} else {
					*(args->target++) = U16_LEAD(c);
					*(args->target++) = U16_TRAIL(c);
				}
			}
			return;
		}
		case IS_ARRAY:
		{
			HashTable *ht = Z_ARRVAL_P(val);
			zval *tmpzval;

			ZEND_HASH_FOREACH_VAL(ht, tmpzval) {
				php_converter_append_toUnicode_target(tmpzval, args, objval);
			} ZEND_HASH_FOREACH_END();
			return;
		}
		default:
			php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR,
                                                    "toUCallback() specified illegal type for substitution character");
	}
}
203 /* }}} */
204 
205 /* {{{ php_converter_to_u_callback */
/*
 * ICU to-Unicode error callback: forwards the event to the object's
 * toUCallback() method.
 *
 * The method is called with (reason, remaining source bytes, offending code
 * units, &error). Its return value is appended to the target buffer, and the
 * possibly modified error value is written back to *pErrorCode when it is
 * still an integer.
 */
static void php_converter_to_u_callback(const void *context,
                                        UConverterToUnicodeArgs *args,
                                        const char *codeUnits, int32_t length,
                                        UConverterCallbackReason reason,
                                        UErrorCode *pErrorCode) {
	php_converter_object *conv = (php_converter_object*)context;
	zval params[4];
	zval result;
	zval *status = &params[3];
	int k;

	ZVAL_LONG(&params[0], reason);
	if (!args->source) {
		ZVAL_EMPTY_STRING(&params[1]);
	} else {
		ZVAL_STRINGL(&params[1], args->source, args->sourceLimit - args->source);
	}
	if (!codeUnits) {
		ZVAL_EMPTY_STRING(&params[2]);
	} else {
		ZVAL_STRINGL(&params[2], codeUnits, length);
	}
	/* The error code travels by reference so userland can change it */
	ZVAL_LONG(status, *pErrorCode);
	ZVAL_MAKE_REF(status);

	conv->to_cb.param_count = 4;
	conv->to_cb.params = params;
	conv->to_cb.retval = &result;
	if (zend_call_function(&(conv->to_cb), &(conv->to_cache)) == FAILURE) {
		/* Unlikely */
		php_converter_throw_failure(conv, U_INTERNAL_PROGRAM_ERROR, "Unexpected failure calling toUCallback()");
	} else if (!Z_ISUNDEF(result)) {
		php_converter_append_toUnicode_target(&result, args, conv);
		zval_ptr_dtor(&result);
	}

	/* Propagate the error value back to ICU, looking through the reference */
	if (Z_TYPE_P(status) == IS_LONG) {
		*pErrorCode = Z_LVAL_P(status);
	} else if (Z_ISREF_P(status) && Z_TYPE_P(Z_REFVAL_P(status)) == IS_LONG) {
		*pErrorCode = Z_LVAL_P(Z_REFVAL_P(status));
	}

	for (k = 0; k < 4; k++) {
		zval_ptr_dtor(&params[k]);
	}
}
251 /* }}} */
252 
253 /* {{{ php_converter_append_fromUnicode_target */
/*
 * Append the value returned by a fromUCallback() to the byte-oriented target
 * buffer of an in-flight from-Unicode conversion.
 *
 * NULL is ignored, an int is stored as a single target unit, a string is
 * copied verbatim, and an array has each element handled recursively. Any
 * other type records U_ILLEGAL_ARGUMENT_ERROR. Nothing is written when the
 * target buffer lacks room (TARGET_CHECK records the overflow).
 */
static void php_converter_append_fromUnicode_target(zval *val, UConverterFromUnicodeArgs *args, php_converter_object *objval) {
	int type = Z_TYPE_P(val);

	if (type == IS_NULL) {
		/* Ignore */
		return;
	}

	if (type == IS_LONG) {
		if (TARGET_CHECK(args, 1)) {
			*(args->target++) = Z_LVAL_P(val);
		}
		return;
	}

	if (type == IS_STRING) {
		size_t nbytes = Z_STRLEN_P(val);

		if (TARGET_CHECK(args, nbytes)) {
			memcpy(args->target, Z_STRVAL_P(val), nbytes);
			args->target += nbytes;
		}
		return;
	}

	if (type == IS_ARRAY) {
		zval *entry;

		ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(val), entry) {
			php_converter_append_fromUnicode_target(entry, args, objval);
		} ZEND_HASH_FOREACH_END();
		return;
	}

	php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR, "fromUCallback() specified illegal type for substitution character");
}
286 /* }}} */
287 
288 /* {{{ php_converter_from_u_callback */
/*
 * ICU from-Unicode error callback: forwards the event to the object's
 * fromUCallback() method.
 *
 * The method is called with (reason, array of code points decoded from
 * `codeUnits`, codePoint, &error). Its return value is appended to the target
 * buffer, and the possibly modified error value is written back to
 * *pErrorCode when it is still an integer.
 */
static void php_converter_from_u_callback(const void *context,
                                          UConverterFromUnicodeArgs *args,
                                          const UChar *codeUnits, int32_t length, UChar32 codePoint,
                                          UConverterCallbackReason reason,
                                          UErrorCode *pErrorCode) {
	php_converter_object *conv = (php_converter_object*)context;
	zval params[4];
	zval result;
	zval *status = &params[3];
	int pos, k;

	ZVAL_LONG(&params[0], reason);

	/* Hand the offending UTF-16 units to userland as a list of code points */
	array_init(&params[1]);
	for (pos = 0; pos < length; ) {
		UChar32 cp;
		U16_NEXT(codeUnits, pos, length, cp);
		add_next_index_long(&params[1], cp);
	}

	ZVAL_LONG(&params[2], codePoint);
	/* The error code travels by reference so userland can change it */
	ZVAL_LONG(status, *pErrorCode);
	ZVAL_MAKE_REF(status);

	conv->from_cb.param_count = 4;
	conv->from_cb.params = params;
	conv->from_cb.retval = &result;
	if (zend_call_function(&(conv->from_cb), &(conv->from_cache)) == FAILURE) {
		/* Unlikely */
		php_converter_throw_failure(conv, U_INTERNAL_PROGRAM_ERROR, "Unexpected failure calling fromUCallback()");
	} else if (!Z_ISUNDEF(result)) {
		php_converter_append_fromUnicode_target(&result, args, conv);
		zval_ptr_dtor(&result);
	}

	/* Propagate the error value back to ICU, looking through the reference */
	if (Z_TYPE_P(status) == IS_LONG) {
		*pErrorCode = Z_LVAL_P(status);
	} else if (Z_ISREF_P(status) && Z_TYPE_P(Z_REFVAL_P(status)) == IS_LONG) {
		*pErrorCode = Z_LVAL_P(Z_REFVAL_P(status));
	}

	for (k = 0; k < 4; k++) {
		zval_ptr_dtor(&params[k]);
	}
}
333 /* }}} */
334 
335 /* {{{ php_converter_set_callbacks */
php_converter_set_callbacks(php_converter_object * objval,UConverter * cnv)336 static inline zend_bool php_converter_set_callbacks(php_converter_object *objval, UConverter *cnv) {
337 	zend_bool ret = 1;
338 	UErrorCode error = U_ZERO_ERROR;
339 
340 	if (objval->obj.ce == php_converter_ce) {
341 		/* Short-circuit having to go through method calls and data marshalling
342 		 * when we're using default behavior
343 		 */
344 		return 1;
345 	}
346 
347 	ucnv_setToUCallBack(cnv, (UConverterToUCallback)php_converter_to_u_callback, (const void*)objval,
348                                  NULL, NULL, &error);
349 	if (U_FAILURE(error)) {
350 		THROW_UFAILURE(objval, "ucnv_setToUCallBack", error);
351 		ret = 0;
352 	}
353 
354 	error = U_ZERO_ERROR;
355 	ucnv_setFromUCallBack(cnv, (UConverterFromUCallback)php_converter_from_u_callback, (const void*)objval,
356                                     NULL, NULL, &error);
357 	if (U_FAILURE(error)) {
358 		THROW_UFAILURE(objval, "ucnv_setFromUCallBack", error);
359 		ret = 0;
360 	}
361 	return ret;
362 }
363 /* }}} */
364 
365 /* {{{ php_converter_set_encoding */
php_converter_set_encoding(php_converter_object * objval,UConverter ** pcnv,const char * enc,size_t enc_len)366 static zend_bool php_converter_set_encoding(php_converter_object *objval,
367                                             UConverter **pcnv,
368                                             const char *enc, size_t enc_len
369                                            ) {
370 	UErrorCode error = U_ZERO_ERROR;
371 	UConverter *cnv = ucnv_open(enc, &error);
372 
373 	if (error == U_AMBIGUOUS_ALIAS_WARNING) {
374 		UErrorCode getname_error = U_ZERO_ERROR;
375 		const char *actual_encoding = ucnv_getName(cnv, &getname_error);
376 		if (U_FAILURE(getname_error)) {
377 			/* Should never happen */
378 			actual_encoding = "(unknown)";
379 		}
380 		php_error_docref(NULL, E_WARNING, "Ambiguous encoding specified, using %s", actual_encoding);
381 	} else if (U_FAILURE(error)) {
382 		if (objval) {
383 			THROW_UFAILURE(objval, "ucnv_open", error);
384 		} else {
385 			php_error_docref(NULL, E_WARNING, "Error setting encoding: %d - %s", (int)error, u_errorName(error));
386 		}
387 		return 0;
388 	}
389 
390 	if (objval && !php_converter_set_callbacks(objval, cnv)) {
391 		return 0;
392 	}
393 
394 	if (*pcnv) {
395 		ucnv_close(*pcnv);
396 	}
397 	*pcnv = cnv;
398 	return 1;
399 }
400 /* }}} */
401 
402 /* {{{ php_converter_do_set_encoding */
/*
 * Shared implementation of setSourceEncoding()/setDestinationEncoding():
 * parses the single encoding-name argument, clears the object's error state
 * and replaces the converter in *pcnv. Returns bool to userland.
 */
static void php_converter_do_set_encoding(UConverter **pcnv, INTERNAL_FUNCTION_PARAMETERS) {
	php_converter_object *objval;
	char *enc;
	size_t enc_len;
	zend_bool changed;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &enc, &enc_len) == FAILURE) {
		RETURN_THROWS();
	}

	objval = CONV_GET(ZEND_THIS);
	intl_errors_reset(&objval->error);

	changed = php_converter_set_encoding(objval, pcnv, enc, enc_len);
	RETURN_BOOL(changed);
}
415 /* }}} */
416 
417 /* {{{ */
PHP_METHOD(UConverter,setSourceEncoding)418 PHP_METHOD(UConverter, setSourceEncoding) {
419 	php_converter_object *objval = CONV_GET(ZEND_THIS);
420 	php_converter_do_set_encoding(&(objval->src), INTERNAL_FUNCTION_PARAM_PASSTHRU);
421 }
422 /* }}} */
423 
424 /* {{{ */
PHP_METHOD(UConverter,setDestinationEncoding)425 PHP_METHOD(UConverter, setDestinationEncoding) {
426 	php_converter_object *objval = CONV_GET(ZEND_THIS);
427 	php_converter_do_set_encoding(&(objval->dest), INTERNAL_FUNCTION_PARAM_PASSTHRU);
428 }
429 /* }}} */
430 
431 /* {{{ php_converter_do_get_encoding */
/*
 * Shared implementation of getSourceEncoding()/getDestinationEncoding().
 *
 * Returns NULL to userland when `cnv` is not set, false when ICU cannot
 * report the name (the failure is recorded on `objval`), and the converter
 * name otherwise.
 */
static void php_converter_do_get_encoding(php_converter_object *objval, UConverter *cnv, INTERNAL_FUNCTION_PARAMETERS) {
	const char *name;

	if (zend_parse_parameters_none() == FAILURE) {
		RETURN_THROWS();
	}

	intl_errors_reset(&objval->error);

	if (!cnv) {
		RETURN_NULL();
	}

	name = ucnv_getName(cnv, &objval->error.code);
	if (U_FAILURE(objval->error.code)) {
		/* THROW_UFAILURE appends "()" itself; pass the bare function name */
		THROW_UFAILURE(objval, "ucnv_getName", objval->error.code);
		RETURN_FALSE;
	}

	RETURN_STRING(name);
}
453 /* }}} */
454 
455 /* {{{ */
PHP_METHOD(UConverter,getSourceEncoding)456 PHP_METHOD(UConverter, getSourceEncoding) {
457 	php_converter_object *objval = CONV_GET(ZEND_THIS);
458 	php_converter_do_get_encoding(objval, objval->src, INTERNAL_FUNCTION_PARAM_PASSTHRU);
459 }
460 /* }}} */
461 
462 /* {{{ */
PHP_METHOD(UConverter,getDestinationEncoding)463 PHP_METHOD(UConverter, getDestinationEncoding) {
464         php_converter_object *objval = CONV_GET(ZEND_THIS);
465         php_converter_do_get_encoding(objval, objval->dest, INTERNAL_FUNCTION_PARAM_PASSTHRU);
466 }
467 /* }}} */
468 
469 /* {{{ php_converter_do_get_type */
/*
 * Shared implementation of getSourceType()/getDestinationType().
 *
 * Returns NULL to userland when `cnv` is not set, otherwise the
 * UConverterType value reported by ICU as an integer.
 */
static void php_converter_do_get_type(php_converter_object *objval, UConverter *cnv, INTERNAL_FUNCTION_PARAMETERS) {
	if (zend_parse_parameters_none() == FAILURE) {
		RETURN_THROWS();
	}
	intl_errors_reset(&objval->error);

	if (!cnv) {
		RETURN_NULL();
	}

	/* ucnv_getType() takes no UErrorCode, so there is no failure to inspect
	 * here: the object's error state was reset just above and nothing in
	 * between can change it. */
	RETURN_LONG(ucnv_getType(cnv));
}
490 /* }}} */
491 
492 /* {{{ */
PHP_METHOD(UConverter,getSourceType)493 PHP_METHOD(UConverter, getSourceType) {
494 	php_converter_object *objval = CONV_GET(ZEND_THIS);
495 	php_converter_do_get_type(objval, objval->src, INTERNAL_FUNCTION_PARAM_PASSTHRU);
496 }
497 /* }}} */
498 
499 /* {{{ */
PHP_METHOD(UConverter,getDestinationType)500 PHP_METHOD(UConverter, getDestinationType) {
501 	php_converter_object *objval = CONV_GET(ZEND_THIS);
502 	php_converter_do_get_type(objval, objval->dest, INTERNAL_FUNCTION_PARAM_PASSTHRU);
503 }
504 /* }}} */
505 
506 /* {{{ php_converter_resolve_callback */
/*
 * Fill `finfo`/`fcache` so that they invoke the method `callback_name` on
 * `zobj`.
 *
 * A temporary [object, "method"] callable array is built for
 * zend_fcall_info_init() and destroyed afterwards; finfo->function_name is
 * then reset to UNDEF so it does not keep referring to the destroyed array.
 * A lookup failure is recorded on `objval`.
 */
static void php_converter_resolve_callback(zval *zobj,
                                           php_converter_object *objval,
                                           const char *callback_name,
                                           zend_fcall_info *finfo,
                                           zend_fcall_info_cache *fcache) {
	zval callable;
	char *reason = NULL;

	/* Build array($this, $callback_name); the array owns one reference to the object */
	array_init(&callable);
	Z_ADDREF_P(zobj);
	add_next_index_zval(&callable, zobj);
	add_next_index_string(&callable, callback_name);

	if (zend_fcall_info_init(&callable, 0, finfo, fcache, NULL, &reason) == FAILURE) {
		php_converter_throw_failure(objval, U_INTERNAL_PROGRAM_ERROR, "Error setting converter callback: %s", reason);
	}

	zend_array_destroy(Z_ARR(callable));
	ZVAL_UNDEF(&finfo->function_name);

	if (reason != NULL) {
		efree(reason);
	}
}
528 /* }}} */
529 
530 /* {{{ */
PHP_METHOD(UConverter,__construct)531 PHP_METHOD(UConverter, __construct) {
532 	php_converter_object *objval = CONV_GET(ZEND_THIS);
533 	char *src = "utf-8";
534 	size_t src_len = sizeof("utf-8") - 1;
535 	char *dest = src;
536 	size_t dest_len = src_len;
537 
538 	intl_error_reset(NULL);
539 
540 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!s!", &dest, &dest_len, &src, &src_len) == FAILURE) {
541 		RETURN_THROWS();
542 	}
543 
544 	php_converter_set_encoding(objval, &(objval->src),  src,  src_len );
545 	php_converter_set_encoding(objval, &(objval->dest), dest, dest_len);
546 	php_converter_resolve_callback(ZEND_THIS, objval, "toUCallback",   &(objval->to_cb),   &(objval->to_cache));
547 	php_converter_resolve_callback(ZEND_THIS, objval, "fromUCallback", &(objval->from_cb), &(objval->from_cache));
548 }
549 /* }}} */
550 
551 /* {{{ */
PHP_METHOD(UConverter,setSubstChars)552 PHP_METHOD(UConverter, setSubstChars) {
553 	php_converter_object *objval = CONV_GET(ZEND_THIS);
554 	char *chars;
555 	size_t chars_len;
556 	int ret = 1;
557 
558 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &chars, &chars_len) == FAILURE) {
559 		RETURN_THROWS();
560 	}
561 	intl_errors_reset(&objval->error);
562 
563 	if (objval->src) {
564 		UErrorCode error = U_ZERO_ERROR;
565 		ucnv_setSubstChars(objval->src, chars, chars_len, &error);
566 		if (U_FAILURE(error)) {
567 			THROW_UFAILURE(objval, "ucnv_setSubstChars", error);
568 			ret = 0;
569 		}
570 	} else {
571 		php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Source Converter has not been initialized yet");
572 		ret = 0;
573 	}
574 
575 	if (objval->dest) {
576 		UErrorCode error = U_ZERO_ERROR;
577 		ucnv_setSubstChars(objval->dest, chars, chars_len, &error);
578 		if (U_FAILURE(error)) {
579 			THROW_UFAILURE(objval, "ucnv_setSubstChars", error);
580 			ret = 0;
581 		}
582 	} else {
583 		php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Destination Converter has not been initialized yet");
584 		ret = 0;
585 	}
586 
587 	RETURN_BOOL(ret);
588 }
589 /* }}} */
590 
591 /* {{{ */
PHP_METHOD(UConverter,getSubstChars)592 PHP_METHOD(UConverter, getSubstChars) {
593 	php_converter_object *objval = CONV_GET(ZEND_THIS);
594 	char chars[127];
595 	int8_t chars_len = sizeof(chars);
596 	UErrorCode error = U_ZERO_ERROR;
597 
598 	if (zend_parse_parameters_none() == FAILURE) {
599 		RETURN_THROWS();
600 	}
601 	intl_errors_reset(&objval->error);
602 
603 	if (!objval->src) {
604 		RETURN_NULL();
605 	}
606 
607 	/* src and dest get the same subst chars set,
608 	 * so it doesn't really matter which one we read from
609 	 */
610 	ucnv_getSubstChars(objval->src, chars, &chars_len, &error);
611 	if (U_FAILURE(error)) {
612 		THROW_UFAILURE(objval, "ucnv_getSubstChars", error);
613 		RETURN_FALSE;
614 	}
615 
616 	RETURN_STRINGL(chars, chars_len);
617 }
618 /* }}} */
619 
620 /* {{{ php_converter_do_convert */
php_converter_do_convert(UConverter * dest_cnv,UConverter * src_cnv,const char * src,int32_t src_len,php_converter_object * objval)621 static zend_string* php_converter_do_convert(UConverter *dest_cnv,
622                                              UConverter *src_cnv,  const char *src, int32_t src_len,
623                                              php_converter_object *objval
624                                             ) {
625 	UErrorCode	error = U_ZERO_ERROR;
626 	int32_t		temp_len, ret_len;
627 	zend_string	*ret;
628 	UChar		*temp;
629 
630 	if (!src_cnv || !dest_cnv) {
631 		php_converter_throw_failure(objval, U_INVALID_STATE_ERROR,
632 		                            "Internal converters not initialized");
633 		return NULL;
634 	}
635 
636 	/* Get necessary buffer size first */
637 	temp_len = 1 + ucnv_toUChars(src_cnv, NULL, 0, src, src_len, &error);
638 	if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
639 		THROW_UFAILURE(objval, "ucnv_toUChars", error);
640 		return NULL;
641 	}
642 	temp = safe_emalloc(sizeof(UChar), temp_len, sizeof(UChar));
643 
644 	/* Convert to intermediate UChar* array */
645 	error = U_ZERO_ERROR;
646 	temp_len = ucnv_toUChars(src_cnv, temp, temp_len, src, src_len, &error);
647 	if (U_FAILURE(error)) {
648 		THROW_UFAILURE(objval, "ucnv_toUChars", error);
649 		efree(temp);
650 		return NULL;
651 	}
652 	temp[temp_len] = 0;
653 
654 	/* Get necessary output buffer size */
655 	ret_len = ucnv_fromUChars(dest_cnv, NULL, 0, temp, temp_len, &error);
656 	if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
657 		THROW_UFAILURE(objval, "ucnv_fromUChars", error);
658 		efree(temp);
659 		return NULL;
660 	}
661 
662 	ret = zend_string_alloc(ret_len, 0);
663 
664 	/* Convert to final encoding */
665 	error = U_ZERO_ERROR;
666 	ZSTR_LEN(ret) = ucnv_fromUChars(dest_cnv, ZSTR_VAL(ret), ret_len+1, temp, temp_len, &error);
667 	efree(temp);
668 	if (U_FAILURE(error)) {
669 		THROW_UFAILURE(objval, "ucnv_fromUChars", error);
670 		zend_string_efree(ret);
671 		return NULL;
672 	}
673 
674 	return ret;
675 }
676 /* }}} */
677 
678 /* {{{ */
679 #define UCNV_REASON_CASE(v) case (UCNV_ ## v) : RETURN_STRINGL( "REASON_" #v , sizeof( "REASON_" #v ) - 1);
PHP_METHOD(UConverter,reasonText)680 PHP_METHOD(UConverter, reasonText) {
681 	zend_long reason;
682 
683 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &reason) == FAILURE) {
684 		RETURN_THROWS();
685 	}
686 	intl_error_reset(NULL);
687 
688 	switch (reason) {
689 		UCNV_REASON_CASE(UNASSIGNED)
690 		UCNV_REASON_CASE(ILLEGAL)
691 		UCNV_REASON_CASE(IRREGULAR)
692 		UCNV_REASON_CASE(RESET)
693 		UCNV_REASON_CASE(CLOSE)
694 		UCNV_REASON_CASE(CLONE)
695 		default:
696 			zend_argument_value_error(1, "must be a UConverter::REASON_* constant");
697 			RETURN_THROWS();
698 	}
699 }
700 /* }}} */
701 
702 /* {{{ */
PHP_METHOD(UConverter,convert)703 PHP_METHOD(UConverter, convert) {
704         php_converter_object *objval = CONV_GET(ZEND_THIS);
705 	char *str;
706 	size_t str_len;
707 	zend_string *ret;
708 	zend_bool reverse = 0;
709 
710 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|b",
711 	                          &str, &str_len, &reverse) == FAILURE) {
712 		RETURN_THROWS();
713 	}
714 	intl_errors_reset(&objval->error);
715 
716 	ret = php_converter_do_convert(reverse ? objval->src : objval->dest,
717 	                               reverse ? objval->dest : objval->src,
718 	                               str,   str_len,
719 	                               objval);
720 	if (ret) {
721 		RETURN_NEW_STR(ret);
722 	} else {
723 		RETURN_FALSE;
724 	}
725 }
726 /* }}} */
727 
728 /* {{{ */
PHP_METHOD(UConverter,transcode)729 PHP_METHOD(UConverter, transcode) {
730 	char *str, *src, *dest;
731 	size_t str_len, src_len, dest_len;
732 	zval *options = NULL;
733 	UConverter *src_cnv = NULL, *dest_cnv = NULL;
734 
735 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|a!",
736 			&str, &str_len, &dest, &dest_len, &src, &src_len, &options) == FAILURE) {
737 		RETURN_THROWS();
738 	}
739 	intl_error_reset(NULL);
740 
741 	if (php_converter_set_encoding(NULL, &src_cnv,  src,  src_len) &&
742 	    php_converter_set_encoding(NULL, &dest_cnv, dest, dest_len)) {
743 	    zend_string *ret;
744 		UErrorCode error = U_ZERO_ERROR;
745 
746 		if (options && zend_hash_num_elements(Z_ARRVAL_P(options))) {
747 			zval *tmpzval;
748 
749 			if (U_SUCCESS(error) &&
750 				(tmpzval = zend_hash_str_find(Z_ARRVAL_P(options), "from_subst", sizeof("from_subst") - 1)) != NULL &&
751 				Z_TYPE_P(tmpzval) == IS_STRING) {
752 				error = U_ZERO_ERROR;
753 				ucnv_setSubstChars(src_cnv, Z_STRVAL_P(tmpzval), Z_STRLEN_P(tmpzval) & 0x7F, &error);
754 			}
755 			if (U_SUCCESS(error) &&
756 				(tmpzval = zend_hash_str_find(Z_ARRVAL_P(options), "to_subst", sizeof("to_subst") - 1)) != NULL &&
757 				Z_TYPE_P(tmpzval) == IS_STRING) {
758 				error = U_ZERO_ERROR;
759 				ucnv_setSubstChars(dest_cnv, Z_STRVAL_P(tmpzval), Z_STRLEN_P(tmpzval) & 0x7F, &error);
760 			}
761 		}
762 
763 		if (U_SUCCESS(error) &&
764 			(ret = php_converter_do_convert(dest_cnv, src_cnv, str, str_len, NULL)) != NULL) {
765 			RETVAL_NEW_STR(ret);
766 		}
767 
768 		if (U_FAILURE(error)) {
769 			THROW_UFAILURE(NULL, "transcode", error);
770 			RETVAL_FALSE;
771 		}
772 	} else {
773 		RETVAL_FALSE;
774 	}
775 
776 	if (src_cnv) {
777 		ucnv_close(src_cnv);
778 	}
779 	if (dest_cnv) {
780 		ucnv_close(dest_cnv);
781 	}
782 }
783 /* }}} */
784 
785 /* {{{ */
PHP_METHOD(UConverter,getErrorCode)786 PHP_METHOD(UConverter, getErrorCode) {
787 	php_converter_object *objval = CONV_GET(ZEND_THIS);
788 
789 	if (zend_parse_parameters_none() == FAILURE) {
790 		RETURN_THROWS();
791 	}
792 
793 	RETURN_LONG(intl_error_get_code(&(objval->error)));
794 }
795 /* }}} */
796 
797 /* {{{ */
PHP_METHOD(UConverter,getErrorMessage)798 PHP_METHOD(UConverter, getErrorMessage) {
799 	php_converter_object *objval = CONV_GET(ZEND_THIS);
800 
801 	if (zend_parse_parameters_none() == FAILURE) {
802 		RETURN_THROWS();
803 	}
804 
805 	zend_string *message = intl_error_get_message(&(objval->error));
806 	if (message) {
807 		RETURN_STR(message);
808 	} else {
809 		RETURN_NULL();
810 	}
811 }
812 /* }}} */
813 
814 /* {{{ */
PHP_METHOD(UConverter,getAvailable)815 PHP_METHOD(UConverter, getAvailable) {
816 	int32_t i,
817 			count = ucnv_countAvailable();
818 
819 	if (zend_parse_parameters_none() == FAILURE) {
820 		RETURN_THROWS();
821 	}
822 	intl_error_reset(NULL);
823 
824 	array_init(return_value);
825 	for(i = 0; i < count; i++) {
826 		const char *name = ucnv_getAvailableName(i);
827 		add_next_index_string(return_value, name);
828 	}
829 }
830 /* }}} */
831 
832 /* {{{ */
PHP_METHOD(UConverter,getAliases)833 PHP_METHOD(UConverter, getAliases) {
834 	char *name;
835 	size_t name_len;
836 	UErrorCode error = U_ZERO_ERROR;
837 	uint16_t i, count;
838 
839 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
840 		RETURN_THROWS();
841 	}
842 	intl_error_reset(NULL);
843 
844 	count = ucnv_countAliases(name, &error);
845 	if (U_FAILURE(error)) {
846 		THROW_UFAILURE(NULL, "ucnv_countAliases", error);
847 		RETURN_FALSE;
848 	}
849 
850 	array_init(return_value);
851 	for(i = 0; i < count; i++) {
852 		const char *alias;
853 
854 		error = U_ZERO_ERROR;
855 		alias = ucnv_getAlias(name, i, &error);
856 		if (U_FAILURE(error)) {
857 			THROW_UFAILURE(NULL, "ucnv_getAlias", error);
858 			zend_array_destroy(Z_ARR_P(return_value));
859 			RETURN_NULL();
860 		}
861 		add_next_index_string(return_value, alias);
862 	}
863 }
864 /* }}} */
865 
866 /* {{{ */
PHP_METHOD(UConverter,getStandards)867 PHP_METHOD(UConverter, getStandards) {
868 	uint16_t i, count;
869 
870 	if (zend_parse_parameters_none() == FAILURE) {
871 		RETURN_THROWS();
872 	}
873 	intl_error_reset(NULL);
874 
875 	array_init(return_value);
876 	count = ucnv_countStandards();
877 	for(i = 0; i < count; i++) {
878 		UErrorCode error = U_ZERO_ERROR;
879 		const char *name = ucnv_getStandard(i, &error);
880 		if (U_FAILURE(error)) {
881 			THROW_UFAILURE(NULL, "ucnv_getStandard", error);
882 			zend_array_destroy(Z_ARR_P(return_value));
883 			RETURN_NULL();
884 		}
885 		add_next_index_string(return_value, name);
886 	}
887 }
888 /* }}} */
889 
890 /* {{{ Converter create/clone/destroy */
/*
 * Release the ICU converters owned by a converter object and clear its error
 * state.
 *
 * The pointers are reset to NULL after closing so that any later access to
 * the object sees "no converter" (which the rest of this file checks for)
 * instead of a dangling handle, and so that running this twice is harmless.
 */
static void php_converter_dtor_object(zend_object *obj) {
	php_converter_object *objval = php_converter_fetch_object(obj);

	if (objval->src) {
		ucnv_close(objval->src);
		objval->src = NULL;
	}

	if (objval->dest) {
		ucnv_close(objval->dest);
		objval->dest = NULL;
	}

	intl_error_reset(&(objval->error));
}
904 
php_converter_object_ctor(zend_class_entry * ce,php_converter_object ** pobjval)905 static zend_object *php_converter_object_ctor(zend_class_entry *ce, php_converter_object **pobjval) {
906 	php_converter_object *objval;
907 
908 	objval = zend_object_alloc(sizeof(php_converter_object), ce);
909 
910 	zend_object_std_init(&objval->obj, ce);
911 	object_properties_init(&objval->obj, ce);
912 	intl_error_init(&(objval->error));
913 
914 	objval->obj.handlers = &php_converter_object_handlers;
915 	*pobjval = objval;
916 
917 	return &objval->obj;
918 }
919 
php_converter_create_object(zend_class_entry * ce)920 static zend_object *php_converter_create_object(zend_class_entry *ce) {
921 	php_converter_object *objval = NULL;
922 	zend_object *retval = php_converter_object_ctor(ce, &objval);
923 
924 	object_properties_init(&(objval->obj), ce);
925 
926 	return retval;
927 }
928 
/*
 * clone_obj handler for UConverter objects.
 *
 * Creates a new converter object of the same class and duplicates both ICU
 * converters of the original into it (ucnv_clone() when building against
 * ICU major versions above 70, ucnv_safeClone() otherwise).
 *
 * On an ICU failure the error is recorded on the ORIGINAL object, an
 * exception carrying that message is thrown, and the new object is returned
 * as-is without its callbacks or members being set up.
 *
 * On success the callbacks are re-registered against the new object and the
 * members of the original are cloned into it. The clone does not inherit the
 * error state of the original.
 */
static zend_object *php_converter_clone_object(zend_object *object) {
	php_converter_object *source = php_converter_fetch_object(object);
	php_converter_object *copy;
	zend_object *result = php_converter_object_ctor(object->ce, &copy);
	UErrorCode status = U_ZERO_ERROR;

	intl_errors_reset(&source->error);

	/* Duplicate the source-side converter first. */
#if U_ICU_VERSION_MAJOR_NUM > 70
	copy->src = ucnv_clone(source->src, &status);
#else
	copy->src = ucnv_safeClone(source->src, NULL, NULL, &status);
#endif

	/* Only attempt the destination-side converter if the first clone did not fail. */
	if (U_SUCCESS(status)) {
		/* Start the second call from a clean status code. */
		status = U_ZERO_ERROR;
#if U_ICU_VERSION_MAJOR_NUM > 70
		copy->dest = ucnv_clone(source->dest, &status);
#else
		copy->dest = ucnv_safeClone(source->dest, NULL, NULL, &status);
#endif
	}

	if (U_FAILURE(status)) {
		zend_string *message;

		THROW_UFAILURE(source, "ucnv_safeClone", status);

		/* Surface the message just stored on the original object as an exception. */
		message = intl_error_get_message(&source->error);
		zend_throw_exception(NULL, ZSTR_VAL(message), 0);
		zend_string_release_ex(message, 0);

		return result;
	}

	/* Point the converter error handlers at the new object's context. */
	php_converter_set_callbacks(copy, copy->src);
	php_converter_set_callbacks(copy, copy->dest);

	zend_objects_clone_members(&copy->obj, &source->obj);

	/* The error state of the original is deliberately not copied to the clone. */
	return result;
}
970 /* }}} */
971 
/* Declares the class constant UConverter::REASON_<name> with the value of UCNV_<name>. */
#define CONV_REASON_CONST(name) \
	zend_declare_class_constant_long(php_converter_ce, \
		"REASON_" #name, sizeof("REASON_" #name) - 1, \
		UCNV_ ## name)

/* Declares the class constant UConverter::<name> with the value of UCNV_<name>. */
#define CONV_TYPE_CONST(name) \
	zend_declare_class_constant_long(php_converter_ce, \
		#name, sizeof(#name) - 1, \
		UCNV_ ## name)
974 
975 /* {{{ php_converter_minit */
php_converter_minit(INIT_FUNC_ARGS)976 int php_converter_minit(INIT_FUNC_ARGS) {
977 	zend_class_entry ce;
978 
979 	INIT_CLASS_ENTRY(ce, "UConverter", class_UConverter_methods);
980 	php_converter_ce = zend_register_internal_class(&ce);
981 	php_converter_ce->create_object = php_converter_create_object;
982 	memcpy(&php_converter_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
983 	php_converter_object_handlers.offset = XtOffsetOf(php_converter_object, obj);
984 	php_converter_object_handlers.clone_obj = php_converter_clone_object;
985 	php_converter_object_handlers.dtor_obj = php_converter_dtor_object;
986 
987 	/* enum UConverterCallbackReason */
988 	CONV_REASON_CONST(UNASSIGNED);
989 	CONV_REASON_CONST(ILLEGAL);
990 	CONV_REASON_CONST(IRREGULAR);
991 	CONV_REASON_CONST(RESET);
992 	CONV_REASON_CONST(CLOSE);
993 	CONV_REASON_CONST(CLONE);
994 
995 	/* enum UConverterType */
996 	CONV_TYPE_CONST(UNSUPPORTED_CONVERTER);
997 	CONV_TYPE_CONST(SBCS);
998 	CONV_TYPE_CONST(DBCS);
999 	CONV_TYPE_CONST(MBCS);
1000 	CONV_TYPE_CONST(LATIN_1);
1001 	CONV_TYPE_CONST(UTF8);
1002 	CONV_TYPE_CONST(UTF16_BigEndian);
1003 	CONV_TYPE_CONST(UTF16_LittleEndian);
1004 	CONV_TYPE_CONST(UTF32_BigEndian);
1005 	CONV_TYPE_CONST(UTF32_LittleEndian);
1006 	CONV_TYPE_CONST(EBCDIC_STATEFUL);
1007 	CONV_TYPE_CONST(ISO_2022);
1008 	CONV_TYPE_CONST(LMBCS_1);
1009 	CONV_TYPE_CONST(LMBCS_2);
1010 	CONV_TYPE_CONST(LMBCS_3);
1011 	CONV_TYPE_CONST(LMBCS_4);
1012 	CONV_TYPE_CONST(LMBCS_5);
1013 	CONV_TYPE_CONST(LMBCS_6);
1014 	CONV_TYPE_CONST(LMBCS_8);
1015 	CONV_TYPE_CONST(LMBCS_11);
1016 	CONV_TYPE_CONST(LMBCS_16);
1017 	CONV_TYPE_CONST(LMBCS_17);
1018 	CONV_TYPE_CONST(LMBCS_18);
1019 	CONV_TYPE_CONST(LMBCS_19);
1020 	CONV_TYPE_CONST(LMBCS_LAST);
1021 	CONV_TYPE_CONST(HZ);
1022 	CONV_TYPE_CONST(SCSU);
1023 	CONV_TYPE_CONST(ISCII);
1024 	CONV_TYPE_CONST(US_ASCII);
1025 	CONV_TYPE_CONST(UTF7);
1026 	CONV_TYPE_CONST(BOCU1);
1027 	CONV_TYPE_CONST(UTF16);
1028 	CONV_TYPE_CONST(UTF32);
1029 	CONV_TYPE_CONST(CESU8);
1030 	CONV_TYPE_CONST(IMAP_MAILBOX);
1031 
1032 	return SUCCESS;
1033 }
1034 /* }}} */
1035