xref: /PHP-8.4/ext/intl/converter/converter.c (revision d005ab20)
1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Sara Golemon <pollita@php.net>                              |
12    +----------------------------------------------------------------------+
13  */
14 
15 #include "converter.h"
16 #include "zend_exceptions.h"
17 
18 #include <unicode/utypes.h>
19 #include <unicode/utf8.h>
20 #include <unicode/utf16.h>
21 #include <unicode/ucnv.h>
22 #include <unicode/ustring.h>
23 
24 #include "../intl_error.h"
25 #include "../intl_common.h"
26 #include "converter_arginfo.h"
27 
28 typedef struct _php_converter_object {
29 	UConverter *src, *dest;
30 	zend_fcall_info to_cb, from_cb;
31 	zend_fcall_info_cache to_cache, from_cache;
32 	intl_error error;
33 	zend_object obj;
34 } php_converter_object;
35 
36 
php_converter_fetch_object(zend_object * obj)37 static inline php_converter_object *php_converter_fetch_object(zend_object *obj) {
38 	return (php_converter_object *)((char*)(obj) - XtOffsetOf(php_converter_object, obj));
39 }
40 #define Z_INTL_CONVERTER_P(zv) php_converter_fetch_object(Z_OBJ_P(zv))
41 
42 static zend_class_entry     *php_converter_ce;
43 static zend_object_handlers  php_converter_object_handlers;
44 
45 #define CONV_GET(pzv)  (Z_INTL_CONVERTER_P((pzv)))
46 #define THROW_UFAILURE(obj, fname, error) php_converter_throw_failure(obj, error, \
47                                           fname "() returned error " ZEND_LONG_FMT ": %s", (zend_long)error, u_errorName(error))
48 
49 /* {{{ php_converter_throw_failure */
php_converter_throw_failure(php_converter_object * objval,UErrorCode error,const char * format,...)50 static inline void php_converter_throw_failure(php_converter_object *objval, UErrorCode error, const char *format, ...) {
51 	intl_error *err = objval ? &(objval->error) : NULL;
52 	char message[1024];
53 	va_list vargs;
54 
55 	va_start(vargs, format);
56 	vsnprintf(message, sizeof(message), format, vargs);
57 	va_end(vargs);
58 
59 	intl_errors_set(err, error, message, 1);
60 }
61 /* }}} */
62 
63 /* {{{ php_converter_default_callback */
php_converter_default_callback(zval * return_value,zval * zobj,zend_long reason,zval * error)64 static void php_converter_default_callback(zval *return_value, zval *zobj, zend_long reason, zval *error) {
65 	/* Basic functionality so children can call parent::toUCallback() */
66 	switch (reason) {
67 		case UCNV_UNASSIGNED:
68 		case UCNV_ILLEGAL:
69 		case UCNV_IRREGULAR:
70 		{
71 			php_converter_object *objval = (php_converter_object*)CONV_GET(zobj);
72 			char chars[127];
73 			int8_t chars_len = sizeof(chars);
74 			UErrorCode uerror = U_ZERO_ERROR;
75 			if(!objval->src) {
76 				php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Source Converter has not been initialized yet");
77 				chars[0] = 0x1A;
78 				chars[1] = 0;
79 				chars_len = 1;
80 				ZEND_TRY_ASSIGN_REF_LONG(error, U_INVALID_STATE_ERROR);
81 				RETVAL_STRINGL(chars, chars_len);
82 				return;
83 			}
84 
85 			/* Yes, this is fairly wasteful at first glance,
86 			 * but considering that the alternative is to store
87 			 * what's sent into setSubstChars() and the fact
88 			 * that this is an extremely unlikely codepath
89 			 * I'd rather take the CPU hit here, than waste time
90 			 * storing a value I'm unlikely to use.
91 			 */
92 			ucnv_getSubstChars(objval->src, chars, &chars_len, &uerror);
93 			if (U_FAILURE(uerror)) {
94 				THROW_UFAILURE(objval, "ucnv_getSubstChars", uerror);
95 				chars[0] = 0x1A;
96 				chars[1] = 0;
97 				chars_len = 1;
98 			}
99 			ZEND_TRY_ASSIGN_REF_LONG(error, uerror);
100 			RETVAL_STRINGL(chars, chars_len);
101 		}
102 	}
103 }
104 /* }}} */
105 
106 /* {{{ */
PHP_METHOD(UConverter,toUCallback)107 PHP_METHOD(UConverter, toUCallback) {
108 	zend_long reason;
109 	zend_string *source, *codeUnits;
110 	zval *error;
111 
112 	ZEND_PARSE_PARAMETERS_START(4, 4)
113 		Z_PARAM_LONG(reason)
114 		Z_PARAM_STR(source)
115 		Z_PARAM_STR(codeUnits)
116 		Z_PARAM_ZVAL(error)
117 	ZEND_PARSE_PARAMETERS_END();
118 
119 	php_converter_default_callback(return_value, ZEND_THIS, reason, error);
120 }
121 /* }}} */
122 
123 /* {{{ */
PHP_METHOD(UConverter,fromUCallback)124 PHP_METHOD(UConverter, fromUCallback) {
125 	zend_long reason;
126 	zval *source, *error;
127 	zend_long codePoint;
128 
129 	ZEND_PARSE_PARAMETERS_START(4, 4)
130 		Z_PARAM_LONG(reason)
131 		Z_PARAM_ARRAY(source)
132 		Z_PARAM_LONG(codePoint)
133 		Z_PARAM_ZVAL(error)
134 	ZEND_PARSE_PARAMETERS_END();
135 
136 	php_converter_default_callback(return_value, ZEND_THIS, reason, error);
137 }
138 /* }}} */
139 
140 /* {{{ php_converter_check_limits */
php_converter_check_limits(php_converter_object * objval,zend_long available,zend_long needed)141 static inline bool php_converter_check_limits(php_converter_object *objval, zend_long available, zend_long needed) {
142 	if (available < needed) {
143 		php_converter_throw_failure(objval, U_BUFFER_OVERFLOW_ERROR, "Buffer overrun " ZEND_LONG_FMT " bytes needed, " ZEND_LONG_FMT " available", needed, available);
144 		return 0;
145 	}
146 	return 1;
147 }
148 /* }}} */
149 
150 #define TARGET_CHECK(cnvargs, needed) php_converter_check_limits(objval, cnvargs->targetLimit - cnvargs->target, needed)
151 
152 /* {{{ php_converter_append_toUnicode_target */
php_converter_append_toUnicode_target(zval * val,UConverterToUnicodeArgs * args,php_converter_object * objval)153 static void php_converter_append_toUnicode_target(zval *val, UConverterToUnicodeArgs *args, php_converter_object *objval) {
154 	switch (Z_TYPE_P(val)) {
155 		case IS_NULL:
156 			/* Code unit is being skipped */
157 			return;
158 		case IS_LONG:
159 		{
160 			zend_long lval = Z_LVAL_P(val);
161 			if ((lval < 0) || (lval > 0x10FFFF)) {
162 				php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR, "Invalid codepoint U+%04lx", lval);
163 				return;
164 			}
165 			if (lval > 0xFFFF) {
166 				/* Supplemental planes U+010000 - U+10FFFF */
167 				if (TARGET_CHECK(args, 2)) {
168 					/* TODO: Find the ICU call which does this properly */
169 					*(args->target++) = (UChar)(((lval - 0x10000) >> 10)   | 0xD800);
170 					*(args->target++) = (UChar)(((lval - 0x10000) & 0x3FF) | 0xDC00);
171 				}
172 				return;
173 			}
174 			/* Non-suggogate BMP codepoint */
175 			if (TARGET_CHECK(args, 1)) {
176 				*(args->target++) = (UChar)lval;
177 			}
178 			return;
179 		}
180 		case IS_STRING:
181 		{
182 			const char *strval = Z_STRVAL_P(val);
183 			int i = 0, strlen = Z_STRLEN_P(val);
184 
185 			while((i != strlen) && TARGET_CHECK(args, 1)) {
186 				UChar c;
187 				U8_NEXT(strval, i, strlen, c);
188 				*(args->target++) = c;
189 			}
190 			return;
191 		}
192 		case IS_ARRAY:
193 		{
194 			HashTable *ht = Z_ARRVAL_P(val);
195 			zval *tmpzval;
196 
197 			ZEND_HASH_FOREACH_VAL(ht, tmpzval) {
198 				php_converter_append_toUnicode_target(tmpzval, args, objval);
199 			} ZEND_HASH_FOREACH_END();
200 			return;
201 		}
202 		default:
203 			php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR,
204 			                            "toUCallback() specified illegal type for substitution character");
205 	}
206 }
207 /* }}} */
208 
209 /* {{{ php_converter_to_u_callback */
php_converter_to_u_callback(const void * context,UConverterToUnicodeArgs * args,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * pErrorCode)210 static void php_converter_to_u_callback(const void *context,
211                                         UConverterToUnicodeArgs *args,
212                                         const char *codeUnits, int32_t length,
213                                         UConverterCallbackReason reason,
214                                         UErrorCode *pErrorCode) {
215 	php_converter_object *objval = (php_converter_object*)context;
216 	zval retval;
217 	zval zargs[4];
218 
219 	ZVAL_LONG(&zargs[0], reason);
220 	if (args->source) {
221 		ZVAL_STRINGL(&zargs[1], args->source, args->sourceLimit - args->source);
222 	} else {
223 		ZVAL_EMPTY_STRING(&zargs[1]);
224 	}
225 	if (codeUnits) {
226 		ZVAL_STRINGL(&zargs[2], codeUnits, length);
227 	} else {
228 		ZVAL_EMPTY_STRING(&zargs[2]);
229 	}
230 	ZVAL_LONG(&zargs[3], *pErrorCode);
231 	ZVAL_MAKE_REF(&zargs[3]);
232 
233 	objval->to_cb.param_count    = 4;
234 	objval->to_cb.params = zargs;
235 	objval->to_cb.retval = &retval;
236 	if (zend_call_function(&(objval->to_cb), &(objval->to_cache)) == FAILURE) {
237 		/* Unlikely */
238 		php_converter_throw_failure(objval, U_INTERNAL_PROGRAM_ERROR, "Unexpected failure calling toUCallback()");
239 	} else if (!Z_ISUNDEF(retval)) {
240 		php_converter_append_toUnicode_target(&retval, args, objval);
241 		zval_ptr_dtor(&retval);
242 	}
243 
244 	if (Z_TYPE(zargs[3]) == IS_LONG) {
245 		*pErrorCode = Z_LVAL(zargs[3]);
246 	} else if (Z_ISREF(zargs[3]) && Z_TYPE_P(Z_REFVAL(zargs[3])) == IS_LONG) {
247 		*pErrorCode = Z_LVAL_P(Z_REFVAL(zargs[3]));
248 	}
249 
250 	zval_ptr_dtor(&zargs[0]);
251 	zval_ptr_dtor(&zargs[1]);
252 	zval_ptr_dtor(&zargs[2]);
253 	zval_ptr_dtor(&zargs[3]);
254 }
255 /* }}} */
256 
257 /* {{{ php_converter_append_fromUnicode_target */
php_converter_append_fromUnicode_target(zval * val,UConverterFromUnicodeArgs * args,php_converter_object * objval)258 static void php_converter_append_fromUnicode_target(zval *val, UConverterFromUnicodeArgs *args, php_converter_object *objval) {
259 	switch (Z_TYPE_P(val)) {
260 		case IS_NULL:
261 			/* Ignore */
262 			return;
263 		case IS_LONG:
264 			if (TARGET_CHECK(args, 1)) {
265 				*(args->target++) = Z_LVAL_P(val);
266 			}
267 			return;
268 		case IS_STRING:
269 		{
270 			size_t vallen = Z_STRLEN_P(val);
271 			if (TARGET_CHECK(args, vallen)) {
272 				args->target = zend_mempcpy(args->target, Z_STRVAL_P(val), vallen);
273 			}
274 			return;
275 		}
276 		case IS_ARRAY:
277 		{
278 			HashTable *ht = Z_ARRVAL_P(val);
279 			zval *tmpzval;
280 			ZEND_HASH_FOREACH_VAL(ht, tmpzval) {
281 				php_converter_append_fromUnicode_target(tmpzval, args, objval);
282 			} ZEND_HASH_FOREACH_END();
283 			return;
284 		}
285 		default:
286 			php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR, "fromUCallback() specified illegal type for substitution character");
287 	}
288 }
289 /* }}} */
290 
291 /* {{{ php_converter_from_u_callback */
php_converter_from_u_callback(const void * context,UConverterFromUnicodeArgs * args,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * pErrorCode)292 static void php_converter_from_u_callback(const void *context,
293                                           UConverterFromUnicodeArgs *args,
294                                           const UChar *codeUnits, int32_t length, UChar32 codePoint,
295                                           UConverterCallbackReason reason,
296                                           UErrorCode *pErrorCode) {
297 	php_converter_object *objval = (php_converter_object*)context;
298 	zval retval;
299 	zval zargs[4];
300 	int i;
301 
302 	ZVAL_LONG(&zargs[0], reason);
303 	array_init(&zargs[1]);
304 	i = 0;
305 	while (i < length) {
306 		UChar32 c;
307 		U16_NEXT(codeUnits, i, length, c);
308 		add_next_index_long(&zargs[1], c);
309 	}
310 	ZVAL_LONG(&zargs[2], codePoint);
311 	ZVAL_LONG(&zargs[3], *pErrorCode);
312 	ZVAL_MAKE_REF(&zargs[3]);
313 
314 	objval->from_cb.param_count = 4;
315 	objval->from_cb.params = zargs;
316 	objval->from_cb.retval = &retval;
317 	if (zend_call_function(&(objval->from_cb), &(objval->from_cache)) == FAILURE) {
318 		/* Unlikely */
319 		php_converter_throw_failure(objval, U_INTERNAL_PROGRAM_ERROR, "Unexpected failure calling fromUCallback()");
320 	} else if (!Z_ISUNDEF(retval)) {
321 		php_converter_append_fromUnicode_target(&retval, args, objval);
322 		zval_ptr_dtor(&retval);
323 	}
324 
325 	if (Z_TYPE(zargs[3]) == IS_LONG) {
326 		*pErrorCode = Z_LVAL(zargs[3]);
327 	} else if (Z_ISREF(zargs[3]) && Z_TYPE_P(Z_REFVAL(zargs[3])) == IS_LONG) {
328 		*pErrorCode = Z_LVAL_P(Z_REFVAL(zargs[3]));
329 	}
330 
331 	zval_ptr_dtor(&zargs[0]);
332 	zval_ptr_dtor(&zargs[1]);
333 	zval_ptr_dtor(&zargs[2]);
334 	zval_ptr_dtor(&zargs[3]);
335 }
336 /* }}} */
337 
338 /* {{{ php_converter_set_callbacks */
php_converter_set_callbacks(php_converter_object * objval,UConverter * cnv)339 static inline bool php_converter_set_callbacks(php_converter_object *objval, UConverter *cnv) {
340 	bool ret = true;
341 	UErrorCode error = U_ZERO_ERROR;
342 
343 	if (objval->obj.ce == php_converter_ce) {
344 		/* Short-circuit having to go through method calls and data marshalling
345 		 * when we're using default behavior
346 		 */
347 		return 1;
348 	}
349 
350 	ucnv_setToUCallBack(cnv, (UConverterToUCallback)php_converter_to_u_callback, (const void*)objval,
351 	                    NULL, NULL, &error);
352 	if (U_FAILURE(error)) {
353 		THROW_UFAILURE(objval, "ucnv_setToUCallBack", error);
354 		ret = 0;
355 	}
356 
357 	error = U_ZERO_ERROR;
358 	ucnv_setFromUCallBack(cnv, (UConverterFromUCallback)php_converter_from_u_callback, (const void*)objval,
359 	                      NULL, NULL, &error);
360 	if (U_FAILURE(error)) {
361 		THROW_UFAILURE(objval, "ucnv_setFromUCallBack", error);
362 		ret = 0;
363 	}
364 	return ret;
365 }
366 /* }}} */
367 
368 /* {{{ php_converter_set_encoding */
php_converter_set_encoding(php_converter_object * objval,UConverter ** pcnv,const char * enc,size_t enc_len)369 static bool php_converter_set_encoding(php_converter_object *objval,
370                                        UConverter **pcnv,
371                                        const char *enc, size_t enc_len) {
372 	UErrorCode error = U_ZERO_ERROR;
373 	UConverter *cnv = ucnv_open(enc, &error);
374 
375 	if (error == U_AMBIGUOUS_ALIAS_WARNING) {
376 		UErrorCode getname_error = U_ZERO_ERROR;
377 		const char *actual_encoding = ucnv_getName(cnv, &getname_error);
378 		if (U_FAILURE(getname_error)) {
379 			/* Should never happen */
380 			actual_encoding = "(unknown)";
381 		}
382 		php_error_docref(NULL, E_WARNING, "Ambiguous encoding specified, using %s", actual_encoding);
383 	} else if (U_FAILURE(error)) {
384 		if (objval) {
385 			THROW_UFAILURE(objval, "ucnv_open", error);
386 		} else {
387 			php_error_docref(NULL, E_WARNING, "Error setting encoding: %d - %s", (int)error, u_errorName(error));
388 		}
389 		return 0;
390 	}
391 
392 	if (objval && !php_converter_set_callbacks(objval, cnv)) {
393 		return 0;
394 	}
395 
396 	if (*pcnv) {
397 		ucnv_close(*pcnv);
398 	}
399 	*pcnv = cnv;
400 	return 1;
401 }
402 /* }}} */
403 
404 /* {{{ php_converter_do_set_encoding */
php_converter_do_set_encoding(UConverter ** pcnv,INTERNAL_FUNCTION_PARAMETERS)405 static void php_converter_do_set_encoding(UConverter **pcnv, INTERNAL_FUNCTION_PARAMETERS) {
406 	php_converter_object *objval = CONV_GET(ZEND_THIS);
407 	char *enc;
408 	size_t enc_len;
409 
410 	ZEND_PARSE_PARAMETERS_START(1, 1)
411 		Z_PARAM_STRING(enc, enc_len)
412 	ZEND_PARSE_PARAMETERS_END();
413 
414 	intl_errors_reset(&objval->error);
415 
416 	RETURN_BOOL(php_converter_set_encoding(objval, pcnv, enc, enc_len));
417 }
418 /* }}} */
419 
420 /* {{{ */
PHP_METHOD(UConverter,setSourceEncoding)421 PHP_METHOD(UConverter, setSourceEncoding) {
422 	php_converter_object *objval = CONV_GET(ZEND_THIS);
423 	php_converter_do_set_encoding(&(objval->src), INTERNAL_FUNCTION_PARAM_PASSTHRU);
424 }
425 /* }}} */
426 
427 /* {{{ */
PHP_METHOD(UConverter,setDestinationEncoding)428 PHP_METHOD(UConverter, setDestinationEncoding) {
429 	php_converter_object *objval = CONV_GET(ZEND_THIS);
430 	php_converter_do_set_encoding(&(objval->dest), INTERNAL_FUNCTION_PARAM_PASSTHRU);
431 }
432 /* }}} */
433 
434 /* {{{ php_converter_do_get_encoding */
php_converter_do_get_encoding(php_converter_object * objval,UConverter * cnv,INTERNAL_FUNCTION_PARAMETERS)435 static void php_converter_do_get_encoding(php_converter_object *objval, UConverter *cnv, INTERNAL_FUNCTION_PARAMETERS) {
436 	const char *name;
437 
438 	ZEND_PARSE_PARAMETERS_NONE();
439 
440 	intl_errors_reset(&objval->error);
441 
442 	if (!cnv) {
443 		RETURN_NULL();
444 	}
445 
446 	name = ucnv_getName(cnv, &objval->error.code);
447 	if (U_FAILURE(objval->error.code)) {
448 		THROW_UFAILURE(objval, "ucnv_getName()", objval->error.code);
449 		RETURN_FALSE;
450 	}
451 
452 	RETURN_STRING(name);
453 }
454 /* }}} */
455 
456 /* {{{ */
PHP_METHOD(UConverter,getSourceEncoding)457 PHP_METHOD(UConverter, getSourceEncoding) {
458 	php_converter_object *objval = CONV_GET(ZEND_THIS);
459 	php_converter_do_get_encoding(objval, objval->src, INTERNAL_FUNCTION_PARAM_PASSTHRU);
460 }
461 /* }}} */
462 
463 /* {{{ */
PHP_METHOD(UConverter,getDestinationEncoding)464 PHP_METHOD(UConverter, getDestinationEncoding) {
465 	php_converter_object *objval = CONV_GET(ZEND_THIS);
466 	php_converter_do_get_encoding(objval, objval->dest, INTERNAL_FUNCTION_PARAM_PASSTHRU);
467 }
468 /* }}} */
469 
470 /* {{{ php_converter_do_get_type */
php_converter_do_get_type(php_converter_object * objval,UConverter * cnv,INTERNAL_FUNCTION_PARAMETERS)471 static void php_converter_do_get_type(php_converter_object *objval, UConverter *cnv, INTERNAL_FUNCTION_PARAMETERS) {
472 	UConverterType t;
473 
474 	ZEND_PARSE_PARAMETERS_NONE();
475 	intl_errors_reset(&objval->error);
476 
477 	if (!cnv) {
478 		RETURN_NULL();
479 	}
480 
481 	t = ucnv_getType(cnv);
482 	if (U_FAILURE(objval->error.code)) {
483 		THROW_UFAILURE(objval, "ucnv_getType", objval->error.code);
484 		RETURN_FALSE;
485 	}
486 
487 	RETURN_LONG(t);
488 }
489 /* }}} */
490 
491 /* {{{ */
PHP_METHOD(UConverter,getSourceType)492 PHP_METHOD(UConverter, getSourceType) {
493 	php_converter_object *objval = CONV_GET(ZEND_THIS);
494 	php_converter_do_get_type(objval, objval->src, INTERNAL_FUNCTION_PARAM_PASSTHRU);
495 }
496 /* }}} */
497 
498 /* {{{ */
PHP_METHOD(UConverter,getDestinationType)499 PHP_METHOD(UConverter, getDestinationType) {
500 	php_converter_object *objval = CONV_GET(ZEND_THIS);
501 	php_converter_do_get_type(objval, objval->dest, INTERNAL_FUNCTION_PARAM_PASSTHRU);
502 }
503 /* }}} */
504 
505 /* {{{ php_converter_resolve_callback */
php_converter_resolve_callback(zval * zobj,php_converter_object * objval,const char * callback_name,zend_fcall_info * finfo,zend_fcall_info_cache * fcache)506 static void php_converter_resolve_callback(zval *zobj,
507                                            php_converter_object *objval,
508                                            const char *callback_name,
509                                            zend_fcall_info *finfo,
510                                            zend_fcall_info_cache *fcache) {
511 	char *errstr = NULL;
512 	zval caller;
513 
514 	array_init(&caller);
515 	Z_ADDREF_P(zobj);
516 	add_index_zval(&caller, 0, zobj);
517 	add_index_string(&caller, 1, callback_name);
518 	if (zend_fcall_info_init(&caller, 0, finfo, fcache, NULL, &errstr) == FAILURE) {
519 		php_converter_throw_failure(objval, U_INTERNAL_PROGRAM_ERROR, "Error setting converter callback: %s", errstr);
520 	}
521 	zend_array_destroy(Z_ARR(caller));
522 	ZVAL_UNDEF(&finfo->function_name);
523 	if (errstr) {
524 		efree(errstr);
525 	}
526 }
527 /* }}} */
528 
529 /* {{{ */
PHP_METHOD(UConverter,__construct)530 PHP_METHOD(UConverter, __construct) {
531 	php_converter_object *objval = CONV_GET(ZEND_THIS);
532 	char *src = "utf-8";
533 	size_t src_len = sizeof("utf-8") - 1;
534 	char *dest = src;
535 	size_t dest_len = src_len;
536 
537 	intl_error_reset(NULL);
538 
539 	ZEND_PARSE_PARAMETERS_START(0, 2)
540 		Z_PARAM_OPTIONAL
541 		Z_PARAM_STRING_OR_NULL(dest, dest_len)
542 		Z_PARAM_STRING_OR_NULL(src, src_len)
543 	ZEND_PARSE_PARAMETERS_END();
544 
545 	php_converter_set_encoding(objval, &(objval->src),  src,  src_len );
546 	php_converter_set_encoding(objval, &(objval->dest), dest, dest_len);
547 	php_converter_resolve_callback(ZEND_THIS, objval, "toUCallback",   &(objval->to_cb),   &(objval->to_cache));
548 	php_converter_resolve_callback(ZEND_THIS, objval, "fromUCallback", &(objval->from_cb), &(objval->from_cache));
549 }
550 /* }}} */
551 
552 /* {{{ */
PHP_METHOD(UConverter,setSubstChars)553 PHP_METHOD(UConverter, setSubstChars) {
554 	php_converter_object *objval = CONV_GET(ZEND_THIS);
555 	char *chars;
556 	size_t chars_len;
557 	int ret = 1;
558 
559 	ZEND_PARSE_PARAMETERS_START(1, 1)
560 		Z_PARAM_STRING(chars, chars_len)
561 	ZEND_PARSE_PARAMETERS_END();
562 
563 	intl_errors_reset(&objval->error);
564 
565 	if (objval->src) {
566 		UErrorCode error = U_ZERO_ERROR;
567 		ucnv_setSubstChars(objval->src, chars, chars_len, &error);
568 		if (U_FAILURE(error)) {
569 			THROW_UFAILURE(objval, "ucnv_setSubstChars", error);
570 			ret = 0;
571 		}
572 	} else {
573 		php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Source Converter has not been initialized yet");
574 		ret = 0;
575 	}
576 
577 	if (objval->dest) {
578 		UErrorCode error = U_ZERO_ERROR;
579 		ucnv_setSubstChars(objval->dest, chars, chars_len, &error);
580 		if (U_FAILURE(error)) {
581 			THROW_UFAILURE(objval, "ucnv_setSubstChars", error);
582 			ret = 0;
583 		}
584 	} else {
585 		php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Destination Converter has not been initialized yet");
586 		ret = 0;
587 	}
588 
589 	RETURN_BOOL(ret);
590 }
591 /* }}} */
592 
593 /* {{{ */
PHP_METHOD(UConverter,getSubstChars)594 PHP_METHOD(UConverter, getSubstChars) {
595 	php_converter_object *objval = CONV_GET(ZEND_THIS);
596 	char chars[127];
597 	int8_t chars_len = sizeof(chars);
598 	UErrorCode error = U_ZERO_ERROR;
599 
600 	ZEND_PARSE_PARAMETERS_NONE();
601 	intl_errors_reset(&objval->error);
602 
603 	if (!objval->src) {
604 		RETURN_NULL();
605 	}
606 
607 	/* src and dest get the same subst chars set,
608 	 * so it doesn't really matter which one we read from
609 	 */
610 	ucnv_getSubstChars(objval->src, chars, &chars_len, &error);
611 	if (U_FAILURE(error)) {
612 		THROW_UFAILURE(objval, "ucnv_getSubstChars", error);
613 		RETURN_FALSE;
614 	}
615 
616 	RETURN_STRINGL(chars, chars_len);
617 }
618 /* }}} */
619 
620 /* {{{ php_converter_do_convert */
php_converter_do_convert(UConverter * dest_cnv,UConverter * src_cnv,const char * src,int32_t src_len,php_converter_object * objval)621 static zend_string* php_converter_do_convert(UConverter *dest_cnv,
622                                              UConverter *src_cnv,  const char *src, int32_t src_len,
623                                              php_converter_object *objval
624                                             ) {
625 	UErrorCode	error = U_ZERO_ERROR;
626 	int32_t		temp_len, ret_len;
627 	zend_string	*ret;
628 	UChar		*temp;
629 
630 	if (!src_cnv || !dest_cnv) {
631 		php_converter_throw_failure(objval, U_INVALID_STATE_ERROR,
632 		                            "Internal converters not initialized");
633 		return NULL;
634 	}
635 
636 	/* Get necessary buffer size first */
637 	temp_len = 1 + ucnv_toUChars(src_cnv, NULL, 0, src, src_len, &error);
638 	if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
639 		THROW_UFAILURE(objval, "ucnv_toUChars", error);
640 		return NULL;
641 	}
642 	temp = safe_emalloc(sizeof(UChar), temp_len, sizeof(UChar));
643 
644 	/* Convert to intermediate UChar* array */
645 	error = U_ZERO_ERROR;
646 	temp_len = ucnv_toUChars(src_cnv, temp, temp_len, src, src_len, &error);
647 	if (U_FAILURE(error)) {
648 		THROW_UFAILURE(objval, "ucnv_toUChars", error);
649 		efree(temp);
650 		return NULL;
651 	}
652 	temp[temp_len] = 0;
653 
654 	/* Get necessary output buffer size */
655 	ret_len = ucnv_fromUChars(dest_cnv, NULL, 0, temp, temp_len, &error);
656 	if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
657 		THROW_UFAILURE(objval, "ucnv_fromUChars", error);
658 		efree(temp);
659 		return NULL;
660 	}
661 
662 	ret = zend_string_alloc(ret_len, 0);
663 
664 	/* Convert to final encoding */
665 	error = U_ZERO_ERROR;
666 	ZSTR_LEN(ret) = ucnv_fromUChars(dest_cnv, ZSTR_VAL(ret), ret_len+1, temp, temp_len, &error);
667 	efree(temp);
668 	if (U_FAILURE(error)) {
669 		THROW_UFAILURE(objval, "ucnv_fromUChars", error);
670 		zend_string_efree(ret);
671 		return NULL;
672 	}
673 
674 	return ret;
675 }
676 /* }}} */
677 
678 /* {{{ */
679 #define UCNV_REASON_CASE(v) case (UCNV_ ## v) : RETURN_STRINGL( "REASON_" #v , sizeof( "REASON_" #v ) - 1);
PHP_METHOD(UConverter,reasonText)680 PHP_METHOD(UConverter, reasonText) {
681 	zend_long reason;
682 
683 	ZEND_PARSE_PARAMETERS_START(1, 1)
684 		Z_PARAM_LONG(reason)
685 	ZEND_PARSE_PARAMETERS_END();
686 	intl_error_reset(NULL);
687 
688 	switch (reason) {
689 		UCNV_REASON_CASE(UNASSIGNED)
690 		UCNV_REASON_CASE(ILLEGAL)
691 		UCNV_REASON_CASE(IRREGULAR)
692 		UCNV_REASON_CASE(RESET)
693 		UCNV_REASON_CASE(CLOSE)
694 		UCNV_REASON_CASE(CLONE)
695 		default:
696 			zend_argument_value_error(1, "must be a UConverter::REASON_* constant");
697 			RETURN_THROWS();
698 	}
699 }
700 /* }}} */
701 
702 /* {{{ */
PHP_METHOD(UConverter,convert)703 PHP_METHOD(UConverter, convert) {
704 	php_converter_object *objval = CONV_GET(ZEND_THIS);
705 	char *str;
706 	size_t str_len;
707 	zend_string *ret;
708 	bool reverse = false;
709 
710 	ZEND_PARSE_PARAMETERS_START(1, 2)
711 		Z_PARAM_STRING(str, str_len)
712 		Z_PARAM_OPTIONAL
713 		Z_PARAM_BOOL(reverse)
714 	ZEND_PARSE_PARAMETERS_END();
715 	intl_errors_reset(&objval->error);
716 
717 	ret = php_converter_do_convert(reverse ? objval->src : objval->dest,
718 	                               reverse ? objval->dest : objval->src,
719 	                               str,   str_len,
720 	                               objval);
721 	if (ret) {
722 		RETURN_NEW_STR(ret);
723 	} else {
724 		RETURN_FALSE;
725 	}
726 }
727 /* }}} */
728 
729 /* {{{ */
PHP_METHOD(UConverter,transcode)730 PHP_METHOD(UConverter, transcode) {
731 	char *str, *src, *dest;
732 	size_t str_len, src_len, dest_len;
733 	zval *options = NULL;
734 	UConverter *src_cnv = NULL, *dest_cnv = NULL;
735 
736 	ZEND_PARSE_PARAMETERS_START(3, 4)
737 		Z_PARAM_STRING(str, str_len)
738 		Z_PARAM_STRING(dest, dest_len)
739 		Z_PARAM_STRING(src, src_len)
740 		Z_PARAM_OPTIONAL
741 		Z_PARAM_ARRAY_OR_NULL(options)
742 	ZEND_PARSE_PARAMETERS_END();
743 	intl_error_reset(NULL);
744 
745 	if (php_converter_set_encoding(NULL, &src_cnv,  src,  src_len) &&
746 	    php_converter_set_encoding(NULL, &dest_cnv, dest, dest_len)) {
747 	    zend_string *ret;
748 		UErrorCode error = U_ZERO_ERROR;
749 
750 		if (options && zend_hash_num_elements(Z_ARRVAL_P(options))) {
751 			zval *tmpzval;
752 
753 			if (U_SUCCESS(error) &&
754 				(tmpzval = zend_hash_str_find(Z_ARRVAL_P(options), "from_subst", sizeof("from_subst") - 1)) != NULL &&
755 				Z_TYPE_P(tmpzval) == IS_STRING) {
756 				error = U_ZERO_ERROR;
757 				ucnv_setSubstChars(src_cnv, Z_STRVAL_P(tmpzval), Z_STRLEN_P(tmpzval) & 0x7F, &error);
758 			}
759 			if (U_SUCCESS(error) &&
760 				(tmpzval = zend_hash_str_find(Z_ARRVAL_P(options), "to_subst", sizeof("to_subst") - 1)) != NULL &&
761 				Z_TYPE_P(tmpzval) == IS_STRING) {
762 				error = U_ZERO_ERROR;
763 				ucnv_setSubstChars(dest_cnv, Z_STRVAL_P(tmpzval), Z_STRLEN_P(tmpzval) & 0x7F, &error);
764 			}
765 		}
766 
767 		if (U_SUCCESS(error) &&
768 			(ret = php_converter_do_convert(dest_cnv, src_cnv, str, str_len, NULL)) != NULL) {
769 			RETVAL_NEW_STR(ret);
770 		}
771 
772 		if (U_FAILURE(error)) {
773 			THROW_UFAILURE(NULL, "transcode", error);
774 			RETVAL_FALSE;
775 		}
776 	} else {
777 		RETVAL_FALSE;
778 	}
779 
780 	if (src_cnv) {
781 		ucnv_close(src_cnv);
782 	}
783 	if (dest_cnv) {
784 		ucnv_close(dest_cnv);
785 	}
786 }
787 /* }}} */
788 
789 /* {{{ */
PHP_METHOD(UConverter,getErrorCode)790 PHP_METHOD(UConverter, getErrorCode) {
791 	php_converter_object *objval = CONV_GET(ZEND_THIS);
792 
793 	ZEND_PARSE_PARAMETERS_NONE();
794 
795 	RETURN_LONG(intl_error_get_code(&(objval->error)));
796 }
797 /* }}} */
798 
799 /* {{{ */
PHP_METHOD(UConverter,getErrorMessage)800 PHP_METHOD(UConverter, getErrorMessage) {
801 	php_converter_object *objval = CONV_GET(ZEND_THIS);
802 
803 	ZEND_PARSE_PARAMETERS_NONE();
804 
805 	zend_string *message = intl_error_get_message(&(objval->error));
806 	if (message) {
807 		RETURN_STR(message);
808 	} else {
809 		RETURN_NULL();
810 	}
811 }
812 /* }}} */
813 
814 /* {{{ */
PHP_METHOD(UConverter,getAvailable)815 PHP_METHOD(UConverter, getAvailable) {
816 	int32_t i,
817 			count = ucnv_countAvailable();
818 
819 	ZEND_PARSE_PARAMETERS_NONE();
820 
821 	intl_error_reset(NULL);
822 
823 	array_init(return_value);
824 	for(i = 0; i < count; i++) {
825 		const char *name = ucnv_getAvailableName(i);
826 		add_next_index_string(return_value, name);
827 	}
828 }
829 /* }}} */
830 
831 /* {{{ */
PHP_METHOD(UConverter,getAliases)832 PHP_METHOD(UConverter, getAliases) {
833 	char *name;
834 	size_t name_len;
835 	UErrorCode error = U_ZERO_ERROR;
836 	uint16_t i, count;
837 
838 	ZEND_PARSE_PARAMETERS_START(1, 1)
839 		Z_PARAM_STRING(name, name_len)
840 	ZEND_PARSE_PARAMETERS_END();
841 	intl_error_reset(NULL);
842 
843 	count = ucnv_countAliases(name, &error);
844 	if (U_FAILURE(error)) {
845 		THROW_UFAILURE(NULL, "ucnv_countAliases", error);
846 		RETURN_FALSE;
847 	}
848 
849 	array_init(return_value);
850 	for(i = 0; i < count; i++) {
851 		const char *alias;
852 
853 		error = U_ZERO_ERROR;
854 		alias = ucnv_getAlias(name, i, &error);
855 		if (U_FAILURE(error)) {
856 			THROW_UFAILURE(NULL, "ucnv_getAlias", error);
857 			zend_array_destroy(Z_ARR_P(return_value));
858 			RETURN_NULL();
859 		}
860 		add_next_index_string(return_value, alias);
861 	}
862 }
863 /* }}} */
864 
865 /* {{{ */
PHP_METHOD(UConverter,getStandards)866 PHP_METHOD(UConverter, getStandards) {
867 	uint16_t i, count;
868 
869 	ZEND_PARSE_PARAMETERS_NONE();
870 	intl_error_reset(NULL);
871 
872 	array_init(return_value);
873 	count = ucnv_countStandards();
874 	for(i = 0; i < count; i++) {
875 		UErrorCode error = U_ZERO_ERROR;
876 		const char *name = ucnv_getStandard(i, &error);
877 		if (U_FAILURE(error)) {
878 			THROW_UFAILURE(NULL, "ucnv_getStandard", error);
879 			zend_array_destroy(Z_ARR_P(return_value));
880 			RETURN_NULL();
881 		}
882 		add_next_index_string(return_value, name);
883 	}
884 }
885 /* }}} */
886 
887 /* {{{ Converter create/clone/destroy */
/* free_obj handler: close whichever ICU converters are open, reset the
 * object's error state and run the standard object destructor. */
static void php_converter_free_object(zend_object *obj) {
	php_converter_object *conv = php_converter_fetch_object(obj);
	UConverter *handles[2];
	int idx;

	handles[0] = conv->src;
	handles[1] = conv->dest;
	for (idx = 0; idx < 2; idx++) {
		if (handles[idx]) {
			ucnv_close(handles[idx]);
		}
	}

	intl_error_reset(&conv->error);
	zend_object_std_dtor(obj);
}
902 
php_converter_object_ctor(zend_class_entry * ce,php_converter_object ** pobjval)903 static zend_object *php_converter_object_ctor(zend_class_entry *ce, php_converter_object **pobjval) {
904 	php_converter_object *objval;
905 
906 	objval = zend_object_alloc(sizeof(php_converter_object), ce);
907 
908 	zend_object_std_init(&objval->obj, ce);
909 	object_properties_init(&objval->obj, ce);
910 	intl_error_init(&(objval->error));
911 
912 	*pobjval = objval;
913 
914 	return &objval->obj;
915 }
916 
php_converter_create_object(zend_class_entry * ce)917 static zend_object *php_converter_create_object(zend_class_entry *ce) {
918 	php_converter_object *objval = NULL;
919 	zend_object *retval = php_converter_object_ctor(ce, &objval);
920 
921 	object_properties_init(&(objval->obj), ce);
922 
923 	return retval;
924 }
925 
/* clone_obj handler: create a new converter object of the same class with
 * clones of both ICU converters, callbacks bound to the new object and a copy
 * of the original's properties. If an ICU clone fails, an Error is thrown and
 * the partially initialised object is returned. */
static zend_object *php_converter_clone_object(zend_object *object) {
	php_converter_object *objval, *oldobj = php_converter_fetch_object(object);
	zend_object *retval = php_converter_object_ctor(object->ce, &objval);
	UErrorCode error = U_ZERO_ERROR;
	zval zclone;

#if U_ICU_VERSION_MAJOR_NUM > 70
	objval->src = ucnv_clone(oldobj->src, &error);
#else
	objval->src = ucnv_safeClone(oldobj->src, NULL, NULL, &error);
#endif
	if (U_SUCCESS(error)) {
		error = U_ZERO_ERROR;
#if U_ICU_VERSION_MAJOR_NUM > 70
		objval->dest = ucnv_clone(oldobj->dest, &error);
#else
		objval->dest = ucnv_safeClone(oldobj->dest, NULL, NULL, &error);
#endif
	}

	if (U_FAILURE(error)) {
		zend_throw_error(NULL, "Failed to clone UConverter");
		return retval;
	}

	/* The ICU callbacks registered below use the clone as their context and
	 * dispatch through its to_cb/from_cb, which start out zeroed. Resolve
	 * them against the clone, exactly as the constructor does for $this. */
	ZVAL_OBJ(&zclone, retval);
	php_converter_resolve_callback(&zclone, objval, "toUCallback",   &(objval->to_cb),   &(objval->to_cache));
	php_converter_resolve_callback(&zclone, objval, "fromUCallback", &(objval->from_cb), &(objval->from_cache));

	/* Update contexts for converter error handlers */
	php_converter_set_callbacks(objval, objval->src );
	php_converter_set_callbacks(objval, objval->dest);

	zend_objects_clone_members(&(objval->obj), &(oldobj->obj));

	/* Newly cloned object deliberately does not inherit error state from original object */

	return retval;
}
960 /* }}} */
961 
962 /* {{{ php_converter_minit */
php_converter_minit(INIT_FUNC_ARGS)963 int php_converter_minit(INIT_FUNC_ARGS) {
964 	php_converter_ce = register_class_UConverter();
965 	php_converter_ce->create_object = php_converter_create_object;
966 	php_converter_ce->default_object_handlers = &php_converter_object_handlers;
967 	memcpy(&php_converter_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
968 	php_converter_object_handlers.offset = XtOffsetOf(php_converter_object, obj);
969 	php_converter_object_handlers.clone_obj = php_converter_clone_object;
970 	php_converter_object_handlers.free_obj = php_converter_free_object;
971 
972 	return SUCCESS;
973 }
974 /* }}} */
975