xref: /php-src/ext/intl/converter/converter.c (revision 7ba97880)
1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Sara Golemon <pollita@php.net>                              |
12    +----------------------------------------------------------------------+
13  */
14 
15 #include "converter.h"
16 #include "zend_exceptions.h"
17 
18 #include <unicode/utypes.h>
19 #include <unicode/utf8.h>
20 #include <unicode/utf16.h>
21 #include <unicode/ucnv.h>
22 #include <unicode/ustring.h>
23 
24 #include "../intl_error.h"
25 #include "../intl_common.h"
26 #include "converter_arginfo.h"
27 
28 typedef struct _php_converter_object {
29 	UConverter *src, *dest;
30 	zend_fcall_info_cache to_cache, from_cache;
31 	intl_error error;
32 	zend_object obj;
33 } php_converter_object;
34 
35 
php_converter_fetch_object(zend_object * obj)36 static inline php_converter_object *php_converter_fetch_object(zend_object *obj) {
37 	return (php_converter_object *)((char*)(obj) - XtOffsetOf(php_converter_object, obj));
38 }
39 #define Z_INTL_CONVERTER_P(zv) php_converter_fetch_object(Z_OBJ_P(zv))
40 
41 static zend_class_entry     *php_converter_ce;
42 static zend_object_handlers  php_converter_object_handlers;
43 
44 #define CONV_GET(pzv)  (Z_INTL_CONVERTER_P((pzv)))
45 #define THROW_UFAILURE(obj, fname, error) php_converter_throw_failure(obj, error, \
46                                           fname "() returned error " ZEND_LONG_FMT ": %s", (zend_long)error, u_errorName(error))
47 
48 /* {{{ php_converter_throw_failure */
php_converter_throw_failure(php_converter_object * objval,UErrorCode error,const char * format,...)49 static inline void php_converter_throw_failure(php_converter_object *objval, UErrorCode error, const char *format, ...) {
50 	intl_error *err = objval ? &(objval->error) : NULL;
51 	char message[1024];
52 	va_list vargs;
53 
54 	va_start(vargs, format);
55 	vsnprintf(message, sizeof(message), format, vargs);
56 	va_end(vargs);
57 
58 	intl_errors_set(err, error, message, 1);
59 }
60 /* }}} */
61 
62 /* {{{ php_converter_default_callback */
php_converter_default_callback(zval * return_value,zval * zobj,zend_long reason,zval * error)63 static void php_converter_default_callback(zval *return_value, zval *zobj, zend_long reason, zval *error) {
64 	/* Basic functionality so children can call parent::toUCallback() */
65 	switch (reason) {
66 		case UCNV_UNASSIGNED:
67 		case UCNV_ILLEGAL:
68 		case UCNV_IRREGULAR:
69 		{
70 			php_converter_object *objval = (php_converter_object*)CONV_GET(zobj);
71 			char chars[127];
72 			int8_t chars_len = sizeof(chars);
73 			UErrorCode uerror = U_ZERO_ERROR;
74 			if(!objval->src) {
75 				php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Source Converter has not been initialized yet");
76 				chars[0] = 0x1A;
77 				chars[1] = 0;
78 				chars_len = 1;
79 				ZEND_TRY_ASSIGN_REF_LONG(error, U_INVALID_STATE_ERROR);
80 				RETVAL_STRINGL(chars, chars_len);
81 				return;
82 			}
83 
84 			/* Yes, this is fairly wasteful at first glance,
85 			 * but considering that the alternative is to store
86 			 * what's sent into setSubstChars() and the fact
87 			 * that this is an extremely unlikely codepath
88 			 * I'd rather take the CPU hit here, than waste time
89 			 * storing a value I'm unlikely to use.
90 			 */
91 			ucnv_getSubstChars(objval->src, chars, &chars_len, &uerror);
92 			if (U_FAILURE(uerror)) {
93 				THROW_UFAILURE(objval, "ucnv_getSubstChars", uerror);
94 				chars[0] = 0x1A;
95 				chars[1] = 0;
96 				chars_len = 1;
97 			}
98 			ZEND_TRY_ASSIGN_REF_LONG(error, uerror);
99 			RETVAL_STRINGL(chars, chars_len);
100 		}
101 	}
102 }
103 /* }}} */
104 
105 /* {{{ */
PHP_METHOD(UConverter,toUCallback)106 PHP_METHOD(UConverter, toUCallback) {
107 	zend_long reason;
108 	zend_string *source, *codeUnits;
109 	zval *error;
110 
111 	ZEND_PARSE_PARAMETERS_START(4, 4)
112 		Z_PARAM_LONG(reason)
113 		Z_PARAM_STR(source)
114 		Z_PARAM_STR(codeUnits)
115 		Z_PARAM_ZVAL(error)
116 	ZEND_PARSE_PARAMETERS_END();
117 
118 	php_converter_default_callback(return_value, ZEND_THIS, reason, error);
119 }
120 /* }}} */
121 
122 /* {{{ */
PHP_METHOD(UConverter,fromUCallback)123 PHP_METHOD(UConverter, fromUCallback) {
124 	zend_long reason;
125 	zval *source, *error;
126 	zend_long codePoint;
127 
128 	ZEND_PARSE_PARAMETERS_START(4, 4)
129 		Z_PARAM_LONG(reason)
130 		Z_PARAM_ARRAY(source)
131 		Z_PARAM_LONG(codePoint)
132 		Z_PARAM_ZVAL(error)
133 	ZEND_PARSE_PARAMETERS_END();
134 
135 	php_converter_default_callback(return_value, ZEND_THIS, reason, error);
136 }
137 /* }}} */
138 
139 /* {{{ php_converter_check_limits */
/* Return true when `available` output units cover `needed`; otherwise record
 * a U_BUFFER_OVERFLOW_ERROR failure and return false.
 */
static inline bool php_converter_check_limits(php_converter_object *objval, zend_long available, zend_long needed) {
	if (available >= needed) {
		return 1;
	}

	php_converter_throw_failure(objval, U_BUFFER_OVERFLOW_ERROR, "Buffer overrun " ZEND_LONG_FMT " bytes needed, " ZEND_LONG_FMT " available", needed, available);
	return 0;
}
/* }}} */

/* Check the room left in an ICU callback's target buffer.
 * Relies on a variable named `objval` being in scope at the point of use.
 */
#define TARGET_CHECK(cnvargs, needed) php_converter_check_limits(objval, cnvargs->targetLimit - cnvargs->target, needed)
150 
151 /* {{{ php_converter_append_toUnicode_target */
/* Append the value returned by toUCallback() to the UTF-16 target buffer of
 * an in-progress ICU conversion.
 *
 *   NULL   - nothing is written (the offending code unit is skipped)
 *   int    - a single code point in 0..0x10FFFF; values above 0xFFFF are
 *            written as a surrogate pair
 *   string - interpreted as UTF-8, every decoded code point is appended
 *   array  - each element is handled recursively by the rules above
 *
 * Any other type, an out-of-range code point, or lack of room in the target
 * buffer records a failure on objval via php_converter_throw_failure().
 */
static void php_converter_append_toUnicode_target(zval *val, UConverterToUnicodeArgs *args, php_converter_object *objval) {
	switch (Z_TYPE_P(val)) {
		case IS_NULL:
			/* Code unit is being skipped */
			return;
		case IS_LONG:
		{
			zend_long lval = Z_LVAL_P(val);
			if ((lval < 0) || (lval > 0x10FFFF)) {
				/* zend_long is wider than long on some platforms, so print
				 * through an explicit long long conversion */
				php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR, "Invalid codepoint U+%04llx", (unsigned long long)lval);
				return;
			}
			if (lval > 0xFFFF) {
				/* Supplemental planes U+010000 - U+10FFFF */
				if (TARGET_CHECK(args, 2)) {
					*(args->target++) = U16_LEAD(lval);
					*(args->target++) = U16_TRAIL(lval);
				}
				return;
			}
			/* Non-surrogate-pair (BMP) codepoint */
			if (TARGET_CHECK(args, 1)) {
				*(args->target++) = (UChar)lval;
			}
			return;
		}
		case IS_STRING:
		{
			const char *strval = Z_STRVAL_P(val);
			size_t raw_len = Z_STRLEN_P(val);
			int32_t pos = 0, len;

			/* U8_NEXT works with 32-bit offsets; refuse anything that
			 * would not survive the conversion */
			if (raw_len > INT32_MAX) {
				php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR,
				                            "toUCallback() returned a substitution string that is too long");
				return;
			}
			len = (int32_t)raw_len;

			while (pos < len) {
				/* U8_NEXT yields a full code point (or a negative value for
				 * an ill-formed sequence), so it must not be stored in a
				 * 16-bit UChar directly */
				UChar32 c;
				U8_NEXT(strval, pos, len, c);
				if (c < 0) {
					/* Ill-formed UTF-8: emit U+FFFD REPLACEMENT CHARACTER */
					c = 0xFFFD;
				}
				if (!TARGET_CHECK(args, U16_LENGTH(c))) {
					return;
				}
				if (c > 0xFFFF) {
					*(args->target++) = U16_LEAD(c);
					*(args->target++) = U16_TRAIL(c);
				} else {
					*(args->target++) = (UChar)c;
				}
			}
			return;
		}
		case IS_ARRAY:
		{
			HashTable *ht = Z_ARRVAL_P(val);
			zval *tmpzval;

			ZEND_HASH_FOREACH_VAL(ht, tmpzval) {
				php_converter_append_toUnicode_target(tmpzval, args, objval);
			} ZEND_HASH_FOREACH_END();
			return;
		}
		default:
			php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR,
			                            "toUCallback() specified illegal type for substitution character");
	}
}
206 /* }}} */
207 
208 /* {{{ php_converter_to_u_callback */
/* ICU "to Unicode" error callback. `context` is the php_converter_object
 * registered in php_converter_set_callbacks().
 *
 * Calls the object's toUCallback(reason, source, codeUnits, &error) through
 * the cached call target, appends whatever it returns to the target buffer,
 * and copies the (possibly modified) error value back into *pErrorCode.
 */
static void php_converter_to_u_callback(const void *context,
                                        UConverterToUnicodeArgs *args,
                                        const char *codeUnits, int32_t length,
                                        UConverterCallbackReason reason,
                                        UErrorCode *pErrorCode) {
	php_converter_object *objval = (php_converter_object*)context;
	zval params[4];
	zval result;
	zval *err_slot = &params[3];
	int idx;

	ZVAL_LONG(&params[0], reason);

	/* Remaining source bytes, or "" when ICU supplies no source pointer */
	if (!args->source) {
		ZVAL_EMPTY_STRING(&params[1]);
	} else {
		ZVAL_STRINGL(&params[1], args->source, args->sourceLimit - args->source);
	}

	/* The offending code units, or "" when none were supplied */
	if (!codeUnits) {
		ZVAL_EMPTY_STRING(&params[2]);
	} else {
		ZVAL_STRINGL(&params[2], codeUnits, length);
	}

	/* The error code is passed by reference so userland can change it */
	ZVAL_LONG(err_slot, *pErrorCode);
	ZVAL_MAKE_REF(err_slot);

	zend_call_known_fcc(&objval->to_cache, &result, 4, params, NULL);
	/* When no exception is thrown */
	if (EXPECTED(!Z_ISUNDEF(result))) {
		php_converter_append_toUnicode_target(&result, args, objval);
		zval_ptr_dtor(&result);
	}

	/* Only an integer is copied back; any other type leaves *pErrorCode as is */
	if (Z_TYPE_P(err_slot) == IS_LONG) {
		*pErrorCode = Z_LVAL_P(err_slot);
	} else if (Z_ISREF_P(err_slot) && Z_TYPE_P(Z_REFVAL_P(err_slot)) == IS_LONG) {
		*pErrorCode = Z_LVAL_P(Z_REFVAL_P(err_slot));
	}

	for (idx = 0; idx < 4; idx++) {
		zval_ptr_dtor(&params[idx]);
	}
}
250 /* }}} */
251 
252 /* {{{ php_converter_append_fromUnicode_target */
/* Append the value returned by fromUCallback() to the byte target buffer of
 * an in-progress ICU conversion.
 *
 *   NULL   - nothing is written
 *   int    - stored as one target unit (after checking for room)
 *   string - its bytes are copied verbatim if the whole string fits
 *   array  - each element is handled recursively by the rules above
 *
 * Any other type records a U_ILLEGAL_ARGUMENT_ERROR failure on objval.
 */
static void php_converter_append_fromUnicode_target(zval *val, UConverterFromUnicodeArgs *args, php_converter_object *objval) {
	const int type = Z_TYPE_P(val);

	if (type == IS_NULL) {
		/* Ignore */
		return;
	}

	if (type == IS_LONG) {
		if (TARGET_CHECK(args, 1)) {
			*(args->target++) = Z_LVAL_P(val);
		}
		return;
	}

	if (type == IS_STRING) {
		size_t nbytes = Z_STRLEN_P(val);

		if (TARGET_CHECK(args, nbytes)) {
			args->target = zend_mempcpy(args->target, Z_STRVAL_P(val), nbytes);
		}
		return;
	}

	if (type == IS_ARRAY) {
		HashTable *items = Z_ARRVAL_P(val);
		zval *item;

		ZEND_HASH_FOREACH_VAL(items, item) {
			php_converter_append_fromUnicode_target(item, args, objval);
		} ZEND_HASH_FOREACH_END();
		return;
	}

	php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR, "fromUCallback() specified illegal type for substitution character");
}
284 /* }}} */
285 
286 /* {{{ php_converter_from_u_callback */
/* ICU "from Unicode" error callback. `context` is the php_converter_object
 * registered in php_converter_set_callbacks().
 *
 * Calls the object's fromUCallback(reason, source, codePoint, &error) through
 * the cached call target, where `source` is an array of the code points
 * decoded from codeUnits. The returned value is appended to the target
 * buffer and the (possibly modified) error value is copied to *pErrorCode.
 */
static void php_converter_from_u_callback(const void *context,
                                          UConverterFromUnicodeArgs *args,
                                          const UChar *codeUnits, int32_t length, UChar32 codePoint,
                                          UConverterCallbackReason reason,
                                          UErrorCode *pErrorCode) {
	php_converter_object *objval = (php_converter_object*)context;
	zval params[4];
	zval result;
	zval *err_slot = &params[3];
	int pos, idx;

	ZVAL_LONG(&params[0], reason);

	/* Decode the UTF-16 code units into a PHP array of code points */
	array_init(&params[1]);
	for (pos = 0; pos < length; ) {
		UChar32 cp;
		U16_NEXT(codeUnits, pos, length, cp);
		add_next_index_long(&params[1], cp);
	}

	ZVAL_LONG(&params[2], codePoint);

	/* The error code is passed by reference so userland can change it */
	ZVAL_LONG(err_slot, *pErrorCode);
	ZVAL_MAKE_REF(err_slot);

	zend_call_known_fcc(&objval->from_cache, &result, 4, params, NULL);
	/* When no exception is thrown */
	if (EXPECTED(!Z_ISUNDEF(result))) {
		php_converter_append_fromUnicode_target(&result, args, objval);
		zval_ptr_dtor(&result);
	}

	/* Only an integer is copied back; any other type leaves *pErrorCode as is */
	if (Z_TYPE_P(err_slot) == IS_LONG) {
		*pErrorCode = Z_LVAL_P(err_slot);
	} else if (Z_ISREF_P(err_slot) && Z_TYPE_P(Z_REFVAL_P(err_slot)) == IS_LONG) {
		*pErrorCode = Z_LVAL_P(Z_REFVAL_P(err_slot));
	}

	for (idx = 0; idx < 4; idx++) {
		zval_ptr_dtor(&params[idx]);
	}
}
326 /* }}} */
327 
328 /* {{{ php_converter_set_callbacks */
/* Install the PHP-level to/from Unicode error callbacks on `cnv`, with
 * `objval` as their context.
 *
 * Objects whose class is exactly UConverter keep ICU's own callbacks, which
 * avoids method calls and data marshalling for the default behaviour.
 * Returns false if either ICU registration call fails (both are attempted).
 */
static inline bool php_converter_set_callbacks(php_converter_object *objval, UConverter *cnv) {
	UErrorCode status;
	bool ok = true;

	if (objval->obj.ce == php_converter_ce) {
		return 1;
	}

	status = U_ZERO_ERROR;
	ucnv_setToUCallBack(cnv, (UConverterToUCallback)php_converter_to_u_callback, (const void*)objval,
	                    NULL, NULL, &status);
	if (U_FAILURE(status)) {
		THROW_UFAILURE(objval, "ucnv_setToUCallBack", status);
		ok = 0;
	}

	status = U_ZERO_ERROR;
	ucnv_setFromUCallBack(cnv, (UConverterFromUCallback)php_converter_from_u_callback, (const void*)objval,
	                      NULL, NULL, &status);
	if (U_FAILURE(status)) {
		THROW_UFAILURE(objval, "ucnv_setFromUCallBack", status);
		ok = 0;
	}

	return ok;
}
356 /* }}} */
357 
358 /* {{{ php_converter_set_encoding */
/* Open an ICU converter for `enc` and store it in *pcnv, closing whatever
 * converter *pcnv held before.
 *
 *   objval  - owning object, or NULL for the static (transcode) use case;
 *             when given, failures are recorded on it and the PHP callbacks
 *             are installed on the new converter
 *   pcnv    - slot that receives the new converter; left untouched on failure
 *   enc     - encoding name passed to ucnv_open()
 *   enc_len - length of enc (currently unused)
 *
 * An ambiguous alias only raises an E_WARNING and still succeeds.
 * Returns true on success, false on failure.
 */
static bool php_converter_set_encoding(php_converter_object *objval,
                                       UConverter **pcnv,
                                       const char *enc, size_t enc_len) {
	UErrorCode error = U_ZERO_ERROR;
	UConverter *cnv = ucnv_open(enc, &error);

	if (error == U_AMBIGUOUS_ALIAS_WARNING) {
		UErrorCode getname_error = U_ZERO_ERROR;
		const char *actual_encoding = ucnv_getName(cnv, &getname_error);
		if (U_FAILURE(getname_error)) {
			/* Should never happen */
			actual_encoding = "(unknown)";
		}
		php_error_docref(NULL, E_WARNING, "Ambiguous encoding specified, using %s", actual_encoding);
	} else if (U_FAILURE(error)) {
		if (objval) {
			THROW_UFAILURE(objval, "ucnv_open", error);
		} else {
			php_error_docref(NULL, E_WARNING, "Error setting encoding: %d - %s", (int)error, u_errorName(error));
		}
		return 0;
	}

	if (objval && !php_converter_set_callbacks(objval, cnv)) {
		/* Ownership of the new converter was never handed over to *pcnv,
		 * so it has to be released here or it would leak */
		ucnv_close(cnv);
		return 0;
	}

	if (*pcnv) {
		ucnv_close(*pcnv);
	}
	*pcnv = cnv;
	return 1;
}
392 /* }}} */
393 
394 /* {{{ php_converter_do_set_encoding */
/* Shared implementation of setSourceEncoding()/setDestinationEncoding():
 * parses the single encoding-name argument, clears the object's error state
 * and returns whether the converter in *pcnv could be replaced.
 */
static void php_converter_do_set_encoding(UConverter **pcnv, INTERNAL_FUNCTION_PARAMETERS) {
	php_converter_object *self = CONV_GET(ZEND_THIS);
	size_t name_len;
	char *name;
	bool ok;

	ZEND_PARSE_PARAMETERS_START(1, 1)
		Z_PARAM_STRING(name, name_len)
	ZEND_PARSE_PARAMETERS_END();

	intl_errors_reset(&self->error);

	ok = php_converter_set_encoding(self, pcnv, name, name_len);
	RETURN_BOOL(ok);
}
408 /* }}} */
409 
410 /* {{{ */
PHP_METHOD(UConverter,setSourceEncoding)411 PHP_METHOD(UConverter, setSourceEncoding) {
412 	php_converter_object *objval = CONV_GET(ZEND_THIS);
413 	php_converter_do_set_encoding(&(objval->src), INTERNAL_FUNCTION_PARAM_PASSTHRU);
414 }
415 /* }}} */
416 
417 /* {{{ */
PHP_METHOD(UConverter,setDestinationEncoding)418 PHP_METHOD(UConverter, setDestinationEncoding) {
419 	php_converter_object *objval = CONV_GET(ZEND_THIS);
420 	php_converter_do_set_encoding(&(objval->dest), INTERNAL_FUNCTION_PARAM_PASSTHRU);
421 }
422 /* }}} */
423 
424 /* {{{ php_converter_do_get_encoding */
/* Shared implementation of getSourceEncoding()/getDestinationEncoding().
 *
 * Returns NULL when `cnv` has not been opened, false (with a failure recorded
 * on objval) when ICU cannot report the name, and the converter's name
 * otherwise.
 */
static void php_converter_do_get_encoding(php_converter_object *objval, UConverter *cnv, INTERNAL_FUNCTION_PARAMETERS) {
	UErrorCode status = U_ZERO_ERROR;
	const char *name;

	ZEND_PARSE_PARAMETERS_NONE();

	intl_errors_reset(&objval->error);

	if (!cnv) {
		RETURN_NULL();
	}

	name = ucnv_getName(cnv, &status);
	if (U_FAILURE(status)) {
		/* THROW_UFAILURE appends "()" itself, so pass the bare function name */
		THROW_UFAILURE(objval, "ucnv_getName", status);
		RETURN_FALSE;
	}

	RETURN_STRING(name);
}
444 /* }}} */
445 
446 /* {{{ */
PHP_METHOD(UConverter,getSourceEncoding)447 PHP_METHOD(UConverter, getSourceEncoding) {
448 	php_converter_object *objval = CONV_GET(ZEND_THIS);
449 	php_converter_do_get_encoding(objval, objval->src, INTERNAL_FUNCTION_PARAM_PASSTHRU);
450 }
451 /* }}} */
452 
453 /* {{{ */
PHP_METHOD(UConverter,getDestinationEncoding)454 PHP_METHOD(UConverter, getDestinationEncoding) {
455 	php_converter_object *objval = CONV_GET(ZEND_THIS);
456 	php_converter_do_get_encoding(objval, objval->dest, INTERNAL_FUNCTION_PARAM_PASSTHRU);
457 }
458 /* }}} */
459 
460 /* {{{ php_converter_do_get_type */
/* Shared implementation of getSourceType()/getDestinationType().
 *
 * Returns NULL when `cnv` has not been opened, otherwise the value reported
 * by ucnv_getType() as an integer.
 *
 * ucnv_getType() has no error out-parameter, and the object's error state is
 * reset immediately before the call, so there is no failure to test for here
 * (the check that used to follow the call could never trigger).
 */
static void php_converter_do_get_type(php_converter_object *objval, UConverter *cnv, INTERNAL_FUNCTION_PARAMETERS) {
	ZEND_PARSE_PARAMETERS_NONE();
	intl_errors_reset(&objval->error);

	if (!cnv) {
		RETURN_NULL();
	}

	RETURN_LONG(ucnv_getType(cnv));
}
479 /* }}} */
480 
481 /* {{{ */
PHP_METHOD(UConverter,getSourceType)482 PHP_METHOD(UConverter, getSourceType) {
483 	php_converter_object *objval = CONV_GET(ZEND_THIS);
484 	php_converter_do_get_type(objval, objval->src, INTERNAL_FUNCTION_PARAM_PASSTHRU);
485 }
486 /* }}} */
487 
488 /* {{{ */
PHP_METHOD(UConverter,getDestinationType)489 PHP_METHOD(UConverter, getDestinationType) {
490 	php_converter_object *objval = CONV_GET(ZEND_THIS);
491 	php_converter_do_get_type(objval, objval->dest, INTERNAL_FUNCTION_PARAM_PASSTHRU);
492 }
493 /* }}} */
494 
495 /* {{{ php_converter_resolve_callback */
/* Fill `fcc` so that it calls the method `callback_name` of `obj`.
 * The method is looked up (case-insensitively) in the function table of the
 * object's class and is asserted to exist.
 */
static void php_converter_resolve_callback(
	zend_fcall_info_cache *fcc,
	zend_object *obj,
	const char *callback_name,
	size_t callback_name_len
) {
	zend_class_entry *ce = obj->ce;
	zend_function *method = zend_hash_str_find_ptr_lc(&ce->function_table, callback_name, callback_name_len);

	ZEND_ASSERT(method != NULL);

	fcc->closure = NULL;
	fcc->calling_scope = NULL;
	fcc->called_scope = ce;
	fcc->object = obj;
	fcc->function_handler = method;
}
511 /* }}} */
512 
513 /* {{{ */
PHP_METHOD(UConverter,__construct)514 PHP_METHOD(UConverter, __construct) {
515 	php_converter_object *objval = CONV_GET(ZEND_THIS);
516 	char *src = "utf-8";
517 	size_t src_len = sizeof("utf-8") - 1;
518 	char *dest = src;
519 	size_t dest_len = src_len;
520 
521 	intl_error_reset(NULL);
522 
523 	ZEND_PARSE_PARAMETERS_START(0, 2)
524 		Z_PARAM_OPTIONAL
525 		Z_PARAM_STRING_OR_NULL(dest, dest_len)
526 		Z_PARAM_STRING_OR_NULL(src, src_len)
527 	ZEND_PARSE_PARAMETERS_END();
528 
529 	php_converter_set_encoding(objval, &(objval->src),  src,  src_len );
530 	php_converter_set_encoding(objval, &(objval->dest), dest, dest_len);
531 	php_converter_resolve_callback(&objval->to_cache, Z_OBJ_P(ZEND_THIS), ZEND_STRL("toUCallback"));
532 	php_converter_resolve_callback(&objval->from_cache, Z_OBJ_P(ZEND_THIS), ZEND_STRL("fromUCallback"));
533 }
534 /* }}} */
535 
536 /* {{{ */
PHP_METHOD(UConverter,setSubstChars)537 PHP_METHOD(UConverter, setSubstChars) {
538 	php_converter_object *objval = CONV_GET(ZEND_THIS);
539 	char *chars;
540 	size_t chars_len;
541 	int ret = 1;
542 
543 	ZEND_PARSE_PARAMETERS_START(1, 1)
544 		Z_PARAM_STRING(chars, chars_len)
545 	ZEND_PARSE_PARAMETERS_END();
546 
547 	intl_errors_reset(&objval->error);
548 
549 	if (objval->src) {
550 		UErrorCode error = U_ZERO_ERROR;
551 		ucnv_setSubstChars(objval->src, chars, chars_len, &error);
552 		if (U_FAILURE(error)) {
553 			THROW_UFAILURE(objval, "ucnv_setSubstChars", error);
554 			ret = 0;
555 		}
556 	} else {
557 		php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Source Converter has not been initialized yet");
558 		ret = 0;
559 	}
560 
561 	if (objval->dest) {
562 		UErrorCode error = U_ZERO_ERROR;
563 		ucnv_setSubstChars(objval->dest, chars, chars_len, &error);
564 		if (U_FAILURE(error)) {
565 			THROW_UFAILURE(objval, "ucnv_setSubstChars", error);
566 			ret = 0;
567 		}
568 	} else {
569 		php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Destination Converter has not been initialized yet");
570 		ret = 0;
571 	}
572 
573 	RETURN_BOOL(ret);
574 }
575 /* }}} */
576 
577 /* {{{ */
PHP_METHOD(UConverter,getSubstChars)578 PHP_METHOD(UConverter, getSubstChars) {
579 	php_converter_object *objval = CONV_GET(ZEND_THIS);
580 	char chars[127];
581 	int8_t chars_len = sizeof(chars);
582 	UErrorCode error = U_ZERO_ERROR;
583 
584 	ZEND_PARSE_PARAMETERS_NONE();
585 	intl_errors_reset(&objval->error);
586 
587 	if (!objval->src) {
588 		RETURN_NULL();
589 	}
590 
591 	/* src and dest get the same subst chars set,
592 	 * so it doesn't really matter which one we read from
593 	 */
594 	ucnv_getSubstChars(objval->src, chars, &chars_len, &error);
595 	if (U_FAILURE(error)) {
596 		THROW_UFAILURE(objval, "ucnv_getSubstChars", error);
597 		RETURN_FALSE;
598 	}
599 
600 	RETURN_STRINGL(chars, chars_len);
601 }
602 /* }}} */
603 
604 /* {{{ php_converter_do_convert */
php_converter_do_convert(UConverter * dest_cnv,UConverter * src_cnv,const char * src,int32_t src_len,php_converter_object * objval)605 static zend_string* php_converter_do_convert(UConverter *dest_cnv,
606                                              UConverter *src_cnv,  const char *src, int32_t src_len,
607                                              php_converter_object *objval
608                                             ) {
609 	UErrorCode	error = U_ZERO_ERROR;
610 	int32_t		temp_len, ret_len;
611 	zend_string	*ret;
612 	UChar		*temp;
613 
614 	if (!src_cnv || !dest_cnv) {
615 		php_converter_throw_failure(objval, U_INVALID_STATE_ERROR,
616 		                            "Internal converters not initialized");
617 		return NULL;
618 	}
619 
620 	/* Get necessary buffer size first */
621 	temp_len = 1 + ucnv_toUChars(src_cnv, NULL, 0, src, src_len, &error);
622 	if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
623 		THROW_UFAILURE(objval, "ucnv_toUChars", error);
624 		return NULL;
625 	}
626 	temp = safe_emalloc(sizeof(UChar), temp_len, sizeof(UChar));
627 
628 	/* Convert to intermediate UChar* array */
629 	error = U_ZERO_ERROR;
630 	temp_len = ucnv_toUChars(src_cnv, temp, temp_len, src, src_len, &error);
631 	if (U_FAILURE(error)) {
632 		THROW_UFAILURE(objval, "ucnv_toUChars", error);
633 		efree(temp);
634 		return NULL;
635 	}
636 	temp[temp_len] = 0;
637 
638 	/* Get necessary output buffer size */
639 	ret_len = ucnv_fromUChars(dest_cnv, NULL, 0, temp, temp_len, &error);
640 	if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
641 		THROW_UFAILURE(objval, "ucnv_fromUChars", error);
642 		efree(temp);
643 		return NULL;
644 	}
645 
646 	ret = zend_string_alloc(ret_len, 0);
647 
648 	/* Convert to final encoding */
649 	error = U_ZERO_ERROR;
650 	ZSTR_LEN(ret) = ucnv_fromUChars(dest_cnv, ZSTR_VAL(ret), ret_len+1, temp, temp_len, &error);
651 	efree(temp);
652 	if (U_FAILURE(error)) {
653 		THROW_UFAILURE(objval, "ucnv_fromUChars", error);
654 		zend_string_efree(ret);
655 		return NULL;
656 	}
657 
658 	return ret;
659 }
660 /* }}} */
661 
662 /* {{{ */
663 #define UCNV_REASON_CASE(v) case (UCNV_ ## v) : RETURN_STRINGL( "REASON_" #v , sizeof( "REASON_" #v ) - 1);
PHP_METHOD(UConverter,reasonText)664 PHP_METHOD(UConverter, reasonText) {
665 	zend_long reason;
666 
667 	ZEND_PARSE_PARAMETERS_START(1, 1)
668 		Z_PARAM_LONG(reason)
669 	ZEND_PARSE_PARAMETERS_END();
670 	intl_error_reset(NULL);
671 
672 	switch (reason) {
673 		UCNV_REASON_CASE(UNASSIGNED)
674 		UCNV_REASON_CASE(ILLEGAL)
675 		UCNV_REASON_CASE(IRREGULAR)
676 		UCNV_REASON_CASE(RESET)
677 		UCNV_REASON_CASE(CLOSE)
678 		UCNV_REASON_CASE(CLONE)
679 		default:
680 			zend_argument_value_error(1, "must be a UConverter::REASON_* constant");
681 			RETURN_THROWS();
682 	}
683 }
684 /* }}} */
685 
686 /* {{{ */
PHP_METHOD(UConverter,convert)687 PHP_METHOD(UConverter, convert) {
688 	php_converter_object *objval = CONV_GET(ZEND_THIS);
689 	char *str;
690 	size_t str_len;
691 	zend_string *ret;
692 	bool reverse = false;
693 
694 	ZEND_PARSE_PARAMETERS_START(1, 2)
695 		Z_PARAM_STRING(str, str_len)
696 		Z_PARAM_OPTIONAL
697 		Z_PARAM_BOOL(reverse)
698 	ZEND_PARSE_PARAMETERS_END();
699 	intl_errors_reset(&objval->error);
700 
701 	ret = php_converter_do_convert(reverse ? objval->src : objval->dest,
702 	                               reverse ? objval->dest : objval->src,
703 	                               str,   str_len,
704 	                               objval);
705 	if (ret) {
706 		RETURN_NEW_STR(ret);
707 	} else {
708 		RETURN_FALSE;
709 	}
710 }
711 /* }}} */
712 
713 /* {{{ */
PHP_METHOD(UConverter,transcode)714 PHP_METHOD(UConverter, transcode) {
715 	char *str, *src, *dest;
716 	size_t str_len, src_len, dest_len;
717 	zval *options = NULL;
718 	UConverter *src_cnv = NULL, *dest_cnv = NULL;
719 
720 	ZEND_PARSE_PARAMETERS_START(3, 4)
721 		Z_PARAM_STRING(str, str_len)
722 		Z_PARAM_STRING(dest, dest_len)
723 		Z_PARAM_STRING(src, src_len)
724 		Z_PARAM_OPTIONAL
725 		Z_PARAM_ARRAY_OR_NULL(options)
726 	ZEND_PARSE_PARAMETERS_END();
727 	intl_error_reset(NULL);
728 
729 	if (php_converter_set_encoding(NULL, &src_cnv,  src,  src_len) &&
730 	    php_converter_set_encoding(NULL, &dest_cnv, dest, dest_len)) {
731 	    zend_string *ret;
732 		UErrorCode error = U_ZERO_ERROR;
733 
734 		if (options && zend_hash_num_elements(Z_ARRVAL_P(options))) {
735 			zval *tmpzval;
736 
737 			if (U_SUCCESS(error) &&
738 				(tmpzval = zend_hash_str_find(Z_ARRVAL_P(options), "from_subst", sizeof("from_subst") - 1)) != NULL &&
739 				Z_TYPE_P(tmpzval) == IS_STRING) {
740 				error = U_ZERO_ERROR;
741 				ucnv_setSubstChars(src_cnv, Z_STRVAL_P(tmpzval), Z_STRLEN_P(tmpzval) & 0x7F, &error);
742 			}
743 			if (U_SUCCESS(error) &&
744 				(tmpzval = zend_hash_str_find(Z_ARRVAL_P(options), "to_subst", sizeof("to_subst") - 1)) != NULL &&
745 				Z_TYPE_P(tmpzval) == IS_STRING) {
746 				error = U_ZERO_ERROR;
747 				ucnv_setSubstChars(dest_cnv, Z_STRVAL_P(tmpzval), Z_STRLEN_P(tmpzval) & 0x7F, &error);
748 			}
749 		}
750 
751 		if (U_SUCCESS(error) &&
752 			(ret = php_converter_do_convert(dest_cnv, src_cnv, str, str_len, NULL)) != NULL) {
753 			RETVAL_NEW_STR(ret);
754 		}
755 
756 		if (U_FAILURE(error)) {
757 			THROW_UFAILURE(NULL, "transcode", error);
758 			RETVAL_FALSE;
759 		}
760 	} else {
761 		RETVAL_FALSE;
762 	}
763 
764 	if (src_cnv) {
765 		ucnv_close(src_cnv);
766 	}
767 	if (dest_cnv) {
768 		ucnv_close(dest_cnv);
769 	}
770 }
771 /* }}} */
772 
773 /* {{{ */
PHP_METHOD(UConverter,getErrorCode)774 PHP_METHOD(UConverter, getErrorCode) {
775 	php_converter_object *objval = CONV_GET(ZEND_THIS);
776 
777 	ZEND_PARSE_PARAMETERS_NONE();
778 
779 	RETURN_LONG(intl_error_get_code(&(objval->error)));
780 }
781 /* }}} */
782 
783 /* {{{ */
PHP_METHOD(UConverter,getErrorMessage)784 PHP_METHOD(UConverter, getErrorMessage) {
785 	php_converter_object *objval = CONV_GET(ZEND_THIS);
786 
787 	ZEND_PARSE_PARAMETERS_NONE();
788 
789 	zend_string *message = intl_error_get_message(&(objval->error));
790 	if (message) {
791 		RETURN_STR(message);
792 	} else {
793 		RETURN_NULL();
794 	}
795 }
796 /* }}} */
797 
798 /* {{{ */
PHP_METHOD(UConverter,getAvailable)799 PHP_METHOD(UConverter, getAvailable) {
800 	int32_t i,
801 			count = ucnv_countAvailable();
802 
803 	ZEND_PARSE_PARAMETERS_NONE();
804 
805 	intl_error_reset(NULL);
806 
807 	array_init(return_value);
808 	for(i = 0; i < count; i++) {
809 		const char *name = ucnv_getAvailableName(i);
810 		add_next_index_string(return_value, name);
811 	}
812 }
813 /* }}} */
814 
815 /* {{{ */
PHP_METHOD(UConverter,getAliases)816 PHP_METHOD(UConverter, getAliases) {
817 	char *name;
818 	size_t name_len;
819 	UErrorCode error = U_ZERO_ERROR;
820 	uint16_t i, count;
821 
822 	ZEND_PARSE_PARAMETERS_START(1, 1)
823 		Z_PARAM_STRING(name, name_len)
824 	ZEND_PARSE_PARAMETERS_END();
825 	intl_error_reset(NULL);
826 
827 	count = ucnv_countAliases(name, &error);
828 	if (U_FAILURE(error)) {
829 		THROW_UFAILURE(NULL, "ucnv_countAliases", error);
830 		RETURN_FALSE;
831 	}
832 
833 	array_init(return_value);
834 	for(i = 0; i < count; i++) {
835 		const char *alias;
836 
837 		error = U_ZERO_ERROR;
838 		alias = ucnv_getAlias(name, i, &error);
839 		if (U_FAILURE(error)) {
840 			THROW_UFAILURE(NULL, "ucnv_getAlias", error);
841 			zend_array_destroy(Z_ARR_P(return_value));
842 			RETURN_NULL();
843 		}
844 		add_next_index_string(return_value, alias);
845 	}
846 }
847 /* }}} */
848 
849 /* {{{ */
PHP_METHOD(UConverter,getStandards)850 PHP_METHOD(UConverter, getStandards) {
851 	uint16_t i, count;
852 
853 	ZEND_PARSE_PARAMETERS_NONE();
854 	intl_error_reset(NULL);
855 
856 	array_init(return_value);
857 	count = ucnv_countStandards();
858 	for(i = 0; i < count; i++) {
859 		UErrorCode error = U_ZERO_ERROR;
860 		const char *name = ucnv_getStandard(i, &error);
861 		if (U_FAILURE(error)) {
862 			THROW_UFAILURE(NULL, "ucnv_getStandard", error);
863 			zend_array_destroy(Z_ARR_P(return_value));
864 			RETURN_NULL();
865 		}
866 		add_next_index_string(return_value, name);
867 	}
868 }
869 /* }}} */
870 
871 /* {{{ Converter create/clone/destroy */
/* free_obj handler: closes whichever converters are open, resets the
 * object's error state and runs the standard object destructor.
 */
static void php_converter_free_object(zend_object *obj) {
	php_converter_object *conv = php_converter_fetch_object(obj);
	UConverter *source = conv->src;
	UConverter *target = conv->dest;

	if (source != NULL) {
		ucnv_close(source);
	}

	if (target != NULL) {
		ucnv_close(target);
	}

	intl_error_reset(&conv->error);
	zend_object_std_dtor(obj);
}
886 
php_converter_object_ctor(zend_class_entry * ce,php_converter_object ** pobjval)887 static zend_object *php_converter_object_ctor(zend_class_entry *ce, php_converter_object **pobjval) {
888 	php_converter_object *objval;
889 
890 	objval = zend_object_alloc(sizeof(php_converter_object), ce);
891 
892 	zend_object_std_init(&objval->obj, ce);
893 	object_properties_init(&objval->obj, ce);
894 	intl_error_init(&(objval->error));
895 
896 	*pobjval = objval;
897 
898 	return &objval->obj;
899 }
900 
php_converter_create_object(zend_class_entry * ce)901 static zend_object *php_converter_create_object(zend_class_entry *ce) {
902 	php_converter_object *objval = NULL;
903 	zend_object *retval = php_converter_object_ctor(ce, &objval);
904 
905 	object_properties_init(&(objval->obj), ce);
906 
907 	return retval;
908 }
909 
/* clone_obj handler.
 *
 * Creates a new converter object of the same class, clones both ICU
 * converters into it, points the ICU error callbacks at the new object and
 * copies the member properties. If either converter cannot be cloned an
 * Error is thrown and the partially initialised object is returned.
 */
static zend_object *php_converter_clone_object(zend_object *object) {
	php_converter_object *objval, *oldobj = php_converter_fetch_object(object);
	zend_object *retval = php_converter_object_ctor(object->ce, &objval);
	UErrorCode error = U_ZERO_ERROR;

#if U_ICU_VERSION_MAJOR_NUM > 70
	objval->src = ucnv_clone(oldobj->src, &error);
#else
	objval->src = ucnv_safeClone(oldobj->src, NULL, NULL, &error);
#endif
	if (U_SUCCESS(error)) {
		error = U_ZERO_ERROR;
#if U_ICU_VERSION_MAJOR_NUM > 70
		objval->dest = ucnv_clone(oldobj->dest, &error);
#else
		objval->dest = ucnv_safeClone(oldobj->dest, NULL, NULL, &error);
#endif
	}

	if (U_FAILURE(error)) {
		zend_throw_error(NULL, "Failed to clone UConverter");
		return retval;
	}

	/* The constructor is not run for a clone, so the cached call targets
	 * that php_converter_to_u_callback()/php_converter_from_u_callback()
	 * invoke have to be resolved here, bound to the NEW object. Without
	 * this the clone's callbacks would be called through an unset cache. */
	php_converter_resolve_callback(&objval->to_cache, retval, ZEND_STRL("toUCallback"));
	php_converter_resolve_callback(&objval->from_cache, retval, ZEND_STRL("fromUCallback"));

	/* Update contexts for converter error handlers */
	php_converter_set_callbacks(objval, objval->src );
	php_converter_set_callbacks(objval, objval->dest);

	zend_objects_clone_members(&(objval->obj), &(oldobj->obj));

	/* Newly cloned object deliberately does not inherit error state from original object */

	return retval;
}
944 /* }}} */
945 
946 /* {{{ php_converter_minit */
php_converter_minit(INIT_FUNC_ARGS)947 int php_converter_minit(INIT_FUNC_ARGS) {
948 	php_converter_ce = register_class_UConverter();
949 	php_converter_ce->create_object = php_converter_create_object;
950 	php_converter_ce->default_object_handlers = &php_converter_object_handlers;
951 	memcpy(&php_converter_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
952 	php_converter_object_handlers.offset = XtOffsetOf(php_converter_object, obj);
953 	php_converter_object_handlers.clone_obj = php_converter_clone_object;
954 	php_converter_object_handlers.free_obj = php_converter_free_object;
955 
956 	return SUCCESS;
957 }
958 /* }}} */
959