1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | https://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Sara Golemon <pollita@php.net> |
12 +----------------------------------------------------------------------+
13 */
14
15 #include "converter.h"
16 #include "converter_arginfo.h"
17 #include "zend_exceptions.h"
18
19 #include <unicode/utypes.h>
20 #include <unicode/utf8.h>
21 #include <unicode/utf16.h>
22 #include <unicode/ucnv.h>
23 #include <unicode/ustring.h>
24
25 #include "../intl_error.h"
26 #include "../intl_common.h"
27
/* Per-instance state backing a UConverter object. */
typedef struct _php_converter_object {
	/* ICU converter handles for the source and destination encodings (NULL until set) */
	UConverter *src, *dest;
	/* Call info used to invoke the toUCallback() / fromUCallback() methods */
	zend_fcall_info to_cb, from_cb;
	/* Call caches paired with to_cb / from_cb */
	zend_fcall_info_cache to_cache, from_cache;
	/* Last error recorded for this object */
	intl_error error;
	/* Embedded zend_object; php_converter_fetch_object() recovers the wrapper from its address */
	zend_object obj;
} php_converter_object;
35
36
php_converter_fetch_object(zend_object * obj)37 static inline php_converter_object *php_converter_fetch_object(zend_object *obj) {
38 return (php_converter_object *)((char*)(obj) - XtOffsetOf(php_converter_object, obj));
39 }
/* Fetch the php_converter_object wrapped by an object zval */
#define Z_INTL_CONVERTER_P(zv) php_converter_fetch_object(Z_OBJ_P(zv))

/* Class entry and object handlers; both are filled in by php_converter_minit() */
static zend_class_entry *php_converter_ce;
static zend_object_handlers php_converter_object_handlers;

/* Shorthand for Z_INTL_CONVERTER_P() */
#define CONV_GET(pzv) (Z_INTL_CONVERTER_P((pzv)))
/* Record a failed ICU call. fname must be a string literal naming the call WITHOUT
 * trailing parentheses: it is pasted in front of "() returned error". */
#define THROW_UFAILURE(obj, fname, error) php_converter_throw_failure(obj, error, \
	fname "() returned error " ZEND_LONG_FMT ": %s", (zend_long)error, u_errorName(error))
48
49 /* {{{ php_converter_throw_failure */
php_converter_throw_failure(php_converter_object * objval,UErrorCode error,const char * format,...)50 static inline void php_converter_throw_failure(php_converter_object *objval, UErrorCode error, const char *format, ...) {
51 intl_error *err = objval ? &(objval->error) : NULL;
52 char message[1024];
53 va_list vargs;
54
55 va_start(vargs, format);
56 vsnprintf(message, sizeof(message), format, vargs);
57 va_end(vargs);
58
59 intl_errors_set(err, error, message, 1);
60 }
61 /* }}} */
62
63 /* {{{ php_converter_default_callback */
php_converter_default_callback(zval * return_value,zval * zobj,zend_long reason,zval * error)64 static void php_converter_default_callback(zval *return_value, zval *zobj, zend_long reason, zval *error) {
65 /* Basic functionality so children can call parent::toUCallback() */
66 switch (reason) {
67 case UCNV_UNASSIGNED:
68 case UCNV_ILLEGAL:
69 case UCNV_IRREGULAR:
70 {
71 php_converter_object *objval = (php_converter_object*)CONV_GET(zobj);
72 char chars[127];
73 int8_t chars_len = sizeof(chars);
74 UErrorCode uerror = U_ZERO_ERROR;
75 if(!objval->src) {
76 php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Source Converter has not been initialized yet");
77 chars[0] = 0x1A;
78 chars[1] = 0;
79 chars_len = 1;
80 ZEND_TRY_ASSIGN_REF_LONG(error, U_INVALID_STATE_ERROR);
81 RETVAL_STRINGL(chars, chars_len);
82 return;
83 }
84
85 /* Yes, this is fairly wasteful at first glance,
86 * but considering that the alternative is to store
87 * what's sent into setSubstChars() and the fact
88 * that this is an extremely unlikely codepath
89 * I'd rather take the CPU hit here, than waste time
90 * storing a value I'm unlikely to use.
91 */
92 ucnv_getSubstChars(objval->src, chars, &chars_len, &uerror);
93 if (U_FAILURE(uerror)) {
94 THROW_UFAILURE(objval, "ucnv_getSubstChars", uerror);
95 chars[0] = 0x1A;
96 chars[1] = 0;
97 chars_len = 1;
98 }
99 ZEND_TRY_ASSIGN_REF_LONG(error, uerror);
100 RETVAL_STRINGL(chars, chars_len);
101 }
102 }
103 }
104 /* }}} */
105
106 /* {{{ */
PHP_METHOD(UConverter,toUCallback)107 PHP_METHOD(UConverter, toUCallback) {
108 zend_long reason;
109 zend_string *source, *codeUnits;
110 zval *error;
111
112 if (zend_parse_parameters(ZEND_NUM_ARGS(), "lSSz",
113 &reason, &source, &codeUnits, &error) == FAILURE) {
114 RETURN_THROWS();
115 }
116
117 php_converter_default_callback(return_value, ZEND_THIS, reason, error);
118 }
119 /* }}} */
120
121 /* {{{ */
PHP_METHOD(UConverter,fromUCallback)122 PHP_METHOD(UConverter, fromUCallback) {
123 zend_long reason;
124 zval *source, *error;
125 zend_long codePoint;
126
127 if (zend_parse_parameters(ZEND_NUM_ARGS(), "lalz",
128 &reason, &source, &codePoint, &error) == FAILURE) {
129 RETURN_THROWS();
130 }
131
132 php_converter_default_callback(return_value, ZEND_THIS, reason, error);
133 }
134 /* }}} */
135
136 /* {{{ php_converter_check_limits */
php_converter_check_limits(php_converter_object * objval,zend_long available,zend_long needed)137 static inline bool php_converter_check_limits(php_converter_object *objval, zend_long available, zend_long needed) {
138 if (available < needed) {
139 php_converter_throw_failure(objval, U_BUFFER_OVERFLOW_ERROR, "Buffer overrun " ZEND_LONG_FMT " bytes needed, " ZEND_LONG_FMT " available", needed, available);
140 return 0;
141 }
142 return 1;
143 }
144 /* }}} */
145
/* Check that `needed` more units fit between cnvargs->target and cnvargs->targetLimit.
 * Relies on a variable named `objval` being in scope at the point of use. */
#define TARGET_CHECK(cnvargs, needed) php_converter_check_limits(objval, cnvargs->targetLimit - cnvargs->target, needed)
147
148 /* {{{ php_converter_append_toUnicode_target */
php_converter_append_toUnicode_target(zval * val,UConverterToUnicodeArgs * args,php_converter_object * objval)149 static void php_converter_append_toUnicode_target(zval *val, UConverterToUnicodeArgs *args, php_converter_object *objval) {
150 switch (Z_TYPE_P(val)) {
151 case IS_NULL:
152 /* Code unit is being skipped */
153 return;
154 case IS_LONG:
155 {
156 zend_long lval = Z_LVAL_P(val);
157 if ((lval < 0) || (lval > 0x10FFFF)) {
158 php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR, "Invalid codepoint U+%04lx", lval);
159 return;
160 }
161 if (lval > 0xFFFF) {
162 /* Supplemental planes U+010000 - U+10FFFF */
163 if (TARGET_CHECK(args, 2)) {
164 /* TODO: Find the ICU call which does this properly */
165 *(args->target++) = (UChar)(((lval - 0x10000) >> 10) | 0xD800);
166 *(args->target++) = (UChar)(((lval - 0x10000) & 0x3FF) | 0xDC00);
167 }
168 return;
169 }
170 /* Non-suggogate BMP codepoint */
171 if (TARGET_CHECK(args, 1)) {
172 *(args->target++) = (UChar)lval;
173 }
174 return;
175 }
176 case IS_STRING:
177 {
178 const char *strval = Z_STRVAL_P(val);
179 int i = 0, strlen = Z_STRLEN_P(val);
180
181 while((i != strlen) && TARGET_CHECK(args, 1)) {
182 UChar c;
183 U8_NEXT(strval, i, strlen, c);
184 *(args->target++) = c;
185 }
186 return;
187 }
188 case IS_ARRAY:
189 {
190 HashTable *ht = Z_ARRVAL_P(val);
191 zval *tmpzval;
192
193 ZEND_HASH_FOREACH_VAL(ht, tmpzval) {
194 php_converter_append_toUnicode_target(tmpzval, args, objval);
195 } ZEND_HASH_FOREACH_END();
196 return;
197 }
198 default:
199 php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR,
200 "toUCallback() specified illegal type for substitution character");
201 }
202 }
203 /* }}} */
204
205 /* {{{ php_converter_to_u_callback */
php_converter_to_u_callback(const void * context,UConverterToUnicodeArgs * args,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * pErrorCode)206 static void php_converter_to_u_callback(const void *context,
207 UConverterToUnicodeArgs *args,
208 const char *codeUnits, int32_t length,
209 UConverterCallbackReason reason,
210 UErrorCode *pErrorCode) {
211 php_converter_object *objval = (php_converter_object*)context;
212 zval retval;
213 zval zargs[4];
214
215 ZVAL_LONG(&zargs[0], reason);
216 if (args->source) {
217 ZVAL_STRINGL(&zargs[1], args->source, args->sourceLimit - args->source);
218 } else {
219 ZVAL_EMPTY_STRING(&zargs[1]);
220 }
221 if (codeUnits) {
222 ZVAL_STRINGL(&zargs[2], codeUnits, length);
223 } else {
224 ZVAL_EMPTY_STRING(&zargs[2]);
225 }
226 ZVAL_LONG(&zargs[3], *pErrorCode);
227 ZVAL_MAKE_REF(&zargs[3]);
228
229 objval->to_cb.param_count = 4;
230 objval->to_cb.params = zargs;
231 objval->to_cb.retval = &retval;
232 if (zend_call_function(&(objval->to_cb), &(objval->to_cache)) == FAILURE) {
233 /* Unlikely */
234 php_converter_throw_failure(objval, U_INTERNAL_PROGRAM_ERROR, "Unexpected failure calling toUCallback()");
235 } else if (!Z_ISUNDEF(retval)) {
236 php_converter_append_toUnicode_target(&retval, args, objval);
237 zval_ptr_dtor(&retval);
238 }
239
240 if (Z_TYPE(zargs[3]) == IS_LONG) {
241 *pErrorCode = Z_LVAL(zargs[3]);
242 } else if (Z_ISREF(zargs[3]) && Z_TYPE_P(Z_REFVAL(zargs[3])) == IS_LONG) {
243 *pErrorCode = Z_LVAL_P(Z_REFVAL(zargs[3]));
244 }
245
246 zval_ptr_dtor(&zargs[0]);
247 zval_ptr_dtor(&zargs[1]);
248 zval_ptr_dtor(&zargs[2]);
249 zval_ptr_dtor(&zargs[3]);
250 }
251 /* }}} */
252
253 /* {{{ php_converter_append_fromUnicode_target */
/* Append the value returned by fromUCallback() to the byte target buffer:
 * NULL is ignored, an int is stored as one byte, a string is copied verbatim and
 * an array is processed element by element. Other types record an error. */
static void php_converter_append_fromUnicode_target(zval *val, UConverterFromUnicodeArgs *args, php_converter_object *objval) {
	switch (Z_TYPE_P(val)) {
		case IS_NULL:
			/* Ignore */
			break;
		case IS_LONG:
			if (TARGET_CHECK(args, 1)) {
				*args->target = (char)Z_LVAL_P(val);
				args->target++;
			}
			break;
		case IS_STRING:
		{
			size_t nbytes = Z_STRLEN_P(val);

			if (TARGET_CHECK(args, nbytes)) {
				memcpy(args->target, Z_STRVAL_P(val), nbytes);
				args->target += nbytes;
			}
			break;
		}
		case IS_ARRAY:
		{
			zval *entry;

			ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(val), entry) {
				php_converter_append_fromUnicode_target(entry, args, objval);
			} ZEND_HASH_FOREACH_END();
			break;
		}
		default:
			php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR, "fromUCallback() specified illegal type for substitution character");
			break;
	}
}
286 /* }}} */
287
288 /* {{{ php_converter_from_u_callback */
php_converter_from_u_callback(const void * context,UConverterFromUnicodeArgs * args,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * pErrorCode)289 static void php_converter_from_u_callback(const void *context,
290 UConverterFromUnicodeArgs *args,
291 const UChar *codeUnits, int32_t length, UChar32 codePoint,
292 UConverterCallbackReason reason,
293 UErrorCode *pErrorCode) {
294 php_converter_object *objval = (php_converter_object*)context;
295 zval retval;
296 zval zargs[4];
297 int i;
298
299 ZVAL_LONG(&zargs[0], reason);
300 array_init(&zargs[1]);
301 i = 0;
302 while (i < length) {
303 UChar32 c;
304 U16_NEXT(codeUnits, i, length, c);
305 add_next_index_long(&zargs[1], c);
306 }
307 ZVAL_LONG(&zargs[2], codePoint);
308 ZVAL_LONG(&zargs[3], *pErrorCode);
309 ZVAL_MAKE_REF(&zargs[3]);
310
311 objval->from_cb.param_count = 4;
312 objval->from_cb.params = zargs;
313 objval->from_cb.retval = &retval;
314 if (zend_call_function(&(objval->from_cb), &(objval->from_cache)) == FAILURE) {
315 /* Unlikely */
316 php_converter_throw_failure(objval, U_INTERNAL_PROGRAM_ERROR, "Unexpected failure calling fromUCallback()");
317 } else if (!Z_ISUNDEF(retval)) {
318 php_converter_append_fromUnicode_target(&retval, args, objval);
319 zval_ptr_dtor(&retval);
320 }
321
322 if (Z_TYPE(zargs[3]) == IS_LONG) {
323 *pErrorCode = Z_LVAL(zargs[3]);
324 } else if (Z_ISREF(zargs[3]) && Z_TYPE_P(Z_REFVAL(zargs[3])) == IS_LONG) {
325 *pErrorCode = Z_LVAL_P(Z_REFVAL(zargs[3]));
326 }
327
328 zval_ptr_dtor(&zargs[0]);
329 zval_ptr_dtor(&zargs[1]);
330 zval_ptr_dtor(&zargs[2]);
331 zval_ptr_dtor(&zargs[3]);
332 }
333 /* }}} */
334
335 /* {{{ php_converter_set_callbacks */
php_converter_set_callbacks(php_converter_object * objval,UConverter * cnv)336 static inline bool php_converter_set_callbacks(php_converter_object *objval, UConverter *cnv) {
337 bool ret = 1;
338 UErrorCode error = U_ZERO_ERROR;
339
340 if (objval->obj.ce == php_converter_ce) {
341 /* Short-circuit having to go through method calls and data marshalling
342 * when we're using default behavior
343 */
344 return 1;
345 }
346
347 ucnv_setToUCallBack(cnv, (UConverterToUCallback)php_converter_to_u_callback, (const void*)objval,
348 NULL, NULL, &error);
349 if (U_FAILURE(error)) {
350 THROW_UFAILURE(objval, "ucnv_setToUCallBack", error);
351 ret = 0;
352 }
353
354 error = U_ZERO_ERROR;
355 ucnv_setFromUCallBack(cnv, (UConverterFromUCallback)php_converter_from_u_callback, (const void*)objval,
356 NULL, NULL, &error);
357 if (U_FAILURE(error)) {
358 THROW_UFAILURE(objval, "ucnv_setFromUCallBack", error);
359 ret = 0;
360 }
361 return ret;
362 }
363 /* }}} */
364
365 /* {{{ php_converter_set_encoding */
php_converter_set_encoding(php_converter_object * objval,UConverter ** pcnv,const char * enc,size_t enc_len)366 static bool php_converter_set_encoding(php_converter_object *objval,
367 UConverter **pcnv,
368 const char *enc, size_t enc_len) {
369 UErrorCode error = U_ZERO_ERROR;
370 UConverter *cnv = ucnv_open(enc, &error);
371
372 if (error == U_AMBIGUOUS_ALIAS_WARNING) {
373 UErrorCode getname_error = U_ZERO_ERROR;
374 const char *actual_encoding = ucnv_getName(cnv, &getname_error);
375 if (U_FAILURE(getname_error)) {
376 /* Should never happen */
377 actual_encoding = "(unknown)";
378 }
379 php_error_docref(NULL, E_WARNING, "Ambiguous encoding specified, using %s", actual_encoding);
380 } else if (U_FAILURE(error)) {
381 if (objval) {
382 THROW_UFAILURE(objval, "ucnv_open", error);
383 } else {
384 php_error_docref(NULL, E_WARNING, "Error setting encoding: %d - %s", (int)error, u_errorName(error));
385 }
386 return 0;
387 }
388
389 if (objval && !php_converter_set_callbacks(objval, cnv)) {
390 return 0;
391 }
392
393 if (*pcnv) {
394 ucnv_close(*pcnv);
395 }
396 *pcnv = cnv;
397 return 1;
398 }
399 /* }}} */
400
401 /* {{{ php_converter_do_set_encoding */
/* Shared body of setSourceEncoding() / setDestinationEncoding(): parses the
 * encoding name and replaces the converter stored in *pcnv. Returns bool to PHP. */
static void php_converter_do_set_encoding(UConverter **pcnv, INTERNAL_FUNCTION_PARAMETERS) {
	php_converter_object *conv = CONV_GET(ZEND_THIS);
	char *name;
	size_t name_len;
	bool ok;

	if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len)) {
		RETURN_THROWS();
	}
	intl_errors_reset(&conv->error);

	ok = php_converter_set_encoding(conv, pcnv, name, name_len);
	RETURN_BOOL(ok);
}
414 /* }}} */
415
416 /* {{{ */
PHP_METHOD(UConverter,setSourceEncoding)417 PHP_METHOD(UConverter, setSourceEncoding) {
418 php_converter_object *objval = CONV_GET(ZEND_THIS);
419 php_converter_do_set_encoding(&(objval->src), INTERNAL_FUNCTION_PARAM_PASSTHRU);
420 }
421 /* }}} */
422
423 /* {{{ */
PHP_METHOD(UConverter,setDestinationEncoding)424 PHP_METHOD(UConverter, setDestinationEncoding) {
425 php_converter_object *objval = CONV_GET(ZEND_THIS);
426 php_converter_do_set_encoding(&(objval->dest), INTERNAL_FUNCTION_PARAM_PASSTHRU);
427 }
428 /* }}} */
429
430 /* {{{ php_converter_do_get_encoding */
/* Shared body of getSourceEncoding() / getDestinationEncoding().
 * Returns to PHP: NULL when cnv is not set, false when ICU reports a failure
 * (recorded on objval), otherwise the converter's name. */
static void php_converter_do_get_encoding(php_converter_object *objval, UConverter *cnv, INTERNAL_FUNCTION_PARAMETERS) {
	const char *name;
	UErrorCode status = U_ZERO_ERROR;

	if (zend_parse_parameters_none() == FAILURE) {
		RETURN_THROWS();
	}

	intl_errors_reset(&objval->error);

	if (!cnv) {
		RETURN_NULL();
	}

	/* Use a local status; failures are recorded on the object through THROW_UFAILURE */
	name = ucnv_getName(cnv, &status);
	if (U_FAILURE(status)) {
		/* THROW_UFAILURE appends "()" itself, so pass the bare function name */
		THROW_UFAILURE(objval, "ucnv_getName", status);
		RETURN_FALSE;
	}

	RETURN_STRING(name);
}
452 /* }}} */
453
454 /* {{{ */
PHP_METHOD(UConverter,getSourceEncoding)455 PHP_METHOD(UConverter, getSourceEncoding) {
456 php_converter_object *objval = CONV_GET(ZEND_THIS);
457 php_converter_do_get_encoding(objval, objval->src, INTERNAL_FUNCTION_PARAM_PASSTHRU);
458 }
459 /* }}} */
460
461 /* {{{ */
PHP_METHOD(UConverter,getDestinationEncoding)462 PHP_METHOD(UConverter, getDestinationEncoding) {
463 php_converter_object *objval = CONV_GET(ZEND_THIS);
464 php_converter_do_get_encoding(objval, objval->dest, INTERNAL_FUNCTION_PARAM_PASSTHRU);
465 }
466 /* }}} */
467
468 /* {{{ php_converter_do_get_type */
/* Shared body of getSourceType() / getDestinationType().
 * Returns to PHP: NULL when cnv is not set, otherwise the UConverterType value. */
static void php_converter_do_get_type(php_converter_object *objval, UConverter *cnv, INTERNAL_FUNCTION_PARAMETERS) {
	if (zend_parse_parameters_none() == FAILURE) {
		RETURN_THROWS();
	}
	intl_errors_reset(&objval->error);

	if (!cnv) {
		RETURN_NULL();
	}

	/* ucnv_getType() takes no UErrorCode, and the object's error was just reset,
	 * so there is no failure state to inspect here. */
	RETURN_LONG(ucnv_getType(cnv));
}
489 /* }}} */
490
491 /* {{{ */
PHP_METHOD(UConverter,getSourceType)492 PHP_METHOD(UConverter, getSourceType) {
493 php_converter_object *objval = CONV_GET(ZEND_THIS);
494 php_converter_do_get_type(objval, objval->src, INTERNAL_FUNCTION_PARAM_PASSTHRU);
495 }
496 /* }}} */
497
498 /* {{{ */
PHP_METHOD(UConverter,getDestinationType)499 PHP_METHOD(UConverter, getDestinationType) {
500 php_converter_object *objval = CONV_GET(ZEND_THIS);
501 php_converter_do_get_type(objval, objval->dest, INTERNAL_FUNCTION_PARAM_PASSTHRU);
502 }
503 /* }}} */
504
505 /* {{{ php_converter_resolve_callback */
/* Initialise finfo/fcache so that they call $zobj->callback_name().
 * A temporary [object, "method"] callable is built for zend_fcall_info_init()
 * and destroyed again; a failure is recorded on objval. */
static void php_converter_resolve_callback(zval *zobj,
                                           php_converter_object *objval,
                                           const char *callback_name,
                                           zend_fcall_info *finfo,
                                           zend_fcall_info_cache *fcache) {
	zval callable;
	char *why = NULL;

	array_init(&callable);
	/* The array takes its own reference to the object */
	Z_ADDREF_P(zobj);
	add_next_index_zval(&callable, zobj);
	add_next_index_string(&callable, callback_name);

	if (FAILURE == zend_fcall_info_init(&callable, 0, finfo, fcache, NULL, &why)) {
		php_converter_throw_failure(objval, U_INTERNAL_PROGRAM_ERROR, "Error setting converter callback: %s", why);
	}

	zend_array_destroy(Z_ARR(callable));
	/* The temporary callable is gone; clear the stored function name */
	ZVAL_UNDEF(&finfo->function_name);

	if (why != NULL) {
		efree(why);
	}
}
527 /* }}} */
528
529 /* {{{ */
PHP_METHOD(UConverter,__construct)530 PHP_METHOD(UConverter, __construct) {
531 php_converter_object *objval = CONV_GET(ZEND_THIS);
532 char *src = "utf-8";
533 size_t src_len = sizeof("utf-8") - 1;
534 char *dest = src;
535 size_t dest_len = src_len;
536
537 intl_error_reset(NULL);
538
539 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!s!", &dest, &dest_len, &src, &src_len) == FAILURE) {
540 RETURN_THROWS();
541 }
542
543 php_converter_set_encoding(objval, &(objval->src), src, src_len );
544 php_converter_set_encoding(objval, &(objval->dest), dest, dest_len);
545 php_converter_resolve_callback(ZEND_THIS, objval, "toUCallback", &(objval->to_cb), &(objval->to_cache));
546 php_converter_resolve_callback(ZEND_THIS, objval, "fromUCallback", &(objval->from_cb), &(objval->from_cache));
547 }
548 /* }}} */
549
550 /* {{{ */
PHP_METHOD(UConverter,setSubstChars)551 PHP_METHOD(UConverter, setSubstChars) {
552 php_converter_object *objval = CONV_GET(ZEND_THIS);
553 char *chars;
554 size_t chars_len;
555 int ret = 1;
556
557 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &chars, &chars_len) == FAILURE) {
558 RETURN_THROWS();
559 }
560 intl_errors_reset(&objval->error);
561
562 if (objval->src) {
563 UErrorCode error = U_ZERO_ERROR;
564 ucnv_setSubstChars(objval->src, chars, chars_len, &error);
565 if (U_FAILURE(error)) {
566 THROW_UFAILURE(objval, "ucnv_setSubstChars", error);
567 ret = 0;
568 }
569 } else {
570 php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Source Converter has not been initialized yet");
571 ret = 0;
572 }
573
574 if (objval->dest) {
575 UErrorCode error = U_ZERO_ERROR;
576 ucnv_setSubstChars(objval->dest, chars, chars_len, &error);
577 if (U_FAILURE(error)) {
578 THROW_UFAILURE(objval, "ucnv_setSubstChars", error);
579 ret = 0;
580 }
581 } else {
582 php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Destination Converter has not been initialized yet");
583 ret = 0;
584 }
585
586 RETURN_BOOL(ret);
587 }
588 /* }}} */
589
590 /* {{{ */
PHP_METHOD(UConverter,getSubstChars)591 PHP_METHOD(UConverter, getSubstChars) {
592 php_converter_object *objval = CONV_GET(ZEND_THIS);
593 char chars[127];
594 int8_t chars_len = sizeof(chars);
595 UErrorCode error = U_ZERO_ERROR;
596
597 if (zend_parse_parameters_none() == FAILURE) {
598 RETURN_THROWS();
599 }
600 intl_errors_reset(&objval->error);
601
602 if (!objval->src) {
603 RETURN_NULL();
604 }
605
606 /* src and dest get the same subst chars set,
607 * so it doesn't really matter which one we read from
608 */
609 ucnv_getSubstChars(objval->src, chars, &chars_len, &error);
610 if (U_FAILURE(error)) {
611 THROW_UFAILURE(objval, "ucnv_getSubstChars", error);
612 RETURN_FALSE;
613 }
614
615 RETURN_STRINGL(chars, chars_len);
616 }
617 /* }}} */
618
619 /* {{{ php_converter_do_convert */
php_converter_do_convert(UConverter * dest_cnv,UConverter * src_cnv,const char * src,int32_t src_len,php_converter_object * objval)620 static zend_string* php_converter_do_convert(UConverter *dest_cnv,
621 UConverter *src_cnv, const char *src, int32_t src_len,
622 php_converter_object *objval
623 ) {
624 UErrorCode error = U_ZERO_ERROR;
625 int32_t temp_len, ret_len;
626 zend_string *ret;
627 UChar *temp;
628
629 if (!src_cnv || !dest_cnv) {
630 php_converter_throw_failure(objval, U_INVALID_STATE_ERROR,
631 "Internal converters not initialized");
632 return NULL;
633 }
634
635 /* Get necessary buffer size first */
636 temp_len = 1 + ucnv_toUChars(src_cnv, NULL, 0, src, src_len, &error);
637 if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
638 THROW_UFAILURE(objval, "ucnv_toUChars", error);
639 return NULL;
640 }
641 temp = safe_emalloc(sizeof(UChar), temp_len, sizeof(UChar));
642
643 /* Convert to intermediate UChar* array */
644 error = U_ZERO_ERROR;
645 temp_len = ucnv_toUChars(src_cnv, temp, temp_len, src, src_len, &error);
646 if (U_FAILURE(error)) {
647 THROW_UFAILURE(objval, "ucnv_toUChars", error);
648 efree(temp);
649 return NULL;
650 }
651 temp[temp_len] = 0;
652
653 /* Get necessary output buffer size */
654 ret_len = ucnv_fromUChars(dest_cnv, NULL, 0, temp, temp_len, &error);
655 if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
656 THROW_UFAILURE(objval, "ucnv_fromUChars", error);
657 efree(temp);
658 return NULL;
659 }
660
661 ret = zend_string_alloc(ret_len, 0);
662
663 /* Convert to final encoding */
664 error = U_ZERO_ERROR;
665 ZSTR_LEN(ret) = ucnv_fromUChars(dest_cnv, ZSTR_VAL(ret), ret_len+1, temp, temp_len, &error);
666 efree(temp);
667 if (U_FAILURE(error)) {
668 THROW_UFAILURE(objval, "ucnv_fromUChars", error);
669 zend_string_efree(ret);
670 return NULL;
671 }
672
673 return ret;
674 }
675 /* }}} */
676
677 /* {{{ */
678 #define UCNV_REASON_CASE(v) case (UCNV_ ## v) : RETURN_STRINGL( "REASON_" #v , sizeof( "REASON_" #v ) - 1);
PHP_METHOD(UConverter,reasonText)679 PHP_METHOD(UConverter, reasonText) {
680 zend_long reason;
681
682 if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &reason) == FAILURE) {
683 RETURN_THROWS();
684 }
685 intl_error_reset(NULL);
686
687 switch (reason) {
688 UCNV_REASON_CASE(UNASSIGNED)
689 UCNV_REASON_CASE(ILLEGAL)
690 UCNV_REASON_CASE(IRREGULAR)
691 UCNV_REASON_CASE(RESET)
692 UCNV_REASON_CASE(CLOSE)
693 UCNV_REASON_CASE(CLONE)
694 default:
695 zend_argument_value_error(1, "must be a UConverter::REASON_* constant");
696 RETURN_THROWS();
697 }
698 }
699 /* }}} */
700
701 /* {{{ */
PHP_METHOD(UConverter,convert)702 PHP_METHOD(UConverter, convert) {
703 php_converter_object *objval = CONV_GET(ZEND_THIS);
704 char *str;
705 size_t str_len;
706 zend_string *ret;
707 bool reverse = 0;
708
709 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|b",
710 &str, &str_len, &reverse) == FAILURE) {
711 RETURN_THROWS();
712 }
713 intl_errors_reset(&objval->error);
714
715 ret = php_converter_do_convert(reverse ? objval->src : objval->dest,
716 reverse ? objval->dest : objval->src,
717 str, str_len,
718 objval);
719 if (ret) {
720 RETURN_NEW_STR(ret);
721 } else {
722 RETURN_FALSE;
723 }
724 }
725 /* }}} */
726
727 /* {{{ */
PHP_METHOD(UConverter,transcode)728 PHP_METHOD(UConverter, transcode) {
729 char *str, *src, *dest;
730 size_t str_len, src_len, dest_len;
731 zval *options = NULL;
732 UConverter *src_cnv = NULL, *dest_cnv = NULL;
733
734 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|a!",
735 &str, &str_len, &dest, &dest_len, &src, &src_len, &options) == FAILURE) {
736 RETURN_THROWS();
737 }
738 intl_error_reset(NULL);
739
740 if (php_converter_set_encoding(NULL, &src_cnv, src, src_len) &&
741 php_converter_set_encoding(NULL, &dest_cnv, dest, dest_len)) {
742 zend_string *ret;
743 UErrorCode error = U_ZERO_ERROR;
744
745 if (options && zend_hash_num_elements(Z_ARRVAL_P(options))) {
746 zval *tmpzval;
747
748 if (U_SUCCESS(error) &&
749 (tmpzval = zend_hash_str_find(Z_ARRVAL_P(options), "from_subst", sizeof("from_subst") - 1)) != NULL &&
750 Z_TYPE_P(tmpzval) == IS_STRING) {
751 error = U_ZERO_ERROR;
752 ucnv_setSubstChars(src_cnv, Z_STRVAL_P(tmpzval), Z_STRLEN_P(tmpzval) & 0x7F, &error);
753 }
754 if (U_SUCCESS(error) &&
755 (tmpzval = zend_hash_str_find(Z_ARRVAL_P(options), "to_subst", sizeof("to_subst") - 1)) != NULL &&
756 Z_TYPE_P(tmpzval) == IS_STRING) {
757 error = U_ZERO_ERROR;
758 ucnv_setSubstChars(dest_cnv, Z_STRVAL_P(tmpzval), Z_STRLEN_P(tmpzval) & 0x7F, &error);
759 }
760 }
761
762 if (U_SUCCESS(error) &&
763 (ret = php_converter_do_convert(dest_cnv, src_cnv, str, str_len, NULL)) != NULL) {
764 RETVAL_NEW_STR(ret);
765 }
766
767 if (U_FAILURE(error)) {
768 THROW_UFAILURE(NULL, "transcode", error);
769 RETVAL_FALSE;
770 }
771 } else {
772 RETVAL_FALSE;
773 }
774
775 if (src_cnv) {
776 ucnv_close(src_cnv);
777 }
778 if (dest_cnv) {
779 ucnv_close(dest_cnv);
780 }
781 }
782 /* }}} */
783
784 /* {{{ */
PHP_METHOD(UConverter,getErrorCode)785 PHP_METHOD(UConverter, getErrorCode) {
786 php_converter_object *objval = CONV_GET(ZEND_THIS);
787
788 if (zend_parse_parameters_none() == FAILURE) {
789 RETURN_THROWS();
790 }
791
792 RETURN_LONG(intl_error_get_code(&(objval->error)));
793 }
794 /* }}} */
795
796 /* {{{ */
PHP_METHOD(UConverter,getErrorMessage)797 PHP_METHOD(UConverter, getErrorMessage) {
798 php_converter_object *objval = CONV_GET(ZEND_THIS);
799
800 if (zend_parse_parameters_none() == FAILURE) {
801 RETURN_THROWS();
802 }
803
804 zend_string *message = intl_error_get_message(&(objval->error));
805 if (message) {
806 RETURN_STR(message);
807 } else {
808 RETURN_NULL();
809 }
810 }
811 /* }}} */
812
813 /* {{{ */
PHP_METHOD(UConverter,getAvailable)814 PHP_METHOD(UConverter, getAvailable) {
815 int32_t i,
816 count = ucnv_countAvailable();
817
818 if (zend_parse_parameters_none() == FAILURE) {
819 RETURN_THROWS();
820 }
821 intl_error_reset(NULL);
822
823 array_init(return_value);
824 for(i = 0; i < count; i++) {
825 const char *name = ucnv_getAvailableName(i);
826 add_next_index_string(return_value, name);
827 }
828 }
829 /* }}} */
830
831 /* {{{ */
PHP_METHOD(UConverter,getAliases)832 PHP_METHOD(UConverter, getAliases) {
833 char *name;
834 size_t name_len;
835 UErrorCode error = U_ZERO_ERROR;
836 uint16_t i, count;
837
838 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
839 RETURN_THROWS();
840 }
841 intl_error_reset(NULL);
842
843 count = ucnv_countAliases(name, &error);
844 if (U_FAILURE(error)) {
845 THROW_UFAILURE(NULL, "ucnv_countAliases", error);
846 RETURN_FALSE;
847 }
848
849 array_init(return_value);
850 for(i = 0; i < count; i++) {
851 const char *alias;
852
853 error = U_ZERO_ERROR;
854 alias = ucnv_getAlias(name, i, &error);
855 if (U_FAILURE(error)) {
856 THROW_UFAILURE(NULL, "ucnv_getAlias", error);
857 zend_array_destroy(Z_ARR_P(return_value));
858 RETURN_NULL();
859 }
860 add_next_index_string(return_value, alias);
861 }
862 }
863 /* }}} */
864
865 /* {{{ */
PHP_METHOD(UConverter,getStandards)866 PHP_METHOD(UConverter, getStandards) {
867 uint16_t i, count;
868
869 if (zend_parse_parameters_none() == FAILURE) {
870 RETURN_THROWS();
871 }
872 intl_error_reset(NULL);
873
874 array_init(return_value);
875 count = ucnv_countStandards();
876 for(i = 0; i < count; i++) {
877 UErrorCode error = U_ZERO_ERROR;
878 const char *name = ucnv_getStandard(i, &error);
879 if (U_FAILURE(error)) {
880 THROW_UFAILURE(NULL, "ucnv_getStandard", error);
881 zend_array_destroy(Z_ARR_P(return_value));
882 RETURN_NULL();
883 }
884 add_next_index_string(return_value, name);
885 }
886 }
887 /* }}} */
888
889 /* {{{ Converter create/clone/destroy */
/* free_obj handler: closes both ICU converters, clears the error state and
 * runs the standard object destructor. */
static void php_converter_free_object(zend_object *obj) {
	php_converter_object *conv = php_converter_fetch_object(obj);
	UConverter *handles[2];
	int idx;

	handles[0] = conv->src;
	handles[1] = conv->dest;
	for (idx = 0; idx < 2; idx++) {
		if (handles[idx] != NULL) {
			ucnv_close(handles[idx]);
		}
	}

	intl_error_reset(&conv->error);
	zend_object_std_dtor(obj);
}
904
php_converter_object_ctor(zend_class_entry * ce,php_converter_object ** pobjval)905 static zend_object *php_converter_object_ctor(zend_class_entry *ce, php_converter_object **pobjval) {
906 php_converter_object *objval;
907
908 objval = zend_object_alloc(sizeof(php_converter_object), ce);
909
910 zend_object_std_init(&objval->obj, ce);
911 object_properties_init(&objval->obj, ce);
912 intl_error_init(&(objval->error));
913
914 objval->obj.handlers = &php_converter_object_handlers;
915 *pobjval = objval;
916
917 return &objval->obj;
918 }
919
php_converter_create_object(zend_class_entry * ce)920 static zend_object *php_converter_create_object(zend_class_entry *ce) {
921 php_converter_object *objval = NULL;
922 zend_object *retval = php_converter_object_ctor(ce, &objval);
923
924 object_properties_init(&(objval->obj), ce);
925
926 return retval;
927 }
928
/* clone_obj handler: clones both ICU converters, re-targets their error
 * callbacks at the new object and copies the members. If ICU fails to clone a
 * converter an exception is thrown and the partially initialised object is
 * returned. */
static zend_object *php_converter_clone_object(zend_object *object) {
	php_converter_object *objval, *oldobj = php_converter_fetch_object(object);
	zend_object *retval = php_converter_object_ctor(object->ce, &objval);
	UErrorCode error = U_ZERO_ERROR;
	zval zclone;

	intl_errors_reset(&oldobj->error);

#if U_ICU_VERSION_MAJOR_NUM > 70
	objval->src = ucnv_clone(oldobj->src, &error);
#else
	objval->src = ucnv_safeClone(oldobj->src, NULL, NULL, &error);
#endif
	if (U_SUCCESS(error)) {
		error = U_ZERO_ERROR;
#if U_ICU_VERSION_MAJOR_NUM > 70
		objval->dest = ucnv_clone(oldobj->dest, &error);
#else
		objval->dest = ucnv_safeClone(oldobj->dest, NULL, NULL, &error);
#endif
	}
	if (U_FAILURE(error)) {
		zend_string *err_msg;
		/* Name the ICU function that was actually called */
#if U_ICU_VERSION_MAJOR_NUM > 70
		THROW_UFAILURE(oldobj, "ucnv_clone", error);
#else
		THROW_UFAILURE(oldobj, "ucnv_safeClone", error);
#endif

		err_msg = intl_error_get_message(&oldobj->error);
		zend_throw_exception(NULL, ZSTR_VAL(err_msg), 0);
		zend_string_release_ex(err_msg, 0);

		return retval;
	}

	/* Update contexts for converter error handlers */
	php_converter_set_callbacks(objval, objval->src );
	php_converter_set_callbacks(objval, objval->dest);

	/* The call info is per object and is not copied by the clone, so resolve
	 * toUCallback/fromUCallback against the new object just like the
	 * constructor does; otherwise the callbacks installed above would invoke
	 * an uninitialised zend_fcall_info. */
	ZVAL_OBJ(&zclone, retval);
	php_converter_resolve_callback(&zclone, objval, "toUCallback", &(objval->to_cb), &(objval->to_cache));
	php_converter_resolve_callback(&zclone, objval, "fromUCallback", &(objval->from_cb), &(objval->from_cache));

	zend_objects_clone_members(&(objval->obj), &(oldobj->obj));

	/* Newly cloned object deliberately does not inherit error state from original object */

	return retval;
}
970 /* }}} */
971
972 #define CONV_REASON_CONST(v) zend_declare_class_constant_long(php_converter_ce, "REASON_" #v, sizeof("REASON_" #v) - 1, UCNV_ ## v)
973 #define CONV_TYPE_CONST(v) zend_declare_class_constant_long(php_converter_ce, #v , sizeof(#v) - 1, UCNV_ ## v)
974
975 /* {{{ php_converter_minit */
php_converter_minit(INIT_FUNC_ARGS)976 int php_converter_minit(INIT_FUNC_ARGS) {
977 php_converter_ce = register_class_UConverter();
978 php_converter_ce->create_object = php_converter_create_object;
979 memcpy(&php_converter_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
980 php_converter_object_handlers.offset = XtOffsetOf(php_converter_object, obj);
981 php_converter_object_handlers.clone_obj = php_converter_clone_object;
982 php_converter_object_handlers.free_obj = php_converter_free_object;
983
984 /* enum UConverterCallbackReason */
985 CONV_REASON_CONST(UNASSIGNED);
986 CONV_REASON_CONST(ILLEGAL);
987 CONV_REASON_CONST(IRREGULAR);
988 CONV_REASON_CONST(RESET);
989 CONV_REASON_CONST(CLOSE);
990 CONV_REASON_CONST(CLONE);
991
992 /* enum UConverterType */
993 CONV_TYPE_CONST(UNSUPPORTED_CONVERTER);
994 CONV_TYPE_CONST(SBCS);
995 CONV_TYPE_CONST(DBCS);
996 CONV_TYPE_CONST(MBCS);
997 CONV_TYPE_CONST(LATIN_1);
998 CONV_TYPE_CONST(UTF8);
999 CONV_TYPE_CONST(UTF16_BigEndian);
1000 CONV_TYPE_CONST(UTF16_LittleEndian);
1001 CONV_TYPE_CONST(UTF32_BigEndian);
1002 CONV_TYPE_CONST(UTF32_LittleEndian);
1003 CONV_TYPE_CONST(EBCDIC_STATEFUL);
1004 CONV_TYPE_CONST(ISO_2022);
1005 CONV_TYPE_CONST(LMBCS_1);
1006 CONV_TYPE_CONST(LMBCS_2);
1007 CONV_TYPE_CONST(LMBCS_3);
1008 CONV_TYPE_CONST(LMBCS_4);
1009 CONV_TYPE_CONST(LMBCS_5);
1010 CONV_TYPE_CONST(LMBCS_6);
1011 CONV_TYPE_CONST(LMBCS_8);
1012 CONV_TYPE_CONST(LMBCS_11);
1013 CONV_TYPE_CONST(LMBCS_16);
1014 CONV_TYPE_CONST(LMBCS_17);
1015 CONV_TYPE_CONST(LMBCS_18);
1016 CONV_TYPE_CONST(LMBCS_19);
1017 CONV_TYPE_CONST(LMBCS_LAST);
1018 CONV_TYPE_CONST(HZ);
1019 CONV_TYPE_CONST(SCSU);
1020 CONV_TYPE_CONST(ISCII);
1021 CONV_TYPE_CONST(US_ASCII);
1022 CONV_TYPE_CONST(UTF7);
1023 CONV_TYPE_CONST(BOCU1);
1024 CONV_TYPE_CONST(UTF16);
1025 CONV_TYPE_CONST(UTF32);
1026 CONV_TYPE_CONST(CESU8);
1027 CONV_TYPE_CONST(IMAP_MAILBOX);
1028
1029 return SUCCESS;
1030 }
1031 /* }}} */
1032