1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | https://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Gustavo Lopes <cataphract@php.net> |
12 +----------------------------------------------------------------------+
13 */
14
15 #ifdef HAVE_CONFIG_H
16 #include <config.h>
17 #endif
18
19 #include "php_intl.h"
20 #include "transliterator.h"
21 #include "transliterator_class.h"
22 #include "intl_data.h"
23 #include "intl_convert.h"
24
25 #include <zend_exceptions.h>
26
/*
 * Initializes `object` as a Transliterator instance wrapping the ICU
 * transliterator whose (UTF-8) id is `str_id`.
 *
 * `direction` must be TRANSLITERATOR_FORWARD or TRANSLITERATOR_REVERSE;
 * anything else raises an argument value error for argument #2 and the
 * function returns FAILURE without touching `object`.
 *
 * Returns SUCCESS when the object is fully constructed. Every other FAILURE
 * path records an error code and message in the global intl error and
 * releases `object` with zval_ptr_dtor(), so the caller must not use the
 * zval's previous contents afterwards.
 */
static int create_transliterator( char *str_id, size_t str_id_len, zend_long direction, zval *object )
{
	Transliterator_object *to;
	UTransliterator       *utrans;
	UParseError            parse_error;
	UChar                 *id_utf16     = NULL;
	int32_t                id_utf16_len = 0;

	intl_error_reset( NULL );

	if( direction != TRANSLITERATOR_FORWARD && direction != TRANSLITERATOR_REVERSE )
	{
		zend_argument_value_error(2, "must be either Transliterator::FORWARD or Transliterator::REVERSE");
		return FAILURE;
	}

	object_init_ex( object, Transliterator_ce_ptr );
	/* fetch zend object from zval "object" into "to" */
	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;

	/* ICU wants the id as UTF-16. */
	intl_convert_utf8_to_utf16( &id_utf16, &id_utf16_len, str_id, str_id_len,
		TRANSLITERATOR_ERROR_CODE_P( to ) );
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL, "String conversion of id to UTF-16 failed", 0 );
		goto fail;
	}

	/* Open the ICU transliterator; the UTF-16 id is not needed afterwards. */
	utrans = utrans_openU( id_utf16, id_utf16_len, (UTransDirection ) direction,
		NULL, -1, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
	if( id_utf16 != NULL )
	{
		efree( id_utf16 );
	}

	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		char *msg = NULL;

		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		spprintf( &msg, 0, "transliterator_create: unable to open ICU transliterator"
			" with id \"%s\"", str_id );
		if( msg != NULL )
		{
			intl_error_set_custom_msg( NULL, msg, /* copy message */ 1 );
			efree( msg );
		}
		else
		{
			/* formatted message unavailable: fall back to the static text */
			intl_error_set_custom_msg( NULL,
				"transliterator_create: unable to open ICU transliterator", 0 );
		}
		goto fail;
	}

	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
	/* no need to close the transliterator manually on construction error */
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL,
			"transliterator_create: internal constructor call failed", 0 );
		goto fail;
	}

	return SUCCESS;

fail:
	/* shared exit for every failure that happens after object_init_ex() */
	zval_ptr_dtor( object );
	return FAILURE;
}
95
96 /* {{{ Opens a transliterator by id. */
PHP_FUNCTION(transliterator_create)97 PHP_FUNCTION( transliterator_create )
98 {
99 char *str_id;
100 size_t str_id_len;
101 zend_long direction = TRANSLITERATOR_FORWARD;
102 int res;
103
104 TRANSLITERATOR_METHOD_INIT_VARS;
105
106 (void) to; /* unused */
107
108 ZEND_PARSE_PARAMETERS_START(1, 2)
109 Z_PARAM_STRING(str_id, str_id_len)
110 Z_PARAM_OPTIONAL
111 Z_PARAM_LONG(direction)
112 ZEND_PARSE_PARAMETERS_END();
113
114 object = return_value;
115 res = create_transliterator( str_id, str_id_len, direction, object );
116 if( res == FAILURE )
117 RETURN_NULL();
118
119 /* success, leave return_value as it is (set by create_transliterator) */
120 }
121 /* }}} */
122
123 /* {{{ Opens a transliterator by id. */
PHP_FUNCTION(transliterator_create_from_rules)124 PHP_FUNCTION( transliterator_create_from_rules )
125 {
126 char *str_rules;
127 size_t str_rules_len;
128 UChar *ustr_rules = NULL;
129 int32_t ustr_rules_len = 0;
130 zend_long direction = TRANSLITERATOR_FORWARD;
131 UParseError parse_error;
132 UTransliterator *utrans;
133 UChar id[] = {0x52, 0x75, 0x6C, 0x65, 0x73, 0x54, 0x72,
134 0x61, 0x6E, 0x73, 0x50, 0x48, 0x50, 0}; /* RulesTransPHP */
135 TRANSLITERATOR_METHOD_INIT_VARS;
136
137 ZEND_PARSE_PARAMETERS_START(1, 2)
138 Z_PARAM_STRING(str_rules, str_rules_len)
139 Z_PARAM_OPTIONAL
140 Z_PARAM_LONG(direction)
141 ZEND_PARSE_PARAMETERS_END();
142
143 if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) )
144 {
145 zend_argument_value_error(2, "must be either Transliterator::FORWARD or Transliterator::REVERSE");
146 RETURN_THROWS();
147 }
148
149 object = return_value;
150 object_init_ex( object, Transliterator_ce_ptr );
151 TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;
152
153 intl_convert_utf8_to_utf16( &ustr_rules, &ustr_rules_len,
154 str_rules, str_rules_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
155 /* (I'm not a big fan of non-obvious flow control macros ).
156 * This one checks the error value, destroys object and returns false */
157 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "String conversion of rules to UTF-16 failed" );
158
159 /* Open ICU Transliterator. */
160 utrans = utrans_openU( id, ( sizeof( id ) - 1 ) / ( sizeof( *id ) ), (UTransDirection ) direction,
161 ustr_rules, ustr_rules_len, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
162 if (ustr_rules) {
163 efree( ustr_rules );
164 }
165
166 intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to ) );
167 if( U_FAILURE( INTL_DATA_ERROR_CODE( to ) ) )
168 {
169 char *msg = NULL;
170 smart_str parse_error_str;
171 parse_error_str = intl_parse_error_to_string( &parse_error );
172 spprintf( &msg, 0, "transliterator_create_from_rules: unable to "
173 "create ICU transliterator from rules (%s)", parse_error_str.s? ZSTR_VAL(parse_error_str.s) : "" );
174 smart_str_free( &parse_error_str );
175 if( msg != NULL )
176 {
177 intl_errors_set_custom_msg( INTL_DATA_ERROR_P( to ), msg, 1 );
178 efree( msg );
179 }
180 zval_ptr_dtor( return_value );
181 RETURN_NULL();
182 }
183 transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
184 /* no need to close the transliterator manually on construction error */
185 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create_from_rules: internal constructor call failed" );
186 }
187 /* }}} */
188
189 /* {{{ Opens the inverse transliterator transliterator. */
PHP_FUNCTION(transliterator_create_inverse)190 PHP_FUNCTION( transliterator_create_inverse )
191 {
192 Transliterator_object *to_orig;
193 UTransliterator *utrans;
194 TRANSLITERATOR_METHOD_INIT_VARS;
195
196 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
197 &object, Transliterator_ce_ptr ) == FAILURE )
198 {
199 RETURN_THROWS();
200 }
201
202 TRANSLITERATOR_METHOD_FETCH_OBJECT;
203 to_orig = to;
204
205 object = return_value;
206 object_init_ex( object, Transliterator_ce_ptr );
207 TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* change "to" into new object (from "object" ) */
208
209 utrans = utrans_openInverse( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
210 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create_inverse: could not create "
211 "inverse ICU transliterator" );
212 transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
213 /* no need to close the transliterator manually on construction error */
214 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create: internal constructor call failed" );
215 }
216 /* }}} */
217
218 /* {{{ Return an array with the registered transliterator IDs. */
PHP_FUNCTION(transliterator_list_ids)219 PHP_FUNCTION( transliterator_list_ids )
220 {
221 UEnumeration *en;
222 const UChar *elem;
223 int32_t elem_len;
224 UErrorCode status = U_ZERO_ERROR;
225
226 intl_error_reset( NULL );
227
228 ZEND_PARSE_PARAMETERS_NONE();
229
230 en = utrans_openIDs( &status );
231 INTL_CHECK_STATUS( status,
232 "transliterator_list_ids: Failed to obtain registered transliterators" );
233
234 array_init( return_value );
235 while( (elem = uenum_unext( en, &elem_len, &status )) )
236 {
237 zend_string *el = intl_convert_utf16_to_utf8(elem, elem_len, &status );
238
239 if( !el )
240 {
241 break;
242 }
243 else
244 {
245 add_next_index_str( return_value, el);
246 }
247 }
248 uenum_close( en );
249
250 intl_error_set_code( NULL, status );
251 if( U_FAILURE( status ) )
252 {
253 zend_array_destroy( Z_ARR_P(return_value) );
254 RETVAL_FALSE;
255 intl_error_set_custom_msg( NULL, "transliterator_list_ids: "
256 "Failed to build array of registered transliterators", 0 );
257 }
258 }
259 /* }}} */
260
261 /* {{{ Transliterate a string. */
PHP_FUNCTION(transliterator_transliterate)262 PHP_FUNCTION( transliterator_transliterate )
263 {
264 char *str;
265 UChar *ustr = NULL,
266 *uresult = NULL;
267 size_t str_len;
268 int32_t ustr_len = 0,
269 capacity,
270 uresult_len;
271 zend_long start = 0,
272 limit = -1;
273 int success = 0;
274 zval tmp_object;
275 TRANSLITERATOR_METHOD_INIT_VARS;
276
277 object = getThis();
278
279 ZVAL_UNDEF(&tmp_object);
280
281 if (object == NULL) {
282 /* in non-OOP version, accept both a transliterator and a string */
283 zend_string *arg1_str;
284 zend_object *arg1_obj;
285
286 ZEND_PARSE_PARAMETERS_START(2, 4)
287 Z_PARAM_OBJ_OF_CLASS_OR_STR(arg1_obj, Transliterator_ce_ptr, arg1_str)
288 Z_PARAM_STRING(str, str_len)
289 Z_PARAM_OPTIONAL
290 Z_PARAM_LONG(start)
291 Z_PARAM_LONG(limit)
292 ZEND_PARSE_PARAMETERS_END();
293
294 if (arg1_str) { /* not a transliterator object as first argument */
295 int res;
296 object = &tmp_object;
297 res = create_transliterator(ZSTR_VAL(arg1_str), ZSTR_LEN(arg1_str), TRANSLITERATOR_FORWARD, object);
298 if( res == FAILURE )
299 {
300 if (!EG(exception)) {
301 zend_string *message = intl_error_get_message( NULL );
302 php_error_docref(NULL, E_WARNING, "Could not create transliterator with ID \"%s\" (%s)", ZSTR_VAL(arg1_str), ZSTR_VAL(message) );
303 zend_string_free( message );
304 }
305 ZVAL_UNDEF(&tmp_object);
306 /* don't set U_ILLEGAL_ARGUMENT_ERROR to allow fetching of inner error */
307 goto cleanup;
308 }
309 } else {
310 ZVAL_OBJ_COPY(&tmp_object, arg1_obj);
311 object = &tmp_object;
312 }
313 } else if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|ll", &str, &str_len, &start, &limit) == FAILURE) {
314 RETURN_THROWS();
315 }
316
317 if (limit < -1) {
318 zend_argument_value_error(object ? 3 : 4, "must be greater than or equal to -1");
319 goto cleanup_object;
320 }
321
322 if (start < 0) {
323 zend_argument_value_error(object ? 2 : 3, "must be greater than or equal to 0");
324 goto cleanup_object;
325 }
326
327 if (limit != -1 && start > limit) {
328 zend_argument_value_error(object ? 2 : 3, "must be less than or equal to argument #%d ($end)", object ? 3 : 4);
329 goto cleanup_object;
330 }
331
332 /* end argument parsing/validation */
333
334 TRANSLITERATOR_METHOD_FETCH_OBJECT;
335
336 intl_convert_utf8_to_utf16(&ustr, &ustr_len, str, str_len, TRANSLITERATOR_ERROR_CODE_P(to));
337 INTL_METHOD_CHECK_STATUS_OR_GOTO(to, "String conversion of string to UTF-16 failed", cleanup_object);
338
339 /* we've started allocating resources, goto from now on */
340
341 if( ( start > ustr_len ) || (( limit != -1 ) && (limit > ustr_len ) ) )
342 {
343 char *msg;
344 spprintf( &msg, 0,
345 "transliterator_transliterate: Neither \"start\" nor the \"end\" "
346 "arguments can exceed the number of UTF-16 code units "
347 "(in this case, %d)", (int) ustr_len );
348 if(msg != NULL )
349 {
350 intl_errors_set( TRANSLITERATOR_ERROR_P( to ), U_ILLEGAL_ARGUMENT_ERROR,
351 msg, 1 );
352 efree( msg );
353 }
354 goto cleanup;
355 }
356
357 uresult = safe_emalloc( ustr_len, sizeof( UChar ), 1 * sizeof( UChar ) );
358 capacity = ustr_len + 1;
359
360 while( 1 )
361 {
362 int32_t temp_limit = ( limit == -1 ? ustr_len : (int32_t) limit );
363 memcpy( uresult, ustr, ustr_len * sizeof( UChar ) );
364 uresult_len = ustr_len;
365
366 utrans_transUChars( to->utrans, uresult, &uresult_len, capacity, (int32_t) start,
367 &temp_limit, TRANSLITERATOR_ERROR_CODE_P( to ) );
368 if( TRANSLITERATOR_ERROR_CODE( to ) == U_BUFFER_OVERFLOW_ERROR )
369 {
370 efree( uresult );
371
372 uresult = safe_emalloc( uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
373 capacity = uresult_len + 1;
374
375 intl_error_reset( TRANSLITERATOR_ERROR_P( to ) );
376 }
377 else if(TRANSLITERATOR_ERROR_CODE( to ) == U_STRING_NOT_TERMINATED_WARNING )
378 {
379 uresult = safe_erealloc( uresult, uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
380
381 intl_error_reset( TRANSLITERATOR_ERROR_P( to ) );
382 break;
383 }
384 else if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
385 {
386 intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
387 intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to ),
388 "transliterator_transliterate: transliteration failed", 0 );
389 goto cleanup;
390 }
391 else
392 break;
393 }
394
395 uresult[uresult_len] = (UChar) 0;
396
397 success = 1;
398
399 cleanup:
400 if( ustr )
401 efree( ustr );
402
403 if( success ) {
404 /* frees uresult even on error */
405 INTL_METHOD_RETVAL_UTF8( to, uresult, uresult_len, 1 );
406 }
407 else
408 {
409 if( uresult )
410 efree( uresult );
411 RETVAL_FALSE;
412 }
413
414 cleanup_object:
415 zval_ptr_dtor( &tmp_object );
416 }
417 /* }}} */
418
PHP_METHOD(Transliterator,__construct)419 PHP_METHOD( Transliterator, __construct )
420 {
421 /* this constructor shouldn't be called as it's private */
422 zend_throw_exception( NULL,
423 "An object of this type cannot be created with the new operator.",
424 0 );
425 }
426
427 /* {{{ Get the last error code for this transliterator. */
PHP_FUNCTION(transliterator_get_error_code)428 PHP_FUNCTION( transliterator_get_error_code )
429 {
430 TRANSLITERATOR_METHOD_INIT_VARS
431
432 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
433 &object, Transliterator_ce_ptr ) == FAILURE )
434 {
435 RETURN_THROWS();
436 }
437
438 /* Fetch the object (without resetting its last error code ). */
439 to = Z_INTL_TRANSLITERATOR_P( object );
440 if (to == NULL )
441 RETURN_FALSE;
442
443 RETURN_LONG( (zend_long) TRANSLITERATOR_ERROR_CODE( to ) );
444 }
445 /* }}} */
446
447
448 /* {{{ Get the last error message for this transliterator. */
PHP_FUNCTION(transliterator_get_error_message)449 PHP_FUNCTION( transliterator_get_error_message )
450 {
451 zend_string* message = NULL;
452 TRANSLITERATOR_METHOD_INIT_VARS
453
454 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
455 &object, Transliterator_ce_ptr ) == FAILURE )
456 {
457 RETURN_THROWS();
458 }
459
460
461 /* Fetch the object (without resetting its last error code ). */
462 to = Z_INTL_TRANSLITERATOR_P( object );
463 if (to == NULL )
464 RETURN_FALSE;
465
466 /* Return last error message. */
467 message = intl_error_get_message( TRANSLITERATOR_ERROR_P( to ) );
468 RETURN_STR( message );
469 }
470 /* }}} */
471