1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | https://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Gustavo Lopes <cataphract@php.net> |
12 +----------------------------------------------------------------------+
13 */
14
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18
19 #include "php_intl.h"
20 #include "transliterator.h"
21 #include "transliterator_class.h"
22 #include "intl_data.h"
23 #include "intl_convert.h"
24
25 #include <zend_exceptions.h>
26
/*
 * Initializes "object" as a Transliterator instance for the ICU transliterator
 * identified by the UTF-8 id "str_id" (length "str_id_len").
 *
 * "direction" must be TRANSLITERATOR_FORWARD or TRANSLITERATOR_REVERSE; any
 * other value is reported through zend_argument_value_error() for argument 2
 * and FAILURE is returned before "object" is touched.
 *
 * Returns SUCCESS when the object has been fully constructed. On any later
 * failure the global intl error code/message are set, the partially built
 * object is released with zval_ptr_dtor() and FAILURE is returned.
 */
static int create_transliterator( char *str_id, size_t str_id_len, zend_long direction, zval *object )
{
	Transliterator_object *to;
	UTransliterator       *utrans;
	UParseError            parse_error;
	UChar                 *id_utf16     = NULL;
	int32_t                id_utf16_len = 0;

	intl_error_reset( NULL );

	if( !( direction == TRANSLITERATOR_FORWARD || direction == TRANSLITERATOR_REVERSE ) ) {
		zend_argument_value_error(2, "must be either Transliterator::FORWARD or Transliterator::REVERSE");
		return FAILURE;
	}

	object_init_ex( object, Transliterator_ce_ptr );
	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* fetch zend object from zval "object" into "to" */

	/* The id arrives as UTF-8; ICU wants UTF-16. */
	intl_convert_utf8_to_utf16( &id_utf16, &id_utf16_len, str_id, str_id_len,
		TRANSLITERATOR_ERROR_CODE_P( to ) );
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) ) {
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL, "String conversion of id to UTF-16 failed", 0 );
		zval_ptr_dtor( object );
		return FAILURE;
	}

	/* Ask ICU for the transliterator; no rules are supplied (NULL, -1). */
	utrans = utrans_openU( id_utf16, id_utf16_len, (UTransDirection ) direction,
		NULL, -1, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );

	/* The UTF-16 copy of the id is no longer needed, whatever the outcome. */
	if( id_utf16 != NULL ) {
		efree( id_utf16 );
	}

	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) ) {
		char *detail = NULL;

		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		spprintf( &detail, 0, "transliterator_create: unable to open ICU transliterator"
			" with id \"%s\"", str_id );
		if( detail != NULL ) {
			intl_error_set_custom_msg( NULL, detail, /* copy message */ 1 );
			efree( detail );
		} else {
			/* Fall back to a static message without the id. */
			intl_error_set_custom_msg( NULL,
				"transliterator_create: unable to open ICU transliterator", 0 );
		}
		zval_ptr_dtor( object );
		return FAILURE;
	}

	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
	/* no need to close the transliterator manually on construction error */
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) ) {
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL,
			"transliterator_create: internal constructor call failed", 0 );
		zval_ptr_dtor( object );
		return FAILURE;
	}

	return SUCCESS;
}
95
96 /* {{{ Opens a transliterator by id. */
PHP_FUNCTION(transliterator_create)97 PHP_FUNCTION( transliterator_create )
98 {
99 char *str_id;
100 size_t str_id_len;
101 zend_long direction = TRANSLITERATOR_FORWARD;
102 int res;
103
104 TRANSLITERATOR_METHOD_INIT_VARS;
105
106 (void) to; /* unused */
107
108 if( zend_parse_parameters( ZEND_NUM_ARGS(), "s|l",
109 &str_id, &str_id_len, &direction ) == FAILURE )
110 {
111 RETURN_THROWS();
112 }
113
114 object = return_value;
115 res = create_transliterator( str_id, str_id_len, direction, object );
116 if( res == FAILURE )
117 RETURN_NULL();
118
119 /* success, leave return_value as it is (set by create_transliterator) */
120 }
121 /* }}} */
122
123 /* {{{ Opens a transliterator by id. */
PHP_FUNCTION(transliterator_create_from_rules)124 PHP_FUNCTION( transliterator_create_from_rules )
125 {
126 char *str_rules;
127 size_t str_rules_len;
128 UChar *ustr_rules = NULL;
129 int32_t ustr_rules_len = 0;
130 zend_long direction = TRANSLITERATOR_FORWARD;
131 UParseError parse_error;
132 UTransliterator *utrans;
133 UChar id[] = {0x52, 0x75, 0x6C, 0x65, 0x73, 0x54, 0x72,
134 0x61, 0x6E, 0x73, 0x50, 0x48, 0x50, 0}; /* RulesTransPHP */
135 TRANSLITERATOR_METHOD_INIT_VARS;
136
137 if( zend_parse_parameters( ZEND_NUM_ARGS(), "s|l",
138 &str_rules, &str_rules_len, &direction ) == FAILURE )
139 {
140 RETURN_THROWS();
141 }
142
143 if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) )
144 {
145 zend_argument_value_error(2, "must be either Transliterator::FORWARD or Transliterator::REVERSE");
146 RETURN_THROWS();
147 }
148
149 object = return_value;
150 object_init_ex( object, Transliterator_ce_ptr );
151 TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;
152
153 intl_convert_utf8_to_utf16( &ustr_rules, &ustr_rules_len,
154 str_rules, str_rules_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
155 /* (I'm not a big fan of non-obvious flow control macros ).
156 * This one checks the error value, destroys object and returns false */
157 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "String conversion of rules to UTF-16 failed" );
158
159 /* Open ICU Transliterator. */
160 utrans = utrans_openU( id, ( sizeof( id ) - 1 ) / ( sizeof( *id ) ), (UTransDirection ) direction,
161 ustr_rules, ustr_rules_len, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
162 if (ustr_rules) {
163 efree( ustr_rules );
164 }
165
166 intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to ) );
167 if( U_FAILURE( INTL_DATA_ERROR_CODE( to ) ) )
168 {
169 char *msg = NULL;
170 smart_str parse_error_str;
171 parse_error_str = intl_parse_error_to_string( &parse_error );
172 spprintf( &msg, 0, "transliterator_create_from_rules: unable to "
173 "create ICU transliterator from rules (%s)", parse_error_str.s? ZSTR_VAL(parse_error_str.s) : "" );
174 smart_str_free( &parse_error_str );
175 if( msg != NULL )
176 {
177 intl_errors_set_custom_msg( INTL_DATA_ERROR_P( to ), msg, 1 );
178 efree( msg );
179 }
180 zval_ptr_dtor( return_value );
181 RETURN_NULL();
182 }
183 transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
184 /* no need to close the transliterator manually on construction error */
185 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create_from_rules: internal constructor call failed" );
186 }
187 /* }}} */
188
189 /* {{{ Opens the inverse transliterator transliterator. */
PHP_FUNCTION(transliterator_create_inverse)190 PHP_FUNCTION( transliterator_create_inverse )
191 {
192 Transliterator_object *to_orig;
193 UTransliterator *utrans;
194 TRANSLITERATOR_METHOD_INIT_VARS;
195
196 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
197 &object, Transliterator_ce_ptr ) == FAILURE )
198 {
199 RETURN_THROWS();
200 }
201
202 TRANSLITERATOR_METHOD_FETCH_OBJECT;
203 to_orig = to;
204
205 object = return_value;
206 object_init_ex( object, Transliterator_ce_ptr );
207 TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* change "to" into new object (from "object" ) */
208
209 utrans = utrans_openInverse( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
210 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create_inverse: could not create "
211 "inverse ICU transliterator" );
212 transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
213 /* no need to close the transliterator manually on construction error */
214 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create: internal constructor call failed" );
215 }
216 /* }}} */
217
218 /* {{{ Return an array with the registered transliterator IDs. */
PHP_FUNCTION(transliterator_list_ids)219 PHP_FUNCTION( transliterator_list_ids )
220 {
221 UEnumeration *en;
222 const UChar *elem;
223 int32_t elem_len;
224 UErrorCode status = U_ZERO_ERROR;
225
226 intl_error_reset( NULL );
227
228 if( zend_parse_parameters_none() == FAILURE )
229 {
230 RETURN_THROWS();
231 }
232
233 en = utrans_openIDs( &status );
234 INTL_CHECK_STATUS( status,
235 "transliterator_list_ids: Failed to obtain registered transliterators" );
236
237 array_init( return_value );
238 while( (elem = uenum_unext( en, &elem_len, &status )) )
239 {
240 zend_string *el = intl_convert_utf16_to_utf8(elem, elem_len, &status );
241
242 if( !el )
243 {
244 break;
245 }
246 else
247 {
248 add_next_index_str( return_value, el);
249 }
250 }
251 uenum_close( en );
252
253 intl_error_set_code( NULL, status );
254 if( U_FAILURE( status ) )
255 {
256 zend_array_destroy( Z_ARR_P(return_value) );
257 RETVAL_FALSE;
258 intl_error_set_custom_msg( NULL, "transliterator_list_ids: "
259 "Failed to build array of registered transliterators", 0 );
260 }
261 }
262 /* }}} */
263
264 /* {{{ Transliterate a string. */
PHP_FUNCTION(transliterator_transliterate)265 PHP_FUNCTION( transliterator_transliterate )
266 {
267 char *str;
268 UChar *ustr = NULL,
269 *uresult = NULL;
270 size_t str_len;
271 int32_t ustr_len = 0,
272 capacity,
273 uresult_len;
274 zend_long start = 0,
275 limit = -1;
276 int success = 0;
277 zval tmp_object;
278 TRANSLITERATOR_METHOD_INIT_VARS;
279
280 object = getThis();
281
282 ZVAL_UNDEF(&tmp_object);
283
284 if (object == NULL) {
285 /* in non-OOP version, accept both a transliterator and a string */
286 zend_string *arg1_str;
287 zend_object *arg1_obj;
288
289 ZEND_PARSE_PARAMETERS_START(2, 4)
290 Z_PARAM_OBJ_OF_CLASS_OR_STR(arg1_obj, Transliterator_ce_ptr, arg1_str)
291 Z_PARAM_STRING(str, str_len)
292 Z_PARAM_OPTIONAL
293 Z_PARAM_LONG(start)
294 Z_PARAM_LONG(limit)
295 ZEND_PARSE_PARAMETERS_END();
296
297 if (arg1_str) { /* not a transliterator object as first argument */
298 int res;
299 object = &tmp_object;
300 res = create_transliterator(ZSTR_VAL(arg1_str), ZSTR_LEN(arg1_str), TRANSLITERATOR_FORWARD, object);
301 if( res == FAILURE )
302 {
303 if (!EG(exception)) {
304 zend_string *message = intl_error_get_message( NULL );
305 php_error_docref(NULL, E_WARNING, "Could not create transliterator with ID \"%s\" (%s)", ZSTR_VAL(arg1_str), ZSTR_VAL(message) );
306 zend_string_free( message );
307 }
308 ZVAL_UNDEF(&tmp_object);
309 /* don't set U_ILLEGAL_ARGUMENT_ERROR to allow fetching of inner error */
310 goto cleanup;
311 }
312 } else {
313 ZVAL_OBJ_COPY(&tmp_object, arg1_obj);
314 object = &tmp_object;
315 }
316 } else if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|ll", &str, &str_len, &start, &limit) == FAILURE) {
317 RETURN_THROWS();
318 }
319
320 if (limit < -1) {
321 zend_argument_value_error(object ? 3 : 4, "must be greater than or equal to -1");
322 goto cleanup_object;
323 }
324
325 if (start < 0) {
326 zend_argument_value_error(object ? 2 : 3, "must be greater than or equal to 0");
327 goto cleanup_object;
328 }
329
330 if (limit != -1 && start > limit) {
331 zend_argument_value_error(object ? 2 : 3, "must be less than or equal to argument #%d ($end)", object ? 3 : 4);
332 goto cleanup_object;
333 }
334
335 /* end argument parsing/validation */
336
337 TRANSLITERATOR_METHOD_FETCH_OBJECT;
338
339 intl_convert_utf8_to_utf16(&ustr, &ustr_len, str, str_len, TRANSLITERATOR_ERROR_CODE_P(to));
340 INTL_METHOD_CHECK_STATUS_OR_GOTO(to, "String conversion of string to UTF-16 failed", cleanup_object);
341
342 /* we've started allocating resources, goto from now on */
343
344 if( ( start > ustr_len ) || (( limit != -1 ) && (limit > ustr_len ) ) )
345 {
346 char *msg;
347 spprintf( &msg, 0,
348 "transliterator_transliterate: Neither \"start\" nor the \"end\" "
349 "arguments can exceed the number of UTF-16 code units "
350 "(in this case, %d)", (int) ustr_len );
351 if(msg != NULL )
352 {
353 intl_errors_set( TRANSLITERATOR_ERROR_P( to ), U_ILLEGAL_ARGUMENT_ERROR,
354 msg, 1 );
355 efree( msg );
356 }
357 goto cleanup;
358 }
359
360 uresult = safe_emalloc( ustr_len, sizeof( UChar ), 1 * sizeof( UChar ) );
361 capacity = ustr_len + 1;
362
363 while( 1 )
364 {
365 int32_t temp_limit = ( limit == -1 ? ustr_len : (int32_t) limit );
366 memcpy( uresult, ustr, ustr_len * sizeof( UChar ) );
367 uresult_len = ustr_len;
368
369 utrans_transUChars( to->utrans, uresult, &uresult_len, capacity, (int32_t) start,
370 &temp_limit, TRANSLITERATOR_ERROR_CODE_P( to ) );
371 if( TRANSLITERATOR_ERROR_CODE( to ) == U_BUFFER_OVERFLOW_ERROR )
372 {
373 efree( uresult );
374
375 uresult = safe_emalloc( uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
376 capacity = uresult_len + 1;
377
378 intl_error_reset( TRANSLITERATOR_ERROR_P( to ) );
379 }
380 else if(TRANSLITERATOR_ERROR_CODE( to ) == U_STRING_NOT_TERMINATED_WARNING )
381 {
382 uresult = safe_erealloc( uresult, uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
383
384 intl_error_reset( TRANSLITERATOR_ERROR_P( to ) );
385 break;
386 }
387 else if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
388 {
389 intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
390 intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to ),
391 "transliterator_transliterate: transliteration failed", 0 );
392 goto cleanup;
393 }
394 else
395 break;
396 }
397
398 uresult[uresult_len] = (UChar) 0;
399
400 success = 1;
401
402 cleanup:
403 if( ustr )
404 efree( ustr );
405
406 if( success ) {
407 /* frees uresult even on error */
408 INTL_METHOD_RETVAL_UTF8( to, uresult, uresult_len, 1 );
409 }
410 else
411 {
412 if( uresult )
413 efree( uresult );
414 RETVAL_FALSE;
415 }
416
417 cleanup_object:
418 zval_ptr_dtor( &tmp_object );
419 }
420 /* }}} */
421
PHP_METHOD(Transliterator,__construct)422 PHP_METHOD( Transliterator, __construct )
423 {
424 /* this constructor shouldn't be called as it's private */
425 zend_throw_exception( NULL,
426 "An object of this type cannot be created with the new operator.",
427 0 );
428 }
429
430 /* {{{ Get the last error code for this transliterator. */
PHP_FUNCTION(transliterator_get_error_code)431 PHP_FUNCTION( transliterator_get_error_code )
432 {
433 TRANSLITERATOR_METHOD_INIT_VARS
434
435 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
436 &object, Transliterator_ce_ptr ) == FAILURE )
437 {
438 RETURN_THROWS();
439 }
440
441 /* Fetch the object (without resetting its last error code ). */
442 to = Z_INTL_TRANSLITERATOR_P( object );
443 if (to == NULL )
444 RETURN_FALSE;
445
446 RETURN_LONG( (zend_long) TRANSLITERATOR_ERROR_CODE( to ) );
447 }
448 /* }}} */
449
450
451 /* {{{ Get the last error message for this transliterator. */
PHP_FUNCTION(transliterator_get_error_message)452 PHP_FUNCTION( transliterator_get_error_message )
453 {
454 zend_string* message = NULL;
455 TRANSLITERATOR_METHOD_INIT_VARS
456
457 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
458 &object, Transliterator_ce_ptr ) == FAILURE )
459 {
460 RETURN_THROWS();
461 }
462
463
464 /* Fetch the object (without resetting its last error code ). */
465 to = Z_INTL_TRANSLITERATOR_P( object );
466 if (to == NULL )
467 RETURN_FALSE;
468
469 /* Return last error message. */
470 message = intl_error_get_message( TRANSLITERATOR_ERROR_P( to ) );
471 RETURN_STR( message );
472 }
473 /* }}} */
474