1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Gustavo Lopes <cataphract@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include "php_intl.h"
22 #include "transliterator.h"
23 #include "transliterator_class.h"
24 #include "transliterator_methods.h"
25 #include "intl_data.h"
26 #include "intl_convert.h"
27
28 #include <zend_exceptions.h>
29
/* Initializes "object" as a Transliterator backed by the ICU transliterator
 * identified by the UTF-8 id "str_id" (of "str_id_len" bytes).
 *
 * "direction" must be TRANSLITERATOR_FORWARD or TRANSLITERATOR_REVERSE;
 * any other value is rejected before "object" is touched.
 *
 * Returns SUCCESS or FAILURE. On every failure the global intl error is set;
 * if "object" had already been initialized it is released with
 * zval_ptr_dtor() before returning.
 */
static int create_transliterator( char *str_id, size_t str_id_len, zend_long direction, zval *object )
{
	Transliterator_object	*to;
	UTransliterator			*utrans;
	UChar					*id_utf16		= NULL;
	int32_t					id_utf16_len	= 0;
	UParseError				parse_error		= {0, -1};
	char					*detailed_msg	= NULL;

	intl_error_reset( NULL );

	if( direction != TRANSLITERATOR_FORWARD && direction != TRANSLITERATOR_REVERSE )
	{
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
			"transliterator_create: invalid direction", 0 );
		return FAILURE;
	}

	object_init_ex( object, Transliterator_ce_ptr );
	/* fetch the zend object behind zval "object" into "to" */
	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;

	/* ICU wants the id as UTF-16 */
	intl_convert_utf8_to_utf16( &id_utf16, &id_utf16_len, str_id, str_id_len,
		TRANSLITERATOR_ERROR_CODE_P( to ) );
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL, "String conversion of id to UTF-16 failed", 0 );
		goto release_object;
	}

	/* Open the ICU transliterator; the UTF-16 id is not needed afterwards. */
	utrans = utrans_openU( id_utf16, id_utf16_len, (UTransDirection ) direction,
		NULL, -1, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
	if( id_utf16 )
	{
		efree( id_utf16 );
	}

	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		spprintf( &detailed_msg, 0, "transliterator_create: unable to open ICU transliterator"
			" with id \"%s\"", str_id );
		if( detailed_msg != NULL )
		{
			intl_error_set_custom_msg( NULL, detailed_msg, /* copy message */ 1 );
			efree( detailed_msg );
		}
		else
		{
			/* fall back to the message without the id */
			intl_error_set_custom_msg( NULL,
				"transliterator_create: unable to open ICU transliterator", 0 );
		}
		goto release_object;
	}

	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
	/* no need to close the transliterator manually on construction error */
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL,
			"transliterator_create: internal constructor call failed", 0 );
		goto release_object;
	}

	return SUCCESS;

release_object:
	/* shared exit for every failure that happens after object_init_ex() */
	zval_ptr_dtor( object );
	return FAILURE;
}
99
100 /* {{{ proto Transliterator transliterator_create( string id [, int direction ] )
101 * proto Transliterator Transliterator::create( string id [, int direction ] )
102 * Opens a transliterator by id.
103 */
PHP_FUNCTION(transliterator_create)104 PHP_FUNCTION( transliterator_create )
105 {
106 char *str_id;
107 size_t str_id_len;
108 zend_long direction = TRANSLITERATOR_FORWARD;
109 int res;
110
111 TRANSLITERATOR_METHOD_INIT_VARS;
112
113 (void) to; /* unused */
114
115 if( zend_parse_parameters( ZEND_NUM_ARGS(), "s|l",
116 &str_id, &str_id_len, &direction ) == FAILURE )
117 {
118 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
119 "transliterator_create: bad arguments", 0 );
120 RETURN_NULL();
121 }
122
123 object = return_value;
124 res = create_transliterator( str_id, str_id_len, direction, object );
125 if( res == FAILURE )
126 RETURN_NULL();
127
128 /* success, leave return_value as it is (set by create_transliterator) */
129 }
130 /* }}} */
131
132 /* {{{ proto Transliterator transliterator_create_from_rules( string rules [, int direction ] )
133 * proto Transliterator Transliterator::createFromRules( string rules [, int direction ] )
134 * Opens a transliterator by id.
135 */
PHP_FUNCTION(transliterator_create_from_rules)136 PHP_FUNCTION( transliterator_create_from_rules )
137 {
138 char *str_rules;
139 size_t str_rules_len;
140 UChar *ustr_rules = NULL;
141 int32_t ustr_rules_len = 0;
142 zend_long direction = TRANSLITERATOR_FORWARD;
143 UParseError parse_error = {0, -1};
144 UTransliterator *utrans;
145 UChar id[] = {0x52, 0x75, 0x6C, 0x65, 0x73, 0x54, 0x72,
146 0x61, 0x6E, 0x73, 0x50, 0x48, 0x50, 0}; /* RulesTransPHP */
147 TRANSLITERATOR_METHOD_INIT_VARS;
148
149 if( zend_parse_parameters( ZEND_NUM_ARGS(), "s|l",
150 &str_rules, &str_rules_len, &direction ) == FAILURE )
151 {
152 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
153 "transliterator_create_from_rules: bad arguments", 0 );
154 RETURN_NULL();
155 }
156
157 if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) )
158 {
159 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
160 "transliterator_create_from_rules: invalid direction", 0 );
161 RETURN_NULL();
162 }
163
164 object = return_value;
165 object_init_ex( object, Transliterator_ce_ptr );
166 TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;
167
168 intl_convert_utf8_to_utf16( &ustr_rules, &ustr_rules_len,
169 str_rules, str_rules_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
170 /* (I'm not a big fan of non-obvious flow control macros ).
171 * This one checks the error value, destroys object and returns false */
172 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "String conversion of rules to UTF-16 failed" );
173
174 /* Open ICU Transliterator. */
175 utrans = utrans_openU( id, ( sizeof( id ) - 1 ) / ( sizeof( *id ) ), (UTransDirection ) direction,
176 ustr_rules, ustr_rules_len, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
177 if (ustr_rules) {
178 efree( ustr_rules );
179 }
180
181 intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to ) );
182 if( U_FAILURE( INTL_DATA_ERROR_CODE( to ) ) )
183 {
184 char *msg = NULL;
185 smart_str parse_error_str;
186 parse_error_str = intl_parse_error_to_string( &parse_error );
187 spprintf( &msg, 0, "transliterator_create_from_rules: unable to "
188 "create ICU transliterator from rules (%s)", parse_error_str.s? ZSTR_VAL(parse_error_str.s) : "" );
189 smart_str_free( &parse_error_str );
190 if( msg != NULL )
191 {
192 intl_errors_set_custom_msg( INTL_DATA_ERROR_P( to ), msg, 1 );
193 efree( msg );
194 }
195 zval_ptr_dtor( return_value );
196 RETURN_NULL();
197 }
198 transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
199 /* no need to close the transliterator manually on construction error */
200 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create_from_rules: internal constructor call failed" );
201 }
202 /* }}} */
203
204 /* {{{ proto Transliterator transliterator_create_inverse( Transliterator orig_trans )
205 * proto Transliterator Transliterator::createInverse()
206 * Opens the inverse transliterator transliterator.
207 */
PHP_FUNCTION(transliterator_create_inverse)208 PHP_FUNCTION( transliterator_create_inverse )
209 {
210 Transliterator_object *to_orig;
211 UTransliterator *utrans;
212 TRANSLITERATOR_METHOD_INIT_VARS;
213
214 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
215 &object, Transliterator_ce_ptr ) == FAILURE )
216 {
217 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
218 "transliterator_create_inverse: bad arguments", 0 );
219 RETURN_NULL();
220 }
221
222 TRANSLITERATOR_METHOD_FETCH_OBJECT;
223 to_orig = to;
224
225 object = return_value;
226 object_init_ex( object, Transliterator_ce_ptr );
227 TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* change "to" into new object (from "object" ) */
228
229 utrans = utrans_openInverse( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
230 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create_inverse: could not create "
231 "inverse ICU transliterator" );
232 transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
233 /* no need to close the transliterator manually on construction error */
234 INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create: internal constructor call failed" );
235 }
236 /* }}} */
237
238 /* {{{ proto array transliterator_list_ids()
239 * proto array Transliterator::listIDs()
240 * Return an array with the registered transliterator IDs.
241 */
PHP_FUNCTION(transliterator_list_ids)242 PHP_FUNCTION( transliterator_list_ids )
243 {
244 UEnumeration *en;
245 const UChar *elem;
246 int32_t elem_len;
247 UErrorCode status = U_ZERO_ERROR;
248
249 intl_error_reset( NULL );
250
251 if( zend_parse_parameters_none() == FAILURE )
252 {
253 /* seems to be the convention in this lib to return false instead of
254 * null on bad parameter types, except on constructors and factory
255 * methods */
256 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
257 "transliterator_list_ids: bad arguments", 0 );
258 RETURN_FALSE;
259 }
260
261 en = utrans_openIDs( &status );
262 INTL_CHECK_STATUS( status,
263 "transliterator_list_ids: Failed to obtain registered transliterators" );
264
265 array_init( return_value );
266 while( (elem = uenum_unext( en, &elem_len, &status )) )
267 {
268 zend_string *el = intl_convert_utf16_to_utf8(elem, elem_len, &status );
269
270 if( !el )
271 {
272 break;
273 }
274 else
275 {
276 add_next_index_str( return_value, el);
277 }
278 }
279 uenum_close( en );
280
281 intl_error_set_code( NULL, status );
282 if( U_FAILURE( status ) )
283 {
284 zend_array_destroy( Z_ARR_P(return_value) );
285 RETVAL_FALSE;
286 intl_error_set_custom_msg( NULL, "transliterator_list_ids: "
287 "Failed to build array of registered transliterators", 0 );
288 }
289 }
290 /* }}} */
291
292 /* {{{ proto string transliterator_transliterate( Transliterator trans, string subject [, int start = 0 [, int end = -1 ]] )
293 * proto string Transliterator::transliterate( string subject [, int start = 0 [, int end = -1 ]] )
294 * Transliterate a string. */
PHP_FUNCTION(transliterator_transliterate)295 PHP_FUNCTION( transliterator_transliterate )
296 {
297 char *str;
298 UChar *ustr = NULL,
299 *uresult = NULL;
300 size_t str_len;
301 int32_t ustr_len = 0,
302 capacity,
303 uresult_len;
304 zend_long start = 0,
305 limit = -1;
306 int success = 0;
307 zval tmp_object;
308 TRANSLITERATOR_METHOD_INIT_VARS;
309
310 object = getThis();
311 ZVAL_UNDEF(&tmp_object);
312
313 if( object == NULL )
314 {
315 /* in non-OOP version, accept both a transliterator and a string */
316 zval *arg1;
317 if( zend_parse_parameters( ZEND_NUM_ARGS(), "zs|ll",
318 &arg1, &str, &str_len, &start, &limit ) == FAILURE )
319 {
320 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
321 "transliterator_transliterate: bad arguments", 0 );
322 RETURN_FALSE;
323 }
324
325 if( Z_TYPE_P( arg1 ) == IS_OBJECT &&
326 instanceof_function( Z_OBJCE_P( arg1 ), Transliterator_ce_ptr ) )
327 {
328 object = arg1;
329 }
330 else
331 { /* not a transliterator object as first argument */
332 int res;
333 if(Z_TYPE_P( arg1 ) != IS_STRING )
334 {
335 convert_to_string( arg1 );
336 }
337 object = &tmp_object;
338 res = create_transliterator( Z_STRVAL_P( arg1 ), Z_STRLEN_P( arg1 ),
339 TRANSLITERATOR_FORWARD, object );
340 if( res == FAILURE )
341 {
342 zend_string *message = intl_error_get_message( NULL );
343 php_error_docref0( NULL, E_WARNING, "Could not create "
344 "transliterator with ID \"%s\" (%s)", Z_STRVAL_P( arg1 ), ZSTR_VAL(message) );
345 zend_string_free( message );
346 ZVAL_UNDEF(&tmp_object);
347 /* don't set U_ILLEGAL_ARGUMENT_ERROR to allow fetching of inner error */
348 goto cleanup;
349 }
350 }
351 }
352 else if( zend_parse_parameters( ZEND_NUM_ARGS(), "s|ll",
353 &str, &str_len, &start, &limit ) == FAILURE )
354 {
355 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
356 "transliterator_transliterate: bad arguments", 0 );
357 RETURN_FALSE;
358 }
359
360 if( limit < -1 )
361 {
362 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
363 "transliterator_transliterate: \"end\" argument should be "
364 "either non-negative or -1", 0 );
365 RETURN_FALSE;
366 }
367
368 if( start < 0 || ((limit != -1 ) && (start > limit )) )
369 {
370 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
371 "transliterator_transliterate: \"start\" argument should be "
372 "non-negative and not bigger than \"end\" (if defined)", 0 );
373 RETURN_FALSE;
374 }
375
376 /* end argument parsing/validation */
377
378 TRANSLITERATOR_METHOD_FETCH_OBJECT;
379
380 intl_convert_utf8_to_utf16( &ustr, &ustr_len, str, str_len,
381 TRANSLITERATOR_ERROR_CODE_P( to ) );
382 INTL_METHOD_CHECK_STATUS( to, "String conversion of string to UTF-16 failed" );
383
384 /* we've started allocating resources, goto from now on */
385
386 if( ( start > ustr_len ) || (( limit != -1 ) && (limit > ustr_len ) ) )
387 {
388 char *msg;
389 spprintf( &msg, 0,
390 "transliterator_transliterate: Neither \"start\" nor the \"end\" "
391 "arguments can exceed the number of UTF-16 code units "
392 "(in this case, %d)", (int) ustr_len );
393 if(msg != NULL )
394 {
395 intl_errors_set( TRANSLITERATOR_ERROR_P( to ), U_ILLEGAL_ARGUMENT_ERROR,
396 msg, 1 );
397 efree( msg );
398 }
399 RETVAL_FALSE;
400 goto cleanup;
401 }
402
403 uresult = safe_emalloc( ustr_len, sizeof( UChar ), 1 * sizeof( UChar ) );
404 capacity = ustr_len + 1;
405
406 while( 1 )
407 {
408 int32_t temp_limit = ( limit == -1 ? ustr_len : (int32_t) limit );
409 memcpy( uresult, ustr, ustr_len * sizeof( UChar ) );
410 uresult_len = ustr_len;
411
412 utrans_transUChars( to->utrans, uresult, &uresult_len, capacity, (int32_t) start,
413 &temp_limit, TRANSLITERATOR_ERROR_CODE_P( to ) );
414 if( TRANSLITERATOR_ERROR_CODE( to ) == U_BUFFER_OVERFLOW_ERROR )
415 {
416 efree( uresult );
417
418 uresult = safe_emalloc( uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
419 capacity = uresult_len + 1;
420
421 intl_error_reset( TRANSLITERATOR_ERROR_P( to ) );
422 }
423 else if(TRANSLITERATOR_ERROR_CODE( to ) == U_STRING_NOT_TERMINATED_WARNING )
424 {
425 uresult = safe_erealloc( uresult, uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
426
427 intl_error_reset( TRANSLITERATOR_ERROR_P( to ) );
428 break;
429 }
430 else if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
431 {
432 intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
433 intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to ),
434 "transliterator_transliterate: transliteration failed", 0 );
435 goto cleanup;
436 }
437 else
438 break;
439 }
440
441 uresult[uresult_len] = (UChar) 0;
442
443 success = 1;
444
445 cleanup:
446 if( ustr )
447 efree( ustr );
448
449 if( success ) {
450 /* frees uresult even on error */
451 INTL_METHOD_RETVAL_UTF8( to, uresult, uresult_len, 1 );
452 }
453 else
454 {
455 if( uresult )
456 efree( uresult );
457 RETVAL_FALSE;
458 }
459
460 zval_ptr_dtor( &tmp_object );
461 }
462 /* }}} */
463
PHP_METHOD(Transliterator,__construct)464 PHP_METHOD( Transliterator, __construct )
465 {
466 /* this constructor shouldn't be called as it's private */
467 zend_throw_exception( NULL,
468 "An object of this type cannot be created with the new operator.",
469 0 );
470 }
471
472 /* {{{ proto int transliterator_get_error_code( Transliterator trans )
473 * proto int Transliterator::getErrorCode()
474 * Get the last error code for this transliterator.
475 */
PHP_FUNCTION(transliterator_get_error_code)476 PHP_FUNCTION( transliterator_get_error_code )
477 {
478 TRANSLITERATOR_METHOD_INIT_VARS
479
480 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
481 &object, Transliterator_ce_ptr ) == FAILURE )
482 {
483 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
484 "transliterator_get_error_code: unable to parse input params", 0 );
485
486 RETURN_FALSE;
487 }
488
489 /* Fetch the object (without resetting its last error code ). */
490 to = Z_INTL_TRANSLITERATOR_P( object );
491 if (to == NULL )
492 RETURN_FALSE;
493
494 RETURN_LONG( (zend_long) TRANSLITERATOR_ERROR_CODE( to ) );
495 }
496 /* }}} */
497
498
499 /* {{{ proto string transliterator_get_error_message( Transliterator trans )
500 * proto string Transliterator::getErrorMessage()
501 * Get the last error message for this transliterator.
502 */
PHP_FUNCTION(transliterator_get_error_message)503 PHP_FUNCTION( transliterator_get_error_message )
504 {
505 zend_string* message = NULL;
506 TRANSLITERATOR_METHOD_INIT_VARS
507
508 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
509 &object, Transliterator_ce_ptr ) == FAILURE )
510 {
511 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
512 "transliterator_get_error_message: unable to parse input params", 0 );
513
514 RETURN_FALSE;
515 }
516
517
518 /* Fetch the object (without resetting its last error code ). */
519 to = Z_INTL_TRANSLITERATOR_P( object );
520 if (to == NULL )
521 RETURN_FALSE;
522
523 /* Return last error message. */
524 message = intl_error_get_message( TRANSLITERATOR_ERROR_P( to ) );
525 RETURN_STR( message );
526 }
527 /* }}} */
528
529
530 /*
531 * Local variables:
532 * tab-width: 4
533 * c-basic-offset: 4
534 * End:
535 * vim600: noet sw=4 ts=4 fdm=marker
536 * vim<600: noet sw=4 ts=4
537 */
538