1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Gustavo Lopes <cataphract@php.net>                          |
14    +----------------------------------------------------------------------+
15  */
16 
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include "php_intl.h"
22 #include "transliterator.h"
23 #include "transliterator_class.h"
24 #include "transliterator_methods.h"
25 #include "intl_data.h"
26 #include "intl_convert.h"
27 
28 #include <zend_exceptions.h>
29 
/* Initializes "object" as a Transliterator and opens the ICU transliterator
 * identified by "str_id".
 *
 * str_id, str_id_len  transliterator id and its length; it is converted to
 *                     UTF-16 with intl_convert_utf8_to_utf16()
 * direction           TRANSLITERATOR_FORWARD or TRANSLITERATOR_REVERSE
 * object              zval that receives the new Transliterator object
 *
 * Returns SUCCESS or FAILURE. On failure an error code and message are
 * recorded through the intl_error_*( NULL, ... ) calls. If "object" had
 * already been initialized it is destroyed and reset to NULL, so a caller
 * that releases the zval afterwards cannot destroy the object a second time.
 */
static int create_transliterator( char *str_id, int str_id_len, long direction, zval *object TSRMLS_DC )
{
	Transliterator_object *to;
	UChar                 *ustr_id     = NULL;
	int32_t               ustr_id_len  = 0;
	UTransliterator       *utrans;
	UParseError           parse_error  = {0, -1};

	intl_error_reset( NULL TSRMLS_CC );

	if( ( direction != TRANSLITERATOR_FORWARD ) && ( direction != TRANSLITERATOR_REVERSE ) )
	{
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
			"transliterator_create: invalid direction", 0 TSRMLS_CC );
		/* "object" has not been touched yet, nothing to destroy */
		return FAILURE;
	}

	object_init_ex( object, Transliterator_ce_ptr );
	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* fetch zend object from zval "object" into "to" */

	/* Convert transliterator id to UTF-16 */
	intl_convert_utf8_to_utf16( &ustr_id, &ustr_id_len, str_id, str_id_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
		intl_error_set_custom_msg( NULL, "String conversion of id to UTF-16 failed", 0 TSRMLS_CC );
		goto failure;
	}

	/* Open ICU Transliterator. */
	utrans = utrans_openU( ustr_id, ustr_id_len, (UTransDirection ) direction,
		NULL, -1, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
	/* the UTF-16 copy of the id is only needed for the call above */
	if( ustr_id )
	{
		efree( ustr_id );
	}

	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		char *buf = NULL;
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
		spprintf( &buf, 0, "transliterator_create: unable to open ICU transliterator"
			" with id \"%s\"", str_id );
		if( buf == NULL )
		{
			/* fall back to a static message when the formatted one is unavailable */
			intl_error_set_custom_msg( NULL,
				"transliterator_create: unable to open ICU transliterator", 0 TSRMLS_CC );
		}
		else
		{
			intl_error_set_custom_msg( NULL, buf, /* copy message */ 1 TSRMLS_CC );
			efree( buf );
		}
		goto failure;
	}

	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC );
	/* no need to close the transliterator manually on construction error */
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
		intl_error_set_custom_msg( NULL,
			"transliterator_create: internal constructor call failed", 0 TSRMLS_CC );
		goto failure;
	}

	return SUCCESS;

failure:
	/* The error code held by "to" has already been copied out above; "to"
	 * must not be used once the object is destroyed. Resetting the zval to
	 * NULL keeps a later zval_dtor()/zval_ptr_dtor() by the caller from
	 * acting on the destroyed object again. */
	zval_dtor( object );
	ZVAL_NULL( object );
	return FAILURE;
}
99 
100 /* {{{ proto Transliterator transliterator_create( string id [, int direction ] )
101  *     proto Transliterator Transliterator::create( string id [, int direction ] )
102  * Opens a transliterator by id.
103  */
PHP_FUNCTION(transliterator_create)104 PHP_FUNCTION( transliterator_create )
105 {
106 	char  *str_id;
107 	int   str_id_len;
108 	long  direction   = TRANSLITERATOR_FORWARD;
109 	int res;
110 
111 	TRANSLITERATOR_METHOD_INIT_VARS;
112 
113 	(void) to; /* unused */
114 
115 	if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|l",
116 		&str_id, &str_id_len, &direction ) == FAILURE )
117 	{
118 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
119 			"transliterator_create: bad arguments", 0 TSRMLS_CC );
120 		RETURN_NULL();
121 	}
122 
123 	object = return_value;
124 	res = create_transliterator( str_id, str_id_len, direction, object TSRMLS_CC );
125 	if( res == FAILURE )
126 		RETURN_NULL();
127 
128 	/* success, leave return_value as it is (set by create_transliterator) */
129 }
130 /* }}} */
131 
132 /* {{{ proto Transliterator transliterator_create_from_rules( string rules [, int direction ] )
133  *     proto Transliterator Transliterator::createFromRules( string rules [, int direction ] )
 * Opens a transliterator from a set of rules.
135  */
PHP_FUNCTION(transliterator_create_from_rules)136 PHP_FUNCTION( transliterator_create_from_rules )
137 {
138 	char		    *str_rules;
139 	int             str_rules_len;
140 	UChar		    *ustr_rules    = NULL;
141 	int32_t         ustr_rules_len = 0;
142 	long            direction      = TRANSLITERATOR_FORWARD;
143 	UParseError     parse_error    = {0, -1};
144 	UTransliterator *utrans;
145 	UChar           id[] = {0x52, 0x75, 0x6C, 0x65, 0x73, 0x54, 0x72,
146 					       0x61, 0x6E, 0x73, 0x50, 0x48, 0x50, 0}; /* RulesTransPHP */
147 	TRANSLITERATOR_METHOD_INIT_VARS;
148 
149 	if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|l",
150 		&str_rules, &str_rules_len, &direction ) == FAILURE )
151 	{
152 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
153 			"transliterator_create_from_rules: bad arguments", 0 TSRMLS_CC );
154 		RETURN_NULL();
155 	}
156 
157 	if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) )
158 	{
159 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
160 			"transliterator_create_from_rules: invalid direction", 0 TSRMLS_CC );
161 		RETURN_NULL();
162 	}
163 
164 	object = return_value;
165 	object_init_ex( object, Transliterator_ce_ptr );
166 	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;
167 
168 	intl_convert_utf8_to_utf16( &ustr_rules, &ustr_rules_len,
169 		str_rules, str_rules_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
170 	/* (I'm not a big fan of non-obvious flow control macros ).
171 	 * This one checks the error value, destroys object and returns false */
172 	INTL_CTOR_CHECK_STATUS( to, "String conversion of rules to UTF-16 failed" );
173 
174 	/* Open ICU Transliterator. */
175 	utrans = utrans_openU( id, ( sizeof( id ) - 1 ) / ( sizeof( *id ) ), (UTransDirection ) direction,
176 		ustr_rules, ustr_rules_len, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
177 	if (ustr_rules) {
178 		efree( ustr_rules );
179 	}
180 
181 	intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to ) TSRMLS_CC );
182 	if( U_FAILURE( INTL_DATA_ERROR_CODE( to ) ) )
183 	{
184 		char *msg = NULL;
185 		smart_str parse_error_str;
186 		parse_error_str = transliterator_parse_error_to_string( &parse_error );
187 		spprintf( &msg, 0, "transliterator_create_from_rules: unable to "
188 			"create ICU transliterator from rules (%s)", parse_error_str.c );
189 		smart_str_free( &parse_error_str );
190 		if( msg != NULL )
191 		{
192 			intl_errors_set_custom_msg( INTL_DATA_ERROR_P( to ), msg, 1 TSRMLS_CC );
193 			efree( msg );
194 		}
195 		zval_dtor( return_value );
196 		RETURN_NULL();
197     }
198 	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC );
199 	/* no need to close the transliterator manually on construction error */
200 	INTL_CTOR_CHECK_STATUS( to, "transliterator_create_from_rules: internal constructor call failed" );
201 }
202 /* }}} */
203 
204 /* {{{ proto Transliterator transliterator_create_inverse( Transliterator orig_trans )
205  *     proto Transliterator Transliterator::createInverse()
 * Opens the inverse of an existing transliterator.
207  */
PHP_FUNCTION(transliterator_create_inverse)208 PHP_FUNCTION( transliterator_create_inverse )
209 {
210 	Transliterator_object *to_orig;
211 	UTransliterator       *utrans;
212 	TRANSLITERATOR_METHOD_INIT_VARS;
213 
214 	if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O",
215 		&object, Transliterator_ce_ptr ) == FAILURE )
216 	{
217 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
218 			"transliterator_create_inverse: bad arguments", 0 TSRMLS_CC );
219 		RETURN_NULL();
220 	}
221 
222 	TRANSLITERATOR_METHOD_FETCH_OBJECT;
223 	to_orig = to;
224 
225 	object = return_value;
226 	object_init_ex( object, Transliterator_ce_ptr );
227 	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* change "to" into new object (from "object" ) */
228 
229 	utrans = utrans_openInverse( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
230 	INTL_CTOR_CHECK_STATUS( to, "transliterator_create_inverse: could not create "
231 		"inverse ICU transliterator" );
232 	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC );
233 	/* no need to close the transliterator manually on construction error */
234 	INTL_CTOR_CHECK_STATUS( to, "transliterator_create: internal constructor call failed" );
235 }
236 /* }}} */
237 
238 /* {{{ proto array transliterator_list_ids()
239  *     proto array Transliterator::listIDs()
240  * Return an array with the registered transliterator IDs.
241  */
PHP_FUNCTION(transliterator_list_ids)242 PHP_FUNCTION( transliterator_list_ids )
243 {
244 	UEnumeration  *en;
245 	const UChar	  *elem;
246 	int32_t		  elem_len;
247 	UErrorCode	  status = U_ZERO_ERROR;
248 
249 	intl_error_reset( NULL TSRMLS_CC );
250 
251 	if( zend_parse_parameters_none() == FAILURE )
252 	{
253 		/* seems to be the convention in this lib to return false instead of
254 		 * null on bad parameter types, except on constructors and factory
255 		 * methods */
256 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
257 			"transliterator_list_ids: bad arguments", 0 TSRMLS_CC );
258 		RETURN_FALSE;
259 	}
260 
261 	en = utrans_openIDs( &status );
262 	INTL_CHECK_STATUS( status,
263 		"transliterator_list_ids: Failed to obtain registered transliterators" );
264 
265 	array_init( return_value );
266 	while( (elem = uenum_unext( en, &elem_len, &status )) )
267 	{
268 		char *el_char = NULL;
269 		int  el_len   = 0;
270 
271 		intl_convert_utf16_to_utf8( &el_char, &el_len, elem, elem_len, &status );
272 
273 		if( U_FAILURE( status ) )
274 		{
275 			efree( el_char );
276 			break;
277 		}
278 		else
279 		{
280 			add_next_index_stringl( return_value, el_char, el_len, 0 );
281 		}
282 	}
283 	uenum_close( en );
284 
285 	intl_error_set_code( NULL, status TSRMLS_CC );
286 	if( U_FAILURE( status ) )
287 	{
288 		zval_dtor( return_value );
289 		RETVAL_FALSE;
290 		intl_error_set_custom_msg( NULL, "transliterator_list_ids: "
291 			"Failed to build array of registered transliterators", 0 TSRMLS_CC );
292 	}
293 }
294 /* }}} */
295 
296 /* {{{ proto string transliterator_transliterate( Transliterator trans, string subject [, int start = 0 [, int end = -1 ]] )
297  *     proto string Transliterator::transliterate( string subject [, int start = 0 [, int end = -1 ]] )
298  * Transliterate a string. */
PHP_FUNCTION(transliterator_transliterate)299 PHP_FUNCTION( transliterator_transliterate )
300 {
301 	char	    *str;
302 	UChar		*ustr		= NULL,
303 				*uresult	= NULL;
304 	int			str_len;
305 	int32_t		ustr_len	= 0,
306 				capacity,
307 				uresult_len;
308 	long		start		= 0,
309 				limit		= -1;
310 	int			success     = 0,
311 				temp_trans  = 0;
312 	TRANSLITERATOR_METHOD_INIT_VARS;
313 
314 	object = getThis();
315 
316 	if( object == NULL )
317 	{
318 		/* in non-OOP version, accept both a transliterator and a string */
319 		zval **arg1;
320 		if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "Zs|ll",
321 			&arg1, &str, &str_len, &start, &limit ) == FAILURE )
322 		{
323 			intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
324 				"transliterator_transliterate: bad arguments", 0 TSRMLS_CC );
325 			RETURN_FALSE;
326 		}
327 
328 		if( Z_TYPE_PP( arg1 ) == IS_OBJECT &&
329 			instanceof_function( Z_OBJCE_PP( arg1 ), Transliterator_ce_ptr TSRMLS_CC ) )
330 		{
331 			object = *arg1;
332 		}
333 		else
334 		{ /* not a transliterator object as first argument */
335 			int res;
336 			if(Z_TYPE_PP( arg1 ) != IS_STRING )
337 			{
338 				SEPARATE_ZVAL( arg1 );
339 				convert_to_string( *arg1 );
340 			}
341 			ALLOC_INIT_ZVAL( object );
342 			temp_trans = 1;
343 			res = create_transliterator( Z_STRVAL_PP( arg1 ), Z_STRLEN_PP( arg1 ),
344 					TRANSLITERATOR_FORWARD, object TSRMLS_CC );
345 			if( res == FAILURE )
346 			{
347 				char *message = intl_error_get_message( NULL TSRMLS_CC );
348 				php_error_docref0( NULL TSRMLS_CC, E_WARNING, "Could not create "
349 					"transliterator with ID \"%s\" (%s)", Z_STRVAL_PP( arg1 ), message );
350 				efree( message );
351 				/* don't set U_ILLEGAL_ARGUMENT_ERROR to allow fetching of inner error */
352 				goto cleanup;
353 			}
354 		}
355 	}
356 	else if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|ll",
357 		&str, &str_len, &start, &limit ) == FAILURE )
358 	{
359 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
360 			"transliterator_transliterate: bad arguments", 0 TSRMLS_CC );
361 		RETURN_FALSE;
362 	}
363 
364 	if( limit < -1 )
365 	{
366 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
367 			"transliterator_transliterate: \"end\" argument should be "
368 			"either non-negative or -1", 0 TSRMLS_CC );
369 		RETURN_FALSE;
370 	}
371 
372 	if( start < 0 || ((limit != -1 ) && (start > limit )) )
373 	{
374 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
375 			"transliterator_transliterate: \"start\" argument should be "
376 			"non-negative and not bigger than \"end\" (if defined)", 0 TSRMLS_CC );
377 		RETURN_FALSE;
378 	}
379 
380 	/* end argument parsing/validation */
381 
382 	TRANSLITERATOR_METHOD_FETCH_OBJECT;
383 
384 	intl_convert_utf8_to_utf16( &ustr, &ustr_len, str, str_len,
385 		TRANSLITERATOR_ERROR_CODE_P( to ) );
386 	INTL_METHOD_CHECK_STATUS( to, "String conversion of string to UTF-16 failed" );
387 
388 	/* we've started allocating resources, goto from now on */
389 
390 	if( ( start > ustr_len ) || (( limit != -1 ) && (limit > ustr_len ) ) )
391 	{
392 		char *msg;
393 		spprintf( &msg, 0,
394 			"transliterator_transliterate: Neither \"start\" nor the \"end\" "
395 			"arguments can exceed the number of UTF-16 code units "
396 			"(in this case, %d)", (int) ustr_len );
397 		if(msg != NULL )
398 		{
399 			intl_errors_set( TRANSLITERATOR_ERROR_P( to ), U_ILLEGAL_ARGUMENT_ERROR,
400 				msg, 1 TSRMLS_CC );
401 			efree( msg );
402 		}
403 		RETVAL_FALSE;
404 		goto cleanup;
405 	}
406 
407 	uresult = safe_emalloc( ustr_len, sizeof( UChar ), 1 * sizeof( UChar ) );
408 	capacity = ustr_len + 1;
409 
410 	while( 1 )
411 	{
412 		int32_t temp_limit = ( limit == -1 ? ustr_len : (int32_t) limit );
413 		memcpy( uresult, ustr, ustr_len * sizeof( UChar ) );
414 		uresult_len = ustr_len;
415 
416 		utrans_transUChars( to->utrans, uresult, &uresult_len, capacity, (int32_t) start,
417 			&temp_limit, TRANSLITERATOR_ERROR_CODE_P( to ) );
418 		if( TRANSLITERATOR_ERROR_CODE( to ) == U_BUFFER_OVERFLOW_ERROR )
419 		{
420 			efree( uresult );
421 
422 			uresult = safe_emalloc( uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
423 			capacity = uresult_len + 1;
424 
425 			intl_error_reset( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC );
426 		}
427 		else if(TRANSLITERATOR_ERROR_CODE( to ) == U_STRING_NOT_TERMINATED_WARNING )
428 		{
429 			uresult = safe_erealloc( uresult, uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
430 
431 			intl_error_reset( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC );
432 			break;
433 		}
434 		else if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
435 		{
436 			intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
437 			intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to ),
438 				"transliterator_transliterate: transliteration failed", 0 TSRMLS_CC );
439 			goto cleanup;
440 		}
441 		else
442 			break;
443 	}
444 
445 	uresult[uresult_len] = (UChar) 0;
446 
447 	success = 1;
448 
449 cleanup:
450 	if( ustr )
451 		efree( ustr );
452 
453 	if( success ) {
454 		/* frees uresult even on error */
455 		INTL_METHOD_RETVAL_UTF8( to, uresult, uresult_len, 1 );
456 	}
457 	else
458 	{
459 		if( uresult )
460 			efree( uresult );
461 		RETVAL_FALSE;
462 	}
463 
464 	if (temp_trans )
465 		zval_ptr_dtor( &object );
466 }
467 /* }}} */
468 
PHP_METHOD(Transliterator,__construct)469 PHP_METHOD( Transliterator, __construct )
470 {
471 	/* this constructor shouldn't be called as it's private */
472 	zend_throw_exception( NULL,
473 		"An object of this type cannot be created with the new operator.",
474 		0 TSRMLS_CC );
475 }
476 
477 /* {{{ proto int transliterator_get_error_code( Transliterator trans )
478  *     proto int Transliterator::getErrorCode()
479  * Get the last error code for this transliterator.
480  */
PHP_FUNCTION(transliterator_get_error_code)481 PHP_FUNCTION( transliterator_get_error_code )
482 {
483 	TRANSLITERATOR_METHOD_INIT_VARS
484 
485 	if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O",
486 		&object, Transliterator_ce_ptr ) == FAILURE )
487 	{
488 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
489 			"transliterator_get_error_code: unable to parse input params", 0 TSRMLS_CC );
490 
491 		RETURN_FALSE;
492 	}
493 
494 	/* Fetch the object (without resetting its last error code ). */
495 	to = zend_object_store_get_object( object TSRMLS_CC );
496 	if (to == NULL )
497 		RETURN_FALSE;
498 
499 	RETURN_LONG( (long) TRANSLITERATOR_ERROR_CODE( to ) );
500 }
501 /* }}} */
502 
503 
504 /* {{{ proto string transliterator_get_error_message( Transliterator trans )
505  *     proto string Transliterator::getErrorMessage()
506  * Get the last error message for this transliterator.
507  */
PHP_FUNCTION(transliterator_get_error_message)508 PHP_FUNCTION( transliterator_get_error_message )
509 {
510 	const char* message = NULL;
511 	TRANSLITERATOR_METHOD_INIT_VARS
512 
513 	if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O",
514 		&object, Transliterator_ce_ptr ) == FAILURE )
515 	{
516 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
517 			"transliterator_get_error_message: unable to parse input params", 0 TSRMLS_CC );
518 
519 		RETURN_FALSE;
520 	}
521 
522 
523 	/* Fetch the object (without resetting its last error code ). */
524 	to = zend_object_store_get_object( object TSRMLS_CC );
525 	if (to == NULL )
526 		RETURN_FALSE;
527 
528 	/* Return last error message. */
529 	message = intl_error_get_message( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC );
530 	RETURN_STRING( message, 0 );
531 }
532 /* }}} */
533 
534 
535 /*
536  * Local variables:
537  * tab-width: 4
538  * c-basic-offset: 4
539  * End:
540  * vim600: noet sw=4 ts=4 fdm=marker
541  * vim<600: noet sw=4 ts=4
542  */
543