1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | http://www.php.net/license/3_01.txt                                  |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Gustavo Lopes <cataphract@php.net>                          |
12    +----------------------------------------------------------------------+
13  */
14 
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include "php_intl.h"
20 #include "transliterator.h"
21 #include "transliterator_class.h"
22 #include "intl_data.h"
23 #include "intl_convert.h"
24 
25 #include <zend_exceptions.h>
26 
/* Initializes "object" as a Transliterator instance backed by the ICU
 * transliterator identified by the UTF-8 id "str_id".
 *
 * str_id / str_id_len  id of the transliterator, in UTF-8
 * direction            TRANSLITERATOR_FORWARD or TRANSLITERATOR_REVERSE
 * object               zval that receives the new object
 *
 * Returns SUCCESS or FAILURE. An invalid direction raises an argument value
 * error before "object" is touched. Every later failure records a code and
 * message in the global intl error and releases the object again with
 * zval_ptr_dtor(), so the caller must not use "object" after a FAILURE. */
static int create_transliterator( char *str_id, size_t str_id_len, zend_long direction, zval *object )
{
	Transliterator_object *to;
	UChar                 *id_utf16     = NULL;
	int32_t                id_utf16_len = 0;
	UTransliterator       *icu_trans;
	UParseError            icu_parse_err;

	intl_error_reset( NULL );

	if( !( direction == TRANSLITERATOR_FORWARD || direction == TRANSLITERATOR_REVERSE ) )
	{
		zend_argument_value_error(2, "must be either Transliterator::FORWARD or Transliterator::REVERSE");
		return FAILURE;
	}

	object_init_ex( object, Transliterator_ce_ptr );
	/* Makes "to" point at the internal object stored in "object". */
	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;

	/* ICU wants the id as UTF-16. */
	intl_convert_utf8_to_utf16( &id_utf16, &id_utf16_len, str_id, str_id_len,
		TRANSLITERATOR_ERROR_CODE_P( to ) );
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL, "String conversion of id to UTF-16 failed", 0 );
		zval_ptr_dtor( object );
		return FAILURE;
	}

	/* Ask ICU for the transliterator; no rules are passed, only the id. */
	icu_trans = utrans_openU( id_utf16, id_utf16_len, (UTransDirection ) direction,
		NULL, -1, &icu_parse_err, TRANSLITERATOR_ERROR_CODE_P( to ) );

	/* The UTF-16 copy of the id is not needed past this point. */
	if( id_utf16 != NULL )
	{
		efree( id_utf16 );
	}

	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		char *detail = NULL;

		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		spprintf( &detail, 0,
			"transliterator_create: unable to open ICU transliterator with id \"%s\"", str_id );
		if( detail != NULL )
		{
			intl_error_set_custom_msg( NULL, detail, /* copy message */ 1 );
			efree( detail );
		}
		else
		{
			/* Formatting produced nothing: fall back to the static text. */
			intl_error_set_custom_msg( NULL,
				"transliterator_create: unable to open ICU transliterator", 0 );
		}
		zval_ptr_dtor( object );
		return FAILURE;
	}

	/* Hand the ICU handle over to the PHP object. On a construction error
	 * the handle does not have to be closed manually here. */
	transliterator_object_construct( object, icu_trans, TRANSLITERATOR_ERROR_CODE_P( to ) );
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL,
			"transliterator_create: internal constructor call failed", 0 );
		zval_ptr_dtor( object );
		return FAILURE;
	}

	return SUCCESS;
}
95 
96 /* {{{ Opens a transliterator by id. */
PHP_FUNCTION(transliterator_create)97 PHP_FUNCTION( transliterator_create )
98 {
99 	char     *str_id;
100 	size_t    str_id_len;
101 	zend_long direction   = TRANSLITERATOR_FORWARD;
102 	int res;
103 
104 	TRANSLITERATOR_METHOD_INIT_VARS;
105 
106 	(void) to; /* unused */
107 
108 	if( zend_parse_parameters( ZEND_NUM_ARGS(), "s|l",
109 		&str_id, &str_id_len, &direction ) == FAILURE )
110 	{
111 		RETURN_THROWS();
112 	}
113 
114 	object = return_value;
115 	res = create_transliterator( str_id, str_id_len, direction, object );
116 	if( res == FAILURE )
117 		RETURN_NULL();
118 
119 	/* success, leave return_value as it is (set by create_transliterator) */
120 }
121 /* }}} */
122 
123 /* {{{ Opens a transliterator by id. */
PHP_FUNCTION(transliterator_create_from_rules)124 PHP_FUNCTION( transliterator_create_from_rules )
125 {
126 	char		    *str_rules;
127 	size_t          str_rules_len;
128 	UChar		    *ustr_rules    = NULL;
129 	int32_t         ustr_rules_len = 0;
130 	zend_long       direction      = TRANSLITERATOR_FORWARD;
131 	UParseError     parse_error;
132 	UTransliterator *utrans;
133 	UChar           id[] = {0x52, 0x75, 0x6C, 0x65, 0x73, 0x54, 0x72,
134 					       0x61, 0x6E, 0x73, 0x50, 0x48, 0x50, 0}; /* RulesTransPHP */
135 	TRANSLITERATOR_METHOD_INIT_VARS;
136 
137 	if( zend_parse_parameters( ZEND_NUM_ARGS(), "s|l",
138 		&str_rules, &str_rules_len, &direction ) == FAILURE )
139 	{
140 		RETURN_THROWS();
141 	}
142 
143 	if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) )
144 	{
145 		zend_argument_value_error(2, "must be either Transliterator::FORWARD or Transliterator::REVERSE");
146 		RETURN_THROWS();
147 	}
148 
149 	object = return_value;
150 	object_init_ex( object, Transliterator_ce_ptr );
151 	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;
152 
153 	intl_convert_utf8_to_utf16( &ustr_rules, &ustr_rules_len,
154 		str_rules, str_rules_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
155 	/* (I'm not a big fan of non-obvious flow control macros ).
156 	 * This one checks the error value, destroys object and returns false */
157 	INTL_METHOD_CHECK_STATUS_OR_NULL( to, "String conversion of rules to UTF-16 failed" );
158 
159 	/* Open ICU Transliterator. */
160 	utrans = utrans_openU( id, ( sizeof( id ) - 1 ) / ( sizeof( *id ) ), (UTransDirection ) direction,
161 		ustr_rules, ustr_rules_len, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
162 	if (ustr_rules) {
163 		efree( ustr_rules );
164 	}
165 
166 	intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to ) );
167 	if( U_FAILURE( INTL_DATA_ERROR_CODE( to ) ) )
168 	{
169 		char *msg = NULL;
170 		smart_str parse_error_str;
171 		parse_error_str = intl_parse_error_to_string( &parse_error );
172 		spprintf( &msg, 0, "transliterator_create_from_rules: unable to "
173 			"create ICU transliterator from rules (%s)", parse_error_str.s? ZSTR_VAL(parse_error_str.s) : "" );
174 		smart_str_free( &parse_error_str );
175 		if( msg != NULL )
176 		{
177 			intl_errors_set_custom_msg( INTL_DATA_ERROR_P( to ), msg, 1 );
178 			efree( msg );
179 		}
180 		zval_ptr_dtor( return_value );
181 		RETURN_NULL();
182     }
183 	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
184 	/* no need to close the transliterator manually on construction error */
185 	INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create_from_rules: internal constructor call failed" );
186 }
187 /* }}} */
188 
189 /* {{{ Opens the inverse transliterator transliterator. */
PHP_FUNCTION(transliterator_create_inverse)190 PHP_FUNCTION( transliterator_create_inverse )
191 {
192 	Transliterator_object *to_orig;
193 	UTransliterator       *utrans;
194 	TRANSLITERATOR_METHOD_INIT_VARS;
195 
196 	if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
197 		&object, Transliterator_ce_ptr ) == FAILURE )
198 	{
199 		RETURN_THROWS();
200 	}
201 
202 	TRANSLITERATOR_METHOD_FETCH_OBJECT;
203 	to_orig = to;
204 
205 	object = return_value;
206 	object_init_ex( object, Transliterator_ce_ptr );
207 	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* change "to" into new object (from "object" ) */
208 
209 	utrans = utrans_openInverse( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
210 	INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create_inverse: could not create "
211 		"inverse ICU transliterator" );
212 	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
213 	/* no need to close the transliterator manually on construction error */
214 	INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create: internal constructor call failed" );
215 }
216 /* }}} */
217 
218 /* {{{ Return an array with the registered transliterator IDs. */
PHP_FUNCTION(transliterator_list_ids)219 PHP_FUNCTION( transliterator_list_ids )
220 {
221 	UEnumeration  *en;
222 	const UChar	  *elem;
223 	int32_t		  elem_len;
224 	UErrorCode	  status = U_ZERO_ERROR;
225 
226 	intl_error_reset( NULL );
227 
228 	if( zend_parse_parameters_none() == FAILURE )
229 	{
230 		RETURN_THROWS();
231 	}
232 
233 	en = utrans_openIDs( &status );
234 	INTL_CHECK_STATUS( status,
235 		"transliterator_list_ids: Failed to obtain registered transliterators" );
236 
237 	array_init( return_value );
238 	while( (elem = uenum_unext( en, &elem_len, &status )) )
239 	{
240 		zend_string *el = intl_convert_utf16_to_utf8(elem, elem_len, &status );
241 
242 		if( !el )
243 		{
244 			break;
245 		}
246 		else
247 		{
248 			add_next_index_str( return_value, el);
249 		}
250 	}
251 	uenum_close( en );
252 
253 	intl_error_set_code( NULL, status );
254 	if( U_FAILURE( status ) )
255 	{
256 		zend_array_destroy( Z_ARR_P(return_value) );
257 		RETVAL_FALSE;
258 		intl_error_set_custom_msg( NULL, "transliterator_list_ids: "
259 			"Failed to build array of registered transliterators", 0 );
260 	}
261 }
262 /* }}} */
263 
264 /* {{{ Transliterate a string. */
PHP_FUNCTION(transliterator_transliterate)265 PHP_FUNCTION( transliterator_transliterate )
266 {
267 	char	    *str;
268 	UChar		*ustr		= NULL,
269 				*uresult	= NULL;
270 	size_t	    str_len;
271 	int32_t		ustr_len	= 0,
272 				capacity,
273 				uresult_len;
274 	zend_long	start		= 0,
275 				limit		= -1;
276 	int			success     = 0;
277 	zval 		tmp_object;
278 	TRANSLITERATOR_METHOD_INIT_VARS;
279 
280 	object = getThis();
281 
282 	ZVAL_UNDEF(&tmp_object);
283 
284 	if (object == NULL) {
285 		/* in non-OOP version, accept both a transliterator and a string */
286 		zend_string *arg1_str;
287 		zend_object *arg1_obj;
288 
289 		ZEND_PARSE_PARAMETERS_START(2, 4)
290 			Z_PARAM_OBJ_OF_CLASS_OR_STR(arg1_obj, Transliterator_ce_ptr, arg1_str)
291 			Z_PARAM_STRING(str, str_len)
292 			Z_PARAM_OPTIONAL
293 			Z_PARAM_LONG(start)
294 			Z_PARAM_LONG(limit)
295 		ZEND_PARSE_PARAMETERS_END();
296 
297 		if (arg1_str) { /* not a transliterator object as first argument */
298 			int res;
299 			object = &tmp_object;
300 			res = create_transliterator(ZSTR_VAL(arg1_str), ZSTR_LEN(arg1_str), TRANSLITERATOR_FORWARD, object);
301 			if( res == FAILURE )
302 			{
303 				if (!EG(exception)) {
304 					zend_string *message = intl_error_get_message( NULL );
305 					php_error_docref(NULL, E_WARNING, "Could not create transliterator with ID \"%s\" (%s)", ZSTR_VAL(arg1_str), ZSTR_VAL(message) );
306 					zend_string_free( message );
307 				}
308 				ZVAL_UNDEF(&tmp_object);
309 				/* don't set U_ILLEGAL_ARGUMENT_ERROR to allow fetching of inner error */
310 				goto cleanup;
311 			}
312 		} else {
313 			ZVAL_OBJ_COPY(&tmp_object, arg1_obj);
314 			object = &tmp_object;
315 		}
316 	} else if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|ll", &str, &str_len, &start, &limit) == FAILURE) {
317 		RETURN_THROWS();
318 	}
319 
320 	if (limit < -1) {
321 		zend_argument_value_error(object ? 3 : 4, "must be greater than or equal to -1");
322 		goto cleanup_object;
323 	}
324 
325 	if (start < 0) {
326 		zend_argument_value_error(object ? 2 : 3, "must be greater than or equal to 0");
327 		goto cleanup_object;
328 	}
329 
330 	if (limit != -1 && start > limit) {
331 		zend_argument_value_error(object ? 2 : 3, "must be less than or equal to argument #%d ($end)", object ? 3 : 4);
332 		goto cleanup_object;
333 	}
334 
335 	/* end argument parsing/validation */
336 
337 	TRANSLITERATOR_METHOD_FETCH_OBJECT;
338 
339 	intl_convert_utf8_to_utf16(&ustr, &ustr_len, str, str_len, TRANSLITERATOR_ERROR_CODE_P(to));
340 	INTL_METHOD_CHECK_STATUS_OR_GOTO(to, "String conversion of string to UTF-16 failed", cleanup_object);
341 
342 	/* we've started allocating resources, goto from now on */
343 
344 	if( ( start > ustr_len ) || (( limit != -1 ) && (limit > ustr_len ) ) )
345 	{
346 		char *msg;
347 		spprintf( &msg, 0,
348 			"transliterator_transliterate: Neither \"start\" nor the \"end\" "
349 			"arguments can exceed the number of UTF-16 code units "
350 			"(in this case, %d)", (int) ustr_len );
351 		if(msg != NULL )
352 		{
353 			intl_errors_set( TRANSLITERATOR_ERROR_P( to ), U_ILLEGAL_ARGUMENT_ERROR,
354 				msg, 1 );
355 			efree( msg );
356 		}
357 		goto cleanup;
358 	}
359 
360 	uresult = safe_emalloc( ustr_len, sizeof( UChar ), 1 * sizeof( UChar ) );
361 	capacity = ustr_len + 1;
362 
363 	while( 1 )
364 	{
365 		int32_t temp_limit = ( limit == -1 ? ustr_len : (int32_t) limit );
366 		memcpy( uresult, ustr, ustr_len * sizeof( UChar ) );
367 		uresult_len = ustr_len;
368 
369 		utrans_transUChars( to->utrans, uresult, &uresult_len, capacity, (int32_t) start,
370 			&temp_limit, TRANSLITERATOR_ERROR_CODE_P( to ) );
371 		if( TRANSLITERATOR_ERROR_CODE( to ) == U_BUFFER_OVERFLOW_ERROR )
372 		{
373 			efree( uresult );
374 
375 			uresult = safe_emalloc( uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
376 			capacity = uresult_len + 1;
377 
378 			intl_error_reset( TRANSLITERATOR_ERROR_P( to ) );
379 		}
380 		else if(TRANSLITERATOR_ERROR_CODE( to ) == U_STRING_NOT_TERMINATED_WARNING )
381 		{
382 			uresult = safe_erealloc( uresult, uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
383 
384 			intl_error_reset( TRANSLITERATOR_ERROR_P( to ) );
385 			break;
386 		}
387 		else if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
388 		{
389 			intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
390 			intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to ),
391 				"transliterator_transliterate: transliteration failed", 0 );
392 			goto cleanup;
393 		}
394 		else
395 			break;
396 	}
397 
398 	uresult[uresult_len] = (UChar) 0;
399 
400 	success = 1;
401 
402 cleanup:
403 	if( ustr )
404 		efree( ustr );
405 
406 	if( success ) {
407 		/* frees uresult even on error */
408 		INTL_METHOD_RETVAL_UTF8( to, uresult, uresult_len, 1 );
409 	}
410 	else
411 	{
412 		if( uresult )
413 			efree( uresult );
414 		RETVAL_FALSE;
415 	}
416 
417 cleanup_object:
418 	zval_ptr_dtor( &tmp_object );
419 }
420 /* }}} */
421 
PHP_METHOD(Transliterator,__construct)422 PHP_METHOD( Transliterator, __construct )
423 {
424 	/* this constructor shouldn't be called as it's private */
425 	zend_throw_exception( NULL,
426 		"An object of this type cannot be created with the new operator.",
427 		0 );
428 }
429 
430 /* {{{ Get the last error code for this transliterator. */
PHP_FUNCTION(transliterator_get_error_code)431 PHP_FUNCTION( transliterator_get_error_code )
432 {
433 	TRANSLITERATOR_METHOD_INIT_VARS
434 
435 	if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
436 		&object, Transliterator_ce_ptr ) == FAILURE )
437 	{
438 		RETURN_THROWS();
439 	}
440 
441 	/* Fetch the object (without resetting its last error code ). */
442 	to = Z_INTL_TRANSLITERATOR_P( object );
443 	if (to == NULL )
444 		RETURN_FALSE;
445 
446 	RETURN_LONG( (zend_long) TRANSLITERATOR_ERROR_CODE( to ) );
447 }
448 /* }}} */
449 
450 
451 /* {{{ Get the last error message for this transliterator. */
PHP_FUNCTION(transliterator_get_error_message)452 PHP_FUNCTION( transliterator_get_error_message )
453 {
454 	zend_string* message = NULL;
455 	TRANSLITERATOR_METHOD_INIT_VARS
456 
457 	if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
458 		&object, Transliterator_ce_ptr ) == FAILURE )
459 	{
460 		RETURN_THROWS();
461 	}
462 
463 
464 	/* Fetch the object (without resetting its last error code ). */
465 	to = Z_INTL_TRANSLITERATOR_P( object );
466 	if (to == NULL )
467 		RETURN_FALSE;
468 
469 	/* Return last error message. */
470 	message = intl_error_get_message( TRANSLITERATOR_ERROR_P( to ) );
471 	RETURN_STR( message );
472 }
473 /* }}} */
474