1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Gustavo Lopes <cataphract@php.net>                          |
12    +----------------------------------------------------------------------+
13  */
14 
15 #ifdef HAVE_CONFIG_H
16 #include <config.h>
17 #endif
18 
19 #include "php_intl.h"
20 #include "transliterator.h"
21 #include "transliterator_class.h"
22 #include "intl_data.h"
23 #include "intl_convert.h"
24 
25 #include <zend_exceptions.h>
26 
/*
 * Initializes `object` as a Transliterator instance backed by the ICU
 * transliterator identified by `str_id` (UTF-8, `str_id_len` bytes).
 *
 * `direction` must be TRANSLITERATOR_FORWARD or TRANSLITERATOR_REVERSE;
 * any other value raises an argument value error for argument #2 and
 * `object` is left untouched.
 *
 * Returns SUCCESS or FAILURE. On every failure that happens after `object`
 * has been initialized, the global intl error is set and `object` is
 * released with zval_ptr_dtor() before returning.
 */
static int create_transliterator( char *str_id, size_t str_id_len, zend_long direction, zval *object )
{
	Transliterator_object *to;
	UTransliterator       *utrans;
	UParseError            parse_error;
	UChar                 *id_utf16     = NULL;
	int32_t                id_utf16_len = 0;

	intl_error_reset( NULL );

	if( direction != TRANSLITERATOR_FORWARD && direction != TRANSLITERATOR_REVERSE )
	{
		zend_argument_value_error(2, "must be either Transliterator::FORWARD or Transliterator::REVERSE");
		return FAILURE;
	}

	object_init_ex( object, Transliterator_ce_ptr );
	/* fetch zend object from zval "object" into "to" */
	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;

	/* ICU expects the id as UTF-16. */
	intl_convert_utf8_to_utf16( &id_utf16, &id_utf16_len, str_id, str_id_len,
		TRANSLITERATOR_ERROR_CODE_P( to ) );
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL, "String conversion of id to UTF-16 failed", 0 );
		goto fail;
	}

	/* Open the ICU transliterator by id (no rules are supplied). */
	utrans = utrans_openU( id_utf16, id_utf16_len, (UTransDirection ) direction,
		NULL, -1, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
	/* The UTF-16 copy of the id is not needed past this point. */
	if( id_utf16 != NULL )
	{
		efree( id_utf16 );
	}

	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		char *detailed = NULL;

		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		spprintf( &detailed, 0, "transliterator_create: unable to open ICU transliterator"
			" with id \"%s\"", str_id );
		if( detailed != NULL )
		{
			/* the message is copied, so the buffer can be freed right away */
			intl_error_set_custom_msg( NULL, detailed, /* copy message */ 1 );
			efree( detailed );
		}
		else
		{
			/* fall back to the generic message without the id */
			intl_error_set_custom_msg( NULL,
				"transliterator_create: unable to open ICU transliterator", 0 );
		}
		goto fail;
	}

	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
	/* no need to close the transliterator manually on construction error */
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL,
			"transliterator_create: internal constructor call failed", 0 );
		goto fail;
	}

	return SUCCESS;

fail:
	/* Shared failure exit: drop the partially built object. */
	zval_ptr_dtor( object );
	return FAILURE;
}
95 
96 /* {{{ Opens a transliterator by id. */
PHP_FUNCTION(transliterator_create)97 PHP_FUNCTION( transliterator_create )
98 {
99 	char     *str_id;
100 	size_t    str_id_len;
101 	zend_long direction   = TRANSLITERATOR_FORWARD;
102 	int res;
103 
104 	TRANSLITERATOR_METHOD_INIT_VARS;
105 
106 	(void) to; /* unused */
107 
108 	ZEND_PARSE_PARAMETERS_START(1, 2)
109 		Z_PARAM_STRING(str_id, str_id_len)
110 		Z_PARAM_OPTIONAL
111 		Z_PARAM_LONG(direction)
112 	ZEND_PARSE_PARAMETERS_END();
113 
114 	object = return_value;
115 	res = create_transliterator( str_id, str_id_len, direction, object );
116 	if( res == FAILURE )
117 		RETURN_NULL();
118 
119 	/* success, leave return_value as it is (set by create_transliterator) */
120 }
121 /* }}} */
122 
123 /* {{{ Opens a transliterator by id. */
PHP_FUNCTION(transliterator_create_from_rules)124 PHP_FUNCTION( transliterator_create_from_rules )
125 {
126 	char		    *str_rules;
127 	size_t          str_rules_len;
128 	UChar		    *ustr_rules    = NULL;
129 	int32_t         ustr_rules_len = 0;
130 	zend_long       direction      = TRANSLITERATOR_FORWARD;
131 	UParseError     parse_error;
132 	UTransliterator *utrans;
133 	UChar           id[] = {0x52, 0x75, 0x6C, 0x65, 0x73, 0x54, 0x72,
134 					       0x61, 0x6E, 0x73, 0x50, 0x48, 0x50, 0}; /* RulesTransPHP */
135 	TRANSLITERATOR_METHOD_INIT_VARS;
136 
137 	ZEND_PARSE_PARAMETERS_START(1, 2)
138 		Z_PARAM_STRING(str_rules, str_rules_len)
139 		Z_PARAM_OPTIONAL
140 		Z_PARAM_LONG(direction)
141 	ZEND_PARSE_PARAMETERS_END();
142 
143 	if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) )
144 	{
145 		zend_argument_value_error(2, "must be either Transliterator::FORWARD or Transliterator::REVERSE");
146 		RETURN_THROWS();
147 	}
148 
149 	object = return_value;
150 	object_init_ex( object, Transliterator_ce_ptr );
151 	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;
152 
153 	intl_convert_utf8_to_utf16( &ustr_rules, &ustr_rules_len,
154 		str_rules, str_rules_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
155 	/* (I'm not a big fan of non-obvious flow control macros ).
156 	 * This one checks the error value, destroys object and returns false */
157 	INTL_METHOD_CHECK_STATUS_OR_NULL( to, "String conversion of rules to UTF-16 failed" );
158 
159 	/* Open ICU Transliterator. */
160 	utrans = utrans_openU( id, ( sizeof( id ) - 1 ) / ( sizeof( *id ) ), (UTransDirection ) direction,
161 		ustr_rules, ustr_rules_len, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
162 	if (ustr_rules) {
163 		efree( ustr_rules );
164 	}
165 
166 	intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to ) );
167 	if( U_FAILURE( INTL_DATA_ERROR_CODE( to ) ) )
168 	{
169 		char *msg = NULL;
170 		smart_str parse_error_str;
171 		parse_error_str = intl_parse_error_to_string( &parse_error );
172 		spprintf( &msg, 0, "transliterator_create_from_rules: unable to "
173 			"create ICU transliterator from rules (%s)", parse_error_str.s? ZSTR_VAL(parse_error_str.s) : "" );
174 		smart_str_free( &parse_error_str );
175 		if( msg != NULL )
176 		{
177 			intl_errors_set_custom_msg( INTL_DATA_ERROR_P( to ), msg, 1 );
178 			efree( msg );
179 		}
180 		zval_ptr_dtor( return_value );
181 		RETURN_NULL();
182 	}
183 	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
184 	/* no need to close the transliterator manually on construction error */
185 	INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create_from_rules: internal constructor call failed" );
186 }
187 /* }}} */
188 
189 /* {{{ Opens the inverse transliterator transliterator. */
PHP_FUNCTION(transliterator_create_inverse)190 PHP_FUNCTION( transliterator_create_inverse )
191 {
192 	Transliterator_object *to_orig;
193 	UTransliterator       *utrans;
194 	TRANSLITERATOR_METHOD_INIT_VARS;
195 
196 	if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
197 		&object, Transliterator_ce_ptr ) == FAILURE )
198 	{
199 		RETURN_THROWS();
200 	}
201 
202 	TRANSLITERATOR_METHOD_FETCH_OBJECT;
203 	to_orig = to;
204 
205 	object = return_value;
206 	object_init_ex( object, Transliterator_ce_ptr );
207 	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* change "to" into new object (from "object" ) */
208 
209 	utrans = utrans_openInverse( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
210 	INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create_inverse: could not create "
211 		"inverse ICU transliterator" );
212 	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
213 	/* no need to close the transliterator manually on construction error */
214 	INTL_METHOD_CHECK_STATUS_OR_NULL( to, "transliterator_create: internal constructor call failed" );
215 }
216 /* }}} */
217 
218 /* {{{ Return an array with the registered transliterator IDs. */
PHP_FUNCTION(transliterator_list_ids)219 PHP_FUNCTION( transliterator_list_ids )
220 {
221 	UEnumeration  *en;
222 	const UChar	  *elem;
223 	int32_t		  elem_len;
224 	UErrorCode	  status = U_ZERO_ERROR;
225 
226 	intl_error_reset( NULL );
227 
228 	ZEND_PARSE_PARAMETERS_NONE();
229 
230 	en = utrans_openIDs( &status );
231 	INTL_CHECK_STATUS( status,
232 		"transliterator_list_ids: Failed to obtain registered transliterators" );
233 
234 	array_init( return_value );
235 	while( (elem = uenum_unext( en, &elem_len, &status )) )
236 	{
237 		zend_string *el = intl_convert_utf16_to_utf8(elem, elem_len, &status );
238 
239 		if( !el )
240 		{
241 			break;
242 		}
243 		else
244 		{
245 			add_next_index_str( return_value, el);
246 		}
247 	}
248 	uenum_close( en );
249 
250 	intl_error_set_code( NULL, status );
251 	if( U_FAILURE( status ) )
252 	{
253 		zend_array_destroy( Z_ARR_P(return_value) );
254 		RETVAL_FALSE;
255 		intl_error_set_custom_msg( NULL, "transliterator_list_ids: "
256 			"Failed to build array of registered transliterators", 0 );
257 	}
258 }
259 /* }}} */
260 
261 /* {{{ Transliterate a string. */
PHP_FUNCTION(transliterator_transliterate)262 PHP_FUNCTION( transliterator_transliterate )
263 {
264 	char	    *str;
265 	UChar		*ustr		= NULL,
266 				*uresult	= NULL;
267 	size_t	    str_len;
268 	int32_t		ustr_len	= 0,
269 				capacity,
270 				uresult_len;
271 	zend_long	start		= 0,
272 				limit		= -1;
273 	int			success     = 0;
274 	zval 		tmp_object;
275 	TRANSLITERATOR_METHOD_INIT_VARS;
276 
277 	object = getThis();
278 
279 	ZVAL_UNDEF(&tmp_object);
280 
281 	if (object == NULL) {
282 		/* in non-OOP version, accept both a transliterator and a string */
283 		zend_string *arg1_str;
284 		zend_object *arg1_obj;
285 
286 		ZEND_PARSE_PARAMETERS_START(2, 4)
287 			Z_PARAM_OBJ_OF_CLASS_OR_STR(arg1_obj, Transliterator_ce_ptr, arg1_str)
288 			Z_PARAM_STRING(str, str_len)
289 			Z_PARAM_OPTIONAL
290 			Z_PARAM_LONG(start)
291 			Z_PARAM_LONG(limit)
292 		ZEND_PARSE_PARAMETERS_END();
293 
294 		if (arg1_str) { /* not a transliterator object as first argument */
295 			int res;
296 			object = &tmp_object;
297 			res = create_transliterator(ZSTR_VAL(arg1_str), ZSTR_LEN(arg1_str), TRANSLITERATOR_FORWARD, object);
298 			if( res == FAILURE )
299 			{
300 				if (!EG(exception)) {
301 					zend_string *message = intl_error_get_message( NULL );
302 					php_error_docref(NULL, E_WARNING, "Could not create transliterator with ID \"%s\" (%s)", ZSTR_VAL(arg1_str), ZSTR_VAL(message) );
303 					zend_string_free( message );
304 				}
305 				ZVAL_UNDEF(&tmp_object);
306 				/* don't set U_ILLEGAL_ARGUMENT_ERROR to allow fetching of inner error */
307 				goto cleanup;
308 			}
309 		} else {
310 			ZVAL_OBJ_COPY(&tmp_object, arg1_obj);
311 			object = &tmp_object;
312 		}
313 	} else if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|ll", &str, &str_len, &start, &limit) == FAILURE) {
314 		RETURN_THROWS();
315 	}
316 
317 	if (limit < -1) {
318 		zend_argument_value_error(object ? 3 : 4, "must be greater than or equal to -1");
319 		goto cleanup_object;
320 	}
321 
322 	if (start < 0) {
323 		zend_argument_value_error(object ? 2 : 3, "must be greater than or equal to 0");
324 		goto cleanup_object;
325 	}
326 
327 	if (limit != -1 && start > limit) {
328 		zend_argument_value_error(object ? 2 : 3, "must be less than or equal to argument #%d ($end)", object ? 3 : 4);
329 		goto cleanup_object;
330 	}
331 
332 	/* end argument parsing/validation */
333 
334 	TRANSLITERATOR_METHOD_FETCH_OBJECT;
335 
336 	intl_convert_utf8_to_utf16(&ustr, &ustr_len, str, str_len, TRANSLITERATOR_ERROR_CODE_P(to));
337 	INTL_METHOD_CHECK_STATUS_OR_GOTO(to, "String conversion of string to UTF-16 failed", cleanup_object);
338 
339 	/* we've started allocating resources, goto from now on */
340 
341 	if( ( start > ustr_len ) || (( limit != -1 ) && (limit > ustr_len ) ) )
342 	{
343 		char *msg;
344 		spprintf( &msg, 0,
345 			"transliterator_transliterate: Neither \"start\" nor the \"end\" "
346 			"arguments can exceed the number of UTF-16 code units "
347 			"(in this case, %d)", (int) ustr_len );
348 		if(msg != NULL )
349 		{
350 			intl_errors_set( TRANSLITERATOR_ERROR_P( to ), U_ILLEGAL_ARGUMENT_ERROR,
351 				msg, 1 );
352 			efree( msg );
353 		}
354 		goto cleanup;
355 	}
356 
357 	uresult = safe_emalloc( ustr_len, sizeof( UChar ), 1 * sizeof( UChar ) );
358 	capacity = ustr_len + 1;
359 
360 	while( 1 )
361 	{
362 		int32_t temp_limit = ( limit == -1 ? ustr_len : (int32_t) limit );
363 		memcpy( uresult, ustr, ustr_len * sizeof( UChar ) );
364 		uresult_len = ustr_len;
365 
366 		utrans_transUChars( to->utrans, uresult, &uresult_len, capacity, (int32_t) start,
367 			&temp_limit, TRANSLITERATOR_ERROR_CODE_P( to ) );
368 		if( TRANSLITERATOR_ERROR_CODE( to ) == U_BUFFER_OVERFLOW_ERROR )
369 		{
370 			efree( uresult );
371 
372 			uresult = safe_emalloc( uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
373 			capacity = uresult_len + 1;
374 
375 			intl_error_reset( TRANSLITERATOR_ERROR_P( to ) );
376 		}
377 		else if(TRANSLITERATOR_ERROR_CODE( to ) == U_STRING_NOT_TERMINATED_WARNING )
378 		{
379 			uresult = safe_erealloc( uresult, uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
380 
381 			intl_error_reset( TRANSLITERATOR_ERROR_P( to ) );
382 			break;
383 		}
384 		else if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
385 		{
386 			intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
387 			intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to ),
388 				"transliterator_transliterate: transliteration failed", 0 );
389 			goto cleanup;
390 		}
391 		else
392 			break;
393 	}
394 
395 	uresult[uresult_len] = (UChar) 0;
396 
397 	success = 1;
398 
399 cleanup:
400 	if( ustr )
401 		efree( ustr );
402 
403 	if( success ) {
404 		/* frees uresult even on error */
405 		INTL_METHOD_RETVAL_UTF8( to, uresult, uresult_len, 1 );
406 	}
407 	else
408 	{
409 		if( uresult )
410 			efree( uresult );
411 		RETVAL_FALSE;
412 	}
413 
414 cleanup_object:
415 	zval_ptr_dtor( &tmp_object );
416 }
417 /* }}} */
418 
PHP_METHOD(Transliterator,__construct)419 PHP_METHOD( Transliterator, __construct )
420 {
421 	/* this constructor shouldn't be called as it's private */
422 	zend_throw_exception( NULL,
423 		"An object of this type cannot be created with the new operator.",
424 		0 );
425 }
426 
427 /* {{{ Get the last error code for this transliterator. */
PHP_FUNCTION(transliterator_get_error_code)428 PHP_FUNCTION( transliterator_get_error_code )
429 {
430 	TRANSLITERATOR_METHOD_INIT_VARS
431 
432 	if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
433 		&object, Transliterator_ce_ptr ) == FAILURE )
434 	{
435 		RETURN_THROWS();
436 	}
437 
438 	/* Fetch the object (without resetting its last error code ). */
439 	to = Z_INTL_TRANSLITERATOR_P( object );
440 	if (to == NULL )
441 		RETURN_FALSE;
442 
443 	RETURN_LONG( (zend_long) TRANSLITERATOR_ERROR_CODE( to ) );
444 }
445 /* }}} */
446 
447 
448 /* {{{ Get the last error message for this transliterator. */
PHP_FUNCTION(transliterator_get_error_message)449 PHP_FUNCTION( transliterator_get_error_message )
450 {
451 	zend_string* message = NULL;
452 	TRANSLITERATOR_METHOD_INIT_VARS
453 
454 	if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "O",
455 		&object, Transliterator_ce_ptr ) == FAILURE )
456 	{
457 		RETURN_THROWS();
458 	}
459 
460 
461 	/* Fetch the object (without resetting its last error code ). */
462 	to = Z_INTL_TRANSLITERATOR_P( object );
463 	if (to == NULL )
464 		RETURN_FALSE;
465 
466 	/* Return last error message. */
467 	message = intl_error_get_message( TRANSLITERATOR_ERROR_P( to ) );
468 	RETURN_STR( message );
469 }
470 /* }}} */
471