1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
14    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
15    +----------------------------------------------------------------------+
16  */
17 
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include "php_intl.h"
23 #include "collator_class.h"
24 #include "collator_is_numeric.h"
25 #include "collator_convert.h"
26 #include "intl_convert.h"
27 
28 #include <unicode/ustring.h>
29 #include <php.h>
30 
/*
 * Optional trailing argument for the object `cast_object` handler call.
 * Expands to ",0" when building against PHP 5.0/5.1 and to nothing on
 * every other version.
 * NOTE(review): presumably this supplies the handler's extra
 * "should free" parameter that only those versions declare - confirm
 * against the Zend headers of the targeted versions.
 */
#if (PHP_MAJOR_VERSION == 5 && PHP_MINOR_VERSION <= 1)
#define CAST_OBJECT_SHOULD_FREE ,0
#else
#define CAST_OBJECT_SHOULD_FREE
#endif

/*
 * Leave the enclosing function, handing `retval` back to the caller
 * unchanged: a reference is added to it first, so the caller can release
 * the result the same way whether or not a conversion took place.
 *
 * Wrapped in do { } while( 0 ) so the expansion is a single statement and
 * stays correct under an unbraced if/else; the argument is parenthesized
 * so that any lvalue expression can be passed.
 */
#define COLLATOR_CONVERT_RETURN_FAILED(retval) do { \
		zval_add_ref( &(retval) );              \
		return (retval);                        \
	} while( 0 )
41 
42 /* {{{ collator_convert_hash_item_from_utf8_to_utf16 */
collator_convert_hash_item_from_utf8_to_utf16(HashTable * hash,int hashKeyType,char * hashKey,ulong hashIndex,UErrorCode * status)43 static void collator_convert_hash_item_from_utf8_to_utf16(
44 	HashTable* hash, int hashKeyType, char* hashKey, ulong hashIndex,
45 	UErrorCode* status )
46 {
47 	const char* old_val;
48 	int         old_val_len;
49 	UChar*      new_val      = NULL;
50 	int         new_val_len  = 0;
51 	zval**      hashData     = NULL;
52 	zval*       znew_val     = NULL;
53 
54 	/* Get current hash item. */
55 	zend_hash_get_current_data( hash, (void**) &hashData );
56 
57 	/* Process string values only. */
58 	if( Z_TYPE_P( *hashData ) != IS_STRING )
59 		return;
60 
61 	old_val     = Z_STRVAL_P( *hashData );
62 	old_val_len = Z_STRLEN_P( *hashData );
63 
64 	/* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
65 	intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
66 	if( U_FAILURE( *status ) )
67 		return;
68 
69 	/* Update current hash item with the converted value. */
70 	MAKE_STD_ZVAL( znew_val );
71 	ZVAL_STRINGL( znew_val, (char*)new_val, UBYTES(new_val_len), FALSE );
72 
73 	if( hashKeyType == HASH_KEY_IS_STRING )
74 	{
75 		zend_hash_update( hash, hashKey, strlen( hashKey ) + 1,
76 			(void*) &znew_val, sizeof(zval*), NULL );
77 	}
78 	else /* hashKeyType == HASH_KEY_IS_LONG */
79 	{
80 		zend_hash_index_update( hash, hashIndex,
81 			(void*) &znew_val, sizeof(zval*), NULL );
82 	}
83 }
84 /* }}} */
85 
86 /* {{{ collator_convert_hash_item_from_utf16_to_utf8 */
collator_convert_hash_item_from_utf16_to_utf8(HashTable * hash,int hashKeyType,char * hashKey,ulong hashIndex,UErrorCode * status)87 static void collator_convert_hash_item_from_utf16_to_utf8(
88 	HashTable* hash, int hashKeyType, char* hashKey, ulong hashIndex,
89 	UErrorCode* status )
90 {
91 	const char* old_val;
92 	int         old_val_len;
93 	char*       new_val      = NULL;
94 	int         new_val_len  = 0;
95 	zval**      hashData     = NULL;
96 	zval*       znew_val     = NULL;
97 
98 	/* Get current hash item. */
99 	zend_hash_get_current_data( hash, (void**) &hashData );
100 
101 	/* Process string values only. */
102 	if( Z_TYPE_P( *hashData ) != IS_STRING )
103 		return;
104 
105 	old_val     = Z_STRVAL_P( *hashData );
106 	old_val_len = Z_STRLEN_P( *hashData );
107 
108 	/* Convert it from UTF-16LE to UTF-8 and save the result to new_val[_len]. */
109 	intl_convert_utf16_to_utf8( &new_val, &new_val_len,
110 		(UChar*)old_val, UCHARS(old_val_len), status );
111 	if( U_FAILURE( *status ) )
112 		return;
113 
114 	/* Update current hash item with the converted value. */
115 	MAKE_STD_ZVAL( znew_val );
116 	ZVAL_STRINGL( znew_val, (char*)new_val, new_val_len, FALSE );
117 
118 	if( hashKeyType == HASH_KEY_IS_STRING )
119 	{
120 		zend_hash_update( hash, hashKey, strlen( hashKey ) + 1,
121 			(void*) &znew_val, sizeof(zval*), NULL );
122 	}
123 	else /* hashKeyType == HASH_KEY_IS_LONG */
124 	{
125 		zend_hash_index_update( hash, hashIndex,
126 			(void*) &znew_val, sizeof(zval*), NULL );
127 	}
128 }
129 /* }}} */
130 
131 /* {{{ collator_convert_hash_from_utf8_to_utf16
132  *  Convert values of the given hash from UTF-8 encoding to UTF-16LE.
133  */
collator_convert_hash_from_utf8_to_utf16(HashTable * hash,UErrorCode * status)134 void collator_convert_hash_from_utf8_to_utf16( HashTable* hash, UErrorCode* status )
135 {
136 	ulong    hashIndex    = 0;
137 	char*    hashKey      = NULL;
138 	int      hashKeyType  = 0;
139 
140 	zend_hash_internal_pointer_reset( hash );
141 	while( ( hashKeyType = zend_hash_get_current_key( hash, &hashKey, &hashIndex, 0 ) )
142 			!= HASH_KEY_NON_EXISTENT )
143 	{
144 		/* Convert current hash item from UTF-8 to UTF-16LE. */
145 		collator_convert_hash_item_from_utf8_to_utf16(
146 			hash, hashKeyType, hashKey, hashIndex, status );
147 		if( U_FAILURE( *status ) )
148 			return;
149 
150 		/* Proceed to the next item. */
151 		zend_hash_move_forward( hash );
152 	}
153 }
154 /* }}} */
155 
156 /* {{{ collator_convert_hash_from_utf16_to_utf8
157  * Convert values of the given hash from UTF-16LE encoding to UTF-8.
158  */
collator_convert_hash_from_utf16_to_utf8(HashTable * hash,UErrorCode * status)159 void collator_convert_hash_from_utf16_to_utf8( HashTable* hash, UErrorCode* status )
160 {
161 	ulong    hashIndex    = 0;
162 	char*    hashKey      = NULL;
163 	int      hashKeyType  = 0;
164 
165 	zend_hash_internal_pointer_reset( hash );
166 	while( ( hashKeyType = zend_hash_get_current_key( hash, &hashKey, &hashIndex, 0 ) )
167 			!= HASH_KEY_NON_EXISTENT )
168 	{
169 		/* Convert current hash item from UTF-16LE to UTF-8. */
170 		collator_convert_hash_item_from_utf16_to_utf8(
171 			hash, hashKeyType, hashKey, hashIndex, status );
172 		if( U_FAILURE( *status ) ) {
173 			return;
174 		}
175 
176 		/* Proceed to the next item. */
177 		zend_hash_move_forward( hash );
178 	}
179 }
180 /* }}} */
181 
182 /* {{{ collator_convert_zstr_utf16_to_utf8
183  *
184  * Convert string from utf16 to utf8.
185  *
186  * @param  zval* utf16_zval String to convert.
187  *
188  * @return zval* Converted string.
189  */
collator_convert_zstr_utf16_to_utf8(zval * utf16_zval)190 zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval )
191 {
192 	zval* utf8_zval   = NULL;
193 	char* str         = NULL;
194 	int   str_len     = 0;
195 	UErrorCode status = U_ZERO_ERROR;
196 
197 	/* Convert to utf8 then. */
198 	intl_convert_utf16_to_utf8( &str, &str_len,
199 		(UChar*) Z_STRVAL_P(utf16_zval), UCHARS( Z_STRLEN_P(utf16_zval) ), &status );
200 	if( U_FAILURE( status ) )
201 		php_error( E_WARNING, "Error converting utf16 to utf8 in collator_convert_zval_utf16_to_utf8()" );
202 
203 	ALLOC_INIT_ZVAL( utf8_zval );
204 	ZVAL_STRINGL( utf8_zval, str, str_len, FALSE );
205 
206 	return utf8_zval;
207 }
208 /* }}} */
209 
210 /* {{{ collator_convert_zstr_utf8_to_utf16
211  *
212  * Convert string from utf8 to utf16.
213  *
214  * @param  zval* utf8_zval String to convert.
215  *
216  * @return zval* Converted string.
217  */
collator_convert_zstr_utf8_to_utf16(zval * utf8_zval)218 zval* collator_convert_zstr_utf8_to_utf16( zval* utf8_zval )
219 {
220 	zval* zstr        = NULL;
221 	UChar* ustr       = NULL;
222 	int    ustr_len   = 0;
223 	UErrorCode status = U_ZERO_ERROR;
224 
225 	/* Convert the string to UTF-16. */
226 	intl_convert_utf8_to_utf16(
227 			&ustr, &ustr_len,
228 			Z_STRVAL_P( utf8_zval ), Z_STRLEN_P( utf8_zval ),
229 			&status );
230 	if( U_FAILURE( status ) )
231 		php_error( E_WARNING, "Error casting object to string in collator_convert_zstr_utf8_to_utf16()" );
232 
233 	/* Set string. */
234 	ALLOC_INIT_ZVAL( zstr );
235 	ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len), FALSE );
236 
237 	return zstr;
238 }
239 /* }}} */
240 
241 /* {{{ collator_convert_object_to_string
242  * Convert object to UTF16-encoded string.
243  */
collator_convert_object_to_string(zval * obj TSRMLS_DC)244 zval* collator_convert_object_to_string( zval* obj TSRMLS_DC )
245 {
246 	zval* zstr        = NULL;
247 	UErrorCode status = U_ZERO_ERROR;
248 	UChar* ustr       = NULL;
249 	int    ustr_len   = 0;
250 
251 	/* Bail out if it's not an object. */
252 	if( Z_TYPE_P( obj ) != IS_OBJECT )
253 	{
254 		COLLATOR_CONVERT_RETURN_FAILED( obj );
255 	}
256 
257 	/* Try object's handlers. */
258 	if( Z_OBJ_HT_P(obj)->get )
259 	{
260 		zstr = Z_OBJ_HT_P(obj)->get( obj TSRMLS_CC );
261 
262 		switch( Z_TYPE_P( zstr ) )
263 		{
264 			case IS_OBJECT:
265 				{
266 					/* Bail out. */
267 					zval_ptr_dtor( &zstr );
268 					COLLATOR_CONVERT_RETURN_FAILED( obj );
269 				} break;
270 
271 			case IS_STRING:
272 				break;
273 
274 			default:
275 				{
276 					convert_to_string( zstr );
277 				} break;
278 		}
279 	}
280 	else if( Z_OBJ_HT_P(obj)->cast_object )
281 	{
282 		ALLOC_INIT_ZVAL( zstr );
283 
284 		if( Z_OBJ_HT_P(obj)->cast_object( obj, zstr, IS_STRING CAST_OBJECT_SHOULD_FREE TSRMLS_CC ) == FAILURE )
285 		{
286 			/* cast_object failed => bail out. */
287 			zval_ptr_dtor( &zstr );
288 			COLLATOR_CONVERT_RETURN_FAILED( obj );
289 		}
290 	}
291 
292 	/* Object wasn't successfuly converted => bail out. */
293 	if( zstr == NULL )
294 	{
295 		COLLATOR_CONVERT_RETURN_FAILED( obj );
296 	}
297 
298 	/* Convert the string to UTF-16. */
299 	intl_convert_utf8_to_utf16(
300 			&ustr, &ustr_len,
301 			Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
302 			&status );
303 	if( U_FAILURE( status ) )
304 		php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
305 
306 	/* Cleanup zstr to hold utf16 string. */
307 	zval_dtor( zstr );
308 
309 	/* Set string. */
310 	ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len), FALSE );
311 
312 	/* Don't free ustr cause it's set in zstr without copy.
313 	 * efree( ustr );
314 	 */
315 
316 	return zstr;
317 }
318 /* }}} */
319 
320 /* {{{ collator_convert_string_to_number
321  *
322  * Convert string to number.
323  *
324  * @param  zval* str String to convert.
325  *
326  * @return zval* Number. If str is not numeric string return number zero.
327  */
collator_convert_string_to_number(zval * str)328 zval* collator_convert_string_to_number( zval* str )
329 {
330 	zval* num = collator_convert_string_to_number_if_possible( str );
331 	if( num == str )
332 	{
333 		/* String wasn't converted => return zero. */
334 		zval_ptr_dtor( &num );
335 
336 		ALLOC_INIT_ZVAL( num );
337 		ZVAL_LONG( num, 0 );
338 	}
339 
340 	return num;
341 }
342 /* }}} */
343 
344 /* {{{ collator_convert_string_to_double
345  *
346  * Convert string to double.
347  *
348  * @param  zval* str String to convert.
349  *
350  * @return zval* Number. If str is not numeric string return number zero.
351  */
collator_convert_string_to_double(zval * str)352 zval* collator_convert_string_to_double( zval* str )
353 {
354 	zval* num = collator_convert_string_to_number( str );
355 	if( Z_TYPE_P(num) == IS_LONG )
356 	{
357 		ZVAL_DOUBLE( num, Z_LVAL_P( num ) );
358 	}
359 
360 	return num;
361 }
362 /* }}} */
363 
364 /* {{{ collator_convert_string_to_number_if_possible
365  *
366  * Convert string to numer.
367  *
368  * @param  zval* str String to convert.
369  *
370  * @return zval* Number if str is numeric string. Otherwise
371  *               original str param.
372  */
collator_convert_string_to_number_if_possible(zval * str)373 zval* collator_convert_string_to_number_if_possible( zval* str )
374 {
375 	zval* num      = NULL;
376 	int is_numeric = 0;
377 	long lval      = 0;
378 	double dval    = 0;
379 
380 	if( Z_TYPE_P( str ) != IS_STRING )
381 	{
382 		COLLATOR_CONVERT_RETURN_FAILED( str );
383 	}
384 
385 	if( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, 1 ) ) )
386 	{
387 		ALLOC_INIT_ZVAL( num );
388 
389 		if( is_numeric == IS_LONG )
390 			Z_LVAL_P(num) = lval;
391 		if( is_numeric == IS_DOUBLE )
392 			Z_DVAL_P(num) = dval;
393 
394 		Z_TYPE_P(num) = is_numeric;
395 	}
396 	else
397 	{
398 		COLLATOR_CONVERT_RETURN_FAILED( str );
399 	}
400 
401 	return num;
402 }
403 /* }}} */
404 
405 /* {{{ collator_make_printable_zval
406  *
407  * Returns string from input zval.
408  *
409  * @param  zval* arg zval to get string from
410  *
411  * @return zval* UTF16 string.
412  */
collator_make_printable_zval(zval * arg)413 zval* collator_make_printable_zval( zval* arg )
414 {
415 	zval arg_copy;
416 	int use_copy = 0;
417 	zval* str    = NULL;
418 
419 	if( Z_TYPE_P(arg) != IS_STRING )
420 	{
421 		zend_make_printable_zval(arg, &arg_copy, &use_copy);
422 
423 		if( use_copy )
424 		{
425 			str = collator_convert_zstr_utf8_to_utf16( &arg_copy );
426 			zval_dtor( &arg_copy );
427 		}
428 		else
429 		{
430 			str = collator_convert_zstr_utf8_to_utf16( arg );
431 		}
432 	}
433 	else
434 	{
435 		COLLATOR_CONVERT_RETURN_FAILED( arg );
436 	}
437 
438 	return str;
439 }
440 /* }}} */
441 
442 /* {{{ collator_normalize_sort_argument
443  *
444  * Normalize argument to use in sort's compare function.
445  *
446  * @param  zval* arg Sort's argument to normalize.
447  *
448  * @return zval* Normalized copy of arg or unmodified arg
449  *               if normalization is not needed.
450  */
collator_normalize_sort_argument(zval * arg)451 zval* collator_normalize_sort_argument( zval* arg )
452 {
453 	zval* n_arg = NULL;
454 
455 	if( Z_TYPE_P( arg ) != IS_STRING )
456 	{
457 		/* If its not a string then nothing to do.
458 		 * Return original arg.
459 		 */
460 		COLLATOR_CONVERT_RETURN_FAILED( arg );
461 	}
462 
463 	/* Try convert to number. */
464 	n_arg = collator_convert_string_to_number_if_possible( arg );
465 
466 	if( n_arg == arg )
467 	{
468 		/* Conversion to number failed. */
469 		zval_ptr_dtor( &n_arg );
470 
471 		/* Convert string to utf8. */
472 		n_arg = collator_convert_zstr_utf16_to_utf8( arg );
473 	}
474 
475 	return n_arg;
476 }
477 /* }}} */
478 /*
479  * Local variables:
480  * tab-width: 4
481  * c-basic-offset: 4
482  * End:
483  * vim600: noet sw=4 ts=4 fdm=marker
484  * vim<600: noet sw=4 ts=4
485  */
486