1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
14    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
15    +----------------------------------------------------------------------+
16  */
17 
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include "php_intl.h"
23 #include "collator_class.h"
24 #include "collator_is_numeric.h"
25 #include "collator_convert.h"
26 #include "intl_convert.h"
27 
28 #include <unicode/ustring.h>
29 #include <php.h>
30 
/* Optional trailing argument appended to cast_object handler calls.
 * When PHP_VERSION_ID <= 50100 it expands to ",0" (presumably the handler's
 * former "should free" flag -- confirm against that engine version);
 * otherwise it expands to nothing.
 */
#if PHP_VERSION_ID <= 50100
#define CAST_OBJECT_SHOULD_FREE ,0
#else
#define CAST_OBJECT_SHOULD_FREE
#endif

/* Return `retval` from the enclosing function after applying
 * Z_TRY_ADDREF_P to it.
 *
 * Wrapped in do/while(0) so the macro behaves as one statement and can be
 * followed by a semicolon anywhere, including an unbraced if/else arm.
 * `retval` is evaluated twice, so pass a plain variable only.
 */
#define COLLATOR_CONVERT_RETURN_FAILED(retval) do { \
			Z_TRY_ADDREF_P(retval);                 \
			return (retval);                        \
	} while (0)
41 
42 /* {{{ collator_convert_hash_item_from_utf8_to_utf16 */
collator_convert_hash_item_from_utf8_to_utf16(HashTable * hash,zval * hashData,zend_string * hashKey,zend_ulong hashIndex,UErrorCode * status)43 static void collator_convert_hash_item_from_utf8_to_utf16(
44 	HashTable* hash, zval *hashData, zend_string *hashKey, zend_ulong hashIndex,
45 	UErrorCode* status )
46 {
47 	const char* old_val;
48 	size_t      old_val_len;
49 	UChar*      new_val      = NULL;
50 	int32_t     new_val_len  = 0;
51 	zval        znew_val;
52 
53 	/* Process string values only. */
54 	if( Z_TYPE_P( hashData ) != IS_STRING )
55 		return;
56 
57 	old_val     = Z_STRVAL_P( hashData );
58 	old_val_len = Z_STRLEN_P( hashData );
59 
60 	/* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
61 	intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
62 	if( U_FAILURE( *status ) )
63 		return;
64 
65 	/* Update current hash item with the converted value. */
66 	ZVAL_STRINGL( &znew_val, (char*)new_val, UBYTES(new_val_len + 1) );
67 	//???
68 	efree(new_val);
69 	/* hack to fix use of initialized value */
70 	Z_STRLEN(znew_val) = Z_STRLEN(znew_val) - UBYTES(1);
71 
72 	if( hashKey)
73 	{
74 		zend_hash_update( hash, hashKey, &znew_val);
75 	}
76 	else /* hashKeyType == HASH_KEY_IS_LONG */
77 	{
78 		zend_hash_index_update( hash, hashIndex, &znew_val);
79 	}
80 }
81 /* }}} */
82 
83 /* {{{ collator_convert_hash_item_from_utf16_to_utf8 */
collator_convert_hash_item_from_utf16_to_utf8(HashTable * hash,zval * hashData,zend_string * hashKey,zend_ulong hashIndex,UErrorCode * status)84 static void collator_convert_hash_item_from_utf16_to_utf8(
85 	HashTable* hash, zval * hashData, zend_string* hashKey, zend_ulong hashIndex,
86 	UErrorCode* status )
87 {
88 	const char* old_val;
89 	size_t      old_val_len;
90 	zend_string* u8str;
91 	zval        znew_val;
92 
93 	/* Process string values only. */
94 	if( Z_TYPE_P( hashData ) != IS_STRING )
95 		return;
96 
97 	old_val     = Z_STRVAL_P( hashData );
98 	old_val_len = Z_STRLEN_P( hashData );
99 
100 	/* Convert it from UTF-16LE to UTF-8 and save the result to new_val[_len]. */
101 	u8str = intl_convert_utf16_to_utf8(
102 		(UChar*)old_val, UCHARS(old_val_len), status );
103 	if( !u8str )
104 		return;
105 
106 	/* Update current hash item with the converted value. */
107 	ZVAL_NEW_STR( &znew_val, u8str);
108 
109 	if( hashKey )
110 	{
111 		zend_hash_update( hash, hashKey, &znew_val);
112 	}
113 	else /* hashKeyType == HASH_KEY_IS_LONG */
114 	{
115 		zend_hash_index_update( hash, hashIndex, &znew_val);
116 	}
117 }
118 /* }}} */
119 
120 /* {{{ collator_convert_hash_from_utf8_to_utf16
121  *  Convert values of the given hash from UTF-8 encoding to UTF-16LE.
122  */
collator_convert_hash_from_utf8_to_utf16(HashTable * hash,UErrorCode * status)123 void collator_convert_hash_from_utf8_to_utf16( HashTable* hash, UErrorCode* status )
124 {
125 	zend_ulong    hashIndex;
126 	zval *hashData;
127 	zend_string *hashKey;
128 
129 	ZEND_HASH_FOREACH_KEY_VAL(hash, hashIndex, hashKey, hashData) {
130 		/* Convert current hash item from UTF-8 to UTF-16LE. */
131 		collator_convert_hash_item_from_utf8_to_utf16(
132 			hash, hashData, hashKey, hashIndex, status );
133 		if( U_FAILURE( *status ) )
134 			return;
135 	} ZEND_HASH_FOREACH_END();
136 }
137 /* }}} */
138 
139 /* {{{ collator_convert_hash_from_utf16_to_utf8
140  * Convert values of the given hash from UTF-16LE encoding to UTF-8.
141  */
collator_convert_hash_from_utf16_to_utf8(HashTable * hash,UErrorCode * status)142 void collator_convert_hash_from_utf16_to_utf8( HashTable* hash, UErrorCode* status )
143 {
144 	zend_ulong hashIndex;
145 	zend_string *hashKey;
146 	zval *hashData;
147 
148 	ZEND_HASH_FOREACH_KEY_VAL(hash, hashIndex, hashKey, hashData) {
149 		/* Convert current hash item from UTF-16LE to UTF-8. */
150 		collator_convert_hash_item_from_utf16_to_utf8(
151 			hash, hashData, hashKey, hashIndex, status );
152 		if( U_FAILURE( *status ) ) {
153 			return;
154 		}
155 	} ZEND_HASH_FOREACH_END();
156 }
157 /* }}} */
158 
159 /* {{{ collator_convert_zstr_utf16_to_utf8
160  *
161  * Convert string from utf16 to utf8.
162  *
163  * @param  zval* utf16_zval String to convert.
164  *
165  * @return zval* Converted string.
166  */
collator_convert_zstr_utf16_to_utf8(zval * utf16_zval,zval * rv)167 zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval, zval *rv )
168 {
169 	zend_string* u8str;
170 	UErrorCode status = U_ZERO_ERROR;
171 
172 	/* Convert to utf8 then. */
173 	u8str = intl_convert_utf16_to_utf8(
174 		(UChar*) Z_STRVAL_P(utf16_zval), UCHARS( Z_STRLEN_P(utf16_zval) ), &status );
175 	if( !u8str ) {
176 		php_error( E_WARNING, "Error converting utf16 to utf8 in collator_convert_zval_utf16_to_utf8()" );
177 		ZVAL_EMPTY_STRING( rv );
178 	} else {
179 		ZVAL_NEW_STR( rv, u8str );
180 	}
181 	return rv;
182 }
183 /* }}} */
184 
185 /* {{{ collator_convert_zstr_utf8_to_utf16
186  *
187  * Convert string from utf8 to utf16.
188  *
189  * @param  zval* utf8_zval String to convert.
190  *
191  * @return zval* Converted string.
192  */
collator_convert_zstr_utf8_to_utf16(zval * utf8_zval,zval * rv)193 zval* collator_convert_zstr_utf8_to_utf16( zval* utf8_zval, zval *rv )
194 {
195 	zval* zstr        = NULL;
196 	UChar* ustr       = NULL;
197 	int32_t ustr_len   = 0;
198 	UErrorCode status = U_ZERO_ERROR;
199 
200 	/* Convert the string to UTF-16. */
201 	intl_convert_utf8_to_utf16(
202 			&ustr, &ustr_len,
203 			Z_STRVAL_P( utf8_zval ), Z_STRLEN_P( utf8_zval ),
204 			&status );
205 	if( U_FAILURE( status ) )
206 		php_error( E_WARNING, "Error casting object to string in collator_convert_zstr_utf8_to_utf16()" );
207 
208 	/* Set string. */
209 	zstr = rv;
210 	ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
211 	//???
212 	efree((char *)ustr);
213 
214 	return zstr;
215 }
216 /* }}} */
217 
218 /* {{{ collator_convert_object_to_string
219  * Convert object to UTF16-encoded string.
220  */
collator_convert_object_to_string(zval * obj,zval * rv)221 zval* collator_convert_object_to_string( zval* obj, zval *rv )
222 {
223 	zval* zstr        = NULL;
224 	UErrorCode status = U_ZERO_ERROR;
225 	UChar* ustr       = NULL;
226 	int32_t ustr_len  = 0;
227 
228 	/* Bail out if it's not an object. */
229 	if( Z_TYPE_P( obj ) != IS_OBJECT )
230 	{
231 		COLLATOR_CONVERT_RETURN_FAILED( obj );
232 	}
233 
234 	/* Try object's handlers. */
235 	if( Z_OBJ_HT_P(obj)->get )
236 	{
237 		zstr = Z_OBJ_HT_P(obj)->get( obj, rv );
238 
239 		switch( Z_TYPE_P( zstr ) )
240 		{
241 			case IS_OBJECT:
242 				{
243 					/* Bail out. */
244 					zval_ptr_dtor( zstr );
245 					COLLATOR_CONVERT_RETURN_FAILED( obj );
246 				} break;
247 
248 			case IS_STRING:
249 				break;
250 
251 			default:
252 				{
253 					convert_to_string( zstr );
254 				} break;
255 		}
256 	}
257 	else if( Z_OBJ_HT_P(obj)->cast_object )
258 	{
259 		zstr = rv;
260 
261 		if( Z_OBJ_HT_P(obj)->cast_object( obj, zstr, IS_STRING CAST_OBJECT_SHOULD_FREE ) == FAILURE )
262 		{
263 			/* cast_object failed => bail out. */
264 			zval_ptr_dtor( zstr );
265 			COLLATOR_CONVERT_RETURN_FAILED( obj );
266 		}
267 	}
268 
269 	/* Object wasn't successfully converted => bail out. */
270 	if( zstr == NULL )
271 	{
272 		COLLATOR_CONVERT_RETURN_FAILED( obj );
273 	}
274 
275 	/* Convert the string to UTF-16. */
276 	intl_convert_utf8_to_utf16(
277 			&ustr, &ustr_len,
278 			Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
279 			&status );
280 	if( U_FAILURE( status ) )
281 		php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
282 
283 	/* Cleanup zstr to hold utf16 string. */
284 	zval_dtor( zstr );
285 
286 	/* Set string. */
287 	ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
288 	//???
289 	efree((char *)ustr);
290 
291 	/* Don't free ustr cause it's set in zstr without copy.
292 	 * efree( ustr );
293 	 */
294 
295 	return zstr;
296 }
297 /* }}} */
298 
299 /* {{{ collator_convert_string_to_number
300  *
301  * Convert string to number.
302  *
303  * @param  zval* str String to convert.
304  *
305  * @return zval* Number. If str is not numeric string return number zero.
306  */
collator_convert_string_to_number(zval * str,zval * rv)307 zval* collator_convert_string_to_number( zval* str, zval *rv )
308 {
309 	zval* num = collator_convert_string_to_number_if_possible( str, rv );
310 	if( num == str )
311 	{
312 		/* String wasn't converted => return zero. */
313 		zval_ptr_dtor( num );
314 
315 		num = rv;
316 		ZVAL_LONG( num, 0 );
317 	}
318 
319 	return num;
320 }
321 /* }}} */
322 
323 /* {{{ collator_convert_string_to_double
324  *
325  * Convert string to double.
326  *
327  * @param  zval* str String to convert.
328  *
329  * @return zval* Number. If str is not numeric string return number zero.
330  */
collator_convert_string_to_double(zval * str,zval * rv)331 zval* collator_convert_string_to_double( zval* str, zval *rv )
332 {
333 	zval* num = collator_convert_string_to_number( str, rv );
334 	if( Z_TYPE_P(num) == IS_LONG )
335 	{
336 		ZVAL_DOUBLE( num, Z_LVAL_P( num ) );
337 	}
338 
339 	return num;
340 }
341 /* }}} */
342 
343 /* {{{ collator_convert_string_to_number_if_possible
344  *
345  * Convert string to numer.
346  *
347  * @param  zval* str String to convert.
348  *
349  * @return zval* Number if str is numeric string. Otherwise
350  *               original str param.
351  */
collator_convert_string_to_number_if_possible(zval * str,zval * rv)352 zval* collator_convert_string_to_number_if_possible( zval* str, zval *rv )
353 {
354 	int is_numeric = 0;
355 	zend_long lval      = 0;
356 	double dval    = 0;
357 
358 	if( Z_TYPE_P( str ) != IS_STRING )
359 	{
360 		COLLATOR_CONVERT_RETURN_FAILED( str );
361 	}
362 
363 	if( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, 1 ) ) )
364 	{
365 		if( is_numeric == IS_LONG ) {
366 			ZVAL_LONG(rv, lval);
367 		}
368 		if( is_numeric == IS_DOUBLE )
369 			ZVAL_DOUBLE(rv, dval);
370 	}
371 	else
372 	{
373 		COLLATOR_CONVERT_RETURN_FAILED( str );
374 	}
375 
376 	return rv;
377 }
378 /* }}} */
379 
380 /* {{{ collator_make_printable_zval
381  *
382  * Returns string from input zval.
383  *
384  * @param  zval* arg zval to get string from
385  *
386  * @return zval* UTF16 string.
387  */
collator_make_printable_zval(zval * arg,zval * rv)388 zval* collator_make_printable_zval( zval* arg, zval *rv)
389 {
390 	zval arg_copy;
391 	int use_copy = 0;
392 	zval* str    = NULL;
393 
394 	if( Z_TYPE_P(arg) != IS_STRING )
395 	{
396 
397 		use_copy = zend_make_printable_zval(arg, &arg_copy);
398 
399 		if( use_copy )
400 		{
401 			str = collator_convert_zstr_utf8_to_utf16( &arg_copy, rv );
402 			zval_dtor( &arg_copy );
403 		}
404 		else
405 		{
406 			str = collator_convert_zstr_utf8_to_utf16( arg, rv );
407 		}
408 	}
409 	else
410 	{
411 		COLLATOR_CONVERT_RETURN_FAILED( arg );
412 	}
413 
414 	return str;
415 }
416 /* }}} */
417 
418 /* {{{ collator_normalize_sort_argument
419  *
420  * Normalize argument to use in sort's compare function.
421  *
422  * @param  zval* arg Sort's argument to normalize.
423  *
424  * @return zval* Normalized copy of arg or unmodified arg
425  *               if normalization is not needed.
426  */
collator_normalize_sort_argument(zval * arg,zval * rv)427 zval* collator_normalize_sort_argument( zval* arg, zval *rv )
428 {
429 	zval* n_arg = NULL;
430 
431 	if( Z_TYPE_P( arg ) != IS_STRING )
432 	{
433 		/* If its not a string then nothing to do.
434 		 * Return original arg.
435 		 */
436 		COLLATOR_CONVERT_RETURN_FAILED( arg );
437 	}
438 
439 	/* Try convert to number. */
440 	n_arg = collator_convert_string_to_number_if_possible( arg, rv );
441 
442 	if( n_arg == arg )
443 	{
444 		/* Conversion to number failed. */
445 		zval_ptr_dtor( n_arg );
446 
447 		/* Convert string to utf8. */
448 		n_arg = collator_convert_zstr_utf16_to_utf8( arg, rv );
449 	}
450 
451 	return n_arg;
452 }
453 /* }}} */
454 /*
455  * Local variables:
456  * tab-width: 4
457  * c-basic-offset: 4
458  * End:
459  * vim600: noet sw=4 ts=4 fdm=marker
460  * vim<600: noet sw=4 ts=4
461  */
462