1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
14    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
15    +----------------------------------------------------------------------+
16  */
17 
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include "php_intl.h"
23 #include "collator_class.h"
24 #include "collator_is_numeric.h"
25 #include "collator_convert.h"
26 #include "intl_convert.h"
27 
28 #include <unicode/ustring.h>
29 #include <php.h>
30 
/* Bail out of the enclosing function by handing `retval` back to the caller:
 * Z_TRY_ADDREF_P() is applied to it first, then it is returned as-is.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to a
 * single statement and can be followed by ';' anywhere a statement is
 * allowed, including an unbraced if/else branch.
 */
#define COLLATOR_CONVERT_RETURN_FAILED(retval) do { \
			Z_TRY_ADDREF_P(retval);                 \
			return (retval);                        \
	} while (0)
35 
36 /* {{{ collator_convert_hash_item_from_utf8_to_utf16 */
collator_convert_hash_item_from_utf8_to_utf16(HashTable * hash,zval * hashData,zend_string * hashKey,zend_ulong hashIndex,UErrorCode * status)37 static void collator_convert_hash_item_from_utf8_to_utf16(
38 	HashTable* hash, zval *hashData, zend_string *hashKey, zend_ulong hashIndex,
39 	UErrorCode* status )
40 {
41 	const char* old_val;
42 	size_t      old_val_len;
43 	UChar*      new_val      = NULL;
44 	int32_t     new_val_len  = 0;
45 	zval        znew_val;
46 
47 	/* Process string values only. */
48 	if( Z_TYPE_P( hashData ) != IS_STRING )
49 		return;
50 
51 	old_val     = Z_STRVAL_P( hashData );
52 	old_val_len = Z_STRLEN_P( hashData );
53 
54 	/* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
55 	intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
56 	if( U_FAILURE( *status ) )
57 		return;
58 
59 	/* Update current hash item with the converted value. */
60 	ZVAL_STRINGL( &znew_val, (char*)new_val, UBYTES(new_val_len + 1) );
61 	//???
62 	efree(new_val);
63 	/* hack to fix use of initialized value */
64 	Z_STRLEN(znew_val) = Z_STRLEN(znew_val) - UBYTES(1);
65 
66 	if( hashKey)
67 	{
68 		zend_hash_update( hash, hashKey, &znew_val);
69 	}
70 	else /* hashKeyType == HASH_KEY_IS_LONG */
71 	{
72 		zend_hash_index_update( hash, hashIndex, &znew_val);
73 	}
74 }
75 /* }}} */
76 
77 /* {{{ collator_convert_hash_item_from_utf16_to_utf8 */
collator_convert_hash_item_from_utf16_to_utf8(HashTable * hash,zval * hashData,zend_string * hashKey,zend_ulong hashIndex,UErrorCode * status)78 static void collator_convert_hash_item_from_utf16_to_utf8(
79 	HashTable* hash, zval * hashData, zend_string* hashKey, zend_ulong hashIndex,
80 	UErrorCode* status )
81 {
82 	const char* old_val;
83 	size_t      old_val_len;
84 	zend_string* u8str;
85 	zval        znew_val;
86 
87 	/* Process string values only. */
88 	if( Z_TYPE_P( hashData ) != IS_STRING )
89 		return;
90 
91 	old_val     = Z_STRVAL_P( hashData );
92 	old_val_len = Z_STRLEN_P( hashData );
93 
94 	/* Convert it from UTF-16LE to UTF-8 and save the result to new_val[_len]. */
95 	u8str = intl_convert_utf16_to_utf8(
96 		(UChar*)old_val, UCHARS(old_val_len), status );
97 	if( !u8str )
98 		return;
99 
100 	/* Update current hash item with the converted value. */
101 	ZVAL_NEW_STR( &znew_val, u8str);
102 
103 	if( hashKey )
104 	{
105 		zend_hash_update( hash, hashKey, &znew_val);
106 	}
107 	else /* hashKeyType == HASH_KEY_IS_LONG */
108 	{
109 		zend_hash_index_update( hash, hashIndex, &znew_val);
110 	}
111 }
112 /* }}} */
113 
114 /* {{{ collator_convert_hash_from_utf8_to_utf16
115  *  Convert values of the given hash from UTF-8 encoding to UTF-16LE.
116  */
collator_convert_hash_from_utf8_to_utf16(HashTable * hash,UErrorCode * status)117 void collator_convert_hash_from_utf8_to_utf16( HashTable* hash, UErrorCode* status )
118 {
119 	zend_ulong    hashIndex;
120 	zval *hashData;
121 	zend_string *hashKey;
122 
123 	ZEND_HASH_FOREACH_KEY_VAL(hash, hashIndex, hashKey, hashData) {
124 		/* Convert current hash item from UTF-8 to UTF-16LE. */
125 		collator_convert_hash_item_from_utf8_to_utf16(
126 			hash, hashData, hashKey, hashIndex, status );
127 		if( U_FAILURE( *status ) )
128 			return;
129 	} ZEND_HASH_FOREACH_END();
130 }
131 /* }}} */
132 
133 /* {{{ collator_convert_hash_from_utf16_to_utf8
134  * Convert values of the given hash from UTF-16LE encoding to UTF-8.
135  */
collator_convert_hash_from_utf16_to_utf8(HashTable * hash,UErrorCode * status)136 void collator_convert_hash_from_utf16_to_utf8( HashTable* hash, UErrorCode* status )
137 {
138 	zend_ulong hashIndex;
139 	zend_string *hashKey;
140 	zval *hashData;
141 
142 	ZEND_HASH_FOREACH_KEY_VAL(hash, hashIndex, hashKey, hashData) {
143 		/* Convert current hash item from UTF-16LE to UTF-8. */
144 		collator_convert_hash_item_from_utf16_to_utf8(
145 			hash, hashData, hashKey, hashIndex, status );
146 		if( U_FAILURE( *status ) ) {
147 			return;
148 		}
149 	} ZEND_HASH_FOREACH_END();
150 }
151 /* }}} */
152 
153 /* {{{ collator_convert_zstr_utf16_to_utf8
154  *
155  * Convert string from utf16 to utf8.
156  *
157  * @param  zval* utf16_zval String to convert.
158  *
159  * @return zval* Converted string.
160  */
collator_convert_zstr_utf16_to_utf8(zval * utf16_zval,zval * rv)161 zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval, zval *rv )
162 {
163 	zend_string* u8str;
164 	UErrorCode status = U_ZERO_ERROR;
165 
166 	/* Convert to utf8 then. */
167 	u8str = intl_convert_utf16_to_utf8(
168 		(UChar*) Z_STRVAL_P(utf16_zval), UCHARS( Z_STRLEN_P(utf16_zval) ), &status );
169 	if( !u8str ) {
170 		php_error( E_WARNING, "Error converting utf16 to utf8 in collator_convert_zval_utf16_to_utf8()" );
171 		ZVAL_EMPTY_STRING( rv );
172 	} else {
173 		ZVAL_NEW_STR( rv, u8str );
174 	}
175 	return rv;
176 }
177 /* }}} */
178 
179 /* {{{ collator_convert_zstr_utf8_to_utf16
180  *
181  * Convert string from utf8 to utf16.
182  *
183  * @param  zval* utf8_zval String to convert.
184  *
185  * @return zval* Converted string.
186  */
collator_convert_zstr_utf8_to_utf16(zval * utf8_zval,zval * rv)187 zval* collator_convert_zstr_utf8_to_utf16( zval* utf8_zval, zval *rv )
188 {
189 	zval* zstr        = NULL;
190 	UChar* ustr       = NULL;
191 	int32_t ustr_len   = 0;
192 	UErrorCode status = U_ZERO_ERROR;
193 
194 	/* Convert the string to UTF-16. */
195 	intl_convert_utf8_to_utf16(
196 			&ustr, &ustr_len,
197 			Z_STRVAL_P( utf8_zval ), Z_STRLEN_P( utf8_zval ),
198 			&status );
199 	if( U_FAILURE( status ) )
200 		php_error( E_WARNING, "Error casting object to string in collator_convert_zstr_utf8_to_utf16()" );
201 
202 	/* Set string. */
203 	zstr = rv;
204 	ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
205 	//???
206 	efree((char *)ustr);
207 
208 	return zstr;
209 }
210 /* }}} */
211 
212 /* {{{ collator_convert_object_to_string
213  * Convert object to UTF16-encoded string.
214  */
collator_convert_object_to_string(zval * obj,zval * rv)215 zval* collator_convert_object_to_string( zval* obj, zval *rv )
216 {
217 	zval* zstr        = NULL;
218 	UErrorCode status = U_ZERO_ERROR;
219 	UChar* ustr       = NULL;
220 	int32_t ustr_len  = 0;
221 
222 	/* Bail out if it's not an object. */
223 	if( Z_TYPE_P( obj ) != IS_OBJECT )
224 	{
225 		COLLATOR_CONVERT_RETURN_FAILED( obj );
226 	}
227 
228 	/* Try object's handlers. */
229 	if( Z_OBJ_HT_P(obj)->get )
230 	{
231 		zstr = Z_OBJ_HT_P(obj)->get( obj, rv );
232 
233 		switch( Z_TYPE_P( zstr ) )
234 		{
235 			case IS_OBJECT:
236 				{
237 					/* Bail out. */
238 					zval_ptr_dtor( zstr );
239 					COLLATOR_CONVERT_RETURN_FAILED( obj );
240 				} break;
241 
242 			case IS_STRING:
243 				break;
244 
245 			default:
246 				{
247 					convert_to_string( zstr );
248 				} break;
249 		}
250 	}
251 	else if( Z_OBJ_HT_P(obj)->cast_object )
252 	{
253 		zstr = rv;
254 
255 		if( Z_OBJ_HT_P(obj)->cast_object( obj, zstr, IS_STRING ) == FAILURE )
256 		{
257 			/* cast_object failed => bail out. */
258 			zval_ptr_dtor( zstr );
259 			COLLATOR_CONVERT_RETURN_FAILED( obj );
260 		}
261 	}
262 
263 	/* Object wasn't successfully converted => bail out. */
264 	if( zstr == NULL )
265 	{
266 		COLLATOR_CONVERT_RETURN_FAILED( obj );
267 	}
268 
269 	/* Convert the string to UTF-16. */
270 	intl_convert_utf8_to_utf16(
271 			&ustr, &ustr_len,
272 			Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
273 			&status );
274 	if( U_FAILURE( status ) )
275 		php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
276 
277 	/* Cleanup zstr to hold utf16 string. */
278 	zval_ptr_dtor_str( zstr );
279 
280 	/* Set string. */
281 	ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
282 	//???
283 	efree((char *)ustr);
284 
285 	/* Don't free ustr cause it's set in zstr without copy.
286 	 * efree( ustr );
287 	 */
288 
289 	return zstr;
290 }
291 /* }}} */
292 
293 /* {{{ collator_convert_string_to_number
294  *
295  * Convert string to number.
296  *
297  * @param  zval* str String to convert.
298  *
299  * @return zval* Number. If str is not numeric string return number zero.
300  */
collator_convert_string_to_number(zval * str,zval * rv)301 zval* collator_convert_string_to_number( zval* str, zval *rv )
302 {
303 	zval* num = collator_convert_string_to_number_if_possible( str, rv );
304 	if( num == str )
305 	{
306 		/* String wasn't converted => return zero. */
307 		zval_ptr_dtor( num );
308 
309 		num = rv;
310 		ZVAL_LONG( num, 0 );
311 	}
312 
313 	return num;
314 }
315 /* }}} */
316 
317 /* {{{ collator_convert_string_to_double
318  *
319  * Convert string to double.
320  *
321  * @param  zval* str String to convert.
322  *
323  * @return zval* Number. If str is not numeric string return number zero.
324  */
collator_convert_string_to_double(zval * str,zval * rv)325 zval* collator_convert_string_to_double( zval* str, zval *rv )
326 {
327 	zval* num = collator_convert_string_to_number( str, rv );
328 	if( Z_TYPE_P(num) == IS_LONG )
329 	{
330 		ZVAL_DOUBLE( num, Z_LVAL_P( num ) );
331 	}
332 
333 	return num;
334 }
335 /* }}} */
336 
337 /* {{{ collator_convert_string_to_number_if_possible
338  *
339  * Convert string to numer.
340  *
341  * @param  zval* str String to convert.
342  *
343  * @return zval* Number if str is numeric string. Otherwise
344  *               original str param.
345  */
collator_convert_string_to_number_if_possible(zval * str,zval * rv)346 zval* collator_convert_string_to_number_if_possible( zval* str, zval *rv )
347 {
348 	int is_numeric = 0;
349 	zend_long lval      = 0;
350 	double dval    = 0;
351 
352 	if( Z_TYPE_P( str ) != IS_STRING )
353 	{
354 		COLLATOR_CONVERT_RETURN_FAILED( str );
355 	}
356 
357 	if( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, 1 ) ) )
358 	{
359 		if( is_numeric == IS_LONG ) {
360 			ZVAL_LONG(rv, lval);
361 		}
362 		if( is_numeric == IS_DOUBLE )
363 			ZVAL_DOUBLE(rv, dval);
364 	}
365 	else
366 	{
367 		COLLATOR_CONVERT_RETURN_FAILED( str );
368 	}
369 
370 	return rv;
371 }
372 /* }}} */
373 
374 /* {{{ collator_make_printable_zval
375  *
376  * Returns string from input zval.
377  *
378  * @param  zval* arg zval to get string from
379  *
380  * @return zval* UTF16 string.
381  */
collator_make_printable_zval(zval * arg,zval * rv)382 zval* collator_make_printable_zval( zval* arg, zval *rv)
383 {
384 	zval arg_copy;
385 	zval* str    = NULL;
386 
387 	if( Z_TYPE_P(arg) != IS_STRING )
388 	{
389 
390 		int use_copy = zend_make_printable_zval(arg, &arg_copy);
391 
392 		if( use_copy )
393 		{
394 			str = collator_convert_zstr_utf8_to_utf16( &arg_copy, rv );
395 			zval_ptr_dtor_str( &arg_copy );
396 		}
397 		else
398 		{
399 			str = collator_convert_zstr_utf8_to_utf16( arg, rv );
400 		}
401 	}
402 	else
403 	{
404 		COLLATOR_CONVERT_RETURN_FAILED( arg );
405 	}
406 
407 	return str;
408 }
409 /* }}} */
410 
411 /* {{{ collator_normalize_sort_argument
412  *
413  * Normalize argument to use in sort's compare function.
414  *
415  * @param  zval* arg Sort's argument to normalize.
416  *
417  * @return zval* Normalized copy of arg or unmodified arg
418  *               if normalization is not needed.
419  */
collator_normalize_sort_argument(zval * arg,zval * rv)420 zval* collator_normalize_sort_argument( zval* arg, zval *rv )
421 {
422 	zval* n_arg = NULL;
423 
424 	if( Z_TYPE_P( arg ) != IS_STRING )
425 	{
426 		/* If its not a string then nothing to do.
427 		 * Return original arg.
428 		 */
429 		COLLATOR_CONVERT_RETURN_FAILED( arg );
430 	}
431 
432 	/* Try convert to number. */
433 	n_arg = collator_convert_string_to_number_if_possible( arg, rv );
434 
435 	if( n_arg == arg )
436 	{
437 		/* Conversion to number failed. */
438 		zval_ptr_dtor( n_arg );
439 
440 		/* Convert string to utf8. */
441 		n_arg = collator_convert_zstr_utf16_to_utf8( arg, rv );
442 	}
443 
444 	return n_arg;
445 }
446 /* }}} */
447