1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | http://www.php.net/license/3_01.txt                                  |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
12    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
13    +----------------------------------------------------------------------+
14  */
15 
16 #ifdef HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "php_intl.h"
21 #include "collator_class.h"
22 #include "collator_is_numeric.h"
23 #include "collator_convert.h"
24 #include "intl_convert.h"
25 
26 #include <unicode/ustring.h>
27 #include <php.h>
28 
/* Return `retval` itself from the enclosing function after applying
 * Z_TRY_ADDREF_P to it.  Callers in this file use it to hand the input
 * zval back unchanged when no conversion was performed.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and can be followed by a semicolon in any context.
 */
#define COLLATOR_CONVERT_RETURN_FAILED(retval) do { \
		Z_TRY_ADDREF_P(retval);                     \
		return (retval);                            \
	} while (0)
33 
34 /* {{{ collator_convert_hash_item_from_utf8_to_utf16 */
collator_convert_hash_item_from_utf8_to_utf16(HashTable * hash,zval * hashData,zend_string * hashKey,zend_ulong hashIndex,UErrorCode * status)35 static void collator_convert_hash_item_from_utf8_to_utf16(
36 	HashTable* hash, zval *hashData, zend_string *hashKey, zend_ulong hashIndex,
37 	UErrorCode* status )
38 {
39 	const char* old_val;
40 	size_t      old_val_len;
41 	UChar*      new_val      = NULL;
42 	int32_t     new_val_len  = 0;
43 	zval        znew_val;
44 
45 	/* Process string values only. */
46 	if( Z_TYPE_P( hashData ) != IS_STRING )
47 		return;
48 
49 	old_val     = Z_STRVAL_P( hashData );
50 	old_val_len = Z_STRLEN_P( hashData );
51 
52 	/* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
53 	intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
54 	if( U_FAILURE( *status ) )
55 		return;
56 
57 	/* Update current hash item with the converted value. */
58 	ZVAL_STRINGL( &znew_val, (char*)new_val, UBYTES(new_val_len + 1) );
59 	//???
60 	efree(new_val);
61 	/* hack to fix use of initialized value */
62 	Z_STRLEN(znew_val) = Z_STRLEN(znew_val) - UBYTES(1);
63 
64 	if( hashKey)
65 	{
66 		zend_hash_update( hash, hashKey, &znew_val);
67 	}
68 	else /* hashKeyType == HASH_KEY_IS_LONG */
69 	{
70 		zend_hash_index_update( hash, hashIndex, &znew_val);
71 	}
72 }
73 /* }}} */
74 
75 /* {{{ collator_convert_hash_item_from_utf16_to_utf8 */
collator_convert_hash_item_from_utf16_to_utf8(HashTable * hash,zval * hashData,zend_string * hashKey,zend_ulong hashIndex,UErrorCode * status)76 static void collator_convert_hash_item_from_utf16_to_utf8(
77 	HashTable* hash, zval * hashData, zend_string* hashKey, zend_ulong hashIndex,
78 	UErrorCode* status )
79 {
80 	const char* old_val;
81 	size_t      old_val_len;
82 	zend_string* u8str;
83 	zval        znew_val;
84 
85 	/* Process string values only. */
86 	if( Z_TYPE_P( hashData ) != IS_STRING )
87 		return;
88 
89 	old_val     = Z_STRVAL_P( hashData );
90 	old_val_len = Z_STRLEN_P( hashData );
91 
92 	/* Convert it from UTF-16LE to UTF-8 and save the result to new_val[_len]. */
93 	u8str = intl_convert_utf16_to_utf8(
94 		(UChar*)old_val, UCHARS(old_val_len), status );
95 	if( !u8str )
96 		return;
97 
98 	/* Update current hash item with the converted value. */
99 	ZVAL_NEW_STR( &znew_val, u8str);
100 
101 	if( hashKey )
102 	{
103 		zend_hash_update( hash, hashKey, &znew_val);
104 	}
105 	else /* hashKeyType == HASH_KEY_IS_LONG */
106 	{
107 		zend_hash_index_update( hash, hashIndex, &znew_val);
108 	}
109 }
110 /* }}} */
111 
112 /* {{{ collator_convert_hash_from_utf8_to_utf16
113  *  Convert values of the given hash from UTF-8 encoding to UTF-16LE.
114  */
collator_convert_hash_from_utf8_to_utf16(HashTable * hash,UErrorCode * status)115 void collator_convert_hash_from_utf8_to_utf16( HashTable* hash, UErrorCode* status )
116 {
117 	zend_ulong    hashIndex;
118 	zval *hashData;
119 	zend_string *hashKey;
120 
121 	ZEND_HASH_FOREACH_KEY_VAL(hash, hashIndex, hashKey, hashData) {
122 		/* Convert current hash item from UTF-8 to UTF-16LE. */
123 		collator_convert_hash_item_from_utf8_to_utf16(
124 			hash, hashData, hashKey, hashIndex, status );
125 		if( U_FAILURE( *status ) )
126 			return;
127 	} ZEND_HASH_FOREACH_END();
128 }
129 /* }}} */
130 
131 /* {{{ collator_convert_hash_from_utf16_to_utf8
132  * Convert values of the given hash from UTF-16LE encoding to UTF-8.
133  */
collator_convert_hash_from_utf16_to_utf8(HashTable * hash,UErrorCode * status)134 void collator_convert_hash_from_utf16_to_utf8( HashTable* hash, UErrorCode* status )
135 {
136 	zend_ulong hashIndex;
137 	zend_string *hashKey;
138 	zval *hashData;
139 
140 	ZEND_HASH_FOREACH_KEY_VAL(hash, hashIndex, hashKey, hashData) {
141 		/* Convert current hash item from UTF-16LE to UTF-8. */
142 		collator_convert_hash_item_from_utf16_to_utf8(
143 			hash, hashData, hashKey, hashIndex, status );
144 		if( U_FAILURE( *status ) ) {
145 			return;
146 		}
147 	} ZEND_HASH_FOREACH_END();
148 }
149 /* }}} */
150 
151 /* {{{ collator_convert_zstr_utf16_to_utf8
152  *
153  * Convert string from utf16 to utf8.
154  *
155  * @param  zval* utf16_zval String to convert.
156  *
157  * @return zval* Converted string.
158  */
collator_convert_zstr_utf16_to_utf8(zval * utf16_zval,zval * rv)159 zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval, zval *rv )
160 {
161 	zend_string* u8str;
162 	UErrorCode status = U_ZERO_ERROR;
163 
164 	/* Convert to utf8 then. */
165 	u8str = intl_convert_utf16_to_utf8(
166 		(UChar*) Z_STRVAL_P(utf16_zval), UCHARS( Z_STRLEN_P(utf16_zval) ), &status );
167 	if( !u8str ) {
168 		php_error( E_WARNING, "Error converting utf16 to utf8 in collator_convert_zval_utf16_to_utf8()" );
169 		ZVAL_EMPTY_STRING( rv );
170 	} else {
171 		ZVAL_NEW_STR( rv, u8str );
172 	}
173 	return rv;
174 }
175 /* }}} */
176 
177 /* {{{ collator_convert_zstr_utf8_to_utf16
178  *
179  * Convert string from utf8 to utf16.
180  *
181  * @param  zval* utf8_zval String to convert.
182  *
183  * @return zval* Converted string.
184  */
collator_convert_zstr_utf8_to_utf16(zval * utf8_zval,zval * rv)185 zval* collator_convert_zstr_utf8_to_utf16( zval* utf8_zval, zval *rv )
186 {
187 	zval* zstr        = NULL;
188 	UChar* ustr       = NULL;
189 	int32_t ustr_len   = 0;
190 	UErrorCode status = U_ZERO_ERROR;
191 
192 	/* Convert the string to UTF-16. */
193 	intl_convert_utf8_to_utf16(
194 			&ustr, &ustr_len,
195 			Z_STRVAL_P( utf8_zval ), Z_STRLEN_P( utf8_zval ),
196 			&status );
197 	// FIXME Or throw error or use intl internal error handler
198 	if( U_FAILURE( status ) )
199 		php_error( E_WARNING, "Error casting object to string in collator_convert_zstr_utf8_to_utf16()" );
200 
201 	/* Set string. */
202 	zstr = rv;
203 	ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
204 	//???
205 	efree((char *)ustr);
206 
207 	return zstr;
208 }
209 /* }}} */
210 
211 /* {{{ collator_convert_object_to_string
212  * Convert object to UTF16-encoded string.
213  */
collator_convert_object_to_string(zval * obj,zval * rv)214 zval* collator_convert_object_to_string( zval* obj, zval *rv )
215 {
216 	zval* zstr        = NULL;
217 	UErrorCode status = U_ZERO_ERROR;
218 	UChar* ustr       = NULL;
219 	int32_t ustr_len  = 0;
220 
221 	/* Bail out if it's not an object. */
222 	if( Z_TYPE_P( obj ) != IS_OBJECT )
223 	{
224 		COLLATOR_CONVERT_RETURN_FAILED( obj );
225 	}
226 
227 	/* Try object's handlers. */
228 	zstr = rv;
229 
230 	if( Z_OBJ_HT_P(obj)->cast_object( Z_OBJ_P(obj), zstr, IS_STRING ) == FAILURE )
231 	{
232 		/* cast_object failed => bail out. */
233 		zval_ptr_dtor( zstr );
234 		COLLATOR_CONVERT_RETURN_FAILED( obj );
235 	}
236 
237 	/* Object wasn't successfully converted => bail out. */
238 	if( zstr == NULL )
239 	{
240 		COLLATOR_CONVERT_RETURN_FAILED( obj );
241 	}
242 
243 	/* Convert the string to UTF-16. */
244 	intl_convert_utf8_to_utf16(
245 			&ustr, &ustr_len,
246 			Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
247 			&status );
248 	// FIXME Or throw error or use intl internal error handler
249 	if( U_FAILURE( status ) )
250 		php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
251 
252 	/* Cleanup zstr to hold utf16 string. */
253 	zval_ptr_dtor_str( zstr );
254 
255 	/* Set string. */
256 	ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
257 	//???
258 	efree((char *)ustr);
259 
260 	/* Don't free ustr cause it's set in zstr without copy.
261 	 * efree( ustr );
262 	 */
263 
264 	return zstr;
265 }
266 /* }}} */
267 
268 /* {{{ collator_convert_string_to_number
269  *
270  * Convert string to number.
271  *
272  * @param  zval* str String to convert.
273  *
274  * @return zval* Number. If str is not numeric string return number zero.
275  */
collator_convert_string_to_number(zval * str,zval * rv)276 zval* collator_convert_string_to_number( zval* str, zval *rv )
277 {
278 	zval* num = collator_convert_string_to_number_if_possible( str, rv );
279 	if( num == str )
280 	{
281 		/* String wasn't converted => return zero. */
282 		zval_ptr_dtor( num );
283 
284 		num = rv;
285 		ZVAL_LONG( num, 0 );
286 	}
287 
288 	return num;
289 }
290 /* }}} */
291 
292 /* {{{ collator_convert_string_to_double
293  *
294  * Convert string to double.
295  *
296  * @param  zval* str String to convert.
297  *
298  * @return zval* Number. If str is not numeric string return number zero.
299  */
collator_convert_string_to_double(zval * str,zval * rv)300 zval* collator_convert_string_to_double( zval* str, zval *rv )
301 {
302 	zval* num = collator_convert_string_to_number( str, rv );
303 	if( Z_TYPE_P(num) == IS_LONG )
304 	{
305 		ZVAL_DOUBLE( num, Z_LVAL_P( num ) );
306 	}
307 
308 	return num;
309 }
310 /* }}} */
311 
312 /* {{{ collator_convert_string_to_number_if_possible
313  *
314  * Convert string to numer.
315  *
316  * @param  zval* str String to convert.
317  *
318  * @return zval* Number if str is numeric string. Otherwise
319  *               original str param.
320  */
collator_convert_string_to_number_if_possible(zval * str,zval * rv)321 zval* collator_convert_string_to_number_if_possible( zval* str, zval *rv )
322 {
323 	zend_uchar is_numeric = 0;
324 	zend_long lval      = 0;
325 	double dval    = 0;
326 
327 	if( Z_TYPE_P( str ) != IS_STRING )
328 	{
329 		COLLATOR_CONVERT_RETURN_FAILED( str );
330 	}
331 
332 	if ( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, /* allow_errors */ 1 ) ) )
333 	{
334 		if( is_numeric == IS_LONG ) {
335 			ZVAL_LONG(rv, lval);
336 		}
337 		if( is_numeric == IS_DOUBLE )
338 			ZVAL_DOUBLE(rv, dval);
339 	}
340 	else
341 	{
342 		COLLATOR_CONVERT_RETURN_FAILED( str );
343 	}
344 
345 	return rv;
346 }
347 /* }}} */
348 
349 /* {{{ collator_make_printable_zval
350  *
351  * Returns string from input zval.
352  *
353  * @param  zval* arg zval to get string from
354  *
355  * @return zval* UTF16 string.
356  */
collator_make_printable_zval(zval * arg,zval * rv)357 zval* collator_make_printable_zval( zval* arg, zval *rv)
358 {
359 	zval arg_copy;
360 	zval* str    = NULL;
361 
362 	if( Z_TYPE_P(arg) != IS_STRING )
363 	{
364 
365 		int use_copy = zend_make_printable_zval(arg, &arg_copy);
366 
367 		if( use_copy )
368 		{
369 			str = collator_convert_zstr_utf8_to_utf16( &arg_copy, rv );
370 			zval_ptr_dtor_str( &arg_copy );
371 		}
372 		else
373 		{
374 			str = collator_convert_zstr_utf8_to_utf16( arg, rv );
375 		}
376 	}
377 	else
378 	{
379 		COLLATOR_CONVERT_RETURN_FAILED( arg );
380 	}
381 
382 	return str;
383 }
384 /* }}} */
385 
386 /* {{{ collator_normalize_sort_argument
387  *
388  * Normalize argument to use in sort's compare function.
389  *
390  * @param  zval* arg Sort's argument to normalize.
391  *
392  * @return zval* Normalized copy of arg or unmodified arg
393  *               if normalization is not needed.
394  */
collator_normalize_sort_argument(zval * arg,zval * rv)395 zval* collator_normalize_sort_argument( zval* arg, zval *rv )
396 {
397 	zval* n_arg = NULL;
398 
399 	if( Z_TYPE_P( arg ) != IS_STRING )
400 	{
401 		/* If its not a string then nothing to do.
402 		 * Return original arg.
403 		 */
404 		COLLATOR_CONVERT_RETURN_FAILED( arg );
405 	}
406 
407 	/* Try convert to number. */
408 	n_arg = collator_convert_string_to_number_if_possible( arg, rv );
409 
410 	if( n_arg == arg )
411 	{
412 		/* Conversion to number failed. */
413 		zval_ptr_dtor( n_arg );
414 
415 		/* Convert string to utf8. */
416 		n_arg = collator_convert_zstr_utf16_to_utf8( arg, rv );
417 	}
418 
419 	return n_arg;
420 }
421 /* }}} */
422