1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
14 | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
15 +----------------------------------------------------------------------+
16 */
17
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include "php_intl.h"
23 #include "collator_class.h"
24 #include "collator_is_numeric.h"
25 #include "collator_convert.h"
26 #include "intl_convert.h"
27
28 #include <unicode/ustring.h>
29 #include <php.h>
30
/* When building against PHP 5.0/5.1 this expands to ",0", which is appended to
 * the argument list of the cast_object handler call in
 * collator_convert_object_to_string(); on every other version it expands to
 * nothing.
 */
#if (PHP_MAJOR_VERSION == 5 && PHP_MINOR_VERSION <= 1)
#define CAST_OBJECT_SHOULD_FREE ,0
#else
#define CAST_OBJECT_SHOULD_FREE
#endif

/* Return `retval` (a zval* lvalue) from the enclosing function after adding a
 * reference to it. Used when a conversion is not applicable and the caller
 * gets the original zval back.
 *
 * Wrapped in do { } while( 0 ) so that "MACRO( x );" behaves as one statement
 * in every context (including an unbraced if/else).
 */
#define COLLATOR_CONVERT_RETURN_FAILED(retval) do { \
		zval_add_ref( &(retval) );                   \
		return (retval);                             \
	} while( 0 )
41
42 /* {{{ collator_convert_hash_item_from_utf8_to_utf16 */
collator_convert_hash_item_from_utf8_to_utf16(HashTable * hash,int hashKeyType,char * hashKey,ulong hashIndex,UErrorCode * status)43 static void collator_convert_hash_item_from_utf8_to_utf16(
44 HashTable* hash, int hashKeyType, char* hashKey, ulong hashIndex,
45 UErrorCode* status )
46 {
47 const char* old_val;
48 int old_val_len;
49 UChar* new_val = NULL;
50 int new_val_len = 0;
51 zval** hashData = NULL;
52 zval* znew_val = NULL;
53
54 /* Get current hash item. */
55 zend_hash_get_current_data( hash, (void**) &hashData );
56
57 /* Process string values only. */
58 if( Z_TYPE_P( *hashData ) != IS_STRING )
59 return;
60
61 old_val = Z_STRVAL_P( *hashData );
62 old_val_len = Z_STRLEN_P( *hashData );
63
64 /* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
65 intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
66 if( U_FAILURE( *status ) )
67 return;
68
69 /* Update current hash item with the converted value. */
70 MAKE_STD_ZVAL( znew_val );
71 ZVAL_STRINGL( znew_val, (char*)new_val, UBYTES(new_val_len), FALSE );
72
73 if( hashKeyType == HASH_KEY_IS_STRING )
74 {
75 zend_hash_update( hash, hashKey, strlen( hashKey ) + 1,
76 (void*) &znew_val, sizeof(zval*), NULL );
77 }
78 else /* hashKeyType == HASH_KEY_IS_LONG */
79 {
80 zend_hash_index_update( hash, hashIndex,
81 (void*) &znew_val, sizeof(zval*), NULL );
82 }
83 }
84 /* }}} */
85
86 /* {{{ collator_convert_hash_item_from_utf16_to_utf8 */
collator_convert_hash_item_from_utf16_to_utf8(HashTable * hash,int hashKeyType,char * hashKey,ulong hashIndex,UErrorCode * status)87 static void collator_convert_hash_item_from_utf16_to_utf8(
88 HashTable* hash, int hashKeyType, char* hashKey, ulong hashIndex,
89 UErrorCode* status )
90 {
91 const char* old_val;
92 int old_val_len;
93 char* new_val = NULL;
94 int new_val_len = 0;
95 zval** hashData = NULL;
96 zval* znew_val = NULL;
97
98 /* Get current hash item. */
99 zend_hash_get_current_data( hash, (void**) &hashData );
100
101 /* Process string values only. */
102 if( Z_TYPE_P( *hashData ) != IS_STRING )
103 return;
104
105 old_val = Z_STRVAL_P( *hashData );
106 old_val_len = Z_STRLEN_P( *hashData );
107
108 /* Convert it from UTF-16LE to UTF-8 and save the result to new_val[_len]. */
109 intl_convert_utf16_to_utf8( &new_val, &new_val_len,
110 (UChar*)old_val, UCHARS(old_val_len), status );
111 if( U_FAILURE( *status ) )
112 return;
113
114 /* Update current hash item with the converted value. */
115 MAKE_STD_ZVAL( znew_val );
116 ZVAL_STRINGL( znew_val, (char*)new_val, new_val_len, FALSE );
117
118 if( hashKeyType == HASH_KEY_IS_STRING )
119 {
120 zend_hash_update( hash, hashKey, strlen( hashKey ) + 1,
121 (void*) &znew_val, sizeof(zval*), NULL );
122 }
123 else /* hashKeyType == HASH_KEY_IS_LONG */
124 {
125 zend_hash_index_update( hash, hashIndex,
126 (void*) &znew_val, sizeof(zval*), NULL );
127 }
128 }
129 /* }}} */
130
131 /* {{{ collator_convert_hash_from_utf8_to_utf16
132 * Convert values of the given hash from UTF-8 encoding to UTF-16LE.
133 */
collator_convert_hash_from_utf8_to_utf16(HashTable * hash,UErrorCode * status)134 void collator_convert_hash_from_utf8_to_utf16( HashTable* hash, UErrorCode* status )
135 {
136 ulong hashIndex = 0;
137 char* hashKey = NULL;
138 int hashKeyType = 0;
139
140 zend_hash_internal_pointer_reset( hash );
141 while( ( hashKeyType = zend_hash_get_current_key( hash, &hashKey, &hashIndex, 0 ) )
142 != HASH_KEY_NON_EXISTANT )
143 {
144 /* Convert current hash item from UTF-8 to UTF-16LE. */
145 collator_convert_hash_item_from_utf8_to_utf16(
146 hash, hashKeyType, hashKey, hashIndex, status );
147 if( U_FAILURE( *status ) )
148 return;
149
150 /* Proceed to the next item. */
151 zend_hash_move_forward( hash );
152 }
153 }
154 /* }}} */
155
156 /* {{{ collator_convert_hash_from_utf16_to_utf8
157 * Convert values of the given hash from UTF-16LE encoding to UTF-8.
158 */
collator_convert_hash_from_utf16_to_utf8(HashTable * hash,UErrorCode * status)159 void collator_convert_hash_from_utf16_to_utf8( HashTable* hash, UErrorCode* status )
160 {
161 ulong hashIndex = 0;
162 char* hashKey = NULL;
163 int hashKeyType = 0;
164
165 zend_hash_internal_pointer_reset( hash );
166 while( ( hashKeyType = zend_hash_get_current_key( hash, &hashKey, &hashIndex, 0 ) )
167 != HASH_KEY_NON_EXISTANT )
168 {
169 /* Convert current hash item from UTF-16LE to UTF-8. */
170 collator_convert_hash_item_from_utf16_to_utf8(
171 hash, hashKeyType, hashKey, hashIndex, status );
172 if( U_FAILURE( *status ) ) {
173 return;
174 }
175
176 /* Proceed to the next item. */
177 zend_hash_move_forward( hash );
178 }
179 }
180 /* }}} */
181
182 /* {{{ collator_convert_zstr_utf16_to_utf8
183 *
184 * Convert string from utf16 to utf8.
185 *
186 * @param zval* utf16_zval String to convert.
187 *
188 * @return zval* Converted string.
189 */
collator_convert_zstr_utf16_to_utf8(zval * utf16_zval)190 zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval )
191 {
192 zval* utf8_zval = NULL;
193 char* str = NULL;
194 int str_len = 0;
195 UErrorCode status = U_ZERO_ERROR;
196
197 /* Convert to utf8 then. */
198 intl_convert_utf16_to_utf8( &str, &str_len,
199 (UChar*) Z_STRVAL_P(utf16_zval), UCHARS( Z_STRLEN_P(utf16_zval) ), &status );
200 if( U_FAILURE( status ) )
201 php_error( E_WARNING, "Error converting utf16 to utf8 in collator_convert_zval_utf16_to_utf8()" );
202
203 ALLOC_INIT_ZVAL( utf8_zval );
204 ZVAL_STRINGL( utf8_zval, str, str_len, FALSE );
205
206 return utf8_zval;
207 }
208 /* }}} */
209
210 /* {{{ collator_convert_zstr_utf8_to_utf16
211 *
212 * Convert string from utf8 to utf16.
213 *
214 * @param zval* utf8_zval String to convert.
215 *
216 * @return zval* Converted string.
217 */
collator_convert_zstr_utf8_to_utf16(zval * utf8_zval)218 zval* collator_convert_zstr_utf8_to_utf16( zval* utf8_zval )
219 {
220 zval* zstr = NULL;
221 UChar* ustr = NULL;
222 int ustr_len = 0;
223 UErrorCode status = U_ZERO_ERROR;
224
225 /* Convert the string to UTF-16. */
226 intl_convert_utf8_to_utf16(
227 &ustr, &ustr_len,
228 Z_STRVAL_P( utf8_zval ), Z_STRLEN_P( utf8_zval ),
229 &status );
230 if( U_FAILURE( status ) )
231 php_error( E_WARNING, "Error casting object to string in collator_convert_zstr_utf8_to_utf16()" );
232
233 /* Set string. */
234 ALLOC_INIT_ZVAL( zstr );
235 ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len), FALSE );
236
237 return zstr;
238 }
239 /* }}} */
240
241 /* {{{ collator_convert_object_to_string
242 * Convert object to UTF16-encoded string.
243 */
collator_convert_object_to_string(zval * obj TSRMLS_DC)244 zval* collator_convert_object_to_string( zval* obj TSRMLS_DC )
245 {
246 zval* zstr = NULL;
247 UErrorCode status = U_ZERO_ERROR;
248 UChar* ustr = NULL;
249 int ustr_len = 0;
250
251 /* Bail out if it's not an object. */
252 if( Z_TYPE_P( obj ) != IS_OBJECT )
253 {
254 COLLATOR_CONVERT_RETURN_FAILED( obj );
255 }
256
257 /* Try object's handlers. */
258 if( Z_OBJ_HT_P(obj)->get )
259 {
260 zstr = Z_OBJ_HT_P(obj)->get( obj TSRMLS_CC );
261
262 switch( Z_TYPE_P( zstr ) )
263 {
264 case IS_OBJECT:
265 {
266 /* Bail out. */
267 zval_ptr_dtor( &zstr );
268 COLLATOR_CONVERT_RETURN_FAILED( obj );
269 } break;
270
271 case IS_STRING:
272 break;
273
274 default:
275 {
276 convert_to_string( zstr );
277 } break;
278 }
279 }
280 else if( Z_OBJ_HT_P(obj)->cast_object )
281 {
282 ALLOC_INIT_ZVAL( zstr );
283
284 if( Z_OBJ_HT_P(obj)->cast_object( obj, zstr, IS_STRING CAST_OBJECT_SHOULD_FREE TSRMLS_CC ) == FAILURE )
285 {
286 /* cast_object failed => bail out. */
287 zval_ptr_dtor( &zstr );
288 COLLATOR_CONVERT_RETURN_FAILED( obj );
289 }
290 }
291
292 /* Object wasn't successfuly converted => bail out. */
293 if( zstr == NULL )
294 {
295 COLLATOR_CONVERT_RETURN_FAILED( obj );
296 }
297
298 /* Convert the string to UTF-16. */
299 intl_convert_utf8_to_utf16(
300 &ustr, &ustr_len,
301 Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
302 &status );
303 if( U_FAILURE( status ) )
304 php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
305
306 /* Cleanup zstr to hold utf16 string. */
307 zval_dtor( zstr );
308
309 /* Set string. */
310 ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len), FALSE );
311
312 /* Don't free ustr cause it's set in zstr without copy.
313 * efree( ustr );
314 */
315
316 return zstr;
317 }
318 /* }}} */
319
320 /* {{{ collator_convert_string_to_number
321 *
322 * Convert string to number.
323 *
324 * @param zval* str String to convert.
325 *
326 * @return zval* Number. If str is not numeric string return number zero.
327 */
collator_convert_string_to_number(zval * str)328 zval* collator_convert_string_to_number( zval* str )
329 {
330 zval* num = collator_convert_string_to_number_if_possible( str );
331 if( num == str )
332 {
333 /* String wasn't converted => return zero. */
334 zval_ptr_dtor( &num );
335
336 ALLOC_INIT_ZVAL( num );
337 ZVAL_LONG( num, 0 );
338 }
339
340 return num;
341 }
342 /* }}} */
343
344 /* {{{ collator_convert_string_to_double
345 *
346 * Convert string to double.
347 *
348 * @param zval* str String to convert.
349 *
350 * @return zval* Number. If str is not numeric string return number zero.
351 */
collator_convert_string_to_double(zval * str)352 zval* collator_convert_string_to_double( zval* str )
353 {
354 zval* num = collator_convert_string_to_number( str );
355 if( Z_TYPE_P(num) == IS_LONG )
356 {
357 ZVAL_DOUBLE( num, Z_LVAL_P( num ) );
358 }
359
360 return num;
361 }
362 /* }}} */
363
364 /* {{{ collator_convert_string_to_number_if_possible
365 *
366 * Convert string to numer.
367 *
368 * @param zval* str String to convert.
369 *
370 * @return zval* Number if str is numeric string. Otherwise
371 * original str param.
372 */
collator_convert_string_to_number_if_possible(zval * str)373 zval* collator_convert_string_to_number_if_possible( zval* str )
374 {
375 zval* num = NULL;
376 int is_numeric = 0;
377 long lval = 0;
378 double dval = 0;
379
380 if( Z_TYPE_P( str ) != IS_STRING )
381 {
382 COLLATOR_CONVERT_RETURN_FAILED( str );
383 }
384
385 if( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, 1 ) ) )
386 {
387 ALLOC_INIT_ZVAL( num );
388
389 if( is_numeric == IS_LONG )
390 Z_LVAL_P(num) = lval;
391 if( is_numeric == IS_DOUBLE )
392 Z_DVAL_P(num) = dval;
393
394 Z_TYPE_P(num) = is_numeric;
395 }
396 else
397 {
398 COLLATOR_CONVERT_RETURN_FAILED( str );
399 }
400
401 return num;
402 }
403 /* }}} */
404
405 /* {{{ collator_make_printable_zval
406 *
407 * Returns string from input zval.
408 *
409 * @param zval* arg zval to get string from
410 *
411 * @return zval* UTF16 string.
412 */
collator_make_printable_zval(zval * arg)413 zval* collator_make_printable_zval( zval* arg )
414 {
415 zval arg_copy;
416 int use_copy = 0;
417 zval* str = NULL;
418
419 if( Z_TYPE_P(arg) != IS_STRING )
420 {
421 zend_make_printable_zval(arg, &arg_copy, &use_copy);
422
423 if( use_copy )
424 {
425 str = collator_convert_zstr_utf8_to_utf16( &arg_copy );
426 zval_dtor( &arg_copy );
427 }
428 else
429 {
430 str = collator_convert_zstr_utf8_to_utf16( arg );
431 }
432 }
433 else
434 {
435 COLLATOR_CONVERT_RETURN_FAILED( arg );
436 }
437
438 return str;
439 }
440 /* }}} */
441
442 /* {{{ collator_normalize_sort_argument
443 *
444 * Normalize argument to use in sort's compare function.
445 *
446 * @param zval* arg Sort's argument to normalize.
447 *
448 * @return zval* Normalized copy of arg or unmodified arg
449 * if normalization is not needed.
450 */
collator_normalize_sort_argument(zval * arg)451 zval* collator_normalize_sort_argument( zval* arg )
452 {
453 zval* n_arg = NULL;
454
455 if( Z_TYPE_P( arg ) != IS_STRING )
456 {
457 /* If its not a string then nothing to do.
458 * Return original arg.
459 */
460 COLLATOR_CONVERT_RETURN_FAILED( arg );
461 }
462
463 /* Try convert to number. */
464 n_arg = collator_convert_string_to_number_if_possible( arg );
465
466 if( n_arg == arg )
467 {
468 /* Conversion to number failed. */
469 zval_ptr_dtor( &n_arg );
470
471 /* Convert string to utf8. */
472 n_arg = collator_convert_zstr_utf16_to_utf8( arg );
473 }
474
475 return n_arg;
476 }
477 /* }}} */
478 /*
479 * Local variables:
480 * tab-width: 4
481 * c-basic-offset: 4
482 * End:
483 * vim600: noet sw=4 ts=4 fdm=marker
484 * vim<600: noet sw=4 ts=4
485 */
486