1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | http://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
12 | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
13 +----------------------------------------------------------------------+
14 */
15
16 #ifdef HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19
20 #include "php_intl.h"
21 #include "collator_class.h"
22 #include "collator_is_numeric.h"
23 #include "collator_convert.h"
24 #include "intl_convert.h"
25
26 #include <unicode/ustring.h>
27 #include <php.h>
28
/*
 * Leave the enclosing function, returning `retval` itself instead of a
 * converted value. Z_TRY_ADDREF_P() is applied to it first; the callers in
 * this file that detect an unchanged pointer drop that reference again with
 * zval_ptr_dtor().
 *
 * The body is wrapped in do { } while (0) so that the macro expands to exactly
 * one statement and stays valid inside an unbraced if/else. `retval` is
 * expanded twice, so pass an expression without side effects.
 */
#define COLLATOR_CONVERT_RETURN_FAILED(retval) do { \
		Z_TRY_ADDREF_P(retval);                     \
		return retval;                              \
	} while (0)
33
34 /* {{{ collator_convert_hash_item_from_utf8_to_utf16 */
collator_convert_hash_item_from_utf8_to_utf16(HashTable * hash,zval * hashData,zend_string * hashKey,zend_ulong hashIndex,UErrorCode * status)35 static void collator_convert_hash_item_from_utf8_to_utf16(
36 HashTable* hash, zval *hashData, zend_string *hashKey, zend_ulong hashIndex,
37 UErrorCode* status )
38 {
39 const char* old_val;
40 size_t old_val_len;
41 UChar* new_val = NULL;
42 int32_t new_val_len = 0;
43 zval znew_val;
44
45 /* Process string values only. */
46 if( Z_TYPE_P( hashData ) != IS_STRING )
47 return;
48
49 old_val = Z_STRVAL_P( hashData );
50 old_val_len = Z_STRLEN_P( hashData );
51
52 /* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
53 intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
54 if( U_FAILURE( *status ) )
55 return;
56
57 /* Update current hash item with the converted value. */
58 ZVAL_STRINGL( &znew_val, (char*)new_val, UBYTES(new_val_len + 1) );
59 //???
60 efree(new_val);
61 /* hack to fix use of initialized value */
62 Z_STRLEN(znew_val) = Z_STRLEN(znew_val) - UBYTES(1);
63
64 if( hashKey)
65 {
66 zend_hash_update( hash, hashKey, &znew_val);
67 }
68 else /* hashKeyType == HASH_KEY_IS_LONG */
69 {
70 zend_hash_index_update( hash, hashIndex, &znew_val);
71 }
72 }
73 /* }}} */
74
75 /* {{{ collator_convert_hash_item_from_utf16_to_utf8 */
collator_convert_hash_item_from_utf16_to_utf8(HashTable * hash,zval * hashData,zend_string * hashKey,zend_ulong hashIndex,UErrorCode * status)76 static void collator_convert_hash_item_from_utf16_to_utf8(
77 HashTable* hash, zval * hashData, zend_string* hashKey, zend_ulong hashIndex,
78 UErrorCode* status )
79 {
80 const char* old_val;
81 size_t old_val_len;
82 zend_string* u8str;
83 zval znew_val;
84
85 /* Process string values only. */
86 if( Z_TYPE_P( hashData ) != IS_STRING )
87 return;
88
89 old_val = Z_STRVAL_P( hashData );
90 old_val_len = Z_STRLEN_P( hashData );
91
92 /* Convert it from UTF-16LE to UTF-8 and save the result to new_val[_len]. */
93 u8str = intl_convert_utf16_to_utf8(
94 (UChar*)old_val, UCHARS(old_val_len), status );
95 if( !u8str )
96 return;
97
98 /* Update current hash item with the converted value. */
99 ZVAL_NEW_STR( &znew_val, u8str);
100
101 if( hashKey )
102 {
103 zend_hash_update( hash, hashKey, &znew_val);
104 }
105 else /* hashKeyType == HASH_KEY_IS_LONG */
106 {
107 zend_hash_index_update( hash, hashIndex, &znew_val);
108 }
109 }
110 /* }}} */
111
112 /* {{{ collator_convert_hash_from_utf8_to_utf16
113 * Convert values of the given hash from UTF-8 encoding to UTF-16LE.
114 */
collator_convert_hash_from_utf8_to_utf16(HashTable * hash,UErrorCode * status)115 void collator_convert_hash_from_utf8_to_utf16( HashTable* hash, UErrorCode* status )
116 {
117 zend_ulong hashIndex;
118 zval *hashData;
119 zend_string *hashKey;
120
121 ZEND_HASH_FOREACH_KEY_VAL(hash, hashIndex, hashKey, hashData) {
122 /* Convert current hash item from UTF-8 to UTF-16LE. */
123 collator_convert_hash_item_from_utf8_to_utf16(
124 hash, hashData, hashKey, hashIndex, status );
125 if( U_FAILURE( *status ) )
126 return;
127 } ZEND_HASH_FOREACH_END();
128 }
129 /* }}} */
130
131 /* {{{ collator_convert_hash_from_utf16_to_utf8
132 * Convert values of the given hash from UTF-16LE encoding to UTF-8.
133 */
collator_convert_hash_from_utf16_to_utf8(HashTable * hash,UErrorCode * status)134 void collator_convert_hash_from_utf16_to_utf8( HashTable* hash, UErrorCode* status )
135 {
136 zend_ulong hashIndex;
137 zend_string *hashKey;
138 zval *hashData;
139
140 ZEND_HASH_FOREACH_KEY_VAL(hash, hashIndex, hashKey, hashData) {
141 /* Convert current hash item from UTF-16LE to UTF-8. */
142 collator_convert_hash_item_from_utf16_to_utf8(
143 hash, hashData, hashKey, hashIndex, status );
144 if( U_FAILURE( *status ) ) {
145 return;
146 }
147 } ZEND_HASH_FOREACH_END();
148 }
149 /* }}} */
150
151 /* {{{ collator_convert_zstr_utf16_to_utf8
152 *
153 * Convert string from utf16 to utf8.
154 *
155 * @param zval* utf16_zval String to convert.
156 *
157 * @return zval* Converted string.
158 */
collator_convert_zstr_utf16_to_utf8(zval * utf16_zval,zval * rv)159 zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval, zval *rv )
160 {
161 zend_string* u8str;
162 UErrorCode status = U_ZERO_ERROR;
163
164 /* Convert to utf8 then. */
165 u8str = intl_convert_utf16_to_utf8(
166 (UChar*) Z_STRVAL_P(utf16_zval), UCHARS( Z_STRLEN_P(utf16_zval) ), &status );
167 if( !u8str ) {
168 php_error( E_WARNING, "Error converting utf16 to utf8 in collator_convert_zval_utf16_to_utf8()" );
169 ZVAL_EMPTY_STRING( rv );
170 } else {
171 ZVAL_NEW_STR( rv, u8str );
172 }
173 return rv;
174 }
175 /* }}} */
176
177 /* {{{ collator_convert_zstr_utf8_to_utf16
178 *
179 * Convert string from utf8 to utf16.
180 *
181 * @param zval* utf8_zval String to convert.
182 *
183 * @return zval* Converted string.
184 */
collator_convert_zstr_utf8_to_utf16(zval * utf8_zval,zval * rv)185 zval* collator_convert_zstr_utf8_to_utf16( zval* utf8_zval, zval *rv )
186 {
187 zval* zstr = NULL;
188 UChar* ustr = NULL;
189 int32_t ustr_len = 0;
190 UErrorCode status = U_ZERO_ERROR;
191
192 /* Convert the string to UTF-16. */
193 intl_convert_utf8_to_utf16(
194 &ustr, &ustr_len,
195 Z_STRVAL_P( utf8_zval ), Z_STRLEN_P( utf8_zval ),
196 &status );
197 // FIXME Or throw error or use intl internal error handler
198 if( U_FAILURE( status ) )
199 php_error( E_WARNING, "Error casting object to string in collator_convert_zstr_utf8_to_utf16()" );
200
201 /* Set string. */
202 zstr = rv;
203 ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
204 //???
205 efree((char *)ustr);
206
207 return zstr;
208 }
209 /* }}} */
210
211 /* {{{ collator_convert_object_to_string
212 * Convert object to UTF16-encoded string.
213 */
collator_convert_object_to_string(zval * obj,zval * rv)214 zval* collator_convert_object_to_string( zval* obj, zval *rv )
215 {
216 zval* zstr = NULL;
217 UErrorCode status = U_ZERO_ERROR;
218 UChar* ustr = NULL;
219 int32_t ustr_len = 0;
220
221 /* Bail out if it's not an object. */
222 if( Z_TYPE_P( obj ) != IS_OBJECT )
223 {
224 COLLATOR_CONVERT_RETURN_FAILED( obj );
225 }
226
227 /* Try object's handlers. */
228 zstr = rv;
229
230 if( Z_OBJ_HT_P(obj)->cast_object( Z_OBJ_P(obj), zstr, IS_STRING ) == FAILURE )
231 {
232 /* cast_object failed => bail out. */
233 zval_ptr_dtor( zstr );
234 COLLATOR_CONVERT_RETURN_FAILED( obj );
235 }
236
237 /* Object wasn't successfully converted => bail out. */
238 if( zstr == NULL )
239 {
240 COLLATOR_CONVERT_RETURN_FAILED( obj );
241 }
242
243 /* Convert the string to UTF-16. */
244 intl_convert_utf8_to_utf16(
245 &ustr, &ustr_len,
246 Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
247 &status );
248 // FIXME Or throw error or use intl internal error handler
249 if( U_FAILURE( status ) )
250 php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
251
252 /* Cleanup zstr to hold utf16 string. */
253 zval_ptr_dtor_str( zstr );
254
255 /* Set string. */
256 ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
257 //???
258 efree((char *)ustr);
259
260 /* Don't free ustr cause it's set in zstr without copy.
261 * efree( ustr );
262 */
263
264 return zstr;
265 }
266 /* }}} */
267
268 /* {{{ collator_convert_string_to_number
269 *
270 * Convert string to number.
271 *
272 * @param zval* str String to convert.
273 *
274 * @return zval* Number. If str is not numeric string return number zero.
275 */
collator_convert_string_to_number(zval * str,zval * rv)276 zval* collator_convert_string_to_number( zval* str, zval *rv )
277 {
278 zval* num = collator_convert_string_to_number_if_possible( str, rv );
279 if( num == str )
280 {
281 /* String wasn't converted => return zero. */
282 zval_ptr_dtor( num );
283
284 num = rv;
285 ZVAL_LONG( num, 0 );
286 }
287
288 return num;
289 }
290 /* }}} */
291
292 /* {{{ collator_convert_string_to_double
293 *
294 * Convert string to double.
295 *
296 * @param zval* str String to convert.
297 *
298 * @return zval* Number. If str is not numeric string return number zero.
299 */
collator_convert_string_to_double(zval * str,zval * rv)300 zval* collator_convert_string_to_double( zval* str, zval *rv )
301 {
302 zval* num = collator_convert_string_to_number( str, rv );
303 if( Z_TYPE_P(num) == IS_LONG )
304 {
305 ZVAL_DOUBLE( num, Z_LVAL_P( num ) );
306 }
307
308 return num;
309 }
310 /* }}} */
311
312 /* {{{ collator_convert_string_to_number_if_possible
313 *
314 * Convert string to numer.
315 *
316 * @param zval* str String to convert.
317 *
318 * @return zval* Number if str is numeric string. Otherwise
319 * original str param.
320 */
collator_convert_string_to_number_if_possible(zval * str,zval * rv)321 zval* collator_convert_string_to_number_if_possible( zval* str, zval *rv )
322 {
323 zend_uchar is_numeric = 0;
324 zend_long lval = 0;
325 double dval = 0;
326
327 if( Z_TYPE_P( str ) != IS_STRING )
328 {
329 COLLATOR_CONVERT_RETURN_FAILED( str );
330 }
331
332 if ( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, /* allow_errors */ 1 ) ) )
333 {
334 if( is_numeric == IS_LONG ) {
335 ZVAL_LONG(rv, lval);
336 }
337 if( is_numeric == IS_DOUBLE )
338 ZVAL_DOUBLE(rv, dval);
339 }
340 else
341 {
342 COLLATOR_CONVERT_RETURN_FAILED( str );
343 }
344
345 return rv;
346 }
347 /* }}} */
348
349 /* {{{ collator_make_printable_zval
350 *
351 * Returns string from input zval.
352 *
353 * @param zval* arg zval to get string from
354 *
355 * @return zval* UTF16 string.
356 */
collator_make_printable_zval(zval * arg,zval * rv)357 zval* collator_make_printable_zval( zval* arg, zval *rv)
358 {
359 zval arg_copy;
360 zval* str = NULL;
361
362 if( Z_TYPE_P(arg) != IS_STRING )
363 {
364
365 int use_copy = zend_make_printable_zval(arg, &arg_copy);
366
367 if( use_copy )
368 {
369 str = collator_convert_zstr_utf8_to_utf16( &arg_copy, rv );
370 zval_ptr_dtor_str( &arg_copy );
371 }
372 else
373 {
374 str = collator_convert_zstr_utf8_to_utf16( arg, rv );
375 }
376 }
377 else
378 {
379 COLLATOR_CONVERT_RETURN_FAILED( arg );
380 }
381
382 return str;
383 }
384 /* }}} */
385
386 /* {{{ collator_normalize_sort_argument
387 *
388 * Normalize argument to use in sort's compare function.
389 *
390 * @param zval* arg Sort's argument to normalize.
391 *
392 * @return zval* Normalized copy of arg or unmodified arg
393 * if normalization is not needed.
394 */
collator_normalize_sort_argument(zval * arg,zval * rv)395 zval* collator_normalize_sort_argument( zval* arg, zval *rv )
396 {
397 zval* n_arg = NULL;
398
399 if( Z_TYPE_P( arg ) != IS_STRING )
400 {
401 /* If its not a string then nothing to do.
402 * Return original arg.
403 */
404 COLLATOR_CONVERT_RETURN_FAILED( arg );
405 }
406
407 /* Try convert to number. */
408 n_arg = collator_convert_string_to_number_if_possible( arg, rv );
409
410 if( n_arg == arg )
411 {
412 /* Conversion to number failed. */
413 zval_ptr_dtor( n_arg );
414
415 /* Convert string to utf8. */
416 n_arg = collator_convert_zstr_utf16_to_utf8( arg, rv );
417 }
418
419 return n_arg;
420 }
421 /* }}} */
422