1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | http://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
12 | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
13 +----------------------------------------------------------------------+
14 */
15
16 #ifdef HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19
20 #include "php_intl.h"
21 #include "collator.h"
22 #include "collator_class.h"
23 #include "collator_sort.h"
24 #include "collator_convert.h"
25 #include "intl_convert.h"
26
27 #if !defined(HAVE_PTRDIFF_T) && !defined(_PTRDIFF_T_DEFINED)
28 typedef zend_long ptrdiff_t;
29 #endif
30
31 /**
32 * Declare 'index' which will point to sort key in sort key
33 * buffer.
34 */
35 typedef struct _collator_sort_key_index {
36 char* key; /* pointer to sort key */
37 zval* zstr; /* pointer to original string(hash-item) */
38 } collator_sort_key_index_t;
39
40 ZEND_EXTERN_MODULE_GLOBALS( intl )
41
42 static const size_t DEF_SORT_KEYS_BUF_SIZE = 1048576;
43 static const size_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576;
44
45 static const size_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576;
46 static const size_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576;
47
48 static const size_t DEF_UTF16_BUF_SIZE = 1024;
49
50 /* {{{ collator_regular_compare_function */
collator_regular_compare_function(zval * result,zval * op1,zval * op2)51 static int collator_regular_compare_function(zval *result, zval *op1, zval *op2)
52 {
53 int rc = SUCCESS;
54 zval str1, str2;
55 zval num1, num2;
56 zval norm1, norm2;
57 zval *num1_p = NULL, *num2_p = NULL;
58 zval *norm1_p = NULL, *norm2_p = NULL;
59 zval *str1_p, *str2_p;
60
61 ZVAL_NULL(&str1);
62 str1_p = collator_convert_object_to_string( op1, &str1 );
63 ZVAL_NULL(&str2);
64 str2_p = collator_convert_object_to_string( op2, &str2 );
65
66 /* If both args are strings AND either of args is not numeric string
67 * then use ICU-compare. Otherwise PHP-compare. */
68 if( Z_TYPE_P(str1_p) == IS_STRING && Z_TYPE_P(str2_p) == IS_STRING &&
69 ( str1_p == ( num1_p = collator_convert_string_to_number_if_possible( str1_p, &num1 ) ) ||
70 str2_p == ( num2_p = collator_convert_string_to_number_if_possible( str2_p, &num2 ) ) ) )
71 {
72 /* Compare the strings using ICU. */
73 ZEND_ASSERT(INTL_G(current_collator) != NULL);
74 ZVAL_LONG(result, ucol_strcoll(
75 INTL_G(current_collator),
76 INTL_Z_STRVAL_P(str1_p), INTL_Z_STRLEN_P(str1_p),
77 INTL_Z_STRVAL_P(str2_p), INTL_Z_STRLEN_P(str2_p) ));
78 }
79 else
80 {
81 /* num1 is set if str1 and str2 are strings. */
82 if( num1_p )
83 {
84 if( num1_p == str1_p )
85 {
86 /* str1 is string but not numeric string
87 * just convert it to utf8.
88 */
89 norm1_p = collator_convert_zstr_utf16_to_utf8( str1_p, &norm1 );
90
91 /* num2 is not set but str2 is string => do normalization. */
92 norm2_p = collator_normalize_sort_argument( str2_p, &norm2 );
93 }
94 else
95 {
96 /* str1 is numeric strings => passthru to PHP-compare. */
97 Z_TRY_ADDREF_P(num1_p);
98 norm1_p = num1_p;
99
100 /* str2 is numeric strings => passthru to PHP-compare. */
101 Z_TRY_ADDREF_P(num2_p);
102 norm2_p = num2_p;
103 }
104 }
105 else
106 {
107 /* num1 is not set if str1 or str2 is not a string => do normalization. */
108 norm1_p = collator_normalize_sort_argument( str1_p, &norm1 );
109
110 /* if num1 is not set then num2 is not set as well => do normalization. */
111 norm2_p = collator_normalize_sort_argument( str2_p, &norm2 );
112 }
113
114 rc = compare_function( result, norm1_p, norm2_p );
115
116 zval_ptr_dtor( norm1_p );
117 zval_ptr_dtor( norm2_p );
118 }
119
120 if( num1_p )
121 zval_ptr_dtor( num1_p );
122
123 if( num2_p )
124 zval_ptr_dtor( num2_p );
125
126 zval_ptr_dtor( str1_p );
127 zval_ptr_dtor( str2_p );
128
129 return rc;
130 }
131 /* }}} */
132
133 /* {{{ collator_numeric_compare_function
134 * Convert input args to double and compare it.
135 */
collator_numeric_compare_function(zval * result,zval * op1,zval * op2)136 static int collator_numeric_compare_function(zval *result, zval *op1, zval *op2)
137 {
138 zval num1, num2;
139 zval *num1_p = NULL;
140 zval *num2_p = NULL;
141
142 if( Z_TYPE_P(op1) == IS_STRING )
143 {
144 num1_p = collator_convert_string_to_double( op1, &num1 );
145 op1 = num1_p;
146 }
147
148 if( Z_TYPE_P(op2) == IS_STRING )
149 {
150 num2_p = collator_convert_string_to_double( op2, &num2 );
151 op2 = num2_p;
152 }
153
154 ZVAL_LONG(result, numeric_compare_function(op1, op2));
155
156 if( num1_p )
157 zval_ptr_dtor( num1_p );
158 if( num2_p )
159 zval_ptr_dtor( num2_p );
160
161 return SUCCESS;
162 }
163 /* }}} */
164
165 /* {{{ collator_icu_compare_function
166 * Direct use of ucol_strcoll.
167 */
collator_icu_compare_function(zval * result,zval * op1,zval * op2)168 static int collator_icu_compare_function(zval *result, zval *op1, zval *op2)
169 {
170 zval str1, str2;
171 int rc = SUCCESS;
172 zval *str1_p = NULL;
173 zval *str2_p = NULL;
174
175 str1_p = collator_make_printable_zval( op1, &str1);
176 str2_p = collator_make_printable_zval( op2, &str2 );
177
178 /* Compare the strings using ICU. */
179 ZEND_ASSERT(INTL_G(current_collator) != NULL);
180 ZVAL_LONG(result, ucol_strcoll(
181 INTL_G(current_collator),
182 INTL_Z_STRVAL_P(str1_p), INTL_Z_STRLEN_P(str1_p),
183 INTL_Z_STRVAL_P(str2_p), INTL_Z_STRLEN_P(str2_p) ));
184
185 zval_ptr_dtor( str1_p );
186 zval_ptr_dtor( str2_p );
187
188 return rc;
189 }
190 /* }}} */
191
192 /* {{{ collator_compare_func
193 * Taken from PHP7 source (array_data_compare).
194 */
collator_compare_func(Bucket * f,Bucket * s)195 static int collator_compare_func(Bucket *f, Bucket *s)
196 {
197 zval result;
198 zval *first = &f->val;
199 zval *second = &s->val;
200
201 if( INTL_G(compare_func)( &result, first, second) == FAILURE )
202 return 0;
203
204 if( Z_TYPE(result) == IS_DOUBLE )
205 {
206 if( Z_DVAL(result) < 0 )
207 return -1;
208 else if( Z_DVAL(result) > 0 )
209 return 1;
210 else
211 return 0;
212 }
213
214 convert_to_long(&result);
215
216 if( Z_LVAL(result) < 0 )
217 return -1;
218 else if( Z_LVAL(result) > 0 )
219 return 1;
220
221 return 0;
222 }
223 /* }}} */
224
225 /* {{{ Compare sort keys */
collator_cmp_sort_keys(const void * p1,const void * p2)226 static int collator_cmp_sort_keys( const void *p1, const void *p2 )
227 {
228 char* key1 = ((collator_sort_key_index_t*)p1)->key;
229 char* key2 = ((collator_sort_key_index_t*)p2)->key;
230
231 return strcmp( key1, key2 );
232 }
233 /* }}} */
234
235 /* {{{ Choose compare function according to sort flags. */
collator_get_compare_function(const zend_long sort_flags)236 static collator_compare_func_t collator_get_compare_function( const zend_long sort_flags )
237 {
238 collator_compare_func_t func;
239
240 switch( sort_flags )
241 {
242 case COLLATOR_SORT_NUMERIC:
243 func = collator_numeric_compare_function;
244 break;
245
246 case COLLATOR_SORT_STRING:
247 func = collator_icu_compare_function;
248 break;
249
250 case COLLATOR_SORT_REGULAR:
251 default:
252 func = collator_regular_compare_function;
253 break;
254 }
255
256 return func;
257 }
258 /* }}} */
259
260 /* {{{ Common code shared by collator_sort() and collator_asort() API functions. */
collator_sort_internal(int renumber,INTERNAL_FUNCTION_PARAMETERS)261 static void collator_sort_internal( int renumber, INTERNAL_FUNCTION_PARAMETERS )
262 {
263 UCollator* saved_collator;
264 zval* array = NULL;
265 HashTable* hash = NULL;
266 zend_long sort_flags = COLLATOR_SORT_REGULAR;
267
268 COLLATOR_METHOD_INIT_VARS
269
270 /* Parse parameters. */
271 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "Oa/|l",
272 &object, Collator_ce_ptr, &array, &sort_flags ) == FAILURE )
273 {
274 RETURN_THROWS();
275 }
276
277 /* Fetch the object. */
278 COLLATOR_METHOD_FETCH_OBJECT;
279
280 if (!co->ucoll) {
281 zend_throw_error(NULL, "Object not initialized");
282 RETURN_THROWS();
283 }
284
285 /* Set 'compare function' according to sort flags. */
286 INTL_G(compare_func) = collator_get_compare_function( sort_flags );
287
288 hash = Z_ARRVAL_P( array );
289
290 /* Convert strings in the specified array from UTF-8 to UTF-16. */
291 collator_convert_hash_from_utf8_to_utf16( hash, COLLATOR_ERROR_CODE_P( co ) );
292 COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-8 to UTF-16" );
293
294 /* Save specified collator in the request-global (?) variable. */
295 saved_collator = INTL_G( current_collator );
296 INTL_G( current_collator ) = co->ucoll;
297
298 /* Sort specified array. */
299 zend_hash_sort(hash, collator_compare_func, renumber);
300
301 /* Restore saved collator. */
302 INTL_G( current_collator ) = saved_collator;
303
304 /* Convert strings in the specified array back to UTF-8. */
305 collator_convert_hash_from_utf16_to_utf8( hash, COLLATOR_ERROR_CODE_P( co ) );
306 COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-16 to UTF-8" );
307
308 RETURN_TRUE;
309 }
310 /* }}} */
311
312 /* {{{ Sort array using specified collator. */
PHP_FUNCTION(collator_sort)313 PHP_FUNCTION( collator_sort )
314 {
315 collator_sort_internal( true, INTERNAL_FUNCTION_PARAM_PASSTHRU );
316 }
317 /* }}} */
318
collator_sortkey_swap(collator_sort_key_index_t * p,collator_sort_key_index_t * q)319 static void collator_sortkey_swap(collator_sort_key_index_t *p, collator_sort_key_index_t *q) /* {{{ */
320 {
321 collator_sort_key_index_t t;
322 t = *p;
323 *p = *q;
324 *q = t;
325 }
326 /* }}} */
327
328 /* {{{ Equivalent to standard PHP sort using Collator.
329 * Uses ICU ucol_getSortKey for performance.
330 */
PHP_FUNCTION(collator_sort_with_sort_keys)331 PHP_FUNCTION( collator_sort_with_sort_keys )
332 {
333 zval* array = NULL;
334 zval garbage;
335 HashTable* hash = NULL;
336 zval* hashData = NULL; /* currently processed item of input hash */
337
338 char* sortKeyBuf = NULL; /* buffer to store sort keys */
339 uint32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */
340 ptrdiff_t sortKeyBufOffset = 0; /* pos in buffer to store sort key */
341 uint32_t sortKeyLen = 0; /* the length of currently processing key */
342 uint32_t bufLeft = 0;
343 uint32_t bufIncrement = 0;
344
345 collator_sort_key_index_t* sortKeyIndxBuf = NULL; /* buffer to store 'indexes' which will be passed to 'qsort' */
346 uint32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE;
347 uint32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t );
348
349 uint32_t sortKeyCount = 0;
350 uint32_t j = 0;
351
352 UChar* utf16_buf = NULL; /* tmp buffer to hold current processing string in utf-16 */
353 int utf16_buf_size = DEF_UTF16_BUF_SIZE; /* the length of utf16_buf */
354 int utf16_len = 0; /* length of converted string */
355
356 COLLATOR_METHOD_INIT_VARS
357
358 /* Parse parameters. */
359 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "Oa",
360 &object, Collator_ce_ptr, &array ) == FAILURE )
361 {
362 RETURN_THROWS();
363 }
364
365 /* Fetch the object. */
366 COLLATOR_METHOD_FETCH_OBJECT;
367
368 if (!co || !co->ucoll) {
369 intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
370 intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
371 "Object not initialized", 0 );
372 zend_throw_error(NULL, "Object not initialized");
373
374 RETURN_THROWS();
375 }
376
377 /*
378 * Sort specified array.
379 */
380 hash = Z_ARRVAL_P( array );
381
382 if( !hash || zend_hash_num_elements( hash ) == 0 )
383 RETURN_TRUE;
384
385 /* Create bufers */
386 sortKeyBuf = ecalloc( sortKeyBufSize, sizeof( char ) );
387 sortKeyIndxBuf = ecalloc( sortKeyIndxBufSize, sizeof( uint8_t ) );
388 utf16_buf = eumalloc( utf16_buf_size );
389
390 /* Iterate through input hash and create a sort key for each value. */
391 ZEND_HASH_FOREACH_VAL(hash, hashData) {
392 /* Convert current hash item from UTF-8 to UTF-16LE and save the result to utf16_buf. */
393
394 utf16_len = utf16_buf_size;
395
396 /* Process string values only. */
397 if( Z_TYPE_P( hashData ) == IS_STRING )
398 {
399 intl_convert_utf8_to_utf16( &utf16_buf, &utf16_len, Z_STRVAL_P( hashData ), Z_STRLEN_P( hashData ), COLLATOR_ERROR_CODE_P( co ) );
400
401 if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
402 {
403 intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
404 intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Sort with sort keys failed", 0 );
405
406 if( utf16_buf )
407 efree( utf16_buf );
408
409 efree( sortKeyIndxBuf );
410 efree( sortKeyBuf );
411
412 RETURN_FALSE;
413 }
414 }
415 else
416 {
417 /* Set empty string */
418 utf16_len = 0;
419 utf16_buf[utf16_len] = 0;
420 }
421
422 if( (utf16_len + 1) > utf16_buf_size )
423 utf16_buf_size = utf16_len + 1;
424
425 /* Get sort key, reallocating the buffer if needed. */
426 bufLeft = sortKeyBufSize - sortKeyBufOffset;
427
428 sortKeyLen = ucol_getSortKey( co->ucoll,
429 utf16_buf,
430 utf16_len,
431 (uint8_t*)sortKeyBuf + sortKeyBufOffset,
432 bufLeft );
433
434 /* check for sortKeyBuf overflow, increasing its size of the buffer if needed */
435 if( sortKeyLen > bufLeft )
436 {
437 bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ? sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
438
439 sortKeyBufSize += bufIncrement;
440 bufLeft += bufIncrement;
441
442 sortKeyBuf = erealloc( sortKeyBuf, sortKeyBufSize );
443
444 sortKeyLen = ucol_getSortKey( co->ucoll, utf16_buf, utf16_len, (uint8_t*)sortKeyBuf + sortKeyBufOffset, bufLeft );
445 }
446
447 /* check sortKeyIndxBuf overflow, increasing its size of the buffer if needed */
448 if( ( sortKeyCount + 1 ) * sortKeyIndxSize > sortKeyIndxBufSize )
449 {
450 bufIncrement = ( sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT ) ? sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
451
452 sortKeyIndxBufSize += bufIncrement;
453
454 sortKeyIndxBuf = erealloc( sortKeyIndxBuf, sortKeyIndxBufSize );
455 }
456
457 sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset; /* remember just offset, cause address */
458 /* of 'sortKeyBuf' may be changed due to realloc. */
459 sortKeyIndxBuf[sortKeyCount].zstr = hashData;
460
461 sortKeyBufOffset += sortKeyLen;
462 ++sortKeyCount;
463
464 } ZEND_HASH_FOREACH_END();
465
466 /* update ptrs to point to valid keys. */
467 for( j = 0; j < sortKeyCount; j++ )
468 sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;
469
470 /* sort it */
471 zend_sort( sortKeyIndxBuf, sortKeyCount,
472 sortKeyIndxSize, collator_cmp_sort_keys, (swap_func_t)collator_sortkey_swap);
473
474 ZVAL_COPY_VALUE(&garbage, array);
475 /* for resulting hash we'll assign new hash keys rather then reordering */
476 array_init(array);
477
478 for( j = 0; j < sortKeyCount; j++ )
479 {
480 Z_TRY_ADDREF_P( sortKeyIndxBuf[j].zstr );
481 zend_hash_next_index_insert( Z_ARRVAL_P(array), sortKeyIndxBuf[j].zstr);
482 }
483
484 if( utf16_buf )
485 efree( utf16_buf );
486
487 zval_ptr_dtor(&garbage);
488 efree( sortKeyIndxBuf );
489 efree( sortKeyBuf );
490
491 RETURN_TRUE;
492 }
493 /* }}} */
494
495 /* {{{ Sort array using specified collator, maintaining index association. */
PHP_FUNCTION(collator_asort)496 PHP_FUNCTION( collator_asort )
497 {
498 collator_sort_internal( false, INTERNAL_FUNCTION_PARAM_PASSTHRU );
499 }
500 /* }}} */
501
502 /* {{{ Get a sort key for a string from a Collator. */
PHP_FUNCTION(collator_get_sort_key)503 PHP_FUNCTION( collator_get_sort_key )
504 {
505 char* str = NULL;
506 size_t str_len = 0;
507 UChar* ustr = NULL;
508 int32_t ustr_len = 0;
509 int key_len = 0;
510 zend_string* key_str;
511
512 COLLATOR_METHOD_INIT_VARS
513
514 /* Parse parameters. */
515 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "Os",
516 &object, Collator_ce_ptr, &str, &str_len ) == FAILURE )
517 {
518 RETURN_THROWS();
519 }
520
521 /* Fetch the object. */
522 COLLATOR_METHOD_FETCH_OBJECT;
523
524 if (!co || !co->ucoll) {
525 intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
526 intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
527 "Object not initialized", 0 );
528 zend_throw_error(NULL, "Object not initialized");
529
530 RETURN_THROWS();
531 }
532
533 /*
534 * Compare given strings (converting them to UTF-16 first).
535 */
536
537 /* First convert the strings to UTF-16. */
538 intl_convert_utf8_to_utf16(
539 &ustr, &ustr_len, str, str_len, COLLATOR_ERROR_CODE_P( co ) );
540 if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
541 {
542 /* Set global error code. */
543 intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
544
545 /* Set error messages. */
546 intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
547 "Error converting first argument to UTF-16", 0 );
548 efree( ustr );
549 RETURN_FALSE;
550 }
551
552 /* ucol_getSortKey is exception in that the key length includes the
553 * NUL terminator*/
554 key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, NULL, 0);
555 if(!key_len) {
556 efree( ustr );
557 RETURN_FALSE;
558 }
559 key_str = zend_string_alloc(key_len, 0);
560 key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, (uint8_t*)ZSTR_VAL(key_str), key_len);
561 efree( ustr );
562 if(!key_len) {
563 RETURN_FALSE;
564 }
565 ZSTR_LEN(key_str) = key_len - 1;
566 RETVAL_NEW_STR(key_str);
567 }
568 /* }}} */
569