1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | https://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
12 | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
13 +----------------------------------------------------------------------+
14 */
15
16 #ifdef HAVE_CONFIG_H
17 #include <config.h>
18 #endif
19
20 #include "php_intl.h"
21 #include "collator.h"
22 #include "collator_class.h"
23 #include "collator_sort.h"
24 #include "collator_convert.h"
25 #include "intl_convert.h"
26
27 #if !defined(HAVE_PTRDIFF_T) && !defined(_PTRDIFF_T_DEFINED)
28 typedef zend_long ptrdiff_t;
29 #endif
30
31 /**
32 * Declare 'index' which will point to sort key in sort key
33 * buffer.
34 */
35 typedef struct _collator_sort_key_index {
36 char* key; /* pointer to sort key */
37 zval* zstr; /* pointer to original string(hash-item) */
38 } collator_sort_key_index_t;
39
40 ZEND_EXTERN_MODULE_GLOBALS( intl )
41
42 static const size_t DEF_SORT_KEYS_BUF_SIZE = 1048576;
43 static const size_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576;
44
45 static const size_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576;
46 static const size_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576;
47
48 static const size_t DEF_UTF16_BUF_SIZE = 1024;
49
50 /* {{{ collator_regular_compare_function */
collator_regular_compare_function(zval * result,zval * op1,zval * op2)51 static int collator_regular_compare_function(zval *result, zval *op1, zval *op2)
52 {
53 int rc = SUCCESS;
54 zval str1, str2;
55 zval num1, num2;
56 zval norm1, norm2;
57 zval *num1_p = NULL, *num2_p = NULL;
58 zval *norm1_p = NULL, *norm2_p = NULL;
59 zval *str1_p, *str2_p;
60
61 ZVAL_NULL(&str1);
62 str1_p = collator_convert_object_to_string( op1, &str1 );
63 ZVAL_NULL(&str2);
64 str2_p = collator_convert_object_to_string( op2, &str2 );
65
66 /* If both args are strings AND either of args is not numeric string
67 * then use ICU-compare. Otherwise PHP-compare. */
68 if( Z_TYPE_P(str1_p) == IS_STRING && Z_TYPE_P(str2_p) == IS_STRING &&
69 ( str1_p == ( num1_p = collator_convert_string_to_number_if_possible( str1_p, &num1 ) ) ||
70 str2_p == ( num2_p = collator_convert_string_to_number_if_possible( str2_p, &num2 ) ) ) )
71 {
72 /* Compare the strings using ICU. */
73 ZEND_ASSERT(INTL_G(current_collator) != NULL);
74 ZVAL_LONG(result, ucol_strcoll(
75 INTL_G(current_collator),
76 INTL_ZSTR_VAL(Z_STR_P(str1_p)), INTL_ZSTR_LEN(Z_STR_P(str1_p)),
77 INTL_ZSTR_VAL(Z_STR_P(str2_p)), INTL_ZSTR_LEN(Z_STR_P(str2_p)) ));
78 }
79 else
80 {
81 /* num1 is set if str1 and str2 are strings. */
82 if( num1_p )
83 {
84 if( num1_p == str1_p )
85 {
86 /* str1 is string but not numeric string
87 * just convert it to utf8.
88 */
89 norm1_p = collator_convert_zstr_utf16_to_utf8( str1_p, &norm1 );
90
91 /* num2 is not set but str2 is string => do normalization. */
92 norm2_p = collator_normalize_sort_argument( str2_p, &norm2 );
93 }
94 else
95 {
96 /* str1 is numeric strings => passthru to PHP-compare. */
97 Z_TRY_ADDREF_P(num1_p);
98 norm1_p = num1_p;
99
100 /* str2 is numeric strings => passthru to PHP-compare. */
101 Z_TRY_ADDREF_P(num2_p);
102 norm2_p = num2_p;
103 }
104 }
105 else
106 {
107 /* num1 is not set if str1 or str2 is not a string => do normalization. */
108 norm1_p = collator_normalize_sort_argument( str1_p, &norm1 );
109
110 /* if num1 is not set then num2 is not set as well => do normalization. */
111 norm2_p = collator_normalize_sort_argument( str2_p, &norm2 );
112 }
113
114 rc = compare_function( result, norm1_p, norm2_p );
115
116 zval_ptr_dtor( norm1_p );
117 zval_ptr_dtor( norm2_p );
118 }
119
120 if( num1_p )
121 zval_ptr_dtor( num1_p );
122
123 if( num2_p )
124 zval_ptr_dtor( num2_p );
125
126 zval_ptr_dtor( str1_p );
127 zval_ptr_dtor( str2_p );
128
129 return rc;
130 }
131 /* }}} */
132
133 /* {{{ collator_numeric_compare_function
134 * Convert input args to double and compare it.
135 */
collator_numeric_compare_function(zval * result,zval * op1,zval * op2)136 static int collator_numeric_compare_function(zval *result, zval *op1, zval *op2)
137 {
138 zval num1, num2;
139 zval *num1_p = NULL;
140 zval *num2_p = NULL;
141
142 if( Z_TYPE_P(op1) == IS_STRING )
143 {
144 num1_p = collator_convert_string_to_double( op1, &num1 );
145 op1 = num1_p;
146 }
147
148 if( Z_TYPE_P(op2) == IS_STRING )
149 {
150 num2_p = collator_convert_string_to_double( op2, &num2 );
151 op2 = num2_p;
152 }
153
154 ZVAL_LONG(result, numeric_compare_function(op1, op2));
155
156 if( num1_p )
157 zval_ptr_dtor( num1_p );
158 if( num2_p )
159 zval_ptr_dtor( num2_p );
160
161 return SUCCESS;
162 }
163 /* }}} */
164
165 /* {{{ collator_icu_compare_function
166 * Direct use of ucol_strcoll.
167 */
collator_icu_compare_function(zval * result,zval * op1,zval * op2)168 static int collator_icu_compare_function(zval *result, zval *op1, zval *op2)
169 {
170 int rc = SUCCESS;
171 zend_string *str1 = collator_zval_to_string(op1);
172 zend_string *str2 = collator_zval_to_string(op2);
173
174 /* Compare the strings using ICU. */
175 ZEND_ASSERT(INTL_G(current_collator) != NULL);
176 ZVAL_LONG(result, ucol_strcoll(
177 INTL_G(current_collator),
178 INTL_ZSTR_VAL(str1), INTL_ZSTR_LEN(str1),
179 INTL_ZSTR_VAL(str2), INTL_ZSTR_LEN(str2) ));
180
181 zend_string_release(str1);
182 zend_string_release(str2);
183
184 return rc;
185 }
186 /* }}} */
187
188 /* {{{ collator_compare_func
189 * Taken from PHP7 source (array_data_compare).
190 */
collator_compare_func(Bucket * f,Bucket * s)191 static int collator_compare_func(Bucket *f, Bucket *s)
192 {
193 zval result;
194 zval *first = &f->val;
195 zval *second = &s->val;
196
197 if( INTL_G(compare_func)( &result, first, second) == FAILURE )
198 return 0;
199
200 if( Z_TYPE(result) == IS_DOUBLE )
201 {
202 if( Z_DVAL(result) < 0 )
203 return -1;
204 else if( Z_DVAL(result) > 0 )
205 return 1;
206 else
207 return 0;
208 }
209
210 convert_to_long(&result);
211
212 if( Z_LVAL(result) < 0 )
213 return -1;
214 else if( Z_LVAL(result) > 0 )
215 return 1;
216
217 return 0;
218 }
219 /* }}} */
220
221 /* {{{ Compare sort keys */
collator_cmp_sort_keys(const void * p1,const void * p2)222 static int collator_cmp_sort_keys( const void *p1, const void *p2 )
223 {
224 char* key1 = ((collator_sort_key_index_t*)p1)->key;
225 char* key2 = ((collator_sort_key_index_t*)p2)->key;
226
227 return strcmp( key1, key2 );
228 }
229 /* }}} */
230
231 /* {{{ Choose compare function according to sort flags. */
collator_get_compare_function(const zend_long sort_flags)232 static collator_compare_func_t collator_get_compare_function( const zend_long sort_flags )
233 {
234 collator_compare_func_t func;
235
236 switch( sort_flags )
237 {
238 case COLLATOR_SORT_NUMERIC:
239 func = collator_numeric_compare_function;
240 break;
241
242 case COLLATOR_SORT_STRING:
243 func = collator_icu_compare_function;
244 break;
245
246 case COLLATOR_SORT_REGULAR:
247 default:
248 func = collator_regular_compare_function;
249 break;
250 }
251
252 return func;
253 }
254 /* }}} */
255
256 /* {{{ Common code shared by collator_sort() and collator_asort() API functions. */
collator_sort_internal(int renumber,INTERNAL_FUNCTION_PARAMETERS)257 static void collator_sort_internal( int renumber, INTERNAL_FUNCTION_PARAMETERS )
258 {
259 UCollator* saved_collator;
260 zval* array = NULL;
261 HashTable* hash = NULL;
262 zend_long sort_flags = COLLATOR_SORT_REGULAR;
263
264 COLLATOR_METHOD_INIT_VARS
265
266 /* Parse parameters. */
267 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "Oa/|l",
268 &object, Collator_ce_ptr, &array, &sort_flags ) == FAILURE )
269 {
270 RETURN_THROWS();
271 }
272
273 /* Fetch the object. */
274 COLLATOR_METHOD_FETCH_OBJECT;
275
276 if (!co->ucoll) {
277 zend_throw_error(NULL, "Object not initialized");
278 RETURN_THROWS();
279 }
280
281 /* Set 'compare function' according to sort flags. */
282 INTL_G(compare_func) = collator_get_compare_function( sort_flags );
283
284 hash = Z_ARRVAL_P( array );
285
286 /* Convert strings in the specified array from UTF-8 to UTF-16. */
287 collator_convert_hash_from_utf8_to_utf16( hash, COLLATOR_ERROR_CODE_P( co ) );
288 COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-8 to UTF-16" );
289
290 /* Save specified collator in the request-global (?) variable. */
291 saved_collator = INTL_G( current_collator );
292 INTL_G( current_collator ) = co->ucoll;
293
294 /* Sort specified array. */
295 zend_hash_sort(hash, collator_compare_func, renumber);
296
297 /* Restore saved collator. */
298 INTL_G( current_collator ) = saved_collator;
299
300 /* Convert strings in the specified array back to UTF-8. */
301 collator_convert_hash_from_utf16_to_utf8( hash, COLLATOR_ERROR_CODE_P( co ) );
302 COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-16 to UTF-8" );
303
304 RETURN_TRUE;
305 }
306 /* }}} */
307
308 /* {{{ Sort array using specified collator. */
PHP_FUNCTION(collator_sort)309 PHP_FUNCTION( collator_sort )
310 {
311 collator_sort_internal( true, INTERNAL_FUNCTION_PARAM_PASSTHRU );
312 }
313 /* }}} */
314
collator_sortkey_swap(collator_sort_key_index_t * p,collator_sort_key_index_t * q)315 static void collator_sortkey_swap(collator_sort_key_index_t *p, collator_sort_key_index_t *q) /* {{{ */
316 {
317 collator_sort_key_index_t t;
318 t = *p;
319 *p = *q;
320 *q = t;
321 }
322 /* }}} */
323
324 /* {{{ Equivalent to standard PHP sort using Collator.
325 * Uses ICU ucol_getSortKey for performance.
326 */
PHP_FUNCTION(collator_sort_with_sort_keys)327 PHP_FUNCTION( collator_sort_with_sort_keys )
328 {
329 zval* array = NULL;
330 zval garbage;
331 HashTable* hash = NULL;
332 zval* hashData = NULL; /* currently processed item of input hash */
333
334 char* sortKeyBuf = NULL; /* buffer to store sort keys */
335 uint32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */
336 ptrdiff_t sortKeyBufOffset = 0; /* pos in buffer to store sort key */
337 uint32_t sortKeyLen = 0; /* the length of currently processing key */
338 uint32_t bufLeft = 0;
339 uint32_t bufIncrement = 0;
340
341 collator_sort_key_index_t* sortKeyIndxBuf = NULL; /* buffer to store 'indexes' which will be passed to 'qsort' */
342 uint32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE;
343 uint32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t );
344
345 uint32_t sortKeyCount = 0;
346 uint32_t j = 0;
347
348 UChar* utf16_buf = NULL; /* tmp buffer to hold current processing string in utf-16 */
349 int utf16_buf_size = DEF_UTF16_BUF_SIZE; /* the length of utf16_buf */
350 int utf16_len = 0; /* length of converted string */
351
352 COLLATOR_METHOD_INIT_VARS
353
354 /* Parse parameters. */
355 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "Oa",
356 &object, Collator_ce_ptr, &array ) == FAILURE )
357 {
358 RETURN_THROWS();
359 }
360
361 /* Fetch the object. */
362 COLLATOR_METHOD_FETCH_OBJECT;
363
364 if (!co || !co->ucoll) {
365 intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
366 intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
367 "Object not initialized", 0 );
368 zend_throw_error(NULL, "Object not initialized");
369
370 RETURN_THROWS();
371 }
372
373 /*
374 * Sort specified array.
375 */
376 hash = Z_ARRVAL_P( array );
377
378 if( !hash || zend_hash_num_elements( hash ) == 0 )
379 RETURN_TRUE;
380
381 /* Create bufers */
382 sortKeyBuf = ecalloc( sortKeyBufSize, sizeof( char ) );
383 sortKeyIndxBuf = ecalloc( sortKeyIndxBufSize, sizeof( uint8_t ) );
384 utf16_buf = eumalloc( utf16_buf_size );
385
386 /* Iterate through input hash and create a sort key for each value. */
387 ZEND_HASH_FOREACH_VAL(hash, hashData) {
388 /* Convert current hash item from UTF-8 to UTF-16LE and save the result to utf16_buf. */
389
390 utf16_len = utf16_buf_size;
391
392 /* Process string values only. */
393 if( Z_TYPE_P( hashData ) == IS_STRING )
394 {
395 intl_convert_utf8_to_utf16( &utf16_buf, &utf16_len, Z_STRVAL_P( hashData ), Z_STRLEN_P( hashData ), COLLATOR_ERROR_CODE_P( co ) );
396
397 if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
398 {
399 intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
400 intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Sort with sort keys failed", 0 );
401
402 if( utf16_buf )
403 efree( utf16_buf );
404
405 efree( sortKeyIndxBuf );
406 efree( sortKeyBuf );
407
408 RETURN_FALSE;
409 }
410 }
411 else
412 {
413 /* Set empty string */
414 utf16_len = 0;
415 utf16_buf[utf16_len] = 0;
416 }
417
418 if( (utf16_len + 1) > utf16_buf_size )
419 utf16_buf_size = utf16_len + 1;
420
421 /* Get sort key, reallocating the buffer if needed. */
422 bufLeft = sortKeyBufSize - sortKeyBufOffset;
423
424 sortKeyLen = ucol_getSortKey( co->ucoll,
425 utf16_buf,
426 utf16_len,
427 (uint8_t*)sortKeyBuf + sortKeyBufOffset,
428 bufLeft );
429
430 /* check for sortKeyBuf overflow, increasing its size of the buffer if needed */
431 if( sortKeyLen > bufLeft )
432 {
433 bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ? sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
434
435 sortKeyBufSize += bufIncrement;
436 bufLeft += bufIncrement;
437
438 sortKeyBuf = erealloc( sortKeyBuf, sortKeyBufSize );
439
440 sortKeyLen = ucol_getSortKey( co->ucoll, utf16_buf, utf16_len, (uint8_t*)sortKeyBuf + sortKeyBufOffset, bufLeft );
441 }
442
443 /* check sortKeyIndxBuf overflow, increasing its size of the buffer if needed */
444 if( ( sortKeyCount + 1 ) * sortKeyIndxSize > sortKeyIndxBufSize )
445 {
446 bufIncrement = ( sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT ) ? sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
447
448 sortKeyIndxBufSize += bufIncrement;
449
450 sortKeyIndxBuf = erealloc( sortKeyIndxBuf, sortKeyIndxBufSize );
451 }
452
453 sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset; /* remember just offset, cause address */
454 /* of 'sortKeyBuf' may be changed due to realloc. */
455 sortKeyIndxBuf[sortKeyCount].zstr = hashData;
456
457 sortKeyBufOffset += sortKeyLen;
458 ++sortKeyCount;
459
460 } ZEND_HASH_FOREACH_END();
461
462 /* update ptrs to point to valid keys. */
463 for( j = 0; j < sortKeyCount; j++ )
464 sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;
465
466 /* sort it */
467 zend_sort( sortKeyIndxBuf, sortKeyCount,
468 sortKeyIndxSize, collator_cmp_sort_keys, (swap_func_t)collator_sortkey_swap);
469
470 ZVAL_COPY_VALUE(&garbage, array);
471 /* for resulting hash we'll assign new hash keys rather then reordering */
472 array_init(array);
473
474 for( j = 0; j < sortKeyCount; j++ )
475 {
476 Z_TRY_ADDREF_P( sortKeyIndxBuf[j].zstr );
477 zend_hash_next_index_insert( Z_ARRVAL_P(array), sortKeyIndxBuf[j].zstr);
478 }
479
480 if( utf16_buf )
481 efree( utf16_buf );
482
483 zval_ptr_dtor(&garbage);
484 efree( sortKeyIndxBuf );
485 efree( sortKeyBuf );
486
487 RETURN_TRUE;
488 }
489 /* }}} */
490
491 /* {{{ Sort array using specified collator, maintaining index association. */
PHP_FUNCTION(collator_asort)492 PHP_FUNCTION( collator_asort )
493 {
494 collator_sort_internal( false, INTERNAL_FUNCTION_PARAM_PASSTHRU );
495 }
496 /* }}} */
497
498 /* {{{ Get a sort key for a string from a Collator. */
PHP_FUNCTION(collator_get_sort_key)499 PHP_FUNCTION( collator_get_sort_key )
500 {
501 char* str = NULL;
502 size_t str_len = 0;
503 UChar* ustr = NULL;
504 int32_t ustr_len = 0;
505 int key_len = 0;
506 zend_string* key_str;
507
508 COLLATOR_METHOD_INIT_VARS
509
510 /* Parse parameters. */
511 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "Os",
512 &object, Collator_ce_ptr, &str, &str_len ) == FAILURE )
513 {
514 RETURN_THROWS();
515 }
516
517 /* Fetch the object. */
518 COLLATOR_METHOD_FETCH_OBJECT;
519
520 if (!co || !co->ucoll) {
521 intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
522 intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
523 "Object not initialized", 0 );
524 zend_throw_error(NULL, "Object not initialized");
525
526 RETURN_THROWS();
527 }
528
529 /*
530 * Compare given strings (converting them to UTF-16 first).
531 */
532
533 /* First convert the strings to UTF-16. */
534 intl_convert_utf8_to_utf16(
535 &ustr, &ustr_len, str, str_len, COLLATOR_ERROR_CODE_P( co ) );
536 if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
537 {
538 /* Set global error code. */
539 intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) );
540
541 /* Set error messages. */
542 intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
543 "Error converting first argument to UTF-16", 0 );
544 efree( ustr );
545 RETURN_FALSE;
546 }
547
548 /* ucol_getSortKey is exception in that the key length includes the
549 * NUL terminator*/
550 key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, NULL, 0);
551 if(!key_len) {
552 efree( ustr );
553 RETURN_FALSE;
554 }
555 key_str = zend_string_alloc(key_len, 0);
556 key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, (uint8_t*)ZSTR_VAL(key_str), key_len);
557 efree( ustr );
558 if(!key_len) {
559 RETURN_FALSE;
560 }
561 ZSTR_LEN(key_str) = key_len - 1;
562 RETVAL_NEW_STR(key_str);
563 }
564 /* }}} */
565