/* +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | | available through the world-wide-web at the following url: | | http://www.php.net/license/3_01.txt | | If you did not receive a copy of the PHP license and are unable to | | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ | Authors: Vadim Savchuk | | Dmitry Lakhtyuk | +----------------------------------------------------------------------+ */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "php_intl.h" #include "collator.h" #include "collator_class.h" #include "collator_sort.h" #include "collator_convert.h" #include "intl_convert.h" #if !defined(HAVE_PTRDIFF_T) && !defined(_PTRDIFF_T_DEFINED) typedef long ptrdiff_t; #endif /** * Declare 'index' which will point to sort key in sort key * buffer. */ typedef struct _collator_sort_key_index { char* key; /* pointer to sort key */ zval** zstr; /* pointer to original string(hash-item) */ } collator_sort_key_index_t; ZEND_EXTERN_MODULE_GLOBALS( intl ) static const size_t DEF_SORT_KEYS_BUF_SIZE = 1048576; static const size_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576; static const size_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576; static const size_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576; static const size_t DEF_UTF16_BUF_SIZE = 1024; /* {{{ collator_regular_compare_function */ static int collator_regular_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) { Collator_object* co = NULL; int rc = SUCCESS; zval* str1 = collator_convert_object_to_string( op1 TSRMLS_CC ); zval* str2 = collator_convert_object_to_string( op2 TSRMLS_CC ); zval* num1 = NULL; zval* num2 = NULL; zval* norm1 = NULL; zval* norm2 = NULL; /* If both args are strings AND either of args is not numeric string * then use ICU-compare. Otherwise PHP-compare. */ if( Z_TYPE_P(str1) == IS_STRING && Z_TYPE_P(str2) == IS_STRING && ( str1 == ( num1 = collator_convert_string_to_number_if_possible( str1 ) ) || str2 == ( num2 = collator_convert_string_to_number_if_possible( str2 ) ) ) ) { /* Fetch collator object. */ co = (Collator_object *) zend_object_store_get_object( INTL_G(current_collator) TSRMLS_CC ); if (!co || !co->ucoll) { intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC ); intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Object not initialized", 0 TSRMLS_CC ); php_error_docref(NULL TSRMLS_CC, E_RECOVERABLE_ERROR, "Object not initialized"); } /* Compare the strings using ICU. */ result->value.lval = ucol_strcoll( co->ucoll, INTL_Z_STRVAL_P(str1), INTL_Z_STRLEN_P(str1), INTL_Z_STRVAL_P(str2), INTL_Z_STRLEN_P(str2) ); result->type = IS_LONG; } else { /* num1 is set if str1 and str2 are strings. */ if( num1 ) { if( num1 == str1 ) { /* str1 is string but not numeric string * just convert it to utf8. */ norm1 = collator_convert_zstr_utf16_to_utf8( str1 ); /* num2 is not set but str2 is string => do normalization. */ norm2 = collator_normalize_sort_argument( str2 ); } else { /* str1 is numeric strings => passthru to PHP-compare. */ zval_add_ref( &num1 ); norm1 = num1; /* str2 is numeric strings => passthru to PHP-compare. */ zval_add_ref( &num2 ); norm2 = num2; } } else { /* num1 is not set if str1 or str2 is not a string => do normalization. */ norm1 = collator_normalize_sort_argument( str1 ); /* if num1 is not set then num2 is not set as well => do normalization. */ norm2 = collator_normalize_sort_argument( str2 ); } rc = compare_function( result, norm1, norm2 TSRMLS_CC ); zval_ptr_dtor( &norm1 ); zval_ptr_dtor( &norm2 ); } if( num1 ) zval_ptr_dtor( &num1 ); if( num2 ) zval_ptr_dtor( &num2 ); zval_ptr_dtor( &str1 ); zval_ptr_dtor( &str2 ); return rc; } /* }}} */ /* {{{ collator_numeric_compare_function * Convert input args to double and compare it. */ static int collator_numeric_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) { int rc = SUCCESS; zval* num1 = NULL; zval* num2 = NULL; if( Z_TYPE_P(op1) == IS_STRING ) { num1 = collator_convert_string_to_double( op1 ); op1 = num1; } if( Z_TYPE_P(op2) == IS_STRING ) { num2 = collator_convert_string_to_double( op2 ); op2 = num2; } rc = numeric_compare_function( result, op1, op2 TSRMLS_CC); if( num1 ) zval_ptr_dtor( &num1 ); if( num2 ) zval_ptr_dtor( &num2 ); return rc; } /* }}} */ /* {{{ collator_icu_compare_function * Direct use of ucol_strcoll. */ static int collator_icu_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) { int rc = SUCCESS; Collator_object* co = NULL; zval* str1 = NULL; zval* str2 = NULL; str1 = collator_make_printable_zval( op1 ); str2 = collator_make_printable_zval( op2 ); /* Fetch collator object. */ co = (Collator_object *) zend_object_store_get_object( INTL_G(current_collator) TSRMLS_CC ); /* Compare the strings using ICU. */ result->value.lval = ucol_strcoll( co->ucoll, INTL_Z_STRVAL_P(str1), INTL_Z_STRLEN_P(str1), INTL_Z_STRVAL_P(str2), INTL_Z_STRLEN_P(str2) ); result->type = IS_LONG; zval_ptr_dtor( &str1 ); zval_ptr_dtor( &str2 ); return rc; } /* }}} */ /* {{{ collator_compare_func * Taken from PHP5 source (array_data_compare). */ static int collator_compare_func( const void* a, const void* b TSRMLS_DC ) { Bucket *f; Bucket *s; zval result; zval *first; zval *second; f = *((Bucket **) a); s = *((Bucket **) b); first = *((zval **) f->pData); second = *((zval **) s->pData); if( INTL_G(compare_func)( &result, first, second TSRMLS_CC) == FAILURE ) return 0; if( Z_TYPE(result) == IS_DOUBLE ) { if( Z_DVAL(result) < 0 ) return -1; else if( Z_DVAL(result) > 0 ) return 1; else return 0; } convert_to_long(&result); if( Z_LVAL(result) < 0 ) return -1; else if( Z_LVAL(result) > 0 ) return 1; return 0; } /* }}} */ /* {{{ collator_cmp_sort_keys * Compare sort keys */ static int collator_cmp_sort_keys( const void *p1, const void *p2 TSRMLS_DC ) { char* key1 = ((collator_sort_key_index_t*)p1)->key; char* key2 = ((collator_sort_key_index_t*)p2)->key; return strcmp( key1, key2 ); } /* }}} */ /* {{{ collator_get_compare_function * Choose compare function according to sort flags. */ static collator_compare_func_t collator_get_compare_function( const long sort_flags ) { collator_compare_func_t func; switch( sort_flags ) { case COLLATOR_SORT_NUMERIC: func = collator_numeric_compare_function; break; case COLLATOR_SORT_STRING: func = collator_icu_compare_function; break; case COLLATOR_SORT_REGULAR: default: func = collator_regular_compare_function; break; } return func; } /* }}} */ /* {{{ collator_sort_internal * Common code shared by collator_sort() and collator_asort() API functions. */ static void collator_sort_internal( int renumber, INTERNAL_FUNCTION_PARAMETERS ) { zval* array = NULL; HashTable* hash = NULL; zval* saved_collator = NULL; long sort_flags = COLLATOR_SORT_REGULAR; COLLATOR_METHOD_INIT_VARS /* Parse parameters. */ if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa|l", &object, Collator_ce_ptr, &array, &sort_flags ) == FAILURE ) { intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "collator_sort_internal: unable to parse input params", 0 TSRMLS_CC ); RETURN_FALSE; } /* Fetch the object. */ COLLATOR_METHOD_FETCH_OBJECT; /* Set 'compare function' according to sort flags. */ INTL_G(compare_func) = collator_get_compare_function( sort_flags ); hash = HASH_OF( array ); /* Convert strings in the specified array from UTF-8 to UTF-16. */ collator_convert_hash_from_utf8_to_utf16( hash, COLLATOR_ERROR_CODE_P( co ) ); COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-8 to UTF-16" ); /* Save specified collator in the request-global (?) variable. */ saved_collator = INTL_G( current_collator ); INTL_G( current_collator ) = object; /* Sort specified array. */ zend_hash_sort( hash, zend_qsort, collator_compare_func, renumber TSRMLS_CC ); /* Restore saved collator. */ INTL_G( current_collator ) = saved_collator; /* Convert strings in the specified array back to UTF-8. */ collator_convert_hash_from_utf16_to_utf8( hash, COLLATOR_ERROR_CODE_P( co ) ); COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-16 to UTF-8" ); RETURN_TRUE; } /* }}} */ /* {{{ proto bool Collator::sort( Collator $coll, array(string) $arr [, int $sort_flags] ) * Sort array using specified collator. }}} */ /* {{{ proto bool collator_sort( Collator $coll, array(string) $arr [, int $sort_flags] ) * Sort array using specified collator. */ PHP_FUNCTION( collator_sort ) { collator_sort_internal( TRUE, INTERNAL_FUNCTION_PARAM_PASSTHRU ); } /* }}} */ /* {{{ proto bool Collator::sortWithSortKeys( Collator $coll, array(string) $arr ) * Equivalent to standard PHP sort using Collator. * Uses ICU ucol_getSortKey for performance. }}} */ /* {{{ proto bool collator_sort_with_sort_keys( Collator $coll, array(string) $arr ) * Equivalent to standard PHP sort using Collator. * Uses ICU ucol_getSortKey for performance. */ PHP_FUNCTION( collator_sort_with_sort_keys ) { zval* array = NULL; HashTable* hash = NULL; zval** hashData = NULL; /* currently processed item of input hash */ char* sortKeyBuf = NULL; /* buffer to store sort keys */ uint32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */ ptrdiff_t sortKeyBufOffset = 0; /* pos in buffer to store sort key */ int32_t sortKeyLen = 0; /* the length of currently processing key */ uint32_t bufLeft = 0; uint32_t bufIncrement = 0; collator_sort_key_index_t* sortKeyIndxBuf = NULL; /* buffer to store 'indexes' which will be passed to 'qsort' */ uint32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE; uint32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t ); uint32_t sortKeyCount = 0; uint32_t j = 0; UChar* utf16_buf = NULL; /* tmp buffer to hold current processing string in utf-16 */ int utf16_buf_size = DEF_UTF16_BUF_SIZE; /* the length of utf16_buf */ int utf16_len = 0; /* length of converted string */ HashTable* sortedHash = NULL; COLLATOR_METHOD_INIT_VARS /* Parse parameters. */ if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa", &object, Collator_ce_ptr, &array ) == FAILURE ) { intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "collator_sort_with_sort_keys: unable to parse input params", 0 TSRMLS_CC ); RETURN_FALSE; } /* Fetch the object. */ COLLATOR_METHOD_FETCH_OBJECT; if (!co || !co->ucoll) { intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC ); intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Object not initialized", 0 TSRMLS_CC ); php_error_docref(NULL TSRMLS_CC, E_RECOVERABLE_ERROR, "Object not initialized"); RETURN_FALSE; } /* * Sort specified array. */ hash = HASH_OF( array ); if( !hash || zend_hash_num_elements( hash ) == 0 ) RETURN_TRUE; /* Create bufers */ sortKeyBuf = ecalloc( sortKeyBufSize, sizeof( char ) ); sortKeyIndxBuf = ecalloc( sortKeyIndxBufSize, sizeof( uint8_t ) ); utf16_buf = eumalloc( utf16_buf_size ); /* Iterate through input hash and create a sort key for each value. */ zend_hash_internal_pointer_reset( hash ); while( zend_hash_get_current_data( hash, (void**) &hashData ) == SUCCESS ) { /* Convert current hash item from UTF-8 to UTF-16LE and save the result to utf16_buf. */ utf16_len = utf16_buf_size; /* Process string values only. */ if( Z_TYPE_PP( hashData ) == IS_STRING ) { intl_convert_utf8_to_utf16( &utf16_buf, &utf16_len, Z_STRVAL_PP( hashData ), Z_STRLEN_PP( hashData ), COLLATOR_ERROR_CODE_P( co ) ); if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) ) { intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC ); intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Sort with sort keys failed", 0 TSRMLS_CC ); if( utf16_buf ) efree( utf16_buf ); efree( sortKeyIndxBuf ); efree( sortKeyBuf ); RETURN_FALSE; } } else { /* Set empty string */ utf16_len = 0; utf16_buf[utf16_len] = 0; } if( (utf16_len + 1) > utf16_buf_size ) utf16_buf_size = utf16_len + 1; /* Get sort key, reallocating the buffer if needed. */ bufLeft = sortKeyBufSize - sortKeyBufOffset; sortKeyLen = ucol_getSortKey( co->ucoll, utf16_buf, utf16_len, (uint8_t*)sortKeyBuf + sortKeyBufOffset, bufLeft ); /* check for sortKeyBuf overflow, increasing its size of the buffer if needed */ if( sortKeyLen > bufLeft ) { bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ? sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT; sortKeyBufSize += bufIncrement; bufLeft += bufIncrement; sortKeyBuf = erealloc( sortKeyBuf, sortKeyBufSize ); sortKeyLen = ucol_getSortKey( co->ucoll, utf16_buf, utf16_len, (uint8_t*)sortKeyBuf + sortKeyBufOffset, bufLeft ); } /* check sortKeyIndxBuf overflow, increasing its size of the buffer if needed */ if( ( sortKeyCount + 1 ) * sortKeyIndxSize > sortKeyIndxBufSize ) { bufIncrement = ( sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT ) ? sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT; sortKeyIndxBufSize += bufIncrement; sortKeyIndxBuf = erealloc( sortKeyIndxBuf, sortKeyIndxBufSize ); } sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset; /* remeber just offset, cause address */ /* of 'sortKeyBuf' may be changed due to realloc. */ sortKeyIndxBuf[sortKeyCount].zstr = hashData; sortKeyBufOffset += sortKeyLen; ++sortKeyCount; zend_hash_move_forward( hash ); } /* update ptrs to point to valid keys. */ for( j = 0; j < sortKeyCount; j++ ) sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key; /* sort it */ zend_qsort( sortKeyIndxBuf, sortKeyCount, sortKeyIndxSize, collator_cmp_sort_keys TSRMLS_CC ); /* for resulting hash we'll assign new hash keys rather then reordering */ ALLOC_HASHTABLE( sortedHash ); zend_hash_init( sortedHash, 0, NULL, ZVAL_PTR_DTOR, 0 ); for( j = 0; j < sortKeyCount; j++ ) { zval_add_ref( sortKeyIndxBuf[j].zstr ); zend_hash_next_index_insert( sortedHash, sortKeyIndxBuf[j].zstr, sizeof(zval **), NULL ); } /* Save sorted hash into return variable. */ zval_dtor( array ); (array)->value.ht = sortedHash; (array)->type = IS_ARRAY; if( utf16_buf ) efree( utf16_buf ); efree( sortKeyIndxBuf ); efree( sortKeyBuf ); RETURN_TRUE; } /* }}} */ /* {{{ proto bool Collator::asort( Collator $coll, array(string) $arr ) * Sort array using specified collator, maintaining index association. }}} */ /* {{{ proto bool collator_asort( Collator $coll, array(string) $arr ) * Sort array using specified collator, maintaining index association. */ PHP_FUNCTION( collator_asort ) { collator_sort_internal( FALSE, INTERNAL_FUNCTION_PARAM_PASSTHRU ); } /* }}} */ /* {{{ proto bool Collator::getSortKey( Collator $coll, string $str ) * Get a sort key for a string from a Collator. }}} */ /* {{{ proto bool collator_get_sort_key( Collator $coll, string $str ) * Get a sort key for a string from a Collator. }}} */ PHP_FUNCTION( collator_get_sort_key ) { char* str = NULL; int str_len = 0; UChar* ustr = NULL; int ustr_len = 0; uint8_t* key = NULL; int key_len = 0; COLLATOR_METHOD_INIT_VARS /* Parse parameters. */ if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Os", &object, Collator_ce_ptr, &str, &str_len ) == FAILURE ) { intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "collator_get_sort_key: unable to parse input params", 0 TSRMLS_CC ); RETURN_FALSE; } /* Fetch the object. */ COLLATOR_METHOD_FETCH_OBJECT; if (!co || !co->ucoll) { intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC ); intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Object not initialized", 0 TSRMLS_CC ); php_error_docref(NULL TSRMLS_CC, E_RECOVERABLE_ERROR, "Object not initialized"); RETURN_FALSE; } /* * Compare given strings (converting them to UTF-16 first). */ /* First convert the strings to UTF-16. */ intl_convert_utf8_to_utf16( &ustr, &ustr_len, str, str_len, COLLATOR_ERROR_CODE_P( co ) ); if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) ) { /* Set global error code. */ intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC ); /* Set error messages. */ intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Error converting first argument to UTF-16", 0 TSRMLS_CC ); efree( ustr ); RETURN_FALSE; } /* ucol_getSortKey is exception in that the key length includes the * NUL terminator*/ key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, key, 0); if(!key_len) { efree( ustr ); RETURN_FALSE; } key = emalloc(key_len); key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, key, key_len); efree( ustr ); if(!key_len) { RETURN_FALSE; } RETURN_STRINGL((char *)key, key_len - 1, 0); } /* }}} */ /* * Local variables: * tab-width: 4 * c-basic-offset: 4 * End: * vim600: noet sw=4 ts=4 fdm=marker * vim<600: noet sw=4 ts=4 */