1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Kirti Velankar <kirtig@yahoo-inc.com> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <unicode/ustring.h>
22 #include <unicode/udata.h>
23 #include <unicode/putil.h>
24 #include <unicode/ures.h>
25
26 #include "php_intl.h"
27 #include "locale.h"
28 #include "locale_class.h"
29 #include "locale_methods.h"
30 #include "intl_convert.h"
31 #include "intl_data.h"
32
33 #include <zend_API.h>
34 #include <zend.h>
35 #include <php.h>
36 #include "main/php_ini.h"
37 #include "zend_smart_str.h"
38
39 ZEND_EXTERN_MODULE_GLOBALS( intl )
40
/* String constants used when composing and parsing locale identifiers. */
/* Written in front of every subtag appended while composing a locale */
#define SEPARATOR "_"
/* NOTE(review): alternative '-' separator; not referenced in this part of the file */
#define SEPARATOR1 "-"
/* Delimiter set (either separator) used when tokenizing subtags */
#define DELIMITER "-_"
/* NOTE(review): presumably the extlang singleton; not referenced in this part of the file */
#define EXTLANG_PREFIX "a"
/* Singleton written in front of the private-use subtags while composing */
#define PRIVATE_PREFIX "x"
/* Tag name that selects the full display name in the display-value helper */
#define DISP_NAME "name"

/* Number of numbered keys ("variant0", "variant1", ...) probed per subtag kind while composing */
#define MAX_NO_VARIANT 15
#define MAX_NO_EXTLANG 3
#define MAX_NO_PRIVATE 15
/* NOTE(review): presumably a cap used by the lookup code; not referenced in this part of the file */
#define MAX_NO_LOOKUP_LANG_TAG 100

/* Returned by append_key_value() when the requested key is absent from the array */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN 11
#define EXTLANG_KEYNAME_LEN 10
#define PRIVATE_KEYNAME_LEN 11
60
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code.
 * The list is NULL-terminated so it can be scanned with findOffset().
 * The first entries (up to LOC_PREFERRED_GRANDFATHERED_LEN) are the tags that
 * have a preferred replacement in LOC_PREFERRED_GRANDFATHERED at the same
 * offset, so their order must not be changed independently.
 */
static const char * const LOC_GRANDFATHERED[] = {
	"art-lojban", "i-klingon", "i-lux", "i-navajo", "no-bok", "no-nyn",
	"cel-gaulish", "en-GB-oed", "i-ami",
	"i-bnn", "i-default", "i-enochian",
	"i-mingo", "i-pwn", "i-tao",
	"i-tay", "i-tsu", "sgn-BE-fr",
	"sgn-BE-nl", "sgn-CH-de", "zh-cmn",
	"zh-cmn-Hans", "zh-cmn-Hant", "zh-gan" ,
	"zh-guoyu", "zh-hakka", "zh-min",
	"zh-min-nan", "zh-wuu", "zh-xiang",
	"zh-yue", NULL
};
76
/* Based on IANA registry at the time of writing this code
 * This array lists the preferred values for the grandfathered tags if applicable
 * This is in sync with the array LOC_GRANDFATHERED
 * e.g. the offsets of the grandfathered tags match the offset of the preferred value
 *
 * LOC_PREFERRED_GRANDFATHERED_LEN is the number of real (non-NULL) entries:
 * getPreferredTag() only consults this array for offsets below that value and
 * hands back the grandfathered tag itself for every other offset.
 */
static const int LOC_PREFERRED_GRANDFATHERED_LEN = 6;
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
	"jbo", "tlh", "lb",
	"nv", "nb", "nn",
	NULL
};
88
/* Character / prefix classification helpers for locale identifiers.
 * Every macro parameter is parenthesized so that compound argument
 * expressions (e.g. a conditional expression) are classified as a whole.
 * Arguments may be evaluated more than once: do not pass expressions with
 * side effects. */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
/*returns TRUE if a starts the keyword section of a locale ('@')*/
#define isKeywordSeparator(a) ((a) == '@')
/*returns TRUE if a is the terminating NUL character*/
#define isEndOfTag(a) ((a) == '\0')

/*returns TRUE if a is one of the special prefix letters x, X, i or I*/
#define isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0])&&isIDSeparator((s)[1]))
#define isKeywordPrefix(s) ( isKeywordSeparator((s)[0]) )

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant */
#define isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
104
/* {{{ findOffset
 * Looks for an exact (case-sensitive) match of 'key' in the NULL-terminated
 * string array 'list'.
 * Returns the zero-based index of the first match, or -1 if 'key' is absent. */
static int16_t findOffset(const char* const* list, const char* key)
{
	size_t idx;

	for (idx = 0; list[idx] != NULL; idx++) {
		if (strcmp(key, list[idx]) == 0) {
			return (int16_t)idx;
		}
	}

	/* reached the NULL sentinel without a match */
	return -1;
}
120 /*}}}*/
121
getPreferredTag(const char * gf_tag)122 static char* getPreferredTag(const char* gf_tag)
123 {
124 char* result = NULL;
125 zend_off_t grOffset = 0;
126
127 grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
128 if(grOffset < 0) {
129 return NULL;
130 }
131 if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
132 /* return preferred tag */
133 result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
134 } else {
135 /* Return correct grandfathered language tag */
136 result = estrdup( LOC_GRANDFATHERED[grOffset] );
137 }
138 return result;
139 }
140
141 /* {{{
142 * returns the position of next token for lookup
143 * or -1 if no token
144 * strtokr equivalent search for token in reverse direction
145 */
getStrrtokenPos(char * str,zend_off_t savedPos)146 static zend_off_t getStrrtokenPos(char* str, zend_off_t savedPos)
147 {
148 zend_off_t result =-1;
149 zend_off_t i;
150
151 for(i=savedPos-1; i>=0; i--) {
152 if(isIDSeparator(*(str+i)) || isKeywordSeparator(*(str+i))){
153 /* delimiter found; check for singleton */
154 if(i>=2 && isIDSeparator(*(str+i-2)) ){
155 /* a singleton; so send the position of token before the singleton */
156 result = i-2;
157 } else {
158 result = i;
159 }
160 break;
161 }
162 }
163 if(result < 1){
164 /* Just in case inavlid locale e.g. '-x-xyz' or '-sl_Latn' */
165 result =-1;
166 }
167 return result;
168 }
169 /* }}} */
170
171 /* {{{
172 * returns the position of a singleton if present
173 * returns -1 if no singleton
174 * strtok equivalent search for singleton
175 */
getSingletonPos(const char * str)176 static zend_off_t getSingletonPos(const char* str)
177 {
178 zend_off_t result =-1;
179 size_t len = 0;
180
181 if( str && ((len=strlen(str))>0) ){
182 zend_off_t i = 0;
183 for( i=0; (size_t)i < len ; i++){
184 if( isIDSeparator(*(str+i)) ){
185 if( i==1){
186 /* string is of the form x-avy or a-prv1 */
187 result =0;
188 break;
189 } else {
190 /* delimiter found; check for singleton */
191 if( isIDSeparator(*(str+i+2)) ){
192 /* a singleton; so send the position of separator before singleton */
193 result = i+1;
194 break;
195 }
196 }
197 }
198 }/* end of for */
199
200 }
201 return result;
202 }
203 /* }}} */
204
/* {{{ proto static string Locale::getDefault( )
Get default locale */
/* }}} */
/* {{{ proto static string locale_get_default( )
Get default locale.
Returns a copy of the string reported by intl_locale_get_default(). */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
	RETURN_STRING( intl_locale_get_default( ) );
}
214
215 /* }}} */
216
217 /* {{{ proto static string Locale::setDefault( string $locale )
218 Set default locale */
219 /* }}} */
220 /* {{{ proto static string locale_set_default( string $locale )
221 Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)222 PHP_NAMED_FUNCTION(zif_locale_set_default)
223 {
224 zend_string* locale_name;
225 zend_string *ini_name;
226 char *default_locale = NULL;
227
228 if(zend_parse_parameters( ZEND_NUM_ARGS(), "S", &locale_name) == FAILURE)
229 {
230 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
231 "locale_set_default: unable to parse input params", 0 );
232
233 RETURN_FALSE;
234 }
235
236 if (ZSTR_LEN(locale_name) == 0) {
237 default_locale = (char *)uloc_getDefault();
238 locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
239 }
240
241 ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
242 zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
243 zend_string_release_ex(ini_name, 0);
244 if (default_locale != NULL) {
245 zend_string_release_ex(locale_name, 0);
246 }
247
248 RETURN_TRUE;
249 }
250 /* }}} */
251
252 /* {{{
253 * Gets the value from ICU
254 * common code shared by get_primary_language,get_script or get_region or get_variant
255 * result = 0 if error, 1 if successful , -1 if no value
256 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)257 static zend_string* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
258 {
259 zend_string* tag_value = NULL;
260 int32_t tag_value_len = 512;
261
262 char* mod_loc_name = NULL;
263
264 int32_t buflen = 512;
265 UErrorCode status = U_ZERO_ERROR;
266
267 if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
268 return NULL;
269 }
270
271 if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
272 /* Handle grandfathered languages */
273 zend_off_t grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
274 if( grOffset >= 0 ){
275 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
276 return zend_string_init(loc_name, strlen(loc_name), 0);
277 } else {
278 /* Since Grandfathered , no value , do nothing , retutn NULL */
279 return NULL;
280 }
281 }
282
283 if( fromParseLocale==1 ){
284 zend_off_t singletonPos = 0;
285
286 /* Handle singletons */
287 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
288 if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
289 return zend_string_init(loc_name, strlen(loc_name), 0);
290 }
291 }
292
293 singletonPos = getSingletonPos( loc_name );
294 if( singletonPos == 0){
295 /* singleton at start of script, region , variant etc.
296 * or invalid singleton at start of language */
297 return NULL;
298 } else if( singletonPos > 0 ){
299 /* singleton at some position except at start
300 * strip off the singleton and rest of the loc_name */
301 mod_loc_name = estrndup ( loc_name , singletonPos-1);
302 }
303 } /* end of if fromParse */
304
305 } /* end of if != LOC_CANONICAL_TAG */
306
307 if( mod_loc_name == NULL){
308 mod_loc_name = estrdup(loc_name );
309 }
310
311 /* Proceed to ICU */
312 do{
313 if (tag_value) {
314 tag_value = zend_string_realloc( tag_value , buflen, 0);
315 } else {
316 tag_value = zend_string_alloc( buflen, 0);
317 }
318 tag_value_len = buflen;
319
320 if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
321 buflen = uloc_getScript ( mod_loc_name , tag_value->val , tag_value_len , &status);
322 }
323 if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
324 buflen = uloc_getLanguage ( mod_loc_name , tag_value->val , tag_value_len , &status);
325 }
326 if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
327 buflen = uloc_getCountry ( mod_loc_name , tag_value->val , tag_value_len , &status);
328 }
329 if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
330 buflen = uloc_getVariant ( mod_loc_name , tag_value->val , tag_value_len , &status);
331 }
332 if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
333 buflen = uloc_canonicalize ( mod_loc_name , tag_value->val , tag_value_len , &status);
334 }
335
336 if( U_FAILURE( status ) ) {
337 if( status == U_BUFFER_OVERFLOW_ERROR ) {
338 status = U_ZERO_ERROR;
339 buflen++; /* add space for \0 */
340 continue;
341 }
342
343 /* Error in retrieving data */
344 *result = 0;
345 if( tag_value ){
346 zend_string_release_ex( tag_value, 0 );
347 }
348 if( mod_loc_name ){
349 efree( mod_loc_name);
350 }
351 return NULL;
352 }
353 } while( buflen > tag_value_len );
354
355 if( buflen ==0 ){
356 /* No value found */
357 *result = -1;
358 if( tag_value ){
359 zend_string_release_ex( tag_value, 0 );
360 }
361 if( mod_loc_name ){
362 efree( mod_loc_name);
363 }
364 return NULL;
365 } else {
366 *result = 1;
367 }
368
369 if( mod_loc_name ){
370 efree( mod_loc_name);
371 }
372
373 tag_value->len = strlen(tag_value->val);
374 return tag_value;
375 }
376 /* }}} */
377
378 /* {{{
379 * Gets the value from ICU , called when PHP userspace function is called
380 * common code shared by get_primary_language,get_script or get_region or get_variant
381 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)382 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
383 {
384
385 const char* loc_name = NULL;
386 size_t loc_name_len = 0;
387
388 zend_string* tag_value = NULL;
389 char* empty_result = "";
390
391 int result = 0;
392 char* msg = NULL;
393
394 UErrorCode status = U_ZERO_ERROR;
395
396 intl_error_reset( NULL );
397
398 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
399 &loc_name ,&loc_name_len ) == FAILURE) {
400 spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
401 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 );
402 efree(msg);
403
404 RETURN_FALSE;
405 }
406
407 if(loc_name_len == 0) {
408 loc_name = intl_locale_get_default();
409 loc_name_len = strlen(loc_name);
410 }
411
412 INTL_CHECK_LOCALE_LEN(loc_name_len);
413
414 /* Call ICU get */
415 tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
416
417 /* No value found */
418 if( result == -1 ) {
419 if( tag_value){
420 zend_string_release_ex( tag_value, 0 );
421 }
422 RETURN_STRING( empty_result);
423 }
424
425 /* value found */
426 if( tag_value){
427 RETVAL_STR( tag_value );
428 return;
429 }
430
431 /* Error encountered while fetching the value */
432 if( result ==0) {
433 spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
434 intl_error_set( NULL, status, msg , 1 );
435 efree(msg);
436 RETURN_NULL();
437 }
438
439 }
440 /* }}} */
441
/* {{{ proto static string Locale::getScript($locale)
 * gets the script for the $locale
 }}} */
/* {{{ proto static string locale_get_script($locale)
 * gets the script for the $locale
 * Delegates to get_icu_value_src_php() with LOC_SCRIPT_TAG, which yields the
 * value, an empty string when the locale has none, NULL on a lookup error
 * and FALSE when the arguments cannot be parsed.
 */
PHP_FUNCTION( locale_get_script )
{
	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
452 /* }}} */
453
/* {{{ proto static string Locale::getRegion($locale)
 * gets the region for the $locale
 }}} */
/* {{{ proto static string locale_get_region($locale)
 * gets the region for the $locale
 * Delegates to get_icu_value_src_php() with LOC_REGION_TAG, which yields the
 * value, an empty string when the locale has none, NULL on a lookup error
 * and FALSE when the arguments cannot be parsed.
 */
PHP_FUNCTION( locale_get_region )
{
	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
464 /* }}} */
465
/* {{{ proto static string Locale::getPrimaryLanguage($locale)
 * gets the primary language for the $locale
 }}} */
/* {{{ proto static string locale_get_primary_language($locale)
 * gets the primary language for the $locale
 * Delegates to get_icu_value_src_php() with LOC_LANG_TAG, which yields the
 * value, an empty string when the locale has none, NULL on a lookup error
 * and FALSE when the arguments cannot be parsed.
 */
PHP_FUNCTION(locale_get_primary_language )
{
	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
476 /* }}} */
477
478
/* {{{
 * common code shared by display_xyz functions to get the value from ICU
 *
 * tag_name selects what is rendered: LOC_LANG_TAG, LOC_SCRIPT_TAG,
 * LOC_REGION_TAG, LOC_VARIANT_TAG or DISP_NAME (the full display name).
 * Userland arguments: the locale to describe and, optionally, the locale to
 * describe it in (the default locale is used when that one is omitted).
 * return_value becomes the UTF-8 display string, or FALSE when the arguments
 * cannot be parsed, the locale name is too long, a grandfathered tag is
 * asked for anything but its language, ICU fails, or the UTF-16 to UTF-8
 * conversion fails.
 }}} */
static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
{
	const char* loc_name = NULL;
	size_t loc_name_len = 0;

	const char* disp_loc_name = NULL;
	size_t disp_loc_name_len = 0;
	/* 1 when disp_loc_name is a copy allocated here and must be freed here */
	int free_loc_name = 0;

	/* UTF-16 output buffer and its current capacity in UChars */
	UChar* disp_name = NULL;
	int32_t disp_name_len = 0;

	/* locale name actually passed to ICU; always an allocated copy */
	char* mod_loc_name = NULL;

	int32_t buflen = 512;
	UErrorCode status = U_ZERO_ERROR;

	zend_string* u8str;

	char* msg = NULL;

	intl_error_reset( NULL );

	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|s",
		&loc_name, &loc_name_len ,
		&disp_loc_name ,&disp_loc_name_len ) == FAILURE)
	{
		spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 );
		efree(msg);
		RETURN_FALSE;
	}

	if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
		/* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 );
		efree(msg);
		RETURN_FALSE;
	}

	/* an empty locale means "use the default locale" */
	if(loc_name_len == 0) {
		loc_name = intl_locale_get_default();
	}

	if( strcmp(tag_name, DISP_NAME) != 0 ){
		/* Handle grandfathered languages */
		int grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
		if( grOffset >= 0 ){
			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
				/* display the preferred replacement of the grandfathered tag */
				mod_loc_name = getPreferredTag( loc_name );
			} else {
				/* Since Grandfathered, no value, do nothing, return FALSE
				 * (nothing has been allocated yet at this point) */
				RETURN_FALSE;
			}
		}
	} /* end of if != DISP_NAME */

	if( mod_loc_name==NULL ){
		mod_loc_name = estrdup( loc_name );
	}

	/* Check if disp_loc_name passed , if not use default locale */
	if( !disp_loc_name){
		disp_loc_name = estrdup(intl_locale_get_default());
		free_loc_name = 1;
	}

	/* Get the disp_value for the given locale; on a buffer overflow the
	 * loop runs again with the size ICU returned */
	do{
		disp_name = erealloc( disp_name , buflen * sizeof(UChar) );
		disp_name_len = buflen;

		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , DISP_NAME)==0 ){
			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		}

		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
		if( U_FAILURE( status ) )
		{
			if( status == U_BUFFER_OVERFLOW_ERROR )
			{
				status = U_ZERO_ERROR;
				/* jumps to the loop condition below */
				continue;
			}

			/* any other failure: report it and release everything allocated so far */
			spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
			intl_error_set( NULL, status, msg , 1 );
			efree(msg);
			if( disp_name){
				efree( disp_name );
			}
			if( mod_loc_name){
				efree( mod_loc_name );
			}
			if (free_loc_name) {
				efree((void *)disp_loc_name);
				disp_loc_name = NULL;
			}
			RETURN_FALSE;
		}
	} while( buflen > disp_name_len );

	/* the locale names are no longer needed once ICU has produced the text */
	if( mod_loc_name){
		efree( mod_loc_name );
	}
	if (free_loc_name) {
		efree((void *)disp_loc_name);
		disp_loc_name = NULL;
	}
	/* Convert display locale name from UTF-16 to UTF-8. */
	u8str = intl_convert_utf16_to_utf8(disp_name, buflen, &status );
	efree( disp_name );
	if( !u8str )
	{
		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
		intl_error_set( NULL, status, msg , 1 );
		efree(msg);
		RETURN_FALSE;
	}

	RETVAL_NEW_STR( u8str );
}
613 /* }}} */
614
/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
 * gets the name for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_name($locale[, $in_locale = null])
 * gets the name for the $locale in $in_locale or default_locale
 * Delegates to get_icu_disp_value_src_php() with DISP_NAME; FALSE is
 * returned on failure.
 */
PHP_FUNCTION(locale_get_display_name)
{
	get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
625 /* }}} */
626
/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
 * gets the language for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_language($locale[, $in_locale = null])
 * gets the language for the $locale in $in_locale or default_locale
 * Delegates to get_icu_disp_value_src_php() with LOC_LANG_TAG; FALSE is
 * returned on failure.
 */
PHP_FUNCTION(locale_get_display_language)
{
	get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
637 /* }}} */
638
/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
 * gets the script for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_script($locale, $in_locale = null)
 * gets the script for the $locale in $in_locale or default_locale
 * Delegates to get_icu_disp_value_src_php() with LOC_SCRIPT_TAG; FALSE is
 * returned on failure.
 */
PHP_FUNCTION(locale_get_display_script)
{
	get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
649 /* }}} */
650
/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
 * gets the region for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_region($locale, $in_locale = null)
 * gets the region for the $locale in $in_locale or default_locale
 * Delegates to get_icu_disp_value_src_php() with LOC_REGION_TAG; FALSE is
 * returned on failure.
 */
PHP_FUNCTION(locale_get_display_region)
{
	get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
661 /* }}} */
662
/* {{{
 * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
 * gets the variant for the $locale in $in_locale or default_locale
 }}} */
/* {{{
 * proto static string locale_get_display_variant($locale, $in_locale = null)
 * gets the variant for the $locale in $in_locale or default_locale
 * Delegates to get_icu_disp_value_src_php() with LOC_VARIANT_TAG; FALSE is
 * returned on failure.
 */
PHP_FUNCTION(locale_get_display_variant)
{
	get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
675 /* }}} */
676
677 /* {{{ proto static array getKeywords(string $locale) {
678 * return an associative array containing keyword-value
679 * pairs for this locale. The keys are keys to the array (doh!)
680 * }}}*/
681 /* {{{ proto static array locale_get_keywords(string $locale) {
682 * return an associative array containing keyword-value
683 * pairs for this locale. The keys are keys to the array (doh!)
684 */
PHP_FUNCTION(locale_get_keywords)685 PHP_FUNCTION( locale_get_keywords )
686 {
687 UEnumeration* e = NULL;
688 UErrorCode status = U_ZERO_ERROR;
689
690 const char* kw_key = NULL;
691 int32_t kw_key_len = 0;
692
693 const char* loc_name = NULL;
694 size_t loc_name_len = 0;
695
696 intl_error_reset( NULL );
697
698 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
699 &loc_name, &loc_name_len ) == FAILURE)
700 {
701 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
702 "locale_get_keywords: unable to parse input params", 0 );
703
704 RETURN_FALSE;
705 }
706
707 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
708
709 if(loc_name_len == 0) {
710 loc_name = intl_locale_get_default();
711 }
712
713 /* Get the keywords */
714 e = uloc_openKeywords( loc_name, &status );
715 if( e != NULL )
716 {
717 /*
718 ICU expects the buffer to be allocated before calling the function
719 and so the buffer size has been explicitly specified
720 ICU uloc.h #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
721 hence the kw_value buffer size is 100
722 */
723
724 /* Traverse it, filling the return array. */
725 array_init( return_value );
726
727 while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
728 int32_t kw_value_len = 100;
729 zend_string *kw_value_str = zend_string_alloc(kw_value_len, 0);
730
731 /* Get the keyword value for each keyword */
732 kw_value_len=uloc_getKeywordValue( loc_name, kw_key, ZSTR_VAL(kw_value_str), kw_value_len, &status );
733 if (status == U_BUFFER_OVERFLOW_ERROR) {
734 status = U_ZERO_ERROR;
735 kw_value_str = zend_string_extend(kw_value_str, kw_value_len, 0);
736 kw_value_len=uloc_getKeywordValue( loc_name,kw_key, ZSTR_VAL(kw_value_str), kw_value_len+1, &status );
737 } else if(!U_FAILURE(status)) {
738 kw_value_str = zend_string_truncate(kw_value_str, kw_value_len, 0);
739 }
740 if (U_FAILURE(status)) {
741 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword value for the keyword", 0 );
742 if( kw_value_str){
743 zend_string_efree( kw_value_str );
744 }
745 zend_array_destroy(Z_ARR_P(return_value));
746 RETURN_FALSE;
747 }
748
749 add_assoc_str( return_value, (char *)kw_key, kw_value_str);
750 } /* end of while */
751
752 } /* end of if e!=NULL */
753
754 uenum_close( e );
755 }
756 /* }}} */
757
/* {{{ proto static string Locale::canonicalize($locale)
 * @return string the canonicalized locale
 * }}} */
/* {{{ proto static string locale_canonicalize(string $locale)
 * @param string $locale The locale string to canonicalize
 * Delegates to get_icu_value_src_php() with LOC_CANONICALIZE_TAG, which
 * parses exactly one string argument.
 */
PHP_FUNCTION(locale_canonicalize)
{
	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
768 /* }}} */
769
770 /* {{{ append_key_value
771 * Internal function which is called from locale_compose
772 * gets the value for the key_name and appends to the loc_name
773 * returns 1 if successful , -1 if not found ,
774 * 0 if array element is not a string , -2 if buffer-overflow
775 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)776 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
777 {
778 zval *ele_value;
779
780 if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
781 if(Z_TYPE_P(ele_value)!= IS_STRING ){
782 /* element value is not a string */
783 return FAILURE;
784 }
785 if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
786 strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
787 /* not lang or grandfathered tag */
788 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
789 }
790 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
791 return SUCCESS;
792 }
793
794 return LOC_NOT_FOUND;
795 }
796 /* }}} */
797
798 /* {{{ append_prefix , appends the prefix needed
799 * e.g. private adds 'x'
800 */
add_prefix(smart_str * loc_name,char * key_name)801 static void add_prefix(smart_str* loc_name, char* key_name)
802 {
803 if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
804 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
805 smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
806 }
807 }
808 /* }}} */
809
810 /* {{{ append_multiple_key_values
811 * Internal function which is called from locale_compose
812 * gets the multiple values for the key_name and appends to the loc_name
813 * used for 'variant','extlang','private'
814 * returns 1 if successful , -1 if not found ,
815 * 0 if array element is not a string , -2 if buffer-overflow
816 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name)817 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name)
818 {
819 zval *ele_value;
820 int isFirstSubtag = 0;
821
822 /* Variant/ Extlang/Private etc. */
823 if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
824 if( Z_TYPE_P(ele_value) == IS_STRING ){
825 add_prefix( loc_name , key_name);
826
827 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
828 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
829 return SUCCESS;
830 } else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
831 HashTable *arr = Z_ARRVAL_P(ele_value);
832 zval *data;
833
834 ZEND_HASH_FOREACH_VAL(arr, data) {
835 if(Z_TYPE_P(data) != IS_STRING) {
836 return FAILURE;
837 }
838 if (isFirstSubtag++ == 0){
839 add_prefix(loc_name , key_name);
840 }
841 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
842 smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
843 } ZEND_HASH_FOREACH_END();
844 return SUCCESS;
845 } else {
846 return FAILURE;
847 }
848 } else {
849 char cur_key_name[31];
850 int max_value = 0, i;
851 /* Decide the max_value: the max. no. of elements allowed */
852 if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
853 max_value = MAX_NO_VARIANT;
854 }
855 if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
856 max_value = MAX_NO_EXTLANG;
857 }
858 if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
859 max_value = MAX_NO_PRIVATE;
860 }
861
862 /* Multiple variant values as variant0, variant1 ,variant2 */
863 isFirstSubtag = 0;
864 for( i=0 ; i< max_value; i++ ){
865 snprintf( cur_key_name , 30, "%s%d", key_name , i);
866 if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
867 if( Z_TYPE_P(ele_value)!= IS_STRING ){
868 /* variant is not a string */
869 return FAILURE;
870 }
871 /* Add the contents */
872 if (isFirstSubtag++ == 0){
873 add_prefix(loc_name , cur_key_name);
874 }
875 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
876 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
877 }
878 } /* end of for */
879 } /* end of else */
880
881 return SUCCESS;
882 }
883 /* }}} */
884
885 /*{{{
886 * If applicable sets error message and aborts locale_compose gracefully
887 * returns 0 if locale_compose needs to be aborted
888 * otherwise returns 1
889 */
handleAppendResult(int result,smart_str * loc_name)890 static int handleAppendResult( int result, smart_str* loc_name)
891 {
892 intl_error_reset( NULL );
893 if( result == FAILURE) {
894 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
895 "locale_compose: parameter array element is not a string", 0 );
896 smart_str_free(loc_name);
897 return 0;
898 }
899 return 1;
900 }
901 /* }}} */
902
/* NUL-terminates the smart_str 'str' and returns its string from the current
 * PHP function. Wrapped in do { } while (0) so that the two statements stay
 * together when the macro is used as the body of an unbraced if/else. */
#define RETURN_SMART_STR(str) do { smart_str_0((str)); RETURN_NEW_STR((str)->s); } while (0)
904 /* {{{ proto static string Locale::composeLocale($array)
905 * Creates a locale by combining the parts of locale-ID passed
906 * }}} */
907 /* {{{ proto static string compose_locale($array)
908 * Creates a locale by combining the parts of locale-ID passed
909 * }}} */
PHP_FUNCTION(locale_compose)910 PHP_FUNCTION(locale_compose)
911 {
912 smart_str loc_name_s = {0};
913 smart_str *loc_name = &loc_name_s;
914 zval* arr = NULL;
915 HashTable* hash_arr = NULL;
916 int result = 0;
917
918 intl_error_reset( NULL );
919
920 if(zend_parse_parameters( ZEND_NUM_ARGS(), "a",
921 &arr) == FAILURE)
922 {
923 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
924 "locale_compose: unable to parse input params", 0 );
925 RETURN_FALSE;
926 }
927
928 hash_arr = Z_ARRVAL_P( arr );
929
930 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
931 RETURN_FALSE;
932
933 /* Check for grandfathered first */
934 result = append_key_value(loc_name, hash_arr, LOC_GRANDFATHERED_LANG_TAG);
935 if( result == SUCCESS){
936 RETURN_SMART_STR(loc_name);
937 }
938 if( !handleAppendResult( result, loc_name)){
939 RETURN_FALSE;
940 }
941
942 /* Not grandfathered */
943 result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
944 if( result == LOC_NOT_FOUND ){
945 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
946 "locale_compose: parameter array does not contain 'language' tag.", 0 );
947 smart_str_free(loc_name);
948 RETURN_FALSE;
949 }
950 if( !handleAppendResult( result, loc_name)){
951 RETURN_FALSE;
952 }
953
954 /* Extlang */
955 result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG);
956 if( !handleAppendResult( result, loc_name)){
957 RETURN_FALSE;
958 }
959
960 /* Script */
961 result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
962 if( !handleAppendResult( result, loc_name)){
963 RETURN_FALSE;
964 }
965
966 /* Region */
967 result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
968 if( !handleAppendResult( result, loc_name)){
969 RETURN_FALSE;
970 }
971
972 /* Variant */
973 result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG);
974 if( !handleAppendResult( result, loc_name)){
975 RETURN_FALSE;
976 }
977
978 /* Private */
979 result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG);
980 if( !handleAppendResult( result, loc_name)){
981 RETURN_FALSE;
982 }
983
984 RETURN_SMART_STR(loc_name);
985 }
986 /* }}} */
987
988
989 /*{{{
990 * Parses the locale and returns private subtags if existing
991 * else returns NULL
992 * e.g. for locale='en_US-x-prv1-prv2-prv3'
993 * returns a pointer to the string 'prv1-prv2-prv3'
994 */
get_private_subtags(const char * loc_name)995 static zend_string* get_private_subtags(const char* loc_name)
996 {
997 zend_string* result = NULL;
998 size_t len = 0;
999 const char* mod_loc_name =NULL;
1000
1001 if( loc_name && (len = strlen(loc_name)) > 0 ){
1002 zend_off_t singletonPos = 0;
1003 mod_loc_name = loc_name ;
1004 while( (singletonPos = getSingletonPos(mod_loc_name)) > -1){
1005 if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){
1006 /* private subtag start found */
1007 if( singletonPos + 2 == len){
1008 /* loc_name ends with '-x-' ; return NULL */
1009 }
1010 else{
1011 /* result = mod_loc_name + singletonPos +2; */
1012 result = zend_string_init(mod_loc_name + singletonPos+2 , (len -( singletonPos +2) ), 0);
1013 }
1014 break;
1015 }
1016 else{
1017 if((size_t)(singletonPos + 1) >= len){
1018 /* String end */
1019 break;
1020 } else {
1021 /* singleton found but not a private subtag , hence check further in the string for the private subtag */
1022 mod_loc_name = mod_loc_name + singletonPos +1;
1023 len = strlen(mod_loc_name);
1024 }
1025 }
1026 } /* end of while */
1027 }
1028
1029 return result;
1030 }
1031 /* }}} */
1032
1033 /* {{{ code used by locale_parse
1034 */
add_array_entry(const char * loc_name,zval * hash_arr,char * key_name)1035 static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name)
1036 {
1037 zend_string* key_value = NULL;
1038 char* cur_key_name = NULL;
1039 char* token = NULL;
1040 char* last_ptr = NULL;
1041
1042 int result = 0;
1043 int cur_result = 0;
1044
1045
1046 if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1047 key_value = get_private_subtags( loc_name );
1048 result = 1;
1049 } else {
1050 key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1051 }
1052 if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1053 ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1054 if( result > 0 && key_value){
1055 int cnt = 0;
1056 /* Tokenize on the "_" or "-" */
1057 token = php_strtok_r( key_value->val , DELIMITER ,&last_ptr);
1058 if( cur_key_name ){
1059 efree( cur_key_name);
1060 }
1061 cur_key_name = (char*)ecalloc( 25, 25);
1062 sprintf( cur_key_name , "%s%d", key_name , cnt++);
1063 add_assoc_string( hash_arr, cur_key_name , token);
1064 /* tokenize on the "_" or "-" and stop at singleton if any */
1065 while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1066 sprintf( cur_key_name , "%s%d", key_name , cnt++);
1067 add_assoc_string( hash_arr, cur_key_name , token);
1068 }
1069 /*
1070 if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1071 }
1072 */
1073 }
1074 if (key_value) {
1075 zend_string_release_ex(key_value, 0);
1076 }
1077 } else {
1078 if( result == 1 ){
1079 add_assoc_str( hash_arr, key_name , key_value);
1080 cur_result = 1;
1081 } else if (key_value) {
1082 zend_string_release_ex(key_value, 0);
1083 }
1084 }
1085
1086 if( cur_key_name ){
1087 efree( cur_key_name);
1088 }
1089 /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1090 return cur_result;
1091 }
1092 /* }}} */
1093
1094 /* {{{ proto static array Locale::parseLocale($locale)
 * Parses a locale ID into an array of its different parts
1096 }}} */
1097 /* {{{ proto static array parse_locale($locale)
1098 * parses a locale-id into an array the different parts of it
1099 */
PHP_FUNCTION(locale_parse)1100 PHP_FUNCTION(locale_parse)
1101 {
1102 const char* loc_name = NULL;
1103 size_t loc_name_len = 0;
1104 int grOffset = 0;
1105
1106 intl_error_reset( NULL );
1107
1108 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1109 &loc_name, &loc_name_len ) == FAILURE)
1110 {
1111 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1112 "locale_parse: unable to parse input params", 0 );
1113
1114 RETURN_FALSE;
1115 }
1116
1117 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1118
1119 if(loc_name_len == 0) {
1120 loc_name = intl_locale_get_default();
1121 }
1122
1123 array_init( return_value );
1124
1125 grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
1126 if( grOffset >= 0 ){
1127 add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1128 }
1129 else{
1130 /* Not grandfathered */
1131 add_array_entry( loc_name , return_value , LOC_LANG_TAG);
1132 add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG);
1133 add_array_entry( loc_name , return_value , LOC_REGION_TAG);
1134 add_array_entry( loc_name , return_value , LOC_VARIANT_TAG);
1135 add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG);
1136 }
1137 }
1138 /* }}} */
1139
1140 /* {{{ proto static array Locale::getAllVariants($locale)
1141 * gets an array containing the list of variants, or null
1142 }}} */
1143 /* {{{ proto static array locale_get_all_variants($locale)
1144 * gets an array containing the list of variants, or null
1145 */
PHP_FUNCTION(locale_get_all_variants)1146 PHP_FUNCTION(locale_get_all_variants)
1147 {
1148 const char* loc_name = NULL;
1149 size_t loc_name_len = 0;
1150
1151 int result = 0;
1152 char* token = NULL;
1153 zend_string* variant = NULL;
1154 char* saved_ptr = NULL;
1155
1156 intl_error_reset( NULL );
1157
1158 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1159 &loc_name, &loc_name_len ) == FAILURE)
1160 {
1161 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1162 "locale_parse: unable to parse input params", 0 );
1163
1164 RETURN_FALSE;
1165 }
1166
1167 if(loc_name_len == 0) {
1168 loc_name = intl_locale_get_default();
1169 loc_name_len = strlen(loc_name);
1170 }
1171
1172 INTL_CHECK_LOCALE_LEN(loc_name_len);
1173
1174 array_init( return_value );
1175
1176 /* If the locale is grandfathered, stop, no variants */
1177 if( findOffset( LOC_GRANDFATHERED , loc_name ) >= 0 ){
1178 /* ("Grandfathered Tag. No variants."); */
1179 }
1180 else {
1181 /* Call ICU variant */
1182 variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1183 if( result > 0 && variant){
1184 /* Tokenize on the "_" or "-" */
1185 token = php_strtok_r( variant->val , DELIMITER , &saved_ptr);
1186 add_next_index_stringl( return_value, token , strlen(token));
1187 /* tokenize on the "_" or "-" and stop at singleton if any */
1188 while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1189 add_next_index_stringl( return_value, token , strlen(token));
1190 }
1191 }
1192 if( variant ){
1193 zend_string_release_ex( variant, 0 );
1194 }
1195 }
1196
1197
1198 }
1199 /* }}} */
1200
/*{{{
 * Converts to lower case and also replaces all hyphens with the underscore
 *
 * str:    NUL-terminated input; may be NULL.
 * retstr: output buffer of at least strlen(str) + 1 bytes. It is left
 *         untouched when str is NULL or empty.
 *
 * Returns 1 when retstr was filled in, 0 when str is NULL or empty.
 */
static int strToMatch(const char* str ,char *retstr)
{
	if( (!str) || str[0] == '\0' ){
		return 0;
	}

	while( (*str) != '\0' ){
		if( *str == '-' ){
			*retstr = '_';
		} else {
			/* tolower() is only defined for unsigned char values and EOF */
			*retstr = (char)tolower((unsigned char)*str);
		}
		str++;
		retstr++;
	}
	*retstr = '\0';

	return 1;
}
1232 /* }}} */
1233
1234 /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1235 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1236 */
1237 /* }}} */
1238 /* {{{ proto bool locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1239 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1240 */
PHP_FUNCTION(locale_filter_matches)1241 PHP_FUNCTION(locale_filter_matches)
1242 {
1243 char* lang_tag = NULL;
1244 size_t lang_tag_len = 0;
1245 const char* loc_range = NULL;
1246 size_t loc_range_len = 0;
1247
1248 int result = 0;
1249 char* token = 0;
1250 char* chrcheck = NULL;
1251
1252 zend_string* can_lang_tag = NULL;
1253 zend_string* can_loc_range = NULL;
1254
1255 char* cur_lang_tag = NULL;
1256 char* cur_loc_range = NULL;
1257
1258 zend_bool boolCanonical = 0;
1259 UErrorCode status = U_ZERO_ERROR;
1260
1261 intl_error_reset( NULL );
1262
1263 if(zend_parse_parameters( ZEND_NUM_ARGS(), "ss|b",
1264 &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1265 &boolCanonical) == FAILURE)
1266 {
1267 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1268 "locale_filter_matches: unable to parse input params", 0 );
1269
1270 RETURN_FALSE;
1271 }
1272
1273 if(loc_range_len == 0) {
1274 loc_range = intl_locale_get_default();
1275 loc_range_len = strlen(loc_range);
1276 }
1277
1278 if( strcmp(loc_range,"*")==0){
1279 RETURN_TRUE;
1280 }
1281
1282 INTL_CHECK_LOCALE_LEN(loc_range_len);
1283 INTL_CHECK_LOCALE_LEN(lang_tag_len);
1284
1285 if( boolCanonical ){
1286 /* canonicalize loc_range */
1287 can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1288 if( result <=0) {
1289 intl_error_set( NULL, status,
1290 "locale_filter_matches : unable to canonicalize loc_range" , 0 );
1291 RETURN_FALSE;
1292 }
1293
1294 /* canonicalize lang_tag */
1295 can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result , 0);
1296 if( result <=0) {
1297 intl_error_set( NULL, status,
1298 "locale_filter_matches : unable to canonicalize lang_tag" , 0 );
1299 RETURN_FALSE;
1300 }
1301
1302 /* Convert to lower case for case-insensitive comparison */
1303 cur_lang_tag = ecalloc( 1, can_lang_tag->len + 1);
1304
1305 /* Convert to lower case for case-insensitive comparison */
1306 result = strToMatch( can_lang_tag->val , cur_lang_tag);
1307 if( result == 0) {
1308 efree( cur_lang_tag );
1309 zend_string_release_ex( can_lang_tag, 0 );
1310 RETURN_FALSE;
1311 }
1312
1313 cur_loc_range = ecalloc( 1, can_loc_range->len + 1);
1314 result = strToMatch( can_loc_range->val , cur_loc_range );
1315 if( result == 0) {
1316 efree( cur_lang_tag );
1317 zend_string_release_ex( can_lang_tag, 0 );
1318 efree( cur_loc_range );
1319 zend_string_release_ex( can_loc_range, 0 );
1320 RETURN_FALSE;
1321 }
1322
1323 /* check if prefix */
1324 token = strstr( cur_lang_tag , cur_loc_range );
1325
1326 if( token && (token==cur_lang_tag) ){
1327 /* check if the char. after match is SEPARATOR */
1328 chrcheck = token + (strlen(cur_loc_range));
1329 if( isIDSeparator(*chrcheck) || isKeywordSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1330 efree( cur_lang_tag );
1331 efree( cur_loc_range );
1332 if( can_lang_tag){
1333 zend_string_release_ex( can_lang_tag, 0 );
1334 }
1335 if( can_loc_range){
1336 zend_string_release_ex( can_loc_range, 0 );
1337 }
1338 RETURN_TRUE;
1339 }
1340 }
1341
1342 /* No prefix as loc_range */
1343 if( cur_lang_tag){
1344 efree( cur_lang_tag );
1345 }
1346 if( cur_loc_range){
1347 efree( cur_loc_range );
1348 }
1349 if( can_lang_tag){
1350 zend_string_release_ex( can_lang_tag, 0 );
1351 }
1352 if( can_loc_range){
1353 zend_string_release_ex( can_loc_range, 0 );
1354 }
1355 RETURN_FALSE;
1356
1357 } /* end of if isCanonical */
1358 else{
1359 /* Convert to lower case for case-insensitive comparison */
1360 cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1361
1362 result = strToMatch( lang_tag , cur_lang_tag);
1363 if( result == 0) {
1364 efree( cur_lang_tag );
1365 RETURN_FALSE;
1366 }
1367 cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1368 result = strToMatch( loc_range , cur_loc_range );
1369 if( result == 0) {
1370 efree( cur_lang_tag );
1371 efree( cur_loc_range );
1372 RETURN_FALSE;
1373 }
1374
1375 /* check if prefix */
1376 token = strstr( cur_lang_tag , cur_loc_range );
1377
1378 if( token && (token==cur_lang_tag) ){
1379 /* check if the char. after match is SEPARATOR */
1380 chrcheck = token + (strlen(cur_loc_range));
1381 if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1382 efree( cur_lang_tag );
1383 efree( cur_loc_range );
1384 RETURN_TRUE;
1385 }
1386 }
1387
1388 /* No prefix as loc_range */
1389 if( cur_lang_tag){
1390 efree( cur_lang_tag );
1391 }
1392 if( cur_loc_range){
1393 efree( cur_loc_range );
1394 }
1395 RETURN_FALSE;
1396
1397 }
1398 }
1399 /* }}} */
1400
array_cleanup(char * arr[],int arr_size)1401 static void array_cleanup( char* arr[] , int arr_size)
1402 {
1403 int i=0;
1404 for( i=0; i< arr_size; i++ ){
1405 if( arr[i*2] ){
1406 efree( arr[i*2]);
1407 }
1408 }
1409 efree(arr);
1410 }
1411
1412 #define LOOKUP_CLEAN_RETURN(value) array_cleanup(cur_arr, cur_arr_len); return (value)
1413 /* {{{
1414 * returns the lookup result to lookup_loc_range_src_php
1415 * internal function
1416 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize)1417 static zend_string* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize )
1418 {
1419 int i = 0;
1420 int cur_arr_len = 0;
1421 int result = 0;
1422
1423 zend_string* lang_tag = NULL;
1424 zval* ele_value = NULL;
1425
1426 char* cur_loc_range = NULL;
1427 zend_string* can_loc_range = NULL;
1428 zend_off_t saved_pos = 0;
1429
1430 zend_string* return_value = NULL;
1431
1432 char **cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1433 ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1434 /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1435 if(Z_TYPE_P(ele_value)!= IS_STRING) {
1436 /* element value is not a string */
1437 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0);
1438 LOOKUP_CLEAN_RETURN(NULL);
1439 }
1440 cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1441 result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1442 if(result == 0) {
1443 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0);
1444 LOOKUP_CLEAN_RETURN(NULL);
1445 }
1446 cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1447 cur_arr_len++ ;
1448 } ZEND_HASH_FOREACH_END(); /* end of for */
1449
1450 /* Canonicalize array elements */
1451 if(canonicalize) {
1452 for(i=0; i<cur_arr_len; i++) {
1453 lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1454 if(result != 1 || lang_tag == NULL || !lang_tag->val[0]) {
1455 if(lang_tag) {
1456 zend_string_release_ex(lang_tag, 0);
1457 }
1458 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1459 LOOKUP_CLEAN_RETURN(NULL);
1460 }
1461 cur_arr[i*2] = erealloc(cur_arr[i*2], lang_tag->len+1);
1462 result = strToMatch(lang_tag->val, cur_arr[i*2]);
1463 zend_string_release_ex(lang_tag, 0);
1464 if(result == 0) {
1465 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1466 LOOKUP_CLEAN_RETURN(NULL);
1467 }
1468 }
1469
1470 }
1471
1472 if(canonicalize) {
1473 /* Canonicalize the loc_range */
1474 can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1475 if( result != 1 || can_loc_range == NULL || !can_loc_range->val[0]) {
1476 /* Error */
1477 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 );
1478 if(can_loc_range) {
1479 zend_string_release_ex(can_loc_range, 0);
1480 }
1481 LOOKUP_CLEAN_RETURN(NULL);
1482 } else {
1483 loc_range = can_loc_range->val;
1484 }
1485 }
1486
1487 cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1488 /* convert to lower and replace hyphens */
1489 result = strToMatch(loc_range, cur_loc_range);
1490 if(can_loc_range) {
1491 zend_string_release_ex(can_loc_range, 0);
1492 }
1493 if(result == 0) {
1494 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1495 LOOKUP_CLEAN_RETURN(NULL);
1496 }
1497
1498 /* Lookup for the lang_tag match */
1499 saved_pos = strlen(cur_loc_range);
1500 while(saved_pos > 0) {
1501 for(i=0; i< cur_arr_len; i++){
1502 if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1503 /* Match found */
1504 char *str = canonicalize ? cur_arr[i*2] : cur_arr[i*2+1];
1505 return_value = zend_string_init(str, strlen(str), 0);
1506 efree(cur_loc_range);
1507 LOOKUP_CLEAN_RETURN(return_value);
1508 }
1509 }
1510 saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1511 }
1512
1513 /* Match not found */
1514 efree(cur_loc_range);
1515 LOOKUP_CLEAN_RETURN(NULL);
1516 }
1517 /* }}} */
1518
1519 /* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1520 * Searches the items in $langtag for the best match to the language
1521 * range
1522 */
1523 /* }}} */
1524 /* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1525 * Searches the items in $langtag for the best match to the language
1526 * range
1527 */
PHP_FUNCTION(locale_lookup)1528 PHP_FUNCTION(locale_lookup)
1529 {
1530 zend_string* fallback_loc_str = NULL;
1531 const char* loc_range = NULL;
1532 size_t loc_range_len = 0;
1533
1534 zval* arr = NULL;
1535 HashTable* hash_arr = NULL;
1536 zend_bool boolCanonical = 0;
1537 zend_string* result_str = NULL;
1538
1539 intl_error_reset( NULL );
1540
1541 if(zend_parse_parameters( ZEND_NUM_ARGS(), "as|bS!", &arr, &loc_range, &loc_range_len,
1542 &boolCanonical, &fallback_loc_str) == FAILURE) {
1543 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 );
1544 RETURN_FALSE;
1545 }
1546
1547 if(loc_range_len == 0) {
1548 if(fallback_loc_str) {
1549 loc_range = ZSTR_VAL(fallback_loc_str);
1550 loc_range_len = ZSTR_LEN(fallback_loc_str);
1551 } else {
1552 loc_range = intl_locale_get_default();
1553 loc_range_len = strlen(loc_range);
1554 }
1555 }
1556
1557 hash_arr = Z_ARRVAL_P(arr);
1558
1559 INTL_CHECK_LOCALE_LEN(loc_range_len);
1560
1561 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1562 RETURN_EMPTY_STRING();
1563 }
1564
1565 result_str = lookup_loc_range(loc_range, hash_arr, boolCanonical);
1566 if(result_str == NULL || ZSTR_VAL(result_str)[0] == '\0') {
1567 if( fallback_loc_str ) {
1568 result_str = zend_string_copy(fallback_loc_str);
1569 } else {
1570 RETURN_EMPTY_STRING();
1571 }
1572 }
1573
1574 RETURN_STR(result_str);
1575 }
1576 /* }}} */
1577
1578 /* {{{ proto string Locale::acceptFromHttp(string $http_accept)
 * Tries to find out best available locale based on HTTP "Accept-Language" header
1580 */
1581 /* }}} */
1582 /* {{{ proto string locale_accept_from_http(string $http_accept)
1583 * Tries to find out best available locale based on HTTP �Accept-Language� header
1584 */
PHP_FUNCTION(locale_accept_from_http)1585 PHP_FUNCTION(locale_accept_from_http)
1586 {
1587 UEnumeration *available;
1588 char *http_accept = NULL;
1589 size_t http_accept_len;
1590 UErrorCode status = 0;
1591 int len;
1592 char resultLocale[INTL_MAX_LOCALE_LEN+1];
1593 UAcceptResult outResult;
1594
1595 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s", &http_accept, &http_accept_len) == FAILURE)
1596 {
1597 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1598 "locale_accept_from_http: unable to parse input parameters", 0 );
1599 RETURN_FALSE;
1600 }
1601 if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1602 /* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1603 char *start = http_accept;
1604 char *end;
1605 size_t len;
1606 do {
1607 end = strchr(start, ',');
1608 len = end ? end-start : http_accept_len-(start-http_accept);
1609 if(len > ULOC_FULLNAME_CAPACITY) {
1610 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1611 "locale_accept_from_http: locale string too long", 0 );
1612 RETURN_FALSE;
1613 }
1614 if(end) {
1615 start = end+1;
1616 }
1617 } while(end != NULL);
1618 }
1619
1620 available = ures_openAvailableLocales(NULL, &status);
1621 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1622 len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1623 &outResult, http_accept, available, &status);
1624 uenum_close(available);
1625 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1626 if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1627 RETURN_FALSE;
1628 }
1629 RETURN_STRINGL(resultLocale, len);
1630 }
1631 /* }}} */
1632