1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | https://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Kirti Velankar <kirtig@yahoo-inc.com> |
12 +----------------------------------------------------------------------+
13 */
14
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18
19 #include <unicode/ustring.h>
20 #include <unicode/udata.h>
21 #include <unicode/putil.h>
22 #include <unicode/ures.h>
23
24 #include "php_intl.h"
25 #include "locale.h"
26 #include "locale_class.h"
27 #include "intl_convert.h"
28 #include "intl_data.h"
29
30 #include <zend_API.h>
31 #include <zend.h>
32 #include <php.h>
33 #include "main/php_ini.h"
34 #include "zend_smart_str.h"
35
36 ZEND_EXTERN_MODULE_GLOBALS( intl )
37
/* Separator, delimiter and prefix strings used when composing and parsing locale IDs */
#define SEPARATOR "_"
#define SEPARATOR1 "-"
#define DELIMITER "-_"
#define EXTLANG_PREFIX "a"
#define PRIVATE_PREFIX "x"
#define DISP_NAME "name"

/* Upper bounds on the number of numbered keys ("variant0", "variant1", ...)
 * that locale_compose looks up for each kind of subtag.
 * NOTE(review): MAX_NO_LOOKUP_LANG_TAG is not used in this part of the file. */
#define MAX_NO_VARIANT 15
#define MAX_NO_EXTLANG 3
#define MAX_NO_PRIVATE 15
#define MAX_NO_LOOKUP_LANG_TAG 100

/* Returned by append_key_value() when the requested key is absent from the array */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN 11
#define EXTLANG_KEYNAME_LEN 10
#define PRIVATE_KEYNAME_LEN 11
57
/* Based on the IANA language subtag registry (File-Date: 2021-08-06)
 * https://www.iana.org/assignments/language-subtag-registry
 *
 * This list includes all grandfathered tags, as well as redundant
 * tags that have a Preferred-Value.
 *
 * The list is NULL-terminated so that findOffset() can walk it, and every
 * entry must keep the same index as its counterpart in
 * LOC_PREFERRED_GRANDFATHERED: add or remove entries in both arrays together.
 */
static const char * const LOC_GRANDFATHERED[] = {
	"art-lojban",
	"cel-gaulish",
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-hak",
	"i-klingon",
	"i-lux",
	"i-mingo",
	"i-navajo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"no-bok",
	"no-nyn",
	"sgn-BE-FR",
	"sgn-BE-NL",
	"sgn-BR",
	"sgn-CH-DE",
	"sgn-CO",
	"sgn-DE",
	"sgn-DK",
	"sgn-ES",
	"sgn-FR",
	"sgn-GB",
	"sgn-GR",
	"sgn-IE",
	"sgn-IT",
	"sgn-JP",
	"sgn-MX",
	"sgn-NI",
	"sgn-NL",
	"sgn-NO",
	"sgn-PT",
	"sgn-SE",
	"sgn-US",
	"sgn-ZA",
	"zh-cmn",
	"zh-cmn-Hans",
	"zh-cmn-Hant",
	"zh-gan",
	"zh-guoyu",
	"zh-hakka",
	"zh-min",
	"zh-min-nan",
	"zh-wuu",
	"zh-xiang",
	NULL
};
117
/* Based on the IANA language subtag registry (File-Date: 2021-08-06)
 *
 * This array lists the preferred values for the grandfathered and redundant
 * tags listed in LOC_GRANDFATHERED. This is in sync with the array
 * LOC_GRANDFATHERED, i.e., the offsets of the grandfathered tags match the
 * offsets of the preferred value. If a value in LOC_PREFERRED_GRANDFATHERED is
 * NULL, then the matching offset in LOC_GRANDFATHERED has no preferred value.
 *
 * Because NULL is a legitimate entry here, this array must only be indexed
 * with an offset obtained from LOC_GRANDFATHERED; it cannot be scanned up to
 * its final NULL the way LOC_GRANDFATHERED can.
 */
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
	"jbo",
	NULL,
	"en-GB-oxendict",
	"ami",
	"bnn",
	NULL,
	NULL,
	"hak",
	"tlh",
	"lb",
	NULL,
	"nv",
	"pwn",
	"tao",
	"tay",
	"tsu",
	"nb",
	"nn",
	"sfb",
	"vgt",
	"bzs",
	"sgg",
	"csn",
	"gsg",
	"dsl",
	"ssp",
	"fsl",
	"bfi",
	"gss",
	"isg",
	"ise",
	"jsl",
	"mfs",
	"ncs",
	"dse",
	"nsl",
	"psr",
	"swl",
	"ase",
	"sfs",
	"cmn",
	"cmn-Hans",
	"cmn-Hant",
	"gan",
	"cmn",
	"hak",
	NULL,
	"nan",
	"wuu",
	"hsn",
	NULL
};
179
/* Character-class helpers for locale IDs.
 *
 * Every argument is parenthesized so that an expression such as `p + 2` or
 * `a & b` can be passed safely. The argument is still evaluated more than
 * once, so do not pass expressions with side effects (e.g. `*p++`).
 */

/* returns true if a is an ID separator, false otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a) == 'x') || ((a) == 'X') || ((a) == 'i') || ((a) == 'I'))

/* returns true if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant */
#define isTerminator(a) (((a) == 0) || ((a) == '.') || ((a) == '@'))
195
/* {{{ Case-insensitive lookup of 'key' in the NULL-terminated string array 'list'.
 * Returns the zero-based index of the first matching entry, or -1 when no
 * entry matches. */
static int16_t findOffset(const char* const* list, const char* key)
{
	size_t idx;

	for (idx = 0; list[idx] != NULL; idx++) {
		if (strcasecmp(key, list[idx]) == 0) {
			return (int16_t)idx;
		}
	}

	return -1;
}
/*}}}*/
212
getPreferredTag(const char * gf_tag)213 static char* getPreferredTag(const char* gf_tag)
214 {
215 char* result = NULL;
216 zend_off_t grOffset = 0;
217
218 grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
219 if(grOffset < 0) {
220 return NULL;
221 }
222 if( LOC_PREFERRED_GRANDFATHERED[grOffset] != NULL ){
223 /* return preferred tag */
224 result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
225 } else {
226 /* Return correct grandfathered language tag */
227 result = estrdup( LOC_GRANDFATHERED[grOffset] );
228 }
229 return result;
230 }
231
232 /* {{{
233 * returns the position of next token for lookup
234 * or -1 if no token
235 * strtokr equivalent search for token in reverse direction
236 */
getStrrtokenPos(char * str,zend_off_t savedPos)237 static zend_off_t getStrrtokenPos(char* str, zend_off_t savedPos)
238 {
239 zend_off_t result =-1;
240 zend_off_t i;
241
242 for(i=savedPos-1; i>=0; i--) {
243 if(isIDSeparator(*(str+i)) || isKeywordSeparator(*(str+i))){
244 /* delimiter found; check for singleton */
245 if(i>=2 && isIDSeparator(*(str+i-2)) ){
246 /* a singleton; so send the position of token before the singleton */
247 result = i-2;
248 } else {
249 result = i;
250 }
251 break;
252 }
253 }
254 if(result < 1){
255 /* Just in case inavlid locale e.g. '-x-xyz' or '-sl_Latn' */
256 result =-1;
257 }
258 return result;
259 }
260 /* }}} */
261
262 /* {{{
263 * returns the position of a singleton if present
264 * returns -1 if no singleton
265 * strtok equivalent search for singleton
266 */
getSingletonPos(const char * str)267 static zend_off_t getSingletonPos(const char* str)
268 {
269 zend_off_t result =-1;
270 size_t len = 0;
271
272 if( str && ((len=strlen(str))>0) ){
273 zend_off_t i = 0;
274 for( i=0; (size_t)i < len ; i++){
275 if( isIDSeparator(*(str+i)) ){
276 if( i==1){
277 /* string is of the form x-avy or a-prv1 */
278 result =0;
279 break;
280 } else {
281 /* delimiter found; check for singleton */
282 if( isIDSeparator(*(str+i+2)) ){
283 /* a singleton; so send the position of separator before singleton */
284 result = i+1;
285 break;
286 }
287 }
288 }
289 }/* end of for */
290
291 }
292 return result;
293 }
294 /* }}} */
295
/* {{{ Get default locale
 * Takes no arguments and returns the string reported by
 * intl_locale_get_default(). */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
	if (zend_parse_parameters_none() == FAILURE) {
		RETURN_THROWS();
	}

	RETURN_STRING( intl_locale_get_default( ) );
}

/* }}} */
309
310 /* {{{ Set default locale */
311 /* }}} */
312 /* {{{ Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)313 PHP_NAMED_FUNCTION(zif_locale_set_default)
314 {
315 zend_string* locale_name;
316 zend_string *ini_name;
317 char *default_locale = NULL;
318
319 if(zend_parse_parameters( ZEND_NUM_ARGS(), "S", &locale_name) == FAILURE)
320 {
321 RETURN_THROWS();
322 }
323
324 if (ZSTR_LEN(locale_name) == 0) {
325 default_locale = (char *)uloc_getDefault();
326 locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
327 }
328
329 ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
330 zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
331 zend_string_release_ex(ini_name, 0);
332 if (default_locale != NULL) {
333 zend_string_release_ex(locale_name, 0);
334 }
335
336 RETURN_TRUE;
337 }
338 /* }}} */
339
/* {{{
 * Gets the value from ICU
 * common code shared by get_primary_language,get_script or get_region or get_variant
 *
 * loc_name        locale ID to inspect
 * tag_name        which piece to fetch: LOC_LANG_TAG, LOC_SCRIPT_TAG,
 *                 LOC_REGION_TAG, LOC_VARIANT_TAG or LOC_CANONICALIZE_TAG
 * result          set to 0 if ICU reported an error, 1 if successful,
 *                 -1 if ICU returned an empty value. It is written only after
 *                 ICU has been called: every early return before the ICU loop
 *                 leaves *result untouched, so callers must initialize it.
 * fromParseLocale 1 enables the singleton handling used by locale_parse
 *
 * Returns a new zend_string owned by the caller, or NULL.
 */
static zend_string* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
{
	zend_string* tag_value = NULL;
	int32_t tag_value_len = 512;

	char* mod_loc_name = NULL;

	int32_t buflen = 512;
	UErrorCode status = U_ZERO_ERROR;

	/* Over-long input is rejected without touching *result */
	if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
		return NULL;
	}

	if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
		/* Handle grandfathered languages */
		zend_off_t grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
		if( grOffset >= 0 ){
			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
				/* The whole grandfathered tag is reported as the language */
				return zend_string_init(loc_name, strlen(loc_name), 0);
			} else {
				/* Since grandfathered, there is no value; do nothing and return NULL */
				return NULL;
			}
		}

		if( fromParseLocale==1 ){
			zend_off_t singletonPos = 0;

			/* Handle singletons */
			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
				/* An 'x-'/'i-' prefixed ID is returned unchanged as the language */
				if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
					return zend_string_init(loc_name, strlen(loc_name), 0);
				}
			}

			singletonPos = getSingletonPos( loc_name );
			if( singletonPos == 0){
				/* singleton at start of script, region , variant etc.
				 * or invalid singleton at start of language */
				return NULL;
			} else if( singletonPos > 0 ){
				/* singleton at some position except at start
				 * strip off the singleton and rest of the loc_name */
				mod_loc_name = estrndup ( loc_name , singletonPos-1);
			}
		} /* end of if fromParse */

	} /* end of if != LOC_CANONICALIZE_TAG */

	if( mod_loc_name == NULL){
		mod_loc_name = estrdup(loc_name );
	}

	/* Proceed to ICU: call once with a 512-byte buffer and, if ICU reports
	 * a buffer overflow, grow the buffer to the reported size and retry */
	do{
		if (tag_value) {
			tag_value = zend_string_realloc( tag_value , buflen, 0);
		} else {
			tag_value = zend_string_alloc( buflen, 0);
		}
		tag_value_len = buflen;

		if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
			buflen = uloc_getScript ( mod_loc_name , tag_value->val , tag_value_len , &status);
		}
		if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
			buflen = uloc_getLanguage ( mod_loc_name , tag_value->val , tag_value_len , &status);
		}
		if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
			buflen = uloc_getCountry ( mod_loc_name , tag_value->val , tag_value_len , &status);
		}
		if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
			buflen = uloc_getVariant ( mod_loc_name , tag_value->val , tag_value_len , &status);
		}
		if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
			buflen = uloc_canonicalize ( mod_loc_name , tag_value->val , tag_value_len , &status);
		}

		if( U_FAILURE( status ) ) {
			if( status == U_BUFFER_OVERFLOW_ERROR ) {
				status = U_ZERO_ERROR;
				buflen++; /* add space for \0 */
				/* 'continue' jumps to the loop condition, which now holds
				 * because buflen exceeds tag_value_len */
				continue;
			}

			/* Error in retrieving data */
			*result = 0;
			if( tag_value ){
				zend_string_release_ex( tag_value, 0 );
			}
			if( mod_loc_name ){
				efree( mod_loc_name);
			}
			return NULL;
		}
	} while( buflen > tag_value_len );

	if( buflen ==0 ){
		/* No value found */
		*result = -1;
		if( tag_value ){
			zend_string_release_ex( tag_value, 0 );
		}
		if( mod_loc_name ){
			efree( mod_loc_name);
		}
		return NULL;
	} else {
		*result = 1;
	}

	if( mod_loc_name ){
		efree( mod_loc_name);
	}

	/* The string was allocated at buffer size; record the actual length */
	tag_value->len = strlen(tag_value->val);
	return tag_value;
}
/* }}} */
465
466 /* {{{
467 * Gets the value from ICU , called when PHP userspace function is called
468 * common code shared by get_primary_language,get_script or get_region or get_variant
469 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)470 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
471 {
472
473 const char* loc_name = NULL;
474 size_t loc_name_len = 0;
475
476 zend_string* tag_value = NULL;
477 char* empty_result = "";
478
479 int result = 0;
480 char* msg = NULL;
481
482 UErrorCode status = U_ZERO_ERROR;
483
484 intl_error_reset( NULL );
485
486 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
487 &loc_name ,&loc_name_len ) == FAILURE) {
488 RETURN_THROWS();
489 }
490
491 if(loc_name_len == 0) {
492 loc_name = intl_locale_get_default();
493 loc_name_len = strlen(loc_name);
494 }
495
496 INTL_CHECK_LOCALE_LEN(loc_name_len);
497
498 /* Call ICU get */
499 tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
500
501 /* No value found */
502 if( result == -1 ) {
503 if( tag_value){
504 zend_string_release_ex( tag_value, 0 );
505 }
506 RETURN_STRING( empty_result);
507 }
508
509 /* value found */
510 if( tag_value){
511 RETVAL_STR( tag_value );
512 return;
513 }
514
515 /* Error encountered while fetching the value */
516 if( result ==0) {
517 spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
518 intl_error_set( NULL, status, msg , 1 );
519 efree(msg);
520 RETURN_NULL();
521 }
522
523 }
524 /* }}} */
525
/* {{{ gets the script for the $locale
 * Thin wrapper: forwards the call to get_icu_value_src_php() with LOC_SCRIPT_TAG. */
PHP_FUNCTION( locale_get_script )
{
	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
532
/* {{{ gets the region for the $locale
 * Thin wrapper: forwards the call to get_icu_value_src_php() with LOC_REGION_TAG. */
PHP_FUNCTION( locale_get_region )
{
	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
539
/* {{{ gets the primary language for the $locale
 * Thin wrapper: forwards the call to get_icu_value_src_php() with LOC_LANG_TAG. */
PHP_FUNCTION(locale_get_primary_language )
{
	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
546
547
/* {{{
 * common code shared by display_xyz functions to get the value from ICU
 *
 * tag_name selects what is displayed: DISP_NAME for the full display name,
 * or LOC_LANG_TAG / LOC_SCRIPT_TAG / LOC_REGION_TAG / LOC_VARIANT_TAG.
 * Userland arguments: the locale to describe and, optionally, the locale
 * in which to render the description (default locale when omitted or null).
 * Returns the UTF-8 display string, or false on any failure.
 */
static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
{
	const char* loc_name = NULL;
	size_t loc_name_len = 0;

	const char* disp_loc_name = NULL;
	size_t disp_loc_name_len = 0;
	/* Set when disp_loc_name is a private copy that must be efree()'d here */
	int free_loc_name = 0;

	UChar* disp_name = NULL;
	int32_t disp_name_len = 0;

	char* mod_loc_name = NULL;

	int32_t buflen = 512;
	UErrorCode status = U_ZERO_ERROR;

	zend_string* u8str;

	char* msg = NULL;

	intl_error_reset( NULL );

	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|s!",
		&loc_name, &loc_name_len ,
		&disp_loc_name ,&disp_loc_name_len ) == FAILURE)
	{
		RETURN_THROWS();
	}

	if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
		/* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 );
		efree(msg);
		RETURN_FALSE;
	}

	if(loc_name_len == 0) {
		loc_name = intl_locale_get_default();
	}

	if( strcmp(tag_name, DISP_NAME) != 0 ){
		/* Handle grandfathered languages */
		int grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
		if( grOffset >= 0 ){
			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
				/* Display the preferred replacement of the grandfathered tag */
				mod_loc_name = getPreferredTag( loc_name );
			} else {
				/* Since grandfathered, there is no value for this subtag; return false.
				 * Nothing has been allocated yet at this point. */
				RETURN_FALSE;
			}
		}
	} /* end of if != DISP_NAME */

	if( mod_loc_name==NULL ){
		mod_loc_name = estrdup( loc_name );
	}

	/* Check if disp_loc_name passed , if not use default locale */
	if( !disp_loc_name){
		disp_loc_name = estrdup(intl_locale_get_default());
		free_loc_name = 1;
	}

	/* Get the disp_value for the given locale; on buffer overflow the
	 * buffer is regrown to the size ICU reported and the call is repeated */
	do{
		disp_name = erealloc( disp_name , buflen * sizeof(UChar) );
		disp_name_len = buflen;

		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , DISP_NAME)==0 ){
			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		}

		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
		if( U_FAILURE( status ) )
		{
			if( status == U_BUFFER_OVERFLOW_ERROR )
			{
				status = U_ZERO_ERROR;
				/* jumps to the loop condition, which holds because buflen
				 * now exceeds disp_name_len */
				continue;
			}

			spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
			intl_error_set( NULL, status, msg , 1 );
			efree(msg);
			if( disp_name){
				efree( disp_name );
			}
			if( mod_loc_name){
				efree( mod_loc_name );
			}
			if (free_loc_name) {
				efree((void *)disp_loc_name);
				disp_loc_name = NULL;
			}
			RETURN_FALSE;
		}
	} while( buflen > disp_name_len );

	if( mod_loc_name){
		efree( mod_loc_name );
	}
	if (free_loc_name) {
		efree((void *)disp_loc_name);
		disp_loc_name = NULL;
	}
	/* Convert display locale name from UTF-16 to UTF-8. */
	u8str = intl_convert_utf16_to_utf8(disp_name, buflen, &status );
	efree( disp_name );
	if( !u8str )
	{
		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
		intl_error_set( NULL, status, msg , 1 );
		efree(msg);
		RETURN_FALSE;
	}

	RETVAL_NEW_STR( u8str );
}
/* }}} */
680
/* {{{ gets the name for the $locale in $in_locale or default_locale
 * Thin wrapper: forwards the call to get_icu_disp_value_src_php() with DISP_NAME. */
PHP_FUNCTION(locale_get_display_name)
{
	get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
687
/* {{{ gets the language for the $locale in $in_locale or default_locale
 * Thin wrapper: forwards the call to get_icu_disp_value_src_php() with LOC_LANG_TAG. */
PHP_FUNCTION(locale_get_display_language)
{
	get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
694
/* {{{ gets the script for the $locale in $in_locale or default_locale
 * Thin wrapper: forwards the call to get_icu_disp_value_src_php() with LOC_SCRIPT_TAG. */
PHP_FUNCTION(locale_get_display_script)
{
	get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
701
/* {{{ gets the region for the $locale in $in_locale or default_locale
 * Thin wrapper: forwards the call to get_icu_disp_value_src_php() with LOC_REGION_TAG. */
PHP_FUNCTION(locale_get_display_region)
{
	get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
708
/* {{{
 * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
 * proto static string get_display_variant($locale, $in_locale = null)
 * gets the variant for the $locale in $in_locale or default_locale
 * Thin wrapper: forwards the call to get_icu_disp_value_src_php() with LOC_VARIANT_TAG.
 */
PHP_FUNCTION(locale_get_display_variant)
{
	get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
722
723 /* {{{ return an associative array containing keyword-value
724 * pairs for this locale. The keys are keys to the array (doh!)
725 * }}}*/
726 /* {{{ return an associative array containing keyword-value
727 * pairs for this locale. The keys are keys to the array (doh!)
728 */
PHP_FUNCTION(locale_get_keywords)729 PHP_FUNCTION( locale_get_keywords )
730 {
731 UEnumeration* e = NULL;
732 UErrorCode status = U_ZERO_ERROR;
733
734 const char* kw_key = NULL;
735 int32_t kw_key_len = 0;
736
737 const char* loc_name = NULL;
738 size_t loc_name_len = 0;
739
740 intl_error_reset( NULL );
741
742 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
743 &loc_name, &loc_name_len ) == FAILURE)
744 {
745 RETURN_THROWS();
746 }
747
748 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
749
750 if(loc_name_len == 0) {
751 loc_name = intl_locale_get_default();
752 }
753
754 /* Get the keywords */
755 e = uloc_openKeywords( loc_name, &status );
756 if( e != NULL ) {
757 /*
758 ICU expects the buffer to be allocated before calling the function
759 and so the buffer size has been explicitly specified
760 ICU uloc.h #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
761 hence the kw_value buffer size is 100
762 */
763
764 /* Traverse it, filling the return array. */
765 array_init( return_value );
766
767 while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
768 int32_t kw_value_len = 100;
769 zend_string *kw_value_str = zend_string_alloc(kw_value_len, 0);
770
771 /* Get the keyword value for each keyword */
772 kw_value_len=uloc_getKeywordValue( loc_name, kw_key, ZSTR_VAL(kw_value_str), kw_value_len, &status );
773 if (status == U_BUFFER_OVERFLOW_ERROR) {
774 status = U_ZERO_ERROR;
775 kw_value_str = zend_string_extend(kw_value_str, kw_value_len, 0);
776 kw_value_len=uloc_getKeywordValue( loc_name,kw_key, ZSTR_VAL(kw_value_str), kw_value_len+1, &status );
777 } else if(!U_FAILURE(status)) {
778 kw_value_str = zend_string_truncate(kw_value_str, kw_value_len, 0);
779 }
780 if (U_FAILURE(status)) {
781 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword value for the keyword", 0 );
782 if( kw_value_str){
783 zend_string_efree( kw_value_str );
784 }
785 zend_array_destroy(Z_ARR_P(return_value));
786 RETURN_FALSE;
787 }
788
789 add_assoc_str( return_value, (char *)kw_key, kw_value_str);
790 } /* end of while */
791
792 } /* end of if e!=NULL */
793
794 uenum_close( e );
795 }
796 /* }}} */
797
/* {{{ Canonicalizes a locale string
 * @param string $locale The locale string to canonicalize
 * @return string the canonicalized locale
 * Thin wrapper: forwards the call to get_icu_value_src_php() with LOC_CANONICALIZE_TAG.
 */
PHP_FUNCTION(locale_canonicalize)
{
	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
806
807 /* {{{ append_key_value
808 * Internal function which is called from locale_compose
809 * gets the value for the key_name and appends to the loc_name
810 * returns 1 if successful , -1 if not found ,
811 * 0 if array element is not a string , -2 if buffer-overflow
812 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)813 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
814 {
815 zval *ele_value;
816
817 if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
818 if(Z_TYPE_P(ele_value)!= IS_STRING ){
819 /* element value is not a string */
820 return FAILURE;
821 }
822 if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
823 strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
824 /* not lang or grandfathered tag */
825 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
826 }
827 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
828 return SUCCESS;
829 }
830
831 return LOC_NOT_FOUND;
832 }
833 /* }}} */
834
835 /* {{{ append_prefix , appends the prefix needed
836 * e.g. private adds 'x'
837 */
add_prefix(smart_str * loc_name,char * key_name)838 static void add_prefix(smart_str* loc_name, char* key_name)
839 {
840 if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
841 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
842 smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
843 }
844 }
845 /* }}} */
846
847 /* {{{ append_multiple_key_values
848 * Internal function which is called from locale_compose
849 * gets the multiple values for the key_name and appends to the loc_name
850 * used for 'variant','extlang','private'
851 * returns 1 if successful , -1 if not found ,
852 * 0 if array element is not a string , -2 if buffer-overflow
853 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name)854 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name)
855 {
856 zval *ele_value;
857 int isFirstSubtag = 0;
858
859 /* Variant/ Extlang/Private etc. */
860 if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
861 if( Z_TYPE_P(ele_value) == IS_STRING ){
862 add_prefix( loc_name , key_name);
863
864 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
865 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
866 return SUCCESS;
867 } else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
868 HashTable *arr = Z_ARRVAL_P(ele_value);
869 zval *data;
870
871 ZEND_HASH_FOREACH_VAL(arr, data) {
872 if(Z_TYPE_P(data) != IS_STRING) {
873 return FAILURE;
874 }
875 if (isFirstSubtag++ == 0){
876 add_prefix(loc_name , key_name);
877 }
878 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
879 smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
880 } ZEND_HASH_FOREACH_END();
881 return SUCCESS;
882 } else {
883 return FAILURE;
884 }
885 } else {
886 char cur_key_name[31];
887 int max_value = 0, i;
888 /* Decide the max_value: the max. no. of elements allowed */
889 if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
890 max_value = MAX_NO_VARIANT;
891 }
892 if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
893 max_value = MAX_NO_EXTLANG;
894 }
895 if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
896 max_value = MAX_NO_PRIVATE;
897 }
898
899 /* Multiple variant values as variant0, variant1 ,variant2 */
900 isFirstSubtag = 0;
901 for( i=0 ; i< max_value; i++ ){
902 snprintf( cur_key_name , 30, "%s%d", key_name , i);
903 if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
904 if( Z_TYPE_P(ele_value)!= IS_STRING ){
905 /* variant is not a string */
906 return FAILURE;
907 }
908 /* Add the contents */
909 if (isFirstSubtag++ == 0){
910 add_prefix(loc_name , cur_key_name);
911 }
912 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
913 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
914 }
915 } /* end of for */
916 } /* end of else */
917
918 return SUCCESS;
919 }
920 /* }}} */
921
922 /*{{{
923 * If applicable sets error message and aborts locale_compose gracefully
924 * returns 0 if locale_compose needs to be aborted
925 * otherwise returns 1
926 */
handleAppendResult(int result,smart_str * loc_name)927 static int handleAppendResult( int result, smart_str* loc_name)
928 {
929 intl_error_reset( NULL );
930 if( result == FAILURE) {
931 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
932 "locale_compose: parameter array element is not a string", 0 );
933 smart_str_free(loc_name);
934 return 0;
935 }
936 return 1;
937 }
938 /* }}} */
939
940 #define RETURN_SMART_STR(str) smart_str_0((str)); RETURN_NEW_STR((str)->s)
941 /* {{{ Creates a locale by combining the parts of locale-ID passed
942 * }}} */
943 /* {{{ Creates a locale by combining the parts of locale-ID passed
944 * }}} */
PHP_FUNCTION(locale_compose)945 PHP_FUNCTION(locale_compose)
946 {
947 smart_str loc_name_s = {0};
948 smart_str *loc_name = &loc_name_s;
949 zval* arr = NULL;
950 HashTable* hash_arr = NULL;
951 int result = 0;
952
953 intl_error_reset( NULL );
954
955 if(zend_parse_parameters( ZEND_NUM_ARGS(), "a",
956 &arr) == FAILURE)
957 {
958 RETURN_THROWS();
959 }
960
961 hash_arr = Z_ARRVAL_P( arr );
962
963 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
964 RETURN_FALSE;
965
966 /* Check for grandfathered first */
967 result = append_key_value(loc_name, hash_arr, LOC_GRANDFATHERED_LANG_TAG);
968 if( result == SUCCESS){
969 RETURN_SMART_STR(loc_name);
970 }
971 if( !handleAppendResult( result, loc_name)){
972 RETURN_FALSE;
973 }
974
975 /* Not grandfathered */
976 result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
977 if( result == LOC_NOT_FOUND ){
978 zend_argument_value_error(1, "must contain a \"%s\" key", LOC_LANG_TAG);
979 smart_str_free(loc_name);
980 RETURN_THROWS();
981 }
982 if( !handleAppendResult( result, loc_name)){
983 RETURN_FALSE;
984 }
985
986 /* Extlang */
987 result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG);
988 if( !handleAppendResult( result, loc_name)){
989 RETURN_FALSE;
990 }
991
992 /* Script */
993 result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
994 if( !handleAppendResult( result, loc_name)){
995 RETURN_FALSE;
996 }
997
998 /* Region */
999 result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
1000 if( !handleAppendResult( result, loc_name)){
1001 RETURN_FALSE;
1002 }
1003
1004 /* Variant */
1005 result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG);
1006 if( !handleAppendResult( result, loc_name)){
1007 RETURN_FALSE;
1008 }
1009
1010 /* Private */
1011 result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG);
1012 if( !handleAppendResult( result, loc_name)){
1013 RETURN_FALSE;
1014 }
1015
1016 RETURN_SMART_STR(loc_name);
1017 }
1018 /* }}} */
1019
1020
1021 /*{{{
1022 * Parses the locale and returns private subtags if existing
1023 * else returns NULL
1024 * e.g. for locale='en_US-x-prv1-prv2-prv3'
1025 * returns a pointer to the string 'prv1-prv2-prv3'
1026 */
get_private_subtags(const char * loc_name)1027 static zend_string* get_private_subtags(const char* loc_name)
1028 {
1029 zend_string* result = NULL;
1030 size_t len = 0;
1031 const char* mod_loc_name =NULL;
1032
1033 if( loc_name && (len = strlen(loc_name)) > 0 ){
1034 zend_off_t singletonPos = 0;
1035 mod_loc_name = loc_name ;
1036 while( (singletonPos = getSingletonPos(mod_loc_name)) > -1){
1037 if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){
1038 /* private subtag start found */
1039 if( singletonPos + 2 == len){
1040 /* loc_name ends with '-x-' ; return NULL */
1041 }
1042 else{
1043 /* result = mod_loc_name + singletonPos +2; */
1044 result = zend_string_init(mod_loc_name + singletonPos+2 , (len -( singletonPos +2) ), 0);
1045 }
1046 break;
1047 }
1048 else{
1049 if((size_t)(singletonPos + 1) >= len){
1050 /* String end */
1051 break;
1052 } else {
1053 /* singleton found but not a private subtag , hence check further in the string for the private subtag */
1054 mod_loc_name = mod_loc_name + singletonPos +1;
1055 len = strlen(mod_loc_name);
1056 }
1057 }
1058 } /* end of while */
1059 }
1060
1061 return result;
1062 }
1063 /* }}} */
1064
1065 /* {{{ code used by locale_parse */
/*
 * Looks up the subtag named by key_name for loc_name and stores it in hash_arr.
 *
 * - For LOC_PRIVATE_TAG the value comes from get_private_subtags(); for every
 *   other key it comes from get_icu_value_internal().
 * - LOC_PRIVATE_TAG and LOC_VARIANT_TAG values are split on DELIMITER and
 *   stored as "<key_name>0", "<key_name>1", ...; splitting stops at the first
 *   follow-up token that is a single character (a singleton).
 * - Any other key is stored as a single entry under key_name.
 *
 * Returns 1 only when a single-valued entry was added; the multi-valued
 * (private/variant) path always returns 0, as it did before.
 */
static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name)
{
	zend_string *key_value = NULL;
	int          result = 0;
	int          cur_result = 0;
	const bool   is_private = strcmp(key_name, LOC_PRIVATE_TAG) == 0;
	const bool   is_multi_valued = is_private || strcmp(key_name, LOC_VARIANT_TAG) == 0;

	if (is_private) {
		key_value = get_private_subtags(loc_name);
		result = 1;
	} else {
		key_value = get_icu_value_internal(loc_name, key_name, &result, 1);
	}

	if (is_multi_valued) {
		if (result > 0 && key_value) {
			/* Room for the key name plus a decimal counter; snprintf()
			 * truncates instead of overflowing should a key ever be longer. */
			char  cur_key_name[64];
			char *last_ptr = NULL;
			int   cnt = 0;
			/* Tokenize on "_" or "-"; the zend_string buffer is modified in place. */
			char *token = php_strtok_r(ZSTR_VAL(key_value), DELIMITER, &last_ptr);

			/* php_strtok_r() yields NULL when the value holds nothing but
			 * delimiters; adding a NULL string would dereference it. */
			if (token != NULL) {
				do {
					snprintf(cur_key_name, sizeof(cur_key_name), "%s%d", key_name, cnt++);
					add_assoc_string(hash_arr, cur_key_name, token);
					token = php_strtok_r(NULL, DELIMITER, &last_ptr);
					/* The first token is always stored; later ones stop at a singleton. */
				} while (token != NULL && strlen(token) > 1);
			}
		}
		if (key_value) {
			zend_string_release_ex(key_value, 0);
		}
	} else if (result == 1 && key_value) {
		/* Ownership of key_value moves into the array. */
		add_assoc_str(hash_arr, key_name, key_value);
		cur_result = 1;
	} else if (key_value) {
		zend_string_release_ex(key_value, 0);
	}

	return cur_result;
}
1123 /* }}} */
1124
/* {{{ parses a locale-id into an array of its different parts */
PHP_FUNCTION(locale_parse)1126 PHP_FUNCTION(locale_parse)
1127 {
1128 const char* loc_name = NULL;
1129 size_t loc_name_len = 0;
1130 int grOffset = 0;
1131
1132 intl_error_reset( NULL );
1133
1134 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1135 &loc_name, &loc_name_len ) == FAILURE)
1136 {
1137 RETURN_THROWS();
1138 }
1139
1140 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1141
1142 if(loc_name_len == 0) {
1143 loc_name = intl_locale_get_default();
1144 }
1145
1146 array_init( return_value );
1147
1148 grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
1149 if( grOffset >= 0 ){
1150 add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1151 }
1152 else{
1153 /* Not grandfathered */
1154 add_array_entry( loc_name , return_value , LOC_LANG_TAG);
1155 add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG);
1156 add_array_entry( loc_name , return_value , LOC_REGION_TAG);
1157 add_array_entry( loc_name , return_value , LOC_VARIANT_TAG);
1158 add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG);
1159 }
1160 }
1161 /* }}} */
1162
1163 /* {{{ gets an array containing the list of variants, or null */
PHP_FUNCTION(locale_get_all_variants)1164 PHP_FUNCTION(locale_get_all_variants)
1165 {
1166 const char* loc_name = NULL;
1167 size_t loc_name_len = 0;
1168
1169 int result = 0;
1170 char* token = NULL;
1171 zend_string* variant = NULL;
1172 char* saved_ptr = NULL;
1173
1174 intl_error_reset( NULL );
1175
1176 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1177 &loc_name, &loc_name_len ) == FAILURE)
1178 {
1179 RETURN_THROWS();
1180 }
1181
1182 if(loc_name_len == 0) {
1183 loc_name = intl_locale_get_default();
1184 loc_name_len = strlen(loc_name);
1185 }
1186
1187 INTL_CHECK_LOCALE_LEN(loc_name_len);
1188
1189 array_init( return_value );
1190
1191 /* If the locale is grandfathered, stop, no variants */
1192 if( findOffset( LOC_GRANDFATHERED , loc_name ) >= 0 ){
1193 /* ("Grandfathered Tag. No variants."); */
1194 }
1195 else {
1196 /* Call ICU variant */
1197 variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1198 if( result > 0 && variant){
1199 /* Tokenize on the "_" or "-" */
1200 token = php_strtok_r( variant->val , DELIMITER , &saved_ptr);
1201 add_next_index_stringl( return_value, token , strlen(token));
1202 /* tokenize on the "_" or "-" and stop at singleton if any */
1203 while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1204 add_next_index_stringl( return_value, token , strlen(token));
1205 }
1206 }
1207 if( variant ){
1208 zend_string_release_ex( variant, 0 );
1209 }
1210 }
1211
1212
1213 }
1214 /* }}} */
1215
1216 /* {{{ Converts to lower case and also replaces all hyphens with the underscore */
/*
 * Writes a comparison-friendly copy of str into retstr: every '-' becomes '_'
 * and every other character is passed through tolower().
 *
 * str    - NUL-terminated input; may be NULL.
 * retstr - output buffer; the caller must provide at least strlen(str) + 1
 *          bytes. It is left untouched when 0 is returned.
 *
 * Returns 1 on success, 0 when str is NULL or empty.
 */
static int strToMatch(const char* str ,char *retstr)
{
	if (str == NULL || str[0] == '\0') {
		return 0;
	}

	for (; *str != '\0'; str++, retstr++) {
		if (*str == '-') {
			*retstr = '_';
		} else {
			/* tolower() is only defined for EOF and values representable as
			 * unsigned char; a plain (possibly negative) char must be cast. */
			*retstr = (char)tolower((unsigned char)*str);
		}
	}
	*retstr = '\0';

	return 1;
}
1245 /* }}} */
1246
/* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
PHP_FUNCTION(locale_filter_matches)1250 PHP_FUNCTION(locale_filter_matches)
1251 {
1252 char* lang_tag = NULL;
1253 size_t lang_tag_len = 0;
1254 const char* loc_range = NULL;
1255 size_t loc_range_len = 0;
1256
1257 int result = 0;
1258 char* token = 0;
1259 char* chrcheck = NULL;
1260
1261 zend_string* can_lang_tag = NULL;
1262 zend_string* can_loc_range = NULL;
1263
1264 char* cur_lang_tag = NULL;
1265 char* cur_loc_range = NULL;
1266
1267 bool boolCanonical = 0;
1268 UErrorCode status = U_ZERO_ERROR;
1269
1270 intl_error_reset( NULL );
1271
1272 if(zend_parse_parameters( ZEND_NUM_ARGS(), "ss|b",
1273 &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1274 &boolCanonical) == FAILURE)
1275 {
1276 RETURN_THROWS();
1277 }
1278
1279 if(loc_range_len == 0) {
1280 loc_range = intl_locale_get_default();
1281 loc_range_len = strlen(loc_range);
1282 }
1283
1284 if( strcmp(loc_range,"*")==0){
1285 RETURN_TRUE;
1286 }
1287
1288 INTL_CHECK_LOCALE_LEN(loc_range_len);
1289 INTL_CHECK_LOCALE_LEN(lang_tag_len);
1290
1291 if( boolCanonical ){
1292 /* canonicalize loc_range */
1293 can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1294 if( result <=0) {
1295 intl_error_set( NULL, status,
1296 "locale_filter_matches : unable to canonicalize loc_range" , 0 );
1297 RETURN_FALSE;
1298 }
1299
1300 /* canonicalize lang_tag */
1301 can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result , 0);
1302 if( result <=0) {
1303 intl_error_set( NULL, status,
1304 "locale_filter_matches : unable to canonicalize lang_tag" , 0 );
1305 RETURN_FALSE;
1306 }
1307
1308 /* Convert to lower case for case-insensitive comparison */
1309 cur_lang_tag = ecalloc( 1, can_lang_tag->len + 1);
1310
1311 /* Convert to lower case for case-insensitive comparison */
1312 result = strToMatch( can_lang_tag->val , cur_lang_tag);
1313 if( result == 0) {
1314 efree( cur_lang_tag );
1315 zend_string_release_ex( can_lang_tag, 0 );
1316 RETURN_FALSE;
1317 }
1318
1319 cur_loc_range = ecalloc( 1, can_loc_range->len + 1);
1320 result = strToMatch( can_loc_range->val , cur_loc_range );
1321 if( result == 0) {
1322 efree( cur_lang_tag );
1323 zend_string_release_ex( can_lang_tag, 0 );
1324 efree( cur_loc_range );
1325 zend_string_release_ex( can_loc_range, 0 );
1326 RETURN_FALSE;
1327 }
1328
1329 /* check if prefix */
1330 token = strstr( cur_lang_tag , cur_loc_range );
1331
1332 if( token && (token==cur_lang_tag) ){
1333 /* check if the char. after match is SEPARATOR */
1334 chrcheck = token + (strlen(cur_loc_range));
1335 if( isIDSeparator(*chrcheck) || isKeywordSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1336 efree( cur_lang_tag );
1337 efree( cur_loc_range );
1338 if( can_lang_tag){
1339 zend_string_release_ex( can_lang_tag, 0 );
1340 }
1341 if( can_loc_range){
1342 zend_string_release_ex( can_loc_range, 0 );
1343 }
1344 RETURN_TRUE;
1345 }
1346 }
1347
1348 /* No prefix as loc_range */
1349 if( cur_lang_tag){
1350 efree( cur_lang_tag );
1351 }
1352 if( cur_loc_range){
1353 efree( cur_loc_range );
1354 }
1355 if( can_lang_tag){
1356 zend_string_release_ex( can_lang_tag, 0 );
1357 }
1358 if( can_loc_range){
1359 zend_string_release_ex( can_loc_range, 0 );
1360 }
1361 RETURN_FALSE;
1362
1363 } /* end of if isCanonical */
1364 else{
1365 /* Convert to lower case for case-insensitive comparison */
1366 cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1367
1368 result = strToMatch( lang_tag , cur_lang_tag);
1369 if( result == 0) {
1370 efree( cur_lang_tag );
1371 RETURN_FALSE;
1372 }
1373 cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1374 result = strToMatch( loc_range , cur_loc_range );
1375 if( result == 0) {
1376 efree( cur_lang_tag );
1377 efree( cur_loc_range );
1378 RETURN_FALSE;
1379 }
1380
1381 /* check if prefix */
1382 token = strstr( cur_lang_tag , cur_loc_range );
1383
1384 if( token && (token==cur_lang_tag) ){
1385 /* check if the char. after match is SEPARATOR */
1386 chrcheck = token + (strlen(cur_loc_range));
1387 if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1388 efree( cur_lang_tag );
1389 efree( cur_loc_range );
1390 RETURN_TRUE;
1391 }
1392 }
1393
1394 /* No prefix as loc_range */
1395 if( cur_lang_tag){
1396 efree( cur_lang_tag );
1397 }
1398 if( cur_loc_range){
1399 efree( cur_loc_range );
1400 }
1401 RETURN_FALSE;
1402
1403 }
1404 }
1405 /* }}} */
1406
/*
 * Releases a table of arr_size string pairs laid out as consecutive
 * (even, odd) slots: each non-NULL even slot is freed, odd slots are left
 * alone, and finally the table itself is freed.
 */
static void array_cleanup( char* arr[] , int arr_size)
{
	char **slot = arr;
	int    remaining = arr_size;

	while (remaining > 0) {
		if (*slot != NULL) {
			efree(*slot);
		}
		slot += 2;
		remaining--;
	}

	efree(arr);
}
1417
/* Frees the caller's local lookup table (cur_arr / cur_arr_len) and returns
 * value. Wrapped in do { } while (0) so it expands to exactly one statement
 * and stays correct under an unbraced if/else. */
#define LOOKUP_CLEAN_RETURN(value) do { array_cleanup(cur_arr, cur_arr_len); return (value); } while (0)
1419 /* {{{
1420 * returns the lookup result to lookup_loc_range_src_php
1421 * internal function
1422 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize)1423 static zend_string* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize )
1424 {
1425 int i = 0;
1426 int cur_arr_len = 0;
1427 int result = 0;
1428
1429 zend_string* lang_tag = NULL;
1430 zval* ele_value = NULL;
1431
1432 char* cur_loc_range = NULL;
1433 zend_string* can_loc_range = NULL;
1434 zend_off_t saved_pos = 0;
1435
1436 zend_string* return_value = NULL;
1437
1438 char **cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1439 ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1440 /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1441 if(Z_TYPE_P(ele_value)!= IS_STRING) {
1442 /* element value is not a string */
1443 zend_argument_type_error(2, "must only contain string values");
1444 LOOKUP_CLEAN_RETURN(NULL);
1445 }
1446 cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1447 result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1448 if(result == 0) {
1449 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0);
1450 LOOKUP_CLEAN_RETURN(NULL);
1451 }
1452 cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1453 cur_arr_len++ ;
1454 } ZEND_HASH_FOREACH_END(); /* end of for */
1455
1456 /* Canonicalize array elements */
1457 if(canonicalize) {
1458 for(i=0; i<cur_arr_len; i++) {
1459 lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1460 if(result != 1 || lang_tag == NULL || !lang_tag->val[0]) {
1461 if(lang_tag) {
1462 zend_string_release_ex(lang_tag, 0);
1463 }
1464 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1465 LOOKUP_CLEAN_RETURN(NULL);
1466 }
1467 cur_arr[i*2] = erealloc(cur_arr[i*2], lang_tag->len+1);
1468 result = strToMatch(lang_tag->val, cur_arr[i*2]);
1469 zend_string_release_ex(lang_tag, 0);
1470 if(result == 0) {
1471 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1472 LOOKUP_CLEAN_RETURN(NULL);
1473 }
1474 }
1475
1476 }
1477
1478 if(canonicalize) {
1479 /* Canonicalize the loc_range */
1480 can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1481 if( result != 1 || can_loc_range == NULL || !can_loc_range->val[0]) {
1482 /* Error */
1483 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 );
1484 if(can_loc_range) {
1485 zend_string_release_ex(can_loc_range, 0);
1486 }
1487 LOOKUP_CLEAN_RETURN(NULL);
1488 } else {
1489 loc_range = can_loc_range->val;
1490 }
1491 }
1492
1493 cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1494 /* convert to lower and replace hyphens */
1495 result = strToMatch(loc_range, cur_loc_range);
1496 if(can_loc_range) {
1497 zend_string_release_ex(can_loc_range, 0);
1498 }
1499 if(result == 0) {
1500 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1501 LOOKUP_CLEAN_RETURN(NULL);
1502 }
1503
1504 /* Lookup for the lang_tag match */
1505 saved_pos = strlen(cur_loc_range);
1506 while(saved_pos > 0) {
1507 for(i=0; i< cur_arr_len; i++){
1508 if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1509 /* Match found */
1510 char *str = canonicalize ? cur_arr[i*2] : cur_arr[i*2+1];
1511 return_value = zend_string_init(str, strlen(str), 0);
1512 efree(cur_loc_range);
1513 LOOKUP_CLEAN_RETURN(return_value);
1514 }
1515 }
1516 saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1517 }
1518
1519 /* Match not found */
1520 efree(cur_loc_range);
1521 LOOKUP_CLEAN_RETURN(NULL);
1522 }
1523 /* }}} */
1524
/* {{{ Searches the items in $langtag for the best match to the language
 * range
 */
PHP_FUNCTION(locale_lookup)1532 PHP_FUNCTION(locale_lookup)
1533 {
1534 zend_string* fallback_loc_str = NULL;
1535 const char* loc_range = NULL;
1536 size_t loc_range_len = 0;
1537
1538 zval* arr = NULL;
1539 HashTable* hash_arr = NULL;
1540 bool boolCanonical = 0;
1541 zend_string* result_str = NULL;
1542
1543 intl_error_reset( NULL );
1544
1545 if(zend_parse_parameters( ZEND_NUM_ARGS(), "as|bS!", &arr, &loc_range, &loc_range_len,
1546 &boolCanonical, &fallback_loc_str) == FAILURE) {
1547 RETURN_THROWS();
1548 }
1549
1550 if(loc_range_len == 0) {
1551 if(fallback_loc_str) {
1552 loc_range = ZSTR_VAL(fallback_loc_str);
1553 loc_range_len = ZSTR_LEN(fallback_loc_str);
1554 } else {
1555 loc_range = intl_locale_get_default();
1556 loc_range_len = strlen(loc_range);
1557 }
1558 }
1559
1560 hash_arr = Z_ARRVAL_P(arr);
1561
1562 INTL_CHECK_LOCALE_LEN(loc_range_len);
1563
1564 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1565 RETURN_EMPTY_STRING();
1566 }
1567
1568 result_str = lookup_loc_range(loc_range, hash_arr, boolCanonical);
1569 if(result_str == NULL || ZSTR_VAL(result_str)[0] == '\0') {
1570 if( fallback_loc_str ) {
1571 result_str = zend_string_copy(fallback_loc_str);
1572 } else {
1573 RETURN_EMPTY_STRING();
1574 }
1575 }
1576
1577 RETURN_STR(result_str);
1578 }
1579 /* }}} */
1580
/* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
PHP_FUNCTION(locale_accept_from_http)1584 PHP_FUNCTION(locale_accept_from_http)
1585 {
1586 UEnumeration *available;
1587 char *http_accept = NULL;
1588 size_t http_accept_len;
1589 UErrorCode status = 0;
1590 int len;
1591 char resultLocale[INTL_MAX_LOCALE_LEN+1];
1592 UAcceptResult outResult;
1593
1594 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s", &http_accept, &http_accept_len) == FAILURE)
1595 {
1596 RETURN_THROWS();
1597 }
1598 if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1599 /* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1600 char *start = http_accept;
1601 char *end;
1602 size_t len;
1603 do {
1604 end = strchr(start, ',');
1605 len = end ? end-start : http_accept_len-(start-http_accept);
1606 if(len > ULOC_FULLNAME_CAPACITY) {
1607 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1608 "locale_accept_from_http: locale string too long", 0 );
1609 RETURN_FALSE;
1610 }
1611 if(end) {
1612 start = end+1;
1613 }
1614 } while(end != NULL);
1615 }
1616
1617 available = ures_openAvailableLocales(NULL, &status);
1618 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1619 len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1620 &outResult, http_accept, available, &status);
1621 uenum_close(available);
1622 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1623 if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1624 RETURN_FALSE;
1625 }
1626 RETURN_STRINGL(resultLocale, len);
1627 }
1628 /* }}} */
1629