1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | https://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Kirti Velankar <kirtig@yahoo-inc.com> |
12 +----------------------------------------------------------------------+
13 */
14
15 #ifdef HAVE_CONFIG_H
16 #include <config.h>
17 #endif
18
19 #include <unicode/ustring.h>
20 #include <unicode/udata.h>
21 #include <unicode/putil.h>
22 #include <unicode/ures.h>
23
24 #include "php_intl.h"
25 #include "locale.h"
26 #include "locale_class.h"
27 #include "intl_convert.h"
28 #include "intl_data.h"
29
30 #include <zend_API.h>
31 #include <zend.h>
32 #include <php.h>
33 #include "main/php_ini.h"
34 #include "zend_smart_str.h"
35
36 ZEND_EXTERN_MODULE_GLOBALS( intl )
37
/* Separators, singleton prefixes and tag names used while composing and
 * parsing locale IDs in this file. */
/* appended in front of every non-leading subtag by the locale_compose helpers */
#define SEPARATOR "_"
/* NOTE(review): not referenced in the part of the file reviewed here */
#define SEPARATOR1 "-"
/* delimiter set handed to php_strtok_r() when splitting variant/private values */
#define DELIMITER "-_"
/* NOTE(review): not referenced in the part of the file reviewed here */
#define EXTLANG_PREFIX "a"
/* singleton emitted by add_prefix() ahead of the private subtags */
#define PRIVATE_PREFIX "x"
/* tag name that makes get_icu_disp_value_src_php() call uloc_getDisplayName() */
#define DISP_NAME "name"

/* Upper bounds on the indexed keys ("variant0", "variant1", ...) probed by
 * append_multiple_key_values(). */
#define MAX_NO_VARIANT 15
#define MAX_NO_EXTLANG 3
#define MAX_NO_PRIVATE 15
/* NOTE(review): not referenced in the part of the file reviewed here */
#define MAX_NO_LOOKUP_LANG_TAG 100

/* returned by append_key_value() when the requested key is absent from the array */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN 11
#define EXTLANG_KEYNAME_LEN 10
#define PRIVATE_KEYNAME_LEN 11
57
/* Based on the IANA language subtag registry (File-Date: 2021-08-06)
 * https://www.iana.org/assignments/language-subtag-registry
 *
 * This list includes all grandfathered tags, as well as redundant
 * tags that have a Preferred-Value.
 *
 * The array is terminated by a NULL sentinel; findOffset() walks it until
 * that sentinel and compares entries case-insensitively. The order of the
 * entries must stay in step with LOC_PREFERRED_GRANDFATHERED.
 */
static const char * const LOC_GRANDFATHERED[] = {
	"art-lojban",
	"cel-gaulish",
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-hak",
	"i-klingon",
	"i-lux",
	"i-mingo",
	"i-navajo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"no-bok",
	"no-nyn",
	"sgn-BE-FR",
	"sgn-BE-NL",
	"sgn-BR",
	"sgn-CH-DE",
	"sgn-CO",
	"sgn-DE",
	"sgn-DK",
	"sgn-ES",
	"sgn-FR",
	"sgn-GB",
	"sgn-GR",
	"sgn-IE",
	"sgn-IT",
	"sgn-JP",
	"sgn-MX",
	"sgn-NI",
	"sgn-NL",
	"sgn-NO",
	"sgn-PT",
	"sgn-SE",
	"sgn-US",
	"sgn-ZA",
	"zh-cmn",
	"zh-cmn-Hans",
	"zh-cmn-Hant",
	"zh-gan",
	"zh-guoyu",
	"zh-hakka",
	"zh-min",
	"zh-min-nan",
	"zh-wuu",
	"zh-xiang",
	NULL /* sentinel: end of list */
};
117
/* Based on the IANA language subtag registry (File-Date: 2021-08-06)
 *
 * This array lists the preferred values for the grandfathered and redundant
 * tags listed in LOC_GRANDFATHERED. This is in sync with the array
 * LOC_GRANDFATHERED, i.e., the offsets of the grandfathered tags match the
 * offsets of the preferred value. If a value in LOC_PREFERRED_GRANDFATHERED is
 * NULL, then the matching offset in LOC_GRANDFATHERED has no preferred value.
 *
 * Because NULL is a legitimate entry here, this array must only be indexed
 * with an offset obtained from LOC_GRANDFATHERED (see getPreferredTag());
 * it cannot be scanned up to a NULL sentinel. The final NULL merely lines up
 * with the terminator of LOC_GRANDFATHERED.
 */
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
	"jbo",
	NULL,
	"en-GB-oxendict",
	"ami",
	"bnn",
	NULL,
	NULL,
	"hak",
	"tlh",
	"lb",
	NULL,
	"nv",
	"pwn",
	"tao",
	"tay",
	"tsu",
	"nb",
	"nn",
	"sfb",
	"vgt",
	"bzs",
	"sgg",
	"csn",
	"gsg",
	"dsl",
	"ssp",
	"fsl",
	"bfi",
	"gss",
	"isg",
	"ise",
	"jsl",
	"mfs",
	"ncs",
	"dse",
	"nsl",
	"psr",
	"swl",
	"ase",
	"sfs",
	"cmn",
	"cmn-Hans",
	"cmn-Hant",
	"gan",
	"cmn",
	"hak",
	NULL,
	"nan",
	"wuu",
	"hsn",
	NULL /* lines up with the terminator of LOC_GRANDFATHERED */
};
179
/* Character-class helpers used while scanning locale IDs.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (a conditional, a dereference with pointer arithmetic, ...) expand with
 * the intended precedence. Arguments may still be evaluated more than once,
 * so do not pass expressions with side effects. */

/* returns true if a is an ID separator ('_' or '-'), false otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
/* returns true if a is '@' */
#define isKeywordSeparator(a) ((a) == '@')
/* returns true if a is the terminating NUL */
#define isEndOfTag(a) ((a) == '\0')

/* returns true for the letters that can introduce a special prefix: x/X, i/I */
#define isPrefixLetter(a) (((a) == 'x') || ((a) == 'X') || ((a) == 'i') || ((a) == 'I'))

/* returns true if one of the special prefixes is here (s=string)
 * 'x-' or 'i-'. The result is exactly 1 or 0; get_icu_value_internal()
 * compares it against 1. */
#define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant */
#define isTerminator(a) (((a) == 0) || ((a) == '.') || ((a) == '@'))
195
/* {{{ findOffset
 * Looks 'key' up in the NULL-terminated string array 'list', ignoring case.
 * Returns the index of the first matching entry, or -1 if there is none. */
static int16_t findOffset(const char* const* list, const char* key)
{
	size_t idx;

	for (idx = 0; list[idx] != NULL; idx++) {
		if (strcasecmp(key, list[idx]) == 0) {
			return (int16_t)idx;
		}
	}

	/* reached the sentinel without a match */
	return -1;
}
/*}}}*/
212
getPreferredTag(const char * gf_tag)213 static char* getPreferredTag(const char* gf_tag)
214 {
215 char* result = NULL;
216 zend_off_t grOffset = 0;
217
218 grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
219 if(grOffset < 0) {
220 return NULL;
221 }
222 if( LOC_PREFERRED_GRANDFATHERED[grOffset] != NULL ){
223 /* return preferred tag */
224 result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
225 } else {
226 /* Return correct grandfathered language tag */
227 result = estrdup( LOC_GRANDFATHERED[grOffset] );
228 }
229 return result;
230 }
231
232 /* {{{
233 * returns the position of next token for lookup
234 * or -1 if no token
235 * strtokr equivalent search for token in reverse direction
236 */
getStrrtokenPos(char * str,zend_off_t savedPos)237 static zend_off_t getStrrtokenPos(char* str, zend_off_t savedPos)
238 {
239 zend_off_t result =-1;
240 zend_off_t i;
241
242 for(i=savedPos-1; i>=0; i--) {
243 if(isIDSeparator(*(str+i)) || isKeywordSeparator(*(str+i))){
244 /* delimiter found; check for singleton */
245 if(i>=2 && isIDSeparator(*(str+i-2)) ){
246 /* a singleton; so send the position of token before the singleton */
247 result = i-2;
248 } else {
249 result = i;
250 }
251 break;
252 }
253 }
254 if(result < 1){
255 /* Just in case inavlid locale e.g. '-x-xyz' or '-sl_Latn' */
256 result =-1;
257 }
258 return result;
259 }
260 /* }}} */
261
262 /* {{{
263 * returns the position of a singleton if present
264 * returns -1 if no singleton
265 * strtok equivalent search for singleton
266 */
getSingletonPos(const char * str)267 static zend_off_t getSingletonPos(const char* str)
268 {
269 zend_off_t result =-1;
270 size_t len = 0;
271
272 if( str && ((len=strlen(str))>0) ){
273 zend_off_t i = 0;
274 for( i=0; (size_t)i < len ; i++){
275 if( isIDSeparator(*(str+i)) ){
276 if( i==1){
277 /* string is of the form x-avy or a-prv1 */
278 result =0;
279 break;
280 } else {
281 /* delimiter found; check for singleton */
282 if( isIDSeparator(*(str+i+2)) ){
283 /* a singleton; so send the position of separator before singleton */
284 result = i+1;
285 break;
286 }
287 }
288 }
289 }/* end of for */
290
291 }
292 return result;
293 }
294 /* }}} */
295
/* {{{ Get default locale
 * Takes no arguments and returns the string reported by
 * intl_locale_get_default(). */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
	const char *current;

	ZEND_PARSE_PARAMETERS_NONE();

	current = intl_locale_get_default();
	RETURN_STRING(current);
}
/* }}} */
307
308 /* {{{ Set default locale */
309 /* }}} */
310 /* {{{ Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)311 PHP_NAMED_FUNCTION(zif_locale_set_default)
312 {
313 zend_string* locale_name;
314 zend_string *ini_name;
315 char *default_locale = NULL;
316
317 ZEND_PARSE_PARAMETERS_START(1, 1)
318 Z_PARAM_STR(locale_name)
319 ZEND_PARSE_PARAMETERS_END();
320
321 if (ZSTR_LEN(locale_name) == 0) {
322 default_locale = (char *)uloc_getDefault();
323 locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
324 }
325
326 ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
327 zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
328 zend_string_release_ex(ini_name, 0);
329 if (default_locale != NULL) {
330 zend_string_release_ex(locale_name, 0);
331 }
332
333 RETURN_TRUE;
334 }
335 /* }}} */
336
337 /* {{{
338 * Gets the value from ICU
339 * common code shared by get_primary_language,get_script or get_region or get_variant
340 * result = 0 if error, 1 if successful , -1 if no value
341 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)342 static zend_string* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
343 {
344 zend_string* tag_value = NULL;
345 int32_t tag_value_len = 512;
346
347 char* mod_loc_name = NULL;
348
349 int32_t buflen = 512;
350 UErrorCode status = U_ZERO_ERROR;
351
352 if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
353 return NULL;
354 }
355
356 if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
357 /* Handle grandfathered languages */
358 zend_off_t grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
359 if( grOffset >= 0 ){
360 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
361 return zend_string_init(loc_name, strlen(loc_name), 0);
362 } else {
363 /* Since Grandfathered , no value , do nothing , retutn NULL */
364 return NULL;
365 }
366 }
367
368 if( fromParseLocale==1 ){
369 zend_off_t singletonPos = 0;
370
371 /* Handle singletons */
372 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
373 if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
374 return zend_string_init(loc_name, strlen(loc_name), 0);
375 }
376 }
377
378 singletonPos = getSingletonPos( loc_name );
379 if( singletonPos == 0){
380 /* singleton at start of script, region , variant etc.
381 * or invalid singleton at start of language */
382 return NULL;
383 } else if( singletonPos > 0 ){
384 /* singleton at some position except at start
385 * strip off the singleton and rest of the loc_name */
386 mod_loc_name = estrndup ( loc_name , singletonPos-1);
387 }
388 } /* end of if fromParse */
389
390 } /* end of if != LOC_CANONICAL_TAG */
391
392 if( mod_loc_name == NULL){
393 mod_loc_name = estrdup(loc_name );
394 }
395
396 /* Proceed to ICU */
397 do{
398 if (tag_value) {
399 tag_value = zend_string_realloc( tag_value , buflen, 0);
400 } else {
401 tag_value = zend_string_alloc( buflen, 0);
402 }
403 tag_value_len = buflen;
404
405 if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
406 buflen = uloc_getScript ( mod_loc_name , tag_value->val , tag_value_len , &status);
407 }
408 if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
409 buflen = uloc_getLanguage ( mod_loc_name , tag_value->val , tag_value_len , &status);
410 }
411 if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
412 buflen = uloc_getCountry ( mod_loc_name , tag_value->val , tag_value_len , &status);
413 }
414 if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
415 buflen = uloc_getVariant ( mod_loc_name , tag_value->val , tag_value_len , &status);
416 }
417 if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
418 buflen = uloc_canonicalize ( mod_loc_name , tag_value->val , tag_value_len , &status);
419 }
420
421 if( U_FAILURE( status ) ) {
422 if( status == U_BUFFER_OVERFLOW_ERROR ) {
423 status = U_ZERO_ERROR;
424 buflen++; /* add space for \0 */
425 continue;
426 }
427
428 /* Error in retrieving data */
429 *result = 0;
430 if( tag_value ){
431 zend_string_release_ex( tag_value, 0 );
432 }
433 if( mod_loc_name ){
434 efree( mod_loc_name);
435 }
436 return NULL;
437 }
438 } while( buflen > tag_value_len );
439
440 if( buflen ==0 ){
441 /* No value found */
442 *result = -1;
443 if( tag_value ){
444 zend_string_release_ex( tag_value, 0 );
445 }
446 if( mod_loc_name ){
447 efree( mod_loc_name);
448 }
449 return NULL;
450 } else {
451 *result = 1;
452 }
453
454 if( mod_loc_name ){
455 efree( mod_loc_name);
456 }
457
458 tag_value->len = strlen(tag_value->val);
459 return tag_value;
460 }
461 /* }}} */
462
463 /* {{{
464 * Gets the value from ICU , called when PHP userspace function is called
465 * common code shared by get_primary_language,get_script or get_region or get_variant
466 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)467 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
468 {
469
470 char* loc_name = NULL;
471 size_t loc_name_len = 0;
472
473 zend_string* tag_value = NULL;
474 char* empty_result = "";
475
476 int result = 0;
477 char* msg = NULL;
478
479 UErrorCode status = U_ZERO_ERROR;
480
481 intl_error_reset( NULL );
482
483 ZEND_PARSE_PARAMETERS_START(1, 1)
484 Z_PARAM_STRING(loc_name, loc_name_len)
485 ZEND_PARSE_PARAMETERS_END();
486
487 if(loc_name_len == 0) {
488 loc_name = (char *)intl_locale_get_default();
489 loc_name_len = strlen(loc_name);
490 }
491
492 INTL_CHECK_LOCALE_LEN(loc_name_len);
493
494 /* Call ICU get */
495 tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
496
497 /* No value found */
498 if( result == -1 ) {
499 if( tag_value){
500 zend_string_release_ex( tag_value, 0 );
501 }
502 RETURN_STRING( empty_result);
503 }
504
505 /* value found */
506 if( tag_value){
507 RETVAL_STR( tag_value );
508 return;
509 }
510
511 /* Error encountered while fetching the value */
512 if( result ==0) {
513 spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
514 intl_error_set( NULL, status, msg , 1 );
515 efree(msg);
516 RETURN_NULL();
517 }
518
519 }
520 /* }}} */
521
522 /* {{{ gets the script for the $locale */
PHP_FUNCTION(locale_get_script)523 PHP_FUNCTION( locale_get_script )
524 {
525 get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
526 }
527 /* }}} */
528
529 /* {{{ gets the region for the $locale */
PHP_FUNCTION(locale_get_region)530 PHP_FUNCTION( locale_get_region )
531 {
532 get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
533 }
534 /* }}} */
535
536 /* {{{ gets the primary language for the $locale */
PHP_FUNCTION(locale_get_primary_language)537 PHP_FUNCTION(locale_get_primary_language )
538 {
539 get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
540 }
541 /* }}} */
542
543
544 /* {{{
545 * common code shared by display_xyz functions to get the value from ICU
546 }}} */
get_icu_disp_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)547 static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
548 {
549 char* loc_name = NULL;
550 size_t loc_name_len = 0;
551
552 char* disp_loc_name = NULL;
553 size_t disp_loc_name_len = 0;
554 int free_loc_name = 0;
555
556 UChar* disp_name = NULL;
557 int32_t disp_name_len = 0;
558
559 char* mod_loc_name = NULL;
560
561 int32_t buflen = 512;
562 UErrorCode status = U_ZERO_ERROR;
563
564 zend_string* u8str;
565
566 char* msg = NULL;
567
568 intl_error_reset( NULL );
569
570 ZEND_PARSE_PARAMETERS_START(1, 2)
571 Z_PARAM_STRING(loc_name, loc_name_len)
572 Z_PARAM_OPTIONAL
573 Z_PARAM_STRING_OR_NULL(disp_loc_name, disp_loc_name_len)
574 ZEND_PARSE_PARAMETERS_END();
575
576 if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
577 /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
578 spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
579 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 );
580 efree(msg);
581 RETURN_FALSE;
582 }
583
584 if(loc_name_len == 0) {
585 loc_name = (char *)intl_locale_get_default();
586 }
587
588 if( strcmp(tag_name, DISP_NAME) != 0 ){
589 /* Handle grandfathered languages */
590 int grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
591 if( grOffset >= 0 ){
592 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
593 mod_loc_name = getPreferredTag( loc_name );
594 } else {
595 /* Since Grandfathered, no value, do nothing, return NULL */
596 RETURN_FALSE;
597 }
598 }
599 } /* end of if != LOC_CANONICAL_TAG */
600
601 if( mod_loc_name==NULL ){
602 mod_loc_name = estrdup( loc_name );
603 }
604
605 /* Check if disp_loc_name passed , if not use default locale */
606 if( !disp_loc_name){
607 disp_loc_name = estrdup(intl_locale_get_default());
608 free_loc_name = 1;
609 }
610
611 /* Get the disp_value for the given locale */
612 do{
613 disp_name = erealloc( disp_name , buflen * sizeof(UChar) );
614 disp_name_len = buflen;
615
616 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
617 buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
618 } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
619 buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
620 } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
621 buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
622 } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
623 buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
624 } else if( strcmp(tag_name , DISP_NAME)==0 ){
625 buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
626 }
627
628 /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
629 if( U_FAILURE( status ) )
630 {
631 if( status == U_BUFFER_OVERFLOW_ERROR )
632 {
633 status = U_ZERO_ERROR;
634 continue;
635 }
636
637 spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
638 intl_error_set( NULL, status, msg , 1 );
639 efree(msg);
640 if( disp_name){
641 efree( disp_name );
642 }
643 if( mod_loc_name){
644 efree( mod_loc_name );
645 }
646 if (free_loc_name) {
647 efree((void *)disp_loc_name);
648 disp_loc_name = NULL;
649 }
650 RETURN_FALSE;
651 }
652 } while( buflen > disp_name_len );
653
654 if( mod_loc_name){
655 efree( mod_loc_name );
656 }
657 if (free_loc_name) {
658 efree((void *)disp_loc_name);
659 disp_loc_name = NULL;
660 }
661 /* Convert display locale name from UTF-16 to UTF-8. */
662 u8str = intl_convert_utf16_to_utf8(disp_name, buflen, &status );
663 efree( disp_name );
664 if( !u8str )
665 {
666 spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
667 intl_error_set( NULL, status, msg , 1 );
668 efree(msg);
669 RETURN_FALSE;
670 }
671
672 RETVAL_NEW_STR( u8str );
673 }
674 /* }}} */
675
676 /* {{{ gets the name for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_name)677 PHP_FUNCTION(locale_get_display_name)
678 {
679 get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
680 }
681 /* }}} */
682
683 /* {{{ gets the language for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_language)684 PHP_FUNCTION(locale_get_display_language)
685 {
686 get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
687 }
688 /* }}} */
689
690 /* {{{ gets the script for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_script)691 PHP_FUNCTION(locale_get_display_script)
692 {
693 get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
694 }
695 /* }}} */
696
697 /* {{{ gets the region for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_region)698 PHP_FUNCTION(locale_get_display_region)
699 {
700 get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
701 }
702 /* }}} */
703
704 /* {{{
705 * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
706 * gets the variant for the $locale in $in_locale or default_locale
707 }}} */
708 /* {{{
709 * proto static string get_display_variant($locale, $in_locale = null)
710 * gets the variant for the $locale in $in_locale or default_locale
711 */
PHP_FUNCTION(locale_get_display_variant)712 PHP_FUNCTION(locale_get_display_variant)
713 {
714 get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
715 }
716 /* }}} */
717
718 /* {{{ return an associative array containing keyword-value
719 * pairs for this locale. The keys are keys to the array (doh!)
720 * }}}*/
721 /* {{{ return an associative array containing keyword-value
722 * pairs for this locale. The keys are keys to the array (doh!)
723 */
PHP_FUNCTION(locale_get_keywords)724 PHP_FUNCTION( locale_get_keywords )
725 {
726 UEnumeration* e = NULL;
727 UErrorCode status = U_ZERO_ERROR;
728
729 const char* kw_key = NULL;
730 int32_t kw_key_len = 0;
731
732 char* loc_name = NULL;
733 size_t loc_name_len = 0;
734
735 intl_error_reset( NULL );
736
737 ZEND_PARSE_PARAMETERS_START(1, 1)
738 Z_PARAM_STRING(loc_name, loc_name_len)
739 ZEND_PARSE_PARAMETERS_END();
740
741 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
742
743 if(loc_name_len == 0) {
744 loc_name = (char *)intl_locale_get_default();
745 }
746
747 /* Get the keywords */
748 e = uloc_openKeywords( loc_name, &status );
749 if( e != NULL ) {
750 /*
751 ICU expects the buffer to be allocated before calling the function
752 and so the buffer size has been explicitly specified
753 ICU uloc.h #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
754 hence the kw_value buffer size is 100
755 */
756
757 /* Traverse it, filling the return array. */
758 array_init( return_value );
759
760 while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
761 int32_t kw_value_len = 100;
762 zend_string *kw_value_str = zend_string_alloc(kw_value_len, 0);
763
764 /* Get the keyword value for each keyword */
765 kw_value_len=uloc_getKeywordValue( loc_name, kw_key, ZSTR_VAL(kw_value_str), kw_value_len, &status );
766 if (status == U_BUFFER_OVERFLOW_ERROR) {
767 status = U_ZERO_ERROR;
768 kw_value_str = zend_string_extend(kw_value_str, kw_value_len, 0);
769 kw_value_len=uloc_getKeywordValue( loc_name,kw_key, ZSTR_VAL(kw_value_str), kw_value_len+1, &status );
770 } else if(!U_FAILURE(status)) {
771 kw_value_str = zend_string_truncate(kw_value_str, kw_value_len, 0);
772 }
773 if (U_FAILURE(status)) {
774 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword value for the keyword", 0 );
775 if( kw_value_str){
776 zend_string_efree( kw_value_str );
777 }
778 zend_array_destroy(Z_ARR_P(return_value));
779 RETURN_FALSE;
780 }
781
782 add_assoc_str( return_value, (char *)kw_key, kw_value_str);
783 } /* end of while */
784
785 } /* end of if e!=NULL */
786
787 uenum_close( e );
788 }
789 /* }}} */
790
791 /* {{{ @return string the canonicalized locale
792 * }}} */
793 /* {{{ @param string $locale The locale string to canonicalize */
PHP_FUNCTION(locale_canonicalize)794 PHP_FUNCTION(locale_canonicalize)
795 {
796 get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
797 }
798 /* }}} */
799
800 /* {{{ append_key_value
801 * Internal function which is called from locale_compose
802 * gets the value for the key_name and appends to the loc_name
803 * returns 1 if successful , -1 if not found ,
804 * 0 if array element is not a string , -2 if buffer-overflow
805 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)806 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
807 {
808 zval *ele_value;
809
810 if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
811 if(Z_TYPE_P(ele_value)!= IS_STRING ){
812 /* element value is not a string */
813 return FAILURE;
814 }
815 if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
816 strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
817 /* not lang or grandfathered tag */
818 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
819 }
820 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
821 return SUCCESS;
822 }
823
824 return LOC_NOT_FOUND;
825 }
826 /* }}} */
827
828 /* {{{ append_prefix , appends the prefix needed
829 * e.g. private adds 'x'
830 */
add_prefix(smart_str * loc_name,char * key_name)831 static void add_prefix(smart_str* loc_name, char* key_name)
832 {
833 if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
834 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
835 smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
836 }
837 }
838 /* }}} */
839
840 /* {{{ append_multiple_key_values
841 * Internal function which is called from locale_compose
842 * gets the multiple values for the key_name and appends to the loc_name
843 * used for 'variant','extlang','private'
844 * returns 1 if successful , -1 if not found ,
845 * 0 if array element is not a string , -2 if buffer-overflow
846 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name)847 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name)
848 {
849 zval *ele_value;
850 int isFirstSubtag = 0;
851
852 /* Variant/ Extlang/Private etc. */
853 if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
854 if( Z_TYPE_P(ele_value) == IS_STRING ){
855 add_prefix( loc_name , key_name);
856
857 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
858 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
859 return SUCCESS;
860 } else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
861 HashTable *arr = Z_ARRVAL_P(ele_value);
862 zval *data;
863
864 ZEND_HASH_FOREACH_VAL(arr, data) {
865 if(Z_TYPE_P(data) != IS_STRING) {
866 return FAILURE;
867 }
868 if (isFirstSubtag++ == 0){
869 add_prefix(loc_name , key_name);
870 }
871 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
872 smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
873 } ZEND_HASH_FOREACH_END();
874 return SUCCESS;
875 } else {
876 return FAILURE;
877 }
878 } else {
879 char cur_key_name[31];
880 int max_value = 0, i;
881 /* Decide the max_value: the max. no. of elements allowed */
882 if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
883 max_value = MAX_NO_VARIANT;
884 }
885 if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
886 max_value = MAX_NO_EXTLANG;
887 }
888 if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
889 max_value = MAX_NO_PRIVATE;
890 }
891
892 /* Multiple variant values as variant0, variant1 ,variant2 */
893 isFirstSubtag = 0;
894 for( i=0 ; i< max_value; i++ ){
895 snprintf( cur_key_name , 30, "%s%d", key_name , i);
896 if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
897 if( Z_TYPE_P(ele_value)!= IS_STRING ){
898 /* variant is not a string */
899 return FAILURE;
900 }
901 /* Add the contents */
902 if (isFirstSubtag++ == 0){
903 add_prefix(loc_name , cur_key_name);
904 }
905 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
906 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
907 }
908 } /* end of for */
909 } /* end of else */
910
911 return SUCCESS;
912 }
913 /* }}} */
914
915 /*{{{
916 * If applicable sets error message and aborts locale_compose gracefully
917 * returns 0 if locale_compose needs to be aborted
918 * otherwise returns 1
919 */
handleAppendResult(int result,smart_str * loc_name)920 static int handleAppendResult( int result, smart_str* loc_name)
921 {
922 intl_error_reset( NULL );
923 if( result == FAILURE) {
924 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
925 "locale_compose: parameter array element is not a string", 0 );
926 smart_str_free(loc_name);
927 return 0;
928 }
929 return 1;
930 }
931 /* }}} */
932
/* NUL-terminates the smart_str 'str' (a smart_str pointer) and returns its
 * zend_string from the current PHP function. Wrapped in do { } while (0) so
 * that the two steps expand to a single statement and stay together when the
 * macro is used as the body of an unbraced if/else. */
#define RETURN_SMART_STR(str) do { smart_str_0((str)); RETURN_NEW_STR((str)->s); } while (0)
934 /* {{{ Creates a locale by combining the parts of locale-ID passed
935 * }}} */
936 /* {{{ Creates a locale by combining the parts of locale-ID passed
937 * }}} */
PHP_FUNCTION(locale_compose)938 PHP_FUNCTION(locale_compose)
939 {
940 smart_str loc_name_s = {0};
941 smart_str *loc_name = &loc_name_s;
942 zval* arr = NULL;
943 HashTable* hash_arr = NULL;
944 int result = 0;
945
946 intl_error_reset( NULL );
947
948 ZEND_PARSE_PARAMETERS_START(1, 1)
949 Z_PARAM_ARRAY(arr)
950 ZEND_PARSE_PARAMETERS_END();
951
952 hash_arr = Z_ARRVAL_P( arr );
953
954 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
955 RETURN_FALSE;
956
957 /* Check for grandfathered first */
958 result = append_key_value(loc_name, hash_arr, LOC_GRANDFATHERED_LANG_TAG);
959 if( result == SUCCESS){
960 RETURN_SMART_STR(loc_name);
961 }
962 if( !handleAppendResult( result, loc_name)){
963 RETURN_FALSE;
964 }
965
966 /* Not grandfathered */
967 result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
968 if( result == LOC_NOT_FOUND ){
969 zend_argument_value_error(1, "must contain a \"%s\" key", LOC_LANG_TAG);
970 smart_str_free(loc_name);
971 RETURN_THROWS();
972 }
973 if( !handleAppendResult( result, loc_name)){
974 RETURN_FALSE;
975 }
976
977 /* Extlang */
978 result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG);
979 if( !handleAppendResult( result, loc_name)){
980 RETURN_FALSE;
981 }
982
983 /* Script */
984 result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
985 if( !handleAppendResult( result, loc_name)){
986 RETURN_FALSE;
987 }
988
989 /* Region */
990 result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
991 if( !handleAppendResult( result, loc_name)){
992 RETURN_FALSE;
993 }
994
995 /* Variant */
996 result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG);
997 if( !handleAppendResult( result, loc_name)){
998 RETURN_FALSE;
999 }
1000
1001 /* Private */
1002 result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG);
1003 if( !handleAppendResult( result, loc_name)){
1004 RETURN_FALSE;
1005 }
1006
1007 RETURN_SMART_STR(loc_name);
1008 }
1009 /* }}} */
1010
1011
1012 /*{{{
1013 * Parses the locale and returns private subtags if existing
1014 * else returns NULL
1015 * e.g. for locale='en_US-x-prv1-prv2-prv3'
1016 * returns a pointer to the string 'prv1-prv2-prv3'
1017 */
get_private_subtags(const char * loc_name)1018 static zend_string* get_private_subtags(const char* loc_name)
1019 {
1020 zend_string* result = NULL;
1021 size_t len = 0;
1022 const char* mod_loc_name =NULL;
1023
1024 if( loc_name && (len = strlen(loc_name)) > 0 ){
1025 zend_off_t singletonPos = 0;
1026 mod_loc_name = loc_name ;
1027 while( (singletonPos = getSingletonPos(mod_loc_name)) > -1){
1028 if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){
1029 /* private subtag start found */
1030 if( singletonPos + 2 == len){
1031 /* loc_name ends with '-x-' ; return NULL */
1032 }
1033 else{
1034 /* result = mod_loc_name + singletonPos +2; */
1035 result = zend_string_init(mod_loc_name + singletonPos+2 , (len -( singletonPos +2) ), 0);
1036 }
1037 break;
1038 }
1039 else{
1040 if((size_t)(singletonPos + 1) >= len){
1041 /* String end */
1042 break;
1043 } else {
1044 /* singleton found but not a private subtag , hence check further in the string for the private subtag */
1045 mod_loc_name = mod_loc_name + singletonPos +1;
1046 len = strlen(mod_loc_name);
1047 }
1048 }
1049 } /* end of while */
1050 }
1051
1052 return result;
1053 }
1054 /* }}} */
1055
1056 /* {{{ code used by locale_parse */
/*
 * Looks up the subtag named key_name in loc_name and stores it in hash_arr.
 *
 * For LOC_PRIVATE_TAG and LOC_VARIANT_TAG the value is split on "-"/"_" and
 * every piece is stored under "<key_name><index>" (index starting at 0).
 * Splitting stops at the first piece after the initial one that is a single
 * character. Any other key is stored as one string under key_name.
 *
 * Returns 1 when a single-valued entry was added, 0 otherwise (including
 * the multi-valued private/variant case).
 */
static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name)
{
	zend_string* key_value = NULL;
	int          result = 0;
	const bool   is_private = (strcmp(key_name, LOC_PRIVATE_TAG) == 0);
	const bool   is_multi_valued = is_private || (strcmp(key_name, LOC_VARIANT_TAG) == 0);

	if (is_private) {
		key_value = get_private_subtags(loc_name);
		result = 1;
	} else {
		key_value = get_icu_value_internal(loc_name, key_name, &result, 1);
	}

	if (!is_multi_valued) {
		if (result == 1 && key_value) {
			/* The array entry takes over the string, so it is not released here. */
			add_assoc_str(hash_arr, key_name, key_value);
			return 1;
		}
		if (key_value) {
			zend_string_release_ex(key_value, 0);
		}
		return 0;
	}

	if (result > 0 && key_value) {
		char* last_ptr = NULL;
		/* Tokenize on the "_" or "-" */
		char* token = php_strtok_r(ZSTR_VAL(key_value), DELIMITER, &last_ptr);

		/* A value made only of delimiters yields no token at all; passing that
		 * NULL on to add_assoc_string() would dereference it. */
		if (token) {
			/* Over-allocates a few bytes for the integer so we don't have to reallocate. */
			size_t cur_key_name_size = (sizeof("-2147483648") - 1) + strlen(key_name) + 1;
			char*  cur_key_name = emalloc(cur_key_name_size);
			int    cnt = 0;

			/* The first token is always stored; later ones stop at a singleton if any. */
			do {
				snprintf(cur_key_name, cur_key_name_size, "%s%d", key_name, cnt++);
				add_assoc_string(hash_arr, cur_key_name, token);
				token = php_strtok_r(NULL, DELIMITER, &last_ptr);
			} while (token && strlen(token) > 1);

			efree(cur_key_name);
		}
	}

	if (key_value) {
		zend_string_release_ex(key_value, 0);
	}

	return 0;
}
1116 /* }}} */
1117
1118 /* {{{ parses a locale-id into an array the different parts of it */
PHP_FUNCTION(locale_parse)1119 PHP_FUNCTION(locale_parse)
1120 {
1121 char* loc_name = NULL;
1122 size_t loc_name_len = 0;
1123 int grOffset = 0;
1124
1125 intl_error_reset( NULL );
1126
1127 ZEND_PARSE_PARAMETERS_START(1, 1)
1128 Z_PARAM_STRING(loc_name, loc_name_len)
1129 ZEND_PARSE_PARAMETERS_END();
1130
1131 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1132
1133 if(loc_name_len == 0) {
1134 loc_name = (char *)intl_locale_get_default();
1135 }
1136
1137 array_init( return_value );
1138
1139 grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
1140 if( grOffset >= 0 ){
1141 add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1142 }
1143 else{
1144 /* Not grandfathered */
1145 add_array_entry( loc_name , return_value , LOC_LANG_TAG);
1146 add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG);
1147 add_array_entry( loc_name , return_value , LOC_REGION_TAG);
1148 add_array_entry( loc_name , return_value , LOC_VARIANT_TAG);
1149 add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG);
1150 }
1151 }
1152 /* }}} */
1153
1154 /* {{{ gets an array containing the list of variants, or null */
PHP_FUNCTION(locale_get_all_variants)1155 PHP_FUNCTION(locale_get_all_variants)
1156 {
1157 char* loc_name = NULL;
1158 size_t loc_name_len = 0;
1159
1160 int result = 0;
1161 char* token = NULL;
1162 zend_string* variant = NULL;
1163 char* saved_ptr = NULL;
1164
1165 intl_error_reset( NULL );
1166
1167 ZEND_PARSE_PARAMETERS_START(1, 1)
1168 Z_PARAM_STRING(loc_name, loc_name_len)
1169 ZEND_PARSE_PARAMETERS_END();
1170
1171 if(loc_name_len == 0) {
1172 loc_name = (char *)intl_locale_get_default();
1173 loc_name_len = strlen(loc_name);
1174 }
1175
1176 INTL_CHECK_LOCALE_LEN(loc_name_len);
1177
1178 array_init( return_value );
1179
1180 /* If the locale is grandfathered, stop, no variants */
1181 if( findOffset( LOC_GRANDFATHERED , loc_name ) >= 0 ){
1182 /* ("Grandfathered Tag. No variants."); */
1183 }
1184 else {
1185 /* Call ICU variant */
1186 variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1187 if( result > 0 && variant){
1188 /* Tokenize on the "_" or "-" */
1189 token = php_strtok_r( variant->val , DELIMITER , &saved_ptr);
1190 add_next_index_stringl( return_value, token , strlen(token));
1191 /* tokenize on the "_" or "-" and stop at singleton if any */
1192 while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1193 add_next_index_stringl( return_value, token , strlen(token));
1194 }
1195 }
1196 if( variant ){
1197 zend_string_release_ex( variant, 0 );
1198 }
1199 }
1200
1201
1202 }
1203 /* }}} */
1204
1205 /* {{{ Converts to lower case and also replaces all hyphens with the underscore */
/*
 * Writes a normalized copy of str into retstr: every '-' becomes '_' and
 * every other character is passed through tolower(). The copy is
 * NUL-terminated, so retstr must have room for strlen(str) + 1 bytes.
 *
 * Returns 1 on success, or 0 (leaving retstr untouched) when str is NULL
 * or empty.
 */
static int strToMatch(const char* str ,char *retstr)
{
	if (str == NULL || str[0] == '\0') {
		return 0;
	}

	for (; *str != '\0'; str++, retstr++) {
		if (*str == '-') {
			*retstr = '_';
		} else {
			/* tolower() is only defined for values representable as unsigned char
			 * (or EOF); a plain char may be negative for bytes above 0x7F. */
			*retstr = (char)tolower((unsigned char)*str);
		}
	}
	*retstr = '\0';

	return 1;
}
1234 /* }}} */
1235
1236 /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
1237 /* }}} */
1238 /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
PHP_FUNCTION(locale_filter_matches)1239 PHP_FUNCTION(locale_filter_matches)
1240 {
1241 char* lang_tag = NULL;
1242 size_t lang_tag_len = 0;
1243 char* loc_range = NULL;
1244 size_t loc_range_len = 0;
1245
1246 int result = 0;
1247 char* token = 0;
1248 char* chrcheck = NULL;
1249
1250 zend_string* can_lang_tag = NULL;
1251 zend_string* can_loc_range = NULL;
1252
1253 char* cur_lang_tag = NULL;
1254 char* cur_loc_range = NULL;
1255
1256 bool boolCanonical = 0;
1257 UErrorCode status = U_ZERO_ERROR;
1258
1259 intl_error_reset( NULL );
1260
1261 ZEND_PARSE_PARAMETERS_START(2, 3)
1262 Z_PARAM_STRING(lang_tag, lang_tag_len)
1263 Z_PARAM_STRING(loc_range, loc_range_len)
1264 Z_PARAM_OPTIONAL
1265 Z_PARAM_BOOL(boolCanonical)
1266 ZEND_PARSE_PARAMETERS_END();
1267
1268 if(loc_range_len == 0) {
1269 loc_range = (char *)intl_locale_get_default();
1270 loc_range_len = strlen(loc_range);
1271 }
1272
1273 if( strcmp(loc_range,"*")==0){
1274 RETURN_TRUE;
1275 }
1276
1277 INTL_CHECK_LOCALE_LEN(loc_range_len);
1278 INTL_CHECK_LOCALE_LEN(lang_tag_len);
1279
1280 if( boolCanonical ){
1281 /* canonicalize loc_range */
1282 can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1283 if( result <=0) {
1284 intl_error_set( NULL, status,
1285 "locale_filter_matches : unable to canonicalize loc_range" , 0 );
1286 RETURN_FALSE;
1287 }
1288
1289 /* canonicalize lang_tag */
1290 can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result , 0);
1291 if( result <=0) {
1292 intl_error_set( NULL, status,
1293 "locale_filter_matches : unable to canonicalize lang_tag" , 0 );
1294 RETURN_FALSE;
1295 }
1296
1297 /* Convert to lower case for case-insensitive comparison */
1298 cur_lang_tag = ecalloc( 1, can_lang_tag->len + 1);
1299
1300 /* Convert to lower case for case-insensitive comparison */
1301 result = strToMatch( can_lang_tag->val , cur_lang_tag);
1302 if( result == 0) {
1303 efree( cur_lang_tag );
1304 zend_string_release_ex( can_lang_tag, 0 );
1305 RETURN_FALSE;
1306 }
1307
1308 cur_loc_range = ecalloc( 1, can_loc_range->len + 1);
1309 result = strToMatch( can_loc_range->val , cur_loc_range );
1310 if( result == 0) {
1311 efree( cur_lang_tag );
1312 zend_string_release_ex( can_lang_tag, 0 );
1313 efree( cur_loc_range );
1314 zend_string_release_ex( can_loc_range, 0 );
1315 RETURN_FALSE;
1316 }
1317
1318 /* check if prefix */
1319 token = strstr( cur_lang_tag , cur_loc_range );
1320
1321 if( token && (token==cur_lang_tag) ){
1322 /* check if the char. after match is SEPARATOR */
1323 chrcheck = token + (strlen(cur_loc_range));
1324 if( isIDSeparator(*chrcheck) || isKeywordSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1325 efree( cur_lang_tag );
1326 efree( cur_loc_range );
1327 if( can_lang_tag){
1328 zend_string_release_ex( can_lang_tag, 0 );
1329 }
1330 if( can_loc_range){
1331 zend_string_release_ex( can_loc_range, 0 );
1332 }
1333 RETURN_TRUE;
1334 }
1335 }
1336
1337 /* No prefix as loc_range */
1338 if( cur_lang_tag){
1339 efree( cur_lang_tag );
1340 }
1341 if( cur_loc_range){
1342 efree( cur_loc_range );
1343 }
1344 if( can_lang_tag){
1345 zend_string_release_ex( can_lang_tag, 0 );
1346 }
1347 if( can_loc_range){
1348 zend_string_release_ex( can_loc_range, 0 );
1349 }
1350 RETURN_FALSE;
1351
1352 } /* end of if isCanonical */
1353 else{
1354 /* Convert to lower case for case-insensitive comparison */
1355 cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1356
1357 result = strToMatch( lang_tag , cur_lang_tag);
1358 if( result == 0) {
1359 efree( cur_lang_tag );
1360 RETURN_FALSE;
1361 }
1362 cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1363 result = strToMatch( loc_range , cur_loc_range );
1364 if( result == 0) {
1365 efree( cur_lang_tag );
1366 efree( cur_loc_range );
1367 RETURN_FALSE;
1368 }
1369
1370 /* check if prefix */
1371 token = strstr( cur_lang_tag , cur_loc_range );
1372
1373 if( token && (token==cur_lang_tag) ){
1374 /* check if the char. after match is SEPARATOR */
1375 chrcheck = token + (strlen(cur_loc_range));
1376 if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1377 efree( cur_lang_tag );
1378 efree( cur_loc_range );
1379 RETURN_TRUE;
1380 }
1381 }
1382
1383 /* No prefix as loc_range */
1384 if( cur_lang_tag){
1385 efree( cur_lang_tag );
1386 }
1387 if( cur_loc_range){
1388 efree( cur_loc_range );
1389 }
1390 RETURN_FALSE;
1391
1392 }
1393 }
1394 /* }}} */
1395
/*
 * Frees the non-NULL entries at even indices 0, 2, ..., 2*(arr_size-1) of arr,
 * then frees arr itself. Entries at odd indices are not freed here.
 */
static void array_cleanup( char* arr[] , int arr_size)
{
	char** slot = arr;
	int    remaining = arr_size;

	while (remaining-- > 0) {
		if (*slot != NULL) {
			efree(*slot);
		}
		slot += 2;
	}

	efree(arr);
}
1406
1407 #define LOOKUP_CLEAN_RETURN(value) array_cleanup(cur_arr, cur_arr_len); return (value)
1408 /* {{{
1409 * returns the lookup result to lookup_loc_range_src_php
1410 * internal function
1411 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize)1412 static zend_string* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize )
1413 {
1414 int i = 0;
1415 int cur_arr_len = 0;
1416 int result = 0;
1417
1418 zend_string* lang_tag = NULL;
1419 zval* ele_value = NULL;
1420
1421 char* cur_loc_range = NULL;
1422 zend_string* can_loc_range = NULL;
1423 zend_off_t saved_pos = 0;
1424
1425 zend_string* return_value = NULL;
1426
1427 char **cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1428 ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1429 /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1430 if(Z_TYPE_P(ele_value)!= IS_STRING) {
1431 /* element value is not a string */
1432 zend_argument_type_error(2, "must only contain string values");
1433 LOOKUP_CLEAN_RETURN(NULL);
1434 }
1435 cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1436 result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1437 if(result == 0) {
1438 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0);
1439 LOOKUP_CLEAN_RETURN(NULL);
1440 }
1441 cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1442 cur_arr_len++ ;
1443 } ZEND_HASH_FOREACH_END(); /* end of for */
1444
1445 /* Canonicalize array elements */
1446 if(canonicalize) {
1447 for(i=0; i<cur_arr_len; i++) {
1448 lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1449 if(result != 1 || lang_tag == NULL || !lang_tag->val[0]) {
1450 if(lang_tag) {
1451 zend_string_release_ex(lang_tag, 0);
1452 }
1453 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1454 LOOKUP_CLEAN_RETURN(NULL);
1455 }
1456 cur_arr[i*2] = erealloc(cur_arr[i*2], lang_tag->len+1);
1457 result = strToMatch(lang_tag->val, cur_arr[i*2]);
1458 zend_string_release_ex(lang_tag, 0);
1459 if(result == 0) {
1460 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1461 LOOKUP_CLEAN_RETURN(NULL);
1462 }
1463 }
1464
1465 }
1466
1467 if(canonicalize) {
1468 /* Canonicalize the loc_range */
1469 can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1470 if( result != 1 || can_loc_range == NULL || !can_loc_range->val[0]) {
1471 /* Error */
1472 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 );
1473 if(can_loc_range) {
1474 zend_string_release_ex(can_loc_range, 0);
1475 }
1476 LOOKUP_CLEAN_RETURN(NULL);
1477 } else {
1478 loc_range = can_loc_range->val;
1479 }
1480 }
1481
1482 cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1483 /* convert to lower and replace hyphens */
1484 result = strToMatch(loc_range, cur_loc_range);
1485 if(can_loc_range) {
1486 zend_string_release_ex(can_loc_range, 0);
1487 }
1488 if(result == 0) {
1489 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1490 LOOKUP_CLEAN_RETURN(NULL);
1491 }
1492
1493 /* Lookup for the lang_tag match */
1494 saved_pos = strlen(cur_loc_range);
1495 while(saved_pos > 0) {
1496 for(i=0; i< cur_arr_len; i++){
1497 if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1498 /* Match found */
1499 char *str = canonicalize ? cur_arr[i*2] : cur_arr[i*2+1];
1500 return_value = zend_string_init(str, strlen(str), 0);
1501 efree(cur_loc_range);
1502 LOOKUP_CLEAN_RETURN(return_value);
1503 }
1504 }
1505 saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1506 }
1507
1508 /* Match not found */
1509 efree(cur_loc_range);
1510 LOOKUP_CLEAN_RETURN(NULL);
1511 }
1512 /* }}} */
1513
1514 /* {{{ Searches the items in $langtag for the best match to the language
1515 * range
1516 */
1517 /* }}} */
1518 /* {{{ Searches the items in $langtag for the best match to the language
1519 * range
1520 */
PHP_FUNCTION(locale_lookup)1521 PHP_FUNCTION(locale_lookup)
1522 {
1523 zend_string* fallback_loc_str = NULL;
1524 char* loc_range = NULL;
1525 size_t loc_range_len = 0;
1526
1527 zval* arr = NULL;
1528 HashTable* hash_arr = NULL;
1529 bool boolCanonical = 0;
1530 zend_string* result_str = NULL;
1531
1532 intl_error_reset( NULL );
1533
1534 ZEND_PARSE_PARAMETERS_START(2, 4)
1535 Z_PARAM_ARRAY(arr)
1536 Z_PARAM_STRING(loc_range, loc_range_len)
1537 Z_PARAM_OPTIONAL
1538 Z_PARAM_BOOL(boolCanonical)
1539 Z_PARAM_STR_OR_NULL(fallback_loc_str)
1540 ZEND_PARSE_PARAMETERS_END();
1541
1542 if(loc_range_len == 0) {
1543 if(fallback_loc_str) {
1544 loc_range = ZSTR_VAL(fallback_loc_str);
1545 loc_range_len = ZSTR_LEN(fallback_loc_str);
1546 } else {
1547 loc_range = (char *)intl_locale_get_default();
1548 loc_range_len = strlen(loc_range);
1549 }
1550 }
1551
1552 hash_arr = Z_ARRVAL_P(arr);
1553
1554 INTL_CHECK_LOCALE_LEN(loc_range_len);
1555
1556 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1557 RETURN_EMPTY_STRING();
1558 }
1559
1560 result_str = lookup_loc_range(loc_range, hash_arr, boolCanonical);
1561 if(result_str == NULL || ZSTR_VAL(result_str)[0] == '\0') {
1562 if( fallback_loc_str ) {
1563 result_str = zend_string_copy(fallback_loc_str);
1564 } else {
1565 RETURN_EMPTY_STRING();
1566 }
1567 }
1568
1569 RETURN_STR(result_str);
1570 }
1571 /* }}} */
1572
1573 /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
1574 /* }}} */
1575 /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
PHP_FUNCTION(locale_accept_from_http)1576 PHP_FUNCTION(locale_accept_from_http)
1577 {
1578 UEnumeration *available;
1579 char *http_accept = NULL;
1580 size_t http_accept_len;
1581 UErrorCode status = 0;
1582 int len;
1583 char resultLocale[INTL_MAX_LOCALE_LEN+1];
1584 UAcceptResult outResult;
1585
1586 ZEND_PARSE_PARAMETERS_START(1, 1)
1587 Z_PARAM_STRING(http_accept, http_accept_len)
1588 ZEND_PARSE_PARAMETERS_END();
1589 if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1590 /* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1591 char *start = http_accept;
1592 char *end;
1593 size_t len;
1594 do {
1595 end = strchr(start, ',');
1596 len = end ? end-start : http_accept_len-(start-http_accept);
1597 if(len > ULOC_FULLNAME_CAPACITY) {
1598 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1599 "locale_accept_from_http: locale string too long", 0 );
1600 RETURN_FALSE;
1601 }
1602 if(end) {
1603 start = end+1;
1604 }
1605 } while(end != NULL);
1606 }
1607
1608 available = ures_openAvailableLocales(NULL, &status);
1609 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1610 len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1611 &outResult, http_accept, available, &status);
1612 uenum_close(available);
1613 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1614 if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1615 RETURN_FALSE;
1616 }
1617 RETURN_STRINGL(resultLocale, len);
1618 }
1619 /* }}} */
1620