1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | http://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Kirti Velankar <kirtig@yahoo-inc.com> |
12 +----------------------------------------------------------------------+
13 */
14
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18
19 #include <unicode/ustring.h>
20 #include <unicode/udata.h>
21 #include <unicode/putil.h>
22 #include <unicode/ures.h>
23
24 #include "php_intl.h"
25 #include "locale.h"
26 #include "locale_class.h"
27 #include "intl_convert.h"
28 #include "intl_data.h"
29
30 #include <zend_API.h>
31 #include <zend.h>
32 #include <php.h>
33 #include "main/php_ini.h"
34 #include "zend_smart_str.h"
35
36 ZEND_EXTERN_MODULE_GLOBALS( intl )
37
/* Separator characters and fixed prefixes used when composing and tokenizing locale IDs */
#define SEPARATOR      "_"   /* inserted between subtags by locale_compose */
#define SEPARATOR1     "-"
#define DELIMITER      "-_"  /* delimiter set handed to php_strtok_r */
#define EXTLANG_PREFIX "a"
#define PRIVATE_PREFIX "x"   /* appended before the first private subtag */
#define DISP_NAME      "name"

/* Highest number of numbered subtags ("variant0", "variant1", ...) looked up per kind */
#define MAX_NO_VARIANT         15
#define MAX_NO_EXTLANG         3
#define MAX_NO_PRIVATE         15
#define MAX_NO_LOOKUP_LANG_TAG 100

/* Status returned by append_key_value when the requested key is absent */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN 11
#define EXTLANG_KEYNAME_LEN 10
#define PRIVATE_KEYNAME_LEN 11
57
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code.
 *
 * The list is NULL-terminated. The first six entries line up, index for
 * index, with LOC_PREFERRED_GRANDFATHERED, so the order must not change.
 */
static const char * const LOC_GRANDFATHERED[] = {
	/* entries that have a preferred replacement */
	"art-lojban",
	"i-klingon",
	"i-lux",
	"i-navajo",
	"no-bok",
	"no-nyn",
	/* entries without a preferred replacement */
	"cel-gaulish",
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-mingo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"sgn-BE-fr",
	"sgn-BE-nl",
	"sgn-CH-de",
	"zh-cmn",
	"zh-cmn-Hans",
	"zh-cmn-Hant",
	"zh-gan",
	"zh-guoyu",
	"zh-hakka",
	"zh-min",
	"zh-min-nan",
	"zh-wuu",
	"zh-xiang",
	"zh-yue",
	NULL
};
73
/* Preferred replacements for grandfathered tags, based on the IANA registry
 * at the time of writing this code.
 *
 * Entry N here is the preferred value for LOC_GRANDFATHERED[N]; only the
 * first LOC_PREFERRED_GRANDFATHERED_LEN grandfathered tags have one.
 * The list is NULL-terminated.
 */
static const int LOC_PREFERRED_GRANDFATHERED_LEN = 6;
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
	"jbo",	/* art-lojban */
	"tlh",	/* i-klingon  */
	"lb",	/* i-lux      */
	"nv",	/* i-navajo   */
	"nb",	/* no-bok     */
	"nn",	/* no-nyn     */
	NULL
};
85
/* Character classification helpers.
 *
 * Every macro argument is parenthesized so the expansion stays correct when
 * the argument is an arbitrary expression (e.g. a conditional expression).
 * Arguments may still be evaluated more than once, so do not pass
 * expressions with side effects.
 */

/* returns true if a is an ID separator, false otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) ((a) == 'x' || (a) == 'X' || (a) == 'i' || (a) == 'I')

/* returns true if one of the special prefixes is here (s=string)
 * 'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant */
#define isTerminator(a) ((a) == 0 || (a) == '.' || (a) == '@')
101
/* {{{ findOffset
 * Looks 'key' up in the NULL-terminated string array 'list' using an exact
 * (case-sensitive) comparison.
 * Returns the zero-based index of the first match, or -1 if 'key' is absent.
 */
static int16_t findOffset(const char* const* list, const char* key)
{
	const char* const* cursor;

	for (cursor = list; *cursor != NULL; ++cursor) {
		if (strcmp(*cursor, key) == 0) {
			return (int16_t)(cursor - list);
		}
	}

	return -1;
}
/* }}} */
118
getPreferredTag(const char * gf_tag)119 static char* getPreferredTag(const char* gf_tag)
120 {
121 char* result = NULL;
122 zend_off_t grOffset = 0;
123
124 grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
125 if(grOffset < 0) {
126 return NULL;
127 }
128 if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
129 /* return preferred tag */
130 result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
131 } else {
132 /* Return correct grandfathered language tag */
133 result = estrdup( LOC_GRANDFATHERED[grOffset] );
134 }
135 return result;
136 }
137
138 /* {{{
139 * returns the position of next token for lookup
140 * or -1 if no token
141 * strtokr equivalent search for token in reverse direction
142 */
getStrrtokenPos(char * str,zend_off_t savedPos)143 static zend_off_t getStrrtokenPos(char* str, zend_off_t savedPos)
144 {
145 zend_off_t result =-1;
146 zend_off_t i;
147
148 for(i=savedPos-1; i>=0; i--) {
149 if(isIDSeparator(*(str+i)) || isKeywordSeparator(*(str+i))){
150 /* delimiter found; check for singleton */
151 if(i>=2 && isIDSeparator(*(str+i-2)) ){
152 /* a singleton; so send the position of token before the singleton */
153 result = i-2;
154 } else {
155 result = i;
156 }
157 break;
158 }
159 }
160 if(result < 1){
161 /* Just in case inavlid locale e.g. '-x-xyz' or '-sl_Latn' */
162 result =-1;
163 }
164 return result;
165 }
166 /* }}} */
167
168 /* {{{
169 * returns the position of a singleton if present
170 * returns -1 if no singleton
171 * strtok equivalent search for singleton
172 */
getSingletonPos(const char * str)173 static zend_off_t getSingletonPos(const char* str)
174 {
175 zend_off_t result =-1;
176 size_t len = 0;
177
178 if( str && ((len=strlen(str))>0) ){
179 zend_off_t i = 0;
180 for( i=0; (size_t)i < len ; i++){
181 if( isIDSeparator(*(str+i)) ){
182 if( i==1){
183 /* string is of the form x-avy or a-prv1 */
184 result =0;
185 break;
186 } else {
187 /* delimiter found; check for singleton */
188 if( isIDSeparator(*(str+i+2)) ){
189 /* a singleton; so send the position of separator before singleton */
190 result = i+1;
191 break;
192 }
193 }
194 }
195 }/* end of for */
196
197 }
198 return result;
199 }
200 /* }}} */
201
202 /* {{{ Get default locale */
203 /* }}} */
204 /* {{{ Get default locale */
PHP_NAMED_FUNCTION(zif_locale_get_default)205 PHP_NAMED_FUNCTION(zif_locale_get_default)
206 {
207 if (zend_parse_parameters_none() == FAILURE) {
208 RETURN_THROWS();
209 }
210
211 RETURN_STRING( intl_locale_get_default( ) );
212 }
213
214 /* }}} */
215
216 /* {{{ Set default locale */
217 /* }}} */
218 /* {{{ Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)219 PHP_NAMED_FUNCTION(zif_locale_set_default)
220 {
221 zend_string* locale_name;
222 zend_string *ini_name;
223 char *default_locale = NULL;
224
225 if(zend_parse_parameters( ZEND_NUM_ARGS(), "S", &locale_name) == FAILURE)
226 {
227 RETURN_THROWS();
228 }
229
230 if (ZSTR_LEN(locale_name) == 0) {
231 default_locale = (char *)uloc_getDefault();
232 locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
233 }
234
235 ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
236 zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
237 zend_string_release_ex(ini_name, 0);
238 if (default_locale != NULL) {
239 zend_string_release_ex(locale_name, 0);
240 }
241
242 RETURN_TRUE;
243 }
244 /* }}} */
245
246 /* {{{
247 * Gets the value from ICU
248 * common code shared by get_primary_language,get_script or get_region or get_variant
249 * result = 0 if error, 1 if successful , -1 if no value
250 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)251 static zend_string* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
252 {
253 zend_string* tag_value = NULL;
254 int32_t tag_value_len = 512;
255
256 char* mod_loc_name = NULL;
257
258 int32_t buflen = 512;
259 UErrorCode status = U_ZERO_ERROR;
260
261 if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
262 return NULL;
263 }
264
265 if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
266 /* Handle grandfathered languages */
267 zend_off_t grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
268 if( grOffset >= 0 ){
269 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
270 return zend_string_init(loc_name, strlen(loc_name), 0);
271 } else {
272 /* Since Grandfathered , no value , do nothing , retutn NULL */
273 return NULL;
274 }
275 }
276
277 if( fromParseLocale==1 ){
278 zend_off_t singletonPos = 0;
279
280 /* Handle singletons */
281 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
282 if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
283 return zend_string_init(loc_name, strlen(loc_name), 0);
284 }
285 }
286
287 singletonPos = getSingletonPos( loc_name );
288 if( singletonPos == 0){
289 /* singleton at start of script, region , variant etc.
290 * or invalid singleton at start of language */
291 return NULL;
292 } else if( singletonPos > 0 ){
293 /* singleton at some position except at start
294 * strip off the singleton and rest of the loc_name */
295 mod_loc_name = estrndup ( loc_name , singletonPos-1);
296 }
297 } /* end of if fromParse */
298
299 } /* end of if != LOC_CANONICAL_TAG */
300
301 if( mod_loc_name == NULL){
302 mod_loc_name = estrdup(loc_name );
303 }
304
305 /* Proceed to ICU */
306 do{
307 if (tag_value) {
308 tag_value = zend_string_realloc( tag_value , buflen, 0);
309 } else {
310 tag_value = zend_string_alloc( buflen, 0);
311 }
312 tag_value_len = buflen;
313
314 if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
315 buflen = uloc_getScript ( mod_loc_name , tag_value->val , tag_value_len , &status);
316 }
317 if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
318 buflen = uloc_getLanguage ( mod_loc_name , tag_value->val , tag_value_len , &status);
319 }
320 if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
321 buflen = uloc_getCountry ( mod_loc_name , tag_value->val , tag_value_len , &status);
322 }
323 if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
324 buflen = uloc_getVariant ( mod_loc_name , tag_value->val , tag_value_len , &status);
325 }
326 if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
327 buflen = uloc_canonicalize ( mod_loc_name , tag_value->val , tag_value_len , &status);
328 }
329
330 if( U_FAILURE( status ) ) {
331 if( status == U_BUFFER_OVERFLOW_ERROR ) {
332 status = U_ZERO_ERROR;
333 buflen++; /* add space for \0 */
334 continue;
335 }
336
337 /* Error in retrieving data */
338 *result = 0;
339 if( tag_value ){
340 zend_string_release_ex( tag_value, 0 );
341 }
342 if( mod_loc_name ){
343 efree( mod_loc_name);
344 }
345 return NULL;
346 }
347 } while( buflen > tag_value_len );
348
349 if( buflen ==0 ){
350 /* No value found */
351 *result = -1;
352 if( tag_value ){
353 zend_string_release_ex( tag_value, 0 );
354 }
355 if( mod_loc_name ){
356 efree( mod_loc_name);
357 }
358 return NULL;
359 } else {
360 *result = 1;
361 }
362
363 if( mod_loc_name ){
364 efree( mod_loc_name);
365 }
366
367 tag_value->len = strlen(tag_value->val);
368 return tag_value;
369 }
370 /* }}} */
371
372 /* {{{
373 * Gets the value from ICU , called when PHP userspace function is called
374 * common code shared by get_primary_language,get_script or get_region or get_variant
375 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)376 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
377 {
378
379 const char* loc_name = NULL;
380 size_t loc_name_len = 0;
381
382 zend_string* tag_value = NULL;
383 char* empty_result = "";
384
385 int result = 0;
386 char* msg = NULL;
387
388 UErrorCode status = U_ZERO_ERROR;
389
390 intl_error_reset( NULL );
391
392 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
393 &loc_name ,&loc_name_len ) == FAILURE) {
394 RETURN_THROWS();
395 }
396
397 if(loc_name_len == 0) {
398 loc_name = intl_locale_get_default();
399 loc_name_len = strlen(loc_name);
400 }
401
402 INTL_CHECK_LOCALE_LEN(loc_name_len);
403
404 /* Call ICU get */
405 tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
406
407 /* No value found */
408 if( result == -1 ) {
409 if( tag_value){
410 zend_string_release_ex( tag_value, 0 );
411 }
412 RETURN_STRING( empty_result);
413 }
414
415 /* value found */
416 if( tag_value){
417 RETVAL_STR( tag_value );
418 return;
419 }
420
421 /* Error encountered while fetching the value */
422 if( result ==0) {
423 spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
424 intl_error_set( NULL, status, msg , 1 );
425 efree(msg);
426 RETURN_NULL();
427 }
428
429 }
430 /* }}} */
431
432 /* {{{ gets the script for the $locale */
PHP_FUNCTION(locale_get_script)433 PHP_FUNCTION( locale_get_script )
434 {
435 get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
436 }
437 /* }}} */
438
439 /* {{{ gets the region for the $locale */
PHP_FUNCTION(locale_get_region)440 PHP_FUNCTION( locale_get_region )
441 {
442 get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
443 }
444 /* }}} */
445
446 /* {{{ gets the primary language for the $locale */
PHP_FUNCTION(locale_get_primary_language)447 PHP_FUNCTION(locale_get_primary_language )
448 {
449 get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
450 }
451 /* }}} */
452
453
454 /* {{{
455 * common code shared by display_xyz functions to get the value from ICU
456 }}} */
get_icu_disp_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)457 static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
458 {
459 const char* loc_name = NULL;
460 size_t loc_name_len = 0;
461
462 const char* disp_loc_name = NULL;
463 size_t disp_loc_name_len = 0;
464 int free_loc_name = 0;
465
466 UChar* disp_name = NULL;
467 int32_t disp_name_len = 0;
468
469 char* mod_loc_name = NULL;
470
471 int32_t buflen = 512;
472 UErrorCode status = U_ZERO_ERROR;
473
474 zend_string* u8str;
475
476 char* msg = NULL;
477
478 intl_error_reset( NULL );
479
480 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|s!",
481 &loc_name, &loc_name_len ,
482 &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
483 {
484 RETURN_THROWS();
485 }
486
487 if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
488 /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
489 spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
490 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 );
491 efree(msg);
492 RETURN_FALSE;
493 }
494
495 if(loc_name_len == 0) {
496 loc_name = intl_locale_get_default();
497 }
498
499 if( strcmp(tag_name, DISP_NAME) != 0 ){
500 /* Handle grandfathered languages */
501 int grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
502 if( grOffset >= 0 ){
503 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
504 mod_loc_name = getPreferredTag( loc_name );
505 } else {
506 /* Since Grandfathered, no value, do nothing, retutn NULL */
507 RETURN_FALSE;
508 }
509 }
510 } /* end of if != LOC_CANONICAL_TAG */
511
512 if( mod_loc_name==NULL ){
513 mod_loc_name = estrdup( loc_name );
514 }
515
516 /* Check if disp_loc_name passed , if not use default locale */
517 if( !disp_loc_name){
518 disp_loc_name = estrdup(intl_locale_get_default());
519 free_loc_name = 1;
520 }
521
522 /* Get the disp_value for the given locale */
523 do{
524 disp_name = erealloc( disp_name , buflen * sizeof(UChar) );
525 disp_name_len = buflen;
526
527 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
528 buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
529 } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
530 buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
531 } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
532 buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
533 } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
534 buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
535 } else if( strcmp(tag_name , DISP_NAME)==0 ){
536 buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
537 }
538
539 /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
540 if( U_FAILURE( status ) )
541 {
542 if( status == U_BUFFER_OVERFLOW_ERROR )
543 {
544 status = U_ZERO_ERROR;
545 continue;
546 }
547
548 spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
549 intl_error_set( NULL, status, msg , 1 );
550 efree(msg);
551 if( disp_name){
552 efree( disp_name );
553 }
554 if( mod_loc_name){
555 efree( mod_loc_name );
556 }
557 if (free_loc_name) {
558 efree((void *)disp_loc_name);
559 disp_loc_name = NULL;
560 }
561 RETURN_FALSE;
562 }
563 } while( buflen > disp_name_len );
564
565 if( mod_loc_name){
566 efree( mod_loc_name );
567 }
568 if (free_loc_name) {
569 efree((void *)disp_loc_name);
570 disp_loc_name = NULL;
571 }
572 /* Convert display locale name from UTF-16 to UTF-8. */
573 u8str = intl_convert_utf16_to_utf8(disp_name, buflen, &status );
574 efree( disp_name );
575 if( !u8str )
576 {
577 spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
578 intl_error_set( NULL, status, msg , 1 );
579 efree(msg);
580 RETURN_FALSE;
581 }
582
583 RETVAL_NEW_STR( u8str );
584 }
585 /* }}} */
586
587 /* {{{ gets the name for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_name)588 PHP_FUNCTION(locale_get_display_name)
589 {
590 get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
591 }
592 /* }}} */
593
594 /* {{{ gets the language for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_language)595 PHP_FUNCTION(locale_get_display_language)
596 {
597 get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
598 }
599 /* }}} */
600
601 /* {{{ gets the script for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_script)602 PHP_FUNCTION(locale_get_display_script)
603 {
604 get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
605 }
606 /* }}} */
607
608 /* {{{ gets the region for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_region)609 PHP_FUNCTION(locale_get_display_region)
610 {
611 get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
612 }
613 /* }}} */
614
615 /* {{{
616 * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
617 * gets the variant for the $locale in $in_locale or default_locale
618 }}} */
619 /* {{{
620 * proto static string get_display_variant($locale, $in_locale = null)
621 * gets the variant for the $locale in $in_locale or default_locale
622 */
PHP_FUNCTION(locale_get_display_variant)623 PHP_FUNCTION(locale_get_display_variant)
624 {
625 get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
626 }
627 /* }}} */
628
629 /* {{{ return an associative array containing keyword-value
630 * pairs for this locale. The keys are keys to the array (doh!)
631 * }}}*/
632 /* {{{ return an associative array containing keyword-value
633 * pairs for this locale. The keys are keys to the array (doh!)
634 */
PHP_FUNCTION(locale_get_keywords)635 PHP_FUNCTION( locale_get_keywords )
636 {
637 UEnumeration* e = NULL;
638 UErrorCode status = U_ZERO_ERROR;
639
640 const char* kw_key = NULL;
641 int32_t kw_key_len = 0;
642
643 const char* loc_name = NULL;
644 size_t loc_name_len = 0;
645
646 intl_error_reset( NULL );
647
648 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
649 &loc_name, &loc_name_len ) == FAILURE)
650 {
651 RETURN_THROWS();
652 }
653
654 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
655
656 if(loc_name_len == 0) {
657 loc_name = intl_locale_get_default();
658 }
659
660 /* Get the keywords */
661 e = uloc_openKeywords( loc_name, &status );
662 if( e != NULL )
663 {
664 /*
665 ICU expects the buffer to be allocated before calling the function
666 and so the buffer size has been explicitly specified
667 ICU uloc.h #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
668 hence the kw_value buffer size is 100
669 */
670
671 /* Traverse it, filling the return array. */
672 array_init( return_value );
673
674 while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
675 int32_t kw_value_len = 100;
676 zend_string *kw_value_str = zend_string_alloc(kw_value_len, 0);
677
678 /* Get the keyword value for each keyword */
679 kw_value_len=uloc_getKeywordValue( loc_name, kw_key, ZSTR_VAL(kw_value_str), kw_value_len, &status );
680 if (status == U_BUFFER_OVERFLOW_ERROR) {
681 status = U_ZERO_ERROR;
682 kw_value_str = zend_string_extend(kw_value_str, kw_value_len, 0);
683 kw_value_len=uloc_getKeywordValue( loc_name,kw_key, ZSTR_VAL(kw_value_str), kw_value_len+1, &status );
684 } else if(!U_FAILURE(status)) {
685 kw_value_str = zend_string_truncate(kw_value_str, kw_value_len, 0);
686 }
687 if (U_FAILURE(status)) {
688 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword value for the keyword", 0 );
689 if( kw_value_str){
690 zend_string_efree( kw_value_str );
691 }
692 zend_array_destroy(Z_ARR_P(return_value));
693 RETURN_FALSE;
694 }
695
696 add_assoc_str( return_value, (char *)kw_key, kw_value_str);
697 } /* end of while */
698
699 } /* end of if e!=NULL */
700
701 uenum_close( e );
702 }
703 /* }}} */
704
705 /* {{{ @return string the canonicalized locale
706 * }}} */
707 /* {{{ @param string $locale The locale string to canonicalize */
PHP_FUNCTION(locale_canonicalize)708 PHP_FUNCTION(locale_canonicalize)
709 {
710 get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
711 }
712 /* }}} */
713
714 /* {{{ append_key_value
715 * Internal function which is called from locale_compose
716 * gets the value for the key_name and appends to the loc_name
717 * returns 1 if successful , -1 if not found ,
718 * 0 if array element is not a string , -2 if buffer-overflow
719 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)720 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
721 {
722 zval *ele_value;
723
724 if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
725 if(Z_TYPE_P(ele_value)!= IS_STRING ){
726 /* element value is not a string */
727 return FAILURE;
728 }
729 if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
730 strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
731 /* not lang or grandfathered tag */
732 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
733 }
734 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
735 return SUCCESS;
736 }
737
738 return LOC_NOT_FOUND;
739 }
740 /* }}} */
741
742 /* {{{ append_prefix , appends the prefix needed
743 * e.g. private adds 'x'
744 */
add_prefix(smart_str * loc_name,char * key_name)745 static void add_prefix(smart_str* loc_name, char* key_name)
746 {
747 if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
748 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
749 smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
750 }
751 }
752 /* }}} */
753
754 /* {{{ append_multiple_key_values
755 * Internal function which is called from locale_compose
756 * gets the multiple values for the key_name and appends to the loc_name
757 * used for 'variant','extlang','private'
758 * returns 1 if successful , -1 if not found ,
759 * 0 if array element is not a string , -2 if buffer-overflow
760 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name)761 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name)
762 {
763 zval *ele_value;
764 int isFirstSubtag = 0;
765
766 /* Variant/ Extlang/Private etc. */
767 if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
768 if( Z_TYPE_P(ele_value) == IS_STRING ){
769 add_prefix( loc_name , key_name);
770
771 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
772 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
773 return SUCCESS;
774 } else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
775 HashTable *arr = Z_ARRVAL_P(ele_value);
776 zval *data;
777
778 ZEND_HASH_FOREACH_VAL(arr, data) {
779 if(Z_TYPE_P(data) != IS_STRING) {
780 return FAILURE;
781 }
782 if (isFirstSubtag++ == 0){
783 add_prefix(loc_name , key_name);
784 }
785 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
786 smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
787 } ZEND_HASH_FOREACH_END();
788 return SUCCESS;
789 } else {
790 return FAILURE;
791 }
792 } else {
793 char cur_key_name[31];
794 int max_value = 0, i;
795 /* Decide the max_value: the max. no. of elements allowed */
796 if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
797 max_value = MAX_NO_VARIANT;
798 }
799 if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
800 max_value = MAX_NO_EXTLANG;
801 }
802 if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
803 max_value = MAX_NO_PRIVATE;
804 }
805
806 /* Multiple variant values as variant0, variant1 ,variant2 */
807 isFirstSubtag = 0;
808 for( i=0 ; i< max_value; i++ ){
809 snprintf( cur_key_name , 30, "%s%d", key_name , i);
810 if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
811 if( Z_TYPE_P(ele_value)!= IS_STRING ){
812 /* variant is not a string */
813 return FAILURE;
814 }
815 /* Add the contents */
816 if (isFirstSubtag++ == 0){
817 add_prefix(loc_name , cur_key_name);
818 }
819 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
820 smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
821 }
822 } /* end of for */
823 } /* end of else */
824
825 return SUCCESS;
826 }
827 /* }}} */
828
829 /*{{{
830 * If applicable sets error message and aborts locale_compose gracefully
831 * returns 0 if locale_compose needs to be aborted
832 * otherwise returns 1
833 */
handleAppendResult(int result,smart_str * loc_name)834 static int handleAppendResult( int result, smart_str* loc_name)
835 {
836 intl_error_reset( NULL );
837 if( result == FAILURE) {
838 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
839 "locale_compose: parameter array element is not a string", 0 );
840 smart_str_free(loc_name);
841 return 0;
842 }
843 return 1;
844 }
845 /* }}} */
846
847 #define RETURN_SMART_STR(str) smart_str_0((str)); RETURN_NEW_STR((str)->s)
848 /* {{{ Creates a locale by combining the parts of locale-ID passed
849 * }}} */
850 /* {{{ Creates a locale by combining the parts of locale-ID passed
851 * }}} */
PHP_FUNCTION(locale_compose)852 PHP_FUNCTION(locale_compose)
853 {
854 smart_str loc_name_s = {0};
855 smart_str *loc_name = &loc_name_s;
856 zval* arr = NULL;
857 HashTable* hash_arr = NULL;
858 int result = 0;
859
860 intl_error_reset( NULL );
861
862 if(zend_parse_parameters( ZEND_NUM_ARGS(), "a",
863 &arr) == FAILURE)
864 {
865 RETURN_THROWS();
866 }
867
868 hash_arr = Z_ARRVAL_P( arr );
869
870 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
871 RETURN_FALSE;
872
873 /* Check for grandfathered first */
874 result = append_key_value(loc_name, hash_arr, LOC_GRANDFATHERED_LANG_TAG);
875 if( result == SUCCESS){
876 RETURN_SMART_STR(loc_name);
877 }
878 if( !handleAppendResult( result, loc_name)){
879 RETURN_FALSE;
880 }
881
882 /* Not grandfathered */
883 result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
884 if( result == LOC_NOT_FOUND ){
885 zend_argument_value_error(1, "must contain a \"%s\" key", LOC_LANG_TAG);
886 smart_str_free(loc_name);
887 RETURN_THROWS();
888 }
889 if( !handleAppendResult( result, loc_name)){
890 RETURN_FALSE;
891 }
892
893 /* Extlang */
894 result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG);
895 if( !handleAppendResult( result, loc_name)){
896 RETURN_FALSE;
897 }
898
899 /* Script */
900 result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
901 if( !handleAppendResult( result, loc_name)){
902 RETURN_FALSE;
903 }
904
905 /* Region */
906 result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
907 if( !handleAppendResult( result, loc_name)){
908 RETURN_FALSE;
909 }
910
911 /* Variant */
912 result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG);
913 if( !handleAppendResult( result, loc_name)){
914 RETURN_FALSE;
915 }
916
917 /* Private */
918 result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG);
919 if( !handleAppendResult( result, loc_name)){
920 RETURN_FALSE;
921 }
922
923 RETURN_SMART_STR(loc_name);
924 }
925 /* }}} */
926
927
928 /*{{{
929 * Parses the locale and returns private subtags if existing
930 * else returns NULL
931 * e.g. for locale='en_US-x-prv1-prv2-prv3'
932 * returns a pointer to the string 'prv1-prv2-prv3'
933 */
get_private_subtags(const char * loc_name)934 static zend_string* get_private_subtags(const char* loc_name)
935 {
936 zend_string* result = NULL;
937 size_t len = 0;
938 const char* mod_loc_name =NULL;
939
940 if( loc_name && (len = strlen(loc_name)) > 0 ){
941 zend_off_t singletonPos = 0;
942 mod_loc_name = loc_name ;
943 while( (singletonPos = getSingletonPos(mod_loc_name)) > -1){
944 if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){
945 /* private subtag start found */
946 if( singletonPos + 2 == len){
947 /* loc_name ends with '-x-' ; return NULL */
948 }
949 else{
950 /* result = mod_loc_name + singletonPos +2; */
951 result = zend_string_init(mod_loc_name + singletonPos+2 , (len -( singletonPos +2) ), 0);
952 }
953 break;
954 }
955 else{
956 if((size_t)(singletonPos + 1) >= len){
957 /* String end */
958 break;
959 } else {
960 /* singleton found but not a private subtag , hence check further in the string for the private subtag */
961 mod_loc_name = mod_loc_name + singletonPos +1;
962 len = strlen(mod_loc_name);
963 }
964 }
965 } /* end of while */
966 }
967
968 return result;
969 }
970 /* }}} */
971
972 /* {{{ code used by locale_parse */
add_array_entry(const char * loc_name,zval * hash_arr,char * key_name)973 static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name)
974 {
975 zend_string* key_value = NULL;
976 char* cur_key_name = NULL;
977 char* token = NULL;
978 char* last_ptr = NULL;
979
980 int result = 0;
981 int cur_result = 0;
982
983
984 if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
985 key_value = get_private_subtags( loc_name );
986 result = 1;
987 } else {
988 key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
989 }
990 if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
991 ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
992 if( result > 0 && key_value){
993 int cnt = 0;
994 /* Tokenize on the "_" or "-" */
995 token = php_strtok_r( key_value->val , DELIMITER ,&last_ptr);
996 if( cur_key_name ){
997 efree( cur_key_name);
998 }
999 cur_key_name = (char*)ecalloc( 25, 25);
1000 sprintf( cur_key_name , "%s%d", key_name , cnt++);
1001 add_assoc_string( hash_arr, cur_key_name , token);
1002 /* tokenize on the "_" or "-" and stop at singleton if any */
1003 while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1004 sprintf( cur_key_name , "%s%d", key_name , cnt++);
1005 add_assoc_string( hash_arr, cur_key_name , token);
1006 }
1007 /*
1008 if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1009 }
1010 */
1011 }
1012 if (key_value) {
1013 zend_string_release_ex(key_value, 0);
1014 }
1015 } else {
1016 if( result == 1 ){
1017 add_assoc_str( hash_arr, key_name , key_value);
1018 cur_result = 1;
1019 } else if (key_value) {
1020 zend_string_release_ex(key_value, 0);
1021 }
1022 }
1023
1024 if( cur_key_name ){
1025 efree( cur_key_name);
1026 }
1027 /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1028 return cur_result;
1029 }
1030 /* }}} */
1031
1032 /* {{{ parses a locale-id into an array the different parts of it */
PHP_FUNCTION(locale_parse)1033 PHP_FUNCTION(locale_parse)
1034 {
1035 const char* loc_name = NULL;
1036 size_t loc_name_len = 0;
1037 int grOffset = 0;
1038
1039 intl_error_reset( NULL );
1040
1041 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1042 &loc_name, &loc_name_len ) == FAILURE)
1043 {
1044 RETURN_THROWS();
1045 }
1046
1047 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1048
1049 if(loc_name_len == 0) {
1050 loc_name = intl_locale_get_default();
1051 }
1052
1053 array_init( return_value );
1054
1055 grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
1056 if( grOffset >= 0 ){
1057 add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1058 }
1059 else{
1060 /* Not grandfathered */
1061 add_array_entry( loc_name , return_value , LOC_LANG_TAG);
1062 add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG);
1063 add_array_entry( loc_name , return_value , LOC_REGION_TAG);
1064 add_array_entry( loc_name , return_value , LOC_VARIANT_TAG);
1065 add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG);
1066 }
1067 }
1068 /* }}} */
1069
1070 /* {{{ gets an array containing the list of variants, or null */
PHP_FUNCTION(locale_get_all_variants)1071 PHP_FUNCTION(locale_get_all_variants)
1072 {
1073 const char* loc_name = NULL;
1074 size_t loc_name_len = 0;
1075
1076 int result = 0;
1077 char* token = NULL;
1078 zend_string* variant = NULL;
1079 char* saved_ptr = NULL;
1080
1081 intl_error_reset( NULL );
1082
1083 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1084 &loc_name, &loc_name_len ) == FAILURE)
1085 {
1086 RETURN_THROWS();
1087 }
1088
1089 if(loc_name_len == 0) {
1090 loc_name = intl_locale_get_default();
1091 loc_name_len = strlen(loc_name);
1092 }
1093
1094 INTL_CHECK_LOCALE_LEN(loc_name_len);
1095
1096 array_init( return_value );
1097
1098 /* If the locale is grandfathered, stop, no variants */
1099 if( findOffset( LOC_GRANDFATHERED , loc_name ) >= 0 ){
1100 /* ("Grandfathered Tag. No variants."); */
1101 }
1102 else {
1103 /* Call ICU variant */
1104 variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1105 if( result > 0 && variant){
1106 /* Tokenize on the "_" or "-" */
1107 token = php_strtok_r( variant->val , DELIMITER , &saved_ptr);
1108 add_next_index_stringl( return_value, token , strlen(token));
1109 /* tokenize on the "_" or "-" and stop at singleton if any */
1110 while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1111 add_next_index_stringl( return_value, token , strlen(token));
1112 }
1113 }
1114 if( variant ){
1115 zend_string_release_ex( variant, 0 );
1116 }
1117 }
1118
1119
1120 }
1121 /* }}} */
1122
/* {{{ Converts to lower case and also replaces all hyphens with the underscore
 *
 * str    - NUL-terminated input; may be NULL.
 * retstr - destination buffer; must have room for strlen(str) + 1 bytes.
 *
 * Returns 0 without touching retstr when str is NULL or empty; otherwise
 * writes the converted, NUL-terminated copy to retstr and returns 1.
 */
static int strToMatch(const char* str ,char *retstr)
{
	if( (!str) || str[0] == '\0'){
		return 0;
	}

	for( ; *str != '\0'; str++, retstr++ ){
		if( *str == '-' ){
			*retstr = '_';
		} else {
			/* tolower() requires a value representable as unsigned char (or
			 * EOF); a plain char may be negative for bytes >= 0x80. */
			*retstr = (char)tolower((unsigned char)*str);
		}
	}
	*retstr = '\0';

	return 1;
}
1152 /* }}} */
1153
1154 /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
1155 /* }}} */
1156 /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
PHP_FUNCTION(locale_filter_matches)1157 PHP_FUNCTION(locale_filter_matches)
1158 {
1159 char* lang_tag = NULL;
1160 size_t lang_tag_len = 0;
1161 const char* loc_range = NULL;
1162 size_t loc_range_len = 0;
1163
1164 int result = 0;
1165 char* token = 0;
1166 char* chrcheck = NULL;
1167
1168 zend_string* can_lang_tag = NULL;
1169 zend_string* can_loc_range = NULL;
1170
1171 char* cur_lang_tag = NULL;
1172 char* cur_loc_range = NULL;
1173
1174 zend_bool boolCanonical = 0;
1175 UErrorCode status = U_ZERO_ERROR;
1176
1177 intl_error_reset( NULL );
1178
1179 if(zend_parse_parameters( ZEND_NUM_ARGS(), "ss|b",
1180 &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1181 &boolCanonical) == FAILURE)
1182 {
1183 RETURN_THROWS();
1184 }
1185
1186 if(loc_range_len == 0) {
1187 loc_range = intl_locale_get_default();
1188 loc_range_len = strlen(loc_range);
1189 }
1190
1191 if( strcmp(loc_range,"*")==0){
1192 RETURN_TRUE;
1193 }
1194
1195 INTL_CHECK_LOCALE_LEN(loc_range_len);
1196 INTL_CHECK_LOCALE_LEN(lang_tag_len);
1197
1198 if( boolCanonical ){
1199 /* canonicalize loc_range */
1200 can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1201 if( result <=0) {
1202 intl_error_set( NULL, status,
1203 "locale_filter_matches : unable to canonicalize loc_range" , 0 );
1204 RETURN_FALSE;
1205 }
1206
1207 /* canonicalize lang_tag */
1208 can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result , 0);
1209 if( result <=0) {
1210 intl_error_set( NULL, status,
1211 "locale_filter_matches : unable to canonicalize lang_tag" , 0 );
1212 RETURN_FALSE;
1213 }
1214
1215 /* Convert to lower case for case-insensitive comparison */
1216 cur_lang_tag = ecalloc( 1, can_lang_tag->len + 1);
1217
1218 /* Convert to lower case for case-insensitive comparison */
1219 result = strToMatch( can_lang_tag->val , cur_lang_tag);
1220 if( result == 0) {
1221 efree( cur_lang_tag );
1222 zend_string_release_ex( can_lang_tag, 0 );
1223 RETURN_FALSE;
1224 }
1225
1226 cur_loc_range = ecalloc( 1, can_loc_range->len + 1);
1227 result = strToMatch( can_loc_range->val , cur_loc_range );
1228 if( result == 0) {
1229 efree( cur_lang_tag );
1230 zend_string_release_ex( can_lang_tag, 0 );
1231 efree( cur_loc_range );
1232 zend_string_release_ex( can_loc_range, 0 );
1233 RETURN_FALSE;
1234 }
1235
1236 /* check if prefix */
1237 token = strstr( cur_lang_tag , cur_loc_range );
1238
1239 if( token && (token==cur_lang_tag) ){
1240 /* check if the char. after match is SEPARATOR */
1241 chrcheck = token + (strlen(cur_loc_range));
1242 if( isIDSeparator(*chrcheck) || isKeywordSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1243 efree( cur_lang_tag );
1244 efree( cur_loc_range );
1245 if( can_lang_tag){
1246 zend_string_release_ex( can_lang_tag, 0 );
1247 }
1248 if( can_loc_range){
1249 zend_string_release_ex( can_loc_range, 0 );
1250 }
1251 RETURN_TRUE;
1252 }
1253 }
1254
1255 /* No prefix as loc_range */
1256 if( cur_lang_tag){
1257 efree( cur_lang_tag );
1258 }
1259 if( cur_loc_range){
1260 efree( cur_loc_range );
1261 }
1262 if( can_lang_tag){
1263 zend_string_release_ex( can_lang_tag, 0 );
1264 }
1265 if( can_loc_range){
1266 zend_string_release_ex( can_loc_range, 0 );
1267 }
1268 RETURN_FALSE;
1269
1270 } /* end of if isCanonical */
1271 else{
1272 /* Convert to lower case for case-insensitive comparison */
1273 cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1274
1275 result = strToMatch( lang_tag , cur_lang_tag);
1276 if( result == 0) {
1277 efree( cur_lang_tag );
1278 RETURN_FALSE;
1279 }
1280 cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1281 result = strToMatch( loc_range , cur_loc_range );
1282 if( result == 0) {
1283 efree( cur_lang_tag );
1284 efree( cur_loc_range );
1285 RETURN_FALSE;
1286 }
1287
1288 /* check if prefix */
1289 token = strstr( cur_lang_tag , cur_loc_range );
1290
1291 if( token && (token==cur_lang_tag) ){
1292 /* check if the char. after match is SEPARATOR */
1293 chrcheck = token + (strlen(cur_loc_range));
1294 if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1295 efree( cur_lang_tag );
1296 efree( cur_loc_range );
1297 RETURN_TRUE;
1298 }
1299 }
1300
1301 /* No prefix as loc_range */
1302 if( cur_lang_tag){
1303 efree( cur_lang_tag );
1304 }
1305 if( cur_loc_range){
1306 efree( cur_loc_range );
1307 }
1308 RETURN_FALSE;
1309
1310 }
1311 }
1312 /* }}} */
1313
/* Releases a pair array: frees the entry at every even index (0, 2, ...)
 * for the first arr_size pairs when it is non-NULL, then frees arr itself.
 * The odd-indexed entries are never freed here.
 */
static void array_cleanup( char* arr[] , int arr_size)
{
	int pair = 0;

	while( pair < arr_size ){
		char *owned = arr[pair * 2];

		if( owned != NULL ){
			efree( owned );
		}
		pair++;
	}

	efree(arr);
}
1324
1325 #define LOOKUP_CLEAN_RETURN(value) array_cleanup(cur_arr, cur_arr_len); return (value)
1326 /* {{{
1327 * returns the lookup result to lookup_loc_range_src_php
1328 * internal function
1329 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize)1330 static zend_string* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize )
1331 {
1332 int i = 0;
1333 int cur_arr_len = 0;
1334 int result = 0;
1335
1336 zend_string* lang_tag = NULL;
1337 zval* ele_value = NULL;
1338
1339 char* cur_loc_range = NULL;
1340 zend_string* can_loc_range = NULL;
1341 zend_off_t saved_pos = 0;
1342
1343 zend_string* return_value = NULL;
1344
1345 char **cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1346 ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1347 /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1348 if(Z_TYPE_P(ele_value)!= IS_STRING) {
1349 /* element value is not a string */
1350 zend_argument_type_error(2, "must only contain string values");
1351 LOOKUP_CLEAN_RETURN(NULL);
1352 }
1353 cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1354 result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1355 if(result == 0) {
1356 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0);
1357 LOOKUP_CLEAN_RETURN(NULL);
1358 }
1359 cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1360 cur_arr_len++ ;
1361 } ZEND_HASH_FOREACH_END(); /* end of for */
1362
1363 /* Canonicalize array elements */
1364 if(canonicalize) {
1365 for(i=0; i<cur_arr_len; i++) {
1366 lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1367 if(result != 1 || lang_tag == NULL || !lang_tag->val[0]) {
1368 if(lang_tag) {
1369 zend_string_release_ex(lang_tag, 0);
1370 }
1371 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1372 LOOKUP_CLEAN_RETURN(NULL);
1373 }
1374 cur_arr[i*2] = erealloc(cur_arr[i*2], lang_tag->len+1);
1375 result = strToMatch(lang_tag->val, cur_arr[i*2]);
1376 zend_string_release_ex(lang_tag, 0);
1377 if(result == 0) {
1378 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1379 LOOKUP_CLEAN_RETURN(NULL);
1380 }
1381 }
1382
1383 }
1384
1385 if(canonicalize) {
1386 /* Canonicalize the loc_range */
1387 can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1388 if( result != 1 || can_loc_range == NULL || !can_loc_range->val[0]) {
1389 /* Error */
1390 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 );
1391 if(can_loc_range) {
1392 zend_string_release_ex(can_loc_range, 0);
1393 }
1394 LOOKUP_CLEAN_RETURN(NULL);
1395 } else {
1396 loc_range = can_loc_range->val;
1397 }
1398 }
1399
1400 cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1401 /* convert to lower and replace hyphens */
1402 result = strToMatch(loc_range, cur_loc_range);
1403 if(can_loc_range) {
1404 zend_string_release_ex(can_loc_range, 0);
1405 }
1406 if(result == 0) {
1407 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1408 LOOKUP_CLEAN_RETURN(NULL);
1409 }
1410
1411 /* Lookup for the lang_tag match */
1412 saved_pos = strlen(cur_loc_range);
1413 while(saved_pos > 0) {
1414 for(i=0; i< cur_arr_len; i++){
1415 if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1416 /* Match found */
1417 char *str = canonicalize ? cur_arr[i*2] : cur_arr[i*2+1];
1418 return_value = zend_string_init(str, strlen(str), 0);
1419 efree(cur_loc_range);
1420 LOOKUP_CLEAN_RETURN(return_value);
1421 }
1422 }
1423 saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1424 }
1425
1426 /* Match not found */
1427 efree(cur_loc_range);
1428 LOOKUP_CLEAN_RETURN(NULL);
1429 }
1430 /* }}} */
1431
1432 /* {{{ Searches the items in $langtag for the best match to the language
1433 * range
1434 */
1435 /* }}} */
1436 /* {{{ Searches the items in $langtag for the best match to the language
1437 * range
1438 */
PHP_FUNCTION(locale_lookup)1439 PHP_FUNCTION(locale_lookup)
1440 {
1441 zend_string* fallback_loc_str = NULL;
1442 const char* loc_range = NULL;
1443 size_t loc_range_len = 0;
1444
1445 zval* arr = NULL;
1446 HashTable* hash_arr = NULL;
1447 zend_bool boolCanonical = 0;
1448 zend_string* result_str = NULL;
1449
1450 intl_error_reset( NULL );
1451
1452 if(zend_parse_parameters( ZEND_NUM_ARGS(), "as|bS!", &arr, &loc_range, &loc_range_len,
1453 &boolCanonical, &fallback_loc_str) == FAILURE) {
1454 RETURN_THROWS();
1455 }
1456
1457 if(loc_range_len == 0) {
1458 if(fallback_loc_str) {
1459 loc_range = ZSTR_VAL(fallback_loc_str);
1460 loc_range_len = ZSTR_LEN(fallback_loc_str);
1461 } else {
1462 loc_range = intl_locale_get_default();
1463 loc_range_len = strlen(loc_range);
1464 }
1465 }
1466
1467 hash_arr = Z_ARRVAL_P(arr);
1468
1469 INTL_CHECK_LOCALE_LEN(loc_range_len);
1470
1471 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1472 RETURN_EMPTY_STRING();
1473 }
1474
1475 result_str = lookup_loc_range(loc_range, hash_arr, boolCanonical);
1476 if(result_str == NULL || ZSTR_VAL(result_str)[0] == '\0') {
1477 if( fallback_loc_str ) {
1478 result_str = zend_string_copy(fallback_loc_str);
1479 } else {
1480 RETURN_EMPTY_STRING();
1481 }
1482 }
1483
1484 RETURN_STR(result_str);
1485 }
1486 /* }}} */
1487
1488 /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
1489 /* }}} */
1490 /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
PHP_FUNCTION(locale_accept_from_http)1491 PHP_FUNCTION(locale_accept_from_http)
1492 {
1493 UEnumeration *available;
1494 char *http_accept = NULL;
1495 size_t http_accept_len;
1496 UErrorCode status = 0;
1497 int len;
1498 char resultLocale[INTL_MAX_LOCALE_LEN+1];
1499 UAcceptResult outResult;
1500
1501 if(zend_parse_parameters( ZEND_NUM_ARGS(), "s", &http_accept, &http_accept_len) == FAILURE)
1502 {
1503 RETURN_THROWS();
1504 }
1505 if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1506 /* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1507 char *start = http_accept;
1508 char *end;
1509 size_t len;
1510 do {
1511 end = strchr(start, ',');
1512 len = end ? end-start : http_accept_len-(start-http_accept);
1513 if(len > ULOC_FULLNAME_CAPACITY) {
1514 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1515 "locale_accept_from_http: locale string too long", 0 );
1516 RETURN_FALSE;
1517 }
1518 if(end) {
1519 start = end+1;
1520 }
1521 } while(end != NULL);
1522 }
1523
1524 available = ures_openAvailableLocales(NULL, &status);
1525 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1526 len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1527 &outResult, http_accept, available, &status);
1528 uenum_close(available);
1529 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1530 if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1531 RETURN_FALSE;
1532 }
1533 RETURN_STRINGL(resultLocale, len);
1534 }
1535 /* }}} */
1536