1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Kirti Velankar <kirtig@yahoo-inc.com> |
14 +----------------------------------------------------------------------+
15 */
16
17 /* $Id$ */
18
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22
23 #include <unicode/ustring.h>
24 #include <unicode/udata.h>
25 #include <unicode/putil.h>
26 #include <unicode/ures.h>
27
28 #include "php_intl.h"
29 #include "locale.h"
30 #include "locale_class.h"
31 #include "locale_methods.h"
32 #include "intl_convert.h"
33 #include "intl_data.h"
34
35 #include <zend_API.h>
36 #include <zend.h>
37 #include <php.h>
38 #include "main/php_ini.h"
39 #include "ext/standard/php_smart_str.h"
40
41 ZEND_EXTERN_MODULE_GLOBALS( intl )
42
/* String constants used when composing and parsing locale IDs.
 * SEPARATOR is what append_key_value() and friends put between subtags,
 * DELIMITER is the separator set handed to php_strtok_r(), PRIVATE_PREFIX is
 * the singleton emitted by add_prefix() and DISP_NAME is the tag name passed
 * by locale_get_display_name().
 * NOTE(review): SEPARATOR1 and EXTLANG_PREFIX are not referenced in the part
 * of the file reviewed here - confirm they are still used elsewhere. */
#define SEPARATOR "_"
#define SEPARATOR1 "-"
#define DELIMITER "-_"
#define EXTLANG_PREFIX "a"
#define PRIVATE_PREFIX "x"
#define DISP_NAME "name"

/* Upper bounds on how many numbered keys (e.g. variant0 .. variant14) are
 * probed by append_multiple_key_values() for each kind of subtag.
 * NOTE(review): MAX_NO_LOOKUP_LANG_TAG is not referenced in the part of the
 * file reviewed here. */
#define MAX_NO_VARIANT 15
#define MAX_NO_EXTLANG 3
#define MAX_NO_PRIVATE 15
#define MAX_NO_LOOKUP_LANG_TAG 100

/* Returned by append_key_value() when the requested key is not in the array */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN 11
#define EXTLANG_KEYNAME_LEN 10
#define PRIVATE_KEYNAME_LEN 11
62
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code. The list is NULL-terminated.
 *
 * Order matters: getPreferredTag() maps entry i of this table to entry i of
 * LOC_PREFERRED_GRANDFATHERED, so the tags that have a preferred value must
 * stay at the front, in the same order as their replacements.
 */
static const char * const LOC_GRANDFATHERED[] = {
	/* tags that have a preferred value */
	"art-lojban",
	"i-klingon",
	"i-lux",
	"i-navajo",
	"no-bok",
	"no-nyn",
	/* tags without a preferred value */
	"cel-gaulish",
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-mingo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"sgn-BE-fr",
	"sgn-BE-nl",
	"sgn-CH-de",
	"zh-cmn",
	"zh-cmn-Hans",
	"zh-cmn-Hant",
	"zh-gan",
	"zh-guoyu",
	"zh-hakka",
	"zh-min",
	"zh-min-nan",
	"zh-wuu",
	"zh-xiang",
	"zh-yue",
	NULL
};
78
/* Preferred values for grandfathered tags, based on the IANA registry at the
 * time of writing this code.
 *
 * Kept in sync with LOC_GRANDFATHERED: the preferred value at offset i
 * replaces the grandfathered tag at offset i (named in the trailing comment).
 * The list is NULL-terminated.
 */
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
	"jbo",	/* art-lojban */
	"tlh",	/* i-klingon */
	"lb",	/* i-lux */
	"nv",	/* i-navajo */
	"nb",	/* no-bok */
	"nn",	/* no-nyn */
	NULL
};

/* Number of preferred values, not counting the NULL terminator */
static const int LOC_PREFERRED_GRANDFATHERED_LEN =
	(int)(sizeof(LOC_PREFERRED_GRANDFATHERED) / sizeof(LOC_PREFERRED_GRANDFATHERED[0])) - 1;
90
/* Character classification helpers for locale ID parsing.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. a conditional, or `p + 1` for the string forms) expand as intended.
 * Arguments may still be evaluated more than once, so do not pass
 * expressions with side effects.
 */

/* returns TRUE if a is an ID separator ('_' or '-'), FALSE otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
/* returns TRUE if a is the keyword separator '@' */
#define isKeywordSeparator(a) ((a) == '@')
/* returns TRUE if a is the terminating NUL of a tag */
#define isEndOfTag(a) ((a) == '\0')

/* returns TRUE if a is one of the special prefix letters x/X/i/I */
#define isPrefixLetter(a) (((a) == 'x') || ((a) == 'X') || ((a) == 'i') || ((a) == 'I'))

/* returns TRUE if one of the special prefixes is here (s=string)
 * 'x-' or 'i-' (either separator character is accepted) */
#define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
/* returns TRUE if the string s starts with the keyword separator */
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant */
#define isTerminator(a) (((a) == 0) || ((a) == '.') || ((a) == '@'))
106
/* {{{ findOffset
 * Linear search of the NULL-terminated string array 'list' for an exact
 * (case-sensitive, strcmp) match of 'key'.
 * Returns the zero-based offset of the first match, or -1 if not present. */
static int16_t findOffset(const char* const* list, const char* key)
{
	const char* const* cursor;

	for (cursor = list; *cursor != NULL; cursor++) {
		if (strcmp(key, *cursor) == 0) {
			return (int16_t)(cursor - list);
		}
	}

	return -1;
}
/*}}}*/
123
getPreferredTag(const char * gf_tag)124 static char* getPreferredTag(const char* gf_tag)
125 {
126 char* result = NULL;
127 int grOffset = 0;
128
129 grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130 if(grOffset < 0) {
131 return NULL;
132 }
133 if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134 /* return preferred tag */
135 result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136 } else {
137 /* Return correct grandfathered language tag */
138 result = estrdup( LOC_GRANDFATHERED[grOffset] );
139 }
140 return result;
141 }
142
/* {{{ getStrrtokenPos
 * Reverse-direction token search used for lookup (a strtok_r that walks
 * backwards). Scans str from index savedPos-1 down to 0 for the nearest
 * separator ('_' or '-', the same test as isIDSeparator).
 *
 * If the separator found is preceded by a one-character subtag (a singleton,
 * i.e. there is another separator two positions earlier), the position of
 * that earlier separator is reported instead, so the singleton is dropped
 * together with the token.
 *
 * Returns the position found, or -1 if there is no token. A position below 1
 * is also reported as -1; this covers invalid locales such as '-x-xyz' or
 * '-sl_Latn'.
 */
static int getStrrtokenPos(char* str, int savedPos)
{
	int pos = savedPos;

	while( --pos >= 0 ){
		if( str[pos] != '_' && str[pos] != '-' ){
			continue;
		}

		/* separator found; skip back over a singleton if there is one */
		if( pos >= 2 && ( str[pos-2] == '_' || str[pos-2] == '-' ) ){
			pos -= 2;
		}
		return ( pos < 1 ) ? -1 : pos;
	}

	return -1;
}
/* }}} */
172
/* {{{ getSingletonPos
 * Forward search for the first singleton (one-character subtag) in str.
 *
 * Returns:
 *   0   if the first separator is at index 1, i.e. the string itself starts
 *       with a singleton (forms such as "x-avy" or "a-prv1");
 *   i+1 the index of the singleton character, when the separator at index i
 *       is followed two positions later by another separator;
 *  -1   if str is NULL, empty, or contains no singleton.
 *
 * The look-ahead is bounds-checked: a separator in the last position of the
 * string used to make the original code read one byte past the terminating
 * NUL.
 */
static int getSingletonPos(const char* str)
{
	int len;
	int i;

	if( str == NULL ){
		return -1;
	}

	len = (int)strlen(str);
	for( i = 0; i < len; i++ ){
		/* same test as isIDSeparator() */
		if( str[i] != '_' && str[i] != '-' ){
			continue;
		}
		if( i == 1 ){
			/* string is of the form x-avy or a-prv1 */
			return 0;
		}
		/* separator found; a singleton follows only if index i+2 is still
		 * inside the string and holds another separator */
		if( i + 2 < len && ( str[i+2] == '_' || str[i+2] == '-' ) ){
			return i + 1;
		}
	}

	return -1;
}
/* }}} */
206
/* {{{ proto static string Locale::getDefault( )
   Get default locale */
/* }}} */
/* {{{ proto static string locale_get_default( )
   Get default locale.
   Returns the string produced by intl_locale_get_default(); the TRUE flag
   asks RETURN_STRING to return a duplicate of it. Takes no arguments and
   performs no argument parsing. */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
	RETURN_STRING( intl_locale_get_default( TSRMLS_C ), TRUE );
}

/* }}} */
218
219 /* {{{ proto static string Locale::setDefault( string $locale )
220 Set default locale */
221 /* }}} */
222 /* {{{ proto static string locale_set_default( string $locale )
223 Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)224 PHP_NAMED_FUNCTION(zif_locale_set_default)
225 {
226 char* locale_name = NULL;
227 int len=0;
228
229 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
230 &locale_name ,&len ) == FAILURE)
231 {
232 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233 "locale_set_default: unable to parse input params", 0 TSRMLS_CC );
234
235 RETURN_FALSE;
236 }
237
238 if(len == 0) {
239 locale_name = (char *)uloc_getDefault() ;
240 len = strlen(locale_name);
241 }
242
243 zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
244
245 RETURN_TRUE;
246 }
247 /* }}} */
248
249 /* {{{
250 * Gets the value from ICU
251 * common code shared by get_primary_language,get_script or get_region or get_variant
252 * result = 0 if error, 1 if successful , -1 if no value
253 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)254 static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
255 {
256 char* tag_value = NULL;
257 int32_t tag_value_len = 512;
258
259 int singletonPos = 0;
260 char* mod_loc_name = NULL;
261 int grOffset = 0;
262
263 int32_t buflen = 512;
264 UErrorCode status = U_ZERO_ERROR;
265
266
267 if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
268 /* Handle grandfathered languages */
269 grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
270 if( grOffset >= 0 ){
271 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
272 return estrdup(loc_name);
273 } else {
274 /* Since Grandfathered , no value , do nothing , retutn NULL */
275 return NULL;
276 }
277 }
278
279 if( fromParseLocale==1 ){
280 /* Handle singletons */
281 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
282 if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
283 return estrdup(loc_name);
284 }
285 }
286
287 singletonPos = getSingletonPos( loc_name );
288 if( singletonPos == 0){
289 /* singleton at start of script, region , variant etc.
290 * or invalid singleton at start of language */
291 return NULL;
292 } else if( singletonPos > 0 ){
293 /* singleton at some position except at start
294 * strip off the singleton and rest of the loc_name */
295 mod_loc_name = estrndup ( loc_name , singletonPos-1);
296 }
297 } /* end of if fromParse */
298
299 } /* end of if != LOC_CANONICAL_TAG */
300
301 if( mod_loc_name == NULL){
302 mod_loc_name = estrdup(loc_name );
303 }
304
305 /* Proceed to ICU */
306 do{
307 tag_value = erealloc( tag_value , buflen );
308 tag_value_len = buflen;
309
310 if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
311 buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
312 }
313 if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
314 buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
315 }
316 if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
317 buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
318 }
319 if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
320 buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
321 }
322 if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
323 buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
324 }
325
326 if( U_FAILURE( status ) ) {
327 if( status == U_BUFFER_OVERFLOW_ERROR ) {
328 status = U_ZERO_ERROR;
329 buflen++; /* add space for \0 */
330 continue;
331 }
332
333 /* Error in retriving data */
334 *result = 0;
335 if( tag_value ){
336 efree( tag_value );
337 }
338 if( mod_loc_name ){
339 efree( mod_loc_name);
340 }
341 return NULL;
342 }
343 } while( buflen > tag_value_len );
344
345 if( buflen ==0 ){
346 /* No value found */
347 *result = -1;
348 if( tag_value ){
349 efree( tag_value );
350 }
351 if( mod_loc_name ){
352 efree( mod_loc_name);
353 }
354 return NULL;
355 } else {
356 *result = 1;
357 }
358
359 if( mod_loc_name ){
360 efree( mod_loc_name);
361 }
362 return tag_value;
363 }
364 /* }}} */
365
366 /* {{{
367 * Gets the value from ICU , called when PHP userspace function is called
368 * common code shared by get_primary_language,get_script or get_region or get_variant
369 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)370 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
371 {
372
373 const char* loc_name = NULL;
374 int loc_name_len = 0;
375
376 char* tag_value = NULL;
377 char* empty_result = "";
378
379 int result = 0;
380 char* msg = NULL;
381
382 UErrorCode status = U_ZERO_ERROR;
383
384 intl_error_reset( NULL TSRMLS_CC );
385
386 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
387 &loc_name ,&loc_name_len ) == FAILURE) {
388 spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
389 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 TSRMLS_CC );
390 efree(msg);
391
392 RETURN_FALSE;
393 }
394
395 if(loc_name_len == 0) {
396 loc_name = intl_locale_get_default(TSRMLS_C);
397 }
398
399 /* Call ICU get */
400 tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
401
402 /* No value found */
403 if( result == -1 ) {
404 if( tag_value){
405 efree( tag_value);
406 }
407 RETURN_STRING( empty_result , TRUE);
408 }
409
410 /* value found */
411 if( tag_value){
412 RETURN_STRING( tag_value , FALSE);
413 }
414
415 /* Error encountered while fetching the value */
416 if( result ==0) {
417 spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
418 intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
419 efree(msg);
420 RETURN_NULL();
421 }
422
423 }
424 /* }}} */
425
/* {{{ proto static string Locale::getScript($locale)
 * gets the script for the $locale
 }}} */
/* {{{ proto static string locale_get_script($locale)
 * gets the script for the $locale
 *
 * Thin wrapper: forwards the call unchanged to get_icu_value_src_php() with
 * LOC_SCRIPT_TAG; argument parsing and the return value are handled there.
 */
PHP_FUNCTION( locale_get_script )
{
	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
437
/* {{{ proto static string Locale::getRegion($locale)
 * gets the region for the $locale
 }}} */
/* {{{ proto static string locale_get_region($locale)
 * gets the region for the $locale
 *
 * Thin wrapper: forwards the call unchanged to get_icu_value_src_php() with
 * LOC_REGION_TAG; argument parsing and the return value are handled there.
 */
PHP_FUNCTION( locale_get_region )
{
	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
449
/* {{{ proto static string Locale::getPrimaryLanguage($locale)
 * gets the primary language for the $locale
 }}} */
/* {{{ proto static string locale_get_primary_language($locale)
 * gets the primary language for the $locale
 *
 * Thin wrapper: forwards the call unchanged to get_icu_value_src_php() with
 * LOC_LANG_TAG; argument parsing and the return value are handled there.
 */
PHP_FUNCTION(locale_get_primary_language )
{
	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
461
462
/* {{{
 * common code shared by display_xyz functions to get the value from ICU
 *
 * tag_name selects what is displayed: LOC_LANG_TAG, LOC_SCRIPT_TAG,
 * LOC_REGION_TAG, LOC_VARIANT_TAG or DISP_NAME (the whole locale name).
 * Userspace arguments: the locale to describe and, optionally, the locale in
 * which to describe it (the default locale is used when omitted).
 *
 * Sets return_value to the UTF-8 display string, or to FALSE when the
 * arguments cannot be parsed, the locale name is longer than
 * ULOC_FULLNAME_CAPACITY, a grandfathered tag is asked for anything but its
 * language, ICU fails, or the UTF-16 to UTF-8 conversion fails.
 }}} */
static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
{
	const char* loc_name = NULL;
	int loc_name_len = 0;

	const char* disp_loc_name = NULL;
	int disp_loc_name_len = 0;
	/* set to 1 when disp_loc_name is our own estrdup'ed copy and must be freed */
	int free_loc_name = 0;

	/* UTF-16 output buffer filled by ICU, and its capacity in UChars */
	UChar* disp_name = NULL;
	int32_t disp_name_len = 0;

	/* locale actually handed to ICU; always an emalloc'ed copy */
	char* mod_loc_name = NULL;

	/* initial capacity; afterwards holds the length reported by ICU */
	int32_t buflen = 512;
	UErrorCode status = U_ZERO_ERROR;

	char* utf8value = NULL;
	int utf8value_len = 0;

	char* msg = NULL;
	int grOffset = 0;

	intl_error_reset( NULL TSRMLS_CC );

	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
		&loc_name, &loc_name_len ,
		&disp_loc_name ,&disp_loc_name_len ) == FAILURE)
	{
		spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 TSRMLS_CC );
		efree(msg);
		RETURN_FALSE;
	}

	if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
		/* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 TSRMLS_CC );
		efree(msg);
		RETURN_FALSE;
	}

	/* An empty locale argument means "use the default locale" */
	if(loc_name_len == 0) {
		loc_name = intl_locale_get_default(TSRMLS_C);
	}

	if( strcmp(tag_name, DISP_NAME) != 0 ){
		/* Handle grandfathered languages */
		grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
		if( grOffset >= 0 ){
			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
				/* display the preferred replacement of the grandfathered tag */
				mod_loc_name = getPreferredTag( loc_name );
			} else {
				/* Since grandfathered, there is no value; nothing has been
				 * allocated yet, so FALSE can be returned directly */
				RETURN_FALSE;
			}
		}
	} /* end of if != DISP_NAME */

	if( mod_loc_name==NULL ){
		mod_loc_name = estrdup( loc_name );
	}

	/* Check if disp_loc_name passed , if not use default locale */
	if( !disp_loc_name){
		disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
		free_loc_name = 1;
	}

	/* Get the disp_value for the given locale; when ICU reports a buffer
	 * overflow, buflen holds the length it returned and the loop retries
	 * with a buffer of that size */
	do{
		disp_name = erealloc( disp_name , buflen * sizeof(UChar) );
		disp_name_len = buflen;

		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , DISP_NAME)==0 ){
			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		}

		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it
		 * (the result is converted below using its explicit length) */
		if( U_FAILURE( status ) )
		{
			if( status == U_BUFFER_OVERFLOW_ERROR )
			{
				/* continue jumps to the loop condition, which is true here */
				status = U_ZERO_ERROR;
				continue;
			}

			/* Any other ICU failure: report it, release everything, give up */
			spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
			intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
			efree(msg);
			if( disp_name){
				efree( disp_name );
			}
			if( mod_loc_name){
				efree( mod_loc_name );
			}
			if (free_loc_name) {
				efree((void *)disp_loc_name);
				disp_loc_name = NULL;
			}
			RETURN_FALSE;
		}
	} while( buflen > disp_name_len );

	/* The locale strings are no longer needed once ICU has answered */
	if( mod_loc_name){
		efree( mod_loc_name );
	}
	if (free_loc_name) {
		efree((void *)disp_loc_name);
		disp_loc_name = NULL;
	}
	/* Convert display locale name from UTF-16 to UTF-8. */
	intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
	efree( disp_name );
	if( U_FAILURE( status ) )
	{
		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
		intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
		efree(msg);
		RETURN_FALSE;
	}

	/* FALSE: utf8value is handed over to the return value, not copied */
	RETVAL_STRINGL( utf8value, utf8value_len , FALSE);

}
/* }}} */
601
/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
 * gets the name for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string get_display_name($locale[, $in_locale = null])
 * gets the name for the $locale in $in_locale or default_locale
 *
 * Thin wrapper: forwards the call unchanged to get_icu_disp_value_src_php()
 * with DISP_NAME; argument parsing and the return value are handled there.
 */
PHP_FUNCTION(locale_get_display_name)
{
	get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
613
/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
 * gets the language for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string get_display_language($locale[, $in_locale = null])
 * gets the language for the $locale in $in_locale or default_locale
 *
 * Thin wrapper: forwards the call unchanged to get_icu_disp_value_src_php()
 * with LOC_LANG_TAG; argument parsing and the return value are handled there.
 */
PHP_FUNCTION(locale_get_display_language)
{
	get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
625
/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
 * gets the script for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string get_display_script($locale, $in_locale = null)
 * gets the script for the $locale in $in_locale or default_locale
 *
 * Thin wrapper: forwards the call unchanged to get_icu_disp_value_src_php()
 * with LOC_SCRIPT_TAG; argument parsing and the return value are handled there.
 */
PHP_FUNCTION(locale_get_display_script)
{
	get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
637
/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
 * gets the region for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string get_display_region($locale, $in_locale = null)
 * gets the region for the $locale in $in_locale or default_locale
 *
 * Thin wrapper: forwards the call unchanged to get_icu_disp_value_src_php()
 * with LOC_REGION_TAG; argument parsing and the return value are handled there.
 */
PHP_FUNCTION(locale_get_display_region)
{
	get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
649
/* {{{
 * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
 * gets the variant for the $locale in $in_locale or default_locale
 }}} */
/* {{{
 * proto static string get_display_variant($locale, $in_locale = null)
 * gets the variant for the $locale in $in_locale or default_locale
 *
 * Thin wrapper: forwards the call unchanged to get_icu_disp_value_src_php()
 * with LOC_VARIANT_TAG; argument parsing and the return value are handled there.
 */
PHP_FUNCTION(locale_get_display_variant)
{
	get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
663
664 /* {{{ proto static array getKeywords(string $locale) {
665 * return an associative array containing keyword-value
666 * pairs for this locale. The keys are keys to the array (doh!)
667 * }}}*/
668 /* {{{ proto static array locale_get_keywords(string $locale) {
669 * return an associative array containing keyword-value
670 * pairs for this locale. The keys are keys to the array (doh!)
671 */
PHP_FUNCTION(locale_get_keywords)672 PHP_FUNCTION( locale_get_keywords )
673 {
674 UEnumeration* e = NULL;
675 UErrorCode status = U_ZERO_ERROR;
676
677 const char* kw_key = NULL;
678 int32_t kw_key_len = 0;
679
680 const char* loc_name = NULL;
681 int loc_name_len = 0;
682
683 /*
684 ICU expects the buffer to be allocated before calling the function
685 and so the buffer size has been explicitly specified
686 ICU uloc.h #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
687 hence the kw_value buffer size is 100
688 */
689 char* kw_value = NULL;
690 int32_t kw_value_len = 100;
691
692 intl_error_reset( NULL TSRMLS_CC );
693
694 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
695 &loc_name, &loc_name_len ) == FAILURE)
696 {
697 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
698 "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
699
700 RETURN_FALSE;
701 }
702
703 if(loc_name_len == 0) {
704 loc_name = intl_locale_get_default(TSRMLS_C);
705 }
706
707 /* Get the keywords */
708 e = uloc_openKeywords( loc_name, &status );
709 if( e != NULL )
710 {
711 /* Traverse it, filling the return array. */
712 array_init( return_value );
713
714 while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
715 kw_value = ecalloc( 1 , kw_value_len );
716
717 /* Get the keyword value for each keyword */
718 kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len , &status );
719 if (status == U_BUFFER_OVERFLOW_ERROR) {
720 status = U_ZERO_ERROR;
721 kw_value = erealloc( kw_value , kw_value_len+1);
722 kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 , &status );
723 } else if(!U_FAILURE(status)) {
724 kw_value = erealloc( kw_value , kw_value_len+1);
725 }
726 if (U_FAILURE(status)) {
727 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword value for the keyword", 0 TSRMLS_CC );
728 if( kw_value){
729 efree( kw_value );
730 }
731 zval_dtor(return_value);
732 RETURN_FALSE;
733 }
734
735 add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
736 } /* end of while */
737
738 } /* end of if e!=NULL */
739
740 uenum_close( e );
741 }
742 /* }}} */
743
/* {{{ proto static string Locale::canonicalize($locale)
 * @return string the canonicalized locale
 * }}} */
/* {{{ proto static string locale_canonicalize(string $locale)
 * @param string $locale The locale string to canonicalize
 *
 * Thin wrapper: forwards the call unchanged to get_icu_value_src_php() with
 * LOC_CANONICALIZE_TAG, which parses a single string argument.
 */
PHP_FUNCTION(locale_canonicalize)
{
	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
755
756 /* {{{ append_key_value
757 * Internal function which is called from locale_compose
758 * gets the value for the key_name and appends to the loc_name
759 * returns 1 if successful , -1 if not found ,
760 * 0 if array element is not a string , -2 if buffer-overflow
761 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)762 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
763 {
764 zval** ele_value = NULL;
765
766 if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
767 if(Z_TYPE_PP(ele_value)!= IS_STRING ){
768 /* element value is not a string */
769 return FAILURE;
770 }
771 if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
772 strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
773 /* not lang or grandfathered tag */
774 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
775 }
776 smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
777 return SUCCESS;
778 }
779
780 return LOC_NOT_FOUND;
781 }
782 /* }}} */
783
784 /* {{{ append_prefix , appends the prefix needed
785 * e.g. private adds 'x'
786 */
add_prefix(smart_str * loc_name,char * key_name)787 static void add_prefix(smart_str* loc_name, char* key_name)
788 {
789 if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
790 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
791 smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
792 }
793 }
794 /* }}} */
795
796 /* {{{ append_multiple_key_values
797 * Internal function which is called from locale_compose
798 * gets the multiple values for the key_name and appends to the loc_name
799 * used for 'variant','extlang','private'
800 * returns 1 if successful , -1 if not found ,
801 * 0 if array element is not a string , -2 if buffer-overflow
802 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name TSRMLS_DC)803 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
804 {
805 zval** ele_value = NULL;
806 int i = 0;
807 int isFirstSubtag = 0;
808 int max_value = 0;
809
810 /* Variant/ Extlang/Private etc. */
811 if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
812 if( Z_TYPE_PP(ele_value) == IS_STRING ){
813 add_prefix( loc_name , key_name);
814
815 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
816 smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
817 return SUCCESS;
818 } else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
819 HashPosition pos;
820 HashTable *arr = HASH_OF(*ele_value);
821 zval **data = NULL;
822
823 zend_hash_internal_pointer_reset_ex(arr, &pos);
824 while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
825 if(Z_TYPE_PP(data) != IS_STRING) {
826 return FAILURE;
827 }
828 if (isFirstSubtag++ == 0){
829 add_prefix(loc_name , key_name);
830 }
831 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
832 smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
833 zend_hash_move_forward_ex(arr, &pos);
834 }
835 return SUCCESS;
836 } else {
837 return FAILURE;
838 }
839 } else {
840 char cur_key_name[31];
841 /* Decide the max_value: the max. no. of elements allowed */
842 if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
843 max_value = MAX_NO_VARIANT;
844 }
845 if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
846 max_value = MAX_NO_EXTLANG;
847 }
848 if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
849 max_value = MAX_NO_PRIVATE;
850 }
851
852 /* Multiple variant values as variant0, variant1 ,variant2 */
853 isFirstSubtag = 0;
854 for( i=0 ; i< max_value; i++ ){
855 snprintf( cur_key_name , 30, "%s%d", key_name , i);
856 if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
857 if( Z_TYPE_PP(ele_value)!= IS_STRING ){
858 /* variant is not a string */
859 return FAILURE;
860 }
861 /* Add the contents */
862 if (isFirstSubtag++ == 0){
863 add_prefix(loc_name , cur_key_name);
864 }
865 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
866 smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
867 }
868 } /* end of for */
869 } /* end of else */
870
871 return SUCCESS;
872 }
873 /* }}} */
874
875 /*{{{
876 * If applicable sets error message and aborts locale_compose gracefully
877 * returns 0 if locale_compose needs to be aborted
878 * otherwise returns 1
879 */
handleAppendResult(int result,smart_str * loc_name TSRMLS_DC)880 static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
881 {
882 intl_error_reset( NULL TSRMLS_CC );
883 if( result == FAILURE) {
884 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
885 "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
886 smart_str_free(loc_name);
887 return 0;
888 }
889 return 1;
890 }
891 /* }}} */
892
/* NUL-terminates the smart_str s and returns its buffer from the current PHP
 * function without copying it (last argument 0).
 * Wrapped in do { } while (0) so that the two statements stay together when
 * the macro is used as the body of an unbraced if/else. */
#define RETURN_SMART_STR(s) do { smart_str_0((s)); RETURN_STRINGL((s)->c, (s)->len, 0); } while (0)
894 /* {{{ proto static string Locale::composeLocale($array)
895 * Creates a locale by combining the parts of locale-ID passed
896 * }}} */
897 /* {{{ proto static string compose_locale($array)
898 * Creates a locale by combining the parts of locale-ID passed
899 * }}} */
PHP_FUNCTION(locale_compose)900 PHP_FUNCTION(locale_compose)
901 {
902 smart_str loc_name_s = {0};
903 smart_str *loc_name = &loc_name_s;
904 zval* arr = NULL;
905 HashTable* hash_arr = NULL;
906 int result = 0;
907
908 intl_error_reset( NULL TSRMLS_CC );
909
910 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
911 &arr) == FAILURE)
912 {
913 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
914 "locale_compose: unable to parse input params", 0 TSRMLS_CC );
915 RETURN_FALSE;
916 }
917
918 hash_arr = HASH_OF( arr );
919
920 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
921 RETURN_FALSE;
922
923 /* Check for grandfathered first */
924 result = append_key_value(loc_name, hash_arr, LOC_GRANDFATHERED_LANG_TAG);
925 if( result == SUCCESS){
926 RETURN_SMART_STR(loc_name);
927 }
928 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
929 RETURN_FALSE;
930 }
931
932 /* Not grandfathered */
933 result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
934 if( result == LOC_NOT_FOUND ){
935 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
936 "locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
937 smart_str_free(loc_name);
938 RETURN_FALSE;
939 }
940 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
941 RETURN_FALSE;
942 }
943
944 /* Extlang */
945 result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
946 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
947 RETURN_FALSE;
948 }
949
950 /* Script */
951 result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
952 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
953 RETURN_FALSE;
954 }
955
956 /* Region */
957 result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
958 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
959 RETURN_FALSE;
960 }
961
962 /* Variant */
963 result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
964 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
965 RETURN_FALSE;
966 }
967
968 /* Private */
969 result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
970 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
971 RETURN_FALSE;
972 }
973
974 RETURN_SMART_STR(loc_name);
975 }
976 /* }}} */
977
978
/*{{{ get_private_subtags
 * Parses the locale and returns the private-use subtags if existing,
 * else returns NULL.
 * e.g. for locale='en_US-x-prv1-prv2-prv3'
 * returns a newly allocated copy of the string 'prv1-prv2-prv3',
 * which the caller owns.
 *
 * The string is scanned singleton by singleton with getSingletonPos(): the
 * first 'x' or 'X' singleton ends the search (NULL if nothing follows its
 * separator); any other singleton is skipped and the scan resumes just after
 * it.
 */
static char* get_private_subtags(const char* loc_name)
{
	const char*	cursor		= loc_name;
	int		remaining	= 0;
	int		pos		= 0;

	if( loc_name == NULL || loc_name[0] == '\0' ){
		return NULL;
	}

	remaining = strlen( cursor );
	while( ( pos = getSingletonPos( cursor ) ) != -1 ){
		if( cursor[pos] == 'x' || cursor[pos] == 'X' ){
			/* private subtag start found */
			if( pos + 2 == remaining ){
				/* loc_name ends with '-x-' ; return NULL */
				return NULL;
			}
			/* copy everything after the "x" and its separator */
			return estrndup( cursor + pos + 2, remaining - ( pos + 2 ) );
		}

		if( pos + 1 >= remaining ){
			/* String end */
			break;
		}

		/* singleton found but not a private subtag, hence check further
		 * in the string for the private subtag */
		cursor += pos + 1;
		remaining = strlen( cursor );
	}

	return NULL;
}
/* }}} */
1027
1028 /* {{{ code used by locale_parse
1029 */
add_array_entry(const char * loc_name,zval * hash_arr,char * key_name TSRMLS_DC)1030 static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1031 {
1032 char* key_value = NULL;
1033 char* cur_key_name = NULL;
1034 char* token = NULL;
1035 char* last_ptr = NULL;
1036
1037 int result = 0;
1038 int cur_result = 0;
1039 int cnt = 0;
1040
1041
1042 if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1043 key_value = get_private_subtags( loc_name );
1044 result = 1;
1045 } else {
1046 key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1047 }
1048 if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1049 ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1050 if( result > 0 && key_value){
1051 /* Tokenize on the "_" or "-" */
1052 token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1053 if( cur_key_name ){
1054 efree( cur_key_name);
1055 }
1056 cur_key_name = (char*)ecalloc( 25, 25);
1057 sprintf( cur_key_name , "%s%d", key_name , cnt++);
1058 add_assoc_string( hash_arr, cur_key_name , token ,TRUE );
1059 /* tokenize on the "_" or "-" and stop at singleton if any */
1060 while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1061 sprintf( cur_key_name , "%s%d", key_name , cnt++);
1062 add_assoc_string( hash_arr, cur_key_name , token , TRUE );
1063 }
1064 /*
1065 if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1066 }
1067 */
1068 }
1069 } else {
1070 if( result == 1 ){
1071 add_assoc_string( hash_arr, key_name , key_value , TRUE );
1072 cur_result = 1;
1073 }
1074 }
1075
1076 if( cur_key_name ){
1077 efree( cur_key_name);
1078 }
1079 /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1080 if( key_value){
1081 efree(key_value);
1082 }
1083 return cur_result;
1084 }
1085 /* }}} */
1086
1087 /* {{{ proto static array Locale::parseLocale($locale)
1088 * parses a locale-id into an array the different parts of it
1089 }}} */
1090 /* {{{ proto static array parse_locale($locale)
1091 * parses a locale-id into an array the different parts of it
1092 */
PHP_FUNCTION(locale_parse)1093 PHP_FUNCTION(locale_parse)
1094 {
1095 const char* loc_name = NULL;
1096 int loc_name_len = 0;
1097 int grOffset = 0;
1098
1099 intl_error_reset( NULL TSRMLS_CC );
1100
1101 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1102 &loc_name, &loc_name_len ) == FAILURE)
1103 {
1104 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1105 "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1106
1107 RETURN_FALSE;
1108 }
1109
1110 if(loc_name_len == 0) {
1111 loc_name = intl_locale_get_default(TSRMLS_C);
1112 }
1113
1114 array_init( return_value );
1115
1116 grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
1117 if( grOffset >= 0 ){
1118 add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
1119 }
1120 else{
1121 /* Not grandfathered */
1122 add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1123 add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1124 add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1125 add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1126 add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1127 }
1128 }
1129 /* }}} */
1130
1131 /* {{{ proto static array Locale::getAllVariants($locale)
1132 * gets an array containing the list of variants, or null
1133 }}} */
1134 /* {{{ proto static array locale_get_all_variants($locale)
1135 * gets an array containing the list of variants, or null
1136 */
PHP_FUNCTION(locale_get_all_variants)1137 PHP_FUNCTION(locale_get_all_variants)
1138 {
1139 const char* loc_name = NULL;
1140 int loc_name_len = 0;
1141
1142 int result = 0;
1143 char* token = NULL;
1144 char* variant = NULL;
1145 char* saved_ptr = NULL;
1146
1147 intl_error_reset( NULL TSRMLS_CC );
1148
1149 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1150 &loc_name, &loc_name_len ) == FAILURE)
1151 {
1152 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1153 "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1154
1155 RETURN_FALSE;
1156 }
1157
1158 if(loc_name_len == 0) {
1159 loc_name = intl_locale_get_default(TSRMLS_C);
1160 }
1161
1162
1163 array_init( return_value );
1164
1165 /* If the locale is grandfathered, stop, no variants */
1166 if( findOffset( LOC_GRANDFATHERED , loc_name ) >= 0 ){
1167 /* ("Grandfathered Tag. No variants."); */
1168 }
1169 else {
1170 /* Call ICU variant */
1171 variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1172 if( result > 0 && variant){
1173 /* Tokenize on the "_" or "-" */
1174 token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1175 add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1176 /* tokenize on the "_" or "-" and stop at singleton if any */
1177 while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1178 add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1179 }
1180 }
1181 if( variant ){
1182 efree( variant );
1183 }
1184 }
1185
1186
1187 }
1188 /* }}} */
1189
/*{{{
* Converts to lower case and also replaces all hyphens with the underscore.
*
* str    - NUL-terminated input.
* retstr - output buffer; must have room for strlen(str) + 1 bytes.
*
* Returns 1 after writing the converted, NUL-terminated copy to retstr.
* Returns 0 and leaves retstr untouched when str is NULL or empty.
*/
static int strToMatch(const char* str ,char *retstr)
{
	if( (!str) || str[0] == '\0' ){
		return 0;
	}

	for( ; *str != '\0'; str++, retstr++ ){
		if( *str == '-' ){
			*retstr = '_';
		} else {
			/* tolower() is only defined for unsigned char values (and EOF);
			 * a plain char may be negative for bytes >= 0x80 */
			*retstr = (char)tolower( (unsigned char)*str );
		}
	}
	*retstr = '\0';

	return 1;
}
1221 /* }}} */
1222
1223 /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1224 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1225 */
1226 /* }}} */
1227 /* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1228 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1229 */
PHP_FUNCTION(locale_filter_matches)1230 PHP_FUNCTION(locale_filter_matches)
1231 {
1232 char* lang_tag = NULL;
1233 int lang_tag_len = 0;
1234 const char* loc_range = NULL;
1235 int loc_range_len = 0;
1236
1237 int result = 0;
1238 char* token = 0;
1239 char* chrcheck = NULL;
1240
1241 char* can_lang_tag = NULL;
1242 char* can_loc_range = NULL;
1243
1244 char* cur_lang_tag = NULL;
1245 char* cur_loc_range = NULL;
1246
1247 zend_bool boolCanonical = 0;
1248 UErrorCode status = U_ZERO_ERROR;
1249
1250 intl_error_reset( NULL TSRMLS_CC );
1251
1252 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1253 &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1254 &boolCanonical) == FAILURE)
1255 {
1256 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1257 "locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1258
1259 RETURN_FALSE;
1260 }
1261
1262 if(loc_range_len == 0) {
1263 loc_range = intl_locale_get_default(TSRMLS_C);
1264 }
1265
1266 if( strcmp(loc_range,"*")==0){
1267 RETURN_TRUE;
1268 }
1269
1270 if( boolCanonical ){
1271 /* canonicalize loc_range */
1272 can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1273 if( result ==0) {
1274 intl_error_set( NULL, status,
1275 "locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1276 RETURN_FALSE;
1277 }
1278
1279 /* canonicalize lang_tag */
1280 can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result , 0);
1281 if( result ==0) {
1282 intl_error_set( NULL, status,
1283 "locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1284 RETURN_FALSE;
1285 }
1286
1287 /* Convert to lower case for case-insensitive comparison */
1288 cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1289
1290 /* Convert to lower case for case-insensitive comparison */
1291 result = strToMatch( can_lang_tag , cur_lang_tag);
1292 if( result == 0) {
1293 efree( cur_lang_tag );
1294 efree( can_lang_tag );
1295 RETURN_FALSE;
1296 }
1297
1298 cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1299 result = strToMatch( can_loc_range , cur_loc_range );
1300 if( result == 0) {
1301 efree( cur_lang_tag );
1302 efree( can_lang_tag );
1303 efree( cur_loc_range );
1304 efree( can_loc_range );
1305 RETURN_FALSE;
1306 }
1307
1308 /* check if prefix */
1309 token = strstr( cur_lang_tag , cur_loc_range );
1310
1311 if( token && (token==cur_lang_tag) ){
1312 /* check if the char. after match is SEPARATOR */
1313 chrcheck = token + (strlen(cur_loc_range));
1314 if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1315 if( cur_lang_tag){
1316 efree( cur_lang_tag );
1317 }
1318 if( cur_loc_range){
1319 efree( cur_loc_range );
1320 }
1321 if( can_lang_tag){
1322 efree( can_lang_tag );
1323 }
1324 if( can_loc_range){
1325 efree( can_loc_range );
1326 }
1327 RETURN_TRUE;
1328 }
1329 }
1330
1331 /* No prefix as loc_range */
1332 if( cur_lang_tag){
1333 efree( cur_lang_tag );
1334 }
1335 if( cur_loc_range){
1336 efree( cur_loc_range );
1337 }
1338 if( can_lang_tag){
1339 efree( can_lang_tag );
1340 }
1341 if( can_loc_range){
1342 efree( can_loc_range );
1343 }
1344 RETURN_FALSE;
1345
1346 } /* end of if isCanonical */
1347 else{
1348 /* Convert to lower case for case-insensitive comparison */
1349 cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1350
1351 result = strToMatch( lang_tag , cur_lang_tag);
1352 if( result == 0) {
1353 efree( cur_lang_tag );
1354 RETURN_FALSE;
1355 }
1356 cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1357 result = strToMatch( loc_range , cur_loc_range );
1358 if( result == 0) {
1359 efree( cur_lang_tag );
1360 efree( cur_loc_range );
1361 RETURN_FALSE;
1362 }
1363
1364 /* check if prefix */
1365 token = strstr( cur_lang_tag , cur_loc_range );
1366
1367 if( token && (token==cur_lang_tag) ){
1368 /* check if the char. after match is SEPARATOR */
1369 chrcheck = token + (strlen(cur_loc_range));
1370 if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1371 if( cur_lang_tag){
1372 efree( cur_lang_tag );
1373 }
1374 if( cur_loc_range){
1375 efree( cur_loc_range );
1376 }
1377 RETURN_TRUE;
1378 }
1379 }
1380
1381 /* No prefix as loc_range */
1382 if( cur_lang_tag){
1383 efree( cur_lang_tag );
1384 }
1385 if( cur_loc_range){
1386 efree( cur_loc_range );
1387 }
1388 RETURN_FALSE;
1389
1390 }
1391 }
1392 /* }}} */
1393
/*
 * Releases an array of string pairs together with the array itself.
 *
 * arr holds arr_size pairs laid out as [2*i], [2*i+1]. Only the even slot of
 * each pair is freed; the odd slot is left alone (lookup_loc_range() stores
 * pointers there that it does not own).
 */
static void array_cleanup( char* arr[] , int arr_size)
{
	int pair;

	for( pair = 0; pair < arr_size; pair++ ){
		char* owned = arr[pair*2];

		if( owned ){
			efree( owned );
		}
	}
	efree(arr);
}
1404
1405 #define LOOKUP_CLEAN_RETURN(value) array_cleanup(cur_arr, cur_arr_len); return (value)
1406 /* {{{
1407 * returns the lookup result to lookup_loc_range_src_php
1408 * internal function
1409 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize TSRMLS_DC)1410 static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize TSRMLS_DC)
1411 {
1412 int i = 0;
1413 int cur_arr_len = 0;
1414 int result = 0;
1415
1416 char* lang_tag = NULL;
1417 zval** ele_value = NULL;
1418 char** cur_arr = NULL;
1419
1420 char* cur_loc_range = NULL;
1421 char* can_loc_range = NULL;
1422 int saved_pos = 0;
1423
1424 char* return_value = NULL;
1425
1426 cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1427 /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1428 for(zend_hash_internal_pointer_reset(hash_arr);
1429 zend_hash_has_more_elements(hash_arr) == SUCCESS;
1430 zend_hash_move_forward(hash_arr)) {
1431
1432 if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
1433 /* Should never actually fail since the key is known to exist.*/
1434 continue;
1435 }
1436 if(Z_TYPE_PP(ele_value)!= IS_STRING) {
1437 /* element value is not a string */
1438 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1439 LOOKUP_CLEAN_RETURN(NULL);
1440 }
1441 cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
1442 result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]);
1443 if(result == 0) {
1444 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1445 LOOKUP_CLEAN_RETURN(NULL);
1446 }
1447 cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
1448 cur_arr_len++ ;
1449 } /* end of for */
1450
1451 /* Canonicalize array elements */
1452 if(canonicalize) {
1453 for(i=0; i<cur_arr_len; i++) {
1454 lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1455 if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1456 if(lang_tag) {
1457 efree(lang_tag);
1458 }
1459 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1460 LOOKUP_CLEAN_RETURN(NULL);
1461 }
1462 cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1463 result = strToMatch(lang_tag, cur_arr[i*2]);
1464 efree(lang_tag);
1465 if(result == 0) {
1466 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1467 LOOKUP_CLEAN_RETURN(NULL);
1468 }
1469 }
1470
1471 }
1472
1473 if(canonicalize) {
1474 /* Canonicalize the loc_range */
1475 can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1476 if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1477 /* Error */
1478 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1479 if(can_loc_range) {
1480 efree(can_loc_range);
1481 }
1482 LOOKUP_CLEAN_RETURN(NULL);
1483 } else {
1484 loc_range = can_loc_range;
1485 }
1486 }
1487
1488 cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1489 /* convert to lower and replace hyphens */
1490 result = strToMatch(loc_range, cur_loc_range);
1491 if(can_loc_range) {
1492 efree(can_loc_range);
1493 }
1494 if(result == 0) {
1495 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1496 LOOKUP_CLEAN_RETURN(NULL);
1497 }
1498
1499 /* Lookup for the lang_tag match */
1500 saved_pos = strlen(cur_loc_range);
1501 while(saved_pos > 0) {
1502 for(i=0; i< cur_arr_len; i++){
1503 if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1504 /* Match found */
1505 return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1506 efree(cur_loc_range);
1507 LOOKUP_CLEAN_RETURN(return_value);
1508 }
1509 }
1510 saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1511 }
1512
1513 /* Match not found */
1514 efree(cur_loc_range);
1515 LOOKUP_CLEAN_RETURN(NULL);
1516 }
1517 /* }}} */
1518
1519 /* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1520 * Searchs the items in $langtag for the best match to the language
1521 * range
1522 */
1523 /* }}} */
1524 /* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1525 * Searchs the items in $langtag for the best match to the language
1526 * range
1527 */
PHP_FUNCTION(locale_lookup)1528 PHP_FUNCTION(locale_lookup)
1529 {
1530 char* fallback_loc = NULL;
1531 int fallback_loc_len = 0;
1532 const char* loc_range = NULL;
1533 int loc_range_len = 0;
1534
1535 zval* arr = NULL;
1536 HashTable* hash_arr = NULL;
1537 zend_bool boolCanonical = 0;
1538 char* result =NULL;
1539
1540 intl_error_reset( NULL TSRMLS_CC );
1541
1542 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1543 &boolCanonical, &fallback_loc, &fallback_loc_len) == FAILURE) {
1544 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1545 RETURN_FALSE;
1546 }
1547
1548 if(loc_range_len == 0) {
1549 loc_range = intl_locale_get_default(TSRMLS_C);
1550 }
1551
1552 hash_arr = HASH_OF(arr);
1553
1554 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1555 RETURN_EMPTY_STRING();
1556 }
1557
1558 result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1559 if(result == NULL || result[0] == '\0') {
1560 if( fallback_loc ) {
1561 result = estrndup(fallback_loc, fallback_loc_len);
1562 } else {
1563 RETURN_EMPTY_STRING();
1564 }
1565 }
1566
1567 RETVAL_STRINGL(result, strlen(result), 0);
1568 }
1569 /* }}} */
1570
1571 /* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1572 * Tries to find out best available locale based on HTTP �Accept-Language� header
1573 */
1574 /* }}} */
1575 /* {{{ proto string locale_accept_from_http(string $http_accept)
1576 * Tries to find out best available locale based on HTTP �Accept-Language� header
1577 */
PHP_FUNCTION(locale_accept_from_http)1578 PHP_FUNCTION(locale_accept_from_http)
1579 {
1580 UEnumeration *available;
1581 char *http_accept = NULL;
1582 int http_accept_len;
1583 UErrorCode status = 0;
1584 int len;
1585 char resultLocale[INTL_MAX_LOCALE_LEN+1];
1586 UAcceptResult outResult;
1587
1588 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1589 {
1590 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1591 "locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1592 RETURN_FALSE;
1593 }
1594 if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1595 /* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1596 char *start = http_accept;
1597 char *end;
1598 size_t len;
1599 do {
1600 end = strchr(start, ',');
1601 len = end ? end-start : http_accept_len-(start-http_accept);
1602 if(len > ULOC_FULLNAME_CAPACITY) {
1603 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1604 "locale_accept_from_http: locale string too long", 0 TSRMLS_CC );
1605 RETURN_FALSE;
1606 }
1607 if(end) {
1608 start = end+1;
1609 }
1610 } while(end != NULL);
1611 }
1612
1613 available = ures_openAvailableLocales(NULL, &status);
1614 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1615 len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1616 &outResult, http_accept, available, &status);
1617 uenum_close(available);
1618 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1619 if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1620 RETURN_FALSE;
1621 }
1622 RETURN_STRINGL(resultLocale, len, 1);
1623 }
1624 /* }}} */
1625
1626 /*
1627 * Local variables:
1628 * tab-width: 4
1629 * c-basic-offset: 4
1630 * End:
1631 * vim600: noet sw=4 ts=4 fdm=marker
 * vim<600: noet sw=4 ts=4
1634 */
1635