1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Kirti Velankar <kirtig@yahoo-inc.com> |
14 +----------------------------------------------------------------------+
15 */
16
17 /* $Id$ */
18
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22
23 #include <unicode/ustring.h>
24 #include <unicode/udata.h>
25 #include <unicode/putil.h>
26 #include <unicode/ures.h>
27
28 #include "php_intl.h"
29 #include "locale.h"
30 #include "locale_class.h"
31 #include "locale_methods.h"
32 #include "intl_convert.h"
33 #include "intl_data.h"
34
35 #include <zend_API.h>
36 #include <zend.h>
37 #include <php.h>
38 #include "main/php_ini.h"
39 #include "ext/standard/php_smart_str.h"
40
41 ZEND_EXTERN_MODULE_GLOBALS( intl )
42
/* Subtag separators and fixed prefixes used while composing / parsing locale IDs */
#define SEPARATOR		"_"		/* separator written between composed subtags */
#define SEPARATOR1		"-"		/* hyphen form of the separator */
#define DELIMITER		"-_"	/* delimiter set used when tokenizing subtags */
#define EXTLANG_PREFIX	"a"
#define PRIVATE_PREFIX	"x"		/* singleton written in front of private subtags */
#define DISP_NAME		"name"	/* tag name that selects the full display name */

/* Upper bounds on the number of numbered keys (variant0, variant1, ...) */
#define MAX_NO_VARIANT			15
#define MAX_NO_EXTLANG			3
#define MAX_NO_PRIVATE			15
#define MAX_NO_LOOKUP_LANG_TAG	100

/* Returned by append_key_value() when the requested key is not in the array */
#define LOC_NOT_FOUND	1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN	11
#define EXTLANG_KEYNAME_LEN	10
#define PRIVATE_KEYNAME_LEN	11
62
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code. The list is NULL-terminated so it can be scanned with
 * findOffset(). The order matters: getPreferredTag() maps the leading
 * entries to LOC_PREFERRED_GRANDFATHERED by index.
 */
static const char * const LOC_GRANDFATHERED[] = {
	/* entries that have a preferred replacement */
	"art-lojban",
	"i-klingon",
	"i-lux",
	"i-navajo",
	"no-bok",
	"no-nyn",
	/* entries returned unchanged */
	"cel-gaulish",
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-mingo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"sgn-BE-fr",
	"sgn-BE-nl",
	"sgn-CH-de",
	"zh-cmn",
	"zh-cmn-Hans",
	"zh-cmn-Hant",
	"zh-gan",
	"zh-guoyu",
	"zh-hakka",
	"zh-min",
	"zh-min-nan",
	"zh-wuu",
	"zh-xiang",
	"zh-yue",
	NULL
};
78
/* Preferred values for grandfathered tags, based on the IANA registry at the
 * time of writing this code.
 * Entry N is the preferred replacement for entry N of LOC_GRANDFATHERED; only
 * the leading entries of that list have a replacement, so this table is
 * shorter. The table is NULL-terminated.
 */
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
	"jbo",	"tlh",	"lb",
	"nv",	"nb",	"nn",
	NULL
};

/* Number of real entries above (the NULL terminator is not counted).
 * Derived from the table so the two cannot drift apart when it is edited. */
static const int LOC_PREFERRED_GRANDFATHERED_LEN =
	(int)(sizeof(LOC_PREFERRED_GRANDFATHERED) / sizeof(LOC_PREFERRED_GRANDFATHERED[0])) - 1;
90
/* Character / prefix predicates. Every macro argument is parenthesized so
 * that compound expressions (e.g. a ?: expression) are classified correctly.
 * Arguments may be evaluated more than once: do not pass expressions with
 * side effects. */

/* non-zero if a is an ID separator ('_' or '-'), 0 otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a) == 'x') || ((a) == 'X') || ((a) == 'i') || ((a) == 'I'))

/* evaluates to 1 if one of the special prefixes is here (s=string)
   'x-' or 'i-' (either case, either separator), 0 otherwise */
#define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant */
#define isTerminator(a) (((a) == 0) || ((a) == '.') || ((a) == '@'))
106
/* {{{ findOffset
 * Searches the NULL-terminated string array 'list' for an entry that is
 * exactly equal (strcmp) to 'key'.
 * Returns the zero-based index of the first match, or -1 if there is none.
 */
static int16_t findOffset(const char* const* list, const char* key)
{
	size_t idx;

	for (idx = 0; list[idx] != NULL; idx++) {
		if (strcmp(key, list[idx]) == 0) {
			return (int16_t)idx;
		}
	}

	return -1;
}
/* }}} */
123
getPreferredTag(const char * gf_tag)124 static char* getPreferredTag(const char* gf_tag)
125 {
126 char* result = NULL;
127 int grOffset = 0;
128
129 grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130 if(grOffset < 0) {
131 return NULL;
132 }
133 if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134 /* return preferred tag */
135 result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136 } else {
137 /* Return correct grandfathered language tag */
138 result = estrdup( LOC_GRANDFATHERED[grOffset] );
139 }
140 return result;
141 }
142
/* {{{ getStrrtokenPos
 * Reverse-direction strtok helper used for lookup.
 * Scans 'str' backwards, starting just before index savedPos, for the nearest
 * separator ('_' or '-', the same set as isIDSeparator).
 * If the subtag in front of that separator is a single character (a
 * singleton), the position of the separator before the singleton is used
 * instead.
 * Returns that position, or -1 if no separator is found or the position
 * would be 0 (invalid locale such as '-x-xyz' or '-sl_Latn').
 */
static int getStrrtokenPos(char* str, int savedPos)
{
	int pos = savedPos;

	while( --pos >= 0 ){
		const char cur = str[pos];

		if( cur != '_' && cur != '-' ){
			continue;
		}

		/* delimiter found; step over a singleton sitting right before it */
		if( pos >= 2 && ( str[pos-2] == '_' || str[pos-2] == '-' ) ){
			pos -= 2;
		}

		return ( pos < 1 ) ? -1 : pos;
	}

	return -1;
}
/* }}} */
172
/* {{{ getSingletonPos
 * Forward search for the first singleton (one-character subtag) in 'str'.
 * Separators are '_' and '-' (the same set as isIDSeparator).
 *
 * Returns:
 *   0      if the string starts with a singleton (e.g. "x-avy", "a-prv1")
 *   index  of the singleton character itself when it appears later
 *          (e.g. 6 for "en-US-x-priv")
 *   -1     if str is NULL, empty, or contains no singleton
 *
 * A one-character subtag at the very end of the string (nothing follows it)
 * is not reported.
 */
static int getSingletonPos(const char* str)
{
	int len;
	int i;

	if( str == NULL ){
		return -1;
	}

	len = (int)strlen( str );
	for( i = 0; i < len; i++ ){
		if( str[i] != '_' && str[i] != '-' ){
			continue;
		}
		if( i == 1 ){
			/* string is of the form x-avy or a-prv1 */
			return 0;
		}
		/* Delimiter found; the next subtag is a singleton when another
		 * delimiter follows exactly one character later. The bounds check
		 * keeps str[i+2] inside the string when the delimiter is the last
		 * character (i+2 would otherwise be past the terminating NUL). */
		if( i + 2 < len && ( str[i+2] == '_' || str[i+2] == '-' ) ){
			return i + 1;
		}
	}

	return -1;
}
/* }}} */
206
207 /* {{{ proto static string Locale::getDefault( )
208 Get default locale */
209 /* }}} */
210 /* {{{ proto static string locale_get_default( )
211 Get default locale */
PHP_NAMED_FUNCTION(zif_locale_get_default)212 PHP_NAMED_FUNCTION(zif_locale_get_default)
213 {
214 RETURN_STRING( intl_locale_get_default( TSRMLS_C ), TRUE );
215 }
216
217 /* }}} */
218
219 /* {{{ proto static string Locale::setDefault( string $locale )
220 Set default locale */
221 /* }}} */
222 /* {{{ proto static string locale_set_default( string $locale )
223 Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)224 PHP_NAMED_FUNCTION(zif_locale_set_default)
225 {
226 char* locale_name = NULL;
227 int len=0;
228
229 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
230 &locale_name ,&len ) == FAILURE)
231 {
232 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233 "locale_set_default: unable to parse input params", 0 TSRMLS_CC );
234
235 RETURN_FALSE;
236 }
237
238 if(len == 0) {
239 locale_name = (char *)uloc_getDefault() ;
240 len = strlen(locale_name);
241 }
242
243 zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
244
245 RETURN_TRUE;
246 }
247 /* }}} */
248
249 /* {{{
250 * Gets the value from ICU
251 * common code shared by get_primary_language,get_script or get_region or get_variant
252 * result = 0 if error, 1 if successful , -1 if no value
253 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)254 static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
255 {
256 char* tag_value = NULL;
257 int32_t tag_value_len = 512;
258
259 int singletonPos = 0;
260 char* mod_loc_name = NULL;
261 int grOffset = 0;
262
263 int32_t buflen = 512;
264 UErrorCode status = U_ZERO_ERROR;
265
266 if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
267 return NULL;
268 }
269
270 if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
271 /* Handle grandfathered languages */
272 grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
273 if( grOffset >= 0 ){
274 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
275 return estrdup(loc_name);
276 } else {
277 /* Since Grandfathered , no value , do nothing , retutn NULL */
278 return NULL;
279 }
280 }
281
282 if( fromParseLocale==1 ){
283 /* Handle singletons */
284 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
285 if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
286 return estrdup(loc_name);
287 }
288 }
289
290 singletonPos = getSingletonPos( loc_name );
291 if( singletonPos == 0){
292 /* singleton at start of script, region , variant etc.
293 * or invalid singleton at start of language */
294 return NULL;
295 } else if( singletonPos > 0 ){
296 /* singleton at some position except at start
297 * strip off the singleton and rest of the loc_name */
298 mod_loc_name = estrndup ( loc_name , singletonPos-1);
299 }
300 } /* end of if fromParse */
301
302 } /* end of if != LOC_CANONICAL_TAG */
303
304 if( mod_loc_name == NULL){
305 mod_loc_name = estrdup(loc_name );
306 }
307
308 /* Proceed to ICU */
309 do{
310 tag_value = erealloc( tag_value , buflen );
311 tag_value_len = buflen;
312
313 if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
314 buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
315 }
316 if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
317 buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
318 }
319 if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
320 buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
321 }
322 if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
323 buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
324 }
325 if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
326 buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
327 }
328
329 if( U_FAILURE( status ) ) {
330 if( status == U_BUFFER_OVERFLOW_ERROR ) {
331 status = U_ZERO_ERROR;
332 buflen++; /* add space for \0 */
333 continue;
334 }
335
336 /* Error in retriving data */
337 *result = 0;
338 if( tag_value ){
339 efree( tag_value );
340 }
341 if( mod_loc_name ){
342 efree( mod_loc_name);
343 }
344 return NULL;
345 }
346 } while( buflen > tag_value_len );
347
348 if( buflen ==0 ){
349 /* No value found */
350 *result = -1;
351 if( tag_value ){
352 efree( tag_value );
353 }
354 if( mod_loc_name ){
355 efree( mod_loc_name);
356 }
357 return NULL;
358 } else {
359 *result = 1;
360 }
361
362 if( mod_loc_name ){
363 efree( mod_loc_name);
364 }
365 return tag_value;
366 }
367 /* }}} */
368
369 /* {{{
370 * Gets the value from ICU , called when PHP userspace function is called
371 * common code shared by get_primary_language,get_script or get_region or get_variant
372 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)373 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
374 {
375
376 const char* loc_name = NULL;
377 int loc_name_len = 0;
378
379 char* tag_value = NULL;
380 char* empty_result = "";
381
382 int result = 0;
383 char* msg = NULL;
384
385 UErrorCode status = U_ZERO_ERROR;
386
387 intl_error_reset( NULL TSRMLS_CC );
388
389 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
390 &loc_name ,&loc_name_len ) == FAILURE) {
391 spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
392 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 TSRMLS_CC );
393 efree(msg);
394
395 RETURN_FALSE;
396 }
397
398 if(loc_name_len == 0) {
399 loc_name = intl_locale_get_default(TSRMLS_C);
400 }
401
402 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
403
404 /* Call ICU get */
405 tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
406
407 /* No value found */
408 if( result == -1 ) {
409 if( tag_value){
410 efree( tag_value);
411 }
412 RETURN_STRING( empty_result , TRUE);
413 }
414
415 /* value found */
416 if( tag_value){
417 RETURN_STRING( tag_value , FALSE);
418 }
419
420 /* Error encountered while fetching the value */
421 if( result ==0) {
422 spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
423 intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
424 efree(msg);
425 RETURN_NULL();
426 }
427
428 }
429 /* }}} */
430
431 /* {{{ proto static string Locale::getScript($locale)
432 * gets the script for the $locale
433 }}} */
434 /* {{{ proto static string locale_get_script($locale)
435 * gets the script for the $locale
436 */
PHP_FUNCTION(locale_get_script)437 PHP_FUNCTION( locale_get_script )
438 {
439 get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
440 }
441 /* }}} */
442
443 /* {{{ proto static string Locale::getRegion($locale)
444 * gets the region for the $locale
445 }}} */
446 /* {{{ proto static string locale_get_region($locale)
447 * gets the region for the $locale
448 */
PHP_FUNCTION(locale_get_region)449 PHP_FUNCTION( locale_get_region )
450 {
451 get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
452 }
453 /* }}} */
454
455 /* {{{ proto static string Locale::getPrimaryLanguage($locale)
456 * gets the primary language for the $locale
457 }}} */
458 /* {{{ proto static string locale_get_primary_language($locale)
459 * gets the primary language for the $locale
460 */
PHP_FUNCTION(locale_get_primary_language)461 PHP_FUNCTION(locale_get_primary_language )
462 {
463 get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
464 }
465 /* }}} */
466
467
468 /* {{{
469 * common code shared by display_xyz functions to get the value from ICU
470 }}} */
get_icu_disp_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)471 static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
472 {
473 const char* loc_name = NULL;
474 int loc_name_len = 0;
475
476 const char* disp_loc_name = NULL;
477 int disp_loc_name_len = 0;
478 int free_loc_name = 0;
479
480 UChar* disp_name = NULL;
481 int32_t disp_name_len = 0;
482
483 char* mod_loc_name = NULL;
484
485 int32_t buflen = 512;
486 UErrorCode status = U_ZERO_ERROR;
487
488 char* utf8value = NULL;
489 int utf8value_len = 0;
490
491 char* msg = NULL;
492 int grOffset = 0;
493
494 intl_error_reset( NULL TSRMLS_CC );
495
496 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
497 &loc_name, &loc_name_len ,
498 &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
499 {
500 spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
501 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 TSRMLS_CC );
502 efree(msg);
503 RETURN_FALSE;
504 }
505
506 if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
507 /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
508 spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
509 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, msg , 1 TSRMLS_CC );
510 efree(msg);
511 RETURN_FALSE;
512 }
513
514 if(loc_name_len == 0) {
515 loc_name = intl_locale_get_default(TSRMLS_C);
516 }
517
518 if( strcmp(tag_name, DISP_NAME) != 0 ){
519 /* Handle grandfathered languages */
520 grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
521 if( grOffset >= 0 ){
522 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
523 mod_loc_name = getPreferredTag( loc_name );
524 } else {
525 /* Since Grandfathered, no value, do nothing, retutn NULL */
526 RETURN_FALSE;
527 }
528 }
529 } /* end of if != LOC_CANONICAL_TAG */
530
531 if( mod_loc_name==NULL ){
532 mod_loc_name = estrdup( loc_name );
533 }
534
535 /* Check if disp_loc_name passed , if not use default locale */
536 if( !disp_loc_name){
537 disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
538 free_loc_name = 1;
539 }
540
541 /* Get the disp_value for the given locale */
542 do{
543 disp_name = erealloc( disp_name , buflen * sizeof(UChar) );
544 disp_name_len = buflen;
545
546 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
547 buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
548 } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
549 buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
550 } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
551 buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
552 } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
553 buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
554 } else if( strcmp(tag_name , DISP_NAME)==0 ){
555 buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
556 }
557
558 /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
559 if( U_FAILURE( status ) )
560 {
561 if( status == U_BUFFER_OVERFLOW_ERROR )
562 {
563 status = U_ZERO_ERROR;
564 continue;
565 }
566
567 spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
568 intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
569 efree(msg);
570 if( disp_name){
571 efree( disp_name );
572 }
573 if( mod_loc_name){
574 efree( mod_loc_name );
575 }
576 if (free_loc_name) {
577 efree((void *)disp_loc_name);
578 disp_loc_name = NULL;
579 }
580 RETURN_FALSE;
581 }
582 } while( buflen > disp_name_len );
583
584 if( mod_loc_name){
585 efree( mod_loc_name );
586 }
587 if (free_loc_name) {
588 efree((void *)disp_loc_name);
589 disp_loc_name = NULL;
590 }
591 /* Convert display locale name from UTF-16 to UTF-8. */
592 intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
593 efree( disp_name );
594 if( U_FAILURE( status ) )
595 {
596 spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
597 intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
598 efree(msg);
599 RETURN_FALSE;
600 }
601
602 RETVAL_STRINGL( utf8value, utf8value_len , FALSE);
603
604 }
605 /* }}} */
606
607 /* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
608 * gets the name for the $locale in $in_locale or default_locale
609 }}} */
610 /* {{{ proto static string get_display_name($locale[, $in_locale = null])
611 * gets the name for the $locale in $in_locale or default_locale
612 */
PHP_FUNCTION(locale_get_display_name)613 PHP_FUNCTION(locale_get_display_name)
614 {
615 get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
616 }
617 /* }}} */
618
619 /* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
620 * gets the language for the $locale in $in_locale or default_locale
621 }}} */
622 /* {{{ proto static string get_display_language($locale[, $in_locale = null])
623 * gets the language for the $locale in $in_locale or default_locale
624 */
PHP_FUNCTION(locale_get_display_language)625 PHP_FUNCTION(locale_get_display_language)
626 {
627 get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
628 }
629 /* }}} */
630
631 /* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
632 * gets the script for the $locale in $in_locale or default_locale
633 }}} */
634 /* {{{ proto static string get_display_script($locale, $in_locale = null)
635 * gets the script for the $locale in $in_locale or default_locale
636 */
PHP_FUNCTION(locale_get_display_script)637 PHP_FUNCTION(locale_get_display_script)
638 {
639 get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
640 }
641 /* }}} */
642
643 /* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
644 * gets the region for the $locale in $in_locale or default_locale
645 }}} */
646 /* {{{ proto static string get_display_region($locale, $in_locale = null)
647 * gets the region for the $locale in $in_locale or default_locale
648 */
PHP_FUNCTION(locale_get_display_region)649 PHP_FUNCTION(locale_get_display_region)
650 {
651 get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
652 }
653 /* }}} */
654
655 /* {{{
656 * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
657 * gets the variant for the $locale in $in_locale or default_locale
658 }}} */
659 /* {{{
660 * proto static string get_display_variant($locale, $in_locale = null)
661 * gets the variant for the $locale in $in_locale or default_locale
662 */
PHP_FUNCTION(locale_get_display_variant)663 PHP_FUNCTION(locale_get_display_variant)
664 {
665 get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
666 }
667 /* }}} */
668
669 /* {{{ proto static array getKeywords(string $locale) {
670 * return an associative array containing keyword-value
671 * pairs for this locale. The keys are keys to the array (doh!)
672 * }}}*/
673 /* {{{ proto static array locale_get_keywords(string $locale) {
674 * return an associative array containing keyword-value
675 * pairs for this locale. The keys are keys to the array (doh!)
676 */
PHP_FUNCTION(locale_get_keywords)677 PHP_FUNCTION( locale_get_keywords )
678 {
679 UEnumeration* e = NULL;
680 UErrorCode status = U_ZERO_ERROR;
681
682 const char* kw_key = NULL;
683 int32_t kw_key_len = 0;
684
685 const char* loc_name = NULL;
686 int loc_name_len = 0;
687
688 /*
689 ICU expects the buffer to be allocated before calling the function
690 and so the buffer size has been explicitly specified
691 ICU uloc.h #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
692 hence the kw_value buffer size is 100
693 */
694 char* kw_value = NULL;
695 int32_t kw_value_len = 100;
696
697 intl_error_reset( NULL TSRMLS_CC );
698
699 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
700 &loc_name, &loc_name_len ) == FAILURE)
701 {
702 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
703 "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
704
705 RETURN_FALSE;
706 }
707
708 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
709
710 if(loc_name_len == 0) {
711 loc_name = intl_locale_get_default(TSRMLS_C);
712 }
713
714 /* Get the keywords */
715 e = uloc_openKeywords( loc_name, &status );
716 if( e != NULL )
717 {
718 /* Traverse it, filling the return array. */
719 array_init( return_value );
720
721 while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
722 kw_value = ecalloc( 1 , kw_value_len );
723
724 /* Get the keyword value for each keyword */
725 kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len , &status );
726 if (status == U_BUFFER_OVERFLOW_ERROR) {
727 status = U_ZERO_ERROR;
728 kw_value = erealloc( kw_value , kw_value_len+1);
729 kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 , &status );
730 } else if(!U_FAILURE(status)) {
731 kw_value = erealloc( kw_value , kw_value_len+1);
732 }
733 if (U_FAILURE(status)) {
734 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword value for the keyword", 0 TSRMLS_CC );
735 if( kw_value){
736 efree( kw_value );
737 }
738 zval_dtor(return_value);
739 RETURN_FALSE;
740 }
741
742 add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
743 } /* end of while */
744
745 } /* end of if e!=NULL */
746
747 uenum_close( e );
748 }
749 /* }}} */
750
751 /* {{{ proto static string Locale::canonicalize($locale)
752 * @return string the canonicalized locale
753 * }}} */
754 /* {{{ proto static string locale_canonicalize(Locale $loc, string $locale)
755 * @param string $locale The locale string to canonicalize
756 */
PHP_FUNCTION(locale_canonicalize)757 PHP_FUNCTION(locale_canonicalize)
758 {
759 get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
760 }
761 /* }}} */
762
763 /* {{{ append_key_value
764 * Internal function which is called from locale_compose
765 * gets the value for the key_name and appends to the loc_name
766 * returns 1 if successful , -1 if not found ,
767 * 0 if array element is not a string , -2 if buffer-overflow
768 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)769 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
770 {
771 zval** ele_value = NULL;
772
773 if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
774 if(Z_TYPE_PP(ele_value)!= IS_STRING ){
775 /* element value is not a string */
776 return FAILURE;
777 }
778 if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
779 strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
780 /* not lang or grandfathered tag */
781 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
782 }
783 smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
784 return SUCCESS;
785 }
786
787 return LOC_NOT_FOUND;
788 }
789 /* }}} */
790
791 /* {{{ append_prefix , appends the prefix needed
792 * e.g. private adds 'x'
793 */
add_prefix(smart_str * loc_name,char * key_name)794 static void add_prefix(smart_str* loc_name, char* key_name)
795 {
796 if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
797 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
798 smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
799 }
800 }
801 /* }}} */
802
803 /* {{{ append_multiple_key_values
804 * Internal function which is called from locale_compose
805 * gets the multiple values for the key_name and appends to the loc_name
806 * used for 'variant','extlang','private'
807 * returns 1 if successful , -1 if not found ,
808 * 0 if array element is not a string , -2 if buffer-overflow
809 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name TSRMLS_DC)810 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
811 {
812 zval** ele_value = NULL;
813 int i = 0;
814 int isFirstSubtag = 0;
815 int max_value = 0;
816
817 /* Variant/ Extlang/Private etc. */
818 if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
819 if( Z_TYPE_PP(ele_value) == IS_STRING ){
820 add_prefix( loc_name , key_name);
821
822 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
823 smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
824 return SUCCESS;
825 } else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
826 HashPosition pos;
827 HashTable *arr = HASH_OF(*ele_value);
828 zval **data = NULL;
829
830 zend_hash_internal_pointer_reset_ex(arr, &pos);
831 while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
832 if(Z_TYPE_PP(data) != IS_STRING) {
833 return FAILURE;
834 }
835 if (isFirstSubtag++ == 0){
836 add_prefix(loc_name , key_name);
837 }
838 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
839 smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
840 zend_hash_move_forward_ex(arr, &pos);
841 }
842 return SUCCESS;
843 } else {
844 return FAILURE;
845 }
846 } else {
847 char cur_key_name[31];
848 /* Decide the max_value: the max. no. of elements allowed */
849 if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
850 max_value = MAX_NO_VARIANT;
851 }
852 if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
853 max_value = MAX_NO_EXTLANG;
854 }
855 if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
856 max_value = MAX_NO_PRIVATE;
857 }
858
859 /* Multiple variant values as variant0, variant1 ,variant2 */
860 isFirstSubtag = 0;
861 for( i=0 ; i< max_value; i++ ){
862 snprintf( cur_key_name , 30, "%s%d", key_name , i);
863 if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
864 if( Z_TYPE_PP(ele_value)!= IS_STRING ){
865 /* variant is not a string */
866 return FAILURE;
867 }
868 /* Add the contents */
869 if (isFirstSubtag++ == 0){
870 add_prefix(loc_name , cur_key_name);
871 }
872 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
873 smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
874 }
875 } /* end of for */
876 } /* end of else */
877
878 return SUCCESS;
879 }
880 /* }}} */
881
882 /*{{{
883 * If applicable sets error message and aborts locale_compose gracefully
884 * returns 0 if locale_compose needs to be aborted
885 * otherwise returns 1
886 */
handleAppendResult(int result,smart_str * loc_name TSRMLS_DC)887 static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
888 {
889 intl_error_reset( NULL TSRMLS_CC );
890 if( result == FAILURE) {
891 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
892 "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
893 smart_str_free(loc_name);
894 return 0;
895 }
896 return 1;
897 }
898 /* }}} */
899
/* NUL-terminates the smart_str 's' and returns its buffer as the string
 * result of the current PHP function (last argument 0: no copy is made).
 * Wrapped in do { } while (0) so that the macro expands to one statement and
 * stays correct after an unbraced if / else. */
#define RETURN_SMART_STR(s) \
	do { \
		smart_str_0((s)); \
		RETURN_STRINGL((s)->c, (s)->len, 0); \
	} while (0)
901 /* {{{ proto static string Locale::composeLocale($array)
902 * Creates a locale by combining the parts of locale-ID passed
903 * }}} */
904 /* {{{ proto static string compose_locale($array)
905 * Creates a locale by combining the parts of locale-ID passed
906 * }}} */
PHP_FUNCTION(locale_compose)907 PHP_FUNCTION(locale_compose)
908 {
909 smart_str loc_name_s = {0};
910 smart_str *loc_name = &loc_name_s;
911 zval* arr = NULL;
912 HashTable* hash_arr = NULL;
913 int result = 0;
914
915 intl_error_reset( NULL TSRMLS_CC );
916
917 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
918 &arr) == FAILURE)
919 {
920 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
921 "locale_compose: unable to parse input params", 0 TSRMLS_CC );
922 RETURN_FALSE;
923 }
924
925 hash_arr = HASH_OF( arr );
926
927 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
928 RETURN_FALSE;
929
930 /* Check for grandfathered first */
931 result = append_key_value(loc_name, hash_arr, LOC_GRANDFATHERED_LANG_TAG);
932 if( result == SUCCESS){
933 RETURN_SMART_STR(loc_name);
934 }
935 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
936 RETURN_FALSE;
937 }
938
939 /* Not grandfathered */
940 result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
941 if( result == LOC_NOT_FOUND ){
942 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
943 "locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
944 smart_str_free(loc_name);
945 RETURN_FALSE;
946 }
947 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
948 RETURN_FALSE;
949 }
950
951 /* Extlang */
952 result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
953 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
954 RETURN_FALSE;
955 }
956
957 /* Script */
958 result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
959 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
960 RETURN_FALSE;
961 }
962
963 /* Region */
964 result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
965 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
966 RETURN_FALSE;
967 }
968
969 /* Variant */
970 result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
971 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
972 RETURN_FALSE;
973 }
974
975 /* Private */
976 result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
977 if( !handleAppendResult( result, loc_name TSRMLS_CC)){
978 RETURN_FALSE;
979 }
980
981 RETURN_SMART_STR(loc_name);
982 }
983 /* }}} */
984
985
/*{{{ get_private_subtags
 * Parses the locale and returns the private subtags if existing
 * else returns NULL
 * e.g. for locale='en_US-x-prv1-prv2-prv3'
 * returns a newly allocated copy of 'prv1-prv2-prv3' (estrndup); the caller
 * must efree() it.
 * Also returns NULL when loc_name is NULL or empty, or when nothing follows
 * the 'x' singleton and its separator.
 */
static char* get_private_subtags(const char* loc_name)
{
	const char*	cursor		= loc_name;
	int		singletonPos	= 0;
	int		len		= 0;

	if( loc_name == NULL || *loc_name == '\0' ){
		return NULL;
	}

	len = (int)strlen( cursor );
	while( (singletonPos = getSingletonPos( cursor )) != -1 ){
		const char singleton = cursor[singletonPos];

		if( singleton == 'x' || singleton == 'X' ){
			/* private subtag start found */
			if( singletonPos + 2 == len ){
				/* the string ends right after 'x' and its separator */
				return NULL;
			}
			/* skip the singleton and the separator behind it */
			return estrndup( cursor + singletonPos + 2 , len - ( singletonPos + 2 ) );
		}

		if( singletonPos + 1 >= len ){
			/* String end */
			break;
		}

		/* singleton found but not a private subtag, hence check further
		 * in the string for the private subtag */
		cursor += singletonPos + 1;
		len = (int)strlen( cursor );
	} /* end of while */

	return NULL;
}
/* }}} */
1034
1035 /* {{{ code used by locale_parse
1036 */
add_array_entry(const char * loc_name,zval * hash_arr,char * key_name TSRMLS_DC)1037 static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1038 {
1039 char* key_value = NULL;
1040 char* cur_key_name = NULL;
1041 char* token = NULL;
1042 char* last_ptr = NULL;
1043
1044 int result = 0;
1045 int cur_result = 0;
1046 int cnt = 0;
1047
1048
1049 if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1050 key_value = get_private_subtags( loc_name );
1051 result = 1;
1052 } else {
1053 key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1054 }
1055 if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1056 ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1057 if( result > 0 && key_value){
1058 /* Tokenize on the "_" or "-" */
1059 token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1060 if( cur_key_name ){
1061 efree( cur_key_name);
1062 }
1063 cur_key_name = (char*)ecalloc( 25, 25);
1064 sprintf( cur_key_name , "%s%d", key_name , cnt++);
1065 add_assoc_string( hash_arr, cur_key_name , token ,TRUE );
1066 /* tokenize on the "_" or "-" and stop at singleton if any */
1067 while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1068 sprintf( cur_key_name , "%s%d", key_name , cnt++);
1069 add_assoc_string( hash_arr, cur_key_name , token , TRUE );
1070 }
1071 /*
1072 if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1073 }
1074 */
1075 }
1076 } else {
1077 if( result == 1 ){
1078 add_assoc_string( hash_arr, key_name , key_value , TRUE );
1079 cur_result = 1;
1080 }
1081 }
1082
1083 if( cur_key_name ){
1084 efree( cur_key_name);
1085 }
1086 /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1087 if( key_value){
1088 efree(key_value);
1089 }
1090 return cur_result;
1091 }
1092 /* }}} */
1093
1094 /* {{{ proto static array Locale::parseLocale($locale)
1095 * parses a locale-id into an array the different parts of it
1096 }}} */
1097 /* {{{ proto static array parse_locale($locale)
1098 * parses a locale-id into an array the different parts of it
1099 */
PHP_FUNCTION(locale_parse)1100 PHP_FUNCTION(locale_parse)
1101 {
1102 const char* loc_name = NULL;
1103 int loc_name_len = 0;
1104 int grOffset = 0;
1105
1106 intl_error_reset( NULL TSRMLS_CC );
1107
1108 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1109 &loc_name, &loc_name_len ) == FAILURE)
1110 {
1111 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1112 "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1113
1114 RETURN_FALSE;
1115 }
1116
1117 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1118
1119 if(loc_name_len == 0) {
1120 loc_name = intl_locale_get_default(TSRMLS_C);
1121 }
1122
1123 array_init( return_value );
1124
1125 grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
1126 if( grOffset >= 0 ){
1127 add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
1128 }
1129 else{
1130 /* Not grandfathered */
1131 add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1132 add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1133 add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1134 add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1135 add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1136 }
1137 }
1138 /* }}} */
1139
1140 /* {{{ proto static array Locale::getAllVariants($locale)
1141 * gets an array containing the list of variants, or null
1142 }}} */
1143 /* {{{ proto static array locale_get_all_variants($locale)
1144 * gets an array containing the list of variants, or null
1145 */
PHP_FUNCTION(locale_get_all_variants)1146 PHP_FUNCTION(locale_get_all_variants)
1147 {
1148 const char* loc_name = NULL;
1149 int loc_name_len = 0;
1150
1151 int result = 0;
1152 char* token = NULL;
1153 char* variant = NULL;
1154 char* saved_ptr = NULL;
1155
1156 intl_error_reset( NULL TSRMLS_CC );
1157
1158 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1159 &loc_name, &loc_name_len ) == FAILURE)
1160 {
1161 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1162 "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1163
1164 RETURN_FALSE;
1165 }
1166
1167 if(loc_name_len == 0) {
1168 loc_name = intl_locale_get_default(TSRMLS_C);
1169 }
1170
1171 INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1172
1173 array_init( return_value );
1174
1175 /* If the locale is grandfathered, stop, no variants */
1176 if( findOffset( LOC_GRANDFATHERED , loc_name ) >= 0 ){
1177 /* ("Grandfathered Tag. No variants."); */
1178 }
1179 else {
1180 /* Call ICU variant */
1181 variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1182 if( result > 0 && variant){
1183 /* Tokenize on the "_" or "-" */
1184 token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1185 add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1186 /* tokenize on the "_" or "-" and stop at singleton if any */
1187 while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1188 add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1189 }
1190 }
1191 if( variant ){
1192 efree( variant );
1193 }
1194 }
1195
1196
1197 }
1198 /* }}} */
1199
/*{{{
* Converts to lower case and also replaces all hyphens with the underscore
*
* str    : NUL-terminated input string.
* retstr : output buffer; must have room for strlen(str)+1 bytes.
* returns: 1 when the converted, NUL-terminated string was written to retstr,
*          0 when str is NULL or empty or retstr is NULL (retstr is then left
*          untouched).
*/
static int strToMatch(const char* str ,char *retstr)
{
	if( str == NULL || str[0] == '\0' || retstr == NULL ){
		return 0;
	}

	for( ; *str != '\0'; str++, retstr++ ){
		if( *str == '-' ){
			*retstr = '_';
		} else {
			/* tolower() is only defined for unsigned char values and EOF,
			 * so bytes >= 0x80 must not be passed as a negative char */
			*retstr = (char)tolower((unsigned char)*str);
		}
	}
	*retstr = '\0';

	return 1;
}
1231 /* }}} */
1232
1233 /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1234 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1235 */
1236 /* }}} */
1237 /* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1238 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1239 */
PHP_FUNCTION(locale_filter_matches)1240 PHP_FUNCTION(locale_filter_matches)
1241 {
1242 char* lang_tag = NULL;
1243 int lang_tag_len = 0;
1244 const char* loc_range = NULL;
1245 int loc_range_len = 0;
1246
1247 int result = 0;
1248 char* token = 0;
1249 char* chrcheck = NULL;
1250
1251 char* can_lang_tag = NULL;
1252 char* can_loc_range = NULL;
1253
1254 char* cur_lang_tag = NULL;
1255 char* cur_loc_range = NULL;
1256
1257 zend_bool boolCanonical = 0;
1258 UErrorCode status = U_ZERO_ERROR;
1259
1260 intl_error_reset( NULL TSRMLS_CC );
1261
1262 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1263 &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1264 &boolCanonical) == FAILURE)
1265 {
1266 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1267 "locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1268
1269 RETURN_FALSE;
1270 }
1271
1272 if(loc_range_len == 0) {
1273 loc_range = intl_locale_get_default(TSRMLS_C);
1274 }
1275
1276 if( strcmp(loc_range,"*")==0){
1277 RETURN_TRUE;
1278 }
1279
1280 INTL_CHECK_LOCALE_LEN(strlen(loc_range));
1281 INTL_CHECK_LOCALE_LEN(strlen(lang_tag));
1282
1283 if( boolCanonical ){
1284 /* canonicalize loc_range */
1285 can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1286 if( result ==0) {
1287 intl_error_set( NULL, status,
1288 "locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1289 RETURN_FALSE;
1290 }
1291
1292 /* canonicalize lang_tag */
1293 can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result , 0);
1294 if( result ==0) {
1295 intl_error_set( NULL, status,
1296 "locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1297 RETURN_FALSE;
1298 }
1299
1300 /* Convert to lower case for case-insensitive comparison */
1301 cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1302
1303 /* Convert to lower case for case-insensitive comparison */
1304 result = strToMatch( can_lang_tag , cur_lang_tag);
1305 if( result == 0) {
1306 efree( cur_lang_tag );
1307 efree( can_lang_tag );
1308 RETURN_FALSE;
1309 }
1310
1311 cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1312 result = strToMatch( can_loc_range , cur_loc_range );
1313 if( result == 0) {
1314 efree( cur_lang_tag );
1315 efree( can_lang_tag );
1316 efree( cur_loc_range );
1317 efree( can_loc_range );
1318 RETURN_FALSE;
1319 }
1320
1321 /* check if prefix */
1322 token = strstr( cur_lang_tag , cur_loc_range );
1323
1324 if( token && (token==cur_lang_tag) ){
1325 /* check if the char. after match is SEPARATOR */
1326 chrcheck = token + (strlen(cur_loc_range));
1327 if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1328 if( cur_lang_tag){
1329 efree( cur_lang_tag );
1330 }
1331 if( cur_loc_range){
1332 efree( cur_loc_range );
1333 }
1334 if( can_lang_tag){
1335 efree( can_lang_tag );
1336 }
1337 if( can_loc_range){
1338 efree( can_loc_range );
1339 }
1340 RETURN_TRUE;
1341 }
1342 }
1343
1344 /* No prefix as loc_range */
1345 if( cur_lang_tag){
1346 efree( cur_lang_tag );
1347 }
1348 if( cur_loc_range){
1349 efree( cur_loc_range );
1350 }
1351 if( can_lang_tag){
1352 efree( can_lang_tag );
1353 }
1354 if( can_loc_range){
1355 efree( can_loc_range );
1356 }
1357 RETURN_FALSE;
1358
1359 } /* end of if isCanonical */
1360 else{
1361 /* Convert to lower case for case-insensitive comparison */
1362 cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1363
1364 result = strToMatch( lang_tag , cur_lang_tag);
1365 if( result == 0) {
1366 efree( cur_lang_tag );
1367 RETURN_FALSE;
1368 }
1369 cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1370 result = strToMatch( loc_range , cur_loc_range );
1371 if( result == 0) {
1372 efree( cur_lang_tag );
1373 efree( cur_loc_range );
1374 RETURN_FALSE;
1375 }
1376
1377 /* check if prefix */
1378 token = strstr( cur_lang_tag , cur_loc_range );
1379
1380 if( token && (token==cur_lang_tag) ){
1381 /* check if the char. after match is SEPARATOR */
1382 chrcheck = token + (strlen(cur_loc_range));
1383 if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1384 if( cur_lang_tag){
1385 efree( cur_lang_tag );
1386 }
1387 if( cur_loc_range){
1388 efree( cur_loc_range );
1389 }
1390 RETURN_TRUE;
1391 }
1392 }
1393
1394 /* No prefix as loc_range */
1395 if( cur_lang_tag){
1396 efree( cur_lang_tag );
1397 }
1398 if( cur_loc_range){
1399 efree( cur_loc_range );
1400 }
1401 RETURN_FALSE;
1402
1403 }
1404 }
1405 /* }}} */
1406
array_cleanup(char * arr[],int arr_size)1407 static void array_cleanup( char* arr[] , int arr_size)
1408 {
1409 int i=0;
1410 for( i=0; i< arr_size; i++ ){
1411 if( arr[i*2] ){
1412 efree( arr[i*2]);
1413 }
1414 }
1415 efree(arr);
1416 }
1417
1418 #define LOOKUP_CLEAN_RETURN(value) array_cleanup(cur_arr, cur_arr_len); return (value)
1419 /* {{{
1420 * returns the lookup result to lookup_loc_range_src_php
1421 * internal function
1422 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize TSRMLS_DC)1423 static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize TSRMLS_DC)
1424 {
1425 int i = 0;
1426 int cur_arr_len = 0;
1427 int result = 0;
1428
1429 char* lang_tag = NULL;
1430 zval** ele_value = NULL;
1431 char** cur_arr = NULL;
1432
1433 char* cur_loc_range = NULL;
1434 char* can_loc_range = NULL;
1435 int saved_pos = 0;
1436
1437 char* return_value = NULL;
1438
1439 cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1440 /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1441 for(zend_hash_internal_pointer_reset(hash_arr);
1442 zend_hash_has_more_elements(hash_arr) == SUCCESS;
1443 zend_hash_move_forward(hash_arr)) {
1444
1445 if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
1446 /* Should never actually fail since the key is known to exist.*/
1447 continue;
1448 }
1449 if(Z_TYPE_PP(ele_value)!= IS_STRING) {
1450 /* element value is not a string */
1451 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1452 LOOKUP_CLEAN_RETURN(NULL);
1453 }
1454 cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
1455 result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]);
1456 if(result == 0) {
1457 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1458 LOOKUP_CLEAN_RETURN(NULL);
1459 }
1460 cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
1461 cur_arr_len++ ;
1462 } /* end of for */
1463
1464 /* Canonicalize array elements */
1465 if(canonicalize) {
1466 for(i=0; i<cur_arr_len; i++) {
1467 lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1468 if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1469 if(lang_tag) {
1470 efree(lang_tag);
1471 }
1472 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1473 LOOKUP_CLEAN_RETURN(NULL);
1474 }
1475 cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1476 result = strToMatch(lang_tag, cur_arr[i*2]);
1477 efree(lang_tag);
1478 if(result == 0) {
1479 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1480 LOOKUP_CLEAN_RETURN(NULL);
1481 }
1482 }
1483
1484 }
1485
1486 if(canonicalize) {
1487 /* Canonicalize the loc_range */
1488 can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1489 if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1490 /* Error */
1491 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1492 if(can_loc_range) {
1493 efree(can_loc_range);
1494 }
1495 LOOKUP_CLEAN_RETURN(NULL);
1496 } else {
1497 loc_range = can_loc_range;
1498 }
1499 }
1500
1501 cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1502 /* convert to lower and replace hyphens */
1503 result = strToMatch(loc_range, cur_loc_range);
1504 if(can_loc_range) {
1505 efree(can_loc_range);
1506 }
1507 if(result == 0) {
1508 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1509 LOOKUP_CLEAN_RETURN(NULL);
1510 }
1511
1512 /* Lookup for the lang_tag match */
1513 saved_pos = strlen(cur_loc_range);
1514 while(saved_pos > 0) {
1515 for(i=0; i< cur_arr_len; i++){
1516 if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1517 /* Match found */
1518 return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1519 efree(cur_loc_range);
1520 LOOKUP_CLEAN_RETURN(return_value);
1521 }
1522 }
1523 saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1524 }
1525
1526 /* Match not found */
1527 efree(cur_loc_range);
1528 LOOKUP_CLEAN_RETURN(NULL);
1529 }
1530 /* }}} */
1531
1532 /* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1533 * Searchs the items in $langtag for the best match to the language
1534 * range
1535 */
1536 /* }}} */
1537 /* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1538 * Searchs the items in $langtag for the best match to the language
1539 * range
1540 */
PHP_FUNCTION(locale_lookup)1541 PHP_FUNCTION(locale_lookup)
1542 {
1543 char* fallback_loc = NULL;
1544 int fallback_loc_len = 0;
1545 const char* loc_range = NULL;
1546 int loc_range_len = 0;
1547
1548 zval* arr = NULL;
1549 HashTable* hash_arr = NULL;
1550 zend_bool boolCanonical = 0;
1551 char* result =NULL;
1552
1553 intl_error_reset( NULL TSRMLS_CC );
1554
1555 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1556 &boolCanonical, &fallback_loc, &fallback_loc_len) == FAILURE) {
1557 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1558 RETURN_FALSE;
1559 }
1560
1561 if(loc_range_len == 0) {
1562 loc_range = intl_locale_get_default(TSRMLS_C);
1563 }
1564
1565 INTL_CHECK_LOCALE_LEN(strlen(loc_range));
1566
1567 hash_arr = HASH_OF(arr);
1568
1569 if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1570 RETURN_EMPTY_STRING();
1571 }
1572
1573 result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1574 if(result == NULL || result[0] == '\0') {
1575 if( fallback_loc ) {
1576 result = estrndup(fallback_loc, fallback_loc_len);
1577 } else {
1578 RETURN_EMPTY_STRING();
1579 }
1580 }
1581
1582 RETVAL_STRINGL(result, strlen(result), 0);
1583 }
1584 /* }}} */
1585
1586 /* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1587 * Tries to find out best available locale based on HTTP �Accept-Language� header
1588 */
1589 /* }}} */
1590 /* {{{ proto string locale_accept_from_http(string $http_accept)
1591 * Tries to find out best available locale based on HTTP �Accept-Language� header
1592 */
PHP_FUNCTION(locale_accept_from_http)1593 PHP_FUNCTION(locale_accept_from_http)
1594 {
1595 UEnumeration *available;
1596 char *http_accept = NULL;
1597 int http_accept_len;
1598 UErrorCode status = 0;
1599 int len;
1600 char resultLocale[INTL_MAX_LOCALE_LEN+1];
1601 UAcceptResult outResult;
1602
1603 if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1604 {
1605 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1606 "locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1607 RETURN_FALSE;
1608 }
1609 if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1610 /* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1611 char *start = http_accept;
1612 char *end;
1613 size_t len;
1614 do {
1615 end = strchr(start, ',');
1616 len = end ? end-start : http_accept_len-(start-http_accept);
1617 if(len > ULOC_FULLNAME_CAPACITY) {
1618 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1619 "locale_accept_from_http: locale string too long", 0 TSRMLS_CC );
1620 RETURN_FALSE;
1621 }
1622 if(end) {
1623 start = end+1;
1624 }
1625 } while(end != NULL);
1626 }
1627
1628 available = ures_openAvailableLocales(NULL, &status);
1629 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1630 len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1631 &outResult, http_accept, available, &status);
1632 uenum_close(available);
1633 INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1634 if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1635 RETURN_FALSE;
1636 }
1637 RETURN_STRINGL(resultLocale, len, 1);
1638 }
1639 /* }}} */
1640
/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 * vim600: noet sw=4 ts=4 fdm=marker
 * vim<600: noet sw=4 ts=4
 */
1650