xref: /PHP-5.6/ext/intl/locale/locale_methods.c (revision 1fd18821)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14    +----------------------------------------------------------------------+
15 */
16 
17 /* $Id$ */
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include <unicode/ustring.h>
24 #include <unicode/udata.h>
25 #include <unicode/putil.h>
26 #include <unicode/ures.h>
27 
28 #include "php_intl.h"
29 #include "locale.h"
30 #include "locale_class.h"
31 #include "locale_methods.h"
32 #include "intl_convert.h"
33 #include "intl_data.h"
34 
35 #include <zend_API.h>
36 #include <zend.h>
37 #include <php.h>
38 #include "main/php_ini.h"
39 #include "ext/standard/php_smart_str.h"
40 
41 ZEND_EXTERN_MODULE_GLOBALS( intl )
42 
/* Separator characters, subtag prefixes and the display-name tag key. */
44 #define SEPARATOR "_"
45 #define SEPARATOR1 "-"
46 #define DELIMITER "-_"
47 #define EXTLANG_PREFIX "a"
48 #define PRIVATE_PREFIX "x"
49 #define DISP_NAME "name"
50 
51 #define MAX_NO_VARIANT  15
52 #define MAX_NO_EXTLANG  3
53 #define MAX_NO_PRIVATE  15
54 #define MAX_NO_LOOKUP_LANG_TAG  100
55 
56 #define LOC_NOT_FOUND 1
57 
58 /* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
59 #define VARIANT_KEYNAME_LEN  11
60 #define EXTLANG_KEYNAME_LEN  10
61 #define PRIVATE_KEYNAME_LEN  11
62 
63 /* Based on IANA registry at the time of writing this code
64 *
65 */
66 static const char * const LOC_GRANDFATHERED[] = {
67 	"art-lojban",		"i-klingon",		"i-lux",			"i-navajo",		"no-bok",		"no-nyn",
68 	"cel-gaulish",		"en-GB-oed",		"i-ami",
69 	"i-bnn",		"i-default",		"i-enochian",
70 	"i-mingo",		"i-pwn", 		"i-tao",
71 	"i-tay",		"i-tsu",		"sgn-BE-fr",
72 	"sgn-BE-nl",		"sgn-CH-de", 		"zh-cmn",
73  	"zh-cmn-Hans", 		"zh-cmn-Hant",		"zh-gan" ,
74 	"zh-guoyu", 		"zh-hakka", 		"zh-min",
75 	"zh-min-nan", 		"zh-wuu", 		"zh-xiang",
76 	"zh-yue",		NULL
77 };
78 
79 /* Based on IANA registry at the time of writing this code
80 *  This array lists the preferred values for the grandfathered tags if applicable
81 *  This is in sync with the array LOC_GRANDFATHERED
82 *  e.g. the offsets of the grandfathered tags match the offset of the preferred  value
83 */
84 static const int 		LOC_PREFERRED_GRANDFATHERED_LEN = 6;
85 static const char * const 	LOC_PREFERRED_GRANDFATHERED[]  = {
86 	"jbo",			"tlh",			"lb",
87 	"nv", 			"nb",			"nn",
88 	NULL
89 };
90 
/*
 * Character/prefix classification helpers for locale IDs.
 * Every macro parameter is parenthesized so that an expression argument
 * (e.g. `c & 0x7f` or `p + 1`) is evaluated as a unit. Arguments may be
 * evaluated more than once, so do not pass expressions with side effects.
 */

/* TRUE if a is an ID separator ('_' or '-'), FALSE otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
/* TRUE if a introduces the keyword section ('@') */
#define isKeywordSeparator(a) ((a) == '@')
/* TRUE if a is the terminating NUL */
#define isEndOfTag(a) ((a) == '\0')

/* TRUE if a is one of the special prefix letters x/X/i/I */
#define isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* TRUE if the string s starts with one of the special prefixes
   'x-' or 'i-' (either case, either separator) */
#define isIDPrefix(s) (isPrefixLetter((s)[0])&&isIDSeparator((s)[1]))
/* TRUE if the string s starts with the keyword separator */
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
106 
107 /* {{{ return the offset of 'key' in the array 'list'.
108  * returns -1 if not present */
findOffset(const char * const * list,const char * key)109 static int16_t findOffset(const char* const* list, const char* key)
110 {
111 	const char* const* anchor = list;
112 	while (*list != NULL) {
113 		if (strcmp(key, *list) == 0) {
114 			return (int16_t)(list - anchor);
115 		}
116 		list++;
117 	}
118 
119 	return -1;
120 
121 }
122 /*}}}*/
123 
getPreferredTag(const char * gf_tag)124 static char* getPreferredTag(const char* gf_tag)
125 {
126 	char* result = NULL;
127 	int grOffset = 0;
128 
129 	grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130 	if(grOffset < 0) {
131 		return NULL;
132 	}
133 	if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134 		/* return preferred tag */
135 		result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136 	} else {
137 		/* Return correct grandfathered language tag */
138 		result = estrdup( LOC_GRANDFATHERED[grOffset] );
139 	}
140 	return result;
141 }
142 
/* {{{
* Reverse-direction token search used for lookup (strtok in reverse).
* Starting just before savedPos, walks backwards to the nearest ID separator.
* If the character two positions before that separator is also a separator,
* the subtag in between is a singleton and the earlier separator position is
* reported instead.
* Returns the position found, or -1 if there is no separator or the
* position would be 0 (e.g. invalid input such as '-x-xyz' or '-sl_Latn').
*/
static int getStrrtokenPos(char* str, int savedPos)
{
	int pos = savedPos - 1;

	/* walk backwards until a separator is hit or the string is exhausted */
	while (pos >= 0 && !isIDSeparator(str[pos])) {
		pos--;
	}
	if (pos < 0) {
		return -1;
	}

	/* separator found; skip over a preceding singleton if there is one */
	if (pos >= 2 && isIDSeparator(str[pos - 2])) {
		pos -= 2;
	}

	/* a position of 0 means the locale starts with a separator: reject */
	return (pos < 1) ? -1 : pos;
}
/* }}} */
172 
/* {{{
* Forward search for the first singleton (one-character subtag) in 'str'.
* Returns 0 if the first separator is at index 1, i.e. the string itself
* starts with a singleton (e.g. "x-avy" or "a-prv1").
* Otherwise returns the index of the singleton character when a separator is
* followed, two positions later, by another separator
* (e.g. 6 for "en-US-x-prv").
* Returns -1 for NULL or empty input, or when no singleton is found.
*/
static int getSingletonPos(const char* str)
{
	int result = -1;
	int i;
	int len;

	if (str == NULL) {
		return -1;
	}

	len = (int)strlen(str);
	for (i = 0; i < len; i++) {
		if (!isIDSeparator(str[i])) {
			continue;
		}
		if (i == 1) {
			/* string is of the form x-avy or a-prv1 */
			result = 0;
			break;
		}
		/* Delimiter found; check for a singleton right after it.
		 * The bound check keeps the look-ahead inside the string: without
		 * it a trailing separator (e.g. "en-") made str[i+2] read one byte
		 * past the terminating NUL. */
		if (i + 2 < len && isIDSeparator(str[i + 2])) {
			result = i + 1;
			break;
		}
	}

	return result;
}
/* }}} */
206 
207 /* {{{ proto static string Locale::getDefault(  )
208    Get default locale */
209 /* }}} */
210 /* {{{ proto static string locale_get_default( )
211    Get default locale */
PHP_NAMED_FUNCTION(zif_locale_get_default)212 PHP_NAMED_FUNCTION(zif_locale_get_default)
213 {
214 	RETURN_STRING( intl_locale_get_default( TSRMLS_C ), TRUE );
215 }
216 
217 /* }}} */
218 
219 /* {{{ proto static string Locale::setDefault( string $locale )
220    Set default locale */
221 /* }}} */
222 /* {{{ proto static string locale_set_default( string $locale )
223    Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)224 PHP_NAMED_FUNCTION(zif_locale_set_default)
225 {
226 	char* locale_name = NULL;
227 	int   len=0;
228 
229 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC,  "s",
230 		&locale_name ,&len ) == FAILURE)
231 	{
232 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233 			 	"locale_set_default: unable to parse input params", 0 TSRMLS_CC );
234 
235 		RETURN_FALSE;
236 	}
237 
238 	if(len == 0) {
239 		locale_name =  (char *)uloc_getDefault() ;
240 		len = strlen(locale_name);
241 	}
242 
243 	zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
244 
245 	RETURN_TRUE;
246 }
247 /* }}} */
248 
249 /* {{{
250 * Gets the value from ICU
251 * common code shared by get_primary_language,get_script or get_region or get_variant
252 * result = 0 if error, 1 if successful , -1 if no value
253 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)254 static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
255 {
256 	char*		tag_value	= NULL;
257 	int32_t     	tag_value_len   = 512;
258 
259 	int		singletonPos   	= 0;
260 	char*       	mod_loc_name	= NULL;
261 	int 		grOffset	= 0;
262 
263 	int32_t     	buflen          = 512;
264 	UErrorCode  	status          = U_ZERO_ERROR;
265 
266 	if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
267 		return NULL;
268 	}
269 
270 	if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
271 		/* Handle  grandfathered languages */
272 		grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
273 		if( grOffset >= 0 ){
274 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
275 				return estrdup(loc_name);
276 			} else {
277 				/* Since Grandfathered , no value , do nothing , retutn NULL */
278 				return NULL;
279 			}
280 		}
281 
282 	if( fromParseLocale==1 ){
283 		/* Handle singletons */
284 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
285 			if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
286 				return estrdup(loc_name);
287 			}
288 		}
289 
290 		singletonPos = getSingletonPos( loc_name );
291 		if( singletonPos == 0){
292 			/* singleton at start of script, region , variant etc.
293 			 * or invalid singleton at start of language */
294 			return NULL;
295 		} else if( singletonPos > 0 ){
296 			/* singleton at some position except at start
297 			 * strip off the singleton and rest of the loc_name */
298 			mod_loc_name = estrndup ( loc_name , singletonPos-1);
299 		}
300 	} /* end of if fromParse */
301 
302 	} /* end of if != LOC_CANONICAL_TAG */
303 
304 	if( mod_loc_name == NULL){
305 		mod_loc_name = estrdup(loc_name );
306 	}
307 
308 	/* Proceed to ICU */
309     do{
310 		tag_value = erealloc( tag_value , buflen  );
311 		tag_value_len = buflen;
312 
313 		if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
314 			buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
315 		}
316 		if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
317 			buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
318 		}
319 		if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
320 			buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
321 		}
322 		if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
323 			buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
324 		}
325 		if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
326 			buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
327 		}
328 
329 		if( U_FAILURE( status ) ) {
330 			if( status == U_BUFFER_OVERFLOW_ERROR ) {
331 				status = U_ZERO_ERROR;
332 				buflen++; /* add space for \0 */
333 				continue;
334 			}
335 
336 			/* Error in retriving data */
337 			*result = 0;
338 			if( tag_value ){
339 				efree( tag_value );
340 			}
341 			if( mod_loc_name ){
342 				efree( mod_loc_name);
343 			}
344 			return NULL;
345 		}
346 	} while( buflen > tag_value_len );
347 
348 	if(  buflen ==0 ){
349 		/* No value found */
350 		*result = -1;
351 		if( tag_value ){
352 			efree( tag_value );
353 		}
354 		if( mod_loc_name ){
355 			efree( mod_loc_name);
356 		}
357 		return NULL;
358 	} else {
359 		*result = 1;
360 	}
361 
362 	if( mod_loc_name ){
363 		efree( mod_loc_name);
364 	}
365 	return tag_value;
366 }
367 /* }}} */
368 
369 /* {{{
370 * Gets the value from ICU , called when PHP userspace function is called
371 * common code shared by get_primary_language,get_script or get_region or get_variant
372 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)373 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
374 {
375 
376 	const char* loc_name        	= NULL;
377 	int         loc_name_len    	= 0;
378 
379 	char*       tag_value		= NULL;
380 	char*       empty_result	= "";
381 
382 	int         result    		= 0;
383 	char*       msg        		= NULL;
384 
385 	UErrorCode  status          	= U_ZERO_ERROR;
386 
387 	intl_error_reset( NULL TSRMLS_CC );
388 
389 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
390 	&loc_name ,&loc_name_len ) == FAILURE) {
391 		spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
392 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
393 		efree(msg);
394 
395 		RETURN_FALSE;
396     }
397 
398 	if(loc_name_len == 0) {
399 		loc_name = intl_locale_get_default(TSRMLS_C);
400 	}
401 
402 	INTL_CHECK_LOCALE_LEN(strlen(loc_name));
403 
404 	/* Call ICU get */
405 	tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
406 
407 	/* No value found */
408 	if( result == -1 ) {
409 		if( tag_value){
410 			efree( tag_value);
411 		}
412 		RETURN_STRING( empty_result , TRUE);
413 	}
414 
415 	/* value found */
416 	if( tag_value){
417 		RETURN_STRING( tag_value , FALSE);
418 	}
419 
420 	/* Error encountered while fetching the value */
421 	if( result ==0) {
422 		spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
423 		intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
424 		efree(msg);
425 		RETURN_NULL();
426 	}
427 
428 }
429 /* }}} */
430 
431 /* {{{ proto static string Locale::getScript($locale)
432  * gets the script for the $locale
433  }}} */
434 /* {{{ proto static string locale_get_script($locale)
435  * gets the script for the $locale
436  */
PHP_FUNCTION(locale_get_script)437 PHP_FUNCTION( locale_get_script )
438 {
439 	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
440 }
441 /* }}} */
442 
443 /* {{{ proto static string Locale::getRegion($locale)
444  * gets the region for the $locale
445  }}} */
446 /* {{{ proto static string locale_get_region($locale)
447  * gets the region for the $locale
448  */
PHP_FUNCTION(locale_get_region)449 PHP_FUNCTION( locale_get_region )
450 {
451 	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
452 }
453 /* }}} */
454 
455 /* {{{ proto static string Locale::getPrimaryLanguage($locale)
456  * gets the primary language for the $locale
457  }}} */
458 /* {{{ proto static string locale_get_primary_language($locale)
459  * gets the primary language for the $locale
460  */
PHP_FUNCTION(locale_get_primary_language)461 PHP_FUNCTION(locale_get_primary_language )
462 {
463 	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
464 }
465 /* }}} */
466 
467 
468 /* {{{
469  * common code shared by display_xyz functions to  get the value from ICU
470  }}} */
get_icu_disp_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)471 static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
472 {
473 	const char* loc_name        	= NULL;
474 	int         loc_name_len    	= 0;
475 
476 	const char* disp_loc_name       = NULL;
477 	int         disp_loc_name_len   = 0;
478 	int         free_loc_name       = 0;
479 
480 	UChar*      disp_name      	= NULL;
481 	int32_t     disp_name_len  	= 0;
482 
483 	char*       mod_loc_name        = NULL;
484 
485 	int32_t     buflen          	= 512;
486 	UErrorCode  status          	= U_ZERO_ERROR;
487 
488 	char*       utf8value		= NULL;
489 	int         utf8value_len   	= 0;
490 
491   	char*       msg             	= NULL;
492 	int         grOffset    	= 0;
493 
494 	intl_error_reset( NULL TSRMLS_CC );
495 
496 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
497 		&loc_name, &loc_name_len ,
498 		&disp_loc_name ,&disp_loc_name_len ) == FAILURE)
499 	{
500 		spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
501 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
502 		efree(msg);
503 		RETURN_FALSE;
504 	}
505 
506     if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
507         /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
508 		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
509 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
510 		efree(msg);
511 		RETURN_FALSE;
512     }
513 
514 	if(loc_name_len == 0) {
515 		loc_name = intl_locale_get_default(TSRMLS_C);
516 	}
517 
518 	if( strcmp(tag_name, DISP_NAME) != 0 ){
519 		/* Handle grandfathered languages */
520 		grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
521 		if( grOffset >= 0 ){
522 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
523 				mod_loc_name = getPreferredTag( loc_name );
524 			} else {
525 				/* Since Grandfathered, no value, do nothing, retutn NULL */
526 				RETURN_FALSE;
527 			}
528 		}
529 	} /* end of if != LOC_CANONICAL_TAG */
530 
531 	if( mod_loc_name==NULL ){
532 		mod_loc_name = estrdup( loc_name );
533 	}
534 
535 	/* Check if disp_loc_name passed , if not use default locale */
536 	if( !disp_loc_name){
537 		disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
538 		free_loc_name = 1;
539 	}
540 
541     /* Get the disp_value for the given locale */
542     do{
543         disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
544         disp_name_len = buflen;
545 
546 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
547 			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
548 		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
549 			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
550 		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
551 			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
552 		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
553 			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
554 		} else if( strcmp(tag_name , DISP_NAME)==0 ){
555 			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
556 		}
557 
558 		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
559 		if( U_FAILURE( status ) )
560 		{
561 			if( status == U_BUFFER_OVERFLOW_ERROR )
562 			{
563 				status = U_ZERO_ERROR;
564 				continue;
565 			}
566 
567 			spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
568 			intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
569 			efree(msg);
570 			if( disp_name){
571 				efree( disp_name );
572 			}
573 			if( mod_loc_name){
574 				efree( mod_loc_name );
575 			}
576 			if (free_loc_name) {
577 				efree((void *)disp_loc_name);
578 				disp_loc_name = NULL;
579 			}
580 			RETURN_FALSE;
581 		}
582 	} while( buflen > disp_name_len );
583 
584 	if( mod_loc_name){
585 		efree( mod_loc_name );
586 	}
587 	if (free_loc_name) {
588 		efree((void *)disp_loc_name);
589 		disp_loc_name = NULL;
590 	}
591 	/* Convert display locale name from UTF-16 to UTF-8. */
592 	intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
593 	efree( disp_name );
594 	if( U_FAILURE( status ) )
595 	{
596 		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
597 		intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
598 		efree(msg);
599 		RETURN_FALSE;
600 	}
601 
602 	RETVAL_STRINGL( utf8value, utf8value_len , FALSE);
603 
604 }
605 /* }}} */
606 
607 /* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
608 * gets the name for the $locale in $in_locale or default_locale
609  }}} */
610 /* {{{ proto static string get_display_name($locale[, $in_locale = null])
611 * gets the name for the $locale in $in_locale or default_locale
612 */
PHP_FUNCTION(locale_get_display_name)613 PHP_FUNCTION(locale_get_display_name)
614 {
615     get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
616 }
617 /* }}} */
618 
619 /* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
620 * gets the language for the $locale in $in_locale or default_locale
621  }}} */
622 /* {{{ proto static string get_display_language($locale[, $in_locale = null])
623 * gets the language for the $locale in $in_locale or default_locale
624 */
PHP_FUNCTION(locale_get_display_language)625 PHP_FUNCTION(locale_get_display_language)
626 {
627     get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
628 }
629 /* }}} */
630 
631 /* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
632 * gets the script for the $locale in $in_locale or default_locale
633  }}} */
634 /* {{{ proto static string get_display_script($locale, $in_locale = null)
635 * gets the script for the $locale in $in_locale or default_locale
636 */
PHP_FUNCTION(locale_get_display_script)637 PHP_FUNCTION(locale_get_display_script)
638 {
639     get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
640 }
641 /* }}} */
642 
643 /* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
644 * gets the region for the $locale in $in_locale or default_locale
645  }}} */
646 /* {{{ proto static string get_display_region($locale, $in_locale = null)
647 * gets the region for the $locale in $in_locale or default_locale
648 */
PHP_FUNCTION(locale_get_display_region)649 PHP_FUNCTION(locale_get_display_region)
650 {
651     get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
652 }
653 /* }}} */
654 
655 /* {{{
656 * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
657 * gets the variant for the $locale in $in_locale or default_locale
658  }}} */
659 /* {{{
660 * proto static string get_display_variant($locale, $in_locale = null)
661 * gets the variant for the $locale in $in_locale or default_locale
662 */
PHP_FUNCTION(locale_get_display_variant)663 PHP_FUNCTION(locale_get_display_variant)
664 {
665     get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
666 }
667 /* }}} */
668 
669  /* {{{ proto static array getKeywords(string $locale) {
670  * return an associative array containing keyword-value
671  * pairs for this locale. The keys are keys to the array (doh!)
672  * }}}*/
673  /* {{{ proto static array locale_get_keywords(string $locale) {
674  * return an associative array containing keyword-value
675  * pairs for this locale. The keys are keys to the array (doh!)
676  */
PHP_FUNCTION(locale_get_keywords)677 PHP_FUNCTION( locale_get_keywords )
678 {
679     UEnumeration*   e        = NULL;
680     UErrorCode      status   = U_ZERO_ERROR;
681 
682     const char*	 	kw_key        = NULL;
683     int32_t         kw_key_len    = 0;
684 
685     const char*       	loc_name        = NULL;
686     int        	 	loc_name_len    = 0;
687 
688 /*
689 	ICU expects the buffer to be allocated  before calling the function
690 	and so the buffer size has been explicitly specified
691 	ICU uloc.h #define 	ULOC_KEYWORD_AND_VALUES_CAPACITY   100
692 	hence the kw_value buffer size is 100
693 */
694 	char*	 	kw_value        = NULL;
695     int32_t     kw_value_len    = 100;
696 
697     intl_error_reset( NULL TSRMLS_CC );
698 
699     if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
700         &loc_name, &loc_name_len ) == FAILURE)
701     {
702         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
703              "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
704 
705         RETURN_FALSE;
706     }
707 
708 	INTL_CHECK_LOCALE_LEN(strlen(loc_name));
709 
710     if(loc_name_len == 0) {
711         loc_name = intl_locale_get_default(TSRMLS_C);
712     }
713 
714 	/* Get the keywords */
715     e = uloc_openKeywords( loc_name, &status );
716     if( e != NULL )
717     {
718 		/* Traverse it, filling the return array. */
719     	array_init( return_value );
720 
721     	while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
722 			kw_value = ecalloc( 1 , kw_value_len  );
723 
724 			/* Get the keyword value for each keyword */
725 			kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len ,  &status );
726 			if (status == U_BUFFER_OVERFLOW_ERROR) {
727 				status = U_ZERO_ERROR;
728 				kw_value = erealloc( kw_value , kw_value_len+1);
729 				kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 ,  &status );
730 			} else if(!U_FAILURE(status)) {
731 				kw_value = erealloc( kw_value , kw_value_len+1);
732 			}
733 			if (U_FAILURE(status)) {
734 	        		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 TSRMLS_CC );
735 				if( kw_value){
736 					efree( kw_value );
737 				}
738 				zval_dtor(return_value);
739         		RETURN_FALSE;
740 			}
741 
742        		add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
743 		} /* end of while */
744 
745 	} /* end of if e!=NULL */
746 
747     uenum_close( e );
748 }
749 /* }}} */
750 
751  /* {{{ proto static string Locale::canonicalize($locale)
752  * @return string the canonicalized locale
753  * }}} */
754  /* {{{ proto static string locale_canonicalize(Locale $loc, string $locale)
755  * @param string $locale	The locale string to canonicalize
756  */
PHP_FUNCTION(locale_canonicalize)757 PHP_FUNCTION(locale_canonicalize)
758 {
759 	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
760 }
761 /* }}} */
762 
763 /* {{{ append_key_value
764 * Internal function which is called from locale_compose
765 * gets the value for the key_name and appends to the loc_name
766 * returns 1 if successful , -1 if not found ,
767 * 0 if array element is not a string , -2 if buffer-overflow
768 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)769 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
770 {
771 	zval**	ele_value	= NULL;
772 
773 	if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
774 		if(Z_TYPE_PP(ele_value)!= IS_STRING ){
775 			/* element value is not a string */
776 			return FAILURE;
777 		}
778 		if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
779 		   strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
780 			/* not lang or grandfathered tag */
781 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
782 		}
783 		smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
784 		return SUCCESS;
785 	}
786 
787 	return LOC_NOT_FOUND;
788 }
789 /* }}} */
790 
791 /* {{{ append_prefix , appends the prefix needed
792 * e.g. private adds 'x'
793 */
add_prefix(smart_str * loc_name,char * key_name)794 static void add_prefix(smart_str* loc_name, char* key_name)
795 {
796 	if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
797 		smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
798 		smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
799 	}
800 }
801 /* }}} */
802 
803 /* {{{ append_multiple_key_values
804 * Internal function which is called from locale_compose
805 * gets the multiple values for the key_name and appends to the loc_name
806 * used for 'variant','extlang','private'
807 * returns 1 if successful , -1 if not found ,
808 * 0 if array element is not a string , -2 if buffer-overflow
809 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name TSRMLS_DC)810 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
811 {
812 	zval**	ele_value    	= NULL;
813 	int 	i 		= 0;
814 	int 	isFirstSubtag 	= 0;
815 	int 	max_value 	= 0;
816 
817 	/* Variant/ Extlang/Private etc. */
818 	if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
819 		if( Z_TYPE_PP(ele_value) == IS_STRING ){
820 			add_prefix( loc_name , key_name);
821 
822 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
823 			smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
824 			return SUCCESS;
825 		} else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
826 			HashPosition pos;
827 			HashTable *arr = HASH_OF(*ele_value);
828 			zval **data = NULL;
829 
830 			zend_hash_internal_pointer_reset_ex(arr, &pos);
831 			while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
832 				if(Z_TYPE_PP(data) != IS_STRING) {
833 					return FAILURE;
834 				}
835 				if (isFirstSubtag++ == 0){
836 					add_prefix(loc_name , key_name);
837 				}
838 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
839 				smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
840 				zend_hash_move_forward_ex(arr, &pos);
841 			}
842 			return SUCCESS;
843 		} else {
844 			return FAILURE;
845 		}
846 	} else {
847 		char cur_key_name[31];
848 		/* Decide the max_value: the max. no. of elements allowed */
849 		if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
850 			max_value  = MAX_NO_VARIANT;
851 		}
852 		if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
853 			max_value  = MAX_NO_EXTLANG;
854 		}
855 		if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
856 			max_value  = MAX_NO_PRIVATE;
857 		}
858 
859 		/* Multiple variant values as variant0, variant1 ,variant2 */
860 		isFirstSubtag = 0;
861 		for( i=0 ; i< max_value; i++ ){
862 			snprintf( cur_key_name , 30, "%s%d", key_name , i);
863 			if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
864 				if( Z_TYPE_PP(ele_value)!= IS_STRING ){
865 					/* variant is not a string */
866 					return FAILURE;
867 				}
868 				/* Add the contents */
869 				if (isFirstSubtag++ == 0){
870 					add_prefix(loc_name , cur_key_name);
871 				}
872 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
873 				smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
874 			}
875 		} /* end of for */
876 	} /* end of else */
877 
878 	return SUCCESS;
879 }
880 /* }}} */
881 
882 /*{{{
883 * If applicable sets error message and aborts locale_compose gracefully
884 * returns 0  if locale_compose needs to be aborted
885 * otherwise returns 1
886 */
handleAppendResult(int result,smart_str * loc_name TSRMLS_DC)887 static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
888 {
889 	intl_error_reset( NULL TSRMLS_CC );
890 	if( result == FAILURE) {
891 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
892 			 "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
893 		smart_str_free(loc_name);
894 		return 0;
895 	}
896 	return 1;
897 }
898 /* }}} */
899 
900 #define RETURN_SMART_STR(s) smart_str_0((s)); RETURN_STRINGL((s)->c, (s)->len, 0)
901 /* {{{ proto static string Locale::composeLocale($array)
902 * Creates a locale by combining the parts of locale-ID passed
903 * }}} */
904 /* {{{ proto static string compose_locale($array)
905 * Creates a locale by combining the parts of locale-ID passed
906 * }}} */
PHP_FUNCTION(locale_compose)907 PHP_FUNCTION(locale_compose)
908 {
909 	smart_str      	loc_name_s = {0};
910 	smart_str *loc_name = &loc_name_s;
911 	zval*			arr	= NULL;
912 	HashTable*		hash_arr = NULL;
913 	int 			result = 0;
914 
915 	intl_error_reset( NULL TSRMLS_CC );
916 
917 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
918 		&arr) == FAILURE)
919 	{
920 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
921 			 "locale_compose: unable to parse input params", 0 TSRMLS_CC );
922 		RETURN_FALSE;
923 	}
924 
925 	hash_arr = HASH_OF( arr );
926 
927 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
928 		RETURN_FALSE;
929 
930 	/* Check for grandfathered first */
931 	result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
932 	if( result == SUCCESS){
933 		RETURN_SMART_STR(loc_name);
934 	}
935 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
936 		RETURN_FALSE;
937 	}
938 
939 	/* Not grandfathered */
940 	result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
941 	if( result == LOC_NOT_FOUND ){
942 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
943 		"locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
944 		smart_str_free(loc_name);
945 		RETURN_FALSE;
946 	}
947 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
948 		RETURN_FALSE;
949 	}
950 
951 	/* Extlang */
952 	result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
953 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
954 		RETURN_FALSE;
955 	}
956 
957 	/* Script */
958 	result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
959 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
960 		RETURN_FALSE;
961 	}
962 
963 	/* Region */
964 	result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
965 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
966 		RETURN_FALSE;
967 	}
968 
969 	/* Variant */
970 	result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
971 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
972 		RETURN_FALSE;
973 	}
974 
975 	/* Private */
976 	result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
977 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
978 		RETURN_FALSE;
979 	}
980 
981 	RETURN_SMART_STR(loc_name);
982 }
983 /* }}} */
984 
985 
/* {{{ get_private_subtags
 * Scans loc_name for the private-use singleton ('x' or 'X') and returns a
 * newly allocated copy (estrndup) of everything that follows it, or NULL
 * when there is no private-use part.
 * e.g. for locale='en_US-x-prv1-prv2-prv3'
 * returns a copy of the string 'prv1-prv2-prv3'
 *
 * The caller owns the returned string and has to efree() it.
 */
static char* get_private_subtags(const char* loc_name)
{
	char*		result = NULL;
	int		singletonPos = 0;
	int		len = 0;
	const char*	mod_loc_name = loc_name;

	if( !loc_name || loc_name[0] == '\0' ){
		return NULL;
	}

	len = strlen(mod_loc_name);
	while( (singletonPos = getSingletonPos(mod_loc_name)) != -1 ){
		char singleton = *(mod_loc_name + singletonPos);

		if( singleton == 'x' || singleton == 'X' ){
			/* private subtag start found; skip the singleton and its separator.
			 * Nothing is copied when loc_name ends with '-x-' (result stays NULL);
			 * the '<' also keeps the copy length from ever becoming negative. */
			if( singletonPos + 2 < len ){
				result = estrndup(mod_loc_name + singletonPos + 2, len - (singletonPos + 2));
			}
			break;
		}

		if( singletonPos + 1 >= len ){
			/* String end */
			break;
		}

		/* singleton found but not a private subtag, hence check further
		 * in the string for the private subtag */
		mod_loc_name = mod_loc_name + singletonPos + 1;
		len = strlen(mod_loc_name);
	} /* end of while */

	return result;
}
/* }}} */
1034 
1035 /* {{{ code used by locale_parse
1036 */
add_array_entry(const char * loc_name,zval * hash_arr,char * key_name TSRMLS_DC)1037 static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1038 {
1039 	char*   key_value 	= NULL;
1040 	char*   cur_key_name	= NULL;
1041 	char*   token        	= NULL;
1042 	char*   last_ptr  	= NULL;
1043 
1044 	int	result		= 0;
1045 	int 	cur_result  	= 0;
1046 	int 	cnt  		= 0;
1047 
1048 
1049 	if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1050 		key_value = get_private_subtags( loc_name );
1051 		result = 1;
1052 	} else {
1053 		key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1054 	}
1055 	if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1056 		( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1057 		if( result > 0 && key_value){
1058 			/* Tokenize on the "_" or "-"  */
1059 			token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1060 			if( cur_key_name ){
1061 				efree( cur_key_name);
1062 			}
1063 			cur_key_name = (char*)ecalloc( 25,  25);
1064 			sprintf( cur_key_name , "%s%d", key_name , cnt++);
1065 			add_assoc_string( hash_arr, cur_key_name , token ,TRUE );
1066 			/* tokenize on the "_" or "-" and stop  at singleton if any */
1067 			while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1068 				sprintf( cur_key_name , "%s%d", key_name , cnt++);
1069 				add_assoc_string( hash_arr, cur_key_name , token , TRUE );
1070 			}
1071 /*
1072 			if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1073 			}
1074 */
1075 		}
1076 	} else {
1077 		if( result == 1 ){
1078 			add_assoc_string( hash_arr, key_name , key_value , TRUE );
1079 			cur_result = 1;
1080 		}
1081 	}
1082 
1083 	if( cur_key_name ){
1084 		efree( cur_key_name);
1085 	}
1086 	/*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1087 	if( key_value){
1088 		efree(key_value);
1089 	}
1090 	return cur_result;
1091 }
1092 /* }}} */
1093 
1094 /* {{{ proto static array Locale::parseLocale($locale)
1095 * parses a locale-id into an array the different parts of it
1096  }}} */
1097 /* {{{ proto static array parse_locale($locale)
1098 * parses a locale-id into an array the different parts of it
1099 */
PHP_FUNCTION(locale_parse)1100 PHP_FUNCTION(locale_parse)
1101 {
1102     const char* loc_name        = NULL;
1103     int         loc_name_len    = 0;
1104     int         grOffset    	= 0;
1105 
1106     intl_error_reset( NULL TSRMLS_CC );
1107 
1108     if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1109         &loc_name, &loc_name_len ) == FAILURE)
1110     {
1111         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1112              "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1113 
1114         RETURN_FALSE;
1115     }
1116 
1117     INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1118 
1119     if(loc_name_len == 0) {
1120         loc_name = intl_locale_get_default(TSRMLS_C);
1121     }
1122 
1123 	array_init( return_value );
1124 
1125 	grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1126 	if( grOffset >= 0 ){
1127 		add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
1128 	}
1129 	else{
1130 		/* Not grandfathered */
1131 		add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1132 		add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1133 		add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1134 		add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1135 		add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1136 	}
1137 }
1138 /* }}} */
1139 
1140 /* {{{ proto static array Locale::getAllVariants($locale)
1141 * gets an array containing the list of variants, or null
1142  }}} */
1143 /* {{{ proto static array locale_get_all_variants($locale)
1144 * gets an array containing the list of variants, or null
1145 */
PHP_FUNCTION(locale_get_all_variants)1146 PHP_FUNCTION(locale_get_all_variants)
1147 {
1148 	const char*  	loc_name        = NULL;
1149 	int    		loc_name_len    = 0;
1150 
1151 	int	result		= 0;
1152 	char*	token		= NULL;
1153 	char*	variant		= NULL;
1154 	char*	saved_ptr	= NULL;
1155 
1156 	intl_error_reset( NULL TSRMLS_CC );
1157 
1158 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1159 	&loc_name, &loc_name_len ) == FAILURE)
1160 	{
1161 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1162 	     "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1163 
1164 		RETURN_FALSE;
1165 	}
1166 
1167 	if(loc_name_len == 0) {
1168 		loc_name = intl_locale_get_default(TSRMLS_C);
1169 	}
1170 
1171 	INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1172 
1173 	array_init( return_value );
1174 
1175 	/* If the locale is grandfathered, stop, no variants */
1176 	if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1177 		/* ("Grandfathered Tag. No variants."); */
1178 	}
1179 	else {
1180 	/* Call ICU variant */
1181 		variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1182 		if( result > 0 && variant){
1183 			/* Tokenize on the "_" or "-" */
1184 			token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1185 			add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1186 			/* tokenize on the "_" or "-" and stop  at singleton if any	*/
1187 			while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1188  				add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1189 			}
1190 		}
1191 		if( variant ){
1192 			efree( variant );
1193 		}
1194 	}
1195 
1196 
1197 }
1198 /* }}} */
1199 
/* {{{ strToMatch
 * Writes a normalized copy of str into retstr: every character is converted
 * to lower case and every hyphen is replaced with an underscore. retstr must
 * provide room for strlen(str) + 1 bytes.
 *
 * Returns 1 on success. Returns 0 and leaves retstr untouched when str is
 * NULL or empty.
 */
static int strToMatch(const char* str ,char *retstr)
{
	if( (!str) || str[0] == '\0' ){
		return 0;
	}

	for( ; *str != '\0'; str++, retstr++ ){
		if( *str == '-' ){
			*retstr = '_';
		} else {
			/* tolower() is only defined for values representable as
			 * unsigned char (or EOF); a plain char may be negative. */
			*retstr = (char)tolower( (unsigned char)*str );
		}
	}
	*retstr = '\0';

	return 1;
}
/* }}} */
1232 
1233 /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1234 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1235 */
1236 /* }}} */
1237 /* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1238 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1239 */
PHP_FUNCTION(locale_filter_matches)1240 PHP_FUNCTION(locale_filter_matches)
1241 {
1242 	char*       	lang_tag        = NULL;
1243 	int         	lang_tag_len    = 0;
1244 	const char*     loc_range       = NULL;
1245 	int         	loc_range_len   = 0;
1246 
1247 	int		result		= 0;
1248 	char*		token		= 0;
1249 	char*		chrcheck	= NULL;
1250 
1251 	char*       	can_lang_tag    = NULL;
1252 	char*       	can_loc_range   = NULL;
1253 
1254 	char*       	cur_lang_tag    = NULL;
1255 	char*       	cur_loc_range   = NULL;
1256 
1257 	zend_bool 	boolCanonical 	= 0;
1258 	UErrorCode	status		= U_ZERO_ERROR;
1259 
1260 	intl_error_reset( NULL TSRMLS_CC );
1261 
1262 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1263 		&lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1264 		&boolCanonical) == FAILURE)
1265 	{
1266 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1267 		"locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1268 
1269 		RETURN_FALSE;
1270 	}
1271 
1272 	if(loc_range_len == 0) {
1273 		loc_range = intl_locale_get_default(TSRMLS_C);
1274 	}
1275 
1276 	if( strcmp(loc_range,"*")==0){
1277 		RETURN_TRUE;
1278 	}
1279 
1280 	INTL_CHECK_LOCALE_LEN(strlen(loc_range));
1281 	INTL_CHECK_LOCALE_LEN(strlen(lang_tag));
1282 
1283 	if( boolCanonical ){
1284 		/* canonicalize loc_range */
1285 		can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1286 		if( result ==0) {
1287 			intl_error_set( NULL, status,
1288 				"locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1289 			RETURN_FALSE;
1290 		}
1291 
1292 		/* canonicalize lang_tag */
1293 		can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1294 		if( result ==0) {
1295 			intl_error_set( NULL, status,
1296 				"locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1297 			RETURN_FALSE;
1298 		}
1299 
1300 		/* Convert to lower case for case-insensitive comparison */
1301 		cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1302 
1303 		/* Convert to lower case for case-insensitive comparison */
1304 		result = strToMatch( can_lang_tag , cur_lang_tag);
1305 		if( result == 0) {
1306 			efree( cur_lang_tag );
1307 			efree( can_lang_tag );
1308 			RETURN_FALSE;
1309 		}
1310 
1311 		cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1312 		result = strToMatch( can_loc_range , cur_loc_range );
1313 		if( result == 0) {
1314 			efree( cur_lang_tag );
1315 			efree( can_lang_tag );
1316 			efree( cur_loc_range );
1317 			efree( can_loc_range );
1318 			RETURN_FALSE;
1319 		}
1320 
1321 		/* check if prefix */
1322 		token 	= strstr( cur_lang_tag , cur_loc_range );
1323 
1324 		if( token && (token==cur_lang_tag) ){
1325 			/* check if the char. after match is SEPARATOR */
1326 			chrcheck = token + (strlen(cur_loc_range));
1327 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1328 				if( cur_lang_tag){
1329 					efree( cur_lang_tag );
1330 				}
1331 				if( cur_loc_range){
1332 					efree( cur_loc_range );
1333 				}
1334 				if( can_lang_tag){
1335 					efree( can_lang_tag );
1336 				}
1337 				if( can_loc_range){
1338 					efree( can_loc_range );
1339 				}
1340 				RETURN_TRUE;
1341 			}
1342 		}
1343 
1344 		/* No prefix as loc_range */
1345 		if( cur_lang_tag){
1346 			efree( cur_lang_tag );
1347 		}
1348 		if( cur_loc_range){
1349 			efree( cur_loc_range );
1350 		}
1351 		if( can_lang_tag){
1352 			efree( can_lang_tag );
1353 		}
1354 		if( can_loc_range){
1355 			efree( can_loc_range );
1356 		}
1357 		RETURN_FALSE;
1358 
1359 	} /* end of if isCanonical */
1360 	else{
1361 		/* Convert to lower case for case-insensitive comparison */
1362 		cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1363 
1364 		result = strToMatch( lang_tag , cur_lang_tag);
1365 		if( result == 0) {
1366 			efree( cur_lang_tag );
1367 			RETURN_FALSE;
1368 		}
1369 		cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1370 		result = strToMatch( loc_range , cur_loc_range );
1371 		if( result == 0) {
1372 			efree( cur_lang_tag );
1373 			efree( cur_loc_range );
1374 			RETURN_FALSE;
1375 		}
1376 
1377 		/* check if prefix */
1378 		token 	= strstr( cur_lang_tag , cur_loc_range );
1379 
1380 		if( token && (token==cur_lang_tag) ){
1381 			/* check if the char. after match is SEPARATOR */
1382 			chrcheck = token + (strlen(cur_loc_range));
1383 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1384 				if( cur_lang_tag){
1385 					efree( cur_lang_tag );
1386 				}
1387 				if( cur_loc_range){
1388 					efree( cur_loc_range );
1389 				}
1390 				RETURN_TRUE;
1391 			}
1392 		}
1393 
1394 		/* No prefix as loc_range */
1395 		if( cur_lang_tag){
1396 			efree( cur_lang_tag );
1397 		}
1398 		if( cur_loc_range){
1399 			efree( cur_loc_range );
1400 		}
1401 		RETURN_FALSE;
1402 
1403 	}
1404 }
1405 /* }}} */
1406 
/* Releases a lookup table built by lookup_loc_range: arr holds arr_size pairs
 * of pointers. Only the first pointer of each pair (even index) is freed here,
 * followed by the table itself. */
static void array_cleanup( char* arr[] , int arr_size)
{
	char**	slot = arr;
	int	remaining = arr_size;

	while( remaining-- > 0 ){
		if( *slot != NULL ){
			efree( *slot );
		}
		/* step over the second pointer of the pair */
		slot += 2;
	}
	efree(arr);
}
1417 
1418 #define LOOKUP_CLEAN_RETURN(value)	array_cleanup(cur_arr, cur_arr_len); return (value)
1419 /* {{{
1420 * returns the lookup result to lookup_loc_range_src_php
1421 * internal function
1422 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize TSRMLS_DC)1423 static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize  TSRMLS_DC)
1424 {
1425 	int	i = 0;
1426 	int	cur_arr_len = 0;
1427 	int result = 0;
1428 
1429 	char* lang_tag = NULL;
1430 	zval** ele_value = NULL;
1431 	char** cur_arr = NULL;
1432 
1433 	char* cur_loc_range	= NULL;
1434 	char* can_loc_range	= NULL;
1435 	int	saved_pos = 0;
1436 
1437 	char* return_value = NULL;
1438 
1439 	cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1440 	/* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1441 	for(zend_hash_internal_pointer_reset(hash_arr);
1442 		zend_hash_has_more_elements(hash_arr) == SUCCESS;
1443 		zend_hash_move_forward(hash_arr)) {
1444 
1445 		if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
1446 			/* Should never actually fail since the key is known to exist.*/
1447 			continue;
1448 		}
1449 		if(Z_TYPE_PP(ele_value)!= IS_STRING) {
1450 			/* element value is not a string */
1451 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1452 			LOOKUP_CLEAN_RETURN(NULL);
1453 		}
1454 		cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
1455 		result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]);
1456 		if(result == 0) {
1457 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1458 			LOOKUP_CLEAN_RETURN(NULL);
1459 		}
1460 		cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
1461 		cur_arr_len++ ;
1462 	} /* end of for */
1463 
1464 	/* Canonicalize array elements */
1465 	if(canonicalize) {
1466 		for(i=0; i<cur_arr_len; i++) {
1467 			lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1468 			if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1469 				if(lang_tag) {
1470 					efree(lang_tag);
1471 				}
1472 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1473 				LOOKUP_CLEAN_RETURN(NULL);
1474 			}
1475 			cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1476 			result = strToMatch(lang_tag, cur_arr[i*2]);
1477 			efree(lang_tag);
1478 			if(result == 0) {
1479 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1480 				LOOKUP_CLEAN_RETURN(NULL);
1481 			}
1482 		}
1483 
1484 	}
1485 
1486 	if(canonicalize) {
1487 		/* Canonicalize the loc_range */
1488 		can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1489 		if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1490 			/* Error */
1491 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1492 			if(can_loc_range) {
1493 				efree(can_loc_range);
1494 			}
1495 			LOOKUP_CLEAN_RETURN(NULL);
1496 		} else {
1497 			loc_range = can_loc_range;
1498 		}
1499 	}
1500 
1501 	cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1502 	/* convert to lower and replace hyphens */
1503 	result = strToMatch(loc_range, cur_loc_range);
1504 	if(can_loc_range) {
1505 		efree(can_loc_range);
1506 	}
1507 	if(result == 0) {
1508 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1509 		LOOKUP_CLEAN_RETURN(NULL);
1510 	}
1511 
1512 	/* Lookup for the lang_tag match */
1513 	saved_pos = strlen(cur_loc_range);
1514 	while(saved_pos > 0) {
1515 		for(i=0; i< cur_arr_len; i++){
1516 			if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1517 				/* Match found */
1518 				return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1519 				efree(cur_loc_range);
1520 				LOOKUP_CLEAN_RETURN(return_value);
1521 			}
1522 		}
1523 		saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1524 	}
1525 
1526 	/* Match not found */
1527 	efree(cur_loc_range);
1528 	LOOKUP_CLEAN_RETURN(NULL);
1529 }
1530 /* }}} */
1531 
1532 /* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1533 * Searchs the items in $langtag for the best match to the language
1534 * range
1535 */
1536 /* }}} */
1537 /* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1538 * Searchs the items in $langtag for the best match to the language
1539 * range
1540 */
PHP_FUNCTION(locale_lookup)1541 PHP_FUNCTION(locale_lookup)
1542 {
1543 	char*      	fallback_loc  		= NULL;
1544 	int        	fallback_loc_len	= 0;
1545 	const char*    	loc_range      		= NULL;
1546 	int        	loc_range_len  		= 0;
1547 
1548 	zval*		arr				= NULL;
1549 	HashTable*	hash_arr		= NULL;
1550 	zend_bool	boolCanonical	= 0;
1551 	char*	 	result			=NULL;
1552 
1553 	intl_error_reset( NULL TSRMLS_CC );
1554 
1555 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1556 		&boolCanonical,	&fallback_loc, &fallback_loc_len) == FAILURE) {
1557 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,	"locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1558 		RETURN_FALSE;
1559 	}
1560 
1561 	if(loc_range_len == 0) {
1562 		loc_range = intl_locale_get_default(TSRMLS_C);
1563 	}
1564 
1565 	INTL_CHECK_LOCALE_LEN(strlen(loc_range));
1566 
1567 	hash_arr = HASH_OF(arr);
1568 
1569 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1570 		RETURN_EMPTY_STRING();
1571 	}
1572 
1573 	result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1574 	if(result == NULL || result[0] == '\0') {
1575 		if( fallback_loc ) {
1576 			result = estrndup(fallback_loc, fallback_loc_len);
1577 		} else {
1578 			RETURN_EMPTY_STRING();
1579 		}
1580 	}
1581 
1582 	RETVAL_STRINGL(result, strlen(result), 0);
1583 }
1584 /* }}} */
1585 
1586 /* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1587 * Tries to find out best available locale based on HTTP �Accept-Language� header
1588 */
1589 /* }}} */
1590 /* {{{ proto string locale_accept_from_http(string $http_accept)
1591 * Tries to find out best available locale based on HTTP �Accept-Language� header
1592 */
PHP_FUNCTION(locale_accept_from_http)1593 PHP_FUNCTION(locale_accept_from_http)
1594 {
1595 	UEnumeration *available;
1596 	char *http_accept = NULL;
1597 	int http_accept_len;
1598 	UErrorCode status = 0;
1599 	int len;
1600 	char resultLocale[INTL_MAX_LOCALE_LEN+1];
1601 	UAcceptResult outResult;
1602 
1603 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1604 	{
1605 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1606 		"locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1607 		RETURN_FALSE;
1608 	}
1609 	if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1610 		/* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1611 		char *start = http_accept;
1612 		char *end;
1613 		size_t len;
1614 		do {
1615 			end = strchr(start, ',');
1616 			len = end ? end-start : http_accept_len-(start-http_accept);
1617 			if(len > ULOC_FULLNAME_CAPACITY) {
1618 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1619 						"locale_accept_from_http: locale string too long", 0 TSRMLS_CC );
1620 				RETURN_FALSE;
1621 			}
1622 			if(end) {
1623 				start = end+1;
1624 			}
1625 		} while(end != NULL);
1626 	}
1627 
1628 	available = ures_openAvailableLocales(NULL, &status);
1629 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1630 	len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1631 						&outResult, http_accept, available, &status);
1632 	uenum_close(available);
1633 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1634 	if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1635 		RETURN_FALSE;
1636 	}
1637 	RETURN_STRINGL(resultLocale, len, 1);
1638 }
1639 /* }}} */
1640 
1641 /*
1642  * Local variables:
1643  * tab-width: 4
1644  * c-basic-offset: 4
1645  * End:
1646  * vim600: noet sw=4 ts=4 fdm=marker
1647  * vim<600: noet sw=4 ts=4
1648  *can_loc_len
1649 */
1650