xref: /PHP-5.3/ext/intl/locale/locale_methods.c (revision e644aad3)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14    +----------------------------------------------------------------------+
15 */
16 
17 /* $Id$ */
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include <unicode/ustring.h>
24 #include <unicode/udata.h>
25 #include <unicode/putil.h>
26 #include <unicode/ures.h>
27 
28 #include "php_intl.h"
29 #include "locale.h"
30 #include "locale_class.h"
31 #include "locale_methods.h"
32 #include "intl_convert.h"
33 #include "intl_data.h"
34 
35 #include <zend_API.h>
36 #include <zend.h>
37 #include <php.h>
38 #include "main/php_ini.h"
39 #include "ext/standard/php_smart_str.h"
40 
41 ZEND_EXTERN_MODULE_GLOBALS( intl )
42 
/* Separators, subtag prefixes and selector keys used while composing and
 * inspecting locale IDs.
 * SEPARATOR is what append_key_value()/append_multiple_key_values() put between
 * subtags; PRIVATE_PREFIX is the singleton add_prefix() emits before private
 * subtags; DISP_NAME selects uloc_getDisplayName() in
 * get_icu_disp_value_src_php().
 * NOTE(review): SEPARATOR1, DELIMITER and EXTLANG_PREFIX are not referenced in
 * this part of the file - presumably used by the parse/lookup code further down. */
#define SEPARATOR "_"
#define SEPARATOR1 "-"
#define DELIMITER "-_"
#define EXTLANG_PREFIX "a"
#define PRIVATE_PREFIX "x"
#define DISP_NAME "name"

/* Upper bounds on the numbered keys ("variant0", "variant1", ...) that
 * append_multiple_key_values() probes for each multi-valued subtag. */
#define MAX_NO_VARIANT  15
#define MAX_NO_EXTLANG  3
#define MAX_NO_PRIVATE  15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is absent
 * (distinct from SUCCESS and FAILURE). */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
62 
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code. The list is NULL-terminated so it can be scanned with
 * findOffset().
 * The ORDER MATTERS: the leading entries line up index-for-index with
 * LOC_PREFERRED_GRANDFATHERED (see getPreferredTag()).
 */
static const char * const LOC_GRANDFATHERED[] = {
	/* tags that have a preferred replacement */
	"art-lojban",
	"i-klingon",
	"i-lux",
	"i-navajo",
	"no-bok",
	"no-nyn",
	/* tags without a preferred replacement */
	"cel-gaulish",
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-mingo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"sgn-BE-fr",
	"sgn-BE-nl",
	"sgn-CH-de",
	"zh-cmn",
	"zh-cmn-Hans",
	"zh-cmn-Hant",
	"zh-gan",
	"zh-guoyu",
	"zh-hakka",
	"zh-min",
	"zh-min-nan",
	"zh-wuu",
	"zh-xiang",
	"zh-yue",
	NULL
};
78 
/* Preferred replacements for grandfathered tags, based on the IANA registry
 * at the time of writing this code.
 * Entry N here is the preferred value for entry N of LOC_GRANDFATHERED, so the
 * two tables must be kept in the same order.
 * LOC_PREFERRED_GRANDFATHERED_LEN is the number of real entries (the trailing
 * NULL is not counted) and must be updated whenever the table changes.
 */
static const int LOC_PREFERRED_GRANDFATHERED_LEN = 6;
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
	"jbo",	/* art-lojban */
	"tlh",	/* i-klingon  */
	"lb",	/* i-lux      */
	"nv",	/* i-navajo   */
	"nb",	/* no-bok     */
	"nn",	/* no-nyn     */
	NULL
};
90 
/* Character-class helpers for scanning locale IDs.
 * Every argument is fully parenthesized so that an arbitrary expression
 * (e.g. "p + 1" or "c ? a : b") can be passed safely.
 * NOTE: the arguments are still evaluated more than once - do not pass
 * expressions with side effects such as *p++.
 */

/* TRUE if a is an ID separator ('_' or '-'), FALSE otherwise */
#define isIDSeparator(a) (((a) == '_') || ((a) == '-'))
/* TRUE if a introduces the keyword section of a locale ID */
#define isKeywordSeparator(a) ((a) == '@')
/* TRUE if a is the string terminator */
#define isEndOfTag(a) ((a) == '\0')

/* TRUE if a is one of the letters x, X, i or I */
#define isPrefixLetter(a) (((a) == 'x') || ((a) == 'X') || ((a) == 'i') || ((a) == 'I'))

/* TRUE if the string s starts with one of the special prefixes
   'x-' or 'i-' (either case, either separator) */
#define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
/* TRUE if the string s starts with the keyword separator */
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a) (((a) == 0) || ((a) == '.') || ((a) == '@'))
106 
/* {{{ findOffset
 * Linear search for 'key' in the NULL-terminated string array 'list'.
 * The comparison is an exact, case-sensitive strcmp().
 * Returns the zero-based index of the first match, or -1 if 'key' is absent.
 */
static int16_t findOffset(const char* const* list, const char* key)
{
	int16_t idx;

	for (idx = 0; list[idx] != NULL; idx++) {
		if (strcmp(key, list[idx]) == 0) {
			return idx;
		}
	}

	return -1;
}
/*}}}*/
123 
getPreferredTag(char * gf_tag)124 static char* getPreferredTag(char* gf_tag)
125 {
126 	char* result = NULL;
127 	int grOffset = 0;
128 
129 	grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130 	if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
131 		/* return preferred tag */
132 		result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
133 	} else {
134 		/* Return correct grandfathered language tag */
135 		result = estrdup( LOC_GRANDFATHERED[grOffset] );
136 	}
137 	return result;
138 }
139 
/* {{{ getStrrtokenPos
* Reverse-direction counterpart of strtok for locale lookup.
* Scans 'str' backwards starting just before index 'savedPos' and stops at the
* first ID separator it meets. If the subtag in front of that separator is a
* single character (i.e. another separator sits two positions earlier), the
* position of that earlier separator is reported instead, so the singleton is
* skipped together with the token.
* Returns the separator position, or -1 when there is no token left; a
* position below 1 is also reported as -1.
*/
static int getStrrtokenPos(char* str, int savedPos)
{
	int pos = savedPos - 1;
	int found = -1;

	while( pos >= 0 && !isIDSeparator(str[pos]) ){
		pos--;
	}

	if( pos >= 0 ){
		/* delimiter found; check for singleton */
		if( pos >= 2 && isIDSeparator(str[pos - 2]) ){
			/* a singleton; so send the position of token before the singleton */
			found = pos - 2;
		} else {
			found = pos;
		}
	}

	/* Just in case of an invalid locale e.g. '-x-xyz' or '-sl_Latn' */
	return (found < 1) ? -1 : found;
}
/* }}} */
169 
/* {{{ getSingletonPos
* Forward scan for the first singleton (one-character subtag) in 'str'.
* Returns:
*   0   if the first separator is at index 1, i.e. the string itself starts
*       with a singleton ("x-avy", "a-prv1");
*   i+1 (the index of the singleton character) if a separator at index i is
*       followed by another separator at index i+2;
*  -1   if 'str' is NULL, empty or contains no singleton.
*/
static int getSingletonPos(char* str)
{
	int result =-1;
	int i=0;
	int len = 0;

	if( str && ((len=strlen(str))>0) ){
		for( i=0; i<len ; i++){
			if( !isIDSeparator(*(str+i)) ){
				continue;
			}
			if( i==1 ){
				/* string is of the form x-avy or a-prv1 */
				result =0;
				break;
			}
			/* delimiter found; check for singleton.
			 * The bound check keeps str[i+2] inside the string: when the
			 * separator is the last character, i+2 is past the terminator. */
			if( i+2 < len && isIDSeparator(*(str+i+2)) ){
				/* a singleton; report the position right after this separator */
				result = i+1;
				break;
			}
		}/* end of for */
	}
	return result;
}
/* }}} */
203 
204 /* {{{ proto static string Locale::getDefault(  )
205    Get default locale */
206 /* }}} */
207 /* {{{ proto static string locale_get_default( )
208    Get default locale */
PHP_NAMED_FUNCTION(zif_locale_get_default)209 PHP_NAMED_FUNCTION(zif_locale_get_default)
210 {
211 	if( INTL_G(default_locale) == NULL ) {
212 		INTL_G(default_locale) = pestrdup( uloc_getDefault(), 1);
213  	}
214 	RETURN_STRING( INTL_G(default_locale), TRUE );
215 }
216 
217 /* }}} */
218 
219 /* {{{ proto static string Locale::setDefault( string $locale )
220    Set default locale */
221 /* }}} */
222 /* {{{ proto static string locale_set_default( string $locale )
223    Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)224 PHP_NAMED_FUNCTION(zif_locale_set_default)
225 {
226 	char* locale_name = NULL;
227 	int   len=0;
228 
229 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC,  "s",
230 		&locale_name ,&len ) == FAILURE)
231 	{
232 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233 			 	"locale_set_default: unable to parse input params", 0 TSRMLS_CC );
234 
235 		RETURN_FALSE;
236 	}
237 
238 	if(len == 0) {
239 		locale_name =  (char *)uloc_getDefault() ;
240 		len = strlen(locale_name);
241 	}
242 
243 	zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
244 
245 	RETURN_TRUE;
246 }
247 /* }}} */
248 
249 /* {{{
250 * Gets the value from ICU
251 * common code shared by get_primary_language,get_script or get_region or get_variant
252 * result = 0 if error, 1 if successful , -1 if no value
253 */
get_icu_value_internal(char * loc_name,char * tag_name,int * result,int fromParseLocale)254 static char* get_icu_value_internal( char* loc_name , char* tag_name, int* result , int fromParseLocale)
255 {
256 	char*		tag_value	= NULL;
257 	int32_t     	tag_value_len   = 512;
258 
259 	int		singletonPos   	= 0;
260 	char*       	mod_loc_name	= NULL;
261 	int 		grOffset	= 0;
262 
263 	int32_t     	buflen          = 512;
264 	UErrorCode  	status          = U_ZERO_ERROR;
265 
266 
267 	if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
268 		/* Handle  grandfathered languages */
269 		grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
270 		if( grOffset >= 0 ){
271 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
272 				return estrdup(loc_name);
273 			} else {
274 				/* Since Grandfathered , no value , do nothing , retutn NULL */
275 				return NULL;
276 			}
277 		}
278 
279 	if( fromParseLocale==1 ){
280 		/* Handle singletons */
281 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
282 			if( strlen(loc_name)>1 && isIDPrefix(loc_name) ){
283 				return estrdup(loc_name);
284 			}
285 		}
286 
287 		singletonPos = getSingletonPos( loc_name );
288 		if( singletonPos == 0){
289 			/* singleton at start of script, region , variant etc.
290 			 * or invalid singleton at start of language */
291 			return NULL;
292 		} else if( singletonPos > 0 ){
293 			/* singleton at some position except at start
294 			 * strip off the singleton and rest of the loc_name */
295 			mod_loc_name = estrndup ( loc_name , singletonPos-1);
296 		}
297 	} /* end of if fromParse */
298 
299 	} /* end of if != LOC_CANONICAL_TAG */
300 
301 	if( mod_loc_name == NULL){
302 		mod_loc_name = estrdup(loc_name );
303 	}
304 
305 	/* Proceed to ICU */
306     do{
307 		tag_value = erealloc( tag_value , buflen  );
308 		tag_value_len = buflen;
309 
310 		if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
311 			buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
312 		}
313 		if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
314 			buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
315 		}
316 		if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
317 			buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
318 		}
319 		if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
320 			buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
321 		}
322 		if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
323 			buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
324 		}
325 
326 		if( U_FAILURE( status ) ) {
327 			if( status == U_BUFFER_OVERFLOW_ERROR ) {
328 				status = U_ZERO_ERROR;
329 				continue;
330 			}
331 
332 			/* Error in retriving data */
333 			*result = 0;
334 			if( tag_value ){
335 				efree( tag_value );
336 			}
337 			if( mod_loc_name ){
338 				efree( mod_loc_name);
339 			}
340 			return NULL;
341 		}
342 	} while( buflen > tag_value_len );
343 
344 	if(  buflen ==0 ){
345 		/* No value found */
346 		*result = -1;
347 		if( tag_value ){
348 			efree( tag_value );
349 		}
350 		if( mod_loc_name ){
351 			efree( mod_loc_name);
352 		}
353 		return NULL;
354 	} else {
355 		*result = 1;
356 	}
357 
358 	if( mod_loc_name ){
359 		efree( mod_loc_name);
360 	}
361 	return tag_value;
362 }
363 /* }}} */
364 
365 /* {{{
366 * Gets the value from ICU , called when PHP userspace function is called
367 * common code shared by get_primary_language,get_script or get_region or get_variant
368 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)369 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
370 {
371 
372 	char*       loc_name        	= NULL;
373 	int         loc_name_len    	= 0;
374 
375 	char*       tag_value		= NULL;
376 	char*       empty_result	= "";
377 
378 	int         result    		= 0;
379 	char*       msg        		= NULL;
380 
381 	UErrorCode  status          	= U_ZERO_ERROR;
382 
383 	intl_error_reset( NULL TSRMLS_CC );
384 
385 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
386 	&loc_name ,&loc_name_len ) == FAILURE) {
387 		spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
388 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
389 		efree(msg);
390 
391 		RETURN_FALSE;
392     }
393 
394 	if(loc_name_len == 0) {
395 		loc_name = INTL_G(default_locale);
396 	}
397 
398 	/* Call ICU get */
399 	tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
400 
401 	/* No value found */
402 	if( result == -1 ) {
403 		if( tag_value){
404 			efree( tag_value);
405 		}
406 		RETURN_STRING( empty_result , TRUE);
407 	}
408 
409 	/* value found */
410 	if( tag_value){
411 		RETURN_STRING( tag_value , FALSE);
412 	}
413 
414 	/* Error encountered while fetching the value */
415 	if( result ==0) {
416 		spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
417 		intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
418 		efree(msg);
419 		RETURN_NULL();
420 	}
421 
422 }
423 /* }}} */
424 
/* {{{ proto static string Locale::getScript($locale)
 * gets the script for the $locale
 }}} */
/* {{{ proto static string locale_get_script($locale)
 * Gets the script for the $locale.
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_SCRIPT_TAG, which parses the argument and sets the return value.
 */
PHP_FUNCTION( locale_get_script )
{
	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
436 
/* {{{ proto static string Locale::getRegion($locale)
 * gets the region for the $locale
 }}} */
/* {{{ proto static string locale_get_region($locale)
 * Gets the region for the $locale.
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_REGION_TAG, which parses the argument and sets the return value.
 */
PHP_FUNCTION( locale_get_region )
{
	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
448 
/* {{{ proto static string Locale::getPrimaryLanguage($locale)
 * gets the primary language for the $locale
 }}} */
/* {{{ proto static string locale_get_primary_language($locale)
 * Gets the primary language for the $locale.
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_LANG_TAG, which parses the argument and sets the return value.
 */
PHP_FUNCTION(locale_get_primary_language )
{
	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
460 
461 
462 /* {{{
463  * common code shared by display_xyz functions to  get the value from ICU
464  }}} */
get_icu_disp_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)465 static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
466 {
467 	char*       loc_name        	= NULL;
468 	int         loc_name_len    	= 0;
469 
470 	char*       disp_loc_name       = NULL;
471 	int         disp_loc_name_len   = 0;
472 	int         free_loc_name       = 0;
473 
474 	UChar*      disp_name      	= NULL;
475 	int32_t     disp_name_len  	= 0;
476 
477 	char*       mod_loc_name        = NULL;
478 
479 	int32_t     buflen          	= 512;
480 	UErrorCode  status          	= U_ZERO_ERROR;
481 
482 	char*       utf8value		= NULL;
483 	int         utf8value_len   	= 0;
484 
485   	char*       msg             	= NULL;
486 	int         grOffset    	= 0;
487 
488 	intl_error_reset( NULL TSRMLS_CC );
489 
490 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
491 		&loc_name, &loc_name_len ,
492 		&disp_loc_name ,&disp_loc_name_len ) == FAILURE)
493 	{
494 		spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
495 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
496 		efree(msg);
497 		RETURN_FALSE;
498 	}
499 
500     if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
501         /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
502 		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
503 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
504 		efree(msg);
505 		RETURN_FALSE;
506     }
507 
508 	if(loc_name_len == 0) {
509         loc_name = INTL_G(default_locale);
510 	}
511 
512 	if( strcmp(tag_name, DISP_NAME) != 0 ){
513 		/* Handle grandfathered languages */
514 		grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
515 		if( grOffset >= 0 ){
516 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
517 				mod_loc_name = getPreferredTag( loc_name );
518 			} else {
519 				/* Since Grandfathered, no value, do nothing, retutn NULL */
520 				RETURN_FALSE;
521 			}
522 		}
523 	} /* end of if != LOC_CANONICAL_TAG */
524 
525 	if( mod_loc_name==NULL ){
526 		mod_loc_name = estrdup( loc_name );
527 	}
528 
529 	/* Check if disp_loc_name passed , if not use default locale */
530 	if( !disp_loc_name){
531 		disp_loc_name = estrdup(INTL_G(default_locale));
532 		free_loc_name = 1;
533 	}
534 
535     /* Get the disp_value for the given locale */
536     do{
537         disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
538         disp_name_len = buflen;
539 
540 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
541 			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
542 		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
543 			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
544 		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
545 			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
546 		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
547 			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
548 		} else if( strcmp(tag_name , DISP_NAME)==0 ){
549 			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
550 		}
551 
552 		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
553 		if( U_FAILURE( status ) )
554 		{
555 			if( status == U_BUFFER_OVERFLOW_ERROR )
556 			{
557 				status = U_ZERO_ERROR;
558 				continue;
559 			}
560 
561 			spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
562 			intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
563 			efree(msg);
564 			if( disp_name){
565 				efree( disp_name );
566 			}
567 			if( mod_loc_name){
568 				efree( mod_loc_name );
569 			}
570 			if (free_loc_name) {
571 				efree(disp_loc_name);
572 				disp_loc_name = NULL;
573 			}
574 			RETURN_FALSE;
575 		}
576 	} while( buflen > disp_name_len );
577 
578 	if( mod_loc_name){
579 		efree( mod_loc_name );
580 	}
581 	if (free_loc_name) {
582 		efree(disp_loc_name);
583 		disp_loc_name = NULL;
584 	}
585 	/* Convert display locale name from UTF-16 to UTF-8. */
586 	intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
587 	efree( disp_name );
588 	if( U_FAILURE( status ) )
589 	{
590 		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
591 		intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
592 		efree(msg);
593 		RETURN_FALSE;
594 	}
595 
596 	RETVAL_STRINGL( utf8value, utf8value_len , FALSE);
597 
598 }
599 /* }}} */
600 
/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_name($locale[, $in_locale = null])
* Gets the name for the $locale in $in_locale or default_locale.
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* DISP_NAME, which parses the arguments and sets the return value.
*/
PHP_FUNCTION(locale_get_display_name)
{
    get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
612 
/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_language($locale[, $in_locale = null])
* Gets the language for the $locale in $in_locale or default_locale.
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_LANG_TAG, which parses the arguments and sets the return value.
*/
PHP_FUNCTION(locale_get_display_language)
{
    get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
624 
/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
* gets the script for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_script($locale, $in_locale = null)
* Gets the script for the $locale in $in_locale or default_locale.
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_SCRIPT_TAG, which parses the arguments and sets the return value.
*/
PHP_FUNCTION(locale_get_display_script)
{
    get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
636 
/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
* gets the region for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_region($locale, $in_locale = null)
* Gets the region for the $locale in $in_locale or default_locale.
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_REGION_TAG, which parses the arguments and sets the return value.
*/
PHP_FUNCTION(locale_get_display_region)
{
    get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
648 
/* {{{
* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
 }}} */
/* {{{
* proto static string locale_get_display_variant($locale, $in_locale = null)
* Gets the variant for the $locale in $in_locale or default_locale.
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_VARIANT_TAG, which parses the arguments and sets the return value.
*/
PHP_FUNCTION(locale_get_display_variant)
{
    get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
662 
663  /* {{{ proto static array getKeywords(string $locale) {
664  * return an associative array containing keyword-value
665  * pairs for this locale. The keys are keys to the array (doh!)
666  * }}}*/
667  /* {{{ proto static array locale_get_keywords(string $locale) {
668  * return an associative array containing keyword-value
669  * pairs for this locale. The keys are keys to the array (doh!)
670  */
PHP_FUNCTION(locale_get_keywords)671 PHP_FUNCTION( locale_get_keywords )
672 {
673     UEnumeration*   e        = NULL;
674     UErrorCode      status   = U_ZERO_ERROR;
675 
676 	const char*	 	kw_key        = NULL;
677     int32_t         kw_key_len    = 0;
678 
679     char*       	loc_name        = NULL;
680     int        	 	loc_name_len    = 0;
681 
682 /*
683 	ICU expects the buffer to be allocated  before calling the function
684 	and so the buffer size has been explicitly specified
685 	ICU uloc.h #define 	ULOC_KEYWORD_AND_VALUES_CAPACITY   100
686 	hence the kw_value buffer size is 100
687 */
688 	char*	 	kw_value        = NULL;
689     int32_t     kw_value_len    = 100;
690 
691     intl_error_reset( NULL TSRMLS_CC );
692 
693     if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
694         &loc_name, &loc_name_len ) == FAILURE)
695     {
696         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
697              "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
698 
699         RETURN_FALSE;
700     }
701 
702     if(loc_name_len == 0) {
703         loc_name = INTL_G(default_locale);
704     }
705 
706 	/* Get the keywords */
707     e = uloc_openKeywords( loc_name, &status );
708     if( e != NULL )
709     {
710 		/* Traverse it, filling the return array. */
711     	array_init( return_value );
712 
713     	while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
714 			kw_value = ecalloc( 1 , kw_value_len  );
715 
716 			/* Get the keyword value for each keyword */
717 			kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len ,  &status );
718 			if (status == U_BUFFER_OVERFLOW_ERROR) {
719 				status = U_ZERO_ERROR;
720 				kw_value = erealloc( kw_value , kw_value_len+1);
721 				kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 ,  &status );
722 			} else if(!U_FAILURE(status)) {
723 				kw_value = erealloc( kw_value , kw_value_len+1);
724 			}
725 			if (U_FAILURE(status)) {
726         		intl_error_set( NULL, FAILURE, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 TSRMLS_CC );
727 				if( kw_value){
728 					efree( kw_value );
729 				}
730 				zval_dtor(return_value);
731         		RETURN_FALSE;
732 			}
733 
734        		add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
735 		} /* end of while */
736 
737 	} /* end of if e!=NULL */
738 
739     uenum_close( e );
740 }
741 /* }}} */
742 
 /* {{{ proto static string Locale::canonicalize($locale)
 * @return string the canonicalized locale
 * }}} */
 /* {{{ proto static string locale_canonicalize(string $locale)
 * @param string $locale	The locale string to canonicalize
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_CANONICALIZE_TAG, which parses the argument and sets the return value.
 */
PHP_FUNCTION(locale_canonicalize)
{
	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
754 
755 /* {{{ append_key_value
756 * Internal function which is called from locale_compose
757 * gets the value for the key_name and appends to the loc_name
758 * returns 1 if successful , -1 if not found ,
759 * 0 if array element is not a string , -2 if buffer-overflow
760 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)761 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
762 {
763 	zval**	ele_value	= NULL;
764 
765 	if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
766 		if(Z_TYPE_PP(ele_value)!= IS_STRING ){
767 			/* element value is not a string */
768 			return FAILURE;
769 		}
770 		if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
771 		   strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
772 			/* not lang or grandfathered tag */
773 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
774 		}
775 		smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
776 		return SUCCESS;
777 	}
778 
779 	return LOC_NOT_FOUND;
780 }
781 /* }}} */
782 
783 /* {{{ append_prefix , appends the prefix needed
784 * e.g. private adds 'x'
785 */
add_prefix(smart_str * loc_name,char * key_name)786 static void add_prefix(smart_str* loc_name, char* key_name)
787 {
788 	if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
789 		smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
790 		smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
791 	}
792 }
793 /* }}} */
794 
795 /* {{{ append_multiple_key_values
796 * Internal function which is called from locale_compose
797 * gets the multiple values for the key_name and appends to the loc_name
798 * used for 'variant','extlang','private'
799 * returns 1 if successful , -1 if not found ,
800 * 0 if array element is not a string , -2 if buffer-overflow
801 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name TSRMLS_DC)802 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
803 {
804 	zval**	ele_value    	= NULL;
805 	int 	i 		= 0;
806 	int 	isFirstSubtag 	= 0;
807 	int 	max_value 	= 0;
808 
809 	/* Variant/ Extlang/Private etc. */
810 	if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
811 		if( Z_TYPE_PP(ele_value) == IS_STRING ){
812 			add_prefix( loc_name , key_name);
813 
814 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
815 			smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
816 			return SUCCESS;
817 		} else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
818 			HashPosition pos;
819 			HashTable *arr = HASH_OF(*ele_value);
820 			zval **data = NULL;
821 
822 			zend_hash_internal_pointer_reset_ex(arr, &pos);
823 			while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
824 				if(Z_TYPE_PP(data) != IS_STRING) {
825 					return FAILURE;
826 				}
827 				if (isFirstSubtag++ == 0){
828 					add_prefix(loc_name , key_name);
829 				}
830 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
831 				smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
832 				zend_hash_move_forward_ex(arr, &pos);
833 			}
834 			return SUCCESS;
835 		} else {
836 			return FAILURE;
837 		}
838 	} else {
839 		char cur_key_name[31];
840 		/* Decide the max_value: the max. no. of elements allowed */
841 		if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
842 			max_value  = MAX_NO_VARIANT;
843 		}
844 		if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
845 			max_value  = MAX_NO_EXTLANG;
846 		}
847 		if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
848 			max_value  = MAX_NO_PRIVATE;
849 		}
850 
851 		/* Multiple variant values as variant0, variant1 ,variant2 */
852 		isFirstSubtag = 0;
853 		for( i=0 ; i< max_value; i++ ){
854 			snprintf( cur_key_name , 30, "%s%d", key_name , i);
855 			if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
856 				if( Z_TYPE_PP(ele_value)!= IS_STRING ){
857 					/* variant is not a string */
858 					return FAILURE;
859 				}
860 				/* Add the contents */
861 				if (isFirstSubtag++ == 0){
862 					add_prefix(loc_name , cur_key_name);
863 				}
864 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
865 				smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
866 			}
867 		} /* end of for */
868 	} /* end of else */
869 
870 	return SUCCESS;
871 }
872 /* }}} */
873 
874 /*{{{
875 * If applicable sets error message and aborts locale_compose gracefully
876 * returns 0  if locale_compose needs to be aborted
877 * otherwise returns 1
878 */
handleAppendResult(int result,smart_str * loc_name TSRMLS_DC)879 static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
880 {
881 	intl_error_reset( NULL TSRMLS_CC );
882 	if( result == FAILURE) {
883 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
884 			 "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
885 		smart_str_free(loc_name);
886 		return 0;
887 	}
888 	return 1;
889 }
890 /* }}} */
891 
892 #define RETURN_SMART_STR(s) smart_str_0((s)); RETURN_STRINGL((s)->c, (s)->len, 0)
893 /* {{{ proto static string Locale::composeLocale($array)
894 * Creates a locale by combining the parts of locale-ID passed
895 * }}} */
896 /* {{{ proto static string compose_locale($array)
897 * Creates a locale by combining the parts of locale-ID passed
898 * }}} */
PHP_FUNCTION(locale_compose)899 PHP_FUNCTION(locale_compose)
900 {
901 	smart_str      	loc_name_s = {0};
902 	smart_str *loc_name = &loc_name_s;
903 	zval*			arr	= NULL;
904 	HashTable*		hash_arr = NULL;
905 	int 			result = 0;
906 
907 	intl_error_reset( NULL TSRMLS_CC );
908 
909 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
910 		&arr) == FAILURE)
911 	{
912 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
913 			 "locale_compose: unable to parse input params", 0 TSRMLS_CC );
914 		RETURN_FALSE;
915 	}
916 
917 	hash_arr = HASH_OF( arr );
918 
919 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
920 		RETURN_FALSE;
921 
922 	/* Check for grandfathered first */
923 	result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
924 	if( result == SUCCESS){
925 		RETURN_SMART_STR(loc_name);
926 	}
927 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
928 		RETURN_FALSE;
929 	}
930 
931 	/* Not grandfathered */
932 	result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
933 	if( result == LOC_NOT_FOUND ){
934 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
935 		"locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
936 		smart_str_free(loc_name);
937 		RETURN_FALSE;
938 	}
939 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
940 		RETURN_FALSE;
941 	}
942 
943 	/* Extlang */
944 	result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
945 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
946 		RETURN_FALSE;
947 	}
948 
949 	/* Script */
950 	result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
951 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
952 		RETURN_FALSE;
953 	}
954 
955 	/* Region */
956 	result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
957 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
958 		RETURN_FALSE;
959 	}
960 
961 	/* Variant */
962 	result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
963 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
964 		RETURN_FALSE;
965 	}
966 
967 	/* Private */
968 	result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
969 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
970 		RETURN_FALSE;
971 	}
972 
973 	RETURN_SMART_STR(loc_name);
974 }
975 /* }}} */
976 
977 
978 /*{{{
979 * Parses the locale and returns private subtags  if existing
980 * else returns NULL
981 * e.g. for locale='en_US-x-prv1-prv2-prv3'
982 * returns a pointer to the string 'prv1-prv2-prv3'
983 */
/*
 * Walks the singletons of loc_name (as located by getSingletonPos) until an
 * 'x' / 'X' singleton is found and returns a freshly allocated (estrndup) copy
 * of the text that follows that singleton and its separator.
 * Returns NULL when loc_name is NULL or empty, when no private singleton is
 * found, or when nothing follows it. A non-NULL result belongs to the caller,
 * who has to efree() it.
 */
static char* get_private_subtags(char* loc_name)
{
	char*	result = NULL;
	char*	cursor = loc_name;
	int	singletonPos = 0;
	int	len = 0;

	if( !loc_name || loc_name[0] == '\0' ){
		return NULL;
	}

	len = strlen(cursor);
	while( (singletonPos = getSingletonPos(cursor)) != -1 ){
		char singleton = *(cursor + singletonPos);

		if( singleton == 'x' || singleton == 'X' ){
			/*
			 * Private subtag start found. Only copy when something follows
			 * "x<separator>"; testing with '<' rather than '!=' makes sure a
			 * negative length can never reach estrndup.
			 */
			if( singletonPos + 2 < len ){
				result = estrndup(cursor + singletonPos + 2, len - (singletonPos + 2));
			}
			break;
		}

		if( singletonPos + 1 >= len ){
			/* The singleton is the last thing in the string. */
			break;
		}

		/* Some other singleton: keep searching behind it. */
		cursor += singletonPos + 1;
		len = strlen(cursor);
	}

	return result;
}
1025 /* }}} */
1026 
1027 /* {{{ code used by locale_parse
1028 */
/*
 * Extracts the subtag called key_name from loc_name and stores it in the PHP
 * array hash_arr.
 *
 * - LOC_PRIVATE_TAG and LOC_VARIANT_TAG can hold several values: the value is
 *   split on "-" / "_" and stored under "<key_name>0", "<key_name>1", ...
 *   Splitting stops at the first later token that is a single character
 *   (a singleton).
 * - Every other key is stored once under key_name.
 *
 * Returns 1 only when a single-valued key was stored, 0 otherwise (this
 * mirrors the historical behaviour; the multi-valued branch never sets it).
 */
static int add_array_entry(char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
{
	char*	key_value	= NULL;
	char*	cur_key_name	= NULL;
	char*	token		= NULL;
	char*	last_ptr	= NULL;

	int	result		= 0;
	int	cur_result	= 0;
	int	cnt		= 0;
	int	is_private	= ( strcmp(key_name , LOC_PRIVATE_TAG) == 0 );
	size_t	cur_key_size	= 0;

	if( is_private ){
		key_value = get_private_subtags( loc_name );
		result = 1;
	} else {
		key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
	}

	if( is_private || ( strcmp(key_name , LOC_VARIANT_TAG) == 0 ) ){
		if( result > 0 && key_value ){
			/* Tokenize on the "_" or "-" */
			token = php_strtok_r( key_value , DELIMITER , &last_ptr );

			/* A value made only of delimiters yields no token at all. */
			if( token ){
				/* key_name + any decimal int (sign included) + NUL */
				cur_key_size = strlen(key_name) + sizeof("-2147483648");
				cur_key_name = (char*)emalloc( cur_key_size );

				snprintf( cur_key_name , cur_key_size , "%s%d", key_name , cnt++ );
				add_assoc_string( hash_arr, cur_key_name , token , TRUE );

				/* tokenize on the "_" or "-" and stop at singleton if any */
				while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
					snprintf( cur_key_name , cur_key_size , "%s%d", key_name , cnt++ );
					add_assoc_string( hash_arr, cur_key_name , token , TRUE );
				}
			}
		}
	} else {
		/* NOTE(review): result == 1 with a NULL value is not expected; guarded anyway. */
		if( result == 1 && key_value ){
			add_assoc_string( hash_arr, key_name , key_value , TRUE );
			cur_result = 1;
		}
	}

	if( cur_key_name ){
		efree( cur_key_name );
	}
	if( key_value ){
		efree( key_value );
	}
	return cur_result;
}
1084 /* }}} */
1085 
/* {{{ proto static array Locale::parseLocale($locale)
* parses a locale-id into an array of its different parts
 }}} */
/* {{{ proto static array parse_locale($locale)
* parses a locale-id into an array of its different parts
*/
PHP_FUNCTION(locale_parse)1092 PHP_FUNCTION(locale_parse)
1093 {
1094     char*       loc_name        = NULL;
1095     int         loc_name_len    = 0;
1096     int         grOffset    	= 0;
1097 
1098     intl_error_reset( NULL TSRMLS_CC );
1099 
1100     if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1101         &loc_name, &loc_name_len ) == FAILURE)
1102     {
1103         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1104              "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1105 
1106         RETURN_FALSE;
1107     }
1108 
1109     if(loc_name_len == 0) {
1110         loc_name = INTL_G(default_locale);
1111     }
1112 
1113 	array_init( return_value );
1114 
1115 	grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1116 	if( grOffset >= 0 ){
1117 		add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
1118 	}
1119 	else{
1120 		/* Not grandfathered */
1121 		add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1122 		add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1123 		add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1124 		add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1125 		add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1126 	}
1127 }
1128 /* }}} */
1129 
1130 /* {{{ proto static array Locale::getAllVariants($locale)
1131 * gets an array containing the list of variants, or null
1132  }}} */
1133 /* {{{ proto static array locale_get_all_variants($locale)
1134 * gets an array containing the list of variants, or null
1135 */
PHP_FUNCTION(locale_get_all_variants)1136 PHP_FUNCTION(locale_get_all_variants)
1137 {
1138 	char*  	loc_name        = NULL;
1139 	int    	loc_name_len    = 0;
1140 
1141 	int	result		= 0;
1142 	char*	token		= NULL;
1143 	char*	variant		= NULL;
1144 	char*	saved_ptr	= NULL;
1145 
1146 	intl_error_reset( NULL TSRMLS_CC );
1147 
1148 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1149 	&loc_name, &loc_name_len ) == FAILURE)
1150 	{
1151 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1152 	     "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1153 
1154 		RETURN_FALSE;
1155 	}
1156 
1157 	if(loc_name_len == 0) {
1158 		loc_name = INTL_G(default_locale);
1159 	}
1160 
1161 
1162 	array_init( return_value );
1163 
1164 	/* If the locale is grandfathered, stop, no variants */
1165 	if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1166 		/* ("Grandfathered Tag. No variants."); */
1167 	}
1168 	else {
1169 	/* Call ICU variant */
1170 		variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1171 		if( result > 0 && variant){
1172 			/* Tokenize on the "_" or "-" */
1173 			token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1174 			add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1175 			/* tokenize on the "_" or "-" and stop  at singleton if any	*/
1176 			while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1177  				add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1178 			}
1179 		}
1180 		if( variant ){
1181 			efree( variant );
1182 		}
1183 	}
1184 
1185 
1186 }
1187 /* }}} */
1188 
1189 /*{{{
1190 * Converts to lower case and also replaces all hyphens with the underscore
1191 */
/*
 * Writes a normalised copy of str into retstr: every '-' becomes '_' and
 * every other character is lower-cased with tolower(). The copy is
 * NUL-terminated.
 *
 * retstr must have room for strlen(str) + 1 bytes.
 * Returns 1 on success. Returns 0, leaving retstr untouched, when str is
 * NULL or empty.
 */
static int strToMatch(char* str ,char *retstr)
{
	if( !str || str[0] == '\0' ){
		return 0;
	}

	for( ; *str != '\0'; str++, retstr++ ){
		if( *str == '-' ){
			*retstr = '_';
		} else {
			/* <ctype.h> functions are only defined for unsigned char values
			 * (and EOF); a plain char may be negative for bytes >= 0x80. */
			*retstr = (char)tolower( (unsigned char)*str );
		}
	}
	*retstr = '\0';

	return 1;
}
1222 /* }}} */
1223 
1224 /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1225 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1226 */
1227 /* }}} */
1228 /* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1229 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1230 */
PHP_FUNCTION(locale_filter_matches)1231 PHP_FUNCTION(locale_filter_matches)
1232 {
1233 	char*       	lang_tag        = NULL;
1234 	int         	lang_tag_len    = 0;
1235 	char*       	loc_range       = NULL;
1236 	int         	loc_range_len   = 0;
1237 
1238 	int		result		= 0;
1239 	char*		token		= 0;
1240 	char*		chrcheck	= NULL;
1241 
1242 	char*       	can_lang_tag    = NULL;
1243 	char*       	can_loc_range   = NULL;
1244 
1245 	char*       	cur_lang_tag    = NULL;
1246 	char*       	cur_loc_range   = NULL;
1247 
1248 	zend_bool 	boolCanonical 	= 0;
1249 	UErrorCode	status		= U_ZERO_ERROR;
1250 
1251 	intl_error_reset( NULL TSRMLS_CC );
1252 
1253 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1254 		&lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1255 		&boolCanonical) == FAILURE)
1256 	{
1257 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1258 		"locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1259 
1260 		RETURN_FALSE;
1261 	}
1262 
1263 	if(loc_range_len == 0) {
1264 		loc_range = INTL_G(default_locale);
1265 	}
1266 
1267 	if( strcmp(loc_range,"*")==0){
1268 		RETURN_TRUE;
1269 	}
1270 
1271 	if( boolCanonical ){
1272 		/* canonicalize loc_range */
1273 		can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1274 		if( result ==0) {
1275 			intl_error_set( NULL, status,
1276 				"locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1277 			RETURN_FALSE;
1278 		}
1279 
1280 		/* canonicalize lang_tag */
1281 		can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1282 		if( result ==0) {
1283 			intl_error_set( NULL, status,
1284 				"locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1285 			RETURN_FALSE;
1286 		}
1287 
1288 		/* Convert to lower case for case-insensitive comparison */
1289 		cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1290 
1291 		/* Convert to lower case for case-insensitive comparison */
1292 		result = strToMatch( can_lang_tag , cur_lang_tag);
1293 		if( result == 0) {
1294 			efree( cur_lang_tag );
1295 			efree( can_lang_tag );
1296 			RETURN_FALSE;
1297 		}
1298 
1299 		cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1300 		result = strToMatch( can_loc_range , cur_loc_range );
1301 		if( result == 0) {
1302 			efree( cur_lang_tag );
1303 			efree( can_lang_tag );
1304 			efree( cur_loc_range );
1305 			efree( can_loc_range );
1306 			RETURN_FALSE;
1307 		}
1308 
1309 		/* check if prefix */
1310 		token 	= strstr( cur_lang_tag , cur_loc_range );
1311 
1312 		if( token && (token==cur_lang_tag) ){
1313 			/* check if the char. after match is SEPARATOR */
1314 			chrcheck = token + (strlen(cur_loc_range));
1315 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1316 				if( cur_lang_tag){
1317 					efree( cur_lang_tag );
1318 				}
1319 				if( cur_loc_range){
1320 					efree( cur_loc_range );
1321 				}
1322 				if( can_lang_tag){
1323 					efree( can_lang_tag );
1324 				}
1325 				if( can_loc_range){
1326 					efree( can_loc_range );
1327 				}
1328 				RETURN_TRUE;
1329 			}
1330 		}
1331 
1332 		/* No prefix as loc_range */
1333 		if( cur_lang_tag){
1334 			efree( cur_lang_tag );
1335 		}
1336 		if( cur_loc_range){
1337 			efree( cur_loc_range );
1338 		}
1339 		if( can_lang_tag){
1340 			efree( can_lang_tag );
1341 		}
1342 		if( can_loc_range){
1343 			efree( can_loc_range );
1344 		}
1345 		RETURN_FALSE;
1346 
1347 	} /* end of if isCanonical */
1348 	else{
1349 		/* Convert to lower case for case-insensitive comparison */
1350 		cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1351 
1352 		result = strToMatch( lang_tag , cur_lang_tag);
1353 		if( result == 0) {
1354 			efree( cur_lang_tag );
1355 			RETURN_FALSE;
1356 		}
1357 		cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1358 		result = strToMatch( loc_range , cur_loc_range );
1359 		if( result == 0) {
1360 			efree( cur_lang_tag );
1361 			efree( cur_loc_range );
1362 			RETURN_FALSE;
1363 		}
1364 
1365 		/* check if prefix */
1366 		token 	= strstr( cur_lang_tag , cur_loc_range );
1367 
1368 		if( token && (token==cur_lang_tag) ){
1369 			/* check if the char. after match is SEPARATOR */
1370 			chrcheck = token + (strlen(cur_loc_range));
1371 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1372 				if( cur_lang_tag){
1373 					efree( cur_lang_tag );
1374 				}
1375 				if( cur_loc_range){
1376 					efree( cur_loc_range );
1377 				}
1378 				RETURN_TRUE;
1379 			}
1380 		}
1381 
1382 		/* No prefix as loc_range */
1383 		if( cur_lang_tag){
1384 			efree( cur_lang_tag );
1385 		}
1386 		if( cur_loc_range){
1387 			efree( cur_loc_range );
1388 		}
1389 		RETURN_FALSE;
1390 
1391 	}
1392 }
1393 /* }}} */
1394 
array_cleanup(char * arr[],int arr_size)1395 static void array_cleanup( char* arr[] , int arr_size)
1396 {
1397 	int i=0;
1398 	for( i=0; i< arr_size; i++ ){
1399 		if( arr[i*2] ){
1400 			efree( arr[i*2]);
1401 		}
1402 	}
1403 	efree(arr);
1404 }
1405 
/*
 * Frees the lookup table of the enclosing function (expects locals named
 * cur_arr and cur_arr_len) and returns `value`. Wrapped in do/while(0) so the
 * macro expands to exactly one statement and stays correct inside an unbraced
 * if/else.
 */
#define LOOKUP_CLEAN_RETURN(value)	do { array_cleanup(cur_arr, cur_arr_len); return (value); } while (0)
1407 /* {{{
1408 * returns the lookup result to lookup_loc_range_src_php
1409 * internal function
1410 */
/*
 * Finds the entry of hash_arr that best matches loc_range.
 *
 * The array values (all of which must be strings) and loc_range are
 * normalised with strToMatch, after ICU canonicalization when canonicalize is
 * non-zero. loc_range is then shortened from the right, one step at a time
 * via getStrrtokenPos, until it equals one of the normalised entries.
 *
 * Returns a newly allocated (estrdup) string owned by the caller: the
 * normalised entry when canonicalize is set, otherwise the entry exactly as
 * supplied. Returns NULL when nothing matches or on error, in which case the
 * global intl error is set.
 */
static char* lookup_loc_range(char* loc_range, HashTable* hash_arr, int canonicalize  TSRMLS_DC)
{
	int	i = 0;
	int	cur_arr_len = 0;
	int	result = 0;

	char*	lang_tag = NULL;
	zval**	ele_value = NULL;
	/* pairs: [2*i] = normalised copy (owned), [2*i+1] = original value (borrowed) */
	char**	cur_arr = NULL;

	char*	cur_loc_range = NULL;
	char*	can_loc_range = NULL;
	int	saved_pos = 0;

	char*	return_value = NULL;

	cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
	/* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
	for(zend_hash_internal_pointer_reset(hash_arr);
		zend_hash_has_more_elements(hash_arr) == SUCCESS;
		zend_hash_move_forward(hash_arr)) {

		if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
			/* Should never actually fail since the key is known to exist.*/
			continue;
		}
		if(Z_TYPE_PP(ele_value)!= IS_STRING) {
			/* element value is not a string */
			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
			LOOKUP_CLEAN_RETURN(NULL);
		}

		/* Register the pair before normalising it, so that the fresh copy is
		 * released by array_cleanup if strToMatch fails. */
		cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
		cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
		cur_arr_len++;

		result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[(cur_arr_len-1)*2]);
		if(result == 0) {
			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
			LOOKUP_CLEAN_RETURN(NULL);
		}
	} /* end of for */

	if(canonicalize) {
		/* Canonicalize array elements */
		for(i=0; i<cur_arr_len; i++) {
			lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
			if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
				if(lang_tag) {
					efree(lang_tag);
				}
				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
				LOOKUP_CLEAN_RETURN(NULL);
			}
			/* The canonical form may be longer than the stored copy. */
			cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
			result = strToMatch(lang_tag, cur_arr[i*2]);
			efree(lang_tag);
			if(result == 0) {
				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
				LOOKUP_CLEAN_RETURN(NULL);
			}
		}

		/* Canonicalize the loc_range */
		can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
		if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
			/* Error */
			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
			if(can_loc_range) {
				efree(can_loc_range);
			}
			LOOKUP_CLEAN_RETURN(NULL);
		}
		loc_range = can_loc_range;
	}

	cur_loc_range = ecalloc(1, strlen(loc_range)+1);
	/* convert to lower and replace hyphens */
	result = strToMatch(loc_range, cur_loc_range);
	if(can_loc_range) {
		efree(can_loc_range);
	}
	if(result == 0) {
		/* Release the scratch buffer as well; it used to leak on this path. */
		efree(cur_loc_range);
		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
		LOOKUP_CLEAN_RETURN(NULL);
	}

	/* Lookup for the lang_tag match: try the full range first, then ever
	 * shorter prefixes of it. */
	saved_pos = strlen(cur_loc_range);
	while(saved_pos > 0) {
		for(i=0; i< cur_arr_len; i++){
			if(cur_arr[i*2] != NULL && (int)strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
				/* Match found */
				return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
				efree(cur_loc_range);
				LOOKUP_CLEAN_RETURN(return_value);
			}
		}
		saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
	}

	/* Match not found */
	efree(cur_loc_range);
	LOOKUP_CLEAN_RETURN(NULL);
}
1518 /* }}} */
1519 
/* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
* Searches the items in $langtag for the best match to the language
* range
*/
/* }}} */
/* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
* Searches the items in $langtag for the best match to the language
* range
*/
PHP_FUNCTION(locale_lookup)1529 PHP_FUNCTION(locale_lookup)
1530 {
1531 	char*      	fallback_loc  		= NULL;
1532 	int        	fallback_loc_len	= 0;
1533 	char*      	loc_range      		= NULL;
1534 	int        	loc_range_len  		= 0;
1535 
1536 	zval*		arr				= NULL;
1537 	HashTable*	hash_arr		= NULL;
1538 	zend_bool	boolCanonical	= 0;
1539 	char*	 	result			=NULL;
1540 
1541 	intl_error_reset( NULL TSRMLS_CC );
1542 
1543 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1544 		&boolCanonical,	&fallback_loc, &fallback_loc_len) == FAILURE) {
1545 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,	"locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1546 		RETURN_FALSE;
1547 	}
1548 
1549 	if(loc_range_len == 0) {
1550 		loc_range = INTL_G(default_locale);
1551 	}
1552 
1553 	hash_arr = HASH_OF(arr);
1554 
1555 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1556 		RETURN_EMPTY_STRING();
1557 	}
1558 
1559 	result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1560 	if(result == NULL || result[0] == '\0') {
1561 		if( fallback_loc ) {
1562 			result = estrndup(fallback_loc, fallback_loc_len);
1563 		} else {
1564 			RETURN_EMPTY_STRING();
1565 		}
1566 	}
1567 
1568 	RETVAL_STRINGL(result, strlen(result), 0);
1569 }
1570 /* }}} */
1571 
/* {{{ proto string Locale::acceptFromHttp(string $http_accept)
* Tries to find out best available locale based on HTTP "Accept-Language" header
*/
/* }}} */
/* {{{ proto string locale_accept_from_http(string $http_accept)
* Tries to find out best available locale based on HTTP "Accept-Language" header
*/
PHP_FUNCTION(locale_accept_from_http)1579 PHP_FUNCTION(locale_accept_from_http)
1580 {
1581 	UEnumeration *available;
1582 	char *http_accept = NULL;
1583 	int http_accept_len;
1584 	UErrorCode status = 0;
1585 	int len;
1586 	char resultLocale[INTL_MAX_LOCALE_LEN+1];
1587 	UAcceptResult outResult;
1588 
1589 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1590 	{
1591 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1592 		"locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1593 		RETURN_FALSE;
1594 	}
1595 
1596 	available = ures_openAvailableLocales(NULL, &status);
1597 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1598 	len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1599 						&outResult, http_accept, available, &status);
1600 	uenum_close(available);
1601 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1602 	if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1603 		RETURN_FALSE;
1604 	}
1605 	RETURN_STRINGL(resultLocale, len, 1);
1606 }
1607 /* }}} */
1608 
1609 /*
1610  * Local variables:
1611  * tab-width: 4
1612  * c-basic-offset: 4
1613  * End:
1614  * vim600: noet sw=4 ts=4 fdm=marker
1615  * vim<600: noet sw=4 ts=4
1616  *can_loc_len
1617 */
1618