xref: /PHP-5.5/ext/intl/locale/locale_methods.c (revision aa82e99e)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14    +----------------------------------------------------------------------+
15 */
16 
17 /* $Id$ */
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include <unicode/ustring.h>
24 #include <unicode/udata.h>
25 #include <unicode/putil.h>
26 #include <unicode/ures.h>
27 
28 #include "php_intl.h"
29 #include "locale.h"
30 #include "locale_class.h"
31 #include "locale_methods.h"
32 #include "intl_convert.h"
33 #include "intl_data.h"
34 
35 #include <zend_API.h>
36 #include <zend.h>
37 #include <php.h>
38 #include "main/php_ini.h"
39 #include "ext/standard/php_smart_str.h"
40 
41 ZEND_EXTERN_MODULE_GLOBALS( intl )
42 
/* Subtag separators. SEPARATOR is the one emitted when a locale is composed. */
#define SEPARATOR  "_"
#define SEPARATOR1 "-"
#define DELIMITER  "-_"

/* Singleton prefixes and the pseudo tag name used for the full display name. */
#define EXTLANG_PREFIX "a"
#define PRIVATE_PREFIX "x"
#define DISP_NAME      "name"

/* Upper bounds on the number of repeated subtags / lookup tags. */
#define MAX_NO_EXTLANG          3
#define MAX_NO_VARIANT          15
#define MAX_NO_PRIVATE          15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Extra status code used next to SUCCESS/FAILURE by the compose helpers. */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define EXTLANG_KEYNAME_LEN  10
#define VARIANT_KEYNAME_LEN  11
#define PRIVATE_KEYNAME_LEN  11
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code. The list is NULL-terminated.
 *
 * ORDER MATTERS: the leading entries line up index-for-index with
 * LOC_PREFERRED_GRANDFATHERED, so new tags without a preferred value must be
 * added after them.
 */
static const char * const LOC_GRANDFATHERED[] = {
	/* tags that have a preferred replacement */
	"art-lojban",
	"i-klingon",
	"i-lux",
	"i-navajo",
	"no-bok",
	"no-nyn",
	/* tags without a preferred replacement in this file */
	"cel-gaulish",
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-mingo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"sgn-BE-fr",
	"sgn-BE-nl",
	"sgn-CH-de",
	"zh-cmn",
	"zh-cmn-Hans",
	"zh-cmn-Hant",
	"zh-gan",
	"zh-guoyu",
	"zh-hakka",
	"zh-min",
	"zh-min-nan",
	"zh-wuu",
	"zh-xiang",
	"zh-yue",
	NULL
};
78 
/* Based on IANA registry at the time of writing this code.
 * Preferred values for the grandfathered tags, where applicable.
 * Entry N here is the preferred value of entry N in LOC_GRANDFATHERED,
 * so both tables must be kept in the same order. NULL-terminated.
 */
static const char * const 	LOC_PREFERRED_GRANDFATHERED[]  = {
	"jbo",			"tlh",			"lb",
	"nv", 			"nb",			"nn",
	NULL
};

/* Number of real entries above (the NULL terminator is not counted).
 * Derived from the table so the two can never drift apart. */
static const int 		LOC_PREFERRED_GRANDFATHERED_LEN =
	(int)(sizeof(LOC_PREFERRED_GRANDFATHERED) / sizeof(LOC_PREFERRED_GRANDFATHERED[0])) - 1;
90 
/* Character / prefix classification helpers.
 * Every macro argument is parenthesized so that expressions such as
 * `p + 1` or `cond ? a : b` can be passed safely. Arguments may be
 * evaluated more than once, so do not pass expressions with side effects. */

/* non-zero if a is an ID separator ('_' or '-'), zero otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
/* non-zero if a introduces the keyword section ('@') */
#define isKeywordSeparator(a) ((a) == '@')
/* non-zero if a is the terminating NUL */
#define isEndOfTag(a) ((a) == '\0')

/* non-zero if a is one of the special prefix letters x/X/i/I */
#define isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* evaluates to 1 if the string s starts with one of the special prefixes
   'x-' or 'i-' (either case, either separator), 0 otherwise */
#define isIDPrefix(s) (isPrefixLetter((s)[0])&&isIDSeparator((s)[1]))
#define isKeywordPrefix(s) ( isKeywordSeparator((s)[0]) )

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
106 
/* {{{ Linear search of the NULL-terminated string array 'list' for 'key'.
 * Comparison is exact (strcmp).
 * Returns the zero-based index of the first match, or -1 if 'key' is absent. */
static int16_t findOffset(const char* const* list, const char* key)
{
	int16_t idx;

	for (idx = 0; list[idx] != NULL; idx++) {
		if (strcmp(key, list[idx]) == 0) {
			return idx;
		}
	}

	return -1;
}
122 /*}}}*/
123 
getPreferredTag(const char * gf_tag)124 static char* getPreferredTag(const char* gf_tag)
125 {
126 	char* result = NULL;
127 	int grOffset = 0;
128 
129 	grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130 	if(grOffset < 0) {
131 		return NULL;
132 	}
133 	if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134 		/* return preferred tag */
135 		result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136 	} else {
137 		/* Return correct grandfathered language tag */
138 		result = estrdup( LOC_GRANDFATHERED[grOffset] );
139 	}
140 	return result;
141 }
142 
/* {{{
* Reverse tokenizer used for lookup: scans 'str' leftwards, starting just
* before index 'savedPos', for the nearest '_' or '-'.
* If that separator is directly preceded by a singleton (a one-character
* subtag, i.e. another separator two positions earlier) the position of the
* separator in front of the singleton is used instead.
* Returns that position, or -1 if there is no separator or the resulting
* position is smaller than 1 (malformed input such as '-x-xyz' or '-sl_Latn').
*/
static int getStrrtokenPos(char* str, int savedPos)
{
	int pos = savedPos - 1;

	/* walk left until a separator is hit or the string is exhausted */
	while( pos >= 0 && str[pos] != '_' && str[pos] != '-' ){
		pos--;
	}
	if( pos < 0 ){
		return -1;
	}

	/* skip over a singleton sitting right before the separator */
	if( pos >= 2 && ( str[pos-2] == '_' || str[pos-2] == '-' ) ){
		pos -= 2;
	}

	return ( pos < 1 ) ? -1 : pos;
}
170 }
171 /* }}} */
172 
/* {{{
* Forward scan for a singleton (a one-character subtag such as 'x' or 'a').
* Returns
*   0   if the first separator found is at index 1, i.e. the string itself
*       starts with a singleton ("x-abc", "a-prv1");
*   n   the index of the singleton character when a separator is followed,
*       two positions later, by another separator ("en-US-x-priv" -> 6);
*  -1   if str is NULL, empty, or contains no singleton.
*
* The look-ahead is bounds-checked: a separator at (or next to) the end of
* the string cannot introduce a singleton, and str[i+2] must never be read
* beyond the terminating NUL.
*/
static int getSingletonPos(const char* str)
{
	int len;
	int i;

	if( str == NULL ){
		return -1;
	}

	len = (int)strlen(str);
	for( i = 0; i < len; i++ ){
		/* only '_' and '-' separate subtags */
		if( str[i] != '_' && str[i] != '-' ){
			continue;
		}
		if( i == 1 ){
			/* string is of the form x-avy or a-prv1 */
			return 0;
		}
		/* separator found; is the next subtag exactly one character long? */
		if( i + 2 < len && ( str[i+2] == '_' || str[i+2] == '-' ) ){
			return i + 1;
		}
	}

	return -1;
}
204 }
205 /* }}} */
206 
207 /* {{{ proto static string Locale::getDefault(  )
208    Get default locale */
209 /* }}} */
210 /* {{{ proto static string locale_get_default( )
211    Get default locale */
PHP_NAMED_FUNCTION(zif_locale_get_default)212 PHP_NAMED_FUNCTION(zif_locale_get_default)
213 {
214 	RETURN_STRING( intl_locale_get_default( TSRMLS_C ), TRUE );
215 }
216 
217 /* }}} */
218 
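219 /* {{{ proto static bool Locale::setDefault( string $locale )
220    Set default locale; returns TRUE, or FALSE if the argument cannot be parsed */
221 /* }}} */
222 /* {{{ proto static bool locale_set_default( string $locale )
223    Set default locale; returns TRUE, or FALSE if the argument cannot be parsed */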
PHP_NAMED_FUNCTION(zif_locale_set_default)224 PHP_NAMED_FUNCTION(zif_locale_set_default)
225 {
226 	char* locale_name = NULL;
227 	int   len=0;
228 
229 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC,  "s",
230 		&locale_name ,&len ) == FAILURE)
231 	{
232 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233 			 	"locale_set_default: unable to parse input params", 0 TSRMLS_CC );
234 
235 		RETURN_FALSE;
236 	}
237 
238 	if(len == 0) {
239 		locale_name =  (char *)uloc_getDefault() ;
240 		len = strlen(locale_name);
241 	}
242 
243 	zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
244 
245 	RETURN_TRUE;
246 }
247 /* }}} */
248 
249 /* {{{
250 * Gets the value from ICU
251 * common code shared by get_primary_language,get_script or get_region or get_variant
252 * result = 0 if error, 1 if successful , -1 if no value
253 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)254 static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
255 {
256 	char*		tag_value	= NULL;
257 	int32_t     	tag_value_len   = 512;
258 
259 	int		singletonPos   	= 0;
260 	char*       	mod_loc_name	= NULL;
261 	int 		grOffset	= 0;
262 
263 	int32_t     	buflen          = 512;
264 	UErrorCode  	status          = U_ZERO_ERROR;
265 
266 
267 	if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
268 		/* Handle  grandfathered languages */
269 		grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
270 		if( grOffset >= 0 ){
271 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
272 				return estrdup(loc_name);
273 			} else {
274 				/* Since Grandfathered , no value , do nothing , retutn NULL */
275 				return NULL;
276 			}
277 		}
278 
279 	if( fromParseLocale==1 ){
280 		/* Handle singletons */
281 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
282 			if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
283 				return estrdup(loc_name);
284 			}
285 		}
286 
287 		singletonPos = getSingletonPos( loc_name );
288 		if( singletonPos == 0){
289 			/* singleton at start of script, region , variant etc.
290 			 * or invalid singleton at start of language */
291 			return NULL;
292 		} else if( singletonPos > 0 ){
293 			/* singleton at some position except at start
294 			 * strip off the singleton and rest of the loc_name */
295 			mod_loc_name = estrndup ( loc_name , singletonPos-1);
296 		}
297 	} /* end of if fromParse */
298 
299 	} /* end of if != LOC_CANONICAL_TAG */
300 
301 	if( mod_loc_name == NULL){
302 		mod_loc_name = estrdup(loc_name );
303 	}
304 
305 	/* Proceed to ICU */
306     do{
307 		tag_value = erealloc( tag_value , buflen  );
308 		tag_value_len = buflen;
309 
310 		if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
311 			buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
312 		}
313 		if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
314 			buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
315 		}
316 		if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
317 			buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
318 		}
319 		if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
320 			buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
321 		}
322 		if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
323 			buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
324 		}
325 
326 		if( U_FAILURE( status ) ) {
327 			if( status == U_BUFFER_OVERFLOW_ERROR ) {
328 				status = U_ZERO_ERROR;
329 				buflen++; /* add space for \0 */
330 				continue;
331 			}
332 
333 			/* Error in retriving data */
334 			*result = 0;
335 			if( tag_value ){
336 				efree( tag_value );
337 			}
338 			if( mod_loc_name ){
339 				efree( mod_loc_name);
340 			}
341 			return NULL;
342 		}
343 	} while( buflen > tag_value_len );
344 
345 	if(  buflen ==0 ){
346 		/* No value found */
347 		*result = -1;
348 		if( tag_value ){
349 			efree( tag_value );
350 		}
351 		if( mod_loc_name ){
352 			efree( mod_loc_name);
353 		}
354 		return NULL;
355 	} else {
356 		*result = 1;
357 	}
358 
359 	if( mod_loc_name ){
360 		efree( mod_loc_name);
361 	}
362 	return tag_value;
363 }
364 /* }}} */
365 
366 /* {{{
367 * Gets the value from ICU , called when PHP userspace function is called
368 * common code shared by get_primary_language,get_script or get_region or get_variant
369 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)370 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
371 {
372 
373 	const char* loc_name        	= NULL;
374 	int         loc_name_len    	= 0;
375 
376 	char*       tag_value		= NULL;
377 	char*       empty_result	= "";
378 
379 	int         result    		= 0;
380 	char*       msg        		= NULL;
381 
382 	UErrorCode  status          	= U_ZERO_ERROR;
383 
384 	intl_error_reset( NULL TSRMLS_CC );
385 
386 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
387 	&loc_name ,&loc_name_len ) == FAILURE) {
388 		spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
389 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
390 		efree(msg);
391 
392 		RETURN_FALSE;
393     }
394 
395 	if(loc_name_len == 0) {
396 		loc_name = intl_locale_get_default(TSRMLS_C);
397 	}
398 
399 	/* Call ICU get */
400 	tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
401 
402 	/* No value found */
403 	if( result == -1 ) {
404 		if( tag_value){
405 			efree( tag_value);
406 		}
407 		RETURN_STRING( empty_result , TRUE);
408 	}
409 
410 	/* value found */
411 	if( tag_value){
412 		RETURN_STRING( tag_value , FALSE);
413 	}
414 
415 	/* Error encountered while fetching the value */
416 	if( result ==0) {
417 		spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
418 		intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
419 		efree(msg);
420 		RETURN_NULL();
421 	}
422 
423 }
424 /* }}} */
425 
426 /* {{{ proto static string Locale::getScript($locale)
427  * gets the script for the $locale
428  }}} */
429 /* {{{ proto static string locale_get_script($locale)
430  * gets the script for the $locale
431  */
PHP_FUNCTION(locale_get_script)432 PHP_FUNCTION( locale_get_script )
433 {
434 	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
435 }
436 /* }}} */
437 
438 /* {{{ proto static string Locale::getRegion($locale)
439  * gets the region for the $locale
440  }}} */
441 /* {{{ proto static string locale_get_region($locale)
442  * gets the region for the $locale
443  */
PHP_FUNCTION(locale_get_region)444 PHP_FUNCTION( locale_get_region )
445 {
446 	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
447 }
448 /* }}} */
449 
450 /* {{{ proto static string Locale::getPrimaryLanguage($locale)
451  * gets the primary language for the $locale
452  }}} */
453 /* {{{ proto static string locale_get_primary_language($locale)
454  * gets the primary language for the $locale
455  */
PHP_FUNCTION(locale_get_primary_language)456 PHP_FUNCTION(locale_get_primary_language )
457 {
458 	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
459 }
460 /* }}} */
461 
462 
463 /* {{{
464  * common code shared by display_xyz functions to  get the value from ICU
465  }}} */
get_icu_disp_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)466 static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
467 {
468 	const char* loc_name        	= NULL;
469 	int         loc_name_len    	= 0;
470 
471 	const char* disp_loc_name       = NULL;
472 	int         disp_loc_name_len   = 0;
473 	int         free_loc_name       = 0;
474 
475 	UChar*      disp_name      	= NULL;
476 	int32_t     disp_name_len  	= 0;
477 
478 	char*       mod_loc_name        = NULL;
479 
480 	int32_t     buflen          	= 512;
481 	UErrorCode  status          	= U_ZERO_ERROR;
482 
483 	char*       utf8value		= NULL;
484 	int         utf8value_len   	= 0;
485 
486   	char*       msg             	= NULL;
487 	int         grOffset    	= 0;
488 
489 	intl_error_reset( NULL TSRMLS_CC );
490 
491 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
492 		&loc_name, &loc_name_len ,
493 		&disp_loc_name ,&disp_loc_name_len ) == FAILURE)
494 	{
495 		spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
496 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
497 		efree(msg);
498 		RETURN_FALSE;
499 	}
500 
501     if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
502         /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
503 		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
504 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
505 		efree(msg);
506 		RETURN_FALSE;
507     }
508 
509 	if(loc_name_len == 0) {
510 		loc_name = intl_locale_get_default(TSRMLS_C);
511 	}
512 
513 	if( strcmp(tag_name, DISP_NAME) != 0 ){
514 		/* Handle grandfathered languages */
515 		grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
516 		if( grOffset >= 0 ){
517 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
518 				mod_loc_name = getPreferredTag( loc_name );
519 			} else {
520 				/* Since Grandfathered, no value, do nothing, retutn NULL */
521 				RETURN_FALSE;
522 			}
523 		}
524 	} /* end of if != LOC_CANONICAL_TAG */
525 
526 	if( mod_loc_name==NULL ){
527 		mod_loc_name = estrdup( loc_name );
528 	}
529 
530 	/* Check if disp_loc_name passed , if not use default locale */
531 	if( !disp_loc_name){
532 		disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
533 		free_loc_name = 1;
534 	}
535 
536     /* Get the disp_value for the given locale */
537     do{
538         disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
539         disp_name_len = buflen;
540 
541 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
542 			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
543 		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
544 			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
545 		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
546 			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
547 		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
548 			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
549 		} else if( strcmp(tag_name , DISP_NAME)==0 ){
550 			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
551 		}
552 
553 		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
554 		if( U_FAILURE( status ) )
555 		{
556 			if( status == U_BUFFER_OVERFLOW_ERROR )
557 			{
558 				status = U_ZERO_ERROR;
559 				continue;
560 			}
561 
562 			spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
563 			intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
564 			efree(msg);
565 			if( disp_name){
566 				efree( disp_name );
567 			}
568 			if( mod_loc_name){
569 				efree( mod_loc_name );
570 			}
571 			if (free_loc_name) {
572 				efree((void *)disp_loc_name);
573 				disp_loc_name = NULL;
574 			}
575 			RETURN_FALSE;
576 		}
577 	} while( buflen > disp_name_len );
578 
579 	if( mod_loc_name){
580 		efree( mod_loc_name );
581 	}
582 	if (free_loc_name) {
583 		efree((void *)disp_loc_name);
584 		disp_loc_name = NULL;
585 	}
586 	/* Convert display locale name from UTF-16 to UTF-8. */
587 	intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
588 	efree( disp_name );
589 	if( U_FAILURE( status ) )
590 	{
591 		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
592 		intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
593 		efree(msg);
594 		RETURN_FALSE;
595 	}
596 
597 	RETVAL_STRINGL( utf8value, utf8value_len , FALSE);
598 
599 }
600 /* }}} */
601 
602 /* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
603 * gets the name for the $locale in $in_locale or default_locale
604  }}} */
605 /* {{{ proto static string get_display_name($locale[, $in_locale = null])
606 * gets the name for the $locale in $in_locale or default_locale
607 */
PHP_FUNCTION(locale_get_display_name)608 PHP_FUNCTION(locale_get_display_name)
609 {
610     get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
611 }
612 /* }}} */
613 
614 /* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
615 * gets the language for the $locale in $in_locale or default_locale
616  }}} */
617 /* {{{ proto static string get_display_language($locale[, $in_locale = null])
618 * gets the language for the $locale in $in_locale or default_locale
619 */
PHP_FUNCTION(locale_get_display_language)620 PHP_FUNCTION(locale_get_display_language)
621 {
622     get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
623 }
624 /* }}} */
625 
626 /* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
627 * gets the script for the $locale in $in_locale or default_locale
628  }}} */
629 /* {{{ proto static string get_display_script($locale, $in_locale = null)
630 * gets the script for the $locale in $in_locale or default_locale
631 */
PHP_FUNCTION(locale_get_display_script)632 PHP_FUNCTION(locale_get_display_script)
633 {
634     get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
635 }
636 /* }}} */
637 
638 /* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
639 * gets the region for the $locale in $in_locale or default_locale
640  }}} */
641 /* {{{ proto static string get_display_region($locale, $in_locale = null)
642 * gets the region for the $locale in $in_locale or default_locale
643 */
PHP_FUNCTION(locale_get_display_region)644 PHP_FUNCTION(locale_get_display_region)
645 {
646     get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
647 }
648 /* }}} */
649 
650 /* {{{
651 * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
652 * gets the variant for the $locale in $in_locale or default_locale
653  }}} */
654 /* {{{
655 * proto static string get_display_variant($locale, $in_locale = null)
656 * gets the variant for the $locale in $in_locale or default_locale
657 */
PHP_FUNCTION(locale_get_display_variant)658 PHP_FUNCTION(locale_get_display_variant)
659 {
660     get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
661 }
662 /* }}} */
663 
664  /* {{{ proto static array getKeywords(string $locale) {
665  * return an associative array containing keyword-value
666  * pairs for this locale. The keys are keys to the array (doh!)
667  * }}}*/
668  /* {{{ proto static array locale_get_keywords(string $locale) {
669  * return an associative array containing keyword-value
670  * pairs for this locale. The keys are keys to the array (doh!)
671  */
PHP_FUNCTION(locale_get_keywords)672 PHP_FUNCTION( locale_get_keywords )
673 {
674     UEnumeration*   e        = NULL;
675     UErrorCode      status   = U_ZERO_ERROR;
676 
677     const char*	 	kw_key        = NULL;
678     int32_t         kw_key_len    = 0;
679 
680     const char*       	loc_name        = NULL;
681     int        	 	loc_name_len    = 0;
682 
683 /*
684 	ICU expects the buffer to be allocated  before calling the function
685 	and so the buffer size has been explicitly specified
686 	ICU uloc.h #define 	ULOC_KEYWORD_AND_VALUES_CAPACITY   100
687 	hence the kw_value buffer size is 100
688 */
689 	char*	 	kw_value        = NULL;
690     int32_t     kw_value_len    = 100;
691 
692     intl_error_reset( NULL TSRMLS_CC );
693 
694     if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
695         &loc_name, &loc_name_len ) == FAILURE)
696     {
697         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
698              "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
699 
700         RETURN_FALSE;
701     }
702 
703     if(loc_name_len == 0) {
704         loc_name = intl_locale_get_default(TSRMLS_C);
705     }
706 
707 	/* Get the keywords */
708     e = uloc_openKeywords( loc_name, &status );
709     if( e != NULL )
710     {
711 		/* Traverse it, filling the return array. */
712     	array_init( return_value );
713 
714     	while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
715 			kw_value = ecalloc( 1 , kw_value_len  );
716 
717 			/* Get the keyword value for each keyword */
718 			kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len ,  &status );
719 			if (status == U_BUFFER_OVERFLOW_ERROR) {
720 				status = U_ZERO_ERROR;
721 				kw_value = erealloc( kw_value , kw_value_len+1);
722 				kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 ,  &status );
723 			} else if(!U_FAILURE(status)) {
724 				kw_value = erealloc( kw_value , kw_value_len+1);
725 			}
726 			if (U_FAILURE(status)) {
727 	        		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 TSRMLS_CC );
728 				if( kw_value){
729 					efree( kw_value );
730 				}
731 				zval_dtor(return_value);
732         		RETURN_FALSE;
733 			}
734 
735        		add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
736 		} /* end of while */
737 
738 	} /* end of if e!=NULL */
739 
740     uenum_close( e );
741 }
742 /* }}} */
743 
744  /* {{{ proto static string Locale::canonicalize($locale)
745  * @return string the canonicalized locale
746  * }}} */
747  /* {{{ proto static string locale_canonicalize(string $locale)
748  * @param string $locale	The locale string to canonicalize
749  */
PHP_FUNCTION(locale_canonicalize)750 PHP_FUNCTION(locale_canonicalize)
751 {
752 	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
753 }
754 /* }}} */
755 
756 /* {{{ append_key_value
757 * Internal function which is called from locale_compose
758 * gets the value for the key_name and appends to the loc_name
759 * returns 1 if successful , -1 if not found ,
760 * 0 if array element is not a string , -2 if buffer-overflow
761 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)762 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
763 {
764 	zval**	ele_value	= NULL;
765 
766 	if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
767 		if(Z_TYPE_PP(ele_value)!= IS_STRING ){
768 			/* element value is not a string */
769 			return FAILURE;
770 		}
771 		if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
772 		   strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
773 			/* not lang or grandfathered tag */
774 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
775 		}
776 		smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
777 		return SUCCESS;
778 	}
779 
780 	return LOC_NOT_FOUND;
781 }
782 /* }}} */
783 
784 /* {{{ append_prefix , appends the prefix needed
785 * e.g. private adds 'x'
786 */
add_prefix(smart_str * loc_name,char * key_name)787 static void add_prefix(smart_str* loc_name, char* key_name)
788 {
789 	if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
790 		smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
791 		smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
792 	}
793 }
794 /* }}} */
795 
796 /* {{{ append_multiple_key_values
797 * Internal function which is called from locale_compose
798 * gets the multiple values for the key_name and appends to the loc_name
799 * used for 'variant','extlang','private'
800 * returns 1 if successful , -1 if not found ,
801 * 0 if array element is not a string , -2 if buffer-overflow
802 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name TSRMLS_DC)803 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
804 {
805 	zval**	ele_value    	= NULL;
806 	int 	i 		= 0;
807 	int 	isFirstSubtag 	= 0;
808 	int 	max_value 	= 0;
809 
810 	/* Variant/ Extlang/Private etc. */
811 	if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
812 		if( Z_TYPE_PP(ele_value) == IS_STRING ){
813 			add_prefix( loc_name , key_name);
814 
815 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
816 			smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
817 			return SUCCESS;
818 		} else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
819 			HashPosition pos;
820 			HashTable *arr = HASH_OF(*ele_value);
821 			zval **data = NULL;
822 
823 			zend_hash_internal_pointer_reset_ex(arr, &pos);
824 			while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
825 				if(Z_TYPE_PP(data) != IS_STRING) {
826 					return FAILURE;
827 				}
828 				if (isFirstSubtag++ == 0){
829 					add_prefix(loc_name , key_name);
830 				}
831 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
832 				smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
833 				zend_hash_move_forward_ex(arr, &pos);
834 			}
835 			return SUCCESS;
836 		} else {
837 			return FAILURE;
838 		}
839 	} else {
840 		char cur_key_name[31];
841 		/* Decide the max_value: the max. no. of elements allowed */
842 		if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
843 			max_value  = MAX_NO_VARIANT;
844 		}
845 		if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
846 			max_value  = MAX_NO_EXTLANG;
847 		}
848 		if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
849 			max_value  = MAX_NO_PRIVATE;
850 		}
851 
852 		/* Multiple variant values as variant0, variant1 ,variant2 */
853 		isFirstSubtag = 0;
854 		for( i=0 ; i< max_value; i++ ){
855 			snprintf( cur_key_name , 30, "%s%d", key_name , i);
856 			if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
857 				if( Z_TYPE_PP(ele_value)!= IS_STRING ){
858 					/* variant is not a string */
859 					return FAILURE;
860 				}
861 				/* Add the contents */
862 				if (isFirstSubtag++ == 0){
863 					add_prefix(loc_name , cur_key_name);
864 				}
865 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
866 				smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
867 			}
868 		} /* end of for */
869 	} /* end of else */
870 
871 	return SUCCESS;
872 }
873 /* }}} */
874 
875 /*{{{
876 * If applicable sets error message and aborts locale_compose gracefully
877 * returns 0  if locale_compose needs to be aborted
878 * otherwise returns 1
879 */
handleAppendResult(int result,smart_str * loc_name TSRMLS_DC)880 static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
881 {
882 	intl_error_reset( NULL TSRMLS_CC );
883 	if( result == FAILURE) {
884 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
885 			 "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
886 		smart_str_free(loc_name);
887 		return 0;
888 	}
889 	return 1;
890 }
891 /* }}} */
892 
893 #define RETURN_SMART_STR(s) smart_str_0((s)); RETURN_STRINGL((s)->c, (s)->len, 0)
894 /* {{{ proto static string Locale::composeLocale($array)
895 * Creates a locale by combining the parts of locale-ID passed
896 * }}} */
897 /* {{{ proto static string compose_locale($array)
898 * Creates a locale by combining the parts of locale-ID passed
899 * }}} */
PHP_FUNCTION(locale_compose)900 PHP_FUNCTION(locale_compose)
901 {
902 	smart_str      	loc_name_s = {0};
903 	smart_str *loc_name = &loc_name_s;
904 	zval*			arr	= NULL;
905 	HashTable*		hash_arr = NULL;
906 	int 			result = 0;
907 
908 	intl_error_reset( NULL TSRMLS_CC );
909 
910 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
911 		&arr) == FAILURE)
912 	{
913 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
914 			 "locale_compose: unable to parse input params", 0 TSRMLS_CC );
915 		RETURN_FALSE;
916 	}
917 
918 	hash_arr = HASH_OF( arr );
919 
920 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
921 		RETURN_FALSE;
922 
923 	/* Check for grandfathered first */
924 	result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
925 	if( result == SUCCESS){
926 		RETURN_SMART_STR(loc_name);
927 	}
928 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
929 		RETURN_FALSE;
930 	}
931 
932 	/* Not grandfathered */
933 	result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
934 	if( result == LOC_NOT_FOUND ){
935 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
936 		"locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
937 		smart_str_free(loc_name);
938 		RETURN_FALSE;
939 	}
940 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
941 		RETURN_FALSE;
942 	}
943 
944 	/* Extlang */
945 	result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
946 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
947 		RETURN_FALSE;
948 	}
949 
950 	/* Script */
951 	result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
952 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
953 		RETURN_FALSE;
954 	}
955 
956 	/* Region */
957 	result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
958 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
959 		RETURN_FALSE;
960 	}
961 
962 	/* Variant */
963 	result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
964 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
965 		RETURN_FALSE;
966 	}
967 
968 	/* Private */
969 	result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
970 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
971 		RETURN_FALSE;
972 	}
973 
974 	RETURN_SMART_STR(loc_name);
975 }
976 /* }}} */
977 
978 
/*{{{
* Parses the locale and returns private subtags  if existing
* else returns NULL
* e.g. for locale='en_US-x-prv1-prv2-prv3'
* returns a pointer to the string 'prv1-prv2-prv3'
*
* The locale is scanned singleton by singleton (positions come from
* getSingletonPos) until an 'x'/'X' singleton is found. The returned
* string is a copy made with estrndup; NULL is returned for a NULL or
* empty loc_name, when no private singleton exists, or when nothing
* follows the "x" plus its separator.
*/
static char* get_private_subtags(const char* loc_name)
{
	char*		result = NULL;
	int		singletonPos = 0;
	int		len = 0;
	const char*	mod_loc_name = loc_name;
	char		singleton = '\0';

	if( !loc_name || loc_name[0] == '\0' ){
		return NULL;
	}

	len = strlen(mod_loc_name);
	while( (singletonPos = getSingletonPos(mod_loc_name)) != -1 ){
		singleton = mod_loc_name[singletonPos];

		if( singleton == 'x' || singleton == 'X' ){
			/* private subtag start found; copy what follows "x" and its
			 * separator. Nothing is copied when the string ends there
			 * (e.g. '-x-'), which also keeps the length non-negative. */
			if( singletonPos + 2 < len ){
				result = estrndup(mod_loc_name + singletonPos + 2, len - (singletonPos + 2));
			}
			break;
		}

		if( singletonPos + 1 >= len ){
			/* String end */
			break;
		}

		/* singleton found but not a private subtag , hence check further in the string for the private subtag */
		mod_loc_name = mod_loc_name + singletonPos + 1;
		len = strlen(mod_loc_name);
	} /* end of while */

	return result;
}
/* }}} */
1027 
1028 /* {{{ code used by locale_parse
1029 */
add_array_entry(const char * loc_name,zval * hash_arr,char * key_name TSRMLS_DC)1030 static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1031 {
1032 	char*   key_value 	= NULL;
1033 	char*   cur_key_name	= NULL;
1034 	char*   token        	= NULL;
1035 	char*   last_ptr  	= NULL;
1036 
1037 	int	result		= 0;
1038 	int 	cur_result  	= 0;
1039 	int 	cnt  		= 0;
1040 
1041 
1042 	if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1043 		key_value = get_private_subtags( loc_name );
1044 		result = 1;
1045 	} else {
1046 		key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1047 	}
1048 	if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1049 		( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1050 		if( result > 0 && key_value){
1051 			/* Tokenize on the "_" or "-"  */
1052 			token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1053 			if( cur_key_name ){
1054 				efree( cur_key_name);
1055 			}
1056 			cur_key_name = (char*)ecalloc( 25,  25);
1057 			sprintf( cur_key_name , "%s%d", key_name , cnt++);
1058 			add_assoc_string( hash_arr, cur_key_name , token ,TRUE );
1059 			/* tokenize on the "_" or "-" and stop  at singleton if any */
1060 			while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1061 				sprintf( cur_key_name , "%s%d", key_name , cnt++);
1062 				add_assoc_string( hash_arr, cur_key_name , token , TRUE );
1063 			}
1064 /*
1065 			if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1066 			}
1067 */
1068 		}
1069 	} else {
1070 		if( result == 1 ){
1071 			add_assoc_string( hash_arr, key_name , key_value , TRUE );
1072 			cur_result = 1;
1073 		}
1074 	}
1075 
1076 	if( cur_key_name ){
1077 		efree( cur_key_name);
1078 	}
1079 	/*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1080 	if( key_value){
1081 		efree(key_value);
1082 	}
1083 	return cur_result;
1084 }
1085 /* }}} */
1086 
1087 /* {{{ proto static array Locale::parseLocale($locale)
1088 * parses a locale-id into an array the different parts of it
1089  }}} */
1090 /* {{{ proto static array parse_locale($locale)
1091 * parses a locale-id into an array the different parts of it
1092 */
PHP_FUNCTION(locale_parse)1093 PHP_FUNCTION(locale_parse)
1094 {
1095     const char* loc_name        = NULL;
1096     int         loc_name_len    = 0;
1097     int         grOffset    	= 0;
1098 
1099     intl_error_reset( NULL TSRMLS_CC );
1100 
1101     if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1102         &loc_name, &loc_name_len ) == FAILURE)
1103     {
1104         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1105              "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1106 
1107         RETURN_FALSE;
1108     }
1109 
1110     if(loc_name_len == 0) {
1111         loc_name = intl_locale_get_default(TSRMLS_C);
1112     }
1113 
1114 	array_init( return_value );
1115 
1116 	grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1117 	if( grOffset >= 0 ){
1118 		add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
1119 	}
1120 	else{
1121 		/* Not grandfathered */
1122 		add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1123 		add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1124 		add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1125 		add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1126 		add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1127 	}
1128 }
1129 /* }}} */
1130 
1131 /* {{{ proto static array Locale::getAllVariants($locale)
1132 * gets an array containing the list of variants, or null
1133  }}} */
1134 /* {{{ proto static array locale_get_all_variants($locale)
1135 * gets an array containing the list of variants, or null
1136 */
PHP_FUNCTION(locale_get_all_variants)1137 PHP_FUNCTION(locale_get_all_variants)
1138 {
1139 	const char*  	loc_name        = NULL;
1140 	int    		loc_name_len    = 0;
1141 
1142 	int	result		= 0;
1143 	char*	token		= NULL;
1144 	char*	variant		= NULL;
1145 	char*	saved_ptr	= NULL;
1146 
1147 	intl_error_reset( NULL TSRMLS_CC );
1148 
1149 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1150 	&loc_name, &loc_name_len ) == FAILURE)
1151 	{
1152 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1153 	     "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1154 
1155 		RETURN_FALSE;
1156 	}
1157 
1158 	if(loc_name_len == 0) {
1159 		loc_name = intl_locale_get_default(TSRMLS_C);
1160 	}
1161 
1162 
1163 	array_init( return_value );
1164 
1165 	/* If the locale is grandfathered, stop, no variants */
1166 	if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1167 		/* ("Grandfathered Tag. No variants."); */
1168 	}
1169 	else {
1170 	/* Call ICU variant */
1171 		variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1172 		if( result > 0 && variant){
1173 			/* Tokenize on the "_" or "-" */
1174 			token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1175 			add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1176 			/* tokenize on the "_" or "-" and stop  at singleton if any	*/
1177 			while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1178  				add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1179 			}
1180 		}
1181 		if( variant ){
1182 			efree( variant );
1183 		}
1184 	}
1185 
1186 
1187 }
1188 /* }}} */
1189 
/*{{{
* Converts to lower case and also replaces all hyphens with the underscore
*
* str    : NUL-terminated input; may be NULL
* retstr : output buffer, must have room for strlen(str)+1 bytes
* returns 0 (leaving retstr untouched) when str is NULL or empty,
* otherwise 1 after writing the converted, NUL-terminated copy to retstr
*/
static int strToMatch(const char* str ,char *retstr)
{
	if( (!str) || str[0] == '\0' ){
		return 0;
	}

	for( ; *str != '\0'; str++, retstr++ ){
		if( *str == '-' ){
			*retstr = '_';
		} else {
			/* tolower() is only defined for values representable as
			 * unsigned char (or EOF), so bytes >= 0x80 must not be
			 * passed as a possibly negative plain char */
			*retstr = (char)tolower( (unsigned char)*str );
		}
	}
	*retstr = '\0';

	return 1;
}
/* }}} */
1222 
1223 /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1224 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1225 */
1226 /* }}} */
1227 /* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1228 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1229 */
PHP_FUNCTION(locale_filter_matches)1230 PHP_FUNCTION(locale_filter_matches)
1231 {
1232 	char*       	lang_tag        = NULL;
1233 	int         	lang_tag_len    = 0;
1234 	const char*     loc_range       = NULL;
1235 	int         	loc_range_len   = 0;
1236 
1237 	int		result		= 0;
1238 	char*		token		= 0;
1239 	char*		chrcheck	= NULL;
1240 
1241 	char*       	can_lang_tag    = NULL;
1242 	char*       	can_loc_range   = NULL;
1243 
1244 	char*       	cur_lang_tag    = NULL;
1245 	char*       	cur_loc_range   = NULL;
1246 
1247 	zend_bool 	boolCanonical 	= 0;
1248 	UErrorCode	status		= U_ZERO_ERROR;
1249 
1250 	intl_error_reset( NULL TSRMLS_CC );
1251 
1252 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1253 		&lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1254 		&boolCanonical) == FAILURE)
1255 	{
1256 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1257 		"locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1258 
1259 		RETURN_FALSE;
1260 	}
1261 
1262 	if(loc_range_len == 0) {
1263 		loc_range = intl_locale_get_default(TSRMLS_C);
1264 	}
1265 
1266 	if( strcmp(loc_range,"*")==0){
1267 		RETURN_TRUE;
1268 	}
1269 
1270 	if( boolCanonical ){
1271 		/* canonicalize loc_range */
1272 		can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1273 		if( result ==0) {
1274 			intl_error_set( NULL, status,
1275 				"locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1276 			RETURN_FALSE;
1277 		}
1278 
1279 		/* canonicalize lang_tag */
1280 		can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1281 		if( result ==0) {
1282 			intl_error_set( NULL, status,
1283 				"locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1284 			RETURN_FALSE;
1285 		}
1286 
1287 		/* Convert to lower case for case-insensitive comparison */
1288 		cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1289 
1290 		/* Convert to lower case for case-insensitive comparison */
1291 		result = strToMatch( can_lang_tag , cur_lang_tag);
1292 		if( result == 0) {
1293 			efree( cur_lang_tag );
1294 			efree( can_lang_tag );
1295 			RETURN_FALSE;
1296 		}
1297 
1298 		cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1299 		result = strToMatch( can_loc_range , cur_loc_range );
1300 		if( result == 0) {
1301 			efree( cur_lang_tag );
1302 			efree( can_lang_tag );
1303 			efree( cur_loc_range );
1304 			efree( can_loc_range );
1305 			RETURN_FALSE;
1306 		}
1307 
1308 		/* check if prefix */
1309 		token 	= strstr( cur_lang_tag , cur_loc_range );
1310 
1311 		if( token && (token==cur_lang_tag) ){
1312 			/* check if the char. after match is SEPARATOR */
1313 			chrcheck = token + (strlen(cur_loc_range));
1314 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1315 				if( cur_lang_tag){
1316 					efree( cur_lang_tag );
1317 				}
1318 				if( cur_loc_range){
1319 					efree( cur_loc_range );
1320 				}
1321 				if( can_lang_tag){
1322 					efree( can_lang_tag );
1323 				}
1324 				if( can_loc_range){
1325 					efree( can_loc_range );
1326 				}
1327 				RETURN_TRUE;
1328 			}
1329 		}
1330 
1331 		/* No prefix as loc_range */
1332 		if( cur_lang_tag){
1333 			efree( cur_lang_tag );
1334 		}
1335 		if( cur_loc_range){
1336 			efree( cur_loc_range );
1337 		}
1338 		if( can_lang_tag){
1339 			efree( can_lang_tag );
1340 		}
1341 		if( can_loc_range){
1342 			efree( can_loc_range );
1343 		}
1344 		RETURN_FALSE;
1345 
1346 	} /* end of if isCanonical */
1347 	else{
1348 		/* Convert to lower case for case-insensitive comparison */
1349 		cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1350 
1351 		result = strToMatch( lang_tag , cur_lang_tag);
1352 		if( result == 0) {
1353 			efree( cur_lang_tag );
1354 			RETURN_FALSE;
1355 		}
1356 		cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1357 		result = strToMatch( loc_range , cur_loc_range );
1358 		if( result == 0) {
1359 			efree( cur_lang_tag );
1360 			efree( cur_loc_range );
1361 			RETURN_FALSE;
1362 		}
1363 
1364 		/* check if prefix */
1365 		token 	= strstr( cur_lang_tag , cur_loc_range );
1366 
1367 		if( token && (token==cur_lang_tag) ){
1368 			/* check if the char. after match is SEPARATOR */
1369 			chrcheck = token + (strlen(cur_loc_range));
1370 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1371 				if( cur_lang_tag){
1372 					efree( cur_lang_tag );
1373 				}
1374 				if( cur_loc_range){
1375 					efree( cur_loc_range );
1376 				}
1377 				RETURN_TRUE;
1378 			}
1379 		}
1380 
1381 		/* No prefix as loc_range */
1382 		if( cur_lang_tag){
1383 			efree( cur_lang_tag );
1384 		}
1385 		if( cur_loc_range){
1386 			efree( cur_loc_range );
1387 		}
1388 		RETURN_FALSE;
1389 
1390 	}
1391 }
1392 /* }}} */
1393 
array_cleanup(char * arr[],int arr_size)1394 static void array_cleanup( char* arr[] , int arr_size)
1395 {
1396 	int i=0;
1397 	for( i=0; i< arr_size; i++ ){
1398 		if( arr[i*2] ){
1399 			efree( arr[i*2]);
1400 		}
1401 	}
1402 	efree(arr);
1403 }
1404 
/* Frees the expanding function's cur_arr (holding cur_arr_len pairs) via
 * array_cleanup and returns value; locals with exactly those names must be
 * in scope. Wrapped in do/while(0) so the macro is a single statement and
 * cannot split apart under an unbraced if/else. */
#define LOOKUP_CLEAN_RETURN(value)	do { array_cleanup(cur_arr, cur_arr_len); return (value); } while (0)
1406 /* {{{
1407 * returns the lookup result to lookup_loc_range_src_php
1408 * internal function
1409 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize TSRMLS_DC)1410 static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize  TSRMLS_DC)
1411 {
1412 	int	i = 0;
1413 	int	cur_arr_len = 0;
1414 	int result = 0;
1415 
1416 	char* lang_tag = NULL;
1417 	zval** ele_value = NULL;
1418 	char** cur_arr = NULL;
1419 
1420 	char* cur_loc_range	= NULL;
1421 	char* can_loc_range	= NULL;
1422 	int	saved_pos = 0;
1423 
1424 	char* return_value = NULL;
1425 
1426 	cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1427 	/* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1428 	for(zend_hash_internal_pointer_reset(hash_arr);
1429 		zend_hash_has_more_elements(hash_arr) == SUCCESS;
1430 		zend_hash_move_forward(hash_arr)) {
1431 
1432 		if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
1433 			/* Should never actually fail since the key is known to exist.*/
1434 			continue;
1435 		}
1436 		if(Z_TYPE_PP(ele_value)!= IS_STRING) {
1437 			/* element value is not a string */
1438 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1439 			LOOKUP_CLEAN_RETURN(NULL);
1440 		}
1441 		cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
1442 		result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]);
1443 		if(result == 0) {
1444 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1445 			LOOKUP_CLEAN_RETURN(NULL);
1446 		}
1447 		cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
1448 		cur_arr_len++ ;
1449 	} /* end of for */
1450 
1451 	/* Canonicalize array elements */
1452 	if(canonicalize) {
1453 		for(i=0; i<cur_arr_len; i++) {
1454 			lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1455 			if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1456 				if(lang_tag) {
1457 					efree(lang_tag);
1458 				}
1459 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1460 				LOOKUP_CLEAN_RETURN(NULL);
1461 			}
1462 			cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1463 			result = strToMatch(lang_tag, cur_arr[i*2]);
1464 			efree(lang_tag);
1465 			if(result == 0) {
1466 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1467 				LOOKUP_CLEAN_RETURN(NULL);
1468 			}
1469 		}
1470 
1471 	}
1472 
1473 	if(canonicalize) {
1474 		/* Canonicalize the loc_range */
1475 		can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1476 		if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1477 			/* Error */
1478 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1479 			if(can_loc_range) {
1480 				efree(can_loc_range);
1481 			}
1482 			LOOKUP_CLEAN_RETURN(NULL);
1483 		} else {
1484 			loc_range = can_loc_range;
1485 		}
1486 	}
1487 
1488 	cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1489 	/* convert to lower and replace hyphens */
1490 	result = strToMatch(loc_range, cur_loc_range);
1491 	if(can_loc_range) {
1492 		efree(can_loc_range);
1493 	}
1494 	if(result == 0) {
1495 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1496 		LOOKUP_CLEAN_RETURN(NULL);
1497 	}
1498 
1499 	/* Lookup for the lang_tag match */
1500 	saved_pos = strlen(cur_loc_range);
1501 	while(saved_pos > 0) {
1502 		for(i=0; i< cur_arr_len; i++){
1503 			if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1504 				/* Match found */
1505 				return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1506 				efree(cur_loc_range);
1507 				LOOKUP_CLEAN_RETURN(return_value);
1508 			}
1509 		}
1510 		saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1511 	}
1512 
1513 	/* Match not found */
1514 	efree(cur_loc_range);
1515 	LOOKUP_CLEAN_RETURN(NULL);
1516 }
1517 /* }}} */
1518 
1519 /* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1520 * Searchs the items in $langtag for the best match to the language
1521 * range
1522 */
1523 /* }}} */
1524 /* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1525 * Searchs the items in $langtag for the best match to the language
1526 * range
1527 */
PHP_FUNCTION(locale_lookup)1528 PHP_FUNCTION(locale_lookup)
1529 {
1530 	char*      	fallback_loc  		= NULL;
1531 	int        	fallback_loc_len	= 0;
1532 	const char*    	loc_range      		= NULL;
1533 	int        	loc_range_len  		= 0;
1534 
1535 	zval*		arr				= NULL;
1536 	HashTable*	hash_arr		= NULL;
1537 	zend_bool	boolCanonical	= 0;
1538 	char*	 	result			=NULL;
1539 
1540 	intl_error_reset( NULL TSRMLS_CC );
1541 
1542 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1543 		&boolCanonical,	&fallback_loc, &fallback_loc_len) == FAILURE) {
1544 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,	"locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1545 		RETURN_FALSE;
1546 	}
1547 
1548 	if(loc_range_len == 0) {
1549 		loc_range = intl_locale_get_default(TSRMLS_C);
1550 	}
1551 
1552 	hash_arr = HASH_OF(arr);
1553 
1554 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1555 		RETURN_EMPTY_STRING();
1556 	}
1557 
1558 	result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1559 	if(result == NULL || result[0] == '\0') {
1560 		if( fallback_loc ) {
1561 			result = estrndup(fallback_loc, fallback_loc_len);
1562 		} else {
1563 			RETURN_EMPTY_STRING();
1564 		}
1565 	}
1566 
1567 	RETVAL_STRINGL(result, strlen(result), 0);
1568 }
1569 /* }}} */
1570 
1571 /* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1572 * Tries to find out best available locale based on HTTP �Accept-Language� header
1573 */
1574 /* }}} */
1575 /* {{{ proto string locale_accept_from_http(string $http_accept)
1576 * Tries to find out best available locale based on HTTP �Accept-Language� header
1577 */
PHP_FUNCTION(locale_accept_from_http)1578 PHP_FUNCTION(locale_accept_from_http)
1579 {
1580 	UEnumeration *available;
1581 	char *http_accept = NULL;
1582 	int http_accept_len;
1583 	UErrorCode status = 0;
1584 	int len;
1585 	char resultLocale[INTL_MAX_LOCALE_LEN+1];
1586 	UAcceptResult outResult;
1587 
1588 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1589 	{
1590 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1591 		"locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1592 		RETURN_FALSE;
1593 	}
1594 	if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1595 		/* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1596 		char *start = http_accept;
1597 		char *end;
1598 		size_t len;
1599 		do {
1600 			end = strchr(start, ',');
1601 			len = end ? end-start : http_accept_len-(start-http_accept);
1602 			if(len > ULOC_FULLNAME_CAPACITY) {
1603 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1604 						"locale_accept_from_http: locale string too long", 0 TSRMLS_CC );
1605 				RETURN_FALSE;
1606 			}
1607 			if(end) {
1608 				start = end+1;
1609 			}
1610 		} while(end != NULL);
1611 	}
1612 
1613 	available = ures_openAvailableLocales(NULL, &status);
1614 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1615 	len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1616 						&outResult, http_accept, available, &status);
1617 	uenum_close(available);
1618 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1619 	if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1620 		RETURN_FALSE;
1621 	}
1622 	RETURN_STRINGL(resultLocale, len, 1);
1623 }
1624 /* }}} */
1625 
1626 /*
1627  * Local variables:
1628  * tab-width: 4
1629  * c-basic-offset: 4
1630  * End:
1631  * vim600: noet sw=4 ts=4 fdm=marker
1632  * vim<600: noet sw=4 ts=4
1633  *can_loc_len
1634 */
1635