xref: /PHP-5.4/ext/intl/locale/locale_methods.c (revision e644aad3)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14    +----------------------------------------------------------------------+
15 */
16 
17 /* $Id$ */
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include <unicode/ustring.h>
24 #include <unicode/udata.h>
25 #include <unicode/putil.h>
26 #include <unicode/ures.h>
27 
28 #include "php_intl.h"
29 #include "locale.h"
30 #include "locale_class.h"
31 #include "locale_methods.h"
32 #include "intl_convert.h"
33 #include "intl_data.h"
34 
35 #include <zend_API.h>
36 #include <zend.h>
37 #include <php.h>
38 #include "main/php_ini.h"
39 #include "ext/standard/php_smart_str.h"
40 
41 ZEND_EXTERN_MODULE_GLOBALS( intl )
42 
/* Separator characters used when building or scanning locale identifiers. */
#define SEPARATOR  "_"
#define SEPARATOR1 "-"
#define DELIMITER  "-_"

/* Single-letter prefixes; PRIVATE_PREFIX is emitted in front of private subtags. */
#define EXTLANG_PREFIX "a"
#define PRIVATE_PREFIX "x"

/* Pseudo tag name that selects the full display name of a locale. */
#define DISP_NAME "name"

/* Upper bounds on how many numbered keys ("variant0", "variant1", ...) are probed. */
#define MAX_NO_VARIANT  15
#define MAX_NO_EXTLANG  3
#define MAX_NO_PRIVATE  15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Status code meaning "key is absent from the array" (see append_key_value). */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
62 
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code.
 * The list is NULL-terminated so it can be scanned with findOffset().
 * The leading LOC_PREFERRED_GRANDFATHERED_LEN entries are the tags that have
 * a preferred replacement in LOC_PREFERRED_GRANDFATHERED; their order must
 * stay in sync with that table.
 */
static const char * const LOC_GRANDFATHERED[] = {
	/* tags with a preferred value */
	"art-lojban",
	"i-klingon",
	"i-lux",
	"i-navajo",
	"no-bok",
	"no-nyn",
	/* tags without a preferred value */
	"cel-gaulish",
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-mingo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"sgn-BE-fr",
	"sgn-BE-nl",
	"sgn-CH-de",
	"zh-cmn",
	"zh-cmn-Hans",
	"zh-cmn-Hant",
	"zh-gan",
	"zh-guoyu",
	"zh-hakka",
	"zh-min",
	"zh-min-nan",
	"zh-wuu",
	"zh-xiang",
	"zh-yue",
	NULL
};
78 
/* Preferred values for grandfathered tags, based on the IANA registry at the
 * time of writing this code.
 * Entry N here replaces entry N of LOC_GRANDFATHERED, so the two arrays must
 * be kept in the same order. LOC_PREFERRED_GRANDFATHERED_LEN is the number of
 * real entries (the trailing NULL is not counted).
 */
static const int 		LOC_PREFERRED_GRANDFATHERED_LEN = 6;
static const char * const 	LOC_PREFERRED_GRANDFATHERED[]  = {
	"jbo",	/* art-lojban */
	"tlh",	/* i-klingon  */
	"lb",	/* i-lux      */
	"nv",	/* i-navajo   */
	"nb",	/* no-bok     */
	"nn",	/* no-nyn     */
	NULL
};
90 
/* Character-class helpers for scanning locale identifiers.
 * Every macro argument is parenthesized so that an expression argument
 * (e.g. a conditional) expands correctly. The argument is still evaluated
 * more than once, so do not pass expressions with side effects.
 */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0])&&isIDSeparator((s)[1]))
#define isKeywordPrefix(s) ( isKeywordSeparator((s)[0]) )

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
106 
/* {{{ findOffset
 * Linear search of the NULL-terminated string array 'list' for 'key'.
 * Returns the zero-based index of the first entry that compares equal
 * (strcmp) to 'key', or -1 if no entry matches. */
static int16_t findOffset(const char* const* list, const char* key)
{
	size_t pos;

	for (pos = 0; list[pos] != NULL; pos++) {
		if (strcmp(key, list[pos]) == 0) {
			return (int16_t)pos;
		}
	}

	return -1;
}
/*}}}*/
123 
getPreferredTag(char * gf_tag)124 static char* getPreferredTag(char* gf_tag)
125 {
126 	char* result = NULL;
127 	int grOffset = 0;
128 
129 	grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130 	if(grOffset < 0) {
131 		return NULL;
132 	}
133 	if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134 		/* return preferred tag */
135 		result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136 	} else {
137 		/* Return correct grandfathered language tag */
138 		result = estrdup( LOC_GRANDFATHERED[grOffset] );
139 	}
140 	return result;
141 }
142 
/* {{{
* Reverse-direction counterpart of strtok for lookup: scans 'str' backwards,
* starting just before index 'savedPos', for the nearest ID separator.
* Returns the position at which the string should be cut for the next
* lookup, or -1 if there is no further token (this includes a cut position
* of 0, e.g. for invalid input such as '-x-xyz' or '-sl_Latn').
*/
static int getStrrtokenPos(char* str, int savedPos)
{
	int pos = savedPos;

	while( --pos >= 0 ){
		if( !isIDSeparator(str[pos]) ){
			continue;
		}

		/* Nearest separator found. If another separator sits two characters
		 * earlier, the subtag in between is a singleton: cut in front of it. */
		if( pos >= 2 && isIDSeparator(str[pos-2]) ){
			pos -= 2;
		}

		return ( pos < 1 ) ? -1 : pos;
	}

	return -1;
}
/* }}} */
172 
/* {{{
* Forward search (strtok-like) for a singleton subtag in 'str'.
*
* Returns 0 when the first separator found is at index 1 (string of the
* form "x-avy" or "a-prv1"), otherwise the index of the single character
* that is enclosed by two separators, or -1 when there is no singleton or
* 'str' is NULL/empty.
*/
static int getSingletonPos(char* str)
{
	int i;
	int len;

	if( str == NULL ){
		return -1;
	}

	len = strlen(str);
	for( i=0; i<len ; i++){
		if( !isIDSeparator(*(str+i)) ){
			continue;
		}
		if( i==1 ){
			/* string is of the form x-avy or a-prv1 */
			return 0;
		}
		/* delimiter found; check for singleton.
		 * Only look at str[i+2] when it lies inside the string: for a
		 * trailing separator (i == len-1) that index is past the NUL. */
		if( i+2 < len && isIDSeparator(*(str+i+2)) ){
			/* a singleton; so send the position of the character after this separator */
			return i+1;
		}
	}

	return -1;
}
/* }}} */
206 
207 /* {{{ proto static string Locale::getDefault(  )
208    Get default locale */
209 /* }}} */
210 /* {{{ proto static string locale_get_default( )
211    Get default locale */
PHP_NAMED_FUNCTION(zif_locale_get_default)212 PHP_NAMED_FUNCTION(zif_locale_get_default)
213 {
214 	if( INTL_G(default_locale) == NULL ) {
215 		INTL_G(default_locale) = pestrdup( uloc_getDefault(), 1);
216  	}
217 	RETURN_STRING( INTL_G(default_locale), TRUE );
218 }
219 
220 /* }}} */
221 
222 /* {{{ proto static string Locale::setDefault( string $locale )
223    Set default locale */
224 /* }}} */
225 /* {{{ proto static string locale_set_default( string $locale )
226    Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)227 PHP_NAMED_FUNCTION(zif_locale_set_default)
228 {
229 	char* locale_name = NULL;
230 	int   len=0;
231 
232 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC,  "s",
233 		&locale_name ,&len ) == FAILURE)
234 	{
235 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
236 			 	"locale_set_default: unable to parse input params", 0 TSRMLS_CC );
237 
238 		RETURN_FALSE;
239 	}
240 
241 	if(len == 0) {
242 		locale_name =  (char *)uloc_getDefault() ;
243 		len = strlen(locale_name);
244 	}
245 
246 	zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
247 
248 	RETURN_TRUE;
249 }
250 /* }}} */
251 
252 /* {{{
253 * Gets the value from ICU
254 * common code shared by get_primary_language,get_script or get_region or get_variant
255 * result = 0 if error, 1 if successful , -1 if no value
256 */
get_icu_value_internal(char * loc_name,char * tag_name,int * result,int fromParseLocale)257 static char* get_icu_value_internal( char* loc_name , char* tag_name, int* result , int fromParseLocale)
258 {
259 	char*		tag_value	= NULL;
260 	int32_t     	tag_value_len   = 512;
261 
262 	int		singletonPos   	= 0;
263 	char*       	mod_loc_name	= NULL;
264 	int 		grOffset	= 0;
265 
266 	int32_t     	buflen          = 512;
267 	UErrorCode  	status          = U_ZERO_ERROR;
268 
269 
270 	if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
271 		/* Handle  grandfathered languages */
272 		grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
273 		if( grOffset >= 0 ){
274 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
275 				return estrdup(loc_name);
276 			} else {
277 				/* Since Grandfathered , no value , do nothing , retutn NULL */
278 				return NULL;
279 			}
280 		}
281 
282 	if( fromParseLocale==1 ){
283 		/* Handle singletons */
284 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
285 			if( strlen(loc_name)>1 && isIDPrefix(loc_name) ){
286 				return estrdup(loc_name);
287 			}
288 		}
289 
290 		singletonPos = getSingletonPos( loc_name );
291 		if( singletonPos == 0){
292 			/* singleton at start of script, region , variant etc.
293 			 * or invalid singleton at start of language */
294 			return NULL;
295 		} else if( singletonPos > 0 ){
296 			/* singleton at some position except at start
297 			 * strip off the singleton and rest of the loc_name */
298 			mod_loc_name = estrndup ( loc_name , singletonPos-1);
299 		}
300 	} /* end of if fromParse */
301 
302 	} /* end of if != LOC_CANONICAL_TAG */
303 
304 	if( mod_loc_name == NULL){
305 		mod_loc_name = estrdup(loc_name );
306 	}
307 
308 	/* Proceed to ICU */
309     do{
310 		tag_value = erealloc( tag_value , buflen  );
311 		tag_value_len = buflen;
312 
313 		if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
314 			buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
315 		}
316 		if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
317 			buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
318 		}
319 		if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
320 			buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
321 		}
322 		if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
323 			buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
324 		}
325 		if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
326 			buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
327 		}
328 
329 		if( U_FAILURE( status ) ) {
330 			if( status == U_BUFFER_OVERFLOW_ERROR ) {
331 				status = U_ZERO_ERROR;
332 				continue;
333 			}
334 
335 			/* Error in retriving data */
336 			*result = 0;
337 			if( tag_value ){
338 				efree( tag_value );
339 			}
340 			if( mod_loc_name ){
341 				efree( mod_loc_name);
342 			}
343 			return NULL;
344 		}
345 	} while( buflen > tag_value_len );
346 
347 	if(  buflen ==0 ){
348 		/* No value found */
349 		*result = -1;
350 		if( tag_value ){
351 			efree( tag_value );
352 		}
353 		if( mod_loc_name ){
354 			efree( mod_loc_name);
355 		}
356 		return NULL;
357 	} else {
358 		*result = 1;
359 	}
360 
361 	if( mod_loc_name ){
362 		efree( mod_loc_name);
363 	}
364 	return tag_value;
365 }
366 /* }}} */
367 
368 /* {{{
369 * Gets the value from ICU , called when PHP userspace function is called
370 * common code shared by get_primary_language,get_script or get_region or get_variant
371 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)372 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
373 {
374 
375 	char*       loc_name        	= NULL;
376 	int         loc_name_len    	= 0;
377 
378 	char*       tag_value		= NULL;
379 	char*       empty_result	= "";
380 
381 	int         result    		= 0;
382 	char*       msg        		= NULL;
383 
384 	UErrorCode  status          	= U_ZERO_ERROR;
385 
386 	intl_error_reset( NULL TSRMLS_CC );
387 
388 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
389 	&loc_name ,&loc_name_len ) == FAILURE) {
390 		spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
391 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
392 		efree(msg);
393 
394 		RETURN_FALSE;
395     }
396 
397 	if(loc_name_len == 0) {
398 		loc_name = INTL_G(default_locale);
399 	}
400 
401 	/* Call ICU get */
402 	tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
403 
404 	/* No value found */
405 	if( result == -1 ) {
406 		if( tag_value){
407 			efree( tag_value);
408 		}
409 		RETURN_STRING( empty_result , TRUE);
410 	}
411 
412 	/* value found */
413 	if( tag_value){
414 		RETURN_STRING( tag_value , FALSE);
415 	}
416 
417 	/* Error encountered while fetching the value */
418 	if( result ==0) {
419 		spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
420 		intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
421 		efree(msg);
422 		RETURN_NULL();
423 	}
424 
425 }
426 /* }}} */
427 
/* {{{ proto static string Locale::getScript($locale)
 * gets the script for the $locale
 }}} */
/* {{{ proto static string locale_get_script($locale)
 * gets the script for the $locale
 *
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_SCRIPT_TAG; that helper parses $locale and sets the return value.
 */
PHP_FUNCTION( locale_get_script )
{
	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
439 
/* {{{ proto static string Locale::getRegion($locale)
 * gets the region for the $locale
 }}} */
/* {{{ proto static string locale_get_region($locale)
 * gets the region for the $locale
 *
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_REGION_TAG; that helper parses $locale and sets the return value.
 */
PHP_FUNCTION( locale_get_region )
{
	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
451 
/* {{{ proto static string Locale::getPrimaryLanguage($locale)
 * gets the primary language for the $locale
 }}} */
/* {{{ proto static string locale_get_primary_language($locale)
 * gets the primary language for the $locale
 *
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_LANG_TAG; that helper parses $locale and sets the return value.
 */
PHP_FUNCTION(locale_get_primary_language )
{
	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
463 
464 
465 /* {{{
466  * common code shared by display_xyz functions to  get the value from ICU
467  }}} */
get_icu_disp_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)468 static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
469 {
470 	char*       loc_name        	= NULL;
471 	int         loc_name_len    	= 0;
472 
473 	char*       disp_loc_name       = NULL;
474 	int         disp_loc_name_len   = 0;
475 	int         free_loc_name       = 0;
476 
477 	UChar*      disp_name      	= NULL;
478 	int32_t     disp_name_len  	= 0;
479 
480 	char*       mod_loc_name        = NULL;
481 
482 	int32_t     buflen          	= 512;
483 	UErrorCode  status          	= U_ZERO_ERROR;
484 
485 	char*       utf8value		= NULL;
486 	int         utf8value_len   	= 0;
487 
488   	char*       msg             	= NULL;
489 	int         grOffset    	= 0;
490 
491 	intl_error_reset( NULL TSRMLS_CC );
492 
493 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
494 		&loc_name, &loc_name_len ,
495 		&disp_loc_name ,&disp_loc_name_len ) == FAILURE)
496 	{
497 		spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
498 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
499 		efree(msg);
500 		RETURN_FALSE;
501 	}
502 
503     if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
504         /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
505 		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
506 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
507 		efree(msg);
508 		RETURN_FALSE;
509     }
510 
511 	if(loc_name_len == 0) {
512         loc_name = INTL_G(default_locale);
513 	}
514 
515 	if( strcmp(tag_name, DISP_NAME) != 0 ){
516 		/* Handle grandfathered languages */
517 		grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
518 		if( grOffset >= 0 ){
519 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
520 				mod_loc_name = getPreferredTag( loc_name );
521 			} else {
522 				/* Since Grandfathered, no value, do nothing, retutn NULL */
523 				RETURN_FALSE;
524 			}
525 		}
526 	} /* end of if != LOC_CANONICAL_TAG */
527 
528 	if( mod_loc_name==NULL ){
529 		mod_loc_name = estrdup( loc_name );
530 	}
531 
532 	/* Check if disp_loc_name passed , if not use default locale */
533 	if( !disp_loc_name){
534 		disp_loc_name = estrdup(INTL_G(default_locale));
535 		free_loc_name = 1;
536 	}
537 
538     /* Get the disp_value for the given locale */
539     do{
540         disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
541         disp_name_len = buflen;
542 
543 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
544 			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
545 		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
546 			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
547 		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
548 			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
549 		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
550 			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
551 		} else if( strcmp(tag_name , DISP_NAME)==0 ){
552 			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
553 		}
554 
555 		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
556 		if( U_FAILURE( status ) )
557 		{
558 			if( status == U_BUFFER_OVERFLOW_ERROR )
559 			{
560 				status = U_ZERO_ERROR;
561 				continue;
562 			}
563 
564 			spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
565 			intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
566 			efree(msg);
567 			if( disp_name){
568 				efree( disp_name );
569 			}
570 			if( mod_loc_name){
571 				efree( mod_loc_name );
572 			}
573 			if (free_loc_name) {
574 				efree(disp_loc_name);
575 				disp_loc_name = NULL;
576 			}
577 			RETURN_FALSE;
578 		}
579 	} while( buflen > disp_name_len );
580 
581 	if( mod_loc_name){
582 		efree( mod_loc_name );
583 	}
584 	if (free_loc_name) {
585 		efree(disp_loc_name);
586 		disp_loc_name = NULL;
587 	}
588 	/* Convert display locale name from UTF-16 to UTF-8. */
589 	intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
590 	efree( disp_name );
591 	if( U_FAILURE( status ) )
592 	{
593 		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
594 		intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
595 		efree(msg);
596 		RETURN_FALSE;
597 	}
598 
599 	RETVAL_STRINGL( utf8value, utf8value_len , FALSE);
600 
601 }
602 /* }}} */
603 
/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_name($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
*
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* DISP_NAME; that helper parses the arguments and sets the return value.
*/
PHP_FUNCTION(locale_get_display_name)
{
    get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
615 
/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_language($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
*
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_LANG_TAG; that helper parses the arguments and sets the return value.
*/
PHP_FUNCTION(locale_get_display_language)
{
    get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
627 
/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
* gets the script for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_script($locale, $in_locale = null)
* gets the script for the $locale in $in_locale or default_locale
*
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_SCRIPT_TAG; that helper parses the arguments and sets the return value.
*/
PHP_FUNCTION(locale_get_display_script)
{
    get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
639 
/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
* gets the region for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_region($locale, $in_locale = null)
* gets the region for the $locale in $in_locale or default_locale
*
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_REGION_TAG; that helper parses the arguments and sets the return value.
*/
PHP_FUNCTION(locale_get_display_region)
{
    get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
651 
/* {{{
* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
 }}} */
/* {{{
* proto static string locale_get_display_variant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
*
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_VARIANT_TAG; that helper parses the arguments and sets the return value.
*/
PHP_FUNCTION(locale_get_display_variant)
{
    get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
665 
666  /* {{{ proto static array getKeywords(string $locale) {
667  * return an associative array containing keyword-value
668  * pairs for this locale. The keys are keys to the array (doh!)
669  * }}}*/
670  /* {{{ proto static array locale_get_keywords(string $locale) {
671  * return an associative array containing keyword-value
672  * pairs for this locale. The keys are keys to the array (doh!)
673  */
PHP_FUNCTION(locale_get_keywords)674 PHP_FUNCTION( locale_get_keywords )
675 {
676     UEnumeration*   e        = NULL;
677     UErrorCode      status   = U_ZERO_ERROR;
678 
679 	const char*	 	kw_key        = NULL;
680     int32_t         kw_key_len    = 0;
681 
682     char*       	loc_name        = NULL;
683     int        	 	loc_name_len    = 0;
684 
685 /*
686 	ICU expects the buffer to be allocated  before calling the function
687 	and so the buffer size has been explicitly specified
688 	ICU uloc.h #define 	ULOC_KEYWORD_AND_VALUES_CAPACITY   100
689 	hence the kw_value buffer size is 100
690 */
691 	char*	 	kw_value        = NULL;
692     int32_t     kw_value_len    = 100;
693 
694     intl_error_reset( NULL TSRMLS_CC );
695 
696     if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
697         &loc_name, &loc_name_len ) == FAILURE)
698     {
699         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
700              "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
701 
702         RETURN_FALSE;
703     }
704 
705     if(loc_name_len == 0) {
706         loc_name = INTL_G(default_locale);
707     }
708 
709 	/* Get the keywords */
710     e = uloc_openKeywords( loc_name, &status );
711     if( e != NULL )
712     {
713 		/* Traverse it, filling the return array. */
714     	array_init( return_value );
715 
716     	while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
717 			kw_value = ecalloc( 1 , kw_value_len  );
718 
719 			/* Get the keyword value for each keyword */
720 			kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len ,  &status );
721 			if (status == U_BUFFER_OVERFLOW_ERROR) {
722 				status = U_ZERO_ERROR;
723 				kw_value = erealloc( kw_value , kw_value_len+1);
724 				kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 ,  &status );
725 			} else if(!U_FAILURE(status)) {
726 				kw_value = erealloc( kw_value , kw_value_len+1);
727 			}
728 			if (U_FAILURE(status)) {
729         		intl_error_set( NULL, FAILURE, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 TSRMLS_CC );
730 				if( kw_value){
731 					efree( kw_value );
732 				}
733 				zval_dtor(return_value);
734         		RETURN_FALSE;
735 			}
736 
737        		add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
738 		} /* end of while */
739 
740 	} /* end of if e!=NULL */
741 
742     uenum_close( e );
743 }
744 /* }}} */
745 
 /* {{{ proto static string Locale::canonicalize($locale)
 * @return string the canonicalized locale
 * }}} */
 /* {{{ proto static string locale_canonicalize(string $locale)
 * @param string $locale	The locale string to canonicalize
 *
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_CANONICALIZE_TAG; that helper parses a single string argument and
 * sets the return value.
 */
PHP_FUNCTION(locale_canonicalize)
{
	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
757 
758 /* {{{ append_key_value
759 * Internal function which is called from locale_compose
760 * gets the value for the key_name and appends to the loc_name
761 * returns 1 if successful , -1 if not found ,
762 * 0 if array element is not a string , -2 if buffer-overflow
763 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)764 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
765 {
766 	zval**	ele_value	= NULL;
767 
768 	if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
769 		if(Z_TYPE_PP(ele_value)!= IS_STRING ){
770 			/* element value is not a string */
771 			return FAILURE;
772 		}
773 		if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
774 		   strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
775 			/* not lang or grandfathered tag */
776 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
777 		}
778 		smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
779 		return SUCCESS;
780 	}
781 
782 	return LOC_NOT_FOUND;
783 }
784 /* }}} */
785 
786 /* {{{ append_prefix , appends the prefix needed
787 * e.g. private adds 'x'
788 */
add_prefix(smart_str * loc_name,char * key_name)789 static void add_prefix(smart_str* loc_name, char* key_name)
790 {
791 	if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
792 		smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
793 		smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
794 	}
795 }
796 /* }}} */
797 
798 /* {{{ append_multiple_key_values
799 * Internal function which is called from locale_compose
800 * gets the multiple values for the key_name and appends to the loc_name
801 * used for 'variant','extlang','private'
802 * returns 1 if successful , -1 if not found ,
803 * 0 if array element is not a string , -2 if buffer-overflow
804 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name TSRMLS_DC)805 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
806 {
807 	zval**	ele_value    	= NULL;
808 	int 	i 		= 0;
809 	int 	isFirstSubtag 	= 0;
810 	int 	max_value 	= 0;
811 
812 	/* Variant/ Extlang/Private etc. */
813 	if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
814 		if( Z_TYPE_PP(ele_value) == IS_STRING ){
815 			add_prefix( loc_name , key_name);
816 
817 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
818 			smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
819 			return SUCCESS;
820 		} else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
821 			HashPosition pos;
822 			HashTable *arr = HASH_OF(*ele_value);
823 			zval **data = NULL;
824 
825 			zend_hash_internal_pointer_reset_ex(arr, &pos);
826 			while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
827 				if(Z_TYPE_PP(data) != IS_STRING) {
828 					return FAILURE;
829 				}
830 				if (isFirstSubtag++ == 0){
831 					add_prefix(loc_name , key_name);
832 				}
833 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
834 				smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
835 				zend_hash_move_forward_ex(arr, &pos);
836 			}
837 			return SUCCESS;
838 		} else {
839 			return FAILURE;
840 		}
841 	} else {
842 		char cur_key_name[31];
843 		/* Decide the max_value: the max. no. of elements allowed */
844 		if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
845 			max_value  = MAX_NO_VARIANT;
846 		}
847 		if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
848 			max_value  = MAX_NO_EXTLANG;
849 		}
850 		if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
851 			max_value  = MAX_NO_PRIVATE;
852 		}
853 
854 		/* Multiple variant values as variant0, variant1 ,variant2 */
855 		isFirstSubtag = 0;
856 		for( i=0 ; i< max_value; i++ ){
857 			snprintf( cur_key_name , 30, "%s%d", key_name , i);
858 			if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
859 				if( Z_TYPE_PP(ele_value)!= IS_STRING ){
860 					/* variant is not a string */
861 					return FAILURE;
862 				}
863 				/* Add the contents */
864 				if (isFirstSubtag++ == 0){
865 					add_prefix(loc_name , cur_key_name);
866 				}
867 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
868 				smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
869 			}
870 		} /* end of for */
871 	} /* end of else */
872 
873 	return SUCCESS;
874 }
875 /* }}} */
876 
877 /*{{{
878 * If applicable sets error message and aborts locale_compose gracefully
879 * returns 0  if locale_compose needs to be aborted
880 * otherwise returns 1
881 */
handleAppendResult(int result,smart_str * loc_name TSRMLS_DC)882 static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
883 {
884 	intl_error_reset( NULL TSRMLS_CC );
885 	if( result == FAILURE) {
886 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
887 			 "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
888 		smart_str_free(loc_name);
889 		return 0;
890 	}
891 	return 1;
892 }
893 /* }}} */
894 
895 #define RETURN_SMART_STR(s) smart_str_0((s)); RETURN_STRINGL((s)->c, (s)->len, 0)
896 /* {{{ proto static string Locale::composeLocale($array)
897 * Creates a locale by combining the parts of locale-ID passed
898 * }}} */
899 /* {{{ proto static string compose_locale($array)
900 * Creates a locale by combining the parts of locale-ID passed
901 * }}} */
PHP_FUNCTION(locale_compose)902 PHP_FUNCTION(locale_compose)
903 {
904 	smart_str      	loc_name_s = {0};
905 	smart_str *loc_name = &loc_name_s;
906 	zval*			arr	= NULL;
907 	HashTable*		hash_arr = NULL;
908 	int 			result = 0;
909 
910 	intl_error_reset( NULL TSRMLS_CC );
911 
912 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
913 		&arr) == FAILURE)
914 	{
915 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
916 			 "locale_compose: unable to parse input params", 0 TSRMLS_CC );
917 		RETURN_FALSE;
918 	}
919 
920 	hash_arr = HASH_OF( arr );
921 
922 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
923 		RETURN_FALSE;
924 
925 	/* Check for grandfathered first */
926 	result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
927 	if( result == SUCCESS){
928 		RETURN_SMART_STR(loc_name);
929 	}
930 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
931 		RETURN_FALSE;
932 	}
933 
934 	/* Not grandfathered */
935 	result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
936 	if( result == LOC_NOT_FOUND ){
937 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
938 		"locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
939 		smart_str_free(loc_name);
940 		RETURN_FALSE;
941 	}
942 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
943 		RETURN_FALSE;
944 	}
945 
946 	/* Extlang */
947 	result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
948 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
949 		RETURN_FALSE;
950 	}
951 
952 	/* Script */
953 	result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
954 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
955 		RETURN_FALSE;
956 	}
957 
958 	/* Region */
959 	result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
960 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
961 		RETURN_FALSE;
962 	}
963 
964 	/* Variant */
965 	result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
966 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
967 		RETURN_FALSE;
968 	}
969 
970 	/* Private */
971 	result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
972 	if( !handleAppendResult( result, loc_name TSRMLS_CC)){
973 		RETURN_FALSE;
974 	}
975 
976 	RETURN_SMART_STR(loc_name);
977 }
978 /* }}} */
979 
980 
/*{{{
* Parses the locale and returns the private subtags if existing,
* else returns NULL.
* e.g. for locale='en_US-x-prv1-prv2-prv3'
* returns a newly allocated copy of the string 'prv1-prv2-prv3'
* (created with estrndup; the caller has to efree it).
*
* The string is scanned singleton by singleton using getSingletonPos();
* scanning stops at the first 'x'/'X' singleton or when no further
* singleton is reported.
*/
static char* get_private_subtags(char* loc_name)
{
	char* 	result = NULL;
	char* 	mod_loc_name = loc_name;
	int 	singletonPos = 0;
	int 	len = 0;

	if( !loc_name || loc_name[0] == '\0' ){
		return NULL;
	}

	len = strlen(mod_loc_name);
	while( (singletonPos = getSingletonPos(mod_loc_name)) != -1 ){
		if( (mod_loc_name[singletonPos] == 'x') || (mod_loc_name[singletonPos] == 'X') ){
			/* Private subtag start found. Skip the singleton and the
			 * separator after it; if nothing follows (loc_name ends with
			 * '-x-') there are no private subtags and NULL is returned.
			 * The '<' comparison also keeps the length passed to
			 * estrndup from ever becoming negative. */
			if( singletonPos + 2 < len ){
				result = estrndup( mod_loc_name + singletonPos + 2, len - (singletonPos + 2) );
			}
			break;
		}

		if( singletonPos + 1 >= len ){
			/* String end */
			break;
		}

		/* Singleton found but it is not the private one, hence check
		 * further in the string for the private subtag. */
		mod_loc_name = mod_loc_name + singletonPos + 1;
		len = strlen(mod_loc_name);
	}

	return result;
}
/* }}} */
1029 
1030 /* {{{ code used by locale_parse
1031 */
add_array_entry(char * loc_name,zval * hash_arr,char * key_name TSRMLS_DC)1032 static int add_array_entry(char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1033 {
1034 	char*   key_value 	= NULL;
1035 	char*   cur_key_name	= NULL;
1036 	char*   token        	= NULL;
1037 	char*   last_ptr  	= NULL;
1038 
1039 	int	result		= 0;
1040 	int 	cur_result  	= 0;
1041 	int 	cnt  		= 0;
1042 
1043 
1044 	if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1045 		key_value = get_private_subtags( loc_name );
1046 		result = 1;
1047 	} else {
1048 		key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1049 	}
1050 	if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1051 		( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1052 		if( result > 0 && key_value){
1053 			/* Tokenize on the "_" or "-"  */
1054 			token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1055 			if( cur_key_name ){
1056 				efree( cur_key_name);
1057 			}
1058 			cur_key_name = (char*)ecalloc( 25,  25);
1059 			sprintf( cur_key_name , "%s%d", key_name , cnt++);
1060 			add_assoc_string( hash_arr, cur_key_name , token ,TRUE );
1061 			/* tokenize on the "_" or "-" and stop  at singleton if any */
1062 			while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1063 				sprintf( cur_key_name , "%s%d", key_name , cnt++);
1064 				add_assoc_string( hash_arr, cur_key_name , token , TRUE );
1065 			}
1066 /*
1067 			if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1068 			}
1069 */
1070 		}
1071 	} else {
1072 		if( result == 1 ){
1073 			add_assoc_string( hash_arr, key_name , key_value , TRUE );
1074 			cur_result = 1;
1075 		}
1076 	}
1077 
1078 	if( cur_key_name ){
1079 		efree( cur_key_name);
1080 	}
1081 	/*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1082 	if( key_value){
1083 		efree(key_value);
1084 	}
1085 	return cur_result;
1086 }
1087 /* }}} */
1088 
1089 /* {{{ proto static array Locale::parseLocale($locale)
1090 * parses a locale-id into an array the different parts of it
1091  }}} */
1092 /* {{{ proto static array parse_locale($locale)
1093 * parses a locale-id into an array the different parts of it
1094 */
PHP_FUNCTION(locale_parse)1095 PHP_FUNCTION(locale_parse)
1096 {
1097     char*       loc_name        = NULL;
1098     int         loc_name_len    = 0;
1099     int         grOffset    	= 0;
1100 
1101     intl_error_reset( NULL TSRMLS_CC );
1102 
1103     if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1104         &loc_name, &loc_name_len ) == FAILURE)
1105     {
1106         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1107              "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1108 
1109         RETURN_FALSE;
1110     }
1111 
1112     if(loc_name_len == 0) {
1113         loc_name = INTL_G(default_locale);
1114     }
1115 
1116 	array_init( return_value );
1117 
1118 	grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1119 	if( grOffset >= 0 ){
1120 		add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
1121 	}
1122 	else{
1123 		/* Not grandfathered */
1124 		add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1125 		add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1126 		add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1127 		add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1128 		add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1129 	}
1130 }
1131 /* }}} */
1132 
1133 /* {{{ proto static array Locale::getAllVariants($locale)
1134 * gets an array containing the list of variants, or null
1135  }}} */
1136 /* {{{ proto static array locale_get_all_variants($locale)
1137 * gets an array containing the list of variants, or null
1138 */
PHP_FUNCTION(locale_get_all_variants)1139 PHP_FUNCTION(locale_get_all_variants)
1140 {
1141 	char*  	loc_name        = NULL;
1142 	int    	loc_name_len    = 0;
1143 
1144 	int	result		= 0;
1145 	char*	token		= NULL;
1146 	char*	variant		= NULL;
1147 	char*	saved_ptr	= NULL;
1148 
1149 	intl_error_reset( NULL TSRMLS_CC );
1150 
1151 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1152 	&loc_name, &loc_name_len ) == FAILURE)
1153 	{
1154 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1155 	     "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1156 
1157 		RETURN_FALSE;
1158 	}
1159 
1160 	if(loc_name_len == 0) {
1161 		loc_name = INTL_G(default_locale);
1162 	}
1163 
1164 
1165 	array_init( return_value );
1166 
1167 	/* If the locale is grandfathered, stop, no variants */
1168 	if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1169 		/* ("Grandfathered Tag. No variants."); */
1170 	}
1171 	else {
1172 	/* Call ICU variant */
1173 		variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1174 		if( result > 0 && variant){
1175 			/* Tokenize on the "_" or "-" */
1176 			token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1177 			add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1178 			/* tokenize on the "_" or "-" and stop  at singleton if any	*/
1179 			while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1180  				add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1181 			}
1182 		}
1183 		if( variant ){
1184 			efree( variant );
1185 		}
1186 	}
1187 
1188 
1189 }
1190 /* }}} */
1191 
/*{{{
* Converts to lower case and also replaces all hyphens with the underscore.
*
* str    - NUL-terminated input; may be NULL.
* retstr - output buffer; must have room for strlen(str)+1 bytes.
*
* Returns 0 and leaves retstr untouched when str is NULL or empty,
* otherwise writes the converted, NUL-terminated copy and returns 1.
*/
static int strToMatch(char* str ,char *retstr)
{
	if( (!str) || str[0] == '\0' ){
		return 0;
	}

	while( (*str) != '\0' ){
		if( *str == '-' ){
			*retstr = '_';
		} else {
			/* tolower() requires a value representable as unsigned char;
			 * a plain char holding a byte >= 0x80 may be negative. */
			*retstr = (char)tolower( (unsigned char)*str );
		}
		str++;
		retstr++;
	}
	*retstr = '\0';

	return 1;
}
/* }}} */
1226 
1227 /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1228 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1229 */
1230 /* }}} */
1231 /* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1232 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1233 */
PHP_FUNCTION(locale_filter_matches)1234 PHP_FUNCTION(locale_filter_matches)
1235 {
1236 	char*       	lang_tag        = NULL;
1237 	int         	lang_tag_len    = 0;
1238 	char*       	loc_range       = NULL;
1239 	int         	loc_range_len   = 0;
1240 
1241 	int		result		= 0;
1242 	char*		token		= 0;
1243 	char*		chrcheck	= NULL;
1244 
1245 	char*       	can_lang_tag    = NULL;
1246 	char*       	can_loc_range   = NULL;
1247 
1248 	char*       	cur_lang_tag    = NULL;
1249 	char*       	cur_loc_range   = NULL;
1250 
1251 	zend_bool 	boolCanonical 	= 0;
1252 	UErrorCode	status		= U_ZERO_ERROR;
1253 
1254 	intl_error_reset( NULL TSRMLS_CC );
1255 
1256 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1257 		&lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1258 		&boolCanonical) == FAILURE)
1259 	{
1260 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1261 		"locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1262 
1263 		RETURN_FALSE;
1264 	}
1265 
1266 	if(loc_range_len == 0) {
1267 		loc_range = INTL_G(default_locale);
1268 	}
1269 
1270 	if( strcmp(loc_range,"*")==0){
1271 		RETURN_TRUE;
1272 	}
1273 
1274 	if( boolCanonical ){
1275 		/* canonicalize loc_range */
1276 		can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1277 		if( result ==0) {
1278 			intl_error_set( NULL, status,
1279 				"locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1280 			RETURN_FALSE;
1281 		}
1282 
1283 		/* canonicalize lang_tag */
1284 		can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1285 		if( result ==0) {
1286 			intl_error_set( NULL, status,
1287 				"locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1288 			RETURN_FALSE;
1289 		}
1290 
1291 		/* Convert to lower case for case-insensitive comparison */
1292 		cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1293 
1294 		/* Convert to lower case for case-insensitive comparison */
1295 		result = strToMatch( can_lang_tag , cur_lang_tag);
1296 		if( result == 0) {
1297 			efree( cur_lang_tag );
1298 			efree( can_lang_tag );
1299 			RETURN_FALSE;
1300 		}
1301 
1302 		cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1303 		result = strToMatch( can_loc_range , cur_loc_range );
1304 		if( result == 0) {
1305 			efree( cur_lang_tag );
1306 			efree( can_lang_tag );
1307 			efree( cur_loc_range );
1308 			efree( can_loc_range );
1309 			RETURN_FALSE;
1310 		}
1311 
1312 		/* check if prefix */
1313 		token 	= strstr( cur_lang_tag , cur_loc_range );
1314 
1315 		if( token && (token==cur_lang_tag) ){
1316 			/* check if the char. after match is SEPARATOR */
1317 			chrcheck = token + (strlen(cur_loc_range));
1318 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1319 				if( cur_lang_tag){
1320 					efree( cur_lang_tag );
1321 				}
1322 				if( cur_loc_range){
1323 					efree( cur_loc_range );
1324 				}
1325 				if( can_lang_tag){
1326 					efree( can_lang_tag );
1327 				}
1328 				if( can_loc_range){
1329 					efree( can_loc_range );
1330 				}
1331 				RETURN_TRUE;
1332 			}
1333 		}
1334 
1335 		/* No prefix as loc_range */
1336 		if( cur_lang_tag){
1337 			efree( cur_lang_tag );
1338 		}
1339 		if( cur_loc_range){
1340 			efree( cur_loc_range );
1341 		}
1342 		if( can_lang_tag){
1343 			efree( can_lang_tag );
1344 		}
1345 		if( can_loc_range){
1346 			efree( can_loc_range );
1347 		}
1348 		RETURN_FALSE;
1349 
1350 	} /* end of if isCanonical */
1351 	else{
1352 		/* Convert to lower case for case-insensitive comparison */
1353 		cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1354 
1355 		result = strToMatch( lang_tag , cur_lang_tag);
1356 		if( result == 0) {
1357 			efree( cur_lang_tag );
1358 			RETURN_FALSE;
1359 		}
1360 		cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1361 		result = strToMatch( loc_range , cur_loc_range );
1362 		if( result == 0) {
1363 			efree( cur_lang_tag );
1364 			efree( cur_loc_range );
1365 			RETURN_FALSE;
1366 		}
1367 
1368 		/* check if prefix */
1369 		token 	= strstr( cur_lang_tag , cur_loc_range );
1370 
1371 		if( token && (token==cur_lang_tag) ){
1372 			/* check if the char. after match is SEPARATOR */
1373 			chrcheck = token + (strlen(cur_loc_range));
1374 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1375 				if( cur_lang_tag){
1376 					efree( cur_lang_tag );
1377 				}
1378 				if( cur_loc_range){
1379 					efree( cur_loc_range );
1380 				}
1381 				RETURN_TRUE;
1382 			}
1383 		}
1384 
1385 		/* No prefix as loc_range */
1386 		if( cur_lang_tag){
1387 			efree( cur_lang_tag );
1388 		}
1389 		if( cur_loc_range){
1390 			efree( cur_loc_range );
1391 		}
1392 		RETURN_FALSE;
1393 
1394 	}
1395 }
1396 /* }}} */
1397 
array_cleanup(char * arr[],int arr_size)1398 static void array_cleanup( char* arr[] , int arr_size)
1399 {
1400 	int i=0;
1401 	for( i=0; i< arr_size; i++ ){
1402 		if( arr[i*2] ){
1403 			efree( arr[i*2]);
1404 		}
1405 	}
1406 	efree(arr);
1407 }
1408 
1409 #define LOOKUP_CLEAN_RETURN(value)	array_cleanup(cur_arr, cur_arr_len); return (value)
1410 /* {{{
1411 * returns the lookup result to lookup_loc_range_src_php
1412 * internal function
1413 */
lookup_loc_range(char * loc_range,HashTable * hash_arr,int canonicalize TSRMLS_DC)1414 static char* lookup_loc_range(char* loc_range, HashTable* hash_arr, int canonicalize  TSRMLS_DC)
1415 {
1416 	int	i = 0;
1417 	int	cur_arr_len = 0;
1418 	int result = 0;
1419 
1420 	char* lang_tag = NULL;
1421 	zval** ele_value = NULL;
1422 	char** cur_arr = NULL;
1423 
1424 	char* cur_loc_range	= NULL;
1425 	char* can_loc_range	= NULL;
1426 	int	saved_pos = 0;
1427 
1428 	char* return_value = NULL;
1429 
1430 	cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1431 	/* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1432 	for(zend_hash_internal_pointer_reset(hash_arr);
1433 		zend_hash_has_more_elements(hash_arr) == SUCCESS;
1434 		zend_hash_move_forward(hash_arr)) {
1435 
1436 		if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
1437 			/* Should never actually fail since the key is known to exist.*/
1438 			continue;
1439 		}
1440 		if(Z_TYPE_PP(ele_value)!= IS_STRING) {
1441 			/* element value is not a string */
1442 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1443 			LOOKUP_CLEAN_RETURN(NULL);
1444 		}
1445 		cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
1446 		result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]);
1447 		if(result == 0) {
1448 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1449 			LOOKUP_CLEAN_RETURN(NULL);
1450 		}
1451 		cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
1452 		cur_arr_len++ ;
1453 	} /* end of for */
1454 
1455 	/* Canonicalize array elements */
1456 	if(canonicalize) {
1457 		for(i=0; i<cur_arr_len; i++) {
1458 			lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1459 			if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1460 				if(lang_tag) {
1461 					efree(lang_tag);
1462 				}
1463 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1464 				LOOKUP_CLEAN_RETURN(NULL);
1465 			}
1466 			cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1467 			result = strToMatch(lang_tag, cur_arr[i*2]);
1468 			efree(lang_tag);
1469 			if(result == 0) {
1470 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1471 				LOOKUP_CLEAN_RETURN(NULL);
1472 			}
1473 		}
1474 
1475 	}
1476 
1477 	if(canonicalize) {
1478 		/* Canonicalize the loc_range */
1479 		can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1480 		if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1481 			/* Error */
1482 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1483 			if(can_loc_range) {
1484 				efree(can_loc_range);
1485 			}
1486 			LOOKUP_CLEAN_RETURN(NULL);
1487 		} else {
1488 			loc_range = can_loc_range;
1489 		}
1490 	}
1491 
1492 	cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1493 	/* convert to lower and replace hyphens */
1494 	result = strToMatch(loc_range, cur_loc_range);
1495 	if(can_loc_range) {
1496 		efree(can_loc_range);
1497 	}
1498 	if(result == 0) {
1499 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1500 		LOOKUP_CLEAN_RETURN(NULL);
1501 	}
1502 
1503 	/* Lookup for the lang_tag match */
1504 	saved_pos = strlen(cur_loc_range);
1505 	while(saved_pos > 0) {
1506 		for(i=0; i< cur_arr_len; i++){
1507 			if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1508 				/* Match found */
1509 				return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1510 				efree(cur_loc_range);
1511 				LOOKUP_CLEAN_RETURN(return_value);
1512 			}
1513 		}
1514 		saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1515 	}
1516 
1517 	/* Match not found */
1518 	efree(cur_loc_range);
1519 	LOOKUP_CLEAN_RETURN(NULL);
1520 }
1521 /* }}} */
1522 
1523 /* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1524 * Searchs the items in $langtag for the best match to the language
1525 * range
1526 */
1527 /* }}} */
1528 /* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1529 * Searchs the items in $langtag for the best match to the language
1530 * range
1531 */
PHP_FUNCTION(locale_lookup)1532 PHP_FUNCTION(locale_lookup)
1533 {
1534 	char*      	fallback_loc  		= NULL;
1535 	int        	fallback_loc_len	= 0;
1536 	char*      	loc_range      		= NULL;
1537 	int        	loc_range_len  		= 0;
1538 
1539 	zval*		arr				= NULL;
1540 	HashTable*	hash_arr		= NULL;
1541 	zend_bool	boolCanonical	= 0;
1542 	char*	 	result			=NULL;
1543 
1544 	intl_error_reset( NULL TSRMLS_CC );
1545 
1546 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1547 		&boolCanonical,	&fallback_loc, &fallback_loc_len) == FAILURE) {
1548 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,	"locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1549 		RETURN_FALSE;
1550 	}
1551 
1552 	if(loc_range_len == 0) {
1553 		loc_range = INTL_G(default_locale);
1554 	}
1555 
1556 	hash_arr = HASH_OF(arr);
1557 
1558 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1559 		RETURN_EMPTY_STRING();
1560 	}
1561 
1562 	result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1563 	if(result == NULL || result[0] == '\0') {
1564 		if( fallback_loc ) {
1565 			result = estrndup(fallback_loc, fallback_loc_len);
1566 		} else {
1567 			RETURN_EMPTY_STRING();
1568 		}
1569 	}
1570 
1571 	RETVAL_STRINGL(result, strlen(result), 0);
1572 }
1573 /* }}} */
1574 
1575 /* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1576 * Tries to find out best available locale based on HTTP �Accept-Language� header
1577 */
1578 /* }}} */
1579 /* {{{ proto string locale_accept_from_http(string $http_accept)
1580 * Tries to find out best available locale based on HTTP �Accept-Language� header
1581 */
PHP_FUNCTION(locale_accept_from_http)1582 PHP_FUNCTION(locale_accept_from_http)
1583 {
1584 	UEnumeration *available;
1585 	char *http_accept = NULL;
1586 	int http_accept_len;
1587 	UErrorCode status = 0;
1588 	int len;
1589 	char resultLocale[INTL_MAX_LOCALE_LEN+1];
1590 	UAcceptResult outResult;
1591 
1592 	if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1593 	{
1594 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1595 		"locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1596 		RETURN_FALSE;
1597 	}
1598 
1599 	available = ures_openAvailableLocales(NULL, &status);
1600 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1601 	len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1602 						&outResult, http_accept, available, &status);
1603 	uenum_close(available);
1604 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1605 	if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1606 		RETURN_FALSE;
1607 	}
1608 	RETURN_STRINGL(resultLocale, len, 1);
1609 }
1610 /* }}} */
1611 
/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 * vim600: noet sw=4 ts=4 fdm=marker
 * vim<600: noet sw=4 ts=4
 */
1621