xref: /PHP-7.2/ext/intl/locale/locale_methods.c (revision 2c9926f1)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14    +----------------------------------------------------------------------+
15 */
16 
17 /* $Id$ */
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include <unicode/ustring.h>
24 #include <unicode/udata.h>
25 #include <unicode/putil.h>
26 #include <unicode/ures.h>
27 
28 #include "php_intl.h"
29 #include "locale.h"
30 #include "locale_class.h"
31 #include "locale_methods.h"
32 #include "intl_convert.h"
33 #include "intl_data.h"
34 
35 #include <zend_API.h>
36 #include <zend.h>
37 #include <php.h>
38 #include "main/php_ini.h"
39 #include "zend_smart_str.h"
40 
41 ZEND_EXTERN_MODULE_GLOBALS( intl )
42 
/* Separator strings and fixed subtag prefixes used when assembling locale IDs. */
#define SEPARATOR       "_"
#define SEPARATOR1      "-"
#define DELIMITER       "-_"
#define EXTLANG_PREFIX  "a"
#define PRIVATE_PREFIX  "x"
/* Pseudo tag name that selects the full display name in get_icu_disp_value_src_php() */
#define DISP_NAME       "name"

/* Upper bounds for numbered keys (e.g. "variant0" .. "variant14") probed by
 * append_multiple_key_values() */
#define MAX_NO_VARIANT          15
#define MAX_NO_EXTLANG          3
#define MAX_NO_PRIVATE          15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is absent from the array */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
62 
63 /* Based on IANA registry at the time of writing this code
64 *
65 */
static const char * const LOC_GRANDFATHERED[] = {
	/* The leading entries pair positionally with LOC_PREFERRED_GRANDFATHERED
	 * (same index => preferred replacement). Keep them first and in order. */
	"art-lojban",		"i-klingon",		"i-lux",
	"i-navajo",		"no-bok",		"no-nyn",
	/* Entries from here on have no counterpart in LOC_PREFERRED_GRANDFATHERED;
	 * getPreferredTag() returns them unchanged. */
	"cel-gaulish",		"en-GB-oed",		"i-ami",
	"i-bnn",		"i-default",		"i-enochian",
	"i-mingo",		"i-pwn",		"i-tao",
	"i-tay",		"i-tsu",		"sgn-BE-fr",
	"sgn-BE-nl",		"sgn-CH-de",		"zh-cmn",
	"zh-cmn-Hans",		"zh-cmn-Hant",		"zh-gan",
	"zh-guoyu",		"zh-hakka",		"zh-min",
	"zh-min-nan",		"zh-wuu",		"zh-xiang",
	"zh-yue",		NULL
};

/* Based on IANA registry at the time of writing this code
*  This array lists the preferred values for the grandfathered tags if applicable
*  This is in sync with the array LOC_GRANDFATHERED
*  e.g. the offsets of the grandfathered tags match the offset of the preferred  value
*/
static const char * const 	LOC_PREFERRED_GRANDFATHERED[]  = {
	"jbo",			"tlh",			"lb",
	"nv", 			"nb",			"nn",
	NULL
};

/* Number of real entries in LOC_PREFERRED_GRANDFATHERED (the NULL sentinel is
 * not counted). Derived from the table so the two cannot drift apart. */
static const int 		LOC_PREFERRED_GRANDFATHERED_LEN =
	(int)(sizeof(LOC_PREFERRED_GRANDFATHERED) / sizeof(LOC_PREFERRED_GRANDFATHERED[0])) - 1;
90 
/* Character/prefix classification helpers.
 * Every argument is parenthesized so that expression arguments (e.g. "p + 2"
 * or "c & 0x7f") expand correctly. Arguments may still be evaluated more than
 * once, so do not pass expressions with side effects. */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@' )
#define isEndOfTag(a) ((a) == '\0' )

#define isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0])&&isIDSeparator((s)[1]))
#define isKeywordPrefix(s) ( isKeywordSeparator((s)[0]) )

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
106 
107 /* {{{ return the offset of 'key' in the array 'list'.
108  * returns -1 if not present */
static int16_t findOffset(const char* const* list, const char* key)
{
	size_t pos;

	/* Linear scan of the NULL-terminated table; comparison is byte-exact
	 * (case-sensitive). */
	for (pos = 0; list[pos] != NULL; pos++) {
		if (strcmp(key, list[pos]) == 0) {
			return (int16_t)pos;
		}
	}

	/* ran into the NULL sentinel without a match */
	return -1;
}
122 /*}}}*/
123 
getPreferredTag(const char * gf_tag)124 static char* getPreferredTag(const char* gf_tag)
125 {
126 	char* result = NULL;
127 	zend_off_t grOffset = 0;
128 
129 	grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130 	if(grOffset < 0) {
131 		return NULL;
132 	}
133 	if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134 		/* return preferred tag */
135 		result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136 	} else {
137 		/* Return correct grandfathered language tag */
138 		result = estrdup( LOC_GRANDFATHERED[grOffset] );
139 	}
140 	return result;
141 }
142 
143 /* {{{
144 * returns the position of next token for lookup
145 * or -1 if no token
146 * strtokr equivalent search for token in reverse direction
147 */
static zend_off_t getStrrtokenPos(char* str, zend_off_t savedPos)
{
	zend_off_t pos = savedPos - 1;
	zend_off_t cut;

	/* walk backwards from just before savedPos to the nearest separator */
	while( pos >= 0 && !isIDSeparator(str[pos]) ){
		pos--;
	}
	if( pos < 0 ){
		/* no separator left of savedPos */
		return -1;
	}

	/* A separator two characters earlier means the token in between is a
	 * one-character singleton; cut in front of the singleton instead. */
	if( pos >= 2 && isIDSeparator(str[pos - 2]) ){
		cut = pos - 2;
	} else {
		cut = pos;
	}

	/* Just in case of an invalid locale e.g. '-x-xyz' or '-sl_Latn' */
	return ( cut < 1 ) ? -1 : cut;
}
171 /* }}} */
172 
173 /* {{{
174 * returns the position of a singleton if present
175 * returns -1 if no singleton
176 * strtok equivalent search for singleton
177 */
getSingletonPos(const char * str)178 static zend_off_t getSingletonPos(const char* str)
179 {
180 	zend_off_t result =-1;
181 	zend_off_t i=0;
182 	size_t len = 0;
183 
184 	if( str && ((len=strlen(str))>0) ){
185 		for( i=0; (size_t)i < len ; i++){
186 			if( isIDSeparator(*(str+i)) ){
187 				if( i==1){
188 					/* string is of the form x-avy or a-prv1 */
189 					result =0;
190 					break;
191 				} else {
192 					/* delimiter found; check for singleton */
193 					if( isIDSeparator(*(str+i+2)) ){
194 						/* a singleton; so send the position of separator before singleton */
195 						result = i+1;
196 						break;
197 					}
198 				}
199 			}
200 		}/* end of for */
201 
202 	}
203 	return result;
204 }
205 /* }}} */
206 
207 /* {{{ proto static string Locale::getDefault(  )
208    Get default locale */
209 /* }}} */
210 /* {{{ proto static string locale_get_default( )
211    Get default locale */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
	/* Return a copy of the extension's current default locale identifier. */
	const char *current_default = intl_locale_get_default();

	RETURN_STRING( current_default );
}
216 
217 /* }}} */
218 
219 /* {{{ proto static string Locale::setDefault( string $locale )
220    Set default locale */
221 /* }}} */
222 /* {{{ proto static string locale_set_default( string $locale )
223    Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)224 PHP_NAMED_FUNCTION(zif_locale_set_default)
225 {
226 	zend_string* locale_name;
227 	zend_string *ini_name;
228 	char *default_locale = NULL;
229 
230 	if(zend_parse_parameters( ZEND_NUM_ARGS(),  "S", &locale_name) == FAILURE)
231 	{
232 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233 			 	"locale_set_default: unable to parse input params", 0 );
234 
235 		RETURN_FALSE;
236 	}
237 
238 	if (ZSTR_LEN(locale_name) == 0) {
239 		default_locale = (char *)uloc_getDefault();
240 		locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
241 	}
242 
243 	ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
244 	zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
245 	zend_string_release(ini_name);
246 	if (default_locale != NULL) {
247 		zend_string_release(locale_name);
248 	}
249 
250 	RETURN_TRUE;
251 }
252 /* }}} */
253 
254 /* {{{
255 * Gets the value from ICU
256 * common code shared by get_primary_language,get_script or get_region or get_variant
257 * result = 0 if error, 1 if successful , -1 if no value
258 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)259 static zend_string* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
260 {
261 	zend_string* tag_value	    = NULL;
262 	int32_t      tag_value_len  = 512;
263 
264 	zend_off_t   singletonPos   = 0;
265 	char*        mod_loc_name   = NULL;
266 	zend_off_t   grOffset       = 0;
267 
268 	int32_t      buflen         = 512;
269 	UErrorCode   status         = U_ZERO_ERROR;
270 
271 	if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
272 		return NULL;
273 	}
274 
275 	if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
276 		/* Handle  grandfathered languages */
277 		grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
278 		if( grOffset >= 0 ){
279 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
280 				return zend_string_init(loc_name, strlen(loc_name), 0);
281 			} else {
282 				/* Since Grandfathered , no value , do nothing , retutn NULL */
283 				return NULL;
284 			}
285 		}
286 
287 	if( fromParseLocale==1 ){
288 		/* Handle singletons */
289 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
290 			if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
291 				return zend_string_init(loc_name, strlen(loc_name), 0);
292 			}
293 		}
294 
295 		singletonPos = getSingletonPos( loc_name );
296 		if( singletonPos == 0){
297 			/* singleton at start of script, region , variant etc.
298 			 * or invalid singleton at start of language */
299 			return NULL;
300 		} else if( singletonPos > 0 ){
301 			/* singleton at some position except at start
302 			 * strip off the singleton and rest of the loc_name */
303 			mod_loc_name = estrndup ( loc_name , singletonPos-1);
304 		}
305 	} /* end of if fromParse */
306 
307 	} /* end of if != LOC_CANONICAL_TAG */
308 
309 	if( mod_loc_name == NULL){
310 		mod_loc_name = estrdup(loc_name );
311 	}
312 
313 	/* Proceed to ICU */
314 	do{
315 		if (tag_value) {
316 			tag_value = zend_string_realloc( tag_value , buflen, 0);
317 		} else {
318 			tag_value = zend_string_alloc( buflen, 0);
319 		}
320 		tag_value_len = buflen;
321 
322 		if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
323 			buflen = uloc_getScript ( mod_loc_name , tag_value->val , tag_value_len , &status);
324 		}
325 		if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
326 			buflen = uloc_getLanguage ( mod_loc_name , tag_value->val , tag_value_len , &status);
327 		}
328 		if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
329 			buflen = uloc_getCountry ( mod_loc_name , tag_value->val , tag_value_len , &status);
330 		}
331 		if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
332 			buflen = uloc_getVariant ( mod_loc_name , tag_value->val , tag_value_len , &status);
333 		}
334 		if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
335 			buflen = uloc_canonicalize ( mod_loc_name , tag_value->val , tag_value_len , &status);
336 		}
337 
338 		if( U_FAILURE( status ) ) {
339 			if( status == U_BUFFER_OVERFLOW_ERROR ) {
340 				status = U_ZERO_ERROR;
341 				buflen++; /* add space for \0 */
342 				continue;
343 			}
344 
345 			/* Error in retriving data */
346 			*result = 0;
347 			if( tag_value ){
348 				zend_string_release( tag_value );
349 			}
350 			if( mod_loc_name ){
351 				efree( mod_loc_name);
352 			}
353 			return NULL;
354 		}
355 	} while( buflen > tag_value_len );
356 
357 	if(  buflen ==0 ){
358 		/* No value found */
359 		*result = -1;
360 		if( tag_value ){
361 			zend_string_release( tag_value );
362 		}
363 		if( mod_loc_name ){
364 			efree( mod_loc_name);
365 		}
366 		return NULL;
367 	} else {
368 		*result = 1;
369 	}
370 
371 	if( mod_loc_name ){
372 		efree( mod_loc_name);
373 	}
374 
375 	tag_value->len = strlen(tag_value->val);
376 	return tag_value;
377 }
378 /* }}} */
379 
380 /* {{{
381 * Gets the value from ICU , called when PHP userspace function is called
382 * common code shared by get_primary_language,get_script or get_region or get_variant
383 */
static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
{
	/* Userland entry shared by the locale_get_<tag> functions: parses the
	 * single locale argument, asks get_icu_value_internal() for tag_name and
	 * turns its outcome into the PHP return value. */
	const char*  loc_name     = NULL;
	size_t       loc_name_len = 0;
	zend_string* tag_value    = NULL;
	int          result       = 0;
	char*        msg          = NULL;
	/* never modified below, so the error branch records U_ZERO_ERROR */
	UErrorCode   status       = U_ZERO_ERROR;

	intl_error_reset( NULL );

	if( zend_parse_parameters( ZEND_NUM_ARGS(), "s", &loc_name, &loc_name_len ) == FAILURE ){
		spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
		efree(msg);

		RETURN_FALSE;
	}

	/* an empty locale argument means "use the default locale" */
	if( loc_name_len == 0 ){
		loc_name = intl_locale_get_default();
		loc_name_len = strlen(loc_name);
	}

	INTL_CHECK_LOCALE_LEN(loc_name_len);

	tag_value = get_icu_value_internal( loc_name , tag_name , &result , 0 );

	if( result == -1 ){
		/* ICU had no value for this tag: answer with an empty string */
		if( tag_value ){
			zend_string_release( tag_value );
		}
		RETURN_STRING( "" );
	}

	if( tag_value ){
		/* value found; ownership moves to return_value */
		RETVAL_STR( tag_value );
		return;
	}

	if( result == 0 ){
		/* Error encountered while fetching the value */
		spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
		intl_error_set( NULL, status, msg , 1 );
		efree(msg);
		RETURN_NULL();
	}
}
442 /* }}} */
443 
444 /* {{{ proto static string Locale::getScript($locale)
445  * gets the script for the $locale
446  }}} */
447 /* {{{ proto static string locale_get_script($locale)
448  * gets the script for the $locale
449  */
PHP_FUNCTION(locale_get_script)450 PHP_FUNCTION( locale_get_script )
451 {
452 	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
453 }
454 /* }}} */
455 
456 /* {{{ proto static string Locale::getRegion($locale)
457  * gets the region for the $locale
458  }}} */
459 /* {{{ proto static string locale_get_region($locale)
460  * gets the region for the $locale
461  */
PHP_FUNCTION(locale_get_region)462 PHP_FUNCTION( locale_get_region )
463 {
464 	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
465 }
466 /* }}} */
467 
468 /* {{{ proto static string Locale::getPrimaryLanguage($locale)
469  * gets the primary language for the $locale
470  }}} */
471 /* {{{ proto static string locale_get_primary_language($locale)
472  * gets the primary language for the $locale
473  */
PHP_FUNCTION(locale_get_primary_language)474 PHP_FUNCTION(locale_get_primary_language )
475 {
476 	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
477 }
478 /* }}} */
479 
480 
481 /* {{{
482  * common code shared by display_xyz functions to  get the value from ICU
483  }}} */
static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
{
	/* Userland entry shared by the locale_get_display_<tag> functions.
	 * Arguments: locale, and optionally the locale in which to render the
	 * result (the default locale is used when it is omitted).
	 * Sets return_value to the UTF-8 display string, or FALSE on error. */
	const char*  loc_name          = NULL;
	size_t       loc_name_len      = 0;

	const char*  disp_loc_name     = NULL;
	size_t       disp_loc_name_len = 0;
	char*        owned_disp_loc    = NULL; /* non-NULL only when we duplicated the default */

	UChar*       disp_name         = NULL;
	int32_t      disp_name_len     = 0;

	char*        mod_loc_name      = NULL;

	int32_t      buflen            = 512;
	UErrorCode   status            = U_ZERO_ERROR;

	zend_string* u8str;

	char*        msg               = NULL;
	int          grOffset          = 0;

	intl_error_reset( NULL );

	if( zend_parse_parameters( ZEND_NUM_ARGS(), "s|s",
			&loc_name, &loc_name_len,
			&disp_loc_name, &disp_loc_name_len ) == FAILURE ){
		spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
		efree(msg);
		RETURN_FALSE;
	}

	if( loc_name_len > ULOC_FULLNAME_CAPACITY ){
		/* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
		efree(msg);
		RETURN_FALSE;
	}

	if( loc_name_len == 0 ){
		loc_name = intl_locale_get_default();
	}

	if( strcmp(tag_name, DISP_NAME) != 0 ){
		/* Handle grandfathered languages */
		grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
		if( grOffset >= 0 ){
			if( strcmp(tag_name , LOC_LANG_TAG) != 0 ){
				/* Grandfathered tags carry no script/region/variant */
				RETURN_FALSE;
			}
			mod_loc_name = getPreferredTag( loc_name );
		}
	} /* end of if != DISP_NAME */

	if( mod_loc_name == NULL ){
		mod_loc_name = estrdup( loc_name );
	}

	/* No display locale passed: fall back to a private copy of the default */
	if( !disp_loc_name ){
		owned_disp_loc = estrdup(intl_locale_get_default());
		disp_loc_name = owned_disp_loc;
	}

	/* Get the display value; one retry with ICU's reported size if the
	 * first buffer turns out to be too small. */
	for(;;){
		disp_name = erealloc( disp_name , buflen * sizeof(UChar) );
		disp_name_len = buflen;

		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , DISP_NAME)==0 ){
			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		}

		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
		if( U_FAILURE( status ) ){
			if( status != U_BUFFER_OVERFLOW_ERROR ){
				spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
				intl_error_set( NULL, status, msg , 1 );
				efree(msg);
				if( disp_name ){
					efree( disp_name );
				}
				if( mod_loc_name ){
					efree( mod_loc_name );
				}
				if( owned_disp_loc ){
					efree( owned_disp_loc );
				}
				RETURN_FALSE;
			}
			/* too small: clear the status and let the size check decide */
			status = U_ZERO_ERROR;
		}

		if( buflen <= disp_name_len ){
			break;
		}
	}

	if( mod_loc_name ){
		efree( mod_loc_name );
	}
	if( owned_disp_loc ){
		efree( owned_disp_loc );
	}

	/* Convert display locale name from UTF-16 to UTF-8. */
	u8str = intl_convert_utf16_to_utf8(disp_name, buflen, &status );
	efree( disp_name );
	if( !u8str ){
		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
		intl_error_set( NULL, status, msg , 1 );
		efree(msg);
		RETURN_FALSE;
	}

	RETVAL_NEW_STR( u8str );
}
616 /* }}} */
617 
618 /* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
619 * gets the name for the $locale in $in_locale or default_locale
620  }}} */
/* {{{ proto static string locale_get_display_name($locale[, $in_locale = null])
622 * gets the name for the $locale in $in_locale or default_locale
623 */
PHP_FUNCTION(locale_get_display_name)624 PHP_FUNCTION(locale_get_display_name)
625 {
626     get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
627 }
628 /* }}} */
629 
630 /* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
631 * gets the language for the $locale in $in_locale or default_locale
632  }}} */
/* {{{ proto static string locale_get_display_language($locale[, $in_locale = null])
634 * gets the language for the $locale in $in_locale or default_locale
635 */
PHP_FUNCTION(locale_get_display_language)636 PHP_FUNCTION(locale_get_display_language)
637 {
638     get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
639 }
640 /* }}} */
641 
642 /* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
643 * gets the script for the $locale in $in_locale or default_locale
644  }}} */
/* {{{ proto static string locale_get_display_script($locale, $in_locale = null)
646 * gets the script for the $locale in $in_locale or default_locale
647 */
PHP_FUNCTION(locale_get_display_script)648 PHP_FUNCTION(locale_get_display_script)
649 {
650     get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
651 }
652 /* }}} */
653 
654 /* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
655 * gets the region for the $locale in $in_locale or default_locale
656  }}} */
/* {{{ proto static string locale_get_display_region($locale, $in_locale = null)
658 * gets the region for the $locale in $in_locale or default_locale
659 */
PHP_FUNCTION(locale_get_display_region)660 PHP_FUNCTION(locale_get_display_region)
661 {
662     get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
663 }
664 /* }}} */
665 
666 /* {{{
667 * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
668 * gets the variant for the $locale in $in_locale or default_locale
669  }}} */
670 /* {{{
* proto static string locale_get_display_variant($locale, $in_locale = null)
672 * gets the variant for the $locale in $in_locale or default_locale
673 */
PHP_FUNCTION(locale_get_display_variant)674 PHP_FUNCTION(locale_get_display_variant)
675 {
676     get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
677 }
678 /* }}} */
679 
680  /* {{{ proto static array getKeywords(string $locale) {
681  * return an associative array containing keyword-value
682  * pairs for this locale. The keys are keys to the array (doh!)
683  * }}}*/
684  /* {{{ proto static array locale_get_keywords(string $locale) {
685  * return an associative array containing keyword-value
686  * pairs for this locale. The keys are keys to the array (doh!)
687  */
PHP_FUNCTION(locale_get_keywords)688 PHP_FUNCTION( locale_get_keywords )
689 {
690     UEnumeration*   e        = NULL;
691     UErrorCode      status   = U_ZERO_ERROR;
692 
693     const char*	 	kw_key        = NULL;
694     int32_t         kw_key_len    = 0;
695 
696     const char*       	loc_name        = NULL;
697     size_t        	 	loc_name_len    = 0;
698 
699 /*
700 	ICU expects the buffer to be allocated  before calling the function
701 	and so the buffer size has been explicitly specified
702 	ICU uloc.h #define 	ULOC_KEYWORD_AND_VALUES_CAPACITY   100
703 	hence the kw_value buffer size is 100
704 */
705 	zend_string *kw_value_str;
706     int32_t     kw_value_len = 100;
707 
708     intl_error_reset( NULL );
709 
710     if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
711         &loc_name, &loc_name_len ) == FAILURE)
712     {
713         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
714              "locale_get_keywords: unable to parse input params", 0 );
715 
716         RETURN_FALSE;
717     }
718 
719 	INTL_CHECK_LOCALE_LEN(strlen(loc_name));
720 
721     if(loc_name_len == 0) {
722         loc_name = intl_locale_get_default();
723     }
724 
725 	/* Get the keywords */
726     e = uloc_openKeywords( loc_name, &status );
727     if( e != NULL )
728     {
729 		/* Traverse it, filling the return array. */
730     	array_init( return_value );
731 
732     	while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
733     		kw_value_len = 100;
734 			kw_value_str = zend_string_alloc(kw_value_len, 0);
735 
736 			/* Get the keyword value for each keyword */
737 			kw_value_len=uloc_getKeywordValue( loc_name, kw_key, ZSTR_VAL(kw_value_str), kw_value_len, &status );
738 			if (status == U_BUFFER_OVERFLOW_ERROR) {
739 				status = U_ZERO_ERROR;
740 				kw_value_str = zend_string_extend(kw_value_str, kw_value_len, 0);
741 				kw_value_len=uloc_getKeywordValue( loc_name,kw_key, ZSTR_VAL(kw_value_str), kw_value_len+1, &status );
742 			} else if(!U_FAILURE(status)) {
743 				kw_value_str = zend_string_truncate(kw_value_str, kw_value_len, 0);
744 			}
745 			if (U_FAILURE(status)) {
746 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 );
747 				if( kw_value_str){
748 					zend_string_free( kw_value_str );
749 				}
750 				zval_dtor(return_value);
751         		RETURN_FALSE;
752 			}
753 
754        		add_assoc_str( return_value, (char *)kw_key, kw_value_str);
755 		} /* end of while */
756 
757 	} /* end of if e!=NULL */
758 
759     uenum_close( e );
760 }
761 /* }}} */
762 
763  /* {{{ proto static string Locale::canonicalize($locale)
764  * @return string the canonicalized locale
765  * }}} */
 /* {{{ proto static string locale_canonicalize(string $locale)
767  * @param string $locale	The locale string to canonicalize
768  */
PHP_FUNCTION(locale_canonicalize)769 PHP_FUNCTION(locale_canonicalize)
770 {
771 	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
772 }
773 /* }}} */
774 
/* {{{ append_key_value
* Internal function which is called from locale_compose
* gets the value for the key_name and appends to the loc_name
* returns SUCCESS if the value was appended , LOC_NOT_FOUND if the key
* is not present , FAILURE if the array element is not a string
*/
static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
{
	zval *entry = zend_hash_str_find(hash_arr , key_name, strlen(key_name));

	if( entry == NULL ){
		/* key is not part of the input array */
		return LOC_NOT_FOUND;
	}
	if( Z_TYPE_P(entry) != IS_STRING ){
		/* element value is not a string */
		return FAILURE;
	}

	/* The language / grandfathered value is appended bare; every other
	 * key gets a separator in front of its value. */
	if( strcmp(key_name, LOC_LANG_TAG) != 0 &&
	    strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG) != 0 ){
		smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
	}
	smart_str_appendl(loc_name, Z_STRVAL_P(entry) , Z_STRLEN_P(entry));

	return SUCCESS;
}
801 /* }}} */
802 
/* {{{ add_prefix , appends the prefix needed
* e.g. a private key adds the separator followed by 'x'
*/
static void add_prefix(smart_str* loc_name, char* key_name)
{
	/* Only the first 7 characters are compared, so numbered keys built by
	 * the caller (key name plus index) match as well. */
	if( strncmp(key_name , LOC_PRIVATE_TAG , 7) != 0 ){
		return;
	}

	smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
	smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
}
813 /* }}} */
814 
/* {{{ append_multiple_key_values
* Internal function which is called from locale_compose
* gets the multiple values for the key_name and appends to the loc_name
* used for 'variant','extlang','private'
* returns FAILURE if a value is neither a string nor an array of strings ,
* SUCCESS otherwise (also when no matching key is present)
*/
static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name)
{
	zval *entry;
	char  numbered_key[31];
	int   n        = 0;
	int   appended = 0;	/* number of subtags written so far */
	int   limit    = 0;

	entry = zend_hash_str_find( hash_arr , key_name , strlen(key_name));

	if( entry != NULL ){
		/* Form 1: the plain key holds a single string ... */
		if( Z_TYPE_P(entry) == IS_STRING ){
			add_prefix( loc_name , key_name);

			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
			smart_str_appendl(loc_name, Z_STRVAL_P(entry) , Z_STRLEN_P(entry));
			return SUCCESS;
		}

		/* ... anything that is neither string nor array is rejected */
		if( Z_TYPE_P(entry) != IS_ARRAY ){
			return FAILURE;
		}

		/* Form 2: the plain key holds an array of strings */
		{
			HashTable *values = Z_ARRVAL_P(entry);
			zval *item;

			ZEND_HASH_FOREACH_VAL(values, item) {
				if( Z_TYPE_P(item) != IS_STRING ){
					return FAILURE;
				}
				/* the prefix is written once, ahead of the first subtag */
				if( appended++ == 0 ){
					add_prefix(loc_name , key_name);
				}
				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
				smart_str_appendl(loc_name, Z_STRVAL_P(item) , Z_STRLEN_P(item));
			} ZEND_HASH_FOREACH_END();
		}
		return SUCCESS;
	}

	/* Form 3: numbered keys such as variant0, variant1, variant2.
	 * Decide the max. no. of elements allowed for this kind of key. */
	if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
		limit = MAX_NO_VARIANT;
	}
	if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
		limit = MAX_NO_EXTLANG;
	}
	if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
		limit = MAX_NO_PRIVATE;
	}

	for( n = 0 ; n < limit ; n++ ){
		snprintf( numbered_key , 30, "%s%d", key_name , n);
		entry = zend_hash_str_find( hash_arr , numbered_key , strlen(numbered_key));
		if( entry == NULL ){
			/* gaps in the numbering are simply skipped */
			continue;
		}
		if( Z_TYPE_P(entry) != IS_STRING ){
			/* value is not a string */
			return FAILURE;
		}
		if( appended++ == 0 ){
			add_prefix(loc_name , numbered_key);
		}
		smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
		smart_str_appendl(loc_name, Z_STRVAL_P(entry) , Z_STRLEN_P(entry));
	}

	return SUCCESS;
}
889 /* }}} */
890 
891 /*{{{
892 * If applicable sets error message and aborts locale_compose gracefully
893 * returns 0  if locale_compose needs to be aborted
894 * otherwise returns 1
895 */
static int handleAppendResult( int result, smart_str* loc_name)
{
	/* the intl error state is cleared on every call, whatever the outcome */
	intl_error_reset( NULL );

	if( result != FAILURE ){
		/* nothing wrong: locale_compose may carry on */
		return 1;
	}

	intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
		 "locale_compose: parameter array element is not a string", 0 );
	/* drop whatever has been composed so far */
	smart_str_free(loc_name);
	return 0;
}
907 /* }}} */
908 
/* NUL-terminate the smart_str and return its string from the calling PHP
 * function. Wrapped in do/while(0) so the two steps stay together when the
 * macro is used as the body of an unbraced if/else. */
#define RETURN_SMART_STR(str) do { smart_str_0((str)); RETURN_NEW_STR((str)->s); } while (0)
910 /* {{{ proto static string Locale::composeLocale($array)
911 * Creates a locale by combining the parts of locale-ID passed
912 * }}} */
/* {{{ proto static string locale_compose($array)
914 * Creates a locale by combining the parts of locale-ID passed
915 * }}} */
PHP_FUNCTION(locale_compose)916 PHP_FUNCTION(locale_compose)
917 {
918 	smart_str      	loc_name_s = {0};
919 	smart_str *loc_name = &loc_name_s;
920 	zval*			arr	= NULL;
921 	HashTable*		hash_arr = NULL;
922 	int 			result = 0;
923 
924 	intl_error_reset( NULL );
925 
926 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "a",
927 		&arr) == FAILURE)
928 	{
929 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
930 			 "locale_compose: unable to parse input params", 0 );
931 		RETURN_FALSE;
932 	}
933 
934 	hash_arr = Z_ARRVAL_P( arr );
935 
936 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
937 		RETURN_FALSE;
938 
939 	/* Check for grandfathered first */
940 	result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
941 	if( result == SUCCESS){
942 		RETURN_SMART_STR(loc_name);
943 	}
944 	if( !handleAppendResult( result, loc_name)){
945 		RETURN_FALSE;
946 	}
947 
948 	/* Not grandfathered */
949 	result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
950 	if( result == LOC_NOT_FOUND ){
951 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
952 		"locale_compose: parameter array does not contain 'language' tag.", 0 );
953 		smart_str_free(loc_name);
954 		RETURN_FALSE;
955 	}
956 	if( !handleAppendResult( result, loc_name)){
957 		RETURN_FALSE;
958 	}
959 
960 	/* Extlang */
961 	result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG);
962 	if( !handleAppendResult( result, loc_name)){
963 		RETURN_FALSE;
964 	}
965 
966 	/* Script */
967 	result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
968 	if( !handleAppendResult( result, loc_name)){
969 		RETURN_FALSE;
970 	}
971 
972 	/* Region */
973 	result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
974 	if( !handleAppendResult( result, loc_name)){
975 		RETURN_FALSE;
976 	}
977 
978 	/* Variant */
979 	result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG);
980 	if( !handleAppendResult( result, loc_name)){
981 		RETURN_FALSE;
982 	}
983 
984 	/* Private */
985 	result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG);
986 	if( !handleAppendResult( result, loc_name)){
987 		RETURN_FALSE;
988 	}
989 
990 	RETURN_SMART_STR(loc_name);
991 }
992 /* }}} */
993 
994 
995 /*{{{
996 * Parses the locale and returns private subtags  if existing
997 * else returns NULL
998 * e.g. for locale='en_US-x-prv1-prv2-prv3'
999 * returns a pointer to the string 'prv1-prv2-prv3'
1000 */
get_private_subtags(const char * loc_name)1001 static zend_string* get_private_subtags(const char* loc_name)
1002 {
1003 	zend_string* result = NULL;
1004 	zend_off_t   singletonPos = 0;
1005 	size_t       len = 0;
1006 	const char*  mod_loc_name =NULL;
1007 
1008 	if( loc_name && (len = strlen(loc_name)>0 ) ){
1009 		mod_loc_name = loc_name ;
1010 		len   = strlen(mod_loc_name);
1011 		while( (singletonPos = getSingletonPos(mod_loc_name)) > -1){
1012 			if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){
1013 				/* private subtag start found */
1014 				if( singletonPos + 2 ==  len){
1015 					/* loc_name ends with '-x-' ; return  NULL */
1016 				}
1017 				else{
1018 					/* result = mod_loc_name + singletonPos +2; */
1019 					result = zend_string_init(mod_loc_name + singletonPos+2  , (len -( singletonPos +2) ), 0);
1020 				}
1021 				break;
1022 			}
1023 			else{
1024 				if((size_t)(singletonPos + 1) >= len){
1025 					/* String end */
1026 					break;
1027 				} else {
1028 					/* singleton found but not a private subtag , hence check further in the string for the private subtag */
1029 					mod_loc_name = mod_loc_name + singletonPos +1;
1030 					len = strlen(mod_loc_name);
1031 				}
1032 			}
1033 		} /* end of while */
1034 	}
1035 
1036 	return result;
1037 }
1038 /* }}} */
1039 
1040 /* {{{ code used by locale_parse
1041 */
/*
 * Helper of locale_parse: looks up the subtag `key_name` of `loc_name` and
 * stores it in the array `hash_arr`.
 *
 * - LOC_PRIVATE_TAG / LOC_VARIANT_TAG may hold several values. The value is
 *   tokenized on "-" / "_" and stored under "<key_name>0", "<key_name>1", ...
 *   The first token is always stored; later tokens are stored until a token
 *   of length <= 1 (a singleton) or the end of the string is reached.
 * - Any other key is stored as a single string under `key_name`; the array
 *   takes over the zend_string in that case.
 *
 * Returns 1 only when a single-valued key was added, 0 otherwise (this
 * includes the multi-valued keys, matching the historical behaviour).
 */
static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name)
{
	zend_string*	key_value = NULL;
	char		cur_key_name[32];	/* "<key_name><index>" */
	char*		token = NULL;
	char*		last_ptr = NULL;

	int		result = 0;
	int		cur_result = 0;
	int		cnt = 0;

	const int	is_private = ( strcmp(key_name , LOC_PRIVATE_TAG) == 0 );
	const int	is_multi = is_private || ( strcmp(key_name , LOC_VARIANT_TAG) == 0 );

	if( is_private ){
		key_value = get_private_subtags( loc_name );
		result = 1;
	} else {
		key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
	}

	if( is_multi ){
		if( result > 0 && key_value ){
			/* Tokenize on the "_" or "-"; the string is modified in place,
			 * which is fine because we own it and release it below */
			token = php_strtok_r( ZSTR_VAL(key_value) , DELIMITER , &last_ptr);
			/* token is NULL when the value holds nothing but delimiters */
			if( token ){
				snprintf( cur_key_name , sizeof(cur_key_name) , "%s%d", key_name , cnt++);
				add_assoc_string( hash_arr, cur_key_name , token);
				/* keep tokenizing and stop at a singleton if any */
				while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
					snprintf( cur_key_name , sizeof(cur_key_name) , "%s%d", key_name , cnt++);
					add_assoc_string( hash_arr, cur_key_name , token);
				}
			}
		}
		if( key_value ){
			zend_string_release(key_value);
		}
	} else {
		if( result == 1 && key_value ){
			/* ownership of key_value moves to the array */
			add_assoc_str( hash_arr, key_name , key_value);
			cur_result = 1;
		} else if( key_value ){
			zend_string_release(key_value);
		}
	}

	return cur_result;
}
1099 /* }}} */
1100 
1101 /* {{{ proto static array Locale::parseLocale($locale)
1102 * parses a locale-id into an array of its different parts
1103  }}} */
1104 /* {{{ proto static array parse_locale($locale)
1105 * parses a locale-id into an array of its different parts
1106 */
PHP_FUNCTION(locale_parse)1107 PHP_FUNCTION(locale_parse)
1108 {
1109     const char* loc_name        = NULL;
1110     size_t         loc_name_len    = 0;
1111     int         grOffset    	= 0;
1112 
1113     intl_error_reset( NULL );
1114 
1115     if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1116         &loc_name, &loc_name_len ) == FAILURE)
1117     {
1118         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1119              "locale_parse: unable to parse input params", 0 );
1120 
1121         RETURN_FALSE;
1122     }
1123 
1124     INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1125 
1126     if(loc_name_len == 0) {
1127         loc_name = intl_locale_get_default();
1128     }
1129 
1130 	array_init( return_value );
1131 
1132 	grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1133 	if( grOffset >= 0 ){
1134 		add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1135 	}
1136 	else{
1137 		/* Not grandfathered */
1138 		add_array_entry( loc_name , return_value , LOC_LANG_TAG);
1139 		add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG);
1140 		add_array_entry( loc_name , return_value , LOC_REGION_TAG);
1141 		add_array_entry( loc_name , return_value , LOC_VARIANT_TAG);
1142 		add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG);
1143 	}
1144 }
1145 /* }}} */
1146 
1147 /* {{{ proto static array Locale::getAllVariants($locale)
1148 * gets an array containing the list of variants, or null
1149  }}} */
1150 /* {{{ proto static array locale_get_all_variants($locale)
1151 * gets an array containing the list of variants, or null
1152 */
PHP_FUNCTION(locale_get_all_variants)1153 PHP_FUNCTION(locale_get_all_variants)
1154 {
1155 	const char*  	loc_name        = NULL;
1156 	size_t    		loc_name_len    = 0;
1157 
1158 	int	result		= 0;
1159 	char*	token		= NULL;
1160 	zend_string*	variant		= NULL;
1161 	char*	saved_ptr	= NULL;
1162 
1163 	intl_error_reset( NULL );
1164 
1165 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1166 	&loc_name, &loc_name_len ) == FAILURE)
1167 	{
1168 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1169 	     "locale_parse: unable to parse input params", 0 );
1170 
1171 		RETURN_FALSE;
1172 	}
1173 
1174 	if(loc_name_len == 0) {
1175 		loc_name = intl_locale_get_default();
1176 		loc_name_len = strlen(loc_name);
1177 	}
1178 
1179 	INTL_CHECK_LOCALE_LEN(loc_name_len);
1180 
1181 	array_init( return_value );
1182 
1183 	/* If the locale is grandfathered, stop, no variants */
1184 	if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1185 		/* ("Grandfathered Tag. No variants."); */
1186 	}
1187 	else {
1188 	/* Call ICU variant */
1189 		variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1190 		if( result > 0 && variant){
1191 			/* Tokenize on the "_" or "-" */
1192 			token = php_strtok_r( variant->val , DELIMITER , &saved_ptr);
1193 			add_next_index_stringl( return_value, token , strlen(token));
1194 			/* tokenize on the "_" or "-" and stop  at singleton if any	*/
1195 			while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1196  				add_next_index_stringl( return_value, token , strlen(token));
1197 			}
1198 		}
1199 		if( variant ){
1200 			zend_string_release( variant );
1201 		}
1202 	}
1203 
1204 
1205 }
1206 /* }}} */
1207 
1208 /*{{{
1209 * Converts to lower case and also replaces all hyphens with the underscore
1210 */
/*
 * Writes a comparison-friendly copy of `str` into `retstr`: every character
 * is lower-cased and every '-' is replaced by '_'. The copy is
 * NUL-terminated, so `retstr` must have room for strlen(str) + 1 bytes.
 *
 * Returns 1 on success. Returns 0, leaving `retstr` untouched, when `str`
 * is NULL or empty.
 */
static int strToMatch(const char* str ,char *retstr)
{
	if( !str || str[0] == '\0' ){
		return 0;
	}

	for( ; *str != '\0'; str++, retstr++ ){
		if( *str == '-' ){
			*retstr = '_';
		} else {
			/* tolower() requires a value representable as unsigned char;
			 * a plain (possibly negative) char would be undefined */
			*retstr = (char)tolower((unsigned char)*str);
		}
	}
	*retstr = '\0';

	return 1;
}
1239 /* }}} */
1240 
1241 /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1242 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1243 */
1244 /* }}} */
1245 /* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1246 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1247 */
PHP_FUNCTION(locale_filter_matches)1248 PHP_FUNCTION(locale_filter_matches)
1249 {
1250 	char*       	lang_tag        = NULL;
1251 	size_t         	lang_tag_len    = 0;
1252 	const char*     loc_range       = NULL;
1253 	size_t         	loc_range_len   = 0;
1254 
1255 	int		result		= 0;
1256 	char*		token		= 0;
1257 	char*		chrcheck	= NULL;
1258 
1259 	zend_string*   	can_lang_tag    = NULL;
1260 	zend_string*   	can_loc_range   = NULL;
1261 
1262 	char*       	cur_lang_tag    = NULL;
1263 	char*       	cur_loc_range   = NULL;
1264 
1265 	zend_bool 	boolCanonical 	= 0;
1266 	UErrorCode	status		= U_ZERO_ERROR;
1267 
1268 	intl_error_reset( NULL );
1269 
1270 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "ss|b",
1271 		&lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1272 		&boolCanonical) == FAILURE)
1273 	{
1274 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1275 		"locale_filter_matches: unable to parse input params", 0 );
1276 
1277 		RETURN_FALSE;
1278 	}
1279 
1280 	if(loc_range_len == 0) {
1281 		loc_range = intl_locale_get_default();
1282 		loc_range_len = strlen(loc_range);
1283 	}
1284 
1285 	if( strcmp(loc_range,"*")==0){
1286 		RETURN_TRUE;
1287 	}
1288 
1289 	INTL_CHECK_LOCALE_LEN(loc_range_len);
1290 	INTL_CHECK_LOCALE_LEN(lang_tag_len);
1291 
1292 	if( boolCanonical ){
1293 		/* canonicalize loc_range */
1294 		can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1295 		if( result <=0) {
1296 			intl_error_set( NULL, status,
1297 				"locale_filter_matches : unable to canonicalize loc_range" , 0 );
1298 			RETURN_FALSE;
1299 		}
1300 
1301 		/* canonicalize lang_tag */
1302 		can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1303 		if( result <=0) {
1304 			intl_error_set( NULL, status,
1305 				"locale_filter_matches : unable to canonicalize lang_tag" , 0 );
1306 			RETURN_FALSE;
1307 		}
1308 
1309 		/* Convert to lower case for case-insensitive comparison */
1310 		cur_lang_tag = ecalloc( 1, can_lang_tag->len + 1);
1311 
1312 		/* Convert to lower case for case-insensitive comparison */
1313 		result = strToMatch( can_lang_tag->val , cur_lang_tag);
1314 		if( result == 0) {
1315 			efree( cur_lang_tag );
1316 			zend_string_release( can_lang_tag );
1317 			RETURN_FALSE;
1318 		}
1319 
1320 		cur_loc_range = ecalloc( 1, can_loc_range->len + 1);
1321 		result = strToMatch( can_loc_range->val , cur_loc_range );
1322 		if( result == 0) {
1323 			efree( cur_lang_tag );
1324 			zend_string_release( can_lang_tag );
1325 			efree( cur_loc_range );
1326 			zend_string_release( can_loc_range );
1327 			RETURN_FALSE;
1328 		}
1329 
1330 		/* check if prefix */
1331 		token 	= strstr( cur_lang_tag , cur_loc_range );
1332 
1333 		if( token && (token==cur_lang_tag) ){
1334 			/* check if the char. after match is SEPARATOR */
1335 			chrcheck = token + (strlen(cur_loc_range));
1336 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1337 				if( cur_lang_tag){
1338 					efree( cur_lang_tag );
1339 				}
1340 				if( cur_loc_range){
1341 					efree( cur_loc_range );
1342 				}
1343 				if( can_lang_tag){
1344 					zend_string_release( can_lang_tag );
1345 				}
1346 				if( can_loc_range){
1347 					zend_string_release( can_loc_range );
1348 				}
1349 				RETURN_TRUE;
1350 			}
1351 		}
1352 
1353 		/* No prefix as loc_range */
1354 		if( cur_lang_tag){
1355 			efree( cur_lang_tag );
1356 		}
1357 		if( cur_loc_range){
1358 			efree( cur_loc_range );
1359 		}
1360 		if( can_lang_tag){
1361 			zend_string_release( can_lang_tag );
1362 		}
1363 		if( can_loc_range){
1364 			zend_string_release( can_loc_range );
1365 		}
1366 		RETURN_FALSE;
1367 
1368 	} /* end of if isCanonical */
1369 	else{
1370 		/* Convert to lower case for case-insensitive comparison */
1371 		cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1372 
1373 		result = strToMatch( lang_tag , cur_lang_tag);
1374 		if( result == 0) {
1375 			efree( cur_lang_tag );
1376 			RETURN_FALSE;
1377 		}
1378 		cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1379 		result = strToMatch( loc_range , cur_loc_range );
1380 		if( result == 0) {
1381 			efree( cur_lang_tag );
1382 			efree( cur_loc_range );
1383 			RETURN_FALSE;
1384 		}
1385 
1386 		/* check if prefix */
1387 		token 	= strstr( cur_lang_tag , cur_loc_range );
1388 
1389 		if( token && (token==cur_lang_tag) ){
1390 			/* check if the char. after match is SEPARATOR */
1391 			chrcheck = token + (strlen(cur_loc_range));
1392 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1393 				if( cur_lang_tag){
1394 					efree( cur_lang_tag );
1395 				}
1396 				if( cur_loc_range){
1397 					efree( cur_loc_range );
1398 				}
1399 				RETURN_TRUE;
1400 			}
1401 		}
1402 
1403 		/* No prefix as loc_range */
1404 		if( cur_lang_tag){
1405 			efree( cur_lang_tag );
1406 		}
1407 		if( cur_loc_range){
1408 			efree( cur_loc_range );
1409 		}
1410 		RETURN_FALSE;
1411 
1412 	}
1413 }
1414 /* }}} */
1415 
array_cleanup(char * arr[],int arr_size)1416 static void array_cleanup( char* arr[] , int arr_size)
1417 {
1418 	int i=0;
1419 	for( i=0; i< arr_size; i++ ){
1420 		if( arr[i*2] ){
1421 			efree( arr[i*2]);
1422 		}
1423 	}
1424 	efree(arr);
1425 }
1426 
/*
 * Frees the caller's `cur_arr` (first `cur_arr_len` pairs) through
 * array_cleanup() and returns `value` from the enclosing function.
 * Wrapped in do/while(0) so the cleanup and the return cannot be separated
 * when the macro is used as the body of an unbraced if/else.
 */
#define LOOKUP_CLEAN_RETURN(value)	do { array_cleanup(cur_arr, cur_arr_len); return (value); } while (0)
1428 /* {{{
1429 * returns the lookup result to lookup_loc_range_src_php
1430 * internal function
1431 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize)1432 static zend_string* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize )
1433 {
1434 	int	i = 0;
1435 	int	cur_arr_len = 0;
1436 	int result = 0;
1437 
1438 	zend_string* lang_tag = NULL;
1439 	zval* ele_value = NULL;
1440 	char** cur_arr = NULL;
1441 
1442 	char* cur_loc_range	= NULL;
1443 	zend_string* can_loc_range	= NULL;
1444 	zend_off_t saved_pos = 0;
1445 
1446 	zend_string* return_value = NULL;
1447 
1448 	cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1449 	ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1450 	/* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1451 		if(Z_TYPE_P(ele_value)!= IS_STRING) {
1452 			/* element value is not a string */
1453 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0);
1454 			LOOKUP_CLEAN_RETURN(NULL);
1455 		}
1456 		cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1457 		result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1458 		if(result == 0) {
1459 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0);
1460 			LOOKUP_CLEAN_RETURN(NULL);
1461 		}
1462 		cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1463 		cur_arr_len++ ;
1464 	} ZEND_HASH_FOREACH_END(); /* end of for */
1465 
1466 	/* Canonicalize array elements */
1467 	if(canonicalize) {
1468 		for(i=0; i<cur_arr_len; i++) {
1469 			lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1470 			if(result != 1 || lang_tag == NULL || !lang_tag->val[0]) {
1471 				if(lang_tag) {
1472 					zend_string_release(lang_tag);
1473 				}
1474 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1475 				LOOKUP_CLEAN_RETURN(NULL);
1476 			}
1477 			cur_arr[i*2] = erealloc(cur_arr[i*2], lang_tag->len+1);
1478 			result = strToMatch(lang_tag->val, cur_arr[i*2]);
1479 			zend_string_release(lang_tag);
1480 			if(result == 0) {
1481 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1482 				LOOKUP_CLEAN_RETURN(NULL);
1483 			}
1484 		}
1485 
1486 	}
1487 
1488 	if(canonicalize) {
1489 		/* Canonicalize the loc_range */
1490 		can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1491 		if( result != 1 || can_loc_range == NULL || !can_loc_range->val[0]) {
1492 			/* Error */
1493 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 );
1494 			if(can_loc_range) {
1495 				zend_string_release(can_loc_range);
1496 			}
1497 			LOOKUP_CLEAN_RETURN(NULL);
1498 		} else {
1499 			loc_range = can_loc_range->val;
1500 		}
1501 	}
1502 
1503 	cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1504 	/* convert to lower and replace hyphens */
1505 	result = strToMatch(loc_range, cur_loc_range);
1506 	if(can_loc_range) {
1507 		zend_string_release(can_loc_range);
1508 	}
1509 	if(result == 0) {
1510 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1511 		LOOKUP_CLEAN_RETURN(NULL);
1512 	}
1513 
1514 	/* Lookup for the lang_tag match */
1515 	saved_pos = strlen(cur_loc_range);
1516 	while(saved_pos > 0) {
1517 		for(i=0; i< cur_arr_len; i++){
1518 			if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1519 				/* Match found */
1520 				char *str = canonicalize ? cur_arr[i*2] : cur_arr[i*2+1];
1521 				return_value = zend_string_init(str, strlen(str), 0);
1522 				efree(cur_loc_range);
1523 				LOOKUP_CLEAN_RETURN(return_value);
1524 			}
1525 		}
1526 		saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1527 	}
1528 
1529 	/* Match not found */
1530 	efree(cur_loc_range);
1531 	LOOKUP_CLEAN_RETURN(NULL);
1532 }
1533 /* }}} */
1534 
1535 /* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1536 * Searches the items in $langtag for the best match to the language
1537 * range
1538 */
1539 /* }}} */
1540 /* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1541 * Searches the items in $langtag for the best match to the language
1542 * range
1543 */
PHP_FUNCTION(locale_lookup)1544 PHP_FUNCTION(locale_lookup)
1545 {
1546 	zend_string*   	fallback_loc_str	= NULL;
1547 	const char*    	loc_range      		= NULL;
1548 	size_t        	loc_range_len  		= 0;
1549 
1550 	zval*		arr				= NULL;
1551 	HashTable*	hash_arr		= NULL;
1552 	zend_bool	boolCanonical	= 0;
1553 	zend_string* 	result_str	= NULL;
1554 
1555 	intl_error_reset( NULL );
1556 
1557 #if U_ICU_VERSION_MAJOR_NUM > 63
1558 # define BANG "!"
1559 #else
1560 # define BANG
1561 #endif
1562 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "as|bS" BANG, &arr, &loc_range, &loc_range_len,
1563 		&boolCanonical,	&fallback_loc_str) == FAILURE) {
1564 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,	"locale_lookup: unable to parse input params", 0 );
1565 		RETURN_FALSE;
1566 	}
1567 #undef BANG
1568 
1569 	if(loc_range_len == 0) {
1570 		if(fallback_loc_str) {
1571 			loc_range = ZSTR_VAL(fallback_loc_str);
1572 			loc_range_len = ZSTR_LEN(fallback_loc_str);
1573 		} else {
1574 			loc_range = intl_locale_get_default();
1575 			loc_range_len = strlen(loc_range);
1576 		}
1577 	}
1578 
1579 	hash_arr = Z_ARRVAL_P(arr);
1580 
1581 	INTL_CHECK_LOCALE_LEN(loc_range_len);
1582 
1583 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1584 		RETURN_EMPTY_STRING();
1585 	}
1586 
1587 	result_str = lookup_loc_range(loc_range, hash_arr, boolCanonical);
1588 	if(result_str == NULL || ZSTR_VAL(result_str)[0] == '\0') {
1589 		if( fallback_loc_str ) {
1590 			result_str = zend_string_copy(fallback_loc_str);
1591 		} else {
1592 			RETURN_EMPTY_STRING();
1593 		}
1594 	}
1595 
1596 	RETURN_STR(result_str);
1597 }
1598 /* }}} */
1599 
1600 /* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1601 * Tries to find out best available locale based on HTTP "Accept-Language" header
1602 */
1603 /* }}} */
1604 /* {{{ proto string locale_accept_from_http(string $http_accept)
1605 * Tries to find out best available locale based on HTTP "Accept-Language" header
1606 */
PHP_FUNCTION(locale_accept_from_http)1607 PHP_FUNCTION(locale_accept_from_http)
1608 {
1609 	UEnumeration *available;
1610 	char *http_accept = NULL;
1611 	size_t http_accept_len;
1612 	UErrorCode status = 0;
1613 	int len;
1614 	char resultLocale[INTL_MAX_LOCALE_LEN+1];
1615 	UAcceptResult outResult;
1616 
1617 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s", &http_accept, &http_accept_len) == FAILURE)
1618 	{
1619 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1620 		"locale_accept_from_http: unable to parse input parameters", 0 );
1621 		RETURN_FALSE;
1622 	}
1623 	if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1624 		/* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1625 		char *start = http_accept;
1626 		char *end;
1627 		size_t len;
1628 		do {
1629 			end = strchr(start, ',');
1630 			len = end ? end-start : http_accept_len-(start-http_accept);
1631 			if(len > ULOC_FULLNAME_CAPACITY) {
1632 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1633 						"locale_accept_from_http: locale string too long", 0 );
1634 				RETURN_FALSE;
1635 			}
1636 			if(end) {
1637 				start = end+1;
1638 			}
1639 		} while(end != NULL);
1640 	}
1641 
1642 	available = ures_openAvailableLocales(NULL, &status);
1643 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1644 	len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1645 						&outResult, http_accept, available, &status);
1646 	uenum_close(available);
1647 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1648 	if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1649 		RETURN_FALSE;
1650 	}
1651 	RETURN_STRINGL(resultLocale, len);
1652 }
1653 /* }}} */
1654 
1655 /*
1656  * Local variables:
1657  * tab-width: 4
1658  * c-basic-offset: 4
1659  * End:
1660  * vim600: noet sw=4 ts=4 fdm=marker
1661  * vim<600: noet sw=4 ts=4
1662  *can_loc_len
1663 */
1664