xref: /PHP-7.4/ext/intl/locale/locale_methods.c (revision 52cda6fc)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14    +----------------------------------------------------------------------+
15 */
16 
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <unicode/ustring.h>
22 #include <unicode/udata.h>
23 #include <unicode/putil.h>
24 #include <unicode/ures.h>
25 
26 #include "php_intl.h"
27 #include "locale.h"
28 #include "locale_class.h"
29 #include "locale_methods.h"
30 #include "intl_convert.h"
31 #include "intl_data.h"
32 
33 #include <zend_API.h>
34 #include <zend.h>
35 #include <php.h>
36 #include "main/php_ini.h"
37 #include "zend_smart_str.h"
38 
39 ZEND_EXTERN_MODULE_GLOBALS( intl )
40 
/* Separators and prefixes used when building locale identifiers.
 * SEPARATOR is what the compose helpers insert between subtags and
 * PRIVATE_PREFIX is the singleton emitted before private-use subtags.
 * DISP_NAME is the pseudo tag name passed by locale_get_display_name. */
#define SEPARATOR "_"
#define SEPARATOR1 "-"
#define DELIMITER "-_"
#define EXTLANG_PREFIX "a"
#define PRIVATE_PREFIX "x"
#define DISP_NAME "name"

/* Upper bounds on the numbered keys ("variant0", "variant1", ...) that
 * append_multiple_key_values() probes for each kind of subtag. */
#define MAX_NO_VARIANT  15
#define MAX_NO_EXTLANG  3
#define MAX_NO_PRIVATE  15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is absent */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
60 
/* Grandfathered language tags, based on the IANA registry at the time
 * of writing this code. The list is NULL-terminated.
 * The order is significant: the leading entries line up, offset by
 * offset, with LOC_PREFERRED_GRANDFATHERED.
 */
static const char * const LOC_GRANDFATHERED[] = {
	/* entries that have a preferred replacement */
	"art-lojban",
	"i-klingon",
	"i-lux",
	"i-navajo",
	"no-bok",
	"no-nyn",
	/* entries without a preferred replacement */
	"cel-gaulish",
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-mingo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"sgn-BE-fr",
	"sgn-BE-nl",
	"sgn-CH-de",
	"zh-cmn",
	"zh-cmn-Hans",
	"zh-cmn-Hant",
	"zh-gan",
	"zh-guoyu",
	"zh-hakka",
	"zh-min",
	"zh-min-nan",
	"zh-wuu",
	"zh-xiang",
	"zh-yue",
	NULL
};
76 
/* Preferred values for the grandfathered tags, where one exists
 * (based on the IANA registry at the time of writing this code).
 * Kept in sync with LOC_GRANDFATHERED: entry N here is the preferred
 * value for entry N there. LOC_PREFERRED_GRANDFATHERED_LEN is the number
 * of real (non-NULL) entries.
 */
static const int LOC_PREFERRED_GRANDFATHERED_LEN = 6;
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
	"jbo",	/* art-lojban */
	"tlh",	/* i-klingon  */
	"lb",	/* i-lux      */
	"nv",	/* i-navajo   */
	"nb",	/* no-bok     */
	"nn",	/* no-nyn     */
	NULL
};
88 
/* Character classification helpers.
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. a conditional expression) are classified as a whole.
 * NOTE: arguments may be evaluated more than once; do not pass
 * expressions with side effects. */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0])&&isIDSeparator((s)[1]))
#define isKeywordPrefix(s) ( isKeywordSeparator((s)[0]) )

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
104 
105 /* {{{ return the offset of 'key' in the array 'list'.
106  * returns -1 if not present */
findOffset(const char * const * list,const char * key)107 static int16_t findOffset(const char* const* list, const char* key)
108 {
109 	const char* const* anchor = list;
110 	while (*list != NULL) {
111 		if (strcmp(key, *list) == 0) {
112 			return (int16_t)(list - anchor);
113 		}
114 		list++;
115 	}
116 
117 	return -1;
118 
119 }
120 /*}}}*/
121 
getPreferredTag(const char * gf_tag)122 static char* getPreferredTag(const char* gf_tag)
123 {
124 	char* result = NULL;
125 	zend_off_t grOffset = 0;
126 
127 	grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
128 	if(grOffset < 0) {
129 		return NULL;
130 	}
131 	if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
132 		/* return preferred tag */
133 		result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
134 	} else {
135 		/* Return correct grandfathered language tag */
136 		result = estrdup( LOC_GRANDFATHERED[grOffset] );
137 	}
138 	return result;
139 }
140 
141 /* {{{
142 * returns the position of next token for lookup
143 * or -1 if no token
144 * strtokr equivalent search for token in reverse direction
145 */
getStrrtokenPos(char * str,zend_off_t savedPos)146 static zend_off_t getStrrtokenPos(char* str, zend_off_t savedPos)
147 {
148 	zend_off_t result =-1;
149 	zend_off_t i;
150 
151 	for(i=savedPos-1; i>=0; i--) {
152 		if(isIDSeparator(*(str+i)) || isKeywordSeparator(*(str+i))){
153 			/* delimiter found; check for singleton */
154 			if(i>=2 && isIDSeparator(*(str+i-2)) ){
155 				/* a singleton; so send the position of token before the singleton */
156 				result = i-2;
157 			} else {
158 				result = i;
159 			}
160 			break;
161 		}
162 	}
163 	if(result < 1){
164 		/* Just in case inavlid locale e.g. '-x-xyz' or '-sl_Latn' */
165 		result =-1;
166 	}
167 	return result;
168 }
169 /* }}} */
170 
171 /* {{{
172 * returns the position of a singleton if present
173 * returns -1 if no singleton
174 * strtok equivalent search for singleton
175 */
getSingletonPos(const char * str)176 static zend_off_t getSingletonPos(const char* str)
177 {
178 	zend_off_t result =-1;
179 	size_t len = 0;
180 
181 	if( str && ((len=strlen(str))>0) ){
182 		zend_off_t i = 0;
183 		for( i=0; (size_t)i < len ; i++){
184 			if( isIDSeparator(*(str+i)) ){
185 				if( i==1){
186 					/* string is of the form x-avy or a-prv1 */
187 					result =0;
188 					break;
189 				} else {
190 					/* delimiter found; check for singleton */
191 					if( isIDSeparator(*(str+i+2)) ){
192 						/* a singleton; so send the position of separator before singleton */
193 						result = i+1;
194 						break;
195 					}
196 				}
197 			}
198 		}/* end of for */
199 
200 	}
201 	return result;
202 }
203 /* }}} */
204 
/* {{{ proto static string Locale::getDefault(  )
   Get default locale */
/* }}} */
/* {{{ proto static string locale_get_default( )
   Returns the value of intl_locale_get_default() as a PHP string */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
	const char *current_default = intl_locale_get_default(  );

	RETURN_STRING( current_default );
}
214 
215 /* }}} */
216 
217 /* {{{ proto static string Locale::setDefault( string $locale )
218    Set default locale */
219 /* }}} */
220 /* {{{ proto static string locale_set_default( string $locale )
221    Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)222 PHP_NAMED_FUNCTION(zif_locale_set_default)
223 {
224 	zend_string* locale_name;
225 	zend_string *ini_name;
226 	char *default_locale = NULL;
227 
228 	if(zend_parse_parameters( ZEND_NUM_ARGS(),  "S", &locale_name) == FAILURE)
229 	{
230 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
231 			 	"locale_set_default: unable to parse input params", 0 );
232 
233 		RETURN_FALSE;
234 	}
235 
236 	if (ZSTR_LEN(locale_name) == 0) {
237 		default_locale = (char *)uloc_getDefault();
238 		locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
239 	}
240 
241 	ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
242 	zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
243 	zend_string_release_ex(ini_name, 0);
244 	if (default_locale != NULL) {
245 		zend_string_release_ex(locale_name, 0);
246 	}
247 
248 	RETURN_TRUE;
249 }
250 /* }}} */
251 
252 /* {{{
253 * Gets the value from ICU
254 * common code shared by get_primary_language,get_script or get_region or get_variant
255 * result = 0 if error, 1 if successful , -1 if no value
256 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)257 static zend_string* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
258 {
259 	zend_string* tag_value	    = NULL;
260 	int32_t      tag_value_len  = 512;
261 
262 	char*        mod_loc_name   = NULL;
263 
264 	int32_t      buflen         = 512;
265 	UErrorCode   status         = U_ZERO_ERROR;
266 
267 	if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
268 		return NULL;
269 	}
270 
271 	if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
272 		/* Handle  grandfathered languages */
273 		zend_off_t grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
274 		if( grOffset >= 0 ){
275 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
276 				return zend_string_init(loc_name, strlen(loc_name), 0);
277 			} else {
278 				/* Since Grandfathered , no value , do nothing , retutn NULL */
279 				return NULL;
280 			}
281 		}
282 
283 	if( fromParseLocale==1 ){
284 		zend_off_t singletonPos = 0;
285 
286 		/* Handle singletons */
287 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
288 			if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
289 				return zend_string_init(loc_name, strlen(loc_name), 0);
290 			}
291 		}
292 
293 		singletonPos = getSingletonPos( loc_name );
294 		if( singletonPos == 0){
295 			/* singleton at start of script, region , variant etc.
296 			 * or invalid singleton at start of language */
297 			return NULL;
298 		} else if( singletonPos > 0 ){
299 			/* singleton at some position except at start
300 			 * strip off the singleton and rest of the loc_name */
301 			mod_loc_name = estrndup ( loc_name , singletonPos-1);
302 		}
303 	} /* end of if fromParse */
304 
305 	} /* end of if != LOC_CANONICAL_TAG */
306 
307 	if( mod_loc_name == NULL){
308 		mod_loc_name = estrdup(loc_name );
309 	}
310 
311 	/* Proceed to ICU */
312 	do{
313 		if (tag_value) {
314 			tag_value = zend_string_realloc( tag_value , buflen, 0);
315 		} else {
316 			tag_value = zend_string_alloc( buflen, 0);
317 		}
318 		tag_value_len = buflen;
319 
320 		if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
321 			buflen = uloc_getScript ( mod_loc_name , tag_value->val , tag_value_len , &status);
322 		}
323 		if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
324 			buflen = uloc_getLanguage ( mod_loc_name , tag_value->val , tag_value_len , &status);
325 		}
326 		if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
327 			buflen = uloc_getCountry ( mod_loc_name , tag_value->val , tag_value_len , &status);
328 		}
329 		if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
330 			buflen = uloc_getVariant ( mod_loc_name , tag_value->val , tag_value_len , &status);
331 		}
332 		if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
333 			buflen = uloc_canonicalize ( mod_loc_name , tag_value->val , tag_value_len , &status);
334 		}
335 
336 		if( U_FAILURE( status ) ) {
337 			if( status == U_BUFFER_OVERFLOW_ERROR ) {
338 				status = U_ZERO_ERROR;
339 				buflen++; /* add space for \0 */
340 				continue;
341 			}
342 
343 			/* Error in retrieving data */
344 			*result = 0;
345 			if( tag_value ){
346 				zend_string_release_ex( tag_value, 0 );
347 			}
348 			if( mod_loc_name ){
349 				efree( mod_loc_name);
350 			}
351 			return NULL;
352 		}
353 	} while( buflen > tag_value_len );
354 
355 	if(  buflen ==0 ){
356 		/* No value found */
357 		*result = -1;
358 		if( tag_value ){
359 			zend_string_release_ex( tag_value, 0 );
360 		}
361 		if( mod_loc_name ){
362 			efree( mod_loc_name);
363 		}
364 		return NULL;
365 	} else {
366 		*result = 1;
367 	}
368 
369 	if( mod_loc_name ){
370 		efree( mod_loc_name);
371 	}
372 
373 	tag_value->len = strlen(tag_value->val);
374 	return tag_value;
375 }
376 /* }}} */
377 
378 /* {{{
379 * Gets the value from ICU , called when PHP userspace function is called
380 * common code shared by get_primary_language,get_script or get_region or get_variant
381 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)382 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
383 {
384 
385 	const char* loc_name        	= NULL;
386 	size_t         loc_name_len    	= 0;
387 
388 	zend_string*   tag_value		= NULL;
389 	char*       empty_result	= "";
390 
391 	int         result    		= 0;
392 	char*       msg        		= NULL;
393 
394 	UErrorCode  status          	= U_ZERO_ERROR;
395 
396 	intl_error_reset( NULL );
397 
398 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
399 	&loc_name ,&loc_name_len ) == FAILURE) {
400 		spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
401 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
402 		efree(msg);
403 
404 		RETURN_FALSE;
405     }
406 
407 	if(loc_name_len == 0) {
408 		loc_name = intl_locale_get_default();
409 		loc_name_len = strlen(loc_name);
410 	}
411 
412 	INTL_CHECK_LOCALE_LEN(loc_name_len);
413 
414 	/* Call ICU get */
415 	tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
416 
417 	/* No value found */
418 	if( result == -1 ) {
419 		if( tag_value){
420 			zend_string_release_ex( tag_value, 0 );
421 		}
422 		RETURN_STRING( empty_result);
423 	}
424 
425 	/* value found */
426 	if( tag_value){
427 		RETVAL_STR( tag_value );
428 		return;
429 	}
430 
431 	/* Error encountered while fetching the value */
432 	if( result ==0) {
433 		spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
434 		intl_error_set( NULL, status, msg , 1 );
435 		efree(msg);
436 		RETURN_NULL();
437 	}
438 
439 }
440 /* }}} */
441 
/* {{{ proto static string Locale::getScript($locale)
 * gets the script for the $locale
 }}} */
/* {{{ proto static string locale_get_script($locale)
 * gets the script for the $locale
 * Thin wrapper: all argument parsing and result handling is done by
 * get_icu_value_src_php().
 */
PHP_FUNCTION( locale_get_script )
{
	/* LOC_SCRIPT_TAG selects the script lookup in the shared helper */
	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
452 /* }}} */
453 
/* {{{ proto static string Locale::getRegion($locale)
 * gets the region for the $locale
 }}} */
/* {{{ proto static string locale_get_region($locale)
 * gets the region for the $locale
 * Thin wrapper: all argument parsing and result handling is done by
 * get_icu_value_src_php().
 */
PHP_FUNCTION( locale_get_region )
{
	/* LOC_REGION_TAG selects the region lookup in the shared helper */
	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
464 /* }}} */
465 
/* {{{ proto static string Locale::getPrimaryLanguage($locale)
 * gets the primary language for the $locale
 }}} */
/* {{{ proto static string locale_get_primary_language($locale)
 * gets the primary language for the $locale
 * Thin wrapper: all argument parsing and result handling is done by
 * get_icu_value_src_php().
 */
PHP_FUNCTION(locale_get_primary_language )
{
	/* LOC_LANG_TAG selects the language lookup in the shared helper */
	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
476 /* }}} */
477 
478 
/* {{{
 * Common code shared by the display_xyz functions to get the value from ICU.
 *
 * Parses ($locale [, $in_locale]) from the PHP call, asks ICU for the
 * display string selected by tag_name (LOC_LANG_TAG, LOC_SCRIPT_TAG,
 * LOC_REGION_TAG, LOC_VARIANT_TAG or DISP_NAME) and returns it converted
 * to UTF-8.
 *
 * Returns FALSE to PHP when: the arguments cannot be parsed, $locale is
 * longer than ULOC_FULLNAME_CAPACITY, $locale is a grandfathered tag and
 * something other than the language is requested, ICU fails, or the
 * UTF-16 to UTF-8 conversion fails.
 */
static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
{
	const char* loc_name        	= NULL;
	size_t         loc_name_len    	= 0;

	const char* disp_loc_name       = NULL;
	size_t      disp_loc_name_len   = 0;
	/* set when disp_loc_name is our own estrdup()ed copy and must be freed */
	int         free_loc_name       = 0;

	UChar*      disp_name      	= NULL;
	int32_t     disp_name_len  	= 0;

	/* heap copy of the locale actually handed to ICU */
	char*       mod_loc_name        = NULL;

	int32_t     buflen          	= 512;
	UErrorCode  status          	= U_ZERO_ERROR;

	zend_string* u8str;

  	char*       msg             	= NULL;

	intl_error_reset( NULL );

	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|s",
		&loc_name, &loc_name_len ,
		&disp_loc_name ,&disp_loc_name_len ) == FAILURE)
	{
		spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
		efree(msg);
		RETURN_FALSE;
	}

    if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
        /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
		efree(msg);
		RETURN_FALSE;
    }

	/* An empty $locale means "use the default locale" */
	if(loc_name_len == 0) {
		loc_name = intl_locale_get_default();
	}

	if( strcmp(tag_name, DISP_NAME) != 0 ){
		/* Handle grandfathered languages */
		int grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
		if( grOffset >= 0 ){
			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
				/* look up the display language of the preferred tag instead */
				mod_loc_name = getPreferredTag( loc_name );
			} else {
				/* Since Grandfathered, no value, do nothing, return FALSE.
				 * Nothing has been allocated yet, so no cleanup is needed. */
				RETURN_FALSE;
			}
		}
	} /* end of if != DISP_NAME */

	if( mod_loc_name==NULL ){
		mod_loc_name = estrdup( loc_name );
	}

	/* Check if disp_loc_name passed , if not use default locale */
	if( !disp_loc_name){
		disp_loc_name = estrdup(intl_locale_get_default());
		free_loc_name = 1;
	}

    /* Get the disp_value for the given locale.
     * Each pass (re)allocates buflen UChars; ICU returns the length it
     * needs in buflen, so on overflow the loop runs again with that size. */
    do{
        disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
        disp_name_len = buflen;

		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		} else if( strcmp(tag_name , DISP_NAME)==0 ){
			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
		}

		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
		if( U_FAILURE( status ) )
		{
			if( status == U_BUFFER_OVERFLOW_ERROR )
			{
				/* clear the error and let the loop condition trigger the retry */
				status = U_ZERO_ERROR;
				continue;
			}

			/* Any other ICU failure: report it and release everything we own */
			spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
			intl_error_set( NULL, status, msg , 1 );
			efree(msg);
			if( disp_name){
				efree( disp_name );
			}
			if( mod_loc_name){
				efree( mod_loc_name );
			}
			if (free_loc_name) {
				efree((void *)disp_loc_name);
				disp_loc_name = NULL;
			}
			RETURN_FALSE;
		}
	} while( buflen > disp_name_len );

	if( mod_loc_name){
		efree( mod_loc_name );
	}
	if (free_loc_name) {
		efree((void *)disp_loc_name);
		disp_loc_name = NULL;
	}
	/* Convert display locale name from UTF-16 to UTF-8. */
	u8str = intl_convert_utf16_to_utf8(disp_name, buflen, &status );
	efree( disp_name );
	if( !u8str )
	{
		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
		intl_error_set( NULL, status, msg , 1 );
		efree(msg);
		RETURN_FALSE;
	}

	RETVAL_NEW_STR( u8str );
}
613 /* }}} */
614 
/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_name($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
* Thin wrapper around get_icu_disp_value_src_php().
*/
PHP_FUNCTION(locale_get_display_name)
{
    /* DISP_NAME requests the full display name rather than a single subtag */
    get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
625 /* }}} */
626 
/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_language($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
* Thin wrapper around get_icu_disp_value_src_php().
*/
PHP_FUNCTION(locale_get_display_language)
{
    /* LOC_LANG_TAG selects the display-language lookup in the shared helper */
    get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
637 /* }}} */
638 
/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
* gets the script for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_script($locale, $in_locale = null)
* gets the script for the $locale in $in_locale or default_locale
* Thin wrapper around get_icu_disp_value_src_php().
*/
PHP_FUNCTION(locale_get_display_script)
{
    /* LOC_SCRIPT_TAG selects the display-script lookup in the shared helper */
    get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
649 /* }}} */
650 
/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
* gets the region for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_region($locale, $in_locale = null)
* gets the region for the $locale in $in_locale or default_locale
* Thin wrapper around get_icu_disp_value_src_php().
*/
PHP_FUNCTION(locale_get_display_region)
{
    /* LOC_REGION_TAG selects the display-region lookup in the shared helper */
    get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
661 /* }}} */
662 
/* {{{
* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
 }}} */
/* {{{
* proto static string locale_get_display_variant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
* Thin wrapper around get_icu_disp_value_src_php().
*/
PHP_FUNCTION(locale_get_display_variant)
{
    /* LOC_VARIANT_TAG selects the display-variant lookup in the shared helper */
    get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
675 /* }}} */
676 
677  /* {{{ proto static array getKeywords(string $locale) {
678  * return an associative array containing keyword-value
679  * pairs for this locale. The keys are keys to the array (doh!)
680  * }}}*/
681  /* {{{ proto static array locale_get_keywords(string $locale) {
682  * return an associative array containing keyword-value
683  * pairs for this locale. The keys are keys to the array (doh!)
684  */
PHP_FUNCTION(locale_get_keywords)685 PHP_FUNCTION( locale_get_keywords )
686 {
687     UEnumeration*   e        = NULL;
688     UErrorCode      status   = U_ZERO_ERROR;
689 
690     const char*	 	kw_key        = NULL;
691     int32_t         kw_key_len    = 0;
692 
693     const char*       	loc_name        = NULL;
694     size_t        	 	loc_name_len    = 0;
695 
696     intl_error_reset( NULL );
697 
698     if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
699         &loc_name, &loc_name_len ) == FAILURE)
700     {
701         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
702              "locale_get_keywords: unable to parse input params", 0 );
703 
704         RETURN_FALSE;
705     }
706 
707 	INTL_CHECK_LOCALE_LEN(strlen(loc_name));
708 
709     if(loc_name_len == 0) {
710         loc_name = intl_locale_get_default();
711     }
712 
713 	/* Get the keywords */
714     e = uloc_openKeywords( loc_name, &status );
715     if( e != NULL )
716     {
717 		/*
718 		ICU expects the buffer to be allocated  before calling the function
719 		and so the buffer size has been explicitly specified
720 		ICU uloc.h #define 	ULOC_KEYWORD_AND_VALUES_CAPACITY   100
721 		hence the kw_value buffer size is 100
722 		*/
723 
724 		/* Traverse it, filling the return array. */
725     	array_init( return_value );
726 
727     	while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
728     		int32_t kw_value_len = 100;
729 			zend_string *kw_value_str = zend_string_alloc(kw_value_len, 0);
730 
731 			/* Get the keyword value for each keyword */
732 			kw_value_len=uloc_getKeywordValue( loc_name, kw_key, ZSTR_VAL(kw_value_str), kw_value_len, &status );
733 			if (status == U_BUFFER_OVERFLOW_ERROR) {
734 				status = U_ZERO_ERROR;
735 				kw_value_str = zend_string_extend(kw_value_str, kw_value_len, 0);
736 				kw_value_len=uloc_getKeywordValue( loc_name,kw_key, ZSTR_VAL(kw_value_str), kw_value_len+1, &status );
737 			} else if(!U_FAILURE(status)) {
738 				kw_value_str = zend_string_truncate(kw_value_str, kw_value_len, 0);
739 			}
740 			if (U_FAILURE(status)) {
741 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 );
742 				if( kw_value_str){
743 					zend_string_efree( kw_value_str );
744 				}
745 				zend_array_destroy(Z_ARR_P(return_value));
746         		RETURN_FALSE;
747 			}
748 
749        		add_assoc_str( return_value, (char *)kw_key, kw_value_str);
750 		} /* end of while */
751 
752 	} /* end of if e!=NULL */
753 
754     uenum_close( e );
755 }
756 /* }}} */
757 
 /* {{{ proto static string Locale::canonicalize($locale)
 * @return string the canonicalized locale
 * }}} */
 /* {{{ proto static string locale_canonicalize(string $locale)
 * @param string $locale	The locale string to canonicalize
 * Thin wrapper: all argument parsing and result handling is done by
 * get_icu_value_src_php().
 */
PHP_FUNCTION(locale_canonicalize)
{
	/* LOC_CANONICALIZE_TAG makes the shared helper canonicalize the whole name */
	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
768 /* }}} */
769 
770 /* {{{ append_key_value
771 * Internal function which is called from locale_compose
772 * gets the value for the key_name and appends to the loc_name
773 * returns 1 if successful , -1 if not found ,
774 * 0 if array element is not a string , -2 if buffer-overflow
775 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)776 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
777 {
778 	zval *ele_value;
779 
780 	if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
781 		if(Z_TYPE_P(ele_value)!= IS_STRING ){
782 			/* element value is not a string */
783 			return FAILURE;
784 		}
785 		if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
786 		   strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
787 			/* not lang or grandfathered tag */
788 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
789 		}
790 		smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
791 		return SUCCESS;
792 	}
793 
794 	return LOC_NOT_FOUND;
795 }
796 /* }}} */
797 
798 /* {{{ append_prefix , appends the prefix needed
799 * e.g. private adds 'x'
800 */
add_prefix(smart_str * loc_name,char * key_name)801 static void add_prefix(smart_str* loc_name, char* key_name)
802 {
803 	if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
804 		smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
805 		smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
806 	}
807 }
808 /* }}} */
809 
810 /* {{{ append_multiple_key_values
811 * Internal function which is called from locale_compose
812 * gets the multiple values for the key_name and appends to the loc_name
813 * used for 'variant','extlang','private'
814 * returns 1 if successful , -1 if not found ,
815 * 0 if array element is not a string , -2 if buffer-overflow
816 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name)817 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name)
818 {
819 	zval	*ele_value;
820 	int 	isFirstSubtag 	= 0;
821 
822 	/* Variant/ Extlang/Private etc. */
823 	if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
824 		if( Z_TYPE_P(ele_value) == IS_STRING ){
825 			add_prefix( loc_name , key_name);
826 
827 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
828 			smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
829 			return SUCCESS;
830 		} else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
831 			HashTable *arr = Z_ARRVAL_P(ele_value);
832 			zval *data;
833 
834 			ZEND_HASH_FOREACH_VAL(arr, data) {
835 				if(Z_TYPE_P(data) != IS_STRING) {
836 					return FAILURE;
837 				}
838 				if (isFirstSubtag++ == 0){
839 					add_prefix(loc_name , key_name);
840 				}
841 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
842 				smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
843 			} ZEND_HASH_FOREACH_END();
844 			return SUCCESS;
845 		} else {
846 			return FAILURE;
847 		}
848 	} else {
849 		char cur_key_name[31];
850 		int  max_value = 0, i;
851 		/* Decide the max_value: the max. no. of elements allowed */
852 		if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
853 			max_value  = MAX_NO_VARIANT;
854 		}
855 		if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
856 			max_value  = MAX_NO_EXTLANG;
857 		}
858 		if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
859 			max_value  = MAX_NO_PRIVATE;
860 		}
861 
862 		/* Multiple variant values as variant0, variant1 ,variant2 */
863 		isFirstSubtag = 0;
864 		for( i=0 ; i< max_value; i++ ){
865 			snprintf( cur_key_name , 30, "%s%d", key_name , i);
866 			if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
867 				if( Z_TYPE_P(ele_value)!= IS_STRING ){
868 					/* variant is not a string */
869 					return FAILURE;
870 				}
871 				/* Add the contents */
872 				if (isFirstSubtag++ == 0){
873 					add_prefix(loc_name , cur_key_name);
874 				}
875 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
876 				smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
877 			}
878 		} /* end of for */
879 	} /* end of else */
880 
881 	return SUCCESS;
882 }
883 /* }}} */
884 
885 /*{{{
886 * If applicable sets error message and aborts locale_compose gracefully
887 * returns 0  if locale_compose needs to be aborted
888 * otherwise returns 1
889 */
handleAppendResult(int result,smart_str * loc_name)890 static int handleAppendResult( int result, smart_str* loc_name)
891 {
892 	intl_error_reset( NULL );
893 	if( result == FAILURE) {
894 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
895 			 "locale_compose: parameter array element is not a string", 0 );
896 		smart_str_free(loc_name);
897 		return 0;
898 	}
899 	return 1;
900 }
901 /* }}} */
902 
/* NUL-terminates the smart_str and returns its string from the current PHP
 * function. Wrapped in do { } while (0) so the two steps always behave as
 * one statement, e.g. as the body of an unbraced if. */
#define RETURN_SMART_STR(str) do { smart_str_0((str)); RETURN_NEW_STR((str)->s); } while (0)
904 /* {{{ proto static string Locale::composeLocale($array)
905 * Creates a locale by combining the parts of locale-ID passed
906 * }}} */
907 /* {{{ proto static string compose_locale($array)
908 * Creates a locale by combining the parts of locale-ID passed
909 * }}} */
PHP_FUNCTION(locale_compose)910 PHP_FUNCTION(locale_compose)
911 {
912 	smart_str      	loc_name_s = {0};
913 	smart_str *loc_name = &loc_name_s;
914 	zval*			arr	= NULL;
915 	HashTable*		hash_arr = NULL;
916 	int 			result = 0;
917 
918 	intl_error_reset( NULL );
919 
920 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "a",
921 		&arr) == FAILURE)
922 	{
923 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
924 			 "locale_compose: unable to parse input params", 0 );
925 		RETURN_FALSE;
926 	}
927 
928 	hash_arr = Z_ARRVAL_P( arr );
929 
930 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
931 		RETURN_FALSE;
932 
933 	/* Check for grandfathered first */
934 	result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
935 	if( result == SUCCESS){
936 		RETURN_SMART_STR(loc_name);
937 	}
938 	if( !handleAppendResult( result, loc_name)){
939 		RETURN_FALSE;
940 	}
941 
942 	/* Not grandfathered */
943 	result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
944 	if( result == LOC_NOT_FOUND ){
945 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
946 		"locale_compose: parameter array does not contain 'language' tag.", 0 );
947 		smart_str_free(loc_name);
948 		RETURN_FALSE;
949 	}
950 	if( !handleAppendResult( result, loc_name)){
951 		RETURN_FALSE;
952 	}
953 
954 	/* Extlang */
955 	result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG);
956 	if( !handleAppendResult( result, loc_name)){
957 		RETURN_FALSE;
958 	}
959 
960 	/* Script */
961 	result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
962 	if( !handleAppendResult( result, loc_name)){
963 		RETURN_FALSE;
964 	}
965 
966 	/* Region */
967 	result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
968 	if( !handleAppendResult( result, loc_name)){
969 		RETURN_FALSE;
970 	}
971 
972 	/* Variant */
973 	result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG);
974 	if( !handleAppendResult( result, loc_name)){
975 		RETURN_FALSE;
976 	}
977 
978 	/* Private */
979 	result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG);
980 	if( !handleAppendResult( result, loc_name)){
981 		RETURN_FALSE;
982 	}
983 
984 	RETURN_SMART_STR(loc_name);
985 }
986 /* }}} */
987 
988 
989 /*{{{
990 * Parses the locale and returns private subtags  if existing
991 * else returns NULL
992 * e.g. for locale='en_US-x-prv1-prv2-prv3'
993 * returns a pointer to the string 'prv1-prv2-prv3'
994 */
get_private_subtags(const char * loc_name)995 static zend_string* get_private_subtags(const char* loc_name)
996 {
997 	zend_string* result = NULL;
998 	size_t       len = 0;
999 	const char*  mod_loc_name =NULL;
1000 
1001 	if( loc_name && (len = strlen(loc_name)) > 0 ){
1002 		zend_off_t singletonPos = 0;
1003 		mod_loc_name = loc_name ;
1004 		while( (singletonPos = getSingletonPos(mod_loc_name)) > -1){
1005 			if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){
1006 				/* private subtag start found */
1007 				if( singletonPos + 2 ==  len){
1008 					/* loc_name ends with '-x-' ; return  NULL */
1009 				}
1010 				else{
1011 					/* result = mod_loc_name + singletonPos +2; */
1012 					result = zend_string_init(mod_loc_name + singletonPos+2  , (len -( singletonPos +2) ), 0);
1013 				}
1014 				break;
1015 			}
1016 			else{
1017 				if((size_t)(singletonPos + 1) >= len){
1018 					/* String end */
1019 					break;
1020 				} else {
1021 					/* singleton found but not a private subtag , hence check further in the string for the private subtag */
1022 					mod_loc_name = mod_loc_name + singletonPos +1;
1023 					len = strlen(mod_loc_name);
1024 				}
1025 			}
1026 		} /* end of while */
1027 	}
1028 
1029 	return result;
1030 }
1031 /* }}} */
1032 
1033 /* {{{ code used by locale_parse
1034 */
add_array_entry(const char * loc_name,zval * hash_arr,char * key_name)1035 static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name)
1036 {
1037 	zend_string*   key_value 	= NULL;
1038 	char*   cur_key_name	= NULL;
1039 	char*   token        	= NULL;
1040 	char*   last_ptr  	= NULL;
1041 
1042 	int	result		= 0;
1043 	int 	cur_result  	= 0;
1044 
1045 
1046 	if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1047 		key_value = get_private_subtags( loc_name );
1048 		result = 1;
1049 	} else {
1050 		key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1051 	}
1052 	if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1053 		( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1054 		if( result > 0 && key_value){
1055 			int cnt = 0;
1056 			/* Tokenize on the "_" or "-"  */
1057 			token = php_strtok_r( key_value->val , DELIMITER ,&last_ptr);
1058 			if( cur_key_name ){
1059 				efree( cur_key_name);
1060 			}
1061 			cur_key_name = (char*)ecalloc( 25,  25);
1062 			sprintf( cur_key_name , "%s%d", key_name , cnt++);
1063 			add_assoc_string( hash_arr, cur_key_name , token);
1064 			/* tokenize on the "_" or "-" and stop  at singleton if any */
1065 			while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1066 				sprintf( cur_key_name , "%s%d", key_name , cnt++);
1067 				add_assoc_string( hash_arr, cur_key_name , token);
1068 			}
1069 /*
1070 			if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1071 			}
1072 */
1073 		}
1074 		if (key_value) {
1075 			zend_string_release_ex(key_value, 0);
1076 		}
1077 	} else {
1078 		if( result == 1 ){
1079 			add_assoc_str( hash_arr, key_name , key_value);
1080 			cur_result = 1;
1081 		} else if (key_value) {
1082 			zend_string_release_ex(key_value, 0);
1083 		}
1084 	}
1085 
1086 	if( cur_key_name ){
1087 		efree( cur_key_name);
1088 	}
1089 	/*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1090 	return cur_result;
1091 }
1092 /* }}} */
1093 
1094 /* {{{ proto static array Locale::parseLocale($locale)
* Parses a locale-id into an array of its different parts
1096  }}} */
/* {{{ proto static array locale_parse($locale)
* Parses a locale-id into an array of its different parts
1099 */
PHP_FUNCTION(locale_parse)1100 PHP_FUNCTION(locale_parse)
1101 {
1102     const char* loc_name        = NULL;
1103     size_t         loc_name_len    = 0;
1104     int         grOffset    	= 0;
1105 
1106     intl_error_reset( NULL );
1107 
1108     if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1109         &loc_name, &loc_name_len ) == FAILURE)
1110     {
1111         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1112              "locale_parse: unable to parse input params", 0 );
1113 
1114         RETURN_FALSE;
1115     }
1116 
1117     INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1118 
1119     if(loc_name_len == 0) {
1120         loc_name = intl_locale_get_default();
1121     }
1122 
1123 	array_init( return_value );
1124 
1125 	grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1126 	if( grOffset >= 0 ){
1127 		add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1128 	}
1129 	else{
1130 		/* Not grandfathered */
1131 		add_array_entry( loc_name , return_value , LOC_LANG_TAG);
1132 		add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG);
1133 		add_array_entry( loc_name , return_value , LOC_REGION_TAG);
1134 		add_array_entry( loc_name , return_value , LOC_VARIANT_TAG);
1135 		add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG);
1136 	}
1137 }
1138 /* }}} */
1139 
1140 /* {{{ proto static array Locale::getAllVariants($locale)
1141 * gets an array containing the list of variants, or null
1142  }}} */
1143 /* {{{ proto static array locale_get_all_variants($locale)
1144 * gets an array containing the list of variants, or null
1145 */
PHP_FUNCTION(locale_get_all_variants)1146 PHP_FUNCTION(locale_get_all_variants)
1147 {
1148 	const char*  	loc_name        = NULL;
1149 	size_t    		loc_name_len    = 0;
1150 
1151 	int	result		= 0;
1152 	char*	token		= NULL;
1153 	zend_string*	variant		= NULL;
1154 	char*	saved_ptr	= NULL;
1155 
1156 	intl_error_reset( NULL );
1157 
1158 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1159 	&loc_name, &loc_name_len ) == FAILURE)
1160 	{
1161 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1162 	     "locale_parse: unable to parse input params", 0 );
1163 
1164 		RETURN_FALSE;
1165 	}
1166 
1167 	if(loc_name_len == 0) {
1168 		loc_name = intl_locale_get_default();
1169 		loc_name_len = strlen(loc_name);
1170 	}
1171 
1172 	INTL_CHECK_LOCALE_LEN(loc_name_len);
1173 
1174 	array_init( return_value );
1175 
1176 	/* If the locale is grandfathered, stop, no variants */
1177 	if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1178 		/* ("Grandfathered Tag. No variants."); */
1179 	}
1180 	else {
1181 	/* Call ICU variant */
1182 		variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1183 		if( result > 0 && variant){
1184 			/* Tokenize on the "_" or "-" */
1185 			token = php_strtok_r( variant->val , DELIMITER , &saved_ptr);
1186 			add_next_index_stringl( return_value, token , strlen(token));
1187 			/* tokenize on the "_" or "-" and stop  at singleton if any	*/
1188 			while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1189  				add_next_index_stringl( return_value, token , strlen(token));
1190 			}
1191 		}
1192 		if( variant ){
1193 			zend_string_release_ex( variant, 0 );
1194 		}
1195 	}
1196 
1197 
1198 }
1199 /* }}} */
1200 
/*{{{
* Writes a normalized copy of str into retstr: every character is converted
* to lower case and every hyphen is replaced with an underscore. The copy is
* NUL-terminated, so retstr must provide room for strlen(str) + 1 bytes.
*
* Returns 1 on success. Returns 0, leaving retstr untouched, when str is
* NULL or empty.
*/
static int strToMatch(const char* str ,char *retstr)
{
	if (str == NULL || str[0] == '\0') {
		return 0;
	}

	for (; *str != '\0'; str++, retstr++) {
		if (*str == '-') {
			*retstr = '_';
		} else {
			/* tolower() is only defined for EOF and values representable
			 * as unsigned char; a plain char may be negative. */
			*retstr = (char)tolower((unsigned char)*str);
		}
	}
	*retstr = '\0';

	return 1;
}
1232 /* }}} */
1233 
1234 /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1235 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1236 */
1237 /* }}} */
1238 /* {{{ proto bool locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1239 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1240 */
PHP_FUNCTION(locale_filter_matches)1241 PHP_FUNCTION(locale_filter_matches)
1242 {
1243 	char*       	lang_tag        = NULL;
1244 	size_t         	lang_tag_len    = 0;
1245 	const char*     loc_range       = NULL;
1246 	size_t         	loc_range_len   = 0;
1247 
1248 	int		result		= 0;
1249 	char*		token		= 0;
1250 	char*		chrcheck	= NULL;
1251 
1252 	zend_string*   	can_lang_tag    = NULL;
1253 	zend_string*   	can_loc_range   = NULL;
1254 
1255 	char*       	cur_lang_tag    = NULL;
1256 	char*       	cur_loc_range   = NULL;
1257 
1258 	zend_bool 	boolCanonical 	= 0;
1259 	UErrorCode	status		= U_ZERO_ERROR;
1260 
1261 	intl_error_reset( NULL );
1262 
1263 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "ss|b",
1264 		&lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1265 		&boolCanonical) == FAILURE)
1266 	{
1267 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1268 		"locale_filter_matches: unable to parse input params", 0 );
1269 
1270 		RETURN_FALSE;
1271 	}
1272 
1273 	if(loc_range_len == 0) {
1274 		loc_range = intl_locale_get_default();
1275 		loc_range_len = strlen(loc_range);
1276 	}
1277 
1278 	if( strcmp(loc_range,"*")==0){
1279 		RETURN_TRUE;
1280 	}
1281 
1282 	INTL_CHECK_LOCALE_LEN(loc_range_len);
1283 	INTL_CHECK_LOCALE_LEN(lang_tag_len);
1284 
1285 	if( boolCanonical ){
1286 		/* canonicalize loc_range */
1287 		can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1288 		if( result <=0) {
1289 			intl_error_set( NULL, status,
1290 				"locale_filter_matches : unable to canonicalize loc_range" , 0 );
1291 			RETURN_FALSE;
1292 		}
1293 
1294 		/* canonicalize lang_tag */
1295 		can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1296 		if( result <=0) {
1297 			intl_error_set( NULL, status,
1298 				"locale_filter_matches : unable to canonicalize lang_tag" , 0 );
1299 			RETURN_FALSE;
1300 		}
1301 
1302 		/* Convert to lower case for case-insensitive comparison */
1303 		cur_lang_tag = ecalloc( 1, can_lang_tag->len + 1);
1304 
1305 		/* Convert to lower case for case-insensitive comparison */
1306 		result = strToMatch( can_lang_tag->val , cur_lang_tag);
1307 		if( result == 0) {
1308 			efree( cur_lang_tag );
1309 			zend_string_release_ex( can_lang_tag, 0 );
1310 			RETURN_FALSE;
1311 		}
1312 
1313 		cur_loc_range = ecalloc( 1, can_loc_range->len + 1);
1314 		result = strToMatch( can_loc_range->val , cur_loc_range );
1315 		if( result == 0) {
1316 			efree( cur_lang_tag );
1317 			zend_string_release_ex( can_lang_tag, 0 );
1318 			efree( cur_loc_range );
1319 			zend_string_release_ex( can_loc_range, 0 );
1320 			RETURN_FALSE;
1321 		}
1322 
1323 		/* check if prefix */
1324 		token 	= strstr( cur_lang_tag , cur_loc_range );
1325 
1326 		if( token && (token==cur_lang_tag) ){
1327 			/* check if the char. after match is SEPARATOR */
1328 			chrcheck = token + (strlen(cur_loc_range));
1329 			if( isIDSeparator(*chrcheck) || isKeywordSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1330 				efree( cur_lang_tag );
1331 				efree( cur_loc_range );
1332 				if( can_lang_tag){
1333 					zend_string_release_ex( can_lang_tag, 0 );
1334 				}
1335 				if( can_loc_range){
1336 					zend_string_release_ex( can_loc_range, 0 );
1337 				}
1338 				RETURN_TRUE;
1339 			}
1340 		}
1341 
1342 		/* No prefix as loc_range */
1343 		if( cur_lang_tag){
1344 			efree( cur_lang_tag );
1345 		}
1346 		if( cur_loc_range){
1347 			efree( cur_loc_range );
1348 		}
1349 		if( can_lang_tag){
1350 			zend_string_release_ex( can_lang_tag, 0 );
1351 		}
1352 		if( can_loc_range){
1353 			zend_string_release_ex( can_loc_range, 0 );
1354 		}
1355 		RETURN_FALSE;
1356 
1357 	} /* end of if isCanonical */
1358 	else{
1359 		/* Convert to lower case for case-insensitive comparison */
1360 		cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1361 
1362 		result = strToMatch( lang_tag , cur_lang_tag);
1363 		if( result == 0) {
1364 			efree( cur_lang_tag );
1365 			RETURN_FALSE;
1366 		}
1367 		cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1368 		result = strToMatch( loc_range , cur_loc_range );
1369 		if( result == 0) {
1370 			efree( cur_lang_tag );
1371 			efree( cur_loc_range );
1372 			RETURN_FALSE;
1373 		}
1374 
1375 		/* check if prefix */
1376 		token 	= strstr( cur_lang_tag , cur_loc_range );
1377 
1378 		if( token && (token==cur_lang_tag) ){
1379 			/* check if the char. after match is SEPARATOR */
1380 			chrcheck = token + (strlen(cur_loc_range));
1381 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1382 				efree( cur_lang_tag );
1383 				efree( cur_loc_range );
1384 				RETURN_TRUE;
1385 			}
1386 		}
1387 
1388 		/* No prefix as loc_range */
1389 		if( cur_lang_tag){
1390 			efree( cur_lang_tag );
1391 		}
1392 		if( cur_loc_range){
1393 			efree( cur_loc_range );
1394 		}
1395 		RETURN_FALSE;
1396 
1397 	}
1398 }
1399 /* }}} */
1400 
array_cleanup(char * arr[],int arr_size)1401 static void array_cleanup( char* arr[] , int arr_size)
1402 {
1403 	int i=0;
1404 	for( i=0; i< arr_size; i++ ){
1405 		if( arr[i*2] ){
1406 			efree( arr[i*2]);
1407 		}
1408 	}
1409 	efree(arr);
1410 }
1411 
/* Frees the work array of the enclosing function (expects cur_arr and
 * cur_arr_len in scope) and returns value. Wrapped in do/while(0) so the
 * expansion is a single statement and stays correct under an unbraced if. */
#define LOOKUP_CLEAN_RETURN(value)	do { array_cleanup(cur_arr, cur_arr_len); return (value); } while (0)
1413 /* {{{
1414 * returns the lookup result to lookup_loc_range_src_php
1415 * internal function
1416 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize)1417 static zend_string* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize )
1418 {
1419 	int	i = 0;
1420 	int	cur_arr_len = 0;
1421 	int result = 0;
1422 
1423 	zend_string* lang_tag = NULL;
1424 	zval* ele_value = NULL;
1425 
1426 	char* cur_loc_range	= NULL;
1427 	zend_string* can_loc_range	= NULL;
1428 	zend_off_t saved_pos = 0;
1429 
1430 	zend_string* return_value = NULL;
1431 
1432 	char **cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1433 	ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1434 	/* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1435 		if(Z_TYPE_P(ele_value)!= IS_STRING) {
1436 			/* element value is not a string */
1437 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0);
1438 			LOOKUP_CLEAN_RETURN(NULL);
1439 		}
1440 		cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1441 		result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1442 		if(result == 0) {
1443 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0);
1444 			LOOKUP_CLEAN_RETURN(NULL);
1445 		}
1446 		cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1447 		cur_arr_len++ ;
1448 	} ZEND_HASH_FOREACH_END(); /* end of for */
1449 
1450 	/* Canonicalize array elements */
1451 	if(canonicalize) {
1452 		for(i=0; i<cur_arr_len; i++) {
1453 			lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1454 			if(result != 1 || lang_tag == NULL || !lang_tag->val[0]) {
1455 				if(lang_tag) {
1456 					zend_string_release_ex(lang_tag, 0);
1457 				}
1458 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1459 				LOOKUP_CLEAN_RETURN(NULL);
1460 			}
1461 			cur_arr[i*2] = erealloc(cur_arr[i*2], lang_tag->len+1);
1462 			result = strToMatch(lang_tag->val, cur_arr[i*2]);
1463 			zend_string_release_ex(lang_tag, 0);
1464 			if(result == 0) {
1465 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1466 				LOOKUP_CLEAN_RETURN(NULL);
1467 			}
1468 		}
1469 
1470 	}
1471 
1472 	if(canonicalize) {
1473 		/* Canonicalize the loc_range */
1474 		can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1475 		if( result != 1 || can_loc_range == NULL || !can_loc_range->val[0]) {
1476 			/* Error */
1477 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 );
1478 			if(can_loc_range) {
1479 				zend_string_release_ex(can_loc_range, 0);
1480 			}
1481 			LOOKUP_CLEAN_RETURN(NULL);
1482 		} else {
1483 			loc_range = can_loc_range->val;
1484 		}
1485 	}
1486 
1487 	cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1488 	/* convert to lower and replace hyphens */
1489 	result = strToMatch(loc_range, cur_loc_range);
1490 	if(can_loc_range) {
1491 		zend_string_release_ex(can_loc_range, 0);
1492 	}
1493 	if(result == 0) {
1494 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1495 		LOOKUP_CLEAN_RETURN(NULL);
1496 	}
1497 
1498 	/* Lookup for the lang_tag match */
1499 	saved_pos = strlen(cur_loc_range);
1500 	while(saved_pos > 0) {
1501 		for(i=0; i< cur_arr_len; i++){
1502 			if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1503 				/* Match found */
1504 				char *str = canonicalize ? cur_arr[i*2] : cur_arr[i*2+1];
1505 				return_value = zend_string_init(str, strlen(str), 0);
1506 				efree(cur_loc_range);
1507 				LOOKUP_CLEAN_RETURN(return_value);
1508 			}
1509 		}
1510 		saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1511 	}
1512 
1513 	/* Match not found */
1514 	efree(cur_loc_range);
1515 	LOOKUP_CLEAN_RETURN(NULL);
1516 }
1517 /* }}} */
1518 
1519 /* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1520 * Searches the items in $langtag for the best match to the language
1521 * range
1522 */
1523 /* }}} */
1524 /* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1525 * Searches the items in $langtag for the best match to the language
1526 * range
1527 */
PHP_FUNCTION(locale_lookup)1528 PHP_FUNCTION(locale_lookup)
1529 {
1530 	zend_string*   	fallback_loc_str	= NULL;
1531 	const char*    	loc_range      		= NULL;
1532 	size_t        	loc_range_len  		= 0;
1533 
1534 	zval*		arr				= NULL;
1535 	HashTable*	hash_arr		= NULL;
1536 	zend_bool	boolCanonical	= 0;
1537 	zend_string* 	result_str	= NULL;
1538 
1539 	intl_error_reset( NULL );
1540 
1541 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "as|bS!", &arr, &loc_range, &loc_range_len,
1542 		&boolCanonical,	&fallback_loc_str) == FAILURE) {
1543 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,	"locale_lookup: unable to parse input params", 0 );
1544 		RETURN_FALSE;
1545 	}
1546 
1547 	if(loc_range_len == 0) {
1548 		if(fallback_loc_str) {
1549 			loc_range = ZSTR_VAL(fallback_loc_str);
1550 			loc_range_len = ZSTR_LEN(fallback_loc_str);
1551 		} else {
1552 			loc_range = intl_locale_get_default();
1553 			loc_range_len = strlen(loc_range);
1554 		}
1555 	}
1556 
1557 	hash_arr = Z_ARRVAL_P(arr);
1558 
1559 	INTL_CHECK_LOCALE_LEN(loc_range_len);
1560 
1561 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1562 		RETURN_EMPTY_STRING();
1563 	}
1564 
1565 	result_str = lookup_loc_range(loc_range, hash_arr, boolCanonical);
1566 	if(result_str == NULL || ZSTR_VAL(result_str)[0] == '\0') {
1567 		if( fallback_loc_str ) {
1568 			result_str = zend_string_copy(fallback_loc_str);
1569 		} else {
1570 			RETURN_EMPTY_STRING();
1571 		}
1572 	}
1573 
1574 	RETURN_STR(result_str);
1575 }
1576 /* }}} */
1577 
1578 /* {{{ proto string Locale::acceptFromHttp(string $http_accept)
* Tries to find out best available locale based on HTTP "Accept-Language" header
1580 */
1581 /* }}} */
1582 /* {{{ proto string locale_accept_from_http(string $http_accept)
* Tries to find out best available locale based on HTTP "Accept-Language" header
1584 */
PHP_FUNCTION(locale_accept_from_http)1585 PHP_FUNCTION(locale_accept_from_http)
1586 {
1587 	UEnumeration *available;
1588 	char *http_accept = NULL;
1589 	size_t http_accept_len;
1590 	UErrorCode status = 0;
1591 	int len;
1592 	char resultLocale[INTL_MAX_LOCALE_LEN+1];
1593 	UAcceptResult outResult;
1594 
1595 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s", &http_accept, &http_accept_len) == FAILURE)
1596 	{
1597 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1598 		"locale_accept_from_http: unable to parse input parameters", 0 );
1599 		RETURN_FALSE;
1600 	}
1601 	if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1602 		/* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1603 		char *start = http_accept;
1604 		char *end;
1605 		size_t len;
1606 		do {
1607 			end = strchr(start, ',');
1608 			len = end ? end-start : http_accept_len-(start-http_accept);
1609 			if(len > ULOC_FULLNAME_CAPACITY) {
1610 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1611 						"locale_accept_from_http: locale string too long", 0 );
1612 				RETURN_FALSE;
1613 			}
1614 			if(end) {
1615 				start = end+1;
1616 			}
1617 		} while(end != NULL);
1618 	}
1619 
1620 	available = ures_openAvailableLocales(NULL, &status);
1621 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1622 	len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1623 						&outResult, http_accept, available, &status);
1624 	uenum_close(available);
1625 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1626 	if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1627 		RETURN_FALSE;
1628 	}
1629 	RETURN_STRINGL(resultLocale, len);
1630 }
1631 /* }}} */
1632