xref: /PHP-8.1/ext/intl/locale/locale_methods.c (revision 2654c344)
1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
12    +----------------------------------------------------------------------+
13 */
14 
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include <unicode/ustring.h>
20 #include <unicode/udata.h>
21 #include <unicode/putil.h>
22 #include <unicode/ures.h>
23 
24 #include "php_intl.h"
25 #include "locale.h"
26 #include "locale_class.h"
27 #include "intl_convert.h"
28 #include "intl_data.h"
29 
30 #include <zend_API.h>
31 #include <zend.h>
32 #include <php.h>
33 #include "main/php_ini.h"
34 #include "zend_smart_str.h"
35 
36 ZEND_EXTERN_MODULE_GLOBALS( intl )
37 
/* Separators, singleton prefixes and the display-name selector used by the
 * helpers in this file:
 *  - SEPARATOR is what the compose helpers append between subtags,
 *  - DELIMITER is the delimiter set given to php_strtok_r() when a value is
 *    split back into subtags,
 *  - PRIVATE_PREFIX is the singleton written in front of private subtags,
 *  - DISP_NAME is the tag_name that selects uloc_getDisplayName(). */
#define SEPARATOR "_"
#define SEPARATOR1 "-"
#define DELIMITER "-_"
#define EXTLANG_PREFIX "a"
#define PRIVATE_PREFIX "x"
#define DISP_NAME "name"

/* Upper bounds on how many numbered keys ("variant0", "variant1", ...) are
 * probed per category when a locale is composed from an array. */
#define MAX_NO_VARIANT  15
#define MAX_NO_EXTLANG  3
#define MAX_NO_PRIVATE  15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is absent */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
57 
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code.
 *
 * Order matters: the first six entries line up index-for-index with
 * LOC_PREFERRED_GRANDFATHERED. The list is NULL-terminated so that
 * findOffset() can walk it.
 */
static const char * const LOC_GRANDFATHERED[] = {
	/* tags that have a preferred replacement */
	"art-lojban", "i-klingon", "i-lux", "i-navajo", "no-bok", "no-nyn",

	/* tags without a preferred replacement */
	"cel-gaulish", "en-GB-oed",   "i-ami",
	"i-bnn",       "i-default",   "i-enochian",
	"i-mingo",     "i-pwn",       "i-tao",
	"i-tay",       "i-tsu",       "sgn-BE-fr",
	"sgn-BE-nl",   "sgn-CH-de",   "zh-cmn",
	"zh-cmn-Hans", "zh-cmn-Hant", "zh-gan",
	"zh-guoyu",    "zh-hakka",    "zh-min",
	"zh-min-nan",  "zh-wuu",      "zh-xiang",
	"zh-yue",

	NULL
};
73 
/* Preferred replacements for grandfathered tags, based on the IANA registry
 * at the time of writing this code.
 *
 * Entry N here is the preferred value for entry N of LOC_GRANDFATHERED, so
 * the two tables must be kept in the same order. The table is
 * NULL-terminated; LOC_PREFERRED_GRANDFATHERED_LEN counts the real entries
 * only.
 */
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
	"jbo", "tlh", "lb", "nv", "nb", "nn",
	NULL
};
static const int LOC_PREFERRED_GRANDFATHERED_LEN =
	(int)(sizeof(LOC_PREFERRED_GRANDFATHERED) / sizeof(LOC_PREFERRED_GRANDFATHERED[0])) - 1;
85 
/* Character / prefix classification helpers.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (conditional expressions, pointer arithmetic, ...) expand as intended.
 * The argument is still evaluated more than once, so do not pass
 * expressions with side effects.
 */

/* returns true if a is an ID separator, false otherwise */
#define isIDSeparator(a) (((a) == '_') || ((a) == '-'))
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a) == 'x') || ((a) == 'X') || ((a) == 'i') || ((a) == 'I'))

/* returns true if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a) (((a) == 0) || ((a) == '.') || ((a) == '@'))
101 
/* {{{ Linear search of a NULL-terminated string list.
 * Returns the zero-based index of the first entry equal to 'key',
 * or -1 if no entry matches. */
static int16_t findOffset(const char* const* list, const char* key)
{
	size_t idx;

	for (idx = 0; list[idx] != NULL; idx++) {
		if (strcmp(list[idx], key) == 0) {
			return (int16_t)idx;
		}
	}

	return -1;
}
/*}}}*/
118 
getPreferredTag(const char * gf_tag)119 static char* getPreferredTag(const char* gf_tag)
120 {
121 	char* result = NULL;
122 	zend_off_t grOffset = 0;
123 
124 	grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
125 	if(grOffset < 0) {
126 		return NULL;
127 	}
128 	if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
129 		/* return preferred tag */
130 		result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
131 	} else {
132 		/* Return correct grandfathered language tag */
133 		result = estrdup( LOC_GRANDFATHERED[grOffset] );
134 	}
135 	return result;
136 }
137 
138 /* {{{
139 * returns the position of next token for lookup
140 * or -1 if no token
141 * strtokr equivalent search for token in reverse direction
142 */
getStrrtokenPos(char * str,zend_off_t savedPos)143 static zend_off_t getStrrtokenPos(char* str, zend_off_t savedPos)
144 {
145 	zend_off_t result =-1;
146 	zend_off_t i;
147 
148 	for(i=savedPos-1; i>=0; i--) {
149 		if(isIDSeparator(*(str+i)) || isKeywordSeparator(*(str+i))){
150 			/* delimiter found; check for singleton */
151 			if(i>=2 && isIDSeparator(*(str+i-2)) ){
152 				/* a singleton; so send the position of token before the singleton */
153 				result = i-2;
154 			} else {
155 				result = i;
156 			}
157 			break;
158 		}
159 	}
160 	if(result < 1){
161 		/* Just in case inavlid locale e.g. '-x-xyz' or '-sl_Latn' */
162 		result =-1;
163 	}
164 	return result;
165 }
166 /* }}} */
167 
168 /* {{{
169 * returns the position of a singleton if present
170 * returns -1 if no singleton
171 * strtok equivalent search for singleton
172 */
getSingletonPos(const char * str)173 static zend_off_t getSingletonPos(const char* str)
174 {
175 	zend_off_t result =-1;
176 	size_t len = 0;
177 
178 	if( str && ((len=strlen(str))>0) ){
179 		zend_off_t i = 0;
180 		for( i=0; (size_t)i < len ; i++){
181 			if( isIDSeparator(*(str+i)) ){
182 				if( i==1){
183 					/* string is of the form x-avy or a-prv1 */
184 					result =0;
185 					break;
186 				} else {
187 					/* delimiter found; check for singleton */
188 					if( isIDSeparator(*(str+i+2)) ){
189 						/* a singleton; so send the position of separator before singleton */
190 						result = i+1;
191 						break;
192 					}
193 				}
194 			}
195 		}/* end of for */
196 
197 	}
198 	return result;
199 }
200 /* }}} */
201 
/* {{{ Get default locale
 * Takes no arguments (throws if any are passed) and returns the string
 * produced by intl_locale_get_default(). */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
	if (zend_parse_parameters_none() == FAILURE) {
		RETURN_THROWS();
	}

	RETURN_STRING( intl_locale_get_default(  ) );
}

/* }}} */
215 
216 /* {{{ Set default locale */
217 /* }}} */
218 /* {{{ Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)219 PHP_NAMED_FUNCTION(zif_locale_set_default)
220 {
221 	zend_string* locale_name;
222 	zend_string *ini_name;
223 	char *default_locale = NULL;
224 
225 	if(zend_parse_parameters( ZEND_NUM_ARGS(),  "S", &locale_name) == FAILURE)
226 	{
227 		RETURN_THROWS();
228 	}
229 
230 	if (ZSTR_LEN(locale_name) == 0) {
231 		default_locale = (char *)uloc_getDefault();
232 		locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
233 	}
234 
235 	ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
236 	zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
237 	zend_string_release_ex(ini_name, 0);
238 	if (default_locale != NULL) {
239 		zend_string_release_ex(locale_name, 0);
240 	}
241 
242 	RETURN_TRUE;
243 }
244 /* }}} */
245 
246 /* {{{
247 * Gets the value from ICU
248 * common code shared by get_primary_language,get_script or get_region or get_variant
249 * result = 0 if error, 1 if successful , -1 if no value
250 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)251 static zend_string* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
252 {
253 	zend_string* tag_value	    = NULL;
254 	int32_t      tag_value_len  = 512;
255 
256 	char*        mod_loc_name   = NULL;
257 
258 	int32_t      buflen         = 512;
259 	UErrorCode   status         = U_ZERO_ERROR;
260 
261 	if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
262 		return NULL;
263 	}
264 
265 	if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
266 		/* Handle  grandfathered languages */
267 		zend_off_t grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
268 		if( grOffset >= 0 ){
269 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
270 				return zend_string_init(loc_name, strlen(loc_name), 0);
271 			} else {
272 				/* Since Grandfathered , no value , do nothing , retutn NULL */
273 				return NULL;
274 			}
275 		}
276 
277 	if( fromParseLocale==1 ){
278 		zend_off_t singletonPos = 0;
279 
280 		/* Handle singletons */
281 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
282 			if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
283 				return zend_string_init(loc_name, strlen(loc_name), 0);
284 			}
285 		}
286 
287 		singletonPos = getSingletonPos( loc_name );
288 		if( singletonPos == 0){
289 			/* singleton at start of script, region , variant etc.
290 			 * or invalid singleton at start of language */
291 			return NULL;
292 		} else if( singletonPos > 0 ){
293 			/* singleton at some position except at start
294 			 * strip off the singleton and rest of the loc_name */
295 			mod_loc_name = estrndup ( loc_name , singletonPos-1);
296 		}
297 	} /* end of if fromParse */
298 
299 	} /* end of if != LOC_CANONICAL_TAG */
300 
301 	if( mod_loc_name == NULL){
302 		mod_loc_name = estrdup(loc_name );
303 	}
304 
305 	/* Proceed to ICU */
306 	do{
307 		if (tag_value) {
308 			tag_value = zend_string_realloc( tag_value , buflen, 0);
309 		} else {
310 			tag_value = zend_string_alloc( buflen, 0);
311 		}
312 		tag_value_len = buflen;
313 
314 		if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
315 			buflen = uloc_getScript ( mod_loc_name , tag_value->val , tag_value_len , &status);
316 		}
317 		if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
318 			buflen = uloc_getLanguage ( mod_loc_name , tag_value->val , tag_value_len , &status);
319 		}
320 		if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
321 			buflen = uloc_getCountry ( mod_loc_name , tag_value->val , tag_value_len , &status);
322 		}
323 		if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
324 			buflen = uloc_getVariant ( mod_loc_name , tag_value->val , tag_value_len , &status);
325 		}
326 		if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
327 			buflen = uloc_canonicalize ( mod_loc_name , tag_value->val , tag_value_len , &status);
328 		}
329 
330 		if( U_FAILURE( status ) ) {
331 			if( status == U_BUFFER_OVERFLOW_ERROR ) {
332 				status = U_ZERO_ERROR;
333 				buflen++; /* add space for \0 */
334 				continue;
335 			}
336 
337 			/* Error in retrieving data */
338 			*result = 0;
339 			if( tag_value ){
340 				zend_string_release_ex( tag_value, 0 );
341 			}
342 			if( mod_loc_name ){
343 				efree( mod_loc_name);
344 			}
345 			return NULL;
346 		}
347 	} while( buflen > tag_value_len );
348 
349 	if(  buflen ==0 ){
350 		/* No value found */
351 		*result = -1;
352 		if( tag_value ){
353 			zend_string_release_ex( tag_value, 0 );
354 		}
355 		if( mod_loc_name ){
356 			efree( mod_loc_name);
357 		}
358 		return NULL;
359 	} else {
360 		*result = 1;
361 	}
362 
363 	if( mod_loc_name ){
364 		efree( mod_loc_name);
365 	}
366 
367 	tag_value->len = strlen(tag_value->val);
368 	return tag_value;
369 }
370 /* }}} */
371 
372 /* {{{
373 * Gets the value from ICU , called when PHP userspace function is called
374 * common code shared by get_primary_language,get_script or get_region or get_variant
375 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)376 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
377 {
378 
379 	const char* loc_name        	= NULL;
380 	size_t         loc_name_len    	= 0;
381 
382 	zend_string*   tag_value		= NULL;
383 	char*       empty_result	= "";
384 
385 	int         result    		= 0;
386 	char*       msg        		= NULL;
387 
388 	UErrorCode  status          	= U_ZERO_ERROR;
389 
390 	intl_error_reset( NULL );
391 
392 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
393 	&loc_name ,&loc_name_len ) == FAILURE) {
394 		RETURN_THROWS();
395 	}
396 
397 	if(loc_name_len == 0) {
398 		loc_name = intl_locale_get_default();
399 		loc_name_len = strlen(loc_name);
400 	}
401 
402 	INTL_CHECK_LOCALE_LEN(loc_name_len);
403 
404 	/* Call ICU get */
405 	tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
406 
407 	/* No value found */
408 	if( result == -1 ) {
409 		if( tag_value){
410 			zend_string_release_ex( tag_value, 0 );
411 		}
412 		RETURN_STRING( empty_result);
413 	}
414 
415 	/* value found */
416 	if( tag_value){
417 		RETVAL_STR( tag_value );
418 		return;
419 	}
420 
421 	/* Error encountered while fetching the value */
422 	if( result ==0) {
423 		spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
424 		intl_error_set( NULL, status, msg , 1 );
425 		efree(msg);
426 		RETURN_NULL();
427 	}
428 
429 }
430 /* }}} */
431 
/* {{{ gets the script for the $locale
 * Hands the call over to get_icu_value_src_php() with LOC_SCRIPT_TAG as the
 * tag to extract. */
PHP_FUNCTION( locale_get_script )
{
	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
438 
/* {{{ gets the region for the $locale
 * Hands the call over to get_icu_value_src_php() with LOC_REGION_TAG as the
 * tag to extract. */
PHP_FUNCTION( locale_get_region )
{
	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
445 
/* {{{ gets the primary language for the $locale
 * Hands the call over to get_icu_value_src_php() with LOC_LANG_TAG as the
 * tag to extract. */
PHP_FUNCTION(locale_get_primary_language )
{
	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
452 
453 
454 /* {{{
455  * common code shared by display_xyz functions to  get the value from ICU
456  }}} */
get_icu_disp_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)457 static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
458 {
459 	const char* loc_name        	= NULL;
460 	size_t         loc_name_len    	= 0;
461 
462 	const char* disp_loc_name       = NULL;
463 	size_t      disp_loc_name_len   = 0;
464 	int         free_loc_name       = 0;
465 
466 	UChar*      disp_name      	= NULL;
467 	int32_t     disp_name_len  	= 0;
468 
469 	char*       mod_loc_name        = NULL;
470 
471 	int32_t     buflen          	= 512;
472 	UErrorCode  status          	= U_ZERO_ERROR;
473 
474 	zend_string* u8str;
475 
476 	char*       msg             	= NULL;
477 
478 	intl_error_reset( NULL );
479 
480 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|s!",
481 		&loc_name, &loc_name_len ,
482 		&disp_loc_name ,&disp_loc_name_len ) == FAILURE)
483 	{
484 		RETURN_THROWS();
485 	}
486 
487 	if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
488 		/* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
489 		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
490 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
491 		efree(msg);
492 		RETURN_FALSE;
493 	}
494 
495 	if(loc_name_len == 0) {
496 		loc_name = intl_locale_get_default();
497 	}
498 
499 	if( strcmp(tag_name, DISP_NAME) != 0 ){
500 		/* Handle grandfathered languages */
501 		int grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
502 		if( grOffset >= 0 ){
503 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
504 				mod_loc_name = getPreferredTag( loc_name );
505 			} else {
506 				/* Since Grandfathered, no value, do nothing, retutn NULL */
507 				RETURN_FALSE;
508 			}
509 		}
510 	} /* end of if != LOC_CANONICAL_TAG */
511 
512 	if( mod_loc_name==NULL ){
513 		mod_loc_name = estrdup( loc_name );
514 	}
515 
516 	/* Check if disp_loc_name passed , if not use default locale */
517 	if( !disp_loc_name){
518 		disp_loc_name = estrdup(intl_locale_get_default());
519 		free_loc_name = 1;
520 	}
521 
522 	/* Get the disp_value for the given locale */
523 	do{
524 		disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
525 		disp_name_len = buflen;
526 
527 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
528 			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
529 		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
530 			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
531 		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
532 			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
533 		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
534 			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
535 		} else if( strcmp(tag_name , DISP_NAME)==0 ){
536 			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
537 		}
538 
539 		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
540 		if( U_FAILURE( status ) )
541 		{
542 			if( status == U_BUFFER_OVERFLOW_ERROR )
543 			{
544 				status = U_ZERO_ERROR;
545 				continue;
546 			}
547 
548 			spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
549 			intl_error_set( NULL, status, msg , 1 );
550 			efree(msg);
551 			if( disp_name){
552 				efree( disp_name );
553 			}
554 			if( mod_loc_name){
555 				efree( mod_loc_name );
556 			}
557 			if (free_loc_name) {
558 				efree((void *)disp_loc_name);
559 				disp_loc_name = NULL;
560 			}
561 			RETURN_FALSE;
562 		}
563 	} while( buflen > disp_name_len );
564 
565 	if( mod_loc_name){
566 		efree( mod_loc_name );
567 	}
568 	if (free_loc_name) {
569 		efree((void *)disp_loc_name);
570 		disp_loc_name = NULL;
571 	}
572 	/* Convert display locale name from UTF-16 to UTF-8. */
573 	u8str = intl_convert_utf16_to_utf8(disp_name, buflen, &status );
574 	efree( disp_name );
575 	if( !u8str )
576 	{
577 		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
578 		intl_error_set( NULL, status, msg , 1 );
579 		efree(msg);
580 		RETURN_FALSE;
581 	}
582 
583 	RETVAL_NEW_STR( u8str );
584 }
585 /* }}} */
586 
/* {{{ gets the name for the $locale in $in_locale or default_locale
 * Hands the call over to get_icu_disp_value_src_php() with DISP_NAME as the
 * selector. */
PHP_FUNCTION(locale_get_display_name)
{
	get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
593 
/* {{{ gets the language for the $locale in $in_locale or default_locale
 * Hands the call over to get_icu_disp_value_src_php() with LOC_LANG_TAG as
 * the selector. */
PHP_FUNCTION(locale_get_display_language)
{
	get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
600 
/* {{{ gets the script for the $locale in $in_locale or default_locale
 * Hands the call over to get_icu_disp_value_src_php() with LOC_SCRIPT_TAG as
 * the selector. */
PHP_FUNCTION(locale_get_display_script)
{
	get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
607 
/* {{{ gets the region for the $locale in $in_locale or default_locale
 * Hands the call over to get_icu_disp_value_src_php() with LOC_REGION_TAG as
 * the selector. */
PHP_FUNCTION(locale_get_display_region)
{
	get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
614 
/* {{{
* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
* proto static string get_display_variant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
* Hands the call over to get_icu_disp_value_src_php() with LOC_VARIANT_TAG as
* the selector.
*/
PHP_FUNCTION(locale_get_display_variant)
{
	get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
628 
629  /* {{{ return an associative array containing keyword-value
630  * pairs for this locale. The keys are keys to the array (doh!)
631  * }}}*/
632  /* {{{ return an associative array containing keyword-value
633  * pairs for this locale. The keys are keys to the array (doh!)
634  */
PHP_FUNCTION(locale_get_keywords)635 PHP_FUNCTION( locale_get_keywords )
636 {
637 	UEnumeration*   e        = NULL;
638 	UErrorCode      status   = U_ZERO_ERROR;
639 
640 	const char*	 	kw_key        = NULL;
641 	int32_t         kw_key_len    = 0;
642 
643 	const char*       	loc_name        = NULL;
644 	size_t        	 	loc_name_len    = 0;
645 
646 	intl_error_reset( NULL );
647 
648 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
649 		&loc_name, &loc_name_len ) == FAILURE)
650 	{
651 		RETURN_THROWS();
652 	}
653 
654 	INTL_CHECK_LOCALE_LEN(strlen(loc_name));
655 
656 	if(loc_name_len == 0) {
657 		loc_name = intl_locale_get_default();
658 	}
659 
660 	/* Get the keywords */
661 	e = uloc_openKeywords( loc_name, &status );
662 	if( e != NULL ) {
663 		/*
664 		ICU expects the buffer to be allocated  before calling the function
665 		and so the buffer size has been explicitly specified
666 		ICU uloc.h #define 	ULOC_KEYWORD_AND_VALUES_CAPACITY   100
667 		hence the kw_value buffer size is 100
668 		*/
669 
670 		/* Traverse it, filling the return array. */
671 		array_init( return_value );
672 
673 		while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
674 			int32_t kw_value_len = 100;
675 			zend_string *kw_value_str = zend_string_alloc(kw_value_len, 0);
676 
677 			/* Get the keyword value for each keyword */
678 			kw_value_len=uloc_getKeywordValue( loc_name, kw_key, ZSTR_VAL(kw_value_str), kw_value_len, &status );
679 			if (status == U_BUFFER_OVERFLOW_ERROR) {
680 				status = U_ZERO_ERROR;
681 				kw_value_str = zend_string_extend(kw_value_str, kw_value_len, 0);
682 				kw_value_len=uloc_getKeywordValue( loc_name,kw_key, ZSTR_VAL(kw_value_str), kw_value_len+1, &status );
683 			} else if(!U_FAILURE(status)) {
684 				kw_value_str = zend_string_truncate(kw_value_str, kw_value_len, 0);
685 			}
686 			if (U_FAILURE(status)) {
687 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 );
688 				if( kw_value_str){
689 					zend_string_efree( kw_value_str );
690 				}
691 				zend_array_destroy(Z_ARR_P(return_value));
692 				RETURN_FALSE;
693 			}
694 
695 			add_assoc_str( return_value, (char *)kw_key, kw_value_str);
696 		} /* end of while */
697 
698 	} /* end of if e!=NULL */
699 
700 	uenum_close( e );
701 }
702 /* }}} */
703 
/* {{{ @param string $locale	The locale string to canonicalize
 * @return string the canonicalized locale
 * Hands the call over to get_icu_value_src_php() with LOC_CANONICALIZE_TAG
 * as the tag. */
PHP_FUNCTION(locale_canonicalize)
{
	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
712 
713 /* {{{ append_key_value
714 * Internal function which is called from locale_compose
715 * gets the value for the key_name and appends to the loc_name
716 * returns 1 if successful , -1 if not found ,
717 * 0 if array element is not a string , -2 if buffer-overflow
718 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)719 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
720 {
721 	zval *ele_value;
722 
723 	if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
724 		if(Z_TYPE_P(ele_value)!= IS_STRING ){
725 			/* element value is not a string */
726 			return FAILURE;
727 		}
728 		if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
729 		   strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
730 			/* not lang or grandfathered tag */
731 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
732 		}
733 		smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
734 		return SUCCESS;
735 	}
736 
737 	return LOC_NOT_FOUND;
738 }
739 /* }}} */
740 
741 /* {{{ append_prefix , appends the prefix needed
742 * e.g. private adds 'x'
743 */
add_prefix(smart_str * loc_name,char * key_name)744 static void add_prefix(smart_str* loc_name, char* key_name)
745 {
746 	if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
747 		smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
748 		smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
749 	}
750 }
751 /* }}} */
752 
753 /* {{{ append_multiple_key_values
754 * Internal function which is called from locale_compose
755 * gets the multiple values for the key_name and appends to the loc_name
756 * used for 'variant','extlang','private'
757 * returns 1 if successful , -1 if not found ,
758 * 0 if array element is not a string , -2 if buffer-overflow
759 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name)760 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name)
761 {
762 	zval	*ele_value;
763 	int 	isFirstSubtag 	= 0;
764 
765 	/* Variant/ Extlang/Private etc. */
766 	if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
767 		if( Z_TYPE_P(ele_value) == IS_STRING ){
768 			add_prefix( loc_name , key_name);
769 
770 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
771 			smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
772 			return SUCCESS;
773 		} else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
774 			HashTable *arr = Z_ARRVAL_P(ele_value);
775 			zval *data;
776 
777 			ZEND_HASH_FOREACH_VAL(arr, data) {
778 				if(Z_TYPE_P(data) != IS_STRING) {
779 					return FAILURE;
780 				}
781 				if (isFirstSubtag++ == 0){
782 					add_prefix(loc_name , key_name);
783 				}
784 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
785 				smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
786 			} ZEND_HASH_FOREACH_END();
787 			return SUCCESS;
788 		} else {
789 			return FAILURE;
790 		}
791 	} else {
792 		char cur_key_name[31];
793 		int  max_value = 0, i;
794 		/* Decide the max_value: the max. no. of elements allowed */
795 		if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
796 			max_value  = MAX_NO_VARIANT;
797 		}
798 		if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
799 			max_value  = MAX_NO_EXTLANG;
800 		}
801 		if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
802 			max_value  = MAX_NO_PRIVATE;
803 		}
804 
805 		/* Multiple variant values as variant0, variant1 ,variant2 */
806 		isFirstSubtag = 0;
807 		for( i=0 ; i< max_value; i++ ){
808 			snprintf( cur_key_name , 30, "%s%d", key_name , i);
809 			if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
810 				if( Z_TYPE_P(ele_value)!= IS_STRING ){
811 					/* variant is not a string */
812 					return FAILURE;
813 				}
814 				/* Add the contents */
815 				if (isFirstSubtag++ == 0){
816 					add_prefix(loc_name , cur_key_name);
817 				}
818 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
819 				smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
820 			}
821 		} /* end of for */
822 	} /* end of else */
823 
824 	return SUCCESS;
825 }
826 /* }}} */
827 
828 /*{{{
829 * If applicable sets error message and aborts locale_compose gracefully
830 * returns 0  if locale_compose needs to be aborted
831 * otherwise returns 1
832 */
handleAppendResult(int result,smart_str * loc_name)833 static int handleAppendResult( int result, smart_str* loc_name)
834 {
835 	intl_error_reset( NULL );
836 	if( result == FAILURE) {
837 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
838 			 "locale_compose: parameter array element is not a string", 0 );
839 		smart_str_free(loc_name);
840 		return 0;
841 	}
842 	return 1;
843 }
844 /* }}} */
845 
846 #define RETURN_SMART_STR(str) smart_str_0((str)); RETURN_NEW_STR((str)->s)
847 /* {{{ Creates a locale by combining the parts of locale-ID passed
848 * }}} */
849 /* {{{ Creates a locale by combining the parts of locale-ID passed
850 * }}} */
PHP_FUNCTION(locale_compose)851 PHP_FUNCTION(locale_compose)
852 {
853 	smart_str      	loc_name_s = {0};
854 	smart_str *loc_name = &loc_name_s;
855 	zval*			arr	= NULL;
856 	HashTable*		hash_arr = NULL;
857 	int 			result = 0;
858 
859 	intl_error_reset( NULL );
860 
861 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "a",
862 		&arr) == FAILURE)
863 	{
864 		RETURN_THROWS();
865 	}
866 
867 	hash_arr = Z_ARRVAL_P( arr );
868 
869 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
870 		RETURN_FALSE;
871 
872 	/* Check for grandfathered first */
873 	result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
874 	if( result == SUCCESS){
875 		RETURN_SMART_STR(loc_name);
876 	}
877 	if( !handleAppendResult( result, loc_name)){
878 		RETURN_FALSE;
879 	}
880 
881 	/* Not grandfathered */
882 	result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
883 	if( result == LOC_NOT_FOUND ){
884 		zend_argument_value_error(1, "must contain a \"%s\" key", LOC_LANG_TAG);
885 		smart_str_free(loc_name);
886 		RETURN_THROWS();
887 	}
888 	if( !handleAppendResult( result, loc_name)){
889 		RETURN_FALSE;
890 	}
891 
892 	/* Extlang */
893 	result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG);
894 	if( !handleAppendResult( result, loc_name)){
895 		RETURN_FALSE;
896 	}
897 
898 	/* Script */
899 	result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
900 	if( !handleAppendResult( result, loc_name)){
901 		RETURN_FALSE;
902 	}
903 
904 	/* Region */
905 	result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
906 	if( !handleAppendResult( result, loc_name)){
907 		RETURN_FALSE;
908 	}
909 
910 	/* Variant */
911 	result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG);
912 	if( !handleAppendResult( result, loc_name)){
913 		RETURN_FALSE;
914 	}
915 
916 	/* Private */
917 	result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG);
918 	if( !handleAppendResult( result, loc_name)){
919 		RETURN_FALSE;
920 	}
921 
922 	RETURN_SMART_STR(loc_name);
923 }
924 /* }}} */
925 
926 
927 /*{{{
928 * Parses the locale and returns private subtags  if existing
929 * else returns NULL
930 * e.g. for locale='en_US-x-prv1-prv2-prv3'
931 * returns a pointer to the string 'prv1-prv2-prv3'
932 */
get_private_subtags(const char * loc_name)933 static zend_string* get_private_subtags(const char* loc_name)
934 {
935 	zend_string* result = NULL;
936 	size_t       len = 0;
937 	const char*  mod_loc_name =NULL;
938 
939 	if( loc_name && (len = strlen(loc_name)) > 0 ){
940 		zend_off_t singletonPos = 0;
941 		mod_loc_name = loc_name ;
942 		while( (singletonPos = getSingletonPos(mod_loc_name)) > -1){
943 			if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){
944 				/* private subtag start found */
945 				if( singletonPos + 2 ==  len){
946 					/* loc_name ends with '-x-' ; return  NULL */
947 				}
948 				else{
949 					/* result = mod_loc_name + singletonPos +2; */
950 					result = zend_string_init(mod_loc_name + singletonPos+2  , (len -( singletonPos +2) ), 0);
951 				}
952 				break;
953 			}
954 			else{
955 				if((size_t)(singletonPos + 1) >= len){
956 					/* String end */
957 					break;
958 				} else {
959 					/* singleton found but not a private subtag , hence check further in the string for the private subtag */
960 					mod_loc_name = mod_loc_name + singletonPos +1;
961 					len = strlen(mod_loc_name);
962 				}
963 			}
964 		} /* end of while */
965 	}
966 
967 	return result;
968 }
969 /* }}} */
970 
971 /* {{{ code used by locale_parse */
/*
 * Extracts the subtag(s) named by key_name from loc_name and stores them in
 * the array hash_arr.
 *
 * - LOC_PRIVATE_TAG / LOC_VARIANT_TAG: the value is split on "-" / "_" and
 *   each piece is stored under "<key_name><n>" (n = 0, 1, ...). After the
 *   first piece, splitting stops at the first piece of length <= 1.
 * - any other key: the value is stored as a single entry under key_name.
 *
 * Returns 1 only when a single-valued entry was added, 0 otherwise (this
 * mirrors the historical return value; callers in this file ignore it).
 */
static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name)
{
	zend_string* key_value  = NULL;
	int          result     = 0;
	int          cur_result = 0;
	const int    is_private = (strcmp(key_name, LOC_PRIVATE_TAG) == 0);
	const int    is_multi   = is_private || (strcmp(key_name, LOC_VARIANT_TAG) == 0);

	if (is_private) {
		key_value = get_private_subtags(loc_name);
		result = 1;
	} else {
		key_value = get_icu_value_internal(loc_name, key_name, &result, 1);
	}

	if (is_multi) {
		if (result > 0 && key_value) {
			char* last_ptr = NULL;
			/* Tokenize on the "_" or "-"; this modifies key_value in place */
			char* token = php_strtok_r(ZSTR_VAL(key_value), DELIMITER, &last_ptr);

			/* token is NULL when the value holds nothing but delimiters */
			if (token) {
				int    cnt = 0;
				/* room for key_name, any decimal int and the terminator */
				size_t key_buf_size = strlen(key_name) + sizeof("-2147483648");
				char*  cur_key_name = emalloc(key_buf_size);

				snprintf(cur_key_name, key_buf_size, "%s%d", key_name, cnt++);
				add_assoc_string(hash_arr, cur_key_name, token);

				/* keep tokenizing and stop at a singleton if any */
				while ((token = php_strtok_r(NULL, DELIMITER, &last_ptr)) && (strlen(token) > 1)) {
					snprintf(cur_key_name, key_buf_size, "%s%d", key_name, cnt++);
					add_assoc_string(hash_arr, cur_key_name, token);
				}

				efree(cur_key_name);
			}
		}
		if (key_value) {
			zend_string_release_ex(key_value, 0);
		}
	} else {
		if (result == 1 && key_value) {
			/* the array takes over the reference to key_value */
			add_assoc_str(hash_arr, key_name, key_value);
			cur_result = 1;
		} else if (key_value) {
			zend_string_release_ex(key_value, 0);
		}
	}

	return cur_result;
}
1029 /* }}} */
1030 
1031 /* {{{ parses a locale-id into an array the different parts of it */
PHP_FUNCTION(locale_parse)1032 PHP_FUNCTION(locale_parse)
1033 {
1034 	const char* loc_name        = NULL;
1035 	size_t         loc_name_len    = 0;
1036 	int         grOffset    	= 0;
1037 
1038 	intl_error_reset( NULL );
1039 
1040 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1041 		&loc_name, &loc_name_len ) == FAILURE)
1042 	{
1043 		RETURN_THROWS();
1044 	}
1045 
1046 	INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1047 
1048 	if(loc_name_len == 0) {
1049 		loc_name = intl_locale_get_default();
1050 	}
1051 
1052 	array_init( return_value );
1053 
1054 	grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1055 	if( grOffset >= 0 ){
1056 		add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1057 	}
1058 	else{
1059 		/* Not grandfathered */
1060 		add_array_entry( loc_name , return_value , LOC_LANG_TAG);
1061 		add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG);
1062 		add_array_entry( loc_name , return_value , LOC_REGION_TAG);
1063 		add_array_entry( loc_name , return_value , LOC_VARIANT_TAG);
1064 		add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG);
1065 	}
1066 }
1067 /* }}} */
1068 
1069 /* {{{ gets an array containing the list of variants, or null */
PHP_FUNCTION(locale_get_all_variants)1070 PHP_FUNCTION(locale_get_all_variants)
1071 {
1072 	const char*  	loc_name        = NULL;
1073 	size_t    		loc_name_len    = 0;
1074 
1075 	int	result		= 0;
1076 	char*	token		= NULL;
1077 	zend_string*	variant		= NULL;
1078 	char*	saved_ptr	= NULL;
1079 
1080 	intl_error_reset( NULL );
1081 
1082 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1083 	&loc_name, &loc_name_len ) == FAILURE)
1084 	{
1085 		RETURN_THROWS();
1086 	}
1087 
1088 	if(loc_name_len == 0) {
1089 		loc_name = intl_locale_get_default();
1090 		loc_name_len = strlen(loc_name);
1091 	}
1092 
1093 	INTL_CHECK_LOCALE_LEN(loc_name_len);
1094 
1095 	array_init( return_value );
1096 
1097 	/* If the locale is grandfathered, stop, no variants */
1098 	if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1099 		/* ("Grandfathered Tag. No variants."); */
1100 	}
1101 	else {
1102 	/* Call ICU variant */
1103 		variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1104 		if( result > 0 && variant){
1105 			/* Tokenize on the "_" or "-" */
1106 			token = php_strtok_r( variant->val , DELIMITER , &saved_ptr);
1107 			add_next_index_stringl( return_value, token , strlen(token));
1108 			/* tokenize on the "_" or "-" and stop  at singleton if any	*/
1109 			while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1110 				add_next_index_stringl( return_value, token , strlen(token));
1111 			}
1112 		}
1113 		if( variant ){
1114 			zend_string_release_ex( variant, 0 );
1115 		}
1116 	}
1117 
1118 
1119 }
1120 /* }}} */
1121 
1122 /* {{{ Converts to lower case and also replaces all hyphens with the underscore */
/*
 * Writes a normalized copy of str into retstr for case-insensitive matching:
 * every '-' becomes '_' and every other character is passed through
 * tolower(). retstr must have room for strlen(str) + 1 bytes.
 *
 * Returns 1 on success, 0 when str is NULL or empty (retstr is then left
 * untouched).
 */
static int strToMatch(const char* str ,char *retstr)
{
	if (!str || str[0] == '\0') {
		return 0;
	}

	for (; *str != '\0'; str++, retstr++) {
		if (*str == '-') {
			*retstr = '_';
		} else {
			/* <ctype.h> functions require an unsigned char value (or EOF);
			 * handing them a negative plain char is undefined behavior. */
			*retstr = (char)tolower((unsigned char)*str);
		}
	}
	*retstr = '\0';

	return 1;
}
1151 /* }}} */
1152 
1153 /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
1154 /* }}} */
1155 /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
PHP_FUNCTION(locale_filter_matches)1156 PHP_FUNCTION(locale_filter_matches)
1157 {
1158 	char*       	lang_tag        = NULL;
1159 	size_t         	lang_tag_len    = 0;
1160 	const char*     loc_range       = NULL;
1161 	size_t         	loc_range_len   = 0;
1162 
1163 	int		result		= 0;
1164 	char*		token		= 0;
1165 	char*		chrcheck	= NULL;
1166 
1167 	zend_string*   	can_lang_tag    = NULL;
1168 	zend_string*   	can_loc_range   = NULL;
1169 
1170 	char*       	cur_lang_tag    = NULL;
1171 	char*       	cur_loc_range   = NULL;
1172 
1173 	bool 	boolCanonical 	= 0;
1174 	UErrorCode	status		= U_ZERO_ERROR;
1175 
1176 	intl_error_reset( NULL );
1177 
1178 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "ss|b",
1179 		&lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1180 		&boolCanonical) == FAILURE)
1181 	{
1182 		RETURN_THROWS();
1183 	}
1184 
1185 	if(loc_range_len == 0) {
1186 		loc_range = intl_locale_get_default();
1187 		loc_range_len = strlen(loc_range);
1188 	}
1189 
1190 	if( strcmp(loc_range,"*")==0){
1191 		RETURN_TRUE;
1192 	}
1193 
1194 	INTL_CHECK_LOCALE_LEN(loc_range_len);
1195 	INTL_CHECK_LOCALE_LEN(lang_tag_len);
1196 
1197 	if( boolCanonical ){
1198 		/* canonicalize loc_range */
1199 		can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1200 		if( result <=0) {
1201 			intl_error_set( NULL, status,
1202 				"locale_filter_matches : unable to canonicalize loc_range" , 0 );
1203 			RETURN_FALSE;
1204 		}
1205 
1206 		/* canonicalize lang_tag */
1207 		can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1208 		if( result <=0) {
1209 			intl_error_set( NULL, status,
1210 				"locale_filter_matches : unable to canonicalize lang_tag" , 0 );
1211 			RETURN_FALSE;
1212 		}
1213 
1214 		/* Convert to lower case for case-insensitive comparison */
1215 		cur_lang_tag = ecalloc( 1, can_lang_tag->len + 1);
1216 
1217 		/* Convert to lower case for case-insensitive comparison */
1218 		result = strToMatch( can_lang_tag->val , cur_lang_tag);
1219 		if( result == 0) {
1220 			efree( cur_lang_tag );
1221 			zend_string_release_ex( can_lang_tag, 0 );
1222 			RETURN_FALSE;
1223 		}
1224 
1225 		cur_loc_range = ecalloc( 1, can_loc_range->len + 1);
1226 		result = strToMatch( can_loc_range->val , cur_loc_range );
1227 		if( result == 0) {
1228 			efree( cur_lang_tag );
1229 			zend_string_release_ex( can_lang_tag, 0 );
1230 			efree( cur_loc_range );
1231 			zend_string_release_ex( can_loc_range, 0 );
1232 			RETURN_FALSE;
1233 		}
1234 
1235 		/* check if prefix */
1236 		token 	= strstr( cur_lang_tag , cur_loc_range );
1237 
1238 		if( token && (token==cur_lang_tag) ){
1239 			/* check if the char. after match is SEPARATOR */
1240 			chrcheck = token + (strlen(cur_loc_range));
1241 			if( isIDSeparator(*chrcheck) || isKeywordSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1242 				efree( cur_lang_tag );
1243 				efree( cur_loc_range );
1244 				if( can_lang_tag){
1245 					zend_string_release_ex( can_lang_tag, 0 );
1246 				}
1247 				if( can_loc_range){
1248 					zend_string_release_ex( can_loc_range, 0 );
1249 				}
1250 				RETURN_TRUE;
1251 			}
1252 		}
1253 
1254 		/* No prefix as loc_range */
1255 		if( cur_lang_tag){
1256 			efree( cur_lang_tag );
1257 		}
1258 		if( cur_loc_range){
1259 			efree( cur_loc_range );
1260 		}
1261 		if( can_lang_tag){
1262 			zend_string_release_ex( can_lang_tag, 0 );
1263 		}
1264 		if( can_loc_range){
1265 			zend_string_release_ex( can_loc_range, 0 );
1266 		}
1267 		RETURN_FALSE;
1268 
1269 	} /* end of if isCanonical */
1270 	else{
1271 		/* Convert to lower case for case-insensitive comparison */
1272 		cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1273 
1274 		result = strToMatch( lang_tag , cur_lang_tag);
1275 		if( result == 0) {
1276 			efree( cur_lang_tag );
1277 			RETURN_FALSE;
1278 		}
1279 		cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1280 		result = strToMatch( loc_range , cur_loc_range );
1281 		if( result == 0) {
1282 			efree( cur_lang_tag );
1283 			efree( cur_loc_range );
1284 			RETURN_FALSE;
1285 		}
1286 
1287 		/* check if prefix */
1288 		token 	= strstr( cur_lang_tag , cur_loc_range );
1289 
1290 		if( token && (token==cur_lang_tag) ){
1291 			/* check if the char. after match is SEPARATOR */
1292 			chrcheck = token + (strlen(cur_loc_range));
1293 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1294 				efree( cur_lang_tag );
1295 				efree( cur_loc_range );
1296 				RETURN_TRUE;
1297 			}
1298 		}
1299 
1300 		/* No prefix as loc_range */
1301 		if( cur_lang_tag){
1302 			efree( cur_lang_tag );
1303 		}
1304 		if( cur_loc_range){
1305 			efree( cur_loc_range );
1306 		}
1307 		RETURN_FALSE;
1308 
1309 	}
1310 }
1311 /* }}} */
1312 
/*
 * Frees the pair table built by lookup_loc_range().
 *
 * arr holds arr_size pairs: slot 2*i is a heap copy owned by the table and is
 * freed here; slot 2*i+1 points at a string owned elsewhere and is left
 * alone. The table itself is freed last.
 */
static void array_cleanup( char* arr[] , int arr_size)
{
	int i=0;
	for( i=0; i< arr_size; i++ ){
		if( arr[i*2] ){
			efree( arr[i*2]);
		}
	}
	efree(arr);
}

/*
 * Frees cur_arr (using the caller's local cur_arr / cur_arr_len) and returns
 * value. Wrapped in do/while(0) so it behaves as one statement even inside an
 * unbraced if/else.
 */
#define LOOKUP_CLEAN_RETURN(value)	do { array_cleanup(cur_arr, cur_arr_len); return (value); } while (0)
1325 /* {{{
1326 * returns the lookup result to lookup_loc_range_src_php
1327 * internal function
1328 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize)1329 static zend_string* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize )
1330 {
1331 	int	i = 0;
1332 	int	cur_arr_len = 0;
1333 	int result = 0;
1334 
1335 	zend_string* lang_tag = NULL;
1336 	zval* ele_value = NULL;
1337 
1338 	char* cur_loc_range	= NULL;
1339 	zend_string* can_loc_range	= NULL;
1340 	zend_off_t saved_pos = 0;
1341 
1342 	zend_string* return_value = NULL;
1343 
1344 	char **cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1345 	ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1346 	/* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1347 		if(Z_TYPE_P(ele_value)!= IS_STRING) {
1348 			/* element value is not a string */
1349 			zend_argument_type_error(2, "must only contain string values");
1350 			LOOKUP_CLEAN_RETURN(NULL);
1351 		}
1352 		cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1353 		result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1354 		if(result == 0) {
1355 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0);
1356 			LOOKUP_CLEAN_RETURN(NULL);
1357 		}
1358 		cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1359 		cur_arr_len++ ;
1360 	} ZEND_HASH_FOREACH_END(); /* end of for */
1361 
1362 	/* Canonicalize array elements */
1363 	if(canonicalize) {
1364 		for(i=0; i<cur_arr_len; i++) {
1365 			lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1366 			if(result != 1 || lang_tag == NULL || !lang_tag->val[0]) {
1367 				if(lang_tag) {
1368 					zend_string_release_ex(lang_tag, 0);
1369 				}
1370 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1371 				LOOKUP_CLEAN_RETURN(NULL);
1372 			}
1373 			cur_arr[i*2] = erealloc(cur_arr[i*2], lang_tag->len+1);
1374 			result = strToMatch(lang_tag->val, cur_arr[i*2]);
1375 			zend_string_release_ex(lang_tag, 0);
1376 			if(result == 0) {
1377 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1378 				LOOKUP_CLEAN_RETURN(NULL);
1379 			}
1380 		}
1381 
1382 	}
1383 
1384 	if(canonicalize) {
1385 		/* Canonicalize the loc_range */
1386 		can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1387 		if( result != 1 || can_loc_range == NULL || !can_loc_range->val[0]) {
1388 			/* Error */
1389 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 );
1390 			if(can_loc_range) {
1391 				zend_string_release_ex(can_loc_range, 0);
1392 			}
1393 			LOOKUP_CLEAN_RETURN(NULL);
1394 		} else {
1395 			loc_range = can_loc_range->val;
1396 		}
1397 	}
1398 
1399 	cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1400 	/* convert to lower and replace hyphens */
1401 	result = strToMatch(loc_range, cur_loc_range);
1402 	if(can_loc_range) {
1403 		zend_string_release_ex(can_loc_range, 0);
1404 	}
1405 	if(result == 0) {
1406 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1407 		LOOKUP_CLEAN_RETURN(NULL);
1408 	}
1409 
1410 	/* Lookup for the lang_tag match */
1411 	saved_pos = strlen(cur_loc_range);
1412 	while(saved_pos > 0) {
1413 		for(i=0; i< cur_arr_len; i++){
1414 			if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1415 				/* Match found */
1416 				char *str = canonicalize ? cur_arr[i*2] : cur_arr[i*2+1];
1417 				return_value = zend_string_init(str, strlen(str), 0);
1418 				efree(cur_loc_range);
1419 				LOOKUP_CLEAN_RETURN(return_value);
1420 			}
1421 		}
1422 		saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1423 	}
1424 
1425 	/* Match not found */
1426 	efree(cur_loc_range);
1427 	LOOKUP_CLEAN_RETURN(NULL);
1428 }
1429 /* }}} */
1430 
1431 /* {{{ Searches the items in $langtag for the best match to the language
1432 * range
1433 */
1434 /* }}} */
1435 /* {{{ Searches the items in $langtag for the best match to the language
1436 * range
1437 */
PHP_FUNCTION(locale_lookup)1438 PHP_FUNCTION(locale_lookup)
1439 {
1440 	zend_string*   	fallback_loc_str	= NULL;
1441 	const char*    	loc_range      		= NULL;
1442 	size_t        	loc_range_len  		= 0;
1443 
1444 	zval*		arr				= NULL;
1445 	HashTable*	hash_arr		= NULL;
1446 	bool	boolCanonical	= 0;
1447 	zend_string* 	result_str	= NULL;
1448 
1449 	intl_error_reset( NULL );
1450 
1451 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "as|bS!", &arr, &loc_range, &loc_range_len,
1452 		&boolCanonical,	&fallback_loc_str) == FAILURE) {
1453 		RETURN_THROWS();
1454 	}
1455 
1456 	if(loc_range_len == 0) {
1457 		if(fallback_loc_str) {
1458 			loc_range = ZSTR_VAL(fallback_loc_str);
1459 			loc_range_len = ZSTR_LEN(fallback_loc_str);
1460 		} else {
1461 			loc_range = intl_locale_get_default();
1462 			loc_range_len = strlen(loc_range);
1463 		}
1464 	}
1465 
1466 	hash_arr = Z_ARRVAL_P(arr);
1467 
1468 	INTL_CHECK_LOCALE_LEN(loc_range_len);
1469 
1470 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1471 		RETURN_EMPTY_STRING();
1472 	}
1473 
1474 	result_str = lookup_loc_range(loc_range, hash_arr, boolCanonical);
1475 	if(result_str == NULL || ZSTR_VAL(result_str)[0] == '\0') {
1476 		if( fallback_loc_str ) {
1477 			result_str = zend_string_copy(fallback_loc_str);
1478 		} else {
1479 			RETURN_EMPTY_STRING();
1480 		}
1481 	}
1482 
1483 	RETURN_STR(result_str);
1484 }
1485 /* }}} */
1486 
1487 /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
1488 /* }}} */
1489 /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
PHP_FUNCTION(locale_accept_from_http)1490 PHP_FUNCTION(locale_accept_from_http)
1491 {
1492 	UEnumeration *available;
1493 	char *http_accept = NULL;
1494 	size_t http_accept_len;
1495 	UErrorCode status = 0;
1496 	int len;
1497 	char resultLocale[INTL_MAX_LOCALE_LEN+1];
1498 	UAcceptResult outResult;
1499 
1500 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s", &http_accept, &http_accept_len) == FAILURE)
1501 	{
1502 		RETURN_THROWS();
1503 	}
1504 	if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1505 		/* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1506 		char *start = http_accept;
1507 		char *end;
1508 		size_t len;
1509 		do {
1510 			end = strchr(start, ',');
1511 			len = end ? end-start : http_accept_len-(start-http_accept);
1512 			if(len > ULOC_FULLNAME_CAPACITY) {
1513 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1514 						"locale_accept_from_http: locale string too long", 0 );
1515 				RETURN_FALSE;
1516 			}
1517 			if(end) {
1518 				start = end+1;
1519 			}
1520 		} while(end != NULL);
1521 	}
1522 
1523 	available = ures_openAvailableLocales(NULL, &status);
1524 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1525 	len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1526 						&outResult, http_accept, available, &status);
1527 	uenum_close(available);
1528 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1529 	if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1530 		RETURN_FALSE;
1531 	}
1532 	RETURN_STRINGL(resultLocale, len);
1533 }
1534 /* }}} */
1535