xref: /php-src/ext/intl/locale/locale_methods.c (revision 07fcf5e2)
1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
12    +----------------------------------------------------------------------+
13 */
14 
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include <unicode/ustring.h>
20 #include <unicode/udata.h>
21 #include <unicode/putil.h>
22 #include <unicode/ures.h>
23 
24 #include "php_intl.h"
25 #include "locale.h"
26 #include "locale_class.h"
27 #include "intl_convert.h"
28 #include "intl_data.h"
29 
30 #include <zend_API.h>
31 #include <zend.h>
32 #include <php.h>
33 #include "main/php_ini.h"
34 #include "zend_smart_str.h"
35 
36 ZEND_EXTERN_MODULE_GLOBALS( intl )
37 
38 /* Sizes required for the strings "variant15" , "extlang11", "private12" etc. */
39 #define SEPARATOR "_"
40 #define SEPARATOR1 "-"
41 #define DELIMITER "-_"
42 #define EXTLANG_PREFIX "a"
43 #define PRIVATE_PREFIX "x"
44 #define DISP_NAME "name"
45 
46 #define MAX_NO_VARIANT  15
47 #define MAX_NO_EXTLANG  3
48 #define MAX_NO_PRIVATE  15
49 #define MAX_NO_LOOKUP_LANG_TAG  100
50 
51 #define LOC_NOT_FOUND 1
52 
53 /* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
54 #define VARIANT_KEYNAME_LEN  11
55 #define EXTLANG_KEYNAME_LEN  10
56 #define PRIVATE_KEYNAME_LEN  11
57 
/* Based on the IANA language subtag registry (File-Date: 2021-08-06)
 * https://www.iana.org/assignments/language-subtag-registry
 *
 * This list includes all grandfathered tags, as well as redundant
 * tags that have a Preferred-Value.
 *
 * The array is NULL-terminated (findOffset() stops at the first NULL) and
 * must be kept index-aligned with LOC_PREFERRED_GRANDFATHERED: the entry at
 * offset N here is replaced by the entry at offset N there.
 */
static const char * const LOC_GRANDFATHERED[] = {
	"art-lojban",
	"cel-gaulish",
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-hak",
	"i-klingon",
	"i-lux",
	"i-mingo",
	"i-navajo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"no-bok",
	"no-nyn",
	"sgn-BE-FR",
	"sgn-BE-NL",
	"sgn-BR",
	"sgn-CH-DE",
	"sgn-CO",
	"sgn-DE",
	"sgn-DK",
	"sgn-ES",
	"sgn-FR",
	"sgn-GB",
	"sgn-GR",
	"sgn-IE",
	"sgn-IT",
	"sgn-JP",
	"sgn-MX",
	"sgn-NI",
	"sgn-NL",
	"sgn-NO",
	"sgn-PT",
	"sgn-SE",
	"sgn-US",
	"sgn-ZA",
	"zh-cmn",
	"zh-cmn-Hans",
	"zh-cmn-Hant",
	"zh-gan",
	"zh-guoyu",
	"zh-hakka",
	"zh-min",
	"zh-min-nan",
	"zh-wuu",
	"zh-xiang",
	NULL
};
117 
/* Based on the IANA language subtag registry (File-Date: 2021-08-06)
 *
 * This array lists the preferred values for the grandfathered and redundant
 * tags listed in LOC_GRANDFATHERED. This is in sync with the array
 * LOC_GRANDFATHERED, i.e., the offsets of the grandfathered tags match the
 * offsets of the preferred value. If a value in LOC_PREFERRED_GRANDFATHERED is
 * NULL, then the matching offset in LOC_GRANDFATHERED has no preferred value.
 *
 * Because NULL is a legitimate entry here, this array must only be indexed
 * with an offset obtained from LOC_GRANDFATHERED; it cannot be scanned for a
 * terminator. The final NULL lines up with LOC_GRANDFATHERED's terminator.
 */
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
	"jbo",
	NULL,
	"en-GB-oxendict",
	"ami",
	"bnn",
	NULL,
	NULL,
	"hak",
	"tlh",
	"lb",
	NULL,
	"nv",
	"pwn",
	"tao",
	"tay",
	"tsu",
	"nb",
	"nn",
	"sfb",
	"vgt",
	"bzs",
	"sgg",
	"csn",
	"gsg",
	"dsl",
	"ssp",
	"fsl",
	"bfi",
	"gss",
	"isg",
	"ise",
	"jsl",
	"mfs",
	"ncs",
	"dse",
	"nsl",
	"psr",
	"swl",
	"ase",
	"sfs",
	"cmn",
	"cmn-Hans",
	"cmn-Hant",
	"gan",
	"cmn",
	"hak",
	NULL,
	"nan",
	"wuu",
	"hsn",
	NULL
};
179 
/* Character/prefix classifiers used while scanning locale IDs.
 *
 * Every macro parameter is wrapped in parentheses so that compound
 * arguments (e.g. `p + 1`, `cond ? a : b`, `*(str + i)`) expand with the
 * intended precedence. Arguments may still be evaluated more than once, so
 * do not pass expressions with side effects. */

/* returns true if a is an ID separator, false otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns true if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0])&&isIDSeparator((s)[1]))
#define isKeywordPrefix(s) ( isKeywordSeparator((s)[0]) )

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
195 
/* {{{ Case-insensitive lookup of 'key' in the NULL-terminated array 'list'.
 * Returns the zero-based offset of the first matching entry,
 * or -1 when no entry matches. */
static int16_t findOffset(const char* const* list, const char* key)
{
	const char* const* cursor;

	for (cursor = list; *cursor != NULL; cursor++) {
		if (strcasecmp(key, *cursor) != 0) {
			continue;
		}
		/* distance from the start of the array is the offset */
		return (int16_t)(cursor - list);
	}

	return -1;
}
/*}}}*/
212 
getPreferredTag(const char * gf_tag)213 static char* getPreferredTag(const char* gf_tag)
214 {
215 	char* result = NULL;
216 	zend_off_t grOffset = 0;
217 
218 	grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
219 	if(grOffset < 0) {
220 		return NULL;
221 	}
222 	if( LOC_PREFERRED_GRANDFATHERED[grOffset] != NULL ){
223 		/* return preferred tag */
224 		result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
225 	} else {
226 		/* Return correct grandfathered language tag */
227 		result = estrdup( LOC_GRANDFATHERED[grOffset] );
228 	}
229 	return result;
230 }
231 
232 /* {{{
233 * returns the position of next token for lookup
234 * or -1 if no token
235 * strtokr equivalent search for token in reverse direction
236 */
getStrrtokenPos(char * str,zend_off_t savedPos)237 static zend_off_t getStrrtokenPos(char* str, zend_off_t savedPos)
238 {
239 	zend_off_t result =-1;
240 	zend_off_t i;
241 
242 	for(i=savedPos-1; i>=0; i--) {
243 		if(isIDSeparator(*(str+i)) || isKeywordSeparator(*(str+i))){
244 			/* delimiter found; check for singleton */
245 			if(i>=2 && isIDSeparator(*(str+i-2)) ){
246 				/* a singleton; so send the position of token before the singleton */
247 				result = i-2;
248 			} else {
249 				result = i;
250 			}
251 			break;
252 		}
253 	}
254 	if(result < 1){
255 		/* Just in case inavlid locale e.g. '-x-xyz' or '-sl_Latn' */
256 		result =-1;
257 	}
258 	return result;
259 }
260 /* }}} */
261 
262 /* {{{
263 * returns the position of a singleton if present
264 * returns -1 if no singleton
265 * strtok equivalent search for singleton
266 */
getSingletonPos(const char * str)267 static zend_off_t getSingletonPos(const char* str)
268 {
269 	zend_off_t result =-1;
270 	size_t len = 0;
271 
272 	if( str && ((len=strlen(str))>0) ){
273 		zend_off_t i = 0;
274 		for( i=0; (size_t)i < len ; i++){
275 			if( isIDSeparator(*(str+i)) ){
276 				if( i==1){
277 					/* string is of the form x-avy or a-prv1 */
278 					result =0;
279 					break;
280 				} else {
281 					/* delimiter found; check for singleton */
282 					if( isIDSeparator(*(str+i+2)) ){
283 						/* a singleton; so send the position of separator before singleton */
284 						result = i+1;
285 						break;
286 					}
287 				}
288 			}
289 		}/* end of for */
290 
291 	}
292 	return result;
293 }
294 /* }}} */
295 
296 /* {{{ Get default locale */
297 /* }}} */
298 /* {{{ Get default locale */
PHP_NAMED_FUNCTION(zif_locale_get_default)299 PHP_NAMED_FUNCTION(zif_locale_get_default)
300 {
301 	if (zend_parse_parameters_none() == FAILURE) {
302 		RETURN_THROWS();
303 	}
304 
305 	RETURN_STRING( intl_locale_get_default(  ) );
306 }
307 
308 /* }}} */
309 
310 /* {{{ Set default locale */
311 /* }}} */
312 /* {{{ Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)313 PHP_NAMED_FUNCTION(zif_locale_set_default)
314 {
315 	zend_string* locale_name;
316 	zend_string *ini_name;
317 	char *default_locale = NULL;
318 
319 	if(zend_parse_parameters( ZEND_NUM_ARGS(),  "S", &locale_name) == FAILURE)
320 	{
321 		RETURN_THROWS();
322 	}
323 
324 	if (ZSTR_LEN(locale_name) == 0) {
325 		default_locale = (char *)uloc_getDefault();
326 		locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
327 	}
328 
329 	ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
330 	zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
331 	zend_string_release_ex(ini_name, 0);
332 	if (default_locale != NULL) {
333 		zend_string_release_ex(locale_name, 0);
334 	}
335 
336 	RETURN_TRUE;
337 }
338 /* }}} */
339 
340 /* {{{
341 * Gets the value from ICU
342 * common code shared by get_primary_language,get_script or get_region or get_variant
343 * result = 0 if error, 1 if successful , -1 if no value
344 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)345 static zend_string* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
346 {
347 	zend_string* tag_value	    = NULL;
348 	int32_t      tag_value_len  = 512;
349 
350 	char*        mod_loc_name   = NULL;
351 
352 	int32_t      buflen         = 512;
353 	UErrorCode   status         = U_ZERO_ERROR;
354 
355 	if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
356 		return NULL;
357 	}
358 
359 	if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
360 		/* Handle  grandfathered languages */
361 		zend_off_t grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
362 		if( grOffset >= 0 ){
363 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
364 				return zend_string_init(loc_name, strlen(loc_name), 0);
365 			} else {
366 				/* Since Grandfathered , no value , do nothing , retutn NULL */
367 				return NULL;
368 			}
369 		}
370 
371 	if( fromParseLocale==1 ){
372 		zend_off_t singletonPos = 0;
373 
374 		/* Handle singletons */
375 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
376 			if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
377 				return zend_string_init(loc_name, strlen(loc_name), 0);
378 			}
379 		}
380 
381 		singletonPos = getSingletonPos( loc_name );
382 		if( singletonPos == 0){
383 			/* singleton at start of script, region , variant etc.
384 			 * or invalid singleton at start of language */
385 			return NULL;
386 		} else if( singletonPos > 0 ){
387 			/* singleton at some position except at start
388 			 * strip off the singleton and rest of the loc_name */
389 			mod_loc_name = estrndup ( loc_name , singletonPos-1);
390 		}
391 	} /* end of if fromParse */
392 
393 	} /* end of if != LOC_CANONICAL_TAG */
394 
395 	if( mod_loc_name == NULL){
396 		mod_loc_name = estrdup(loc_name );
397 	}
398 
399 	/* Proceed to ICU */
400 	do{
401 		if (tag_value) {
402 			tag_value = zend_string_realloc( tag_value , buflen, 0);
403 		} else {
404 			tag_value = zend_string_alloc( buflen, 0);
405 		}
406 		tag_value_len = buflen;
407 
408 		if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
409 			buflen = uloc_getScript ( mod_loc_name , tag_value->val , tag_value_len , &status);
410 		}
411 		if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
412 			buflen = uloc_getLanguage ( mod_loc_name , tag_value->val , tag_value_len , &status);
413 		}
414 		if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
415 			buflen = uloc_getCountry ( mod_loc_name , tag_value->val , tag_value_len , &status);
416 		}
417 		if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
418 			buflen = uloc_getVariant ( mod_loc_name , tag_value->val , tag_value_len , &status);
419 		}
420 		if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
421 			buflen = uloc_canonicalize ( mod_loc_name , tag_value->val , tag_value_len , &status);
422 		}
423 
424 		if( U_FAILURE( status ) ) {
425 			if( status == U_BUFFER_OVERFLOW_ERROR ) {
426 				status = U_ZERO_ERROR;
427 				buflen++; /* add space for \0 */
428 				continue;
429 			}
430 
431 			/* Error in retrieving data */
432 			*result = 0;
433 			if( tag_value ){
434 				zend_string_release_ex( tag_value, 0 );
435 			}
436 			if( mod_loc_name ){
437 				efree( mod_loc_name);
438 			}
439 			return NULL;
440 		}
441 	} while( buflen > tag_value_len );
442 
443 	if(  buflen ==0 ){
444 		/* No value found */
445 		*result = -1;
446 		if( tag_value ){
447 			zend_string_release_ex( tag_value, 0 );
448 		}
449 		if( mod_loc_name ){
450 			efree( mod_loc_name);
451 		}
452 		return NULL;
453 	} else {
454 		*result = 1;
455 	}
456 
457 	if( mod_loc_name ){
458 		efree( mod_loc_name);
459 	}
460 
461 	tag_value->len = strlen(tag_value->val);
462 	return tag_value;
463 }
464 /* }}} */
465 
466 /* {{{
467 * Gets the value from ICU , called when PHP userspace function is called
468 * common code shared by get_primary_language,get_script or get_region or get_variant
469 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)470 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
471 {
472 
473 	const char* loc_name        	= NULL;
474 	size_t         loc_name_len    	= 0;
475 
476 	zend_string*   tag_value		= NULL;
477 	char*       empty_result	= "";
478 
479 	int         result    		= 0;
480 	char*       msg        		= NULL;
481 
482 	UErrorCode  status          	= U_ZERO_ERROR;
483 
484 	intl_error_reset( NULL );
485 
486 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
487 	&loc_name ,&loc_name_len ) == FAILURE) {
488 		RETURN_THROWS();
489 	}
490 
491 	if(loc_name_len == 0) {
492 		loc_name = intl_locale_get_default();
493 		loc_name_len = strlen(loc_name);
494 	}
495 
496 	INTL_CHECK_LOCALE_LEN(loc_name_len);
497 
498 	/* Call ICU get */
499 	tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
500 
501 	/* No value found */
502 	if( result == -1 ) {
503 		if( tag_value){
504 			zend_string_release_ex( tag_value, 0 );
505 		}
506 		RETURN_STRING( empty_result);
507 	}
508 
509 	/* value found */
510 	if( tag_value){
511 		RETVAL_STR( tag_value );
512 		return;
513 	}
514 
515 	/* Error encountered while fetching the value */
516 	if( result ==0) {
517 		spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
518 		intl_error_set( NULL, status, msg , 1 );
519 		efree(msg);
520 		RETURN_NULL();
521 	}
522 
523 }
524 /* }}} */
525 
/* {{{ gets the script for the $locale
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_SCRIPT_TAG selecting the component to fetch. */
PHP_FUNCTION( locale_get_script )
{
	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
532 
/* {{{ gets the region for the $locale
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_REGION_TAG selecting the component to fetch. */
PHP_FUNCTION( locale_get_region )
{
	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
539 
/* {{{ gets the primary language for the $locale
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_LANG_TAG selecting the component to fetch. */
PHP_FUNCTION(locale_get_primary_language )
{
	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
546 
547 
548 /* {{{
549  * common code shared by display_xyz functions to  get the value from ICU
550  }}} */
get_icu_disp_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)551 static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
552 {
553 	const char* loc_name        	= NULL;
554 	size_t         loc_name_len    	= 0;
555 
556 	const char* disp_loc_name       = NULL;
557 	size_t      disp_loc_name_len   = 0;
558 	int         free_loc_name       = 0;
559 
560 	UChar*      disp_name      	= NULL;
561 	int32_t     disp_name_len  	= 0;
562 
563 	char*       mod_loc_name        = NULL;
564 
565 	int32_t     buflen          	= 512;
566 	UErrorCode  status          	= U_ZERO_ERROR;
567 
568 	zend_string* u8str;
569 
570 	char*       msg             	= NULL;
571 
572 	intl_error_reset( NULL );
573 
574 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|s!",
575 		&loc_name, &loc_name_len ,
576 		&disp_loc_name ,&disp_loc_name_len ) == FAILURE)
577 	{
578 		RETURN_THROWS();
579 	}
580 
581 	if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
582 		/* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
583 		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
584 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
585 		efree(msg);
586 		RETURN_FALSE;
587 	}
588 
589 	if(loc_name_len == 0) {
590 		loc_name = intl_locale_get_default();
591 	}
592 
593 	if( strcmp(tag_name, DISP_NAME) != 0 ){
594 		/* Handle grandfathered languages */
595 		int grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
596 		if( grOffset >= 0 ){
597 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
598 				mod_loc_name = getPreferredTag( loc_name );
599 			} else {
600 				/* Since Grandfathered, no value, do nothing, return NULL */
601 				RETURN_FALSE;
602 			}
603 		}
604 	} /* end of if != LOC_CANONICAL_TAG */
605 
606 	if( mod_loc_name==NULL ){
607 		mod_loc_name = estrdup( loc_name );
608 	}
609 
610 	/* Check if disp_loc_name passed , if not use default locale */
611 	if( !disp_loc_name){
612 		disp_loc_name = estrdup(intl_locale_get_default());
613 		free_loc_name = 1;
614 	}
615 
616 	/* Get the disp_value for the given locale */
617 	do{
618 		disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
619 		disp_name_len = buflen;
620 
621 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
622 			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
623 		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
624 			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
625 		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
626 			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
627 		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
628 			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
629 		} else if( strcmp(tag_name , DISP_NAME)==0 ){
630 			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
631 		}
632 
633 		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
634 		if( U_FAILURE( status ) )
635 		{
636 			if( status == U_BUFFER_OVERFLOW_ERROR )
637 			{
638 				status = U_ZERO_ERROR;
639 				continue;
640 			}
641 
642 			spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
643 			intl_error_set( NULL, status, msg , 1 );
644 			efree(msg);
645 			if( disp_name){
646 				efree( disp_name );
647 			}
648 			if( mod_loc_name){
649 				efree( mod_loc_name );
650 			}
651 			if (free_loc_name) {
652 				efree((void *)disp_loc_name);
653 				disp_loc_name = NULL;
654 			}
655 			RETURN_FALSE;
656 		}
657 	} while( buflen > disp_name_len );
658 
659 	if( mod_loc_name){
660 		efree( mod_loc_name );
661 	}
662 	if (free_loc_name) {
663 		efree((void *)disp_loc_name);
664 		disp_loc_name = NULL;
665 	}
666 	/* Convert display locale name from UTF-16 to UTF-8. */
667 	u8str = intl_convert_utf16_to_utf8(disp_name, buflen, &status );
668 	efree( disp_name );
669 	if( !u8str )
670 	{
671 		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
672 		intl_error_set( NULL, status, msg , 1 );
673 		efree(msg);
674 		RETURN_FALSE;
675 	}
676 
677 	RETVAL_NEW_STR( u8str );
678 }
679 /* }}} */
680 
/* {{{ gets the name for the $locale in $in_locale or default_locale
 * Thin wrapper: forwards to get_icu_disp_value_src_php() with DISP_NAME,
 * which selects uloc_getDisplayName() there. */
PHP_FUNCTION(locale_get_display_name)
{
	get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
687 
/* {{{ gets the language for the $locale in $in_locale or default_locale
 * Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_LANG_TAG. */
PHP_FUNCTION(locale_get_display_language)
{
	get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
694 
/* {{{ gets the script for the $locale in $in_locale or default_locale
 * Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_SCRIPT_TAG. */
PHP_FUNCTION(locale_get_display_script)
{
	get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
701 
/* {{{ gets the region for the $locale in $in_locale or default_locale
 * Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_REGION_TAG. */
PHP_FUNCTION(locale_get_display_region)
{
	get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
708 
/* {{{
* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
* proto static string get_display_variant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
* Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_VARIANT_TAG.
*/
PHP_FUNCTION(locale_get_display_variant)
{
	get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
722 
723  /* {{{ return an associative array containing keyword-value
724  * pairs for this locale. The keys are keys to the array (doh!)
725  * }}}*/
726  /* {{{ return an associative array containing keyword-value
727  * pairs for this locale. The keys are keys to the array (doh!)
728  */
PHP_FUNCTION(locale_get_keywords)729 PHP_FUNCTION( locale_get_keywords )
730 {
731 	UEnumeration*   e        = NULL;
732 	UErrorCode      status   = U_ZERO_ERROR;
733 
734 	const char*	 	kw_key        = NULL;
735 	int32_t         kw_key_len    = 0;
736 
737 	const char*       	loc_name        = NULL;
738 	size_t        	 	loc_name_len    = 0;
739 
740 	intl_error_reset( NULL );
741 
742 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
743 		&loc_name, &loc_name_len ) == FAILURE)
744 	{
745 		RETURN_THROWS();
746 	}
747 
748 	INTL_CHECK_LOCALE_LEN(strlen(loc_name));
749 
750 	if(loc_name_len == 0) {
751 		loc_name = intl_locale_get_default();
752 	}
753 
754 	/* Get the keywords */
755 	e = uloc_openKeywords( loc_name, &status );
756 	if( e != NULL ) {
757 		/*
758 		ICU expects the buffer to be allocated  before calling the function
759 		and so the buffer size has been explicitly specified
760 		ICU uloc.h #define 	ULOC_KEYWORD_AND_VALUES_CAPACITY   100
761 		hence the kw_value buffer size is 100
762 		*/
763 
764 		/* Traverse it, filling the return array. */
765 		array_init( return_value );
766 
767 		while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
768 			int32_t kw_value_len = 100;
769 			zend_string *kw_value_str = zend_string_alloc(kw_value_len, 0);
770 
771 			/* Get the keyword value for each keyword */
772 			kw_value_len=uloc_getKeywordValue( loc_name, kw_key, ZSTR_VAL(kw_value_str), kw_value_len, &status );
773 			if (status == U_BUFFER_OVERFLOW_ERROR) {
774 				status = U_ZERO_ERROR;
775 				kw_value_str = zend_string_extend(kw_value_str, kw_value_len, 0);
776 				kw_value_len=uloc_getKeywordValue( loc_name,kw_key, ZSTR_VAL(kw_value_str), kw_value_len+1, &status );
777 			} else if(!U_FAILURE(status)) {
778 				kw_value_str = zend_string_truncate(kw_value_str, kw_value_len, 0);
779 			}
780 			if (U_FAILURE(status)) {
781 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 );
782 				if( kw_value_str){
783 					zend_string_efree( kw_value_str );
784 				}
785 				zend_array_destroy(Z_ARR_P(return_value));
786 				RETURN_FALSE;
787 			}
788 
789 			add_assoc_str( return_value, (char *)kw_key, kw_value_str);
790 		} /* end of while */
791 
792 	} /* end of if e!=NULL */
793 
794 	uenum_close( e );
795 }
796 /* }}} */
797 
/* {{{ @param string $locale	The locale string to canonicalize
 * @return string the canonicalized locale
 * Thin wrapper: forwards to get_icu_value_src_php() with
 * LOC_CANONICALIZE_TAG. */
PHP_FUNCTION(locale_canonicalize)
{
	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
806 
807 /* {{{ append_key_value
808 * Internal function which is called from locale_compose
809 * gets the value for the key_name and appends to the loc_name
810 * returns 1 if successful , -1 if not found ,
811 * 0 if array element is not a string , -2 if buffer-overflow
812 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)813 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
814 {
815 	zval *ele_value;
816 
817 	if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
818 		if(Z_TYPE_P(ele_value)!= IS_STRING ){
819 			/* element value is not a string */
820 			return FAILURE;
821 		}
822 		if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
823 		   strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
824 			/* not lang or grandfathered tag */
825 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
826 		}
827 		smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
828 		return SUCCESS;
829 	}
830 
831 	return LOC_NOT_FOUND;
832 }
833 /* }}} */
834 
835 /* {{{ append_prefix , appends the prefix needed
836 * e.g. private adds 'x'
837 */
add_prefix(smart_str * loc_name,char * key_name)838 static void add_prefix(smart_str* loc_name, char* key_name)
839 {
840 	if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
841 		smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
842 		smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
843 	}
844 }
845 /* }}} */
846 
847 /* {{{ append_multiple_key_values
848 * Internal function which is called from locale_compose
849 * gets the multiple values for the key_name and appends to the loc_name
850 * used for 'variant','extlang','private'
851 * returns 1 if successful , -1 if not found ,
852 * 0 if array element is not a string , -2 if buffer-overflow
853 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name)854 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name)
855 {
856 	zval	*ele_value;
857 	int 	isFirstSubtag 	= 0;
858 
859 	/* Variant/ Extlang/Private etc. */
860 	if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
861 		if( Z_TYPE_P(ele_value) == IS_STRING ){
862 			add_prefix( loc_name , key_name);
863 
864 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
865 			smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
866 			return SUCCESS;
867 		} else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
868 			HashTable *arr = Z_ARRVAL_P(ele_value);
869 			zval *data;
870 
871 			ZEND_HASH_FOREACH_VAL(arr, data) {
872 				if(Z_TYPE_P(data) != IS_STRING) {
873 					return FAILURE;
874 				}
875 				if (isFirstSubtag++ == 0){
876 					add_prefix(loc_name , key_name);
877 				}
878 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
879 				smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
880 			} ZEND_HASH_FOREACH_END();
881 			return SUCCESS;
882 		} else {
883 			return FAILURE;
884 		}
885 	} else {
886 		char cur_key_name[31];
887 		int  max_value = 0, i;
888 		/* Decide the max_value: the max. no. of elements allowed */
889 		if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
890 			max_value  = MAX_NO_VARIANT;
891 		}
892 		if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
893 			max_value  = MAX_NO_EXTLANG;
894 		}
895 		if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
896 			max_value  = MAX_NO_PRIVATE;
897 		}
898 
899 		/* Multiple variant values as variant0, variant1 ,variant2 */
900 		isFirstSubtag = 0;
901 		for( i=0 ; i< max_value; i++ ){
902 			snprintf( cur_key_name , 30, "%s%d", key_name , i);
903 			if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
904 				if( Z_TYPE_P(ele_value)!= IS_STRING ){
905 					/* variant is not a string */
906 					return FAILURE;
907 				}
908 				/* Add the contents */
909 				if (isFirstSubtag++ == 0){
910 					add_prefix(loc_name , cur_key_name);
911 				}
912 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
913 				smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
914 			}
915 		} /* end of for */
916 	} /* end of else */
917 
918 	return SUCCESS;
919 }
920 /* }}} */
921 
922 /*{{{
923 * If applicable sets error message and aborts locale_compose gracefully
924 * returns 0  if locale_compose needs to be aborted
925 * otherwise returns 1
926 */
handleAppendResult(int result,smart_str * loc_name)927 static int handleAppendResult( int result, smart_str* loc_name)
928 {
929 	intl_error_reset( NULL );
930 	if( result == FAILURE) {
931 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
932 			 "locale_compose: parameter array element is not a string", 0 );
933 		smart_str_free(loc_name);
934 		return 0;
935 	}
936 	return 1;
937 }
938 /* }}} */
939 
940 #define RETURN_SMART_STR(str) smart_str_0((str)); RETURN_NEW_STR((str)->s)
941 /* {{{ Creates a locale by combining the parts of locale-ID passed
942 * }}} */
943 /* {{{ Creates a locale by combining the parts of locale-ID passed
944 * }}} */
PHP_FUNCTION(locale_compose)945 PHP_FUNCTION(locale_compose)
946 {
947 	smart_str      	loc_name_s = {0};
948 	smart_str *loc_name = &loc_name_s;
949 	zval*			arr	= NULL;
950 	HashTable*		hash_arr = NULL;
951 	int 			result = 0;
952 
953 	intl_error_reset( NULL );
954 
955 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "a",
956 		&arr) == FAILURE)
957 	{
958 		RETURN_THROWS();
959 	}
960 
961 	hash_arr = Z_ARRVAL_P( arr );
962 
963 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
964 		RETURN_FALSE;
965 
966 	/* Check for grandfathered first */
967 	result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
968 	if( result == SUCCESS){
969 		RETURN_SMART_STR(loc_name);
970 	}
971 	if( !handleAppendResult( result, loc_name)){
972 		RETURN_FALSE;
973 	}
974 
975 	/* Not grandfathered */
976 	result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
977 	if( result == LOC_NOT_FOUND ){
978 		zend_argument_value_error(1, "must contain a \"%s\" key", LOC_LANG_TAG);
979 		smart_str_free(loc_name);
980 		RETURN_THROWS();
981 	}
982 	if( !handleAppendResult( result, loc_name)){
983 		RETURN_FALSE;
984 	}
985 
986 	/* Extlang */
987 	result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG);
988 	if( !handleAppendResult( result, loc_name)){
989 		RETURN_FALSE;
990 	}
991 
992 	/* Script */
993 	result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
994 	if( !handleAppendResult( result, loc_name)){
995 		RETURN_FALSE;
996 	}
997 
998 	/* Region */
999 	result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
1000 	if( !handleAppendResult( result, loc_name)){
1001 		RETURN_FALSE;
1002 	}
1003 
1004 	/* Variant */
1005 	result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG);
1006 	if( !handleAppendResult( result, loc_name)){
1007 		RETURN_FALSE;
1008 	}
1009 
1010 	/* Private */
1011 	result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG);
1012 	if( !handleAppendResult( result, loc_name)){
1013 		RETURN_FALSE;
1014 	}
1015 
1016 	RETURN_SMART_STR(loc_name);
1017 }
1018 /* }}} */
1019 
1020 
1021 /*{{{
1022 * Parses the locale and returns private subtags  if existing
1023 * else returns NULL
1024 * e.g. for locale='en_US-x-prv1-prv2-prv3'
1025 * returns a pointer to the string 'prv1-prv2-prv3'
1026 */
get_private_subtags(const char * loc_name)1027 static zend_string* get_private_subtags(const char* loc_name)
1028 {
1029 	zend_string* result = NULL;
1030 	size_t       len = 0;
1031 	const char*  mod_loc_name =NULL;
1032 
1033 	if( loc_name && (len = strlen(loc_name)) > 0 ){
1034 		zend_off_t singletonPos = 0;
1035 		mod_loc_name = loc_name ;
1036 		while( (singletonPos = getSingletonPos(mod_loc_name)) > -1){
1037 			if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){
1038 				/* private subtag start found */
1039 				if( singletonPos + 2 ==  len){
1040 					/* loc_name ends with '-x-' ; return  NULL */
1041 				}
1042 				else{
1043 					/* result = mod_loc_name + singletonPos +2; */
1044 					result = zend_string_init(mod_loc_name + singletonPos+2  , (len -( singletonPos +2) ), 0);
1045 				}
1046 				break;
1047 			}
1048 			else{
1049 				if((size_t)(singletonPos + 1) >= len){
1050 					/* String end */
1051 					break;
1052 				} else {
1053 					/* singleton found but not a private subtag , hence check further in the string for the private subtag */
1054 					mod_loc_name = mod_loc_name + singletonPos +1;
1055 					len = strlen(mod_loc_name);
1056 				}
1057 			}
1058 		} /* end of while */
1059 	}
1060 
1061 	return result;
1062 }
1063 /* }}} */
1064 
1065 /* {{{ code used by locale_parse */
add_array_entry(const char * loc_name,zval * hash_arr,char * key_name)1066 static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name)
1067 {
1068 	zend_string*   key_value 	= NULL;
1069 	char*   cur_key_name	= NULL;
1070 	char*   token        	= NULL;
1071 	char*   last_ptr  	= NULL;
1072 
1073 	int	result		= 0;
1074 	int 	cur_result  	= 0;
1075 
1076 
1077 	if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1078 		key_value = get_private_subtags( loc_name );
1079 		result = 1;
1080 	} else {
1081 		key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1082 	}
1083 	if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1084 		( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1085 		if( result > 0 && key_value){
1086 			int cnt = 0;
1087 			/* Tokenize on the "_" or "-"  */
1088 			token = php_strtok_r( key_value->val , DELIMITER ,&last_ptr);
1089 			if( cur_key_name ){
1090 				efree( cur_key_name);
1091 			}
1092 			cur_key_name = (char*)ecalloc( 25,  25);
1093 			sprintf( cur_key_name , "%s%d", key_name , cnt++);
1094 			add_assoc_string( hash_arr, cur_key_name , token);
1095 			/* tokenize on the "_" or "-" and stop  at singleton if any */
1096 			while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1097 				sprintf( cur_key_name , "%s%d", key_name , cnt++);
1098 				add_assoc_string( hash_arr, cur_key_name , token);
1099 			}
1100 /*
1101 			if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1102 			}
1103 */
1104 		}
1105 		if (key_value) {
1106 			zend_string_release_ex(key_value, 0);
1107 		}
1108 	} else {
1109 		if( result == 1 ){
1110 			add_assoc_str( hash_arr, key_name , key_value);
1111 			cur_result = 1;
1112 		} else if (key_value) {
1113 			zend_string_release_ex(key_value, 0);
1114 		}
1115 	}
1116 
1117 	if( cur_key_name ){
1118 		efree( cur_key_name);
1119 	}
1120 	/*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1121 	return cur_result;
1122 }
1123 /* }}} */
1124 
1125 /* {{{ parses a locale-id into an array the different parts of it */
PHP_FUNCTION(locale_parse)1126 PHP_FUNCTION(locale_parse)
1127 {
1128 	const char* loc_name        = NULL;
1129 	size_t         loc_name_len    = 0;
1130 	int         grOffset    	= 0;
1131 
1132 	intl_error_reset( NULL );
1133 
1134 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1135 		&loc_name, &loc_name_len ) == FAILURE)
1136 	{
1137 		RETURN_THROWS();
1138 	}
1139 
1140 	INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1141 
1142 	if(loc_name_len == 0) {
1143 		loc_name = intl_locale_get_default();
1144 	}
1145 
1146 	array_init( return_value );
1147 
1148 	grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1149 	if( grOffset >= 0 ){
1150 		add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1151 	}
1152 	else{
1153 		/* Not grandfathered */
1154 		add_array_entry( loc_name , return_value , LOC_LANG_TAG);
1155 		add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG);
1156 		add_array_entry( loc_name , return_value , LOC_REGION_TAG);
1157 		add_array_entry( loc_name , return_value , LOC_VARIANT_TAG);
1158 		add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG);
1159 	}
1160 }
1161 /* }}} */
1162 
1163 /* {{{ gets an array containing the list of variants, or null */
PHP_FUNCTION(locale_get_all_variants)1164 PHP_FUNCTION(locale_get_all_variants)
1165 {
1166 	const char*  	loc_name        = NULL;
1167 	size_t    		loc_name_len    = 0;
1168 
1169 	int	result		= 0;
1170 	char*	token		= NULL;
1171 	zend_string*	variant		= NULL;
1172 	char*	saved_ptr	= NULL;
1173 
1174 	intl_error_reset( NULL );
1175 
1176 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1177 	&loc_name, &loc_name_len ) == FAILURE)
1178 	{
1179 		RETURN_THROWS();
1180 	}
1181 
1182 	if(loc_name_len == 0) {
1183 		loc_name = intl_locale_get_default();
1184 		loc_name_len = strlen(loc_name);
1185 	}
1186 
1187 	INTL_CHECK_LOCALE_LEN(loc_name_len);
1188 
1189 	array_init( return_value );
1190 
1191 	/* If the locale is grandfathered, stop, no variants */
1192 	if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1193 		/* ("Grandfathered Tag. No variants."); */
1194 	}
1195 	else {
1196 	/* Call ICU variant */
1197 		variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1198 		if( result > 0 && variant){
1199 			/* Tokenize on the "_" or "-" */
1200 			token = php_strtok_r( variant->val , DELIMITER , &saved_ptr);
1201 			add_next_index_stringl( return_value, token , strlen(token));
1202 			/* tokenize on the "_" or "-" and stop  at singleton if any	*/
1203 			while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1204 				add_next_index_stringl( return_value, token , strlen(token));
1205 			}
1206 		}
1207 		if( variant ){
1208 			zend_string_release_ex( variant, 0 );
1209 		}
1210 	}
1211 
1212 
1213 }
1214 /* }}} */
1215 
/* {{{ Converts to lower case and also replaces all hyphens with the underscore
 *
 * str    - NUL-terminated input; may be NULL.
 * retstr - output buffer; must have room for strlen(str) + 1 bytes.
 *
 * Returns 1 after writing the converted, NUL-terminated copy to retstr.
 * Returns 0 without touching retstr when str is NULL or empty.
 */
static int strToMatch(const char* str ,char *retstr)
{
	if( str == NULL || str[0] == '\0' ){
		return 0;
	}

	for( ; *str != '\0'; str++, retstr++ ){
		if( *str == '-' ){
			*retstr = '_';
		} else {
			/* tolower() is only defined for values representable as
			 * unsigned char (or EOF), so bytes >= 0x80 must not be passed
			 * as negative plain chars */
			*retstr = (char)tolower((unsigned char)*str);
		}
	}
	*retstr = '\0';

	return 1;
}
/* }}} */
1246 
1247 /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
1248 /* }}} */
1249 /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
PHP_FUNCTION(locale_filter_matches)1250 PHP_FUNCTION(locale_filter_matches)
1251 {
1252 	char*       	lang_tag        = NULL;
1253 	size_t         	lang_tag_len    = 0;
1254 	const char*     loc_range       = NULL;
1255 	size_t         	loc_range_len   = 0;
1256 
1257 	int		result		= 0;
1258 	char*		token		= 0;
1259 	char*		chrcheck	= NULL;
1260 
1261 	zend_string*   	can_lang_tag    = NULL;
1262 	zend_string*   	can_loc_range   = NULL;
1263 
1264 	char*       	cur_lang_tag    = NULL;
1265 	char*       	cur_loc_range   = NULL;
1266 
1267 	bool 	boolCanonical 	= 0;
1268 	UErrorCode	status		= U_ZERO_ERROR;
1269 
1270 	intl_error_reset( NULL );
1271 
1272 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "ss|b",
1273 		&lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1274 		&boolCanonical) == FAILURE)
1275 	{
1276 		RETURN_THROWS();
1277 	}
1278 
1279 	if(loc_range_len == 0) {
1280 		loc_range = intl_locale_get_default();
1281 		loc_range_len = strlen(loc_range);
1282 	}
1283 
1284 	if( strcmp(loc_range,"*")==0){
1285 		RETURN_TRUE;
1286 	}
1287 
1288 	INTL_CHECK_LOCALE_LEN(loc_range_len);
1289 	INTL_CHECK_LOCALE_LEN(lang_tag_len);
1290 
1291 	if( boolCanonical ){
1292 		/* canonicalize loc_range */
1293 		can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1294 		if( result <=0) {
1295 			intl_error_set( NULL, status,
1296 				"locale_filter_matches : unable to canonicalize loc_range" , 0 );
1297 			RETURN_FALSE;
1298 		}
1299 
1300 		/* canonicalize lang_tag */
1301 		can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1302 		if( result <=0) {
1303 			intl_error_set( NULL, status,
1304 				"locale_filter_matches : unable to canonicalize lang_tag" , 0 );
1305 			RETURN_FALSE;
1306 		}
1307 
1308 		/* Convert to lower case for case-insensitive comparison */
1309 		cur_lang_tag = ecalloc( 1, can_lang_tag->len + 1);
1310 
1311 		/* Convert to lower case for case-insensitive comparison */
1312 		result = strToMatch( can_lang_tag->val , cur_lang_tag);
1313 		if( result == 0) {
1314 			efree( cur_lang_tag );
1315 			zend_string_release_ex( can_lang_tag, 0 );
1316 			RETURN_FALSE;
1317 		}
1318 
1319 		cur_loc_range = ecalloc( 1, can_loc_range->len + 1);
1320 		result = strToMatch( can_loc_range->val , cur_loc_range );
1321 		if( result == 0) {
1322 			efree( cur_lang_tag );
1323 			zend_string_release_ex( can_lang_tag, 0 );
1324 			efree( cur_loc_range );
1325 			zend_string_release_ex( can_loc_range, 0 );
1326 			RETURN_FALSE;
1327 		}
1328 
1329 		/* check if prefix */
1330 		token 	= strstr( cur_lang_tag , cur_loc_range );
1331 
1332 		if( token && (token==cur_lang_tag) ){
1333 			/* check if the char. after match is SEPARATOR */
1334 			chrcheck = token + (strlen(cur_loc_range));
1335 			if( isIDSeparator(*chrcheck) || isKeywordSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1336 				efree( cur_lang_tag );
1337 				efree( cur_loc_range );
1338 				if( can_lang_tag){
1339 					zend_string_release_ex( can_lang_tag, 0 );
1340 				}
1341 				if( can_loc_range){
1342 					zend_string_release_ex( can_loc_range, 0 );
1343 				}
1344 				RETURN_TRUE;
1345 			}
1346 		}
1347 
1348 		/* No prefix as loc_range */
1349 		if( cur_lang_tag){
1350 			efree( cur_lang_tag );
1351 		}
1352 		if( cur_loc_range){
1353 			efree( cur_loc_range );
1354 		}
1355 		if( can_lang_tag){
1356 			zend_string_release_ex( can_lang_tag, 0 );
1357 		}
1358 		if( can_loc_range){
1359 			zend_string_release_ex( can_loc_range, 0 );
1360 		}
1361 		RETURN_FALSE;
1362 
1363 	} /* end of if isCanonical */
1364 	else{
1365 		/* Convert to lower case for case-insensitive comparison */
1366 		cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1367 
1368 		result = strToMatch( lang_tag , cur_lang_tag);
1369 		if( result == 0) {
1370 			efree( cur_lang_tag );
1371 			RETURN_FALSE;
1372 		}
1373 		cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1374 		result = strToMatch( loc_range , cur_loc_range );
1375 		if( result == 0) {
1376 			efree( cur_lang_tag );
1377 			efree( cur_loc_range );
1378 			RETURN_FALSE;
1379 		}
1380 
1381 		/* check if prefix */
1382 		token 	= strstr( cur_lang_tag , cur_loc_range );
1383 
1384 		if( token && (token==cur_lang_tag) ){
1385 			/* check if the char. after match is SEPARATOR */
1386 			chrcheck = token + (strlen(cur_loc_range));
1387 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1388 				efree( cur_lang_tag );
1389 				efree( cur_loc_range );
1390 				RETURN_TRUE;
1391 			}
1392 		}
1393 
1394 		/* No prefix as loc_range */
1395 		if( cur_lang_tag){
1396 			efree( cur_lang_tag );
1397 		}
1398 		if( cur_loc_range){
1399 			efree( cur_loc_range );
1400 		}
1401 		RETURN_FALSE;
1402 
1403 	}
1404 }
1405 /* }}} */
1406 
/*
 * Releases a lookup table laid out as arr_size consecutive pairs.
 * Only the first slot of each pair (even index) is freed, and only when it
 * is non-NULL; the second slot (odd index) is never freed here.
 * The table itself is freed last.
 */
static void array_cleanup( char* arr[] , int arr_size)
{
	int pair;

	for( pair = 0; pair < arr_size; pair++ ){
		char* owned = arr[pair * 2];

		if( owned != NULL ){
			efree( owned );
		}
	}

	efree(arr);
}
1417 
/* Frees the cur_arr / cur_arr_len lookup table of the enclosing function and
 * returns value. Wrapped in do { } while (0) so it acts as one statement,
 * including under an unbraced if/else. */
#define LOOKUP_CLEAN_RETURN(value)	do { array_cleanup(cur_arr, cur_arr_len); return (value); } while (0)
1419 /* {{{
1420 * returns the lookup result to lookup_loc_range_src_php
1421 * internal function
1422 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize)1423 static zend_string* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize )
1424 {
1425 	int	i = 0;
1426 	int	cur_arr_len = 0;
1427 	int result = 0;
1428 
1429 	zend_string* lang_tag = NULL;
1430 	zval* ele_value = NULL;
1431 
1432 	char* cur_loc_range	= NULL;
1433 	zend_string* can_loc_range	= NULL;
1434 	zend_off_t saved_pos = 0;
1435 
1436 	zend_string* return_value = NULL;
1437 
1438 	char **cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1439 	ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1440 	/* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1441 		if(Z_TYPE_P(ele_value)!= IS_STRING) {
1442 			/* element value is not a string */
1443 			zend_argument_type_error(2, "must only contain string values");
1444 			LOOKUP_CLEAN_RETURN(NULL);
1445 		}
1446 		cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1447 		result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1448 		if(result == 0) {
1449 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0);
1450 			LOOKUP_CLEAN_RETURN(NULL);
1451 		}
1452 		cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1453 		cur_arr_len++ ;
1454 	} ZEND_HASH_FOREACH_END(); /* end of for */
1455 
1456 	/* Canonicalize array elements */
1457 	if(canonicalize) {
1458 		for(i=0; i<cur_arr_len; i++) {
1459 			lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1460 			if(result != 1 || lang_tag == NULL || !lang_tag->val[0]) {
1461 				if(lang_tag) {
1462 					zend_string_release_ex(lang_tag, 0);
1463 				}
1464 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1465 				LOOKUP_CLEAN_RETURN(NULL);
1466 			}
1467 			cur_arr[i*2] = erealloc(cur_arr[i*2], lang_tag->len+1);
1468 			result = strToMatch(lang_tag->val, cur_arr[i*2]);
1469 			zend_string_release_ex(lang_tag, 0);
1470 			if(result == 0) {
1471 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1472 				LOOKUP_CLEAN_RETURN(NULL);
1473 			}
1474 		}
1475 
1476 	}
1477 
1478 	if(canonicalize) {
1479 		/* Canonicalize the loc_range */
1480 		can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1481 		if( result != 1 || can_loc_range == NULL || !can_loc_range->val[0]) {
1482 			/* Error */
1483 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 );
1484 			if(can_loc_range) {
1485 				zend_string_release_ex(can_loc_range, 0);
1486 			}
1487 			LOOKUP_CLEAN_RETURN(NULL);
1488 		} else {
1489 			loc_range = can_loc_range->val;
1490 		}
1491 	}
1492 
1493 	cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1494 	/* convert to lower and replace hyphens */
1495 	result = strToMatch(loc_range, cur_loc_range);
1496 	if(can_loc_range) {
1497 		zend_string_release_ex(can_loc_range, 0);
1498 	}
1499 	if(result == 0) {
1500 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1501 		LOOKUP_CLEAN_RETURN(NULL);
1502 	}
1503 
1504 	/* Lookup for the lang_tag match */
1505 	saved_pos = strlen(cur_loc_range);
1506 	while(saved_pos > 0) {
1507 		for(i=0; i< cur_arr_len; i++){
1508 			if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1509 				/* Match found */
1510 				char *str = canonicalize ? cur_arr[i*2] : cur_arr[i*2+1];
1511 				return_value = zend_string_init(str, strlen(str), 0);
1512 				efree(cur_loc_range);
1513 				LOOKUP_CLEAN_RETURN(return_value);
1514 			}
1515 		}
1516 		saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1517 	}
1518 
1519 	/* Match not found */
1520 	efree(cur_loc_range);
1521 	LOOKUP_CLEAN_RETURN(NULL);
1522 }
1523 /* }}} */
1524 
1525 /* {{{ Searches the items in $langtag for the best match to the language
1526 * range
1527 */
1528 /* }}} */
1529 /* {{{ Searches the items in $langtag for the best match to the language
1530 * range
1531 */
PHP_FUNCTION(locale_lookup)1532 PHP_FUNCTION(locale_lookup)
1533 {
1534 	zend_string*   	fallback_loc_str	= NULL;
1535 	const char*    	loc_range      		= NULL;
1536 	size_t        	loc_range_len  		= 0;
1537 
1538 	zval*		arr				= NULL;
1539 	HashTable*	hash_arr		= NULL;
1540 	bool	boolCanonical	= 0;
1541 	zend_string* 	result_str	= NULL;
1542 
1543 	intl_error_reset( NULL );
1544 
1545 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "as|bS!", &arr, &loc_range, &loc_range_len,
1546 		&boolCanonical,	&fallback_loc_str) == FAILURE) {
1547 		RETURN_THROWS();
1548 	}
1549 
1550 	if(loc_range_len == 0) {
1551 		if(fallback_loc_str) {
1552 			loc_range = ZSTR_VAL(fallback_loc_str);
1553 			loc_range_len = ZSTR_LEN(fallback_loc_str);
1554 		} else {
1555 			loc_range = intl_locale_get_default();
1556 			loc_range_len = strlen(loc_range);
1557 		}
1558 	}
1559 
1560 	hash_arr = Z_ARRVAL_P(arr);
1561 
1562 	INTL_CHECK_LOCALE_LEN(loc_range_len);
1563 
1564 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1565 		RETURN_EMPTY_STRING();
1566 	}
1567 
1568 	result_str = lookup_loc_range(loc_range, hash_arr, boolCanonical);
1569 	if(result_str == NULL || ZSTR_VAL(result_str)[0] == '\0') {
1570 		if( fallback_loc_str ) {
1571 			result_str = zend_string_copy(fallback_loc_str);
1572 		} else {
1573 			RETURN_EMPTY_STRING();
1574 		}
1575 	}
1576 
1577 	RETURN_STR(result_str);
1578 }
1579 /* }}} */
1580 
1581 /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
1582 /* }}} */
1583 /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
PHP_FUNCTION(locale_accept_from_http)1584 PHP_FUNCTION(locale_accept_from_http)
1585 {
1586 	UEnumeration *available;
1587 	char *http_accept = NULL;
1588 	size_t http_accept_len;
1589 	UErrorCode status = 0;
1590 	int len;
1591 	char resultLocale[INTL_MAX_LOCALE_LEN+1];
1592 	UAcceptResult outResult;
1593 
1594 	if(zend_parse_parameters( ZEND_NUM_ARGS(), "s", &http_accept, &http_accept_len) == FAILURE)
1595 	{
1596 		RETURN_THROWS();
1597 	}
1598 	if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1599 		/* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1600 		char *start = http_accept;
1601 		char *end;
1602 		size_t len;
1603 		do {
1604 			end = strchr(start, ',');
1605 			len = end ? end-start : http_accept_len-(start-http_accept);
1606 			if(len > ULOC_FULLNAME_CAPACITY) {
1607 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1608 						"locale_accept_from_http: locale string too long", 0 );
1609 				RETURN_FALSE;
1610 			}
1611 			if(end) {
1612 				start = end+1;
1613 			}
1614 		} while(end != NULL);
1615 	}
1616 
1617 	available = ures_openAvailableLocales(NULL, &status);
1618 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1619 	len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1620 						&outResult, http_accept, available, &status);
1621 	uenum_close(available);
1622 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1623 	if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1624 		RETURN_FALSE;
1625 	}
1626 	RETURN_STRINGL(resultLocale, len);
1627 }
1628 /* }}} */
1629