xref: /php-src/ext/intl/locale/locale_methods.c (revision 11accb5c)
1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
12    +----------------------------------------------------------------------+
13 */
14 
15 #ifdef HAVE_CONFIG_H
16 #include <config.h>
17 #endif
18 
19 #include <unicode/ustring.h>
20 #include <unicode/udata.h>
21 #include <unicode/putil.h>
22 #include <unicode/ures.h>
23 
24 #include "php_intl.h"
25 #include "locale.h"
26 #include "locale_class.h"
27 #include "intl_convert.h"
28 #include "intl_data.h"
29 
30 #include <zend_API.h>
31 #include <zend.h>
32 #include <php.h>
33 #include "main/php_ini.h"
34 #include "zend_smart_str.h"
35 
36 ZEND_EXTERN_MODULE_GLOBALS( intl )
37 
/* String pieces used while composing locale identifiers. */
#define SEPARATOR       "_"     /* written in front of every appended subtag */
#define SEPARATOR1      "-"
#define DELIMITER       "-_"
#define EXTLANG_PREFIX  "a"
#define PRIVATE_PREFIX  "x"     /* singleton emitted before private-use subtags */
#define DISP_NAME       "name"  /* pseudo tag: request the full display name */

/* Upper bounds on numbered keys ("variant0", "variant1", ...) that are probed. */
#define MAX_NO_VARIANT          15
#define MAX_NO_EXTLANG          3
#define MAX_NO_PRIVATE          15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is absent. */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
57 
/* Based on the IANA language subtag registry (File-Date: 2021-08-06)
 * https://www.iana.org/assignments/language-subtag-registry
 *
 * NULL-terminated list of all grandfathered tags, plus the redundant tags
 * that have a Preferred-Value. The index noted next to each entry is the
 * index of its replacement in LOC_PREFERRED_GRANDFATHERED; keep both tables
 * in the same order when editing either of them.
 */
static const char * const LOC_GRANDFATHERED[] = {
	/*  0 */ "art-lojban",
	/*  1 */ "cel-gaulish",
	/*  2 */ "en-GB-oed",
	/*  3 */ "i-ami",
	/*  4 */ "i-bnn",
	/*  5 */ "i-default",
	/*  6 */ "i-enochian",
	/*  7 */ "i-hak",
	/*  8 */ "i-klingon",
	/*  9 */ "i-lux",
	/* 10 */ "i-mingo",
	/* 11 */ "i-navajo",
	/* 12 */ "i-pwn",
	/* 13 */ "i-tao",
	/* 14 */ "i-tay",
	/* 15 */ "i-tsu",
	/* 16 */ "no-bok",
	/* 17 */ "no-nyn",
	/* 18 */ "sgn-BE-FR",
	/* 19 */ "sgn-BE-NL",
	/* 20 */ "sgn-BR",
	/* 21 */ "sgn-CH-DE",
	/* 22 */ "sgn-CO",
	/* 23 */ "sgn-DE",
	/* 24 */ "sgn-DK",
	/* 25 */ "sgn-ES",
	/* 26 */ "sgn-FR",
	/* 27 */ "sgn-GB",
	/* 28 */ "sgn-GR",
	/* 29 */ "sgn-IE",
	/* 30 */ "sgn-IT",
	/* 31 */ "sgn-JP",
	/* 32 */ "sgn-MX",
	/* 33 */ "sgn-NI",
	/* 34 */ "sgn-NL",
	/* 35 */ "sgn-NO",
	/* 36 */ "sgn-PT",
	/* 37 */ "sgn-SE",
	/* 38 */ "sgn-US",
	/* 39 */ "sgn-ZA",
	/* 40 */ "zh-cmn",
	/* 41 */ "zh-cmn-Hans",
	/* 42 */ "zh-cmn-Hant",
	/* 43 */ "zh-gan",
	/* 44 */ "zh-guoyu",
	/* 45 */ "zh-hakka",
	/* 46 */ "zh-min",
	/* 47 */ "zh-min-nan",
	/* 48 */ "zh-wuu",
	/* 49 */ "zh-xiang",
	NULL
};
117 
/* Based on the IANA language subtag registry (File-Date: 2021-08-06)
 *
 * Preferred values for the tags in LOC_GRANDFATHERED. Entry N here replaces
 * entry N of LOC_GRANDFATHERED (the replaced tag is noted beside each value).
 * A NULL before the final slot means that tag has no preferred value, so this
 * table must be indexed by offset and never scanned for a NULL terminator.
 */
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
	"jbo",             /* art-lojban */
	NULL,              /* cel-gaulish */
	"en-GB-oxendict",  /* en-GB-oed */
	"ami",             /* i-ami */
	"bnn",             /* i-bnn */
	NULL,              /* i-default */
	NULL,              /* i-enochian */
	"hak",             /* i-hak */
	"tlh",             /* i-klingon */
	"lb",              /* i-lux */
	NULL,              /* i-mingo */
	"nv",              /* i-navajo */
	"pwn",             /* i-pwn */
	"tao",             /* i-tao */
	"tay",             /* i-tay */
	"tsu",             /* i-tsu */
	"nb",              /* no-bok */
	"nn",              /* no-nyn */
	"sfb",             /* sgn-BE-FR */
	"vgt",             /* sgn-BE-NL */
	"bzs",             /* sgn-BR */
	"sgg",             /* sgn-CH-DE */
	"csn",             /* sgn-CO */
	"gsg",             /* sgn-DE */
	"dsl",             /* sgn-DK */
	"ssp",             /* sgn-ES */
	"fsl",             /* sgn-FR */
	"bfi",             /* sgn-GB */
	"gss",             /* sgn-GR */
	"isg",             /* sgn-IE */
	"ise",             /* sgn-IT */
	"jsl",             /* sgn-JP */
	"mfs",             /* sgn-MX */
	"ncs",             /* sgn-NI */
	"dse",             /* sgn-NL */
	"nsl",             /* sgn-NO */
	"psr",             /* sgn-PT */
	"swl",             /* sgn-SE */
	"ase",             /* sgn-US */
	"sfs",             /* sgn-ZA */
	"cmn",             /* zh-cmn */
	"cmn-Hans",        /* zh-cmn-Hans */
	"cmn-Hant",        /* zh-cmn-Hant */
	"gan",             /* zh-gan */
	"cmn",             /* zh-guoyu */
	"hak",             /* zh-hakka */
	NULL,              /* zh-min */
	"nan",             /* zh-min-nan */
	"wuu",             /* zh-wuu */
	"hsn",             /* zh-xiang */
	NULL               /* slot matching the LOC_GRANDFATHERED terminator */
};
179 
/* Character-class helpers for locale identifiers.
 *
 * Every macro argument is parenthesized so that expressions such as
 * `c & mask` or `ptr + n` expand with the intended precedence. The argument
 * may still be evaluated more than once, so do not pass expressions with
 * side effects (e.g. `*p++`).
 */

/* returns true if a is an ID separator, false otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a) == 'x') || ((a) == 'X') || ((a) == 'i') || ((a) == 'I'))

/* returns true if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a) (((a) == 0) || ((a) == '.') || ((a) == '@'))
195 
/* {{{ Case-insensitive lookup of 'key' in the NULL-terminated array 'list'.
 * Returns the zero-based index of the first matching entry,
 * or -1 when no entry matches. */
static int16_t findOffset(const char* const* list, const char* key)
{
	size_t idx;

	for (idx = 0; list[idx] != NULL; idx++) {
		if (strcasecmp(key, list[idx]) == 0) {
			return (int16_t)idx;
		}
	}

	return -1;
}
211 /*}}}*/
212 
getPreferredTag(const char * gf_tag)213 static char* getPreferredTag(const char* gf_tag)
214 {
215 	char* result = NULL;
216 	zend_off_t grOffset = 0;
217 
218 	grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
219 	if(grOffset < 0) {
220 		return NULL;
221 	}
222 	if( LOC_PREFERRED_GRANDFATHERED[grOffset] != NULL ){
223 		/* return preferred tag */
224 		result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
225 	} else {
226 		/* Return correct grandfathered language tag */
227 		result = estrdup( LOC_GRANDFATHERED[grOffset] );
228 	}
229 	return result;
230 }
231 
232 /* {{{
233 * returns the position of next token for lookup
234 * or -1 if no token
235 * strtokr equivalent search for token in reverse direction
236 */
getStrrtokenPos(char * str,zend_off_t savedPos)237 static zend_off_t getStrrtokenPos(char* str, zend_off_t savedPos)
238 {
239 	zend_off_t result =-1;
240 	zend_off_t i;
241 
242 	for(i=savedPos-1; i>=0; i--) {
243 		if(isIDSeparator(*(str+i)) || isKeywordSeparator(*(str+i))){
244 			/* delimiter found; check for singleton */
245 			if(i>=2 && isIDSeparator(*(str+i-2)) ){
246 				/* a singleton; so send the position of token before the singleton */
247 				result = i-2;
248 			} else {
249 				result = i;
250 			}
251 			break;
252 		}
253 	}
254 	if(result < 1){
255 		/* Just in case inavlid locale e.g. '-x-xyz' or '-sl_Latn' */
256 		result =-1;
257 	}
258 	return result;
259 }
260 /* }}} */
261 
262 /* {{{
263 * returns the position of a singleton if present
264 * returns -1 if no singleton
265 * strtok equivalent search for singleton
266 */
getSingletonPos(const char * str)267 static zend_off_t getSingletonPos(const char* str)
268 {
269 	zend_off_t result =-1;
270 	size_t len = 0;
271 
272 	if( str && ((len=strlen(str))>0) ){
273 		zend_off_t i = 0;
274 		for( i=0; (size_t)i < len ; i++){
275 			if( isIDSeparator(*(str+i)) ){
276 				if( i==1){
277 					/* string is of the form x-avy or a-prv1 */
278 					result =0;
279 					break;
280 				} else {
281 					/* delimiter found; check for singleton */
282 					if( isIDSeparator(*(str+i+2)) ){
283 						/* a singleton; so send the position of separator before singleton */
284 						result = i+1;
285 						break;
286 					}
287 				}
288 			}
289 		}/* end of for */
290 
291 	}
292 	return result;
293 }
294 /* }}} */
295 
/* {{{ Get default locale: takes no arguments and returns a copy of the
 * string reported by intl_locale_get_default(). */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
	const char *current_default;

	ZEND_PARSE_PARAMETERS_NONE();

	current_default = intl_locale_get_default();
	RETURN_STRING(current_default);
}
305 
306 /* }}} */
307 
308 /* {{{ Set default locale */
309 /* }}} */
310 /* {{{ Set default locale */
PHP_NAMED_FUNCTION(zif_locale_set_default)311 PHP_NAMED_FUNCTION(zif_locale_set_default)
312 {
313 	zend_string* locale_name;
314 	zend_string *ini_name;
315 	char *default_locale = NULL;
316 
317 	ZEND_PARSE_PARAMETERS_START(1, 1)
318 		Z_PARAM_STR(locale_name)
319 	ZEND_PARSE_PARAMETERS_END();
320 
321 	if (ZSTR_LEN(locale_name) == 0) {
322 		default_locale = (char *)uloc_getDefault();
323 		locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
324 	}
325 
326 	ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
327 	zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
328 	zend_string_release_ex(ini_name, 0);
329 	if (default_locale != NULL) {
330 		zend_string_release_ex(locale_name, 0);
331 	}
332 
333 	RETURN_TRUE;
334 }
335 /* }}} */
336 
337 /* {{{
338 * Gets the value from ICU
339 * common code shared by get_primary_language,get_script or get_region or get_variant
340 * result = 0 if error, 1 if successful , -1 if no value
341 */
get_icu_value_internal(const char * loc_name,char * tag_name,int * result,int fromParseLocale)342 static zend_string* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
343 {
344 	zend_string* tag_value	    = NULL;
345 	int32_t      tag_value_len  = 512;
346 
347 	char*        mod_loc_name   = NULL;
348 
349 	int32_t      buflen         = 512;
350 	UErrorCode   status         = U_ZERO_ERROR;
351 
352 	if (strlen(loc_name) > INTL_MAX_LOCALE_LEN) {
353 		return NULL;
354 	}
355 
356 	if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
357 		/* Handle  grandfathered languages */
358 		zend_off_t grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
359 		if( grOffset >= 0 ){
360 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
361 				return zend_string_init(loc_name, strlen(loc_name), 0);
362 			} else {
363 				/* Since Grandfathered , no value , do nothing , retutn NULL */
364 				return NULL;
365 			}
366 		}
367 
368 	if( fromParseLocale==1 ){
369 		zend_off_t singletonPos = 0;
370 
371 		/* Handle singletons */
372 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
373 			if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
374 				return zend_string_init(loc_name, strlen(loc_name), 0);
375 			}
376 		}
377 
378 		singletonPos = getSingletonPos( loc_name );
379 		if( singletonPos == 0){
380 			/* singleton at start of script, region , variant etc.
381 			 * or invalid singleton at start of language */
382 			return NULL;
383 		} else if( singletonPos > 0 ){
384 			/* singleton at some position except at start
385 			 * strip off the singleton and rest of the loc_name */
386 			mod_loc_name = estrndup ( loc_name , singletonPos-1);
387 		}
388 	} /* end of if fromParse */
389 
390 	} /* end of if != LOC_CANONICAL_TAG */
391 
392 	if( mod_loc_name == NULL){
393 		mod_loc_name = estrdup(loc_name );
394 	}
395 
396 	/* Proceed to ICU */
397 	do{
398 		if (tag_value) {
399 			tag_value = zend_string_realloc( tag_value , buflen, 0);
400 		} else {
401 			tag_value = zend_string_alloc( buflen, 0);
402 		}
403 		tag_value_len = buflen;
404 
405 		if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
406 			buflen = uloc_getScript ( mod_loc_name , tag_value->val , tag_value_len , &status);
407 		}
408 		if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
409 			buflen = uloc_getLanguage ( mod_loc_name , tag_value->val , tag_value_len , &status);
410 		}
411 		if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
412 			buflen = uloc_getCountry ( mod_loc_name , tag_value->val , tag_value_len , &status);
413 		}
414 		if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
415 			buflen = uloc_getVariant ( mod_loc_name , tag_value->val , tag_value_len , &status);
416 		}
417 		if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
418 			buflen = uloc_canonicalize ( mod_loc_name , tag_value->val , tag_value_len , &status);
419 		}
420 
421 		if( U_FAILURE( status ) ) {
422 			if( status == U_BUFFER_OVERFLOW_ERROR ) {
423 				status = U_ZERO_ERROR;
424 				buflen++; /* add space for \0 */
425 				continue;
426 			}
427 
428 			/* Error in retrieving data */
429 			*result = 0;
430 			if( tag_value ){
431 				zend_string_release_ex( tag_value, 0 );
432 			}
433 			if( mod_loc_name ){
434 				efree( mod_loc_name);
435 			}
436 			return NULL;
437 		}
438 	} while( buflen > tag_value_len );
439 
440 	if(  buflen ==0 ){
441 		/* No value found */
442 		*result = -1;
443 		if( tag_value ){
444 			zend_string_release_ex( tag_value, 0 );
445 		}
446 		if( mod_loc_name ){
447 			efree( mod_loc_name);
448 		}
449 		return NULL;
450 	} else {
451 		*result = 1;
452 	}
453 
454 	if( mod_loc_name ){
455 		efree( mod_loc_name);
456 	}
457 
458 	tag_value->len = strlen(tag_value->val);
459 	return tag_value;
460 }
461 /* }}} */
462 
463 /* {{{
464 * Gets the value from ICU , called when PHP userspace function is called
465 * common code shared by get_primary_language,get_script or get_region or get_variant
466 */
get_icu_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)467 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
468 {
469 
470 	char*          loc_name        	= NULL;
471 	size_t         loc_name_len    	= 0;
472 
473 	zend_string*   tag_value		= NULL;
474 	char*       empty_result	= "";
475 
476 	int         result    		= 0;
477 	char*       msg        		= NULL;
478 
479 	UErrorCode  status          	= U_ZERO_ERROR;
480 
481 	intl_error_reset( NULL );
482 
483 	ZEND_PARSE_PARAMETERS_START(1, 1)
484 		Z_PARAM_STRING(loc_name, loc_name_len)
485 	ZEND_PARSE_PARAMETERS_END();
486 
487 	if(loc_name_len == 0) {
488 		loc_name = (char *)intl_locale_get_default();
489 		loc_name_len = strlen(loc_name);
490 	}
491 
492 	INTL_CHECK_LOCALE_LEN(loc_name_len);
493 
494 	/* Call ICU get */
495 	tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
496 
497 	/* No value found */
498 	if( result == -1 ) {
499 		if( tag_value){
500 			zend_string_release_ex( tag_value, 0 );
501 		}
502 		RETURN_STRING( empty_result);
503 	}
504 
505 	/* value found */
506 	if( tag_value){
507 		RETVAL_STR( tag_value );
508 		return;
509 	}
510 
511 	/* Error encountered while fetching the value */
512 	if( result ==0) {
513 		spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
514 		intl_error_set( NULL, status, msg , 1 );
515 		efree(msg);
516 		RETURN_NULL();
517 	}
518 
519 }
520 /* }}} */
521 
522 /* {{{ gets the script for the $locale */
PHP_FUNCTION(locale_get_script)523 PHP_FUNCTION( locale_get_script )
524 {
525 	get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
526 }
527 /* }}} */
528 
529 /* {{{ gets the region for the $locale */
PHP_FUNCTION(locale_get_region)530 PHP_FUNCTION( locale_get_region )
531 {
532 	get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
533 }
534 /* }}} */
535 
536 /* {{{ gets the primary language for the $locale */
PHP_FUNCTION(locale_get_primary_language)537 PHP_FUNCTION(locale_get_primary_language )
538 {
539 	get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
540 }
541 /* }}} */
542 
543 
544 /* {{{
545  * common code shared by display_xyz functions to  get the value from ICU
546  }}} */
get_icu_disp_value_src_php(char * tag_name,INTERNAL_FUNCTION_PARAMETERS)547 static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
548 {
549 	char*          loc_name        	= NULL;
550 	size_t         loc_name_len    	= 0;
551 
552 	char*       disp_loc_name       = NULL;
553 	size_t      disp_loc_name_len   = 0;
554 	int         free_loc_name       = 0;
555 
556 	UChar*      disp_name      	= NULL;
557 	int32_t     disp_name_len  	= 0;
558 
559 	char*       mod_loc_name        = NULL;
560 
561 	int32_t     buflen          	= 512;
562 	UErrorCode  status          	= U_ZERO_ERROR;
563 
564 	zend_string* u8str;
565 
566 	char*       msg             	= NULL;
567 
568 	intl_error_reset( NULL );
569 
570 	ZEND_PARSE_PARAMETERS_START(1, 2)
571 		Z_PARAM_STRING(loc_name, loc_name_len)
572 		Z_PARAM_OPTIONAL
573 		Z_PARAM_STRING_OR_NULL(disp_loc_name, disp_loc_name_len)
574 	ZEND_PARSE_PARAMETERS_END();
575 
576 	if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
577 		/* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
578 		spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
579 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
580 		efree(msg);
581 		RETURN_FALSE;
582 	}
583 
584 	if(loc_name_len == 0) {
585 		loc_name = (char *)intl_locale_get_default();
586 	}
587 
588 	if( strcmp(tag_name, DISP_NAME) != 0 ){
589 		/* Handle grandfathered languages */
590 		int grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
591 		if( grOffset >= 0 ){
592 			if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
593 				mod_loc_name = getPreferredTag( loc_name );
594 			} else {
595 				/* Since Grandfathered, no value, do nothing, return NULL */
596 				RETURN_FALSE;
597 			}
598 		}
599 	} /* end of if != LOC_CANONICAL_TAG */
600 
601 	if( mod_loc_name==NULL ){
602 		mod_loc_name = estrdup( loc_name );
603 	}
604 
605 	/* Check if disp_loc_name passed , if not use default locale */
606 	if( !disp_loc_name){
607 		disp_loc_name = estrdup(intl_locale_get_default());
608 		free_loc_name = 1;
609 	}
610 
611 	/* Get the disp_value for the given locale */
612 	do{
613 		disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
614 		disp_name_len = buflen;
615 
616 		if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
617 			buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
618 		} else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
619 			buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
620 		} else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
621 			buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
622 		} else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
623 			buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
624 		} else if( strcmp(tag_name , DISP_NAME)==0 ){
625 			buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
626 		}
627 
628 		/* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
629 		if( U_FAILURE( status ) )
630 		{
631 			if( status == U_BUFFER_OVERFLOW_ERROR )
632 			{
633 				status = U_ZERO_ERROR;
634 				continue;
635 			}
636 
637 			spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
638 			intl_error_set( NULL, status, msg , 1 );
639 			efree(msg);
640 			if( disp_name){
641 				efree( disp_name );
642 			}
643 			if( mod_loc_name){
644 				efree( mod_loc_name );
645 			}
646 			if (free_loc_name) {
647 				efree((void *)disp_loc_name);
648 				disp_loc_name = NULL;
649 			}
650 			RETURN_FALSE;
651 		}
652 	} while( buflen > disp_name_len );
653 
654 	if( mod_loc_name){
655 		efree( mod_loc_name );
656 	}
657 	if (free_loc_name) {
658 		efree((void *)disp_loc_name);
659 		disp_loc_name = NULL;
660 	}
661 	/* Convert display locale name from UTF-16 to UTF-8. */
662 	u8str = intl_convert_utf16_to_utf8(disp_name, buflen, &status );
663 	efree( disp_name );
664 	if( !u8str )
665 	{
666 		spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
667 		intl_error_set( NULL, status, msg , 1 );
668 		efree(msg);
669 		RETURN_FALSE;
670 	}
671 
672 	RETVAL_NEW_STR( u8str );
673 }
674 /* }}} */
675 
676 /* {{{ gets the name for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_name)677 PHP_FUNCTION(locale_get_display_name)
678 {
679 	get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
680 }
681 /* }}} */
682 
683 /* {{{ gets the language for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_language)684 PHP_FUNCTION(locale_get_display_language)
685 {
686 	get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
687 }
688 /* }}} */
689 
690 /* {{{ gets the script for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_script)691 PHP_FUNCTION(locale_get_display_script)
692 {
693 	get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
694 }
695 /* }}} */
696 
697 /* {{{ gets the region for the $locale in $in_locale or default_locale */
PHP_FUNCTION(locale_get_display_region)698 PHP_FUNCTION(locale_get_display_region)
699 {
700 	get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
701 }
702 /* }}} */
703 
704 /* {{{
705 * proto static string Locale::getDisplayVariant($locale, $in_locale = null)
706 * gets the variant for the $locale in $in_locale or default_locale
707  }}} */
708 /* {{{
709 * proto static string get_display_variant($locale, $in_locale = null)
710 * gets the variant for the $locale in $in_locale or default_locale
711 */
PHP_FUNCTION(locale_get_display_variant)712 PHP_FUNCTION(locale_get_display_variant)
713 {
714 	get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
715 }
716 /* }}} */
717 
718  /* {{{ return an associative array containing keyword-value
719  * pairs for this locale. The keys are keys to the array (doh!)
720  * }}}*/
721  /* {{{ return an associative array containing keyword-value
722  * pairs for this locale. The keys are keys to the array (doh!)
723  */
PHP_FUNCTION(locale_get_keywords)724 PHP_FUNCTION( locale_get_keywords )
725 {
726 	UEnumeration*   e        = NULL;
727 	UErrorCode      status   = U_ZERO_ERROR;
728 
729 	const char*	 	kw_key        = NULL;
730 	int32_t         kw_key_len    = 0;
731 
732 	char*       	        loc_name        = NULL;
733 	size_t        	 	loc_name_len    = 0;
734 
735 	intl_error_reset( NULL );
736 
737 	ZEND_PARSE_PARAMETERS_START(1, 1)
738 		Z_PARAM_STRING(loc_name, loc_name_len)
739 	ZEND_PARSE_PARAMETERS_END();
740 
741 	INTL_CHECK_LOCALE_LEN(strlen(loc_name));
742 
743 	if(loc_name_len == 0) {
744 		loc_name = (char *)intl_locale_get_default();
745 	}
746 
747 	/* Get the keywords */
748 	e = uloc_openKeywords( loc_name, &status );
749 	if( e != NULL ) {
750 		/*
751 		ICU expects the buffer to be allocated  before calling the function
752 		and so the buffer size has been explicitly specified
753 		ICU uloc.h #define 	ULOC_KEYWORD_AND_VALUES_CAPACITY   100
754 		hence the kw_value buffer size is 100
755 		*/
756 
757 		/* Traverse it, filling the return array. */
758 		array_init( return_value );
759 
760 		while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
761 			int32_t kw_value_len = 100;
762 			zend_string *kw_value_str = zend_string_alloc(kw_value_len, 0);
763 
764 			/* Get the keyword value for each keyword */
765 			kw_value_len=uloc_getKeywordValue( loc_name, kw_key, ZSTR_VAL(kw_value_str), kw_value_len, &status );
766 			if (status == U_BUFFER_OVERFLOW_ERROR) {
767 				status = U_ZERO_ERROR;
768 				kw_value_str = zend_string_extend(kw_value_str, kw_value_len, 0);
769 				kw_value_len=uloc_getKeywordValue( loc_name,kw_key, ZSTR_VAL(kw_value_str), kw_value_len+1, &status );
770 			} else if(!U_FAILURE(status)) {
771 				kw_value_str = zend_string_truncate(kw_value_str, kw_value_len, 0);
772 			}
773 			if (U_FAILURE(status)) {
774 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 );
775 				if( kw_value_str){
776 					zend_string_efree( kw_value_str );
777 				}
778 				zend_array_destroy(Z_ARR_P(return_value));
779 				RETURN_FALSE;
780 			}
781 
782 			add_assoc_str( return_value, (char *)kw_key, kw_value_str);
783 		} /* end of while */
784 
785 	} /* end of if e!=NULL */
786 
787 	uenum_close( e );
788 }
789 /* }}} */
790 
791  /* {{{ @return string the canonicalized locale
792  * }}} */
793  /* {{{ @param string $locale	The locale string to canonicalize */
PHP_FUNCTION(locale_canonicalize)794 PHP_FUNCTION(locale_canonicalize)
795 {
796 	get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
797 }
798 /* }}} */
799 
800 /* {{{ append_key_value
801 * Internal function which is called from locale_compose
802 * gets the value for the key_name and appends to the loc_name
803 * returns 1 if successful , -1 if not found ,
804 * 0 if array element is not a string , -2 if buffer-overflow
805 */
append_key_value(smart_str * loc_name,HashTable * hash_arr,char * key_name)806 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
807 {
808 	zval *ele_value;
809 
810 	if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
811 		if(Z_TYPE_P(ele_value)!= IS_STRING ){
812 			/* element value is not a string */
813 			return FAILURE;
814 		}
815 		if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
816 		   strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
817 			/* not lang or grandfathered tag */
818 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
819 		}
820 		smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
821 		return SUCCESS;
822 	}
823 
824 	return LOC_NOT_FOUND;
825 }
826 /* }}} */
827 
828 /* {{{ append_prefix , appends the prefix needed
829 * e.g. private adds 'x'
830 */
add_prefix(smart_str * loc_name,char * key_name)831 static void add_prefix(smart_str* loc_name, char* key_name)
832 {
833 	if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
834 		smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
835 		smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
836 	}
837 }
838 /* }}} */
839 
840 /* {{{ append_multiple_key_values
841 * Internal function which is called from locale_compose
842 * gets the multiple values for the key_name and appends to the loc_name
843 * used for 'variant','extlang','private'
844 * returns 1 if successful , -1 if not found ,
845 * 0 if array element is not a string , -2 if buffer-overflow
846 */
append_multiple_key_values(smart_str * loc_name,HashTable * hash_arr,char * key_name)847 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name)
848 {
849 	zval	*ele_value;
850 	int 	isFirstSubtag 	= 0;
851 
852 	/* Variant/ Extlang/Private etc. */
853 	if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
854 		if( Z_TYPE_P(ele_value) == IS_STRING ){
855 			add_prefix( loc_name , key_name);
856 
857 			smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
858 			smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
859 			return SUCCESS;
860 		} else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
861 			HashTable *arr = Z_ARRVAL_P(ele_value);
862 			zval *data;
863 
864 			ZEND_HASH_FOREACH_VAL(arr, data) {
865 				if(Z_TYPE_P(data) != IS_STRING) {
866 					return FAILURE;
867 				}
868 				if (isFirstSubtag++ == 0){
869 					add_prefix(loc_name , key_name);
870 				}
871 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
872 				smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
873 			} ZEND_HASH_FOREACH_END();
874 			return SUCCESS;
875 		} else {
876 			return FAILURE;
877 		}
878 	} else {
879 		char cur_key_name[31];
880 		int  max_value = 0, i;
881 		/* Decide the max_value: the max. no. of elements allowed */
882 		if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
883 			max_value  = MAX_NO_VARIANT;
884 		}
885 		if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
886 			max_value  = MAX_NO_EXTLANG;
887 		}
888 		if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
889 			max_value  = MAX_NO_PRIVATE;
890 		}
891 
892 		/* Multiple variant values as variant0, variant1 ,variant2 */
893 		isFirstSubtag = 0;
894 		for( i=0 ; i< max_value; i++ ){
895 			snprintf( cur_key_name , 30, "%s%d", key_name , i);
896 			if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
897 				if( Z_TYPE_P(ele_value)!= IS_STRING ){
898 					/* variant is not a string */
899 					return FAILURE;
900 				}
901 				/* Add the contents */
902 				if (isFirstSubtag++ == 0){
903 					add_prefix(loc_name , cur_key_name);
904 				}
905 				smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
906 				smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
907 			}
908 		} /* end of for */
909 	} /* end of else */
910 
911 	return SUCCESS;
912 }
913 /* }}} */
914 
915 /*{{{
916 * If applicable sets error message and aborts locale_compose gracefully
917 * returns 0  if locale_compose needs to be aborted
918 * otherwise returns 1
919 */
handleAppendResult(int result,smart_str * loc_name)920 static int handleAppendResult( int result, smart_str* loc_name)
921 {
922 	intl_error_reset( NULL );
923 	if( result == FAILURE) {
924 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
925 			 "locale_compose: parameter array element is not a string", 0 );
926 		smart_str_free(loc_name);
927 		return 0;
928 	}
929 	return 1;
930 }
931 /* }}} */
932 
/* NUL-terminates the smart_str and returns its string from the current PHP
 * function. Wrapped in do { } while (0) so that both steps stay together
 * when the macro is the body of an unbraced if/else. */
#define RETURN_SMART_STR(str) do { smart_str_0((str)); RETURN_NEW_STR((str)->s); } while (0)
934 /* {{{ Creates a locale by combining the parts of locale-ID passed
935 * }}} */
936 /* {{{ Creates a locale by combining the parts of locale-ID passed
937 * }}} */
PHP_FUNCTION(locale_compose)938 PHP_FUNCTION(locale_compose)
939 {
940 	smart_str      	loc_name_s = {0};
941 	smart_str *loc_name = &loc_name_s;
942 	zval*			arr	= NULL;
943 	HashTable*		hash_arr = NULL;
944 	int 			result = 0;
945 
946 	intl_error_reset( NULL );
947 
948 	ZEND_PARSE_PARAMETERS_START(1, 1)
949 		Z_PARAM_ARRAY(arr)
950 	ZEND_PARSE_PARAMETERS_END();
951 
952 	hash_arr = Z_ARRVAL_P( arr );
953 
954 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
955 		RETURN_FALSE;
956 
957 	/* Check for grandfathered first */
958 	result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
959 	if( result == SUCCESS){
960 		RETURN_SMART_STR(loc_name);
961 	}
962 	if( !handleAppendResult( result, loc_name)){
963 		RETURN_FALSE;
964 	}
965 
966 	/* Not grandfathered */
967 	result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
968 	if( result == LOC_NOT_FOUND ){
969 		zend_argument_value_error(1, "must contain a \"%s\" key", LOC_LANG_TAG);
970 		smart_str_free(loc_name);
971 		RETURN_THROWS();
972 	}
973 	if( !handleAppendResult( result, loc_name)){
974 		RETURN_FALSE;
975 	}
976 
977 	/* Extlang */
978 	result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG);
979 	if( !handleAppendResult( result, loc_name)){
980 		RETURN_FALSE;
981 	}
982 
983 	/* Script */
984 	result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
985 	if( !handleAppendResult( result, loc_name)){
986 		RETURN_FALSE;
987 	}
988 
989 	/* Region */
990 	result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
991 	if( !handleAppendResult( result, loc_name)){
992 		RETURN_FALSE;
993 	}
994 
995 	/* Variant */
996 	result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG);
997 	if( !handleAppendResult( result, loc_name)){
998 		RETURN_FALSE;
999 	}
1000 
1001 	/* Private */
1002 	result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG);
1003 	if( !handleAppendResult( result, loc_name)){
1004 		RETURN_FALSE;
1005 	}
1006 
1007 	RETURN_SMART_STR(loc_name);
1008 }
1009 /* }}} */
1010 
1011 
1012 /*{{{
1013 * Parses the locale and returns private subtags  if existing
1014 * else returns NULL
1015 * e.g. for locale='en_US-x-prv1-prv2-prv3'
1016 * returns a pointer to the string 'prv1-prv2-prv3'
1017 */
get_private_subtags(const char * loc_name)1018 static zend_string* get_private_subtags(const char* loc_name)
1019 {
1020 	zend_string* result = NULL;
1021 	size_t       len = 0;
1022 	const char*  mod_loc_name =NULL;
1023 
1024 	if( loc_name && (len = strlen(loc_name)) > 0 ){
1025 		zend_off_t singletonPos = 0;
1026 		mod_loc_name = loc_name ;
1027 		while( (singletonPos = getSingletonPos(mod_loc_name)) > -1){
1028 			if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){
1029 				/* private subtag start found */
1030 				if( singletonPos + 2 ==  len){
1031 					/* loc_name ends with '-x-' ; return  NULL */
1032 				}
1033 				else{
1034 					/* result = mod_loc_name + singletonPos +2; */
1035 					result = zend_string_init(mod_loc_name + singletonPos+2  , (len -( singletonPos +2) ), 0);
1036 				}
1037 				break;
1038 			}
1039 			else{
1040 				if((size_t)(singletonPos + 1) >= len){
1041 					/* String end */
1042 					break;
1043 				} else {
1044 					/* singleton found but not a private subtag , hence check further in the string for the private subtag */
1045 					mod_loc_name = mod_loc_name + singletonPos +1;
1046 					len = strlen(mod_loc_name);
1047 				}
1048 			}
1049 		} /* end of while */
1050 	}
1051 
1052 	return result;
1053 }
1054 /* }}} */
1055 
1056 /* {{{ code used by locale_parse */
add_array_entry(const char * loc_name,zval * hash_arr,char * key_name)1057 static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name)
1058 {
1059 	zend_string*   key_value 	= NULL;
1060 	char*   cur_key_name	= NULL;
1061 	char*   token        	= NULL;
1062 	char*   last_ptr  	= NULL;
1063 
1064 	int	result		= 0;
1065 	int 	cur_result  	= 0;
1066 
1067 
1068 	if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1069 		key_value = get_private_subtags( loc_name );
1070 		result = 1;
1071 	} else {
1072 		key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1073 	}
1074 	if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1075 		( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1076 		if( result > 0 && key_value){
1077 			int cnt = 0;
1078 			/* Tokenize on the "_" or "-"  */
1079 			token = php_strtok_r( key_value->val , DELIMITER ,&last_ptr);
1080 			if( cur_key_name ){
1081 				efree( cur_key_name);
1082 			}
1083 			/* Over-allocates a few bytes for the integer so we don't have to reallocate. */
1084 			size_t cur_key_name_size = (sizeof("-2147483648") - 1) + strlen(key_name) + 1;
1085 			cur_key_name = emalloc(cur_key_name_size);
1086 			snprintf( cur_key_name, cur_key_name_size , "%s%d", key_name , cnt++);
1087 			add_assoc_string( hash_arr, cur_key_name , token);
1088 			/* tokenize on the "_" or "-" and stop  at singleton if any */
1089 			while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1090 				snprintf( cur_key_name , cur_key_name_size, "%s%d", key_name , cnt++);
1091 				add_assoc_string( hash_arr, cur_key_name , token);
1092 			}
1093 /*
1094 			if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1095 			}
1096 */
1097 		}
1098 		if (key_value) {
1099 			zend_string_release_ex(key_value, 0);
1100 		}
1101 	} else {
1102 		if( result == 1 ){
1103 			add_assoc_str( hash_arr, key_name , key_value);
1104 			cur_result = 1;
1105 		} else if (key_value) {
1106 			zend_string_release_ex(key_value, 0);
1107 		}
1108 	}
1109 
1110 	if( cur_key_name ){
1111 		efree( cur_key_name);
1112 	}
1113 	/*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1114 	return cur_result;
1115 }
1116 /* }}} */
1117 
1118 /* {{{ parses a locale-id into an array the different parts of it */
PHP_FUNCTION(locale_parse)1119 PHP_FUNCTION(locale_parse)
1120 {
1121 	char*          loc_name        = NULL;
1122 	size_t         loc_name_len    = 0;
1123 	int         grOffset    	= 0;
1124 
1125 	intl_error_reset( NULL );
1126 
1127 	ZEND_PARSE_PARAMETERS_START(1, 1)
1128 		Z_PARAM_STRING(loc_name, loc_name_len)
1129 	ZEND_PARSE_PARAMETERS_END();
1130 
1131 	INTL_CHECK_LOCALE_LEN(strlen(loc_name));
1132 
1133 	if(loc_name_len == 0) {
1134 		loc_name = (char *)intl_locale_get_default();
1135 	}
1136 
1137 	array_init( return_value );
1138 
1139 	grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1140 	if( grOffset >= 0 ){
1141 		add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1142 	}
1143 	else{
1144 		/* Not grandfathered */
1145 		add_array_entry( loc_name , return_value , LOC_LANG_TAG);
1146 		add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG);
1147 		add_array_entry( loc_name , return_value , LOC_REGION_TAG);
1148 		add_array_entry( loc_name , return_value , LOC_VARIANT_TAG);
1149 		add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG);
1150 	}
1151 }
1152 /* }}} */
1153 
1154 /* {{{ gets an array containing the list of variants, or null */
PHP_FUNCTION(locale_get_all_variants)1155 PHP_FUNCTION(locale_get_all_variants)
1156 {
1157 	char*  	                loc_name        = NULL;
1158 	size_t    		loc_name_len    = 0;
1159 
1160 	int	result		= 0;
1161 	char*	token		= NULL;
1162 	zend_string*	variant		= NULL;
1163 	char*	saved_ptr	= NULL;
1164 
1165 	intl_error_reset( NULL );
1166 
1167 	ZEND_PARSE_PARAMETERS_START(1, 1)
1168 		Z_PARAM_STRING(loc_name, loc_name_len)
1169 	ZEND_PARSE_PARAMETERS_END();
1170 
1171 	if(loc_name_len == 0) {
1172 		loc_name = (char *)intl_locale_get_default();
1173 		loc_name_len = strlen(loc_name);
1174 	}
1175 
1176 	INTL_CHECK_LOCALE_LEN(loc_name_len);
1177 
1178 	array_init( return_value );
1179 
1180 	/* If the locale is grandfathered, stop, no variants */
1181 	if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1182 		/* ("Grandfathered Tag. No variants."); */
1183 	}
1184 	else {
1185 	/* Call ICU variant */
1186 		variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1187 		if( result > 0 && variant){
1188 			/* Tokenize on the "_" or "-" */
1189 			token = php_strtok_r( variant->val , DELIMITER , &saved_ptr);
1190 			add_next_index_stringl( return_value, token , strlen(token));
1191 			/* tokenize on the "_" or "-" and stop  at singleton if any	*/
1192 			while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1193 				add_next_index_stringl( return_value, token , strlen(token));
1194 			}
1195 		}
1196 		if( variant ){
1197 			zend_string_release_ex( variant, 0 );
1198 		}
1199 	}
1200 
1201 
1202 }
1203 /* }}} */
1204 
/* {{{ Converts to lower case and also replaces all hyphens with the underscore
 *
 * str:    NUL-terminated input; may be NULL.
 * retstr: output buffer; must have room for strlen(str) + 1 bytes.
 *
 * Returns 1 after writing the converted, NUL-terminated copy into retstr.
 * Returns 0 without touching retstr when str is NULL or empty.
 */
static int strToMatch(const char* str ,char *retstr)
{
	if (!str || str[0] == '\0') {
		return 0;
	}

	for (; *str != '\0'; str++, retstr++) {
		if (*str == '-') {
			*retstr = '_';
		} else {
			/*
			 * tolower() is only defined for EOF and values representable as
			 * unsigned char; a plain char holding a byte >= 0x80 may be
			 * negative, so convert before the call.
			 */
			*retstr = (char)tolower((unsigned char)*str);
		}
	}
	*retstr = '\0';

	return 1;
}
/* }}} */
1235 
1236 /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
1237 /* }}} */
1238 /* {{{ Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */
PHP_FUNCTION(locale_filter_matches)1239 PHP_FUNCTION(locale_filter_matches)
1240 {
1241 	char*       	lang_tag        = NULL;
1242 	size_t         	lang_tag_len    = 0;
1243 	char*           loc_range       = NULL;
1244 	size_t         	loc_range_len   = 0;
1245 
1246 	int		result		= 0;
1247 	char*		token		= 0;
1248 	char*		chrcheck	= NULL;
1249 
1250 	zend_string*   	can_lang_tag    = NULL;
1251 	zend_string*   	can_loc_range   = NULL;
1252 
1253 	char*       	cur_lang_tag    = NULL;
1254 	char*       	cur_loc_range   = NULL;
1255 
1256 	bool 	boolCanonical 	= 0;
1257 	UErrorCode	status		= U_ZERO_ERROR;
1258 
1259 	intl_error_reset( NULL );
1260 
1261 	ZEND_PARSE_PARAMETERS_START(2, 3)
1262 		Z_PARAM_STRING(lang_tag, lang_tag_len)
1263 		Z_PARAM_STRING(loc_range,  loc_range_len)
1264 		Z_PARAM_OPTIONAL
1265 		Z_PARAM_BOOL(boolCanonical)
1266 	ZEND_PARSE_PARAMETERS_END();
1267 
1268 	if(loc_range_len == 0) {
1269 		loc_range = (char *)intl_locale_get_default();
1270 		loc_range_len = strlen(loc_range);
1271 	}
1272 
1273 	if( strcmp(loc_range,"*")==0){
1274 		RETURN_TRUE;
1275 	}
1276 
1277 	INTL_CHECK_LOCALE_LEN(loc_range_len);
1278 	INTL_CHECK_LOCALE_LEN(lang_tag_len);
1279 
1280 	if( boolCanonical ){
1281 		/* canonicalize loc_range */
1282 		can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1283 		if( result <=0) {
1284 			intl_error_set( NULL, status,
1285 				"locale_filter_matches : unable to canonicalize loc_range" , 0 );
1286 			RETURN_FALSE;
1287 		}
1288 
1289 		/* canonicalize lang_tag */
1290 		can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1291 		if( result <=0) {
1292 			intl_error_set( NULL, status,
1293 				"locale_filter_matches : unable to canonicalize lang_tag" , 0 );
1294 			RETURN_FALSE;
1295 		}
1296 
1297 		/* Convert to lower case for case-insensitive comparison */
1298 		cur_lang_tag = ecalloc( 1, can_lang_tag->len + 1);
1299 
1300 		/* Convert to lower case for case-insensitive comparison */
1301 		result = strToMatch( can_lang_tag->val , cur_lang_tag);
1302 		if( result == 0) {
1303 			efree( cur_lang_tag );
1304 			zend_string_release_ex( can_lang_tag, 0 );
1305 			RETURN_FALSE;
1306 		}
1307 
1308 		cur_loc_range = ecalloc( 1, can_loc_range->len + 1);
1309 		result = strToMatch( can_loc_range->val , cur_loc_range );
1310 		if( result == 0) {
1311 			efree( cur_lang_tag );
1312 			zend_string_release_ex( can_lang_tag, 0 );
1313 			efree( cur_loc_range );
1314 			zend_string_release_ex( can_loc_range, 0 );
1315 			RETURN_FALSE;
1316 		}
1317 
1318 		/* check if prefix */
1319 		token 	= strstr( cur_lang_tag , cur_loc_range );
1320 
1321 		if( token && (token==cur_lang_tag) ){
1322 			/* check if the char. after match is SEPARATOR */
1323 			chrcheck = token + (strlen(cur_loc_range));
1324 			if( isIDSeparator(*chrcheck) || isKeywordSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1325 				efree( cur_lang_tag );
1326 				efree( cur_loc_range );
1327 				if( can_lang_tag){
1328 					zend_string_release_ex( can_lang_tag, 0 );
1329 				}
1330 				if( can_loc_range){
1331 					zend_string_release_ex( can_loc_range, 0 );
1332 				}
1333 				RETURN_TRUE;
1334 			}
1335 		}
1336 
1337 		/* No prefix as loc_range */
1338 		if( cur_lang_tag){
1339 			efree( cur_lang_tag );
1340 		}
1341 		if( cur_loc_range){
1342 			efree( cur_loc_range );
1343 		}
1344 		if( can_lang_tag){
1345 			zend_string_release_ex( can_lang_tag, 0 );
1346 		}
1347 		if( can_loc_range){
1348 			zend_string_release_ex( can_loc_range, 0 );
1349 		}
1350 		RETURN_FALSE;
1351 
1352 	} /* end of if isCanonical */
1353 	else{
1354 		/* Convert to lower case for case-insensitive comparison */
1355 		cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1356 
1357 		result = strToMatch( lang_tag , cur_lang_tag);
1358 		if( result == 0) {
1359 			efree( cur_lang_tag );
1360 			RETURN_FALSE;
1361 		}
1362 		cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1363 		result = strToMatch( loc_range , cur_loc_range );
1364 		if( result == 0) {
1365 			efree( cur_lang_tag );
1366 			efree( cur_loc_range );
1367 			RETURN_FALSE;
1368 		}
1369 
1370 		/* check if prefix */
1371 		token 	= strstr( cur_lang_tag , cur_loc_range );
1372 
1373 		if( token && (token==cur_lang_tag) ){
1374 			/* check if the char. after match is SEPARATOR */
1375 			chrcheck = token + (strlen(cur_loc_range));
1376 			if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1377 				efree( cur_lang_tag );
1378 				efree( cur_loc_range );
1379 				RETURN_TRUE;
1380 			}
1381 		}
1382 
1383 		/* No prefix as loc_range */
1384 		if( cur_lang_tag){
1385 			efree( cur_lang_tag );
1386 		}
1387 		if( cur_loc_range){
1388 			efree( cur_loc_range );
1389 		}
1390 		RETURN_FALSE;
1391 
1392 	}
1393 }
1394 /* }}} */
1395 
/*
 * Releases a pair array: for each of the first arr_size pairs the entry at
 * the even index (arr[0], arr[2], ...) is freed when non-NULL, then arr
 * itself is freed. Entries at odd indices are never freed here.
 */
static void array_cleanup( char* arr[] , int arr_size)
{
	int pair;

	for (pair = arr_size - 1; pair >= 0; pair--) {
		char* owned = arr[pair * 2];

		if (owned) {
			efree(owned);
		}
	}

	efree(arr);
}
1406 
1407 #define LOOKUP_CLEAN_RETURN(value)	array_cleanup(cur_arr, cur_arr_len); return (value)
1408 /* {{{
1409 * returns the lookup result to lookup_loc_range_src_php
1410 * internal function
1411 */
lookup_loc_range(const char * loc_range,HashTable * hash_arr,int canonicalize)1412 static zend_string* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize )
1413 {
1414 	int	i = 0;
1415 	int	cur_arr_len = 0;
1416 	int result = 0;
1417 
1418 	zend_string* lang_tag = NULL;
1419 	zval* ele_value = NULL;
1420 
1421 	char* cur_loc_range	= NULL;
1422 	zend_string* can_loc_range	= NULL;
1423 	zend_off_t saved_pos = 0;
1424 
1425 	zend_string* return_value = NULL;
1426 
1427 	char **cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1428 	ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1429 	/* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1430 		if(Z_TYPE_P(ele_value)!= IS_STRING) {
1431 			/* element value is not a string */
1432 			zend_argument_type_error(2, "must only contain string values");
1433 			LOOKUP_CLEAN_RETURN(NULL);
1434 		}
1435 		cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1436 		result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1437 		if(result == 0) {
1438 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0);
1439 			LOOKUP_CLEAN_RETURN(NULL);
1440 		}
1441 		cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1442 		cur_arr_len++ ;
1443 	} ZEND_HASH_FOREACH_END(); /* end of for */
1444 
1445 	/* Canonicalize array elements */
1446 	if(canonicalize) {
1447 		for(i=0; i<cur_arr_len; i++) {
1448 			lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1449 			if(result != 1 || lang_tag == NULL || !lang_tag->val[0]) {
1450 				if(lang_tag) {
1451 					zend_string_release_ex(lang_tag, 0);
1452 				}
1453 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1454 				LOOKUP_CLEAN_RETURN(NULL);
1455 			}
1456 			cur_arr[i*2] = erealloc(cur_arr[i*2], lang_tag->len+1);
1457 			result = strToMatch(lang_tag->val, cur_arr[i*2]);
1458 			zend_string_release_ex(lang_tag, 0);
1459 			if(result == 0) {
1460 				intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1461 				LOOKUP_CLEAN_RETURN(NULL);
1462 			}
1463 		}
1464 
1465 	}
1466 
1467 	if(canonicalize) {
1468 		/* Canonicalize the loc_range */
1469 		can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1470 		if( result != 1 || can_loc_range == NULL || !can_loc_range->val[0]) {
1471 			/* Error */
1472 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 );
1473 			if(can_loc_range) {
1474 				zend_string_release_ex(can_loc_range, 0);
1475 			}
1476 			LOOKUP_CLEAN_RETURN(NULL);
1477 		} else {
1478 			loc_range = can_loc_range->val;
1479 		}
1480 	}
1481 
1482 	cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1483 	/* convert to lower and replace hyphens */
1484 	result = strToMatch(loc_range, cur_loc_range);
1485 	if(can_loc_range) {
1486 		zend_string_release_ex(can_loc_range, 0);
1487 	}
1488 	if(result == 0) {
1489 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1490 		LOOKUP_CLEAN_RETURN(NULL);
1491 	}
1492 
1493 	/* Lookup for the lang_tag match */
1494 	saved_pos = strlen(cur_loc_range);
1495 	while(saved_pos > 0) {
1496 		for(i=0; i< cur_arr_len; i++){
1497 			if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1498 				/* Match found */
1499 				char *str = canonicalize ? cur_arr[i*2] : cur_arr[i*2+1];
1500 				return_value = zend_string_init(str, strlen(str), 0);
1501 				efree(cur_loc_range);
1502 				LOOKUP_CLEAN_RETURN(return_value);
1503 			}
1504 		}
1505 		saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1506 	}
1507 
1508 	/* Match not found */
1509 	efree(cur_loc_range);
1510 	LOOKUP_CLEAN_RETURN(NULL);
1511 }
1512 /* }}} */
1513 
1514 /* {{{ Searches the items in $langtag for the best match to the language
1515 * range
1516 */
1517 /* }}} */
1518 /* {{{ Searches the items in $langtag for the best match to the language
1519 * range
1520 */
PHP_FUNCTION(locale_lookup)1521 PHP_FUNCTION(locale_lookup)
1522 {
1523 	zend_string*   	fallback_loc_str	= NULL;
1524 	char*    	loc_range      		= NULL;
1525 	size_t        	loc_range_len  		= 0;
1526 
1527 	zval*		arr				= NULL;
1528 	HashTable*	hash_arr		= NULL;
1529 	bool	boolCanonical	= 0;
1530 	zend_string* 	result_str	= NULL;
1531 
1532 	intl_error_reset( NULL );
1533 
1534 	ZEND_PARSE_PARAMETERS_START(2, 4)
1535 		Z_PARAM_ARRAY(arr)
1536 		Z_PARAM_STRING(loc_range, loc_range_len)
1537 		Z_PARAM_OPTIONAL
1538 		Z_PARAM_BOOL(boolCanonical)
1539 		Z_PARAM_STR_OR_NULL(fallback_loc_str)
1540 	ZEND_PARSE_PARAMETERS_END();
1541 
1542 	if(loc_range_len == 0) {
1543 		if(fallback_loc_str) {
1544 			loc_range = ZSTR_VAL(fallback_loc_str);
1545 			loc_range_len = ZSTR_LEN(fallback_loc_str);
1546 		} else {
1547 			loc_range = (char *)intl_locale_get_default();
1548 			loc_range_len = strlen(loc_range);
1549 		}
1550 	}
1551 
1552 	hash_arr = Z_ARRVAL_P(arr);
1553 
1554 	INTL_CHECK_LOCALE_LEN(loc_range_len);
1555 
1556 	if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1557 		RETURN_EMPTY_STRING();
1558 	}
1559 
1560 	result_str = lookup_loc_range(loc_range, hash_arr, boolCanonical);
1561 	if(result_str == NULL || ZSTR_VAL(result_str)[0] == '\0') {
1562 		if( fallback_loc_str ) {
1563 			result_str = zend_string_copy(fallback_loc_str);
1564 		} else {
1565 			RETURN_EMPTY_STRING();
1566 		}
1567 	}
1568 
1569 	RETURN_STR(result_str);
1570 }
1571 /* }}} */
1572 
1573 /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
1574 /* }}} */
1575 /* {{{ Tries to find out best available locale based on HTTP "Accept-Language" header */
PHP_FUNCTION(locale_accept_from_http)1576 PHP_FUNCTION(locale_accept_from_http)
1577 {
1578 	UEnumeration *available;
1579 	char *http_accept = NULL;
1580 	size_t http_accept_len;
1581 	UErrorCode status = 0;
1582 	int len;
1583 	char resultLocale[INTL_MAX_LOCALE_LEN+1];
1584 	UAcceptResult outResult;
1585 
1586 	ZEND_PARSE_PARAMETERS_START(1, 1)
1587 		Z_PARAM_STRING(http_accept, http_accept_len)
1588 	ZEND_PARSE_PARAMETERS_END();
1589 	if(http_accept_len > ULOC_FULLNAME_CAPACITY) {
1590 		/* check each fragment, if any bigger than capacity, can't do it due to bug #72533 */
1591 		char *start = http_accept;
1592 		char *end;
1593 		size_t len;
1594 		do {
1595 			end = strchr(start, ',');
1596 			len = end ? end-start : http_accept_len-(start-http_accept);
1597 			if(len > ULOC_FULLNAME_CAPACITY) {
1598 				intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1599 						"locale_accept_from_http: locale string too long", 0 );
1600 				RETURN_FALSE;
1601 			}
1602 			if(end) {
1603 				start = end+1;
1604 			}
1605 		} while(end != NULL);
1606 	}
1607 
1608 	available = ures_openAvailableLocales(NULL, &status);
1609 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1610 	len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1611 						&outResult, http_accept, available, &status);
1612 	uenum_close(available);
1613 	INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1614 	if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1615 		RETURN_FALSE;
1616 	}
1617 	RETURN_STRINGL(resultLocale, len);
1618 }
1619 /* }}} */
1620