xref: /PHP-7.3/ext/intl/idn/idn.c (revision c43fc204)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 2009 The PHP Group                                     |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Pierre A. Joye <pierre@php.net>                              |
16    |         Gustavo Lopes  <cataphract@php.net>                          |
17    +----------------------------------------------------------------------+
18  */
19 
20 /* {{{ includes */
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include <php.h>
26 
27 #include <unicode/uidna.h>
28 #include <unicode/ustring.h>
29 #include "ext/standard/php_string.h"
30 
31 #include "intl_error.h"
32 #include "intl_convert.h"
33 /* }}} */
34 
/* UIDNA_INFO_INITIALIZER is used as the feature test for ICU's UTS #46 API
 * (introduced in ICU 4.6); everything UTS #46 specific in this file is
 * guarded by HAVE_46_API. */
#if defined(UIDNA_INFO_INITIALIZER)
# define HAVE_46_API 1
#endif
38 
/* IDNA processing variants accepted through the "variant" argument.
 * The values are exported to userland as INTL_IDNA_VARIANT_2003 and
 * INTL_IDNA_VARIANT_UTS46 by idn_register_constants(). */
enum {
	INTL_IDN_VARIANT_2003  = 0,
	INTL_IDN_VARIANT_UTS46 = 1
};
43 
44 /* {{{ grapheme_register_constants
45  * Register API constants
46  */
idn_register_constants(INIT_FUNC_ARGS)47 void idn_register_constants( INIT_FUNC_ARGS )
48 {
49 	/* OPTIONS */
50 
51 	/* Option to prohibit processing of unassigned codepoints in the input and
52 	   do not check if the input conforms to STD-3 ASCII rules. */
53 	REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
54 
55 	/* Option to allow processing of unassigned codepoints in the input */
56 	REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
57 
58 	/* Option to check if input conforms to STD-3 ASCII rules */
59 	REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
60 
61 #ifdef HAVE_46_API
62 
63 	/* Option to check for whether the input conforms to the BiDi rules.
64 	 * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
65 	REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
66 
67 	/* Option to check for whether the input conforms to the CONTEXTJ rules.
68 	 * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
69 	REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
70 
71 	/* Option for nontransitional processing in ToASCII().
72 	 * By default, ToASCII() uses transitional processing.
73 	 * Ignored by the IDNA2003 implementation. */
74 	REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
75 
76 	/* Option for nontransitional processing in ToUnicode().
77 	 * By default, ToUnicode() uses transitional processing.
78 	 * Ignored by the IDNA2003 implementation. */
79 	REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
80 #endif
81 
82 	/* VARIANTS */
83 	REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT);
84 #ifdef HAVE_46_API
85 	REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
86 #endif
87 
88 #ifdef HAVE_46_API
89 	/* PINFO ERROR CODES */
90 	REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
91 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
92 	REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
93 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
94 	REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
95 	REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
96 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
97 	REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
98 	REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
99 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
100 	REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
101 	REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
102 	REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
103 #endif
104 }
105 /* }}} */
106 
/* Conversion direction, passed as "mode" by the userland entry points
 * idn_to_ascii() and idn_to_utf8(). */
enum {
	INTL_IDN_TO_ASCII = 0,
	INTL_IDN_TO_UTF8  = 1
};
111 
112 /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
php_intl_idn_check_status(UErrorCode err,const char * msg)113 static int php_intl_idn_check_status(UErrorCode err, const char *msg)
114 {
115 	intl_error_set_code(NULL, err);
116 	if (U_FAILURE(err)) {
117 		char *buff;
118 		spprintf(&buff, 0, "%s: %s",
119 			get_active_function_name(),
120 			msg);
121 		intl_error_set_custom_msg(NULL, buff, 1);
122 		efree(buff);
123 		return FAILURE;
124 	}
125 
126 	return SUCCESS;
127 }
128 
php_intl_bad_args(const char * msg)129 static inline void php_intl_bad_args(const char *msg)
130 {
131 	php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg);
132 }
133 
#ifdef HAVE_46_API
/* Convert `domain` using ICU's UTS #46 implementation.
 *
 * domain    - UTF-8 input; the caller has already rejected empty names and
 *             names whose length does not fit an int32_t.
 * option    - UIDNA_* option bits, passed to uidna_openUTS46() unchanged.
 * mode      - INTL_IDN_TO_ASCII or INTL_IDN_TO_UTF8.
 * idna_info - optional array (already initialised by the caller) that
 *             receives the keys "result", "isTransitionalDifferent" and
 *             "errors"; may be NULL.
 *
 * return_value is set to the converted string when ICU reports no errors in
 * UIDNAInfo, and to FALSE otherwise. When opening the UIDNA instance or the
 * conversion call itself fails, or the result does not fit the fixed-size
 * output buffer, FALSE is returned, idna_info is left untouched and the
 * global intl error is set. */
static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
		const zend_string *domain, uint32_t option, int mode, zval *idna_info)
{
	UErrorCode	  status = U_ZERO_ERROR;
	UIDNA		  *uts46;
	int32_t		  len;
	int32_t		  buffer_capac;
	zend_string	  *buffer;
	UIDNAInfo	  info = UIDNA_INFO_INITIALIZER;
	int			  buffer_used = 0;

	uts46 = uidna_openUTS46(option, &status);
	if (php_intl_idn_check_status(status, "failed to open UIDNA instance") == FAILURE) {
		RETURN_FALSE;
	}

	/* Fixed output capacities: 255 bytes for the ASCII form, 252*4 bytes for
	 * the UTF-8 form. */
	buffer_capac = (mode == INTL_IDN_TO_ASCII) ? 255 : 252*4;
	buffer = zend_string_alloc(buffer_capac, 0);

	if (mode == INTL_IDN_TO_ASCII) {
		len = uidna_nameToASCII_UTF8(uts46, ZSTR_VAL(domain), ZSTR_LEN(domain),
				ZSTR_VAL(buffer), buffer_capac, &info, &status);
	} else {
		len = uidna_nameToUnicodeUTF8(uts46, ZSTR_VAL(domain), ZSTR_LEN(domain),
				ZSTR_VAL(buffer), buffer_capac, &info, &status);
	}

	/* A result that fills or exceeds the buffer is rejected. Turn that into
	 * an explicit error status so that the failure is always recorded in the
	 * global intl error instead of returning FALSE silently. */
	if (len >= buffer_capac && !U_FAILURE(status)) {
		status = U_BUFFER_OVERFLOW_ERROR;
	}
	if (php_intl_idn_check_status(status, "failed to convert name") == FAILURE) {
		uidna_close(uts46);
		zend_string_efree(buffer);
		RETURN_FALSE;
	}

	ZSTR_VAL(buffer)[len] = '\0';
	ZSTR_LEN(buffer) = len;

	if (info.errors == 0) {
		RETVAL_STR(buffer);
		buffer_used = 1; /* ownership moved to return_value */
	} else {
		RETVAL_FALSE;
	}

	if (idna_info) {
		if (buffer_used) { /* used in return_value then */
			/* share the string with return_value: one extra reference */
			zval_addref_p(return_value);
			add_assoc_zval_ex(idna_info, "result", sizeof("result")-1, return_value);
		} else {
			/* conversion had errors: the (partial) result is still reported */
			zval zv;
			ZVAL_NEW_STR(&zv, buffer);
			buffer_used = 1; /* ownership moved to idna_info */
			add_assoc_zval_ex(idna_info, "result", sizeof("result")-1, &zv);
		}
		add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
				sizeof("isTransitionalDifferent")-1, info.isTransitionalDifferent);
		add_assoc_long_ex(idna_info, "errors", sizeof("errors")-1, (zend_long)info.errors);
	}

	if (!buffer_used) {
		zend_string_efree(buffer);
	}

	uidna_close(uts46);
}
#endif
204 
/* Convert `domain` using ICU's (deprecated) IDNA2003 API.
 *
 * domain - UTF-8 input.
 * option - UIDNA_* option bits, handed to ICU as int32_t.
 * mode   - INTL_IDN_TO_ASCII selects uidna_IDNToASCII(), any other value
 *          selects uidna_IDNToUnicode().
 *
 * The input is converted to UTF-16, processed by ICU into a stack buffer of
 * MAXPATHLEN UChars and converted back to UTF-8. return_value is set to the
 * resulting string; on any failure it is set to FALSE and the global intl
 * error is populated. */
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
		const zend_string *domain, uint32_t option, int mode)
{
	UChar       *ustring = NULL;
	int          ustring_len = 0;
	UErrorCode   status = U_ZERO_ERROR;
	zend_string *u8str;
	UParseError  parse_error;
	UChar        converted[MAXPATHLEN];
	int32_t      converted_ret_len;

	/* convert the string to UTF-16. */
	intl_convert_utf8_to_utf16(&ustring, &ustring_len, ZSTR_VAL(domain), ZSTR_LEN(domain), &status);

	if (U_FAILURE(status)) {
		intl_error_set_code(NULL, status);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
		if (ustring) {
			efree(ustring);
		}
		RETURN_FALSE;
	}

	status = U_ZERO_ERROR;
	/* The IDNA2003 entry points are deprecated in ICU; silence the warning. */
#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
#elif ZEND_GCC_VERSION >= 4008
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
	if (mode == INTL_IDN_TO_ASCII) {
		converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
	} else {
		converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
	}
#if defined(__clang__)
# pragma clang diagnostic pop
#elif ZEND_GCC_VERSION >= 4008
# pragma GCC diagnostic pop
#endif
	efree(ustring);

	if (U_FAILURE(status)) {
		/* Name the function and direction that actually failed; both
		 * messages are string literals, so they need not be copied. */
		intl_error_set( NULL, status,
			mode == INTL_IDN_TO_ASCII
				? "idn_to_ascii: cannot convert to ASCII"
				: "idn_to_utf8: cannot convert to Unicode", 0 );
		RETURN_FALSE;
	}

	status = U_ZERO_ERROR;
	u8str = intl_convert_utf16_to_utf8(converted, converted_ret_len, &status);

	if (!u8str) {
		/* Set global error code. */
		intl_error_set_code(NULL, status);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
		RETURN_FALSE;
	}

	/* return the allocated string, not a duplicate */
	RETVAL_NEW_STR(u8str);
}
272 
/* Shared implementation of idn_to_ascii() and idn_to_utf8().
 *
 * Parses (string domain[, int options[, int variant[, array &idna_info]]]),
 * validates the variant and the domain length, and dispatches to the
 * IDNA2003 or UTS #46 backend according to `variant`.
 *
 * mode - INTL_IDN_TO_ASCII or INTL_IDN_TO_UTF8, forwarded to the backend.
 *
 * return_value: NULL if the arguments cannot be parsed, FALSE if validation
 * fails, otherwise whatever the selected backend sets. Validation failures
 * are recorded as U_ILLEGAL_ARGUMENT_ERROR in the global intl error. */
static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	zend_string *domain;
	zend_long option = 0,
		 variant = INTL_IDN_VARIANT_2003;
	zval *idna_info = NULL;

	intl_error_reset(NULL);

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|llz/",
			&domain, &option, &variant, &idna_info) == FAILURE) {
		php_intl_bad_args("bad arguments");
		RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */
	}

#ifdef HAVE_46_API
	if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) {
		php_intl_bad_args("invalid variant, must be one of {"
			"INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}");
		RETURN_FALSE;
	}
#else
	if (variant != INTL_IDN_VARIANT_2003) {
		/* Refer to the constant by its userland name (INTL_IDNA_...). */
		php_intl_bad_args("invalid variant, PHP was compiled against "
			"an old version of ICU and only supports INTL_IDNA_VARIANT_2003");
		RETURN_FALSE;
	}
#endif

	if (ZSTR_LEN(domain) < 1) {
		php_intl_bad_args("empty domain name");
		RETURN_FALSE;
	}
	/* The backends hand the length to ICU as an int32_t. */
	if (ZSTR_LEN(domain) > INT32_MAX - 1) {
		php_intl_bad_args("domain name too large");
		RETURN_FALSE;
	}
	/* don't check options; it wasn't checked before */

	if (variant == INTL_IDN_VARIANT_2003) {
		php_error_docref(NULL, E_DEPRECATED, "INTL_IDNA_VARIANT_2003 is deprecated");
	}

	if (idna_info != NULL) {
		if (variant == INTL_IDN_VARIANT_2003) {
			php_error_docref(NULL, E_NOTICE,
				"4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
				"takes 3 - extra argument ignored");
		} else {
			/* Replace whatever the caller passed with a fresh, empty array
			 * for the backend to fill. */
			zval_ptr_dtor(idna_info);
			array_init(idna_info);
		}
	}

	if (variant == INTL_IDN_VARIANT_2003) {
		php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, (uint32_t)option, mode);
	}
#ifdef HAVE_46_API
	else {
		php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, (uint32_t)option, mode, idna_info);
	}
#endif
}
336 
/* {{{ proto string idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
   Converts a Unicode domain to its ASCII representation, as defined in the IDNA RFC.
   Thin entry point: argument parsing, validation and the conversion itself are
   done by php_intl_idn_handoff() in INTL_IDN_TO_ASCII mode, which yields the
   converted string, FALSE on failure, or NULL when the arguments cannot be parsed. */
PHP_FUNCTION(idn_to_ascii)
{
	php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
}
/* }}} */
344 
345 
/* {{{ proto string idn_to_utf8(string domain[, int options[, int variant[, array &idna_info]]])
   Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC.
   Thin entry point: argument parsing, validation and the conversion itself are
   done by php_intl_idn_handoff() in INTL_IDN_TO_UTF8 mode, which yields the
   converted string, FALSE on failure, or NULL when the arguments cannot be parsed. */
PHP_FUNCTION(idn_to_utf8)
{
	php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
}
/* }}} */
353 
354 
355 /*
356  * Local variables:
357  * tab-width: 4
358  * c-basic-offset: 4
359  * End:
360  * vim600: fdm=marker
361  * vim: noet sw=4 ts=4
362  */
363