xref: /PHP-7.4/ext/intl/idn/idn.c (revision c43fc204)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) The PHP Group                                          |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Pierre A. Joye <pierre@php.net>                              |
16    |         Gustavo Lopes  <cataphract@php.net>                          |
17    +----------------------------------------------------------------------+
18  */
19 
20 /* {{{ includes */
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include <php.h>
26 
27 #include <unicode/uidna.h>
28 #include <unicode/ustring.h>
29 #include "ext/standard/php_string.h"
30 
31 #include "intl_error.h"
32 #include "intl_convert.h"
33 /* }}} */
34 
/* IDNA processing variants accepted as the "variant" argument of
 * idn_to_ascii()/idn_to_utf8(); they are exported to userland under the
 * names INTL_IDNA_VARIANT_2003 and INTL_IDNA_VARIANT_UTS46. */
enum {
	INTL_IDN_VARIANT_2003  = 0,
	INTL_IDN_VARIANT_UTS46 = 1
};
39 
40 /* {{{ grapheme_register_constants
41  * Register API constants
42  */
idn_register_constants(INIT_FUNC_ARGS)43 void idn_register_constants( INIT_FUNC_ARGS )
44 {
45 	/* OPTIONS */
46 
47 	/* Option to prohibit processing of unassigned codepoints in the input and
48 	   do not check if the input conforms to STD-3 ASCII rules. */
49 	REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
50 
51 	/* Option to allow processing of unassigned codepoints in the input */
52 	REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
53 
54 	/* Option to check if input conforms to STD-3 ASCII rules */
55 	REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
56 
57 	/* Option to check for whether the input conforms to the BiDi rules.
58 	 * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
59 	REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
60 
61 	/* Option to check for whether the input conforms to the CONTEXTJ rules.
62 	 * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
63 	REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
64 
65 	/* Option for nontransitional processing in ToASCII().
66 	 * By default, ToASCII() uses transitional processing.
67 	 * Ignored by the IDNA2003 implementation. */
68 	REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
69 
70 	/* Option for nontransitional processing in ToUnicode().
71 	 * By default, ToUnicode() uses transitional processing.
72 	 * Ignored by the IDNA2003 implementation. */
73 	REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
74 
75 	/* VARIANTS */
76 	REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT);
77 	REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
78 
79 	/* PINFO ERROR CODES */
80 	REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
81 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
82 	REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
83 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
84 	REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
85 	REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
86 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
87 	REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
88 	REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
89 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
90 	REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
91 	REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
92 	REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
93 }
94 /* }}} */
95 
/* Conversion direction passed as the "mode" argument of the
 * php_intl_idn_* helpers in this file. */
enum {
	INTL_IDN_TO_ASCII = 0,
	INTL_IDN_TO_UTF8  = 1
};
100 
101 /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
php_intl_idn_check_status(UErrorCode err,const char * msg)102 static int php_intl_idn_check_status(UErrorCode err, const char *msg)
103 {
104 	intl_error_set_code(NULL, err);
105 	if (U_FAILURE(err)) {
106 		char *buff;
107 		spprintf(&buff, 0, "%s: %s",
108 			get_active_function_name(),
109 			msg);
110 		intl_error_set_custom_msg(NULL, buff, 1);
111 		efree(buff);
112 		return FAILURE;
113 	}
114 
115 	return SUCCESS;
116 }
117 
php_intl_bad_args(const char * msg)118 static inline void php_intl_bad_args(const char *msg)
119 {
120 	php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg);
121 }
122 
/* UTS #46 implementation behind idn_to_ascii()/idn_to_utf8().
 *
 * domain    - input name; its bytes are passed to the ICU *UTF8 functions
 * option    - option bits handed to uidna_openUTS46()
 * mode      - INTL_IDN_TO_ASCII selects uidna_nameToASCII_UTF8(); any other
 *             value selects uidna_nameToUnicodeUTF8()
 * idna_info - optional array zval; when non-NULL it receives the keys
 *             "result", "isTransitionalDifferent" and "errors"
 *
 * The return value is set to the converted string when ICU reports no
 * errors in UIDNAInfo, and to FALSE otherwise (including when the instance
 * cannot be opened, the status is a failure, or the result does not fit the
 * buffer). */
static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
		const zend_string *domain, uint32_t option, int mode, zval *idna_info)
{
	const int      to_ascii = (mode == INTL_IDN_TO_ASCII);
	/* Output capacity in bytes: 255 for the ASCII form, 252*4 for UTF-8. */
	const int32_t  capacity = to_ascii ? 255 : 252*4;
	UErrorCode     status = U_ZERO_ERROR;
	UIDNAInfo      info = UIDNA_INFO_INITIALIZER;
	UIDNA         *idna;
	zend_string   *result;
	int32_t        result_len;

	idna = uidna_openUTS46(option, &status);
	if (php_intl_idn_check_status(status, "failed to open UIDNA instance") == FAILURE) {
		RETURN_FALSE;
	}

	result = zend_string_alloc(capacity, 0);
	if (to_ascii) {
		result_len = uidna_nameToASCII_UTF8(idna, ZSTR_VAL(domain), ZSTR_LEN(domain),
				ZSTR_VAL(result), capacity, &info, &status);
	} else {
		result_len = uidna_nameToUnicodeUTF8(idna, ZSTR_VAL(domain), ZSTR_LEN(domain),
				ZSTR_VAL(result), capacity, &info, &status);
	}

	/* A length that reaches the capacity is rejected before the status is
	 * even inspected, so in that case no intl error is recorded. */
	if (result_len >= capacity || php_intl_idn_check_status(status, "failed to convert name") == FAILURE) {
		uidna_close(idna);
		zend_string_efree(result);
		RETURN_FALSE;
	}

	ZSTR_VAL(result)[result_len] = '\0';
	ZSTR_LEN(result) = result_len;

	if (info.errors != 0) {
		RETVAL_FALSE;
	} else {
		RETVAL_STR_COPY(result);
	}

	/* The details are reported even when the conversion produced errors. */
	if (idna_info) {
		add_assoc_str_ex(idna_info, "result", sizeof("result")-1, zend_string_copy(result));
		add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
				sizeof("isTransitionalDifferent")-1, info.isTransitionalDifferent);
		add_assoc_long_ex(idna_info, "errors", sizeof("errors")-1, (zend_long)info.errors);
	}

	/* Drop the local reference; copies taken above keep the string alive. */
	zend_string_release(result);
	uidna_close(idna);
}
178 
/* IDNA2003 implementation behind idn_to_ascii()/idn_to_utf8(), built on the
 * ICU uidna_IDNToASCII()/uidna_IDNToUnicode() functions (deprecation
 * warnings for them are silenced below).
 *
 * domain - input name, converted to UTF-16 before being handed to ICU
 * option - option bits passed through to ICU
 * mode   - INTL_IDN_TO_ASCII selects uidna_IDNToASCII(); any other value
 *          selects uidna_IDNToUnicode()
 *
 * On success the return value is the converted name as a newly allocated
 * UTF-8 string. On any failure the intl error code and message are set and
 * the return value is FALSE. */
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
		const zend_string *domain, uint32_t option, int mode)
{
	UChar       *ustring = NULL;
	int32_t      ustring_len = 0; /* int32_t to match the converter's length out-parameter */
	UErrorCode   status = U_ZERO_ERROR;
	UParseError  parse_error;
	UChar        converted[MAXPATHLEN];
	int32_t      converted_ret_len;
	zend_string *u8str;

	/* convert the string to UTF-16. */
	intl_convert_utf8_to_utf16(&ustring, &ustring_len, ZSTR_VAL(domain), ZSTR_LEN(domain), &status);

	if (U_FAILURE(status)) {
		intl_error_set_code(NULL, status);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
		if (ustring) {
			efree(ustring);
		}
		RETURN_FALSE;
	}

	status = U_ZERO_ERROR;
#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
#elif ZEND_GCC_VERSION >= 4008
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
	if (mode == INTL_IDN_TO_ASCII) {
		converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
	} else {
		converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
	}
#if defined(__clang__)
# pragma clang diagnostic pop
#elif ZEND_GCC_VERSION >= 4008
# pragma GCC diagnostic pop
#endif
	efree(ustring);

	if (U_FAILURE(status)) {
		/* Name the function and target that actually failed; the message
		 * used to claim idn_to_ascii/ASCII for both directions. */
		intl_error_set( NULL, status,
			mode == INTL_IDN_TO_ASCII
				? "idn_to_ascii: cannot convert to ASCII"
				: "idn_to_utf8: cannot convert to Unicode",
			0 );
		RETURN_FALSE;
	}

	status = U_ZERO_ERROR;
	u8str = intl_convert_utf16_to_utf8(converted, converted_ret_len, &status);

	if (!u8str) {
		/* Set global error code. */
		intl_error_set_code(NULL, status);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
		RETURN_FALSE;
	}

	/* return the allocated string, not a duplicate */
	RETVAL_NEW_STR(u8str);
}
246 
/* Shared entry point of idn_to_ascii() and idn_to_utf8(): parses and
 * validates the PHP arguments, then dispatches to the IDNA2003 or the UTS #46
 * implementation depending on the requested variant.
 *
 * mode - INTL_IDN_TO_ASCII or INTL_IDN_TO_UTF8, forwarded unchanged.
 *
 * Returns NULL to userland when argument parsing fails and FALSE when an
 * argument is rejected; otherwise the return value is set by the
 * implementation that was dispatched to. */
static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	zend_string *domain;
	zend_long    option = 0;
	zend_long    variant = INTL_IDN_VARIANT_UTS46;
	zval        *idna_info = NULL;

	intl_error_reset(NULL);

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|llz",
			&domain, &option, &variant, &idna_info) == FAILURE) {
		php_intl_bad_args("bad arguments");
		RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */
	}

	if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) {
		php_intl_bad_args("invalid variant, must be one of {"
			"INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}");
		RETURN_FALSE;
	}

	if (ZSTR_LEN(domain) < 1) {
		php_intl_bad_args("empty domain name");
		RETURN_FALSE;
	}
	if (ZSTR_LEN(domain) > INT32_MAX - 1) {
		php_intl_bad_args("domain name too large");
		RETURN_FALSE;
	}
	/* don't check options; it wasn't checked before */

	if (variant == INTL_IDN_VARIANT_2003) {
		/* Legacy path: always deprecated, and it has no use for the fourth
		 * argument, which is left untouched. */
		php_error_docref(NULL, E_DEPRECATED, "INTL_IDNA_VARIANT_2003 is deprecated");
		if (idna_info != NULL) {
			php_error_docref(NULL, E_NOTICE,
				"4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
				"takes 3 - extra argument ignored");
		}
		php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, (uint32_t)option, mode);
		return;
	}

	/* UTS #46 path: turn the by-reference argument into an array first and
	 * bail out if that is not possible. */
	if (idna_info != NULL) {
		idna_info = zend_try_array_init(idna_info);
		if (!idna_info) {
			return;
		}
	}

	php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, (uint32_t)option, mode, idna_info);
}
302 
303 /* {{{ proto string idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
304    Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
PHP_FUNCTION(idn_to_ascii)305 PHP_FUNCTION(idn_to_ascii)
306 {
307 	php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
308 }
309 /* }}} */
310 
311 
312 /* {{{ proto string idn_to_utf8(string domain[, int options[, int variant[, array &idna_info]]])
313    Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
PHP_FUNCTION(idn_to_utf8)314 PHP_FUNCTION(idn_to_utf8)
315 {
316 	php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
317 }
318 /* }}} */
319