xref: /PHP-8.0/ext/intl/idn/idn.c (revision d5aed7b0)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Pierre A. Joye <pierre@php.net>                              |
14    |         Gustavo Lopes  <cataphract@php.net>                          |
15    +----------------------------------------------------------------------+
16  */
17 
18 /* {{{ includes */
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include <php.h>
24 
25 #include <unicode/uidna.h>
26 #include <unicode/ustring.h>
27 #include "ext/standard/php_string.h"
28 
29 #include "intl_error.h"
30 #include "intl_convert.h"
31 /* }}} */
32 
/* IDNA processing variants known to this file; UTS #46 (value 1) is the only one defined. */
enum {
	INTL_IDN_VARIANT_UTS46 = 1,
};
36 
37 /* {{{ grapheme_register_constants
38  * Register API constants
39  */
idn_register_constants(INIT_FUNC_ARGS)40 void idn_register_constants( INIT_FUNC_ARGS )
41 {
42 	/* OPTIONS */
43 
44 	/* Option to prohibit processing of unassigned codepoints in the input and
45 	   do not check if the input conforms to STD-3 ASCII rules. */
46 	REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
47 
48 	/* Option to allow processing of unassigned codepoints in the input */
49 	REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
50 
51 	/* Option to check if input conforms to STD-3 ASCII rules */
52 	REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
53 
54 	/* Option to check for whether the input conforms to the BiDi rules.
55 	 * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
56 	REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
57 
58 	/* Option to check for whether the input conforms to the CONTEXTJ rules.
59 	 * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
60 	REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
61 
62 	/* Option for nontransitional processing in ToASCII().
63 	 * By default, ToASCII() uses transitional processing.
64 	 * Ignored by the IDNA2003 implementation. */
65 	REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
66 
67 	/* Option for nontransitional processing in ToUnicode().
68 	 * By default, ToUnicode() uses transitional processing.
69 	 * Ignored by the IDNA2003 implementation. */
70 	REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
71 
72 	/* VARIANTS */
73 	REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
74 
75 	/* PINFO ERROR CODES */
76 	REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
77 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
78 	REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
79 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
80 	REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
81 	REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
82 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
83 	REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
84 	REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
85 	REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
86 	REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
87 	REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
88 	REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
89 }
90 /* }}} */
91 
/* Conversion direction, passed as the `mode` argument of the IDN helpers in this file. */
enum {
	INTL_IDN_TO_ASCII = 0,
	INTL_IDN_TO_UTF8  = 1
};
96 
97 /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
php_intl_idn_check_status(UErrorCode err,const char * msg)98 static int php_intl_idn_check_status(UErrorCode err, const char *msg)
99 {
100 	intl_error_set_code(NULL, err);
101 	if (U_FAILURE(err)) {
102 		char *buff;
103 		spprintf(&buff, 0, "%s: %s",
104 			get_active_function_name(),
105 			msg);
106 		intl_error_set_custom_msg(NULL, buff, 1);
107 		efree(buff);
108 		return FAILURE;
109 	}
110 
111 	return SUCCESS;
112 }
113 
php_intl_bad_args(const char * msg)114 static inline void php_intl_bad_args(const char *msg)
115 {
116 	php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg);
117 }
118 
/*
 * Run a UTS #46 conversion of `domain` and store the outcome in return_value.
 *
 *   domain     input name; its bytes and length are passed to ICU as UTF-8.
 *   option     flags forwarded to uidna_openUTS46().
 *   mode       INTL_IDN_TO_ASCII selects uidna_nameToASCII_UTF8(); any other
 *              value selects uidna_nameToUnicodeUTF8().
 *   idna_info  when non-NULL, receives the "result", "isTransitionalDifferent"
 *              and "errors" entries (in that order).
 *
 * return_value is the converted string when ICU reported no errors in
 * info.errors, and false in every other case.
 */
static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
		const zend_string *domain, uint32_t option, int mode, zval *idna_info)
{
	/* Output capacity depends on the direction: 255 bytes for ASCII, 252*4 bytes for UTF-8. */
	const int32_t capacity = (mode == INTL_IDN_TO_ASCII) ? 255 : 252*4;
	UIDNAInfo     info = UIDNA_INFO_INITIALIZER;
	UErrorCode    status = U_ZERO_ERROR;
	zend_string  *result;
	int32_t       out_len;
	UIDNA        *idna;

	idna = uidna_openUTS46(option, &status);
	if (php_intl_idn_check_status(status, "failed to open UIDNA instance") == FAILURE) {
		RETURN_FALSE;
	}

	result = zend_string_alloc(capacity, 0);
	if (mode == INTL_IDN_TO_ASCII) {
		out_len = uidna_nameToASCII_UTF8(idna, ZSTR_VAL(domain), ZSTR_LEN(domain),
				ZSTR_VAL(result), capacity, &info, &status);
	} else {
		out_len = uidna_nameToUnicodeUTF8(idna, ZSTR_VAL(domain), ZSTR_LEN(domain),
				ZSTR_VAL(result), capacity, &info, &status);
	}

	/* The length test comes first on purpose: because of short-circuiting, an
	 * output that fills or exceeds the buffer returns false WITHOUT the status
	 * being recorded via php_intl_idn_check_status(). */
	if (out_len >= capacity || php_intl_idn_check_status(status, "failed to convert name") == FAILURE) {
		uidna_close(idna);
		zend_string_efree(result);
		RETURN_FALSE;
	}

	/* out_len < capacity here, so the terminator fits inside the allocation. */
	ZSTR_VAL(result)[out_len] = '\0';
	ZSTR_LEN(result) = out_len;

	if (info.errors != 0) {
		RETVAL_FALSE;
	} else {
		RETVAL_STR_COPY(result);
	}

	if (idna_info) {
		/* The converted text is exposed here even when info.errors is non-zero. */
		add_assoc_str_ex(idna_info, "result", sizeof("result")-1, zend_string_copy(result));
		add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
				sizeof("isTransitionalDifferent")-1, info.isTransitionalDifferent);
		add_assoc_long_ex(idna_info, "errors", sizeof("errors")-1, (zend_long)info.errors);
	}

	/* Drop the local reference; copies handed out above keep the string alive. */
	zend_string_release(result);
	uidna_close(idna);
}
174 
/*
 * Shared implementation behind idn_to_ascii() and idn_to_utf8().
 *
 * Parses (string domain [, int option [, int variant [, mixed &idna_info]]]),
 * validates the variant and the domain length, turns idna_info into an array
 * when it was supplied, and then delegates to php_intl_idn_to_46() with the
 * requested `mode`.
 *
 * On an argument problem the intl error is set and return_value is false.
 */
static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	zend_string *domain;
	zend_long    option  = UIDNA_DEFAULT;
	zend_long    variant = INTL_IDN_VARIANT_UTS46;
	zval        *idna_info = NULL;
	const char  *arg_problem = NULL;

	intl_error_reset(NULL);

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|llz",
			&domain, &option, &variant, &idna_info) == FAILURE) {
		RETURN_THROWS();
	}

	/* First matching problem wins; the order of these checks is significant. */
	if (variant != INTL_IDN_VARIANT_UTS46) {
		arg_problem = "invalid variant, must be INTL_IDNA_VARIANT_UTS46";
	} else if (ZSTR_LEN(domain) == 0) {
		arg_problem = "empty domain name";
	} else if (ZSTR_LEN(domain) > INT32_MAX - 1) {
		/* The length is later passed to ICU as a 32-bit signed integer. */
		arg_problem = "domain name too large";
	}
	if (arg_problem != NULL) {
		php_intl_bad_args(arg_problem);
		RETURN_FALSE;
	}

	/* `option` is deliberately left unvalidated: it was not checked historically either. */

	if (idna_info != NULL) {
		/* A NULL result from zend_try_array_init() is treated as "exception thrown". */
		idna_info = zend_try_array_init(idna_info);
		if (idna_info == NULL) {
			RETURN_THROWS();
		}
	}

	php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, (uint32_t)option, mode, idna_info);
}
213 
214 /* {{{ Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
PHP_FUNCTION(idn_to_ascii)215 PHP_FUNCTION(idn_to_ascii)
216 {
217 	php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
218 }
219 /* }}} */
220 
221 
222 /* {{{ Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
PHP_FUNCTION(idn_to_utf8)223 PHP_FUNCTION(idn_to_utf8)
224 {
225 	php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
226 }
227 /* }}} */
228