1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2009 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Pierre A. Joye <pierre@php.net> |
16 | Gustavo Lopes <cataphract@php.net> |
17 +----------------------------------------------------------------------+
18 */
19 /* $Id$ */
20
21 /* {{{ includes */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <php.h>
27
28 #include <unicode/uidna.h>
29 #include <unicode/ustring.h>
30 #include "ext/standard/php_string.h"
31
32 #include "intl_error.h"
33 #include "intl_convert.h"
34 /* }}} */
35
/* Feature test: UIDNA_INFO_INITIALIZER is only available when ICU provides
 * the UTS #46 API (introduced in ICU 4.6). */
#if defined(UIDNA_INFO_INITIALIZER)
# define HAVE_46_API 1
#endif
39
/* IDNA processing variants accepted by the "variant" argument of
 * idn_to_ascii()/idn_to_utf8(); exported to userland as INTL_IDNA_VARIANT_*. */
enum {
	INTL_IDN_VARIANT_2003  = 0,
	INTL_IDN_VARIANT_UTS46 = 1
};
44
45 /* {{{ grapheme_register_constants
46 * Register API constants
47 */
idn_register_constants(INIT_FUNC_ARGS)48 void idn_register_constants( INIT_FUNC_ARGS )
49 {
50 /* OPTIONS */
51
52 /* Option to prohibit processing of unassigned codepoints in the input and
53 do not check if the input conforms to STD-3 ASCII rules. */
54 REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
55
56 /* Option to allow processing of unassigned codepoints in the input */
57 REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
58
59 /* Option to check if input conforms to STD-3 ASCII rules */
60 REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
61
62 #ifdef HAVE_46_API
63
64 /* Option to check for whether the input conforms to the BiDi rules.
65 * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
66 REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
67
68 /* Option to check for whether the input conforms to the CONTEXTJ rules.
69 * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
70 REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
71
72 /* Option for nontransitional processing in ToASCII().
73 * By default, ToASCII() uses transitional processing.
74 * Ignored by the IDNA2003 implementation. */
75 REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
76
77 /* Option for nontransitional processing in ToUnicode().
78 * By default, ToUnicode() uses transitional processing.
79 * Ignored by the IDNA2003 implementation. */
80 REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
81 #endif
82
83 /* VARIANTS */
84 REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT);
85 #ifdef HAVE_46_API
86 REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
87 #endif
88
89 #ifdef HAVE_46_API
90 /* PINFO ERROR CODES */
91 REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
92 REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
93 REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
94 REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
95 REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
96 REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
97 REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
98 REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
99 REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
100 REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
101 REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
102 REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
103 REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
104 #endif
105 }
106 /* }}} */
107
/* Conversion direction, passed around as the "mode" argument. */
enum {
	INTL_IDN_TO_ASCII = 0,
	INTL_IDN_TO_UTF8  = 1
};
112
113 /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
php_intl_idn_check_status(UErrorCode err,const char * msg,int mode TSRMLS_DC)114 static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode TSRMLS_DC)
115 {
116 intl_error_set_code(NULL, err TSRMLS_CC);
117 if (U_FAILURE(err)) {
118 char *buff;
119 spprintf(&buff, 0, "%s: %s",
120 mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8",
121 msg);
122 intl_error_set_custom_msg(NULL, buff, 1 TSRMLS_CC);
123 efree(buff);
124 return FAILURE;
125 }
126
127 return SUCCESS;
128 }
129
/* Reports an invalid-argument condition: records U_ILLEGAL_ARGUMENT_ERROR
 * together with msg, prefixed with the function name matching mode. */
static inline void php_intl_bad_args(const char *msg, int mode TSRMLS_DC)
{
	(void) php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg, mode TSRMLS_CC);
}
134
#ifdef HAVE_46_API
/* Converts a UTF-8 domain name using ICU's UTS #46 API.
 *
 * domain/domain_len  input name (UTF-8) and its length in bytes
 * option             UIDNA_* option bits handed to uidna_openUTS46()
 * mode               INTL_IDN_TO_ASCII or INTL_IDN_TO_UTF8
 * idna_info          optional array (may be NULL) that receives the entries
 *                    "result", "isTransitionalDifferent" and "errors"
 *
 * return_value is set to the converted string when ICU reports no IDNA
 * errors, and to FALSE otherwise. When the UIDNA instance cannot be opened,
 * the conversion fails or the result does not fit the output buffer, an intl
 * error is recorded, FALSE is returned and idna_info is left untouched.
 */
static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
		const char *domain, int domain_len, uint32_t option, int mode, zval *idna_info)
{
	UErrorCode	status = U_ZERO_ERROR;
	UIDNA		*uts46;
	int32_t		len;
	int32_t		buffer_capac;
	char		*buffer;
	UIDNAInfo	info = UIDNA_INFO_INITIALIZER;
	int			buffer_used = 0;

	uts46 = uidna_openUTS46(option, &status);
	if (php_intl_idn_check_status(status, "failed to open UIDNA instance",
			mode TSRMLS_CC) == FAILURE) {
		RETURN_FALSE;
	}

	/* No ASCII (ACE) domain name may exceed 255 bytes. The UTF-8 form of the
	 * same name can be longer, because each code point may take up to 4
	 * bytes, so ToUnicode gets a proportionally larger buffer. */
	buffer_capac = (mode == INTL_IDN_TO_ASCII) ? 255 : 255 * 4;
	buffer = emalloc(buffer_capac);

	if (mode == INTL_IDN_TO_ASCII) {
		len = uidna_nameToASCII_UTF8(uts46, domain, (int32_t)domain_len,
				buffer, buffer_capac, &info, &status);
	} else {
		len = uidna_nameToUnicodeUTF8(uts46, domain, (int32_t)domain_len,
				buffer, buffer_capac, &info, &status);
	}

	/* Check the ICU status first so that a failure (including a buffer
	 * overflow) is always recorded as an intl error. */
	if (php_intl_idn_check_status(status, "failed to convert name",
			mode TSRMLS_CC) == FAILURE) {
		uidna_close(uts46);
		efree(buffer);
		RETURN_FALSE;
	}

	/* len excludes the terminator; len == capacity is not an ICU failure but
	 * leaves no room for the NUL byte written below. */
	if (len >= buffer_capac) {
		(void) php_intl_idn_check_status(U_BUFFER_OVERFLOW_ERROR,
				"converted name is too long", mode TSRMLS_CC);
		uidna_close(uts46);
		efree(buffer);
		RETURN_FALSE;
	}

	buffer[len] = '\0';

	if (info.errors == 0) {
		/* not duplicated: return_value takes over the buffer */
		RETVAL_STRINGL(buffer, len, 0);
		buffer_used = 1;
	} else {
		RETVAL_FALSE;
	}

	if (idna_info) {
		if (buffer_used) { /* used in return_value then; share that zval */
			zval_addref_p(return_value);
			add_assoc_zval_ex(idna_info, "result", sizeof("result"), return_value);
		} else {
			/* IDNA errors were reported: the (possibly partial) result is
			 * still exposed through idna_info, which takes over the buffer */
			zval *zv;
			ALLOC_INIT_ZVAL(zv);
			ZVAL_STRINGL(zv, buffer, len, 0);
			buffer_used = 1;
			add_assoc_zval_ex(idna_info, "result", sizeof("result"), zv);
		}
		add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
				sizeof("isTransitionalDifferent"), info.isTransitionalDifferent);
		add_assoc_long_ex(idna_info, "errors", sizeof("errors"), (long)info.errors);
	}

	if (!buffer_used) {
		efree(buffer);
	}

	uidna_close(uts46);
}
#endif
200
/* Converts a UTF-8 domain name using ICU's IDNA2003 API
 * (uidna_IDNToASCII / uidna_IDNToUnicode).
 *
 * domain/domain_len  input name (UTF-8) and its length in bytes
 * option             UIDNA_* option bits handed to ICU
 * mode               INTL_IDN_TO_ASCII or INTL_IDN_TO_UTF8
 *
 * The input is converted to UTF-16, run through ICU, and the result is
 * converted back to UTF-8. return_value is set to the converted string, or
 * to FALSE (with an intl error recorded) if any of the three steps fails.
 */
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
		const char *domain, int domain_len, uint32_t option, int mode)
{
	UChar		*ustring = NULL;
	int			ustring_len = 0;
	UErrorCode	status;
	/* initialised so that the error path below never frees garbage if the
	 * conversion helper fails without assigning its output pointer */
	char		*converted_utf8 = NULL;
	int32_t		converted_utf8_len = 0;
	UChar		converted[MAXPATHLEN];
	int32_t		converted_ret_len;
	UParseError	parse_error;

	/* convert the string to UTF-16. */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status);

	if (U_FAILURE(status)) {
		intl_error_set_code(NULL, status TSRMLS_CC);

		/* Set error messages. */
		intl_error_set_custom_msg(NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC);
		if (ustring) {
			efree(ustring);
		}
		RETURN_FALSE;
	}

	status = U_ZERO_ERROR;
	if (mode == INTL_IDN_TO_ASCII) {
		converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN,
				(int32_t)option, &parse_error, &status);
	} else {
		converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN,
				(int32_t)option, &parse_error, &status);
	}
	efree(ustring);

	if (U_FAILURE(status)) {
		/* name the function and target form that actually failed */
		intl_error_set(NULL, status,
				mode == INTL_IDN_TO_ASCII
					? "idn_to_ascii: cannot convert to ASCII"
					: "idn_to_utf8: cannot convert to Unicode",
				0 TSRMLS_CC);
		RETURN_FALSE;
	}

	/* convert the result back to UTF-8. */
	status = U_ZERO_ERROR;
	intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);

	if (U_FAILURE(status)) {
		/* Set global error code. */
		intl_error_set_code(NULL, status TSRMLS_CC);

		/* Set error messages. */
		intl_error_set_custom_msg(NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC);
		if (converted_utf8) {
			efree(converted_utf8);
		}
		RETURN_FALSE;
	}

	/* return the allocated string, not a duplicate */
	RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
}
258
/* Shared implementation of idn_to_ascii() and idn_to_utf8().
 *
 * Parses (string domain[, int options[, int variant[, mixed &idna_info]]]),
 * validates the variant and the domain length, and dispatches to the
 * IDNA2003 or UTS #46 converter. mode is INTL_IDN_TO_ASCII or
 * INTL_IDN_TO_UTF8.
 *
 * return_value is NULL when argument parsing fails, FALSE on any other
 * validation failure; otherwise it is whatever the selected converter sets.
 */
static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	char	*domain;
	int		domain_len;
	long	option = 0,
			variant = INTL_IDN_VARIANT_2003;
	zval	*idna_info = NULL;

	intl_error_reset(NULL TSRMLS_CC);

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|llz",
			&domain, &domain_len, &option, &variant, &idna_info) == FAILURE) {
		php_intl_bad_args("bad arguments", mode TSRMLS_CC);
		RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */
	}

#ifdef HAVE_46_API
	if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) {
		php_intl_bad_args("invalid variant, must be one of {"
			"INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}", mode TSRMLS_CC);
		RETURN_FALSE;
	}
#else
	if (variant != INTL_IDN_VARIANT_2003) {
		/* use the name of the userland constant (INTL_IDNA_*), not the
		 * internal enumerator */
		php_intl_bad_args("invalid variant, PHP was compiled against "
			"an old version of ICU and only supports INTL_IDNA_VARIANT_2003",
			mode TSRMLS_CC);
		RETURN_FALSE;
	}
#endif

	if (domain_len < 1) {
		php_intl_bad_args("empty domain name", mode TSRMLS_CC);
		RETURN_FALSE;
	}
	/* the length is later handed to ICU as an int32_t */
	if (domain_len > INT32_MAX - 1) {
		php_intl_bad_args("domain name too large", mode TSRMLS_CC);
		RETURN_FALSE;
	}
	/* don't check options; it wasn't checked before */

	if (idna_info != NULL) {
		if (variant == INTL_IDN_VARIANT_2003) {
			php_error_docref0(NULL TSRMLS_CC, E_NOTICE,
				"4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
				"takes 3 - extra argument ignored");
		} else {
			/* discard whatever the caller passed and start from an empty array */
			zval_dtor(idna_info);
			array_init(idna_info);
		}
	}

	if (variant == INTL_IDN_VARIANT_2003) {
		php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU,
				domain, domain_len, (uint32_t)option, mode);
	}
#ifdef HAVE_46_API
	else {
		php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, domain_len,
				(uint32_t)option, mode, idna_info);
	}
#endif
}
322
323 /* {{{ proto int idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
324 Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
PHP_FUNCTION(idn_to_ascii)325 PHP_FUNCTION(idn_to_ascii)
326 {
327 php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
328 }
329 /* }}} */
330
331
332 /* {{{ proto int idn_to_utf8(string domain[, int options[, int variant[, array &idna_info]]])
333 Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
PHP_FUNCTION(idn_to_utf8)334 PHP_FUNCTION(idn_to_utf8)
335 {
336 php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
337 }
338 /* }}} */
339
340
341 /*
342 * Local variables:
343 * tab-width: 4
344 * c-basic-offset: 4
345 * End:
346 * vim600: fdm=marker
347 * vim: noet sw=4 ts=4
348 */
349