xref: /PHP-5.3/ext/intl/idn/idn.c (revision 8b27b6d5)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 2009 The PHP Group                                     |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Pierre A. Joye <pierre@php.net>                              |
16    +----------------------------------------------------------------------+
17  */
18 /* $Id$ */
19 
20 /* {{{ includes */
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include <php.h>
26 
27 #include <unicode/uidna.h>
28 #include <unicode/ustring.h>
29 #include "ext/standard/php_string.h"
30 
31 #include "intl_error.h"
32  #include "intl_convert.h"
33 /* }}} */
34 
35 /* {{{ grapheme_register_constants
36  * Register API constants
37  */
idn_register_constants(INIT_FUNC_ARGS)38 void idn_register_constants( INIT_FUNC_ARGS )
39 {
40 	/* Option to prohibit processing of unassigned codepoints in the input and
41 	   do not check if the input conforms to STD-3 ASCII rules. */
42 	REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
43 
44 	/* Option to allow processing of unassigned codepoints in the input */
45 	REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
46 
47 	/* Option to check if input conforms to STD-3 ASCII rules */
48 	REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
49 }
50 /* }}} */
51 
/* Conversion direction passed to php_intl_idn_to() as its `mode` argument. */
enum {
	INTL_IDN_TO_ASCII = 0,	/* convert with uidna_IDNToASCII() */
	INTL_IDN_TO_UTF8  = 1	/* convert with uidna_IDNToUnicode(), returned as UTF-8 */
};
56 
/* {{{ php_intl_idn_to
 * Shared implementation of idn_to_ascii() and idn_to_utf8().
 *
 * Parses the userland arguments (string domain[, int options]), converts the
 * domain from UTF-8 to UTF-16, runs it through uidna_IDNToASCII() or
 * uidna_IDNToUnicode() depending on `mode` (INTL_IDN_TO_ASCII or
 * INTL_IDN_TO_UTF8), and converts the result back to UTF-8.
 *
 * On success return_value is the converted string. On an empty domain or any
 * conversion failure the global intl error is set and return_value is FALSE.
 * If argument parsing fails the function returns without touching either.
 */
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	char        *domain;
	int          domain_len;
	long         option = 0;
	UChar       *ustring = NULL;
	int          ustring_len = 0;
	UErrorCode   status;
	UParseError  parse_error;
	char        *converted_utf8 = NULL;
	int32_t      converted_utf8_len = 0;
	UChar        converted[MAXPATHLEN];
	int32_t      converted_ret_len;

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &domain, &domain_len, &option) == FAILURE) {
		return;
	}

	if (domain_len < 1) {
		/* Both messages are string literals, so passing copyMsg = 0 stays valid. */
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
			mode == INTL_IDN_TO_ASCII
				? "idn_to_ascii: empty domain name"
				: "idn_to_utf8: empty domain name",
			0 TSRMLS_CC );
		RETURN_FALSE;
	}

	/* Convert the input to UTF-16, which is what the ICU IDNA API consumes. */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status );

	if (U_FAILURE(status)) {
		/* Set global error code and message. */
		intl_error_set_code(NULL, status TSRMLS_CC);
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
		if (ustring) {
			efree(ustring);
		}
		RETURN_FALSE;
	}

	/* Run the actual IDNA conversion into the fixed-size stack buffer. */
	status = U_ZERO_ERROR;
	if (mode == INTL_IDN_TO_ASCII) {
		converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
	} else {
		converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
	}
	/* The UTF-16 copy of the input is no longer needed, whatever the outcome. */
	efree(ustring);

	if (U_FAILURE(status)) {
		intl_error_set( NULL, status,
			mode == INTL_IDN_TO_ASCII
				? "idn_to_ascii: cannot convert to ASCII"
				: "idn_to_utf8: cannot convert to UTF-8",
			0 TSRMLS_CC );
		RETURN_FALSE;
	}

	/* Convert the result back to UTF-8 for userland. */
	status = U_ZERO_ERROR;
	intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);

	if (U_FAILURE(status)) {
		/* Set global error code and message. */
		intl_error_set_code(NULL, status TSRMLS_CC);
		intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );

		/* converted_utf8 starts out NULL, so this stays well-defined even if
		   the conversion helper failed without handing back a buffer. */
		if (converted_utf8) {
			efree(converted_utf8);
		}
		RETURN_FALSE;
	}

	/* Hand the allocated buffer to the return value instead of duplicating it. */
	RETURN_STRINGL(converted_utf8, converted_utf8_len, 0);
}
/* }}} */
125 
126 /* {{{ proto int idn_to_ascii(string domain[, int options])
127    Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
PHP_FUNCTION(idn_to_ascii)128 PHP_FUNCTION(idn_to_ascii)
129 {
130 	php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
131 }
132 /* }}} */
133 
134 
135 /* {{{ proto int idn_to_utf8(string domain[, int options])
136    Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
PHP_FUNCTION(idn_to_utf8)137 PHP_FUNCTION(idn_to_utf8)
138 {
139 	php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
140 }
141 /* }}} */
142 
143 
144 /*
145  * Local variables:
146  * tab-width: 4
147  * c-basic-offset: 4
148  * End:
149  * vim600: fdm=marker
150  * vim: noet sw=4 ts=4
151  */
152