1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25 /*
26 * IDN conversions
27 */
28
29 #include "curl_setup.h"
30 #include "urldata.h"
31 #include "idn.h"
32 #include "sendf.h"
33 #include "curl_multibyte.h"
34 #include "warnless.h"
35
36 #ifdef USE_LIBIDN2
37 #include <idn2.h>
38
/*
 * IDN2_LOOKUP() wraps the libidn2 lookup call used by this file.
 * Windows UNICODE builds call idn2_lookup_u8(); all other builds call
 * idn2_lookup_ul(). Every macro parameter is parenthesized so that any
 * expression can be passed safely as an argument.
 */
#if defined(_WIN32) && defined(UNICODE)
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_u8((const uint8_t *)(name), (uint8_t **)(host), (flags))
#else
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_ul((const char *)(name), (char **)(host), (flags))
#endif
46 #endif /* USE_LIBIDN2 */
47
48 /* The last 3 #include files should be in this order */
49 #include "curl_printf.h"
50 #include "curl_memory.h"
51 #include "memdebug.h"
52
53 /* for macOS and iOS targets */
54 #if defined(USE_APPLE_IDN)
55 #include <unicode/uidna.h>
56 #include <iconv.h>
57 #include <langinfo.h>
58
59 #define MAX_HOST_LENGTH 512
60
iconv_to_utf8(const char * in,size_t inlen,char ** out,size_t * outlen)61 static CURLcode iconv_to_utf8(const char *in, size_t inlen,
62 char **out, size_t *outlen)
63 {
64 iconv_t cd = iconv_open("UTF-8", nl_langinfo(CODESET));
65 if(cd != (iconv_t)-1) {
66 size_t iconv_outlen = *outlen;
67 char *iconv_in = (char *)in;
68 size_t iconv_inlen = inlen;
69 size_t iconv_result = iconv(cd, &iconv_in, &iconv_inlen,
70 out, &iconv_outlen);
71 *outlen -= iconv_outlen;
72 iconv_close(cd);
73 if(iconv_result == (size_t)-1) {
74 if(errno == ENOMEM)
75 return CURLE_OUT_OF_MEMORY;
76 else
77 return CURLE_URL_MALFORMAT;
78 }
79
80 return CURLE_OK;
81 }
82 else {
83 if(errno == ENOMEM)
84 return CURLE_OUT_OF_MEMORY;
85 else
86 return CURLE_FAILED_INIT;
87 }
88 }
89
mac_idn_to_ascii(const char * in,char ** out)90 static CURLcode mac_idn_to_ascii(const char *in, char **out)
91 {
92 size_t inlen = strlen(in);
93 if(inlen < MAX_HOST_LENGTH) {
94 char iconv_buffer[MAX_HOST_LENGTH] = {0};
95 char *iconv_outptr = iconv_buffer;
96 size_t iconv_outlen = sizeof(iconv_buffer);
97 CURLcode iconv_result = iconv_to_utf8(in, inlen,
98 &iconv_outptr, &iconv_outlen);
99 if(!iconv_result) {
100 UErrorCode err = U_ZERO_ERROR;
101 UIDNA* idna = uidna_openUTS46(
102 UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_ASCII, &err);
103 if(!U_FAILURE(err)) {
104 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
105 char buffer[MAX_HOST_LENGTH] = {0};
106 (void)uidna_nameToASCII_UTF8(idna, iconv_buffer, (int)iconv_outlen,
107 buffer, sizeof(buffer) - 1, &info, &err);
108 uidna_close(idna);
109 if(!U_FAILURE(err) && !info.errors) {
110 *out = strdup(buffer);
111 if(*out)
112 return CURLE_OK;
113 else
114 return CURLE_OUT_OF_MEMORY;
115 }
116 }
117 }
118 else
119 return iconv_result;
120 }
121 return CURLE_URL_MALFORMAT;
122 }
123
mac_ascii_to_idn(const char * in,char ** out)124 static CURLcode mac_ascii_to_idn(const char *in, char **out)
125 {
126 size_t inlen = strlen(in);
127 if(inlen < MAX_HOST_LENGTH) {
128 UErrorCode err = U_ZERO_ERROR;
129 UIDNA* idna = uidna_openUTS46(
130 UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_UNICODE, &err);
131 if(!U_FAILURE(err)) {
132 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
133 char buffer[MAX_HOST_LENGTH] = {0};
134 (void)uidna_nameToUnicodeUTF8(idna, in, -1, buffer,
135 sizeof(buffer) - 1, &info, &err);
136 uidna_close(idna);
137 if(!U_FAILURE(err)) {
138 *out = strdup(buffer);
139 if(*out)
140 return CURLE_OK;
141 else
142 return CURLE_OUT_OF_MEMORY;
143 }
144 }
145 }
146 return CURLE_URL_MALFORMAT;
147 }
148 #endif
149
150 #ifdef USE_WIN32_IDN
151 /* using Windows kernel32 and normaliz libraries. */
152
153 #if (!defined(_WIN32_WINNT) || _WIN32_WINNT < 0x600) && \
154 (!defined(WINVER) || WINVER < 0x600)
155 WINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags,
156 const WCHAR *lpUnicodeCharStr,
157 int cchUnicodeChar,
158 WCHAR *lpASCIICharStr,
159 int cchASCIIChar);
160 WINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags,
161 const WCHAR *lpASCIICharStr,
162 int cchASCIIChar,
163 WCHAR *lpUnicodeCharStr,
164 int cchUnicodeChar);
165 #endif
166
167 #define IDN_MAX_LENGTH 255
168
win32_idn_to_ascii(const char * in,char ** out)169 static CURLcode win32_idn_to_ascii(const char *in, char **out)
170 {
171 wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
172 *out = NULL;
173 if(in_w) {
174 wchar_t punycode[IDN_MAX_LENGTH];
175 int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
176 IDN_MAX_LENGTH);
177 curlx_unicodefree(in_w);
178 if(chars) {
179 char *mstr = curlx_convert_wchar_to_UTF8(punycode);
180 if(mstr) {
181 *out = strdup(mstr);
182 curlx_unicodefree(mstr);
183 if(!*out)
184 return CURLE_OUT_OF_MEMORY;
185 }
186 else
187 return CURLE_OUT_OF_MEMORY;
188 }
189 else
190 return CURLE_URL_MALFORMAT;
191 }
192 else
193 return CURLE_URL_MALFORMAT;
194
195 return CURLE_OK;
196 }
197
win32_ascii_to_idn(const char * in,char ** output)198 static CURLcode win32_ascii_to_idn(const char *in, char **output)
199 {
200 char *out = NULL;
201
202 wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
203 if(in_w) {
204 WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
205 int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
206 IDN_MAX_LENGTH);
207 if(chars) {
208 /* 'chars' is "the number of characters retrieved" */
209 char *mstr = curlx_convert_wchar_to_UTF8(idn);
210 if(mstr) {
211 out = strdup(mstr);
212 curlx_unicodefree(mstr);
213 if(!out)
214 return CURLE_OUT_OF_MEMORY;
215 }
216 }
217 else
218 return CURLE_URL_MALFORMAT;
219 }
220 else
221 return CURLE_URL_MALFORMAT;
222 *output = out;
223 return CURLE_OK;
224 }
225
226 #endif /* USE_WIN32_IDN */
227
228 /*
229 * Helpers for IDNA conversions.
230 */
/*
 * Curl_is_ASCII_name() tells whether 'hostname' consists solely of bytes
 * with the high bit clear. A NULL pointer and the empty string both count
 * as ASCII.
 */
bool Curl_is_ASCII_name(const char *hostname)
{
  /* inspect the bytes as unsigned so values 0x80 and up compare as such */
  const unsigned char *p = (const unsigned char *)hostname;

  if(p) {
    /* advance to the first byte with the high bit set, or the terminator */
    while(*p && (*p < 0x80))
      p++;
  }

  /* ASCII if there was nothing to scan or the scan reached the end */
  return !p || !*p;
}
245
246 #ifdef USE_IDN
247 /*
248 * Curl_idn_decode() returns an allocated IDN decoded string if it was
249 * possible. NULL on error.
250 *
251 * CURLE_URL_MALFORMAT - the hostname could not be converted
252 * CURLE_OUT_OF_MEMORY - memory problem
253 *
254 */
idn_decode(const char * input,char ** output)255 static CURLcode idn_decode(const char *input, char **output)
256 {
257 char *decoded = NULL;
258 CURLcode result = CURLE_OK;
259 #ifdef USE_LIBIDN2
260 if(idn2_check_version(IDN2_VERSION)) {
261 int flags = IDN2_NFC_INPUT
262 #if IDN2_VERSION_NUMBER >= 0x00140000
263 /* IDN2_NFC_INPUT: Normalize input string using normalization form C.
264 IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional
265 processing. */
266 | IDN2_NONTRANSITIONAL
267 #endif
268 ;
269 int rc = IDN2_LOOKUP(input, &decoded, flags);
270 if(rc != IDN2_OK)
271 /* fallback to TR46 Transitional mode for better IDNA2003
272 compatibility */
273 rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL);
274 if(rc != IDN2_OK)
275 result = CURLE_URL_MALFORMAT;
276 }
277 else
278 /* a too old libidn2 version */
279 result = CURLE_NOT_BUILT_IN;
280 #elif defined(USE_WIN32_IDN)
281 result = win32_idn_to_ascii(input, &decoded);
282 #elif defined(USE_APPLE_IDN)
283 result = mac_idn_to_ascii(input, &decoded);
284 #endif
285 if(!result)
286 *output = decoded;
287 return result;
288 }
289
idn_encode(const char * puny,char ** output)290 static CURLcode idn_encode(const char *puny, char **output)
291 {
292 char *enc = NULL;
293 #ifdef USE_LIBIDN2
294 int rc = idn2_to_unicode_8z8z(puny, &enc, 0);
295 if(rc != IDNA_SUCCESS)
296 return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
297 #elif defined(USE_WIN32_IDN)
298 CURLcode result = win32_ascii_to_idn(puny, &enc);
299 if(result)
300 return result;
301 #elif defined(USE_APPLE_IDN)
302 CURLcode result = mac_ascii_to_idn(puny, &enc);
303 if(result)
304 return result;
305 #endif
306 *output = enc;
307 return CURLE_OK;
308 }
309
Curl_idn_decode(const char * input,char ** output)310 CURLcode Curl_idn_decode(const char *input, char **output)
311 {
312 char *d = NULL;
313 CURLcode result = idn_decode(input, &d);
314 #ifdef USE_LIBIDN2
315 if(!result) {
316 char *c = strdup(d);
317 idn2_free(d);
318 if(c)
319 d = c;
320 else
321 result = CURLE_OUT_OF_MEMORY;
322 }
323 #endif
324 if(!result)
325 *output = d;
326 return result;
327 }
328
Curl_idn_encode(const char * puny,char ** output)329 CURLcode Curl_idn_encode(const char *puny, char **output)
330 {
331 char *d = NULL;
332 CURLcode result = idn_encode(puny, &d);
333 #ifdef USE_LIBIDN2
334 if(!result) {
335 char *c = strdup(d);
336 idn2_free(d);
337 if(c)
338 d = c;
339 else
340 result = CURLE_OUT_OF_MEMORY;
341 }
342 #endif
343 if(!result)
344 *output = d;
345 return result;
346 }
347
348 /*
349 * Frees data allocated by idnconvert_hostname()
350 */
Curl_free_idnconverted_hostname(struct hostname * host)351 void Curl_free_idnconverted_hostname(struct hostname *host)
352 {
353 Curl_safefree(host->encalloc);
354 }
355
356 #endif /* USE_IDN */
357
358 /*
359 * Perform any necessary IDN conversion of hostname
360 */
Curl_idnconvert_hostname(struct hostname * host)361 CURLcode Curl_idnconvert_hostname(struct hostname *host)
362 {
363 /* set the name we use to display the hostname */
364 host->dispname = host->name;
365
366 #ifdef USE_IDN
367 /* Check name for non-ASCII and convert hostname if we can */
368 if(!Curl_is_ASCII_name(host->name)) {
369 char *decoded;
370 CURLcode result = Curl_idn_decode(host->name, &decoded);
371 if(result)
372 return result;
373 /* successful */
374 host->name = host->encalloc = decoded;
375 }
376 #endif
377 return CURLE_OK;
378 }
379