xref: /curl/lib/idn.c (revision 2625360b)
1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * SPDX-License-Identifier: curl
22  *
23  ***************************************************************************/
24 
25  /*
26   * IDN conversions
27   */
28 
29 #include "curl_setup.h"
30 #include "urldata.h"
31 #include "idn.h"
32 #include "sendf.h"
33 #include "curl_multibyte.h"
34 #include "warnless.h"
35 
#ifdef USE_LIBIDN2
#include <idn2.h>

/*
 * IDN2_LOOKUP() runs the libidn2 lookup conversion on 'name' and stores the
 * resulting string pointer in '*host'. It evaluates to the libidn2 return
 * code.
 *
 * Unicode builds on Windows use idn2_lookup_u8(), all other builds use
 * idn2_lookup_ul().
 *
 * Every macro argument is parenthesized so that the casts apply to the whole
 * expression that is passed in and not only to its first operand.
 */
#if defined(_WIN32) && defined(UNICODE)
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_u8((const uint8_t *)(name), (uint8_t **)(host), (flags))
#else
#define IDN2_LOOKUP(name, host, flags)                          \
  idn2_lookup_ul((const char *)(name), (char **)(host), (flags))
#endif
#endif  /* USE_LIBIDN2 */
47 
48 /* The last 3 #include files should be in this order */
49 #include "curl_printf.h"
50 #include "curl_memory.h"
51 #include "memdebug.h"
52 
53 /* for macOS and iOS targets */
54 #if defined(USE_APPLE_IDN)
55 #include <unicode/uidna.h>
56 #include <iconv.h>
57 #include <langinfo.h>
58 
59 #define MAX_HOST_LENGTH 512
60 
iconv_to_utf8(const char * in,size_t inlen,char ** out,size_t * outlen)61 static CURLcode iconv_to_utf8(const char *in, size_t inlen,
62                               char **out, size_t *outlen)
63 {
64   iconv_t cd = iconv_open("UTF-8", nl_langinfo(CODESET));
65   if(cd != (iconv_t)-1) {
66     size_t iconv_outlen = *outlen;
67     char *iconv_in = (char *)in;
68     size_t iconv_inlen = inlen;
69     size_t iconv_result = iconv(cd, &iconv_in, &iconv_inlen,
70                                 out, &iconv_outlen);
71     *outlen -= iconv_outlen;
72     iconv_close(cd);
73     if(iconv_result == (size_t)-1) {
74       if(errno == ENOMEM)
75         return CURLE_OUT_OF_MEMORY;
76       else
77         return CURLE_URL_MALFORMAT;
78     }
79 
80     return CURLE_OK;
81   }
82   else {
83     if(errno == ENOMEM)
84       return CURLE_OUT_OF_MEMORY;
85     else
86       return CURLE_FAILED_INIT;
87   }
88 }
89 
mac_idn_to_ascii(const char * in,char ** out)90 static CURLcode mac_idn_to_ascii(const char *in, char **out)
91 {
92   size_t inlen = strlen(in);
93   if(inlen < MAX_HOST_LENGTH) {
94     char iconv_buffer[MAX_HOST_LENGTH] = {0};
95     char *iconv_outptr = iconv_buffer;
96     size_t iconv_outlen = sizeof(iconv_buffer);
97     CURLcode iconv_result = iconv_to_utf8(in, inlen,
98                                           &iconv_outptr, &iconv_outlen);
99     if(!iconv_result) {
100       UErrorCode err = U_ZERO_ERROR;
101       UIDNA* idna = uidna_openUTS46(
102         UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_ASCII, &err);
103       if(!U_FAILURE(err)) {
104         UIDNAInfo info = UIDNA_INFO_INITIALIZER;
105         char buffer[MAX_HOST_LENGTH] = {0};
106         (void)uidna_nameToASCII_UTF8(idna, iconv_buffer, (int)iconv_outlen,
107                                      buffer, sizeof(buffer) - 1, &info, &err);
108         uidna_close(idna);
109         if(!U_FAILURE(err) && !info.errors) {
110           *out = strdup(buffer);
111           if(*out)
112             return CURLE_OK;
113           else
114             return CURLE_OUT_OF_MEMORY;
115         }
116       }
117     }
118     else
119       return iconv_result;
120   }
121   return CURLE_URL_MALFORMAT;
122 }
123 
mac_ascii_to_idn(const char * in,char ** out)124 static CURLcode mac_ascii_to_idn(const char *in, char **out)
125 {
126   size_t inlen = strlen(in);
127   if(inlen < MAX_HOST_LENGTH) {
128     UErrorCode err = U_ZERO_ERROR;
129     UIDNA* idna = uidna_openUTS46(
130       UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_UNICODE, &err);
131     if(!U_FAILURE(err)) {
132       UIDNAInfo info = UIDNA_INFO_INITIALIZER;
133       char buffer[MAX_HOST_LENGTH] = {0};
134       (void)uidna_nameToUnicodeUTF8(idna, in, -1, buffer,
135                                     sizeof(buffer) - 1, &info, &err);
136       uidna_close(idna);
137       if(!U_FAILURE(err)) {
138         *out = strdup(buffer);
139         if(*out)
140           return CURLE_OK;
141         else
142           return CURLE_OUT_OF_MEMORY;
143       }
144     }
145   }
146   return CURLE_URL_MALFORMAT;
147 }
148 #endif
149 
150 #ifdef USE_WIN32_IDN
/* using Windows kernel32 and normaliz libraries. */

/* When neither _WIN32_WINNT nor WINVER is defined as 0x600 or later, the two
   conversion functions used below are declared here by hand. */
#if (!defined(_WIN32_WINNT) || _WIN32_WINNT < 0x600) && \
  (!defined(WINVER) || WINVER < 0x600)
WINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags,
                                 const WCHAR *lpUnicodeCharStr,
                                 int cchUnicodeChar,
                                 WCHAR *lpASCIICharStr,
                                 int cchASCIIChar);
WINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags,
                                   const WCHAR *lpASCIICharStr,
                                   int cchASCIIChar,
                                   WCHAR *lpUnicodeCharStr,
                                   int cchUnicodeChar);
#endif

/* Number of wide characters in the local output buffers that are passed to
   IdnToAscii() and IdnToUnicode() */
#define IDN_MAX_LENGTH 255
168 
win32_idn_to_ascii(const char * in,char ** out)169 static CURLcode win32_idn_to_ascii(const char *in, char **out)
170 {
171   wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
172   *out = NULL;
173   if(in_w) {
174     wchar_t punycode[IDN_MAX_LENGTH];
175     int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
176                            IDN_MAX_LENGTH);
177     curlx_unicodefree(in_w);
178     if(chars) {
179       char *mstr = curlx_convert_wchar_to_UTF8(punycode);
180       if(mstr) {
181         *out = strdup(mstr);
182         curlx_unicodefree(mstr);
183         if(!*out)
184           return CURLE_OUT_OF_MEMORY;
185       }
186       else
187         return CURLE_OUT_OF_MEMORY;
188     }
189     else
190       return CURLE_URL_MALFORMAT;
191   }
192   else
193     return CURLE_URL_MALFORMAT;
194 
195   return CURLE_OK;
196 }
197 
win32_ascii_to_idn(const char * in,char ** output)198 static CURLcode win32_ascii_to_idn(const char *in, char **output)
199 {
200   char *out = NULL;
201 
202   wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
203   if(in_w) {
204     WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
205     int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
206                              IDN_MAX_LENGTH);
207     if(chars) {
208       /* 'chars' is "the number of characters retrieved" */
209       char *mstr = curlx_convert_wchar_to_UTF8(idn);
210       if(mstr) {
211         out = strdup(mstr);
212         curlx_unicodefree(mstr);
213         if(!out)
214           return CURLE_OUT_OF_MEMORY;
215       }
216     }
217     else
218       return CURLE_URL_MALFORMAT;
219   }
220   else
221     return CURLE_URL_MALFORMAT;
222   *output = out;
223   return CURLE_OK;
224 }
225 
226 #endif /* USE_WIN32_IDN */
227 
228 /*
229  * Helpers for IDNA conversions.
230  */
/*
 * Curl_is_ASCII_name() tells if 'hostname' consists of 7-bit bytes only.
 *
 * Returns true when no byte in the string has its high bit set, and also for
 * a NULL pointer. Returns false as soon as a byte of 0x80 or above is found.
 */
bool Curl_is_ASCII_name(const char *hostname)
{
  /* inspect the name as UNSIGNED bytes */
  const unsigned char *byte = (const unsigned char *)hostname;

  if(hostname) {
    /* move past every 7-bit byte, stop at the first one that is not */
    while(*byte && (*byte < 0x80))
      byte++;
  }

  /* bad input (NULL) is considered ASCII, otherwise the name is ASCII if the
     scan above made it all the way to the terminating null */
  return !hostname || !*byte;
}
245 
246 #ifdef USE_IDN
247 /*
248  * Curl_idn_decode() returns an allocated IDN decoded string if it was
249  * possible. NULL on error.
250  *
251  * CURLE_URL_MALFORMAT - the hostname could not be converted
252  * CURLE_OUT_OF_MEMORY - memory problem
253  *
254  */
idn_decode(const char * input,char ** output)255 static CURLcode idn_decode(const char *input, char **output)
256 {
257   char *decoded = NULL;
258   CURLcode result = CURLE_OK;
259 #ifdef USE_LIBIDN2
260   if(idn2_check_version(IDN2_VERSION)) {
261     int flags = IDN2_NFC_INPUT
262 #if IDN2_VERSION_NUMBER >= 0x00140000
263       /* IDN2_NFC_INPUT: Normalize input string using normalization form C.
264          IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional
265          processing. */
266       | IDN2_NONTRANSITIONAL
267 #endif
268       ;
269     int rc = IDN2_LOOKUP(input, &decoded, flags);
270     if(rc != IDN2_OK)
271       /* fallback to TR46 Transitional mode for better IDNA2003
272          compatibility */
273       rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL);
274     if(rc != IDN2_OK)
275       result = CURLE_URL_MALFORMAT;
276   }
277   else
278     /* a too old libidn2 version */
279     result = CURLE_NOT_BUILT_IN;
280 #elif defined(USE_WIN32_IDN)
281   result = win32_idn_to_ascii(input, &decoded);
282 #elif defined(USE_APPLE_IDN)
283   result = mac_idn_to_ascii(input, &decoded);
284 #endif
285   if(!result)
286     *output = decoded;
287   return result;
288 }
289 
idn_encode(const char * puny,char ** output)290 static CURLcode idn_encode(const char *puny, char **output)
291 {
292   char *enc = NULL;
293 #ifdef USE_LIBIDN2
294   int rc = idn2_to_unicode_8z8z(puny, &enc, 0);
295   if(rc != IDNA_SUCCESS)
296     return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
297 #elif defined(USE_WIN32_IDN)
298   CURLcode result = win32_ascii_to_idn(puny, &enc);
299   if(result)
300     return result;
301 #elif defined(USE_APPLE_IDN)
302   CURLcode result = mac_ascii_to_idn(puny, &enc);
303   if(result)
304     return result;
305 #endif
306   *output = enc;
307   return CURLE_OK;
308 }
309 
Curl_idn_decode(const char * input,char ** output)310 CURLcode Curl_idn_decode(const char *input, char **output)
311 {
312   char *d = NULL;
313   CURLcode result = idn_decode(input, &d);
314 #ifdef USE_LIBIDN2
315   if(!result) {
316     char *c = strdup(d);
317     idn2_free(d);
318     if(c)
319       d = c;
320     else
321       result = CURLE_OUT_OF_MEMORY;
322   }
323 #endif
324   if(!result)
325     *output = d;
326   return result;
327 }
328 
Curl_idn_encode(const char * puny,char ** output)329 CURLcode Curl_idn_encode(const char *puny, char **output)
330 {
331   char *d = NULL;
332   CURLcode result = idn_encode(puny, &d);
333 #ifdef USE_LIBIDN2
334   if(!result) {
335     char *c = strdup(d);
336     idn2_free(d);
337     if(c)
338       d = c;
339     else
340       result = CURLE_OUT_OF_MEMORY;
341   }
342 #endif
343   if(!result)
344     *output = d;
345   return result;
346 }
347 
/*
 * Curl_free_idnconverted_hostname() releases 'host->encalloc', the field in
 * which Curl_idnconvert_hostname() stores the converted name it allocated.
 */
void Curl_free_idnconverted_hostname(struct hostname *host)
{
  Curl_safefree(host->encalloc);
}
355 
356 #endif /* USE_IDN */
357 
358 /*
359  * Perform any necessary IDN conversion of hostname
360  */
Curl_idnconvert_hostname(struct hostname * host)361 CURLcode Curl_idnconvert_hostname(struct hostname *host)
362 {
363   /* set the name we use to display the hostname */
364   host->dispname = host->name;
365 
366 #ifdef USE_IDN
367   /* Check name for non-ASCII and convert hostname if we can */
368   if(!Curl_is_ASCII_name(host->name)) {
369     char *decoded;
370     CURLcode result = Curl_idn_decode(host->name, &decoded);
371     if(result)
372       return result;
373     /* successful */
374     host->name = host->encalloc = decoded;
375   }
376 #endif
377   return CURLE_OK;
378 }
379