xref: /curl/lib/idn.c (revision add22fee)
1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * SPDX-License-Identifier: curl
22  *
23  ***************************************************************************/
24 
25  /*
26   * IDN conversions
27   */
28 
29 #include "curl_setup.h"
30 #include "urldata.h"
31 #include "idn.h"
32 #include "sendf.h"
33 #include "curl_multibyte.h"
34 #include "warnless.h"
35 
36 #ifdef USE_LIBIDN2
37 #include <idn2.h>
38 
/*
 * IDN2_LOOKUP() wraps the libidn2 lookup call used by this file.
 * Unicode builds on Windows call idn2_lookup_u8(); every other build calls
 * idn2_lookup_ul(). The macro arguments are parenthesized so that any
 * expression can be passed safely.
 */
#if defined(_WIN32) && defined(UNICODE)
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_u8((const uint8_t *)(name), (uint8_t **)(host), (flags))
#else
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_ul((const char *)(name), (char **)(host), (flags))
#endif
46 #endif  /* USE_LIBIDN2 */
47 
48 /* The last 3 #include files should be in this order */
49 #include "curl_printf.h"
50 #include "curl_memory.h"
51 #include "memdebug.h"
52 
53 /* for macOS and iOS targets */
54 #if defined(USE_APPLE_IDN)
55 #include <unicode/uidna.h>
56 
mac_idn_to_ascii(const char * in,char ** out)57 static CURLcode mac_idn_to_ascii(const char *in, char **out)
58 {
59   UErrorCode err = U_ZERO_ERROR;
60   UIDNA* idna = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);
61   if(U_FAILURE(err)) {
62     return CURLE_OUT_OF_MEMORY;
63   }
64   else {
65     UIDNAInfo info = UIDNA_INFO_INITIALIZER;
66     char buffer[256] = {0};
67     (void)uidna_nameToASCII_UTF8(idna, in, -1, buffer,
68       sizeof(buffer), &info, &err);
69     uidna_close(idna);
70     if(U_FAILURE(err)) {
71       return CURLE_URL_MALFORMAT;
72     }
73     else {
74       *out = strdup(buffer);
75       if(*out)
76         return CURLE_OK;
77       else
78         return CURLE_OUT_OF_MEMORY;
79     }
80   }
81 }
82 
mac_ascii_to_idn(const char * in,char ** out)83 static CURLcode mac_ascii_to_idn(const char *in, char **out)
84 {
85   UErrorCode err = U_ZERO_ERROR;
86   UIDNA* idna = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);
87   if(U_FAILURE(err)) {
88     return CURLE_OUT_OF_MEMORY;
89   }
90   else {
91     UIDNAInfo info = UIDNA_INFO_INITIALIZER;
92     char buffer[256] = {0};
93     (void)uidna_nameToUnicodeUTF8(idna, in, -1, buffer,
94       sizeof(buffer), &info, &err);
95     uidna_close(idna);
96     if(U_FAILURE(err)) {
97       return CURLE_URL_MALFORMAT;
98     }
99     else {
100       *out = strdup(buffer);
101       if(*out)
102         return CURLE_OK;
103       else
104         return CURLE_OUT_OF_MEMORY;
105     }
106   }
107 }
108 #endif
109 
110 #ifdef USE_WIN32_IDN
111 /* using Windows kernel32 and normaliz libraries. */
112 
113 #if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x600
114 WINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags,
115                                  const WCHAR *lpUnicodeCharStr,
116                                  int cchUnicodeChar,
117                                  WCHAR *lpASCIICharStr,
118                                  int cchASCIIChar);
119 WINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags,
120                                    const WCHAR *lpASCIICharStr,
121                                    int cchASCIIChar,
122                                    WCHAR *lpUnicodeCharStr,
123                                    int cchUnicodeChar);
124 #endif
125 
126 #define IDN_MAX_LENGTH 255
127 
win32_idn_to_ascii(const char * in,char ** out)128 static CURLcode win32_idn_to_ascii(const char *in, char **out)
129 {
130   wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
131   *out = NULL;
132   if(in_w) {
133     wchar_t punycode[IDN_MAX_LENGTH];
134     int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
135                            IDN_MAX_LENGTH);
136     curlx_unicodefree(in_w);
137     if(chars) {
138       char *mstr = curlx_convert_wchar_to_UTF8(punycode);
139       if(mstr) {
140         *out = strdup(mstr);
141         curlx_unicodefree(mstr);
142         if(!*out)
143           return CURLE_OUT_OF_MEMORY;
144       }
145       else
146         return CURLE_OUT_OF_MEMORY;
147     }
148     else
149       return CURLE_URL_MALFORMAT;
150   }
151   else
152     return CURLE_URL_MALFORMAT;
153 
154   return CURLE_OK;
155 }
156 
win32_ascii_to_idn(const char * in,char ** output)157 static CURLcode win32_ascii_to_idn(const char *in, char **output)
158 {
159   char *out = NULL;
160 
161   wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
162   if(in_w) {
163     WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
164     int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
165                              IDN_MAX_LENGTH);
166     if(chars) {
167       /* 'chars' is "the number of characters retrieved" */
168       char *mstr = curlx_convert_wchar_to_UTF8(idn);
169       if(mstr) {
170         out = strdup(mstr);
171         curlx_unicodefree(mstr);
172         if(!out)
173           return CURLE_OUT_OF_MEMORY;
174       }
175     }
176     else
177       return CURLE_URL_MALFORMAT;
178   }
179   else
180     return CURLE_URL_MALFORMAT;
181   *output = out;
182   return CURLE_OK;
183 }
184 
185 #endif /* USE_WIN32_IDN */
186 
187 /*
188  * Helpers for IDNA conversions.
189  */
/*
 * Curl_is_ASCII_name() returns TRUE when no byte of 'hostname' has its high
 * bit set, FALSE otherwise. A NULL 'hostname' is treated as ASCII.
 */
bool Curl_is_ASCII_name(const char *hostname)
{
  /* inspect the bytes as unsigned values */
  const unsigned char *byte = (const unsigned char *)hostname;

  if(hostname) {
    /* advance past every 7-bit byte */
    while(*byte && !(*byte & 0x80))
      byte++;

    /* stopping before the terminator means a high-bit byte was found */
    if(*byte)
      return FALSE;
  }
  return TRUE;
}
204 
205 #ifdef USE_IDN
206 /*
207  * Curl_idn_decode() returns an allocated IDN decoded string if it was
208  * possible. NULL on error.
209  *
210  * CURLE_URL_MALFORMAT - the host name could not be converted
211  * CURLE_OUT_OF_MEMORY - memory problem
212  *
213  */
idn_decode(const char * input,char ** output)214 static CURLcode idn_decode(const char *input, char **output)
215 {
216   char *decoded = NULL;
217   CURLcode result = CURLE_OK;
218 #ifdef USE_LIBIDN2
219   if(idn2_check_version(IDN2_VERSION)) {
220     int flags = IDN2_NFC_INPUT
221 #if IDN2_VERSION_NUMBER >= 0x00140000
222       /* IDN2_NFC_INPUT: Normalize input string using normalization form C.
223          IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional
224          processing. */
225       | IDN2_NONTRANSITIONAL
226 #endif
227       ;
228     int rc = IDN2_LOOKUP(input, &decoded, flags);
229     if(rc != IDN2_OK)
230       /* fallback to TR46 Transitional mode for better IDNA2003
231          compatibility */
232       rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL);
233     if(rc != IDN2_OK)
234       result = CURLE_URL_MALFORMAT;
235   }
236   else
237     /* a too old libidn2 version */
238     result = CURLE_NOT_BUILT_IN;
239 #elif defined(USE_WIN32_IDN)
240   result = win32_idn_to_ascii(input, &decoded);
241 #elif defined(USE_APPLE_IDN)
242   result = mac_idn_to_ascii(input, &decoded);
243 #endif
244   if(!result)
245     *output = decoded;
246   return result;
247 }
248 
idn_encode(const char * puny,char ** output)249 static CURLcode idn_encode(const char *puny, char **output)
250 {
251   char *enc = NULL;
252 #ifdef USE_LIBIDN2
253   int rc = idn2_to_unicode_8z8z(puny, &enc, 0);
254   if(rc != IDNA_SUCCESS)
255     return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
256 #elif defined(USE_WIN32_IDN)
257   CURLcode result = win32_ascii_to_idn(puny, &enc);
258   if(result)
259     return result;
260 #elif defined(USE_APPLE_IDN)
261   CURLcode result = mac_ascii_to_idn(puny, &enc);
262   if(result)
263     return result;
264 #endif
265   *output = enc;
266   return CURLE_OK;
267 }
268 
Curl_idn_decode(const char * input,char ** output)269 CURLcode Curl_idn_decode(const char *input, char **output)
270 {
271   char *d = NULL;
272   CURLcode result = idn_decode(input, &d);
273 #ifdef USE_LIBIDN2
274   if(!result) {
275     char *c = strdup(d);
276     idn2_free(d);
277     if(c)
278       d = c;
279     else
280       result = CURLE_OUT_OF_MEMORY;
281   }
282 #endif
283   if(!result)
284     *output = d;
285   return result;
286 }
287 
Curl_idn_encode(const char * puny,char ** output)288 CURLcode Curl_idn_encode(const char *puny, char **output)
289 {
290   char *d = NULL;
291   CURLcode result = idn_encode(puny, &d);
292 #ifdef USE_LIBIDN2
293   if(!result) {
294     char *c = strdup(d);
295     idn2_free(d);
296     if(c)
297       d = c;
298     else
299       result = CURLE_OUT_OF_MEMORY;
300   }
301 #endif
302   if(!result)
303     *output = d;
304   return result;
305 }
306 
307 /*
308  * Frees data allocated by idnconvert_hostname()
309  */
Curl_free_idnconverted_hostname(struct hostname * host)310 void Curl_free_idnconverted_hostname(struct hostname *host)
311 {
312   Curl_safefree(host->encalloc);
313 }
314 
315 #endif /* USE_IDN */
316 
317 /*
318  * Perform any necessary IDN conversion of hostname
319  */
Curl_idnconvert_hostname(struct hostname * host)320 CURLcode Curl_idnconvert_hostname(struct hostname *host)
321 {
322   /* set the name we use to display the host name */
323   host->dispname = host->name;
324 
325 #ifdef USE_IDN
326   /* Check name for non-ASCII and convert hostname if we can */
327   if(!Curl_is_ASCII_name(host->name)) {
328     char *decoded;
329     CURLcode result = Curl_idn_decode(host->name, &decoded);
330     if(result)
331       return result;
332     /* successful */
333     host->name = host->encalloc = decoded;
334   }
335 #endif
336   return CURLE_OK;
337 }
338