1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25 #include "curl_setup.h"
26
27 #include "urldata.h"
28 #include "urlapi-int.h"
29 #include "strcase.h"
30 #include "url.h"
31 #include "escape.h"
32 #include "curl_ctype.h"
33 #include "inet_pton.h"
34 #include "inet_ntop.h"
35 #include "strdup.h"
36 #include "idn.h"
37 #include "curl_memrchr.h"
38
39 /* The last 3 #include files should be in this order */
40 #include "curl_printf.h"
41 #include "curl_memory.h"
42 #include "memdebug.h"
43
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * The argument is parenthesized in the expansion so that any pointer
 * expression, such as (ptr + 1), can be passed. The argument is evaluated
 * more than once, so it must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))

/* MSDOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash, a backslash or NUL.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
57
/* Schemes are not URL encoded. This is the longest scheme, counted in bytes,
   that Curl_is_absolute_url() recognizes. */
#define MAX_SCHEME_LEN 40

/*
 * IPv6 addresses are still parsed when USE_IPV6 is disabled. Provide _some_
 * value for AF_INET6 in that case, but only in this file so that the fake
 * value does not spread anywhere else.
 */
#ifndef USE_IPV6
#ifndef AF_INET6
#define AF_INET6 (AF_INET + 1)
#endif
#endif
69
70 /* Internal representation of CURLU. Point to URL-encoded strings. */
71 struct Curl_URL {
72 char *scheme;
73 char *user;
74 char *password;
75 char *options; /* IMAP only? */
76 char *host;
77 char *zoneid; /* for numerical IPv6 addresses */
78 char *port;
79 char *path;
80 char *query;
81 char *fragment;
82 unsigned short portnum; /* the numerical version (if 'port' is set) */
83 BIT(query_present); /* to support blank */
84 BIT(fragment_present); /* to support blank */
85 };
86
87 #define DEFAULT_SCHEME "https"
88
free_urlhandle(struct Curl_URL * u)89 static void free_urlhandle(struct Curl_URL *u)
90 {
91 free(u->scheme);
92 free(u->user);
93 free(u->password);
94 free(u->options);
95 free(u->host);
96 free(u->zoneid);
97 free(u->port);
98 free(u->path);
99 free(u->query);
100 free(u->fragment);
101 }
102
/*
 * find_host_sep()
 *
 * Returns a pointer to the first '/' or '?' found after the "//" that
 * introduces the host name, whichever comes first. The '?' matters for URLs
 * like http://www.example.com?id=2380
 *
 * If 'url' holds no "//", the search starts at the beginning of the string.
 * If neither separator is found, a pointer to the terminating null byte is
 * returned.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *host = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* the host name starts right after the double slash, if there is one */
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  if(!slash)
    slash = end;

  qmark = strchr(host, '?');
  if(!qmark)
    qmark = end;

  return (slash < qmark) ? slash : qmark;
}
130
/* Convert a CURLcode to a CURLUcode: CURLE_TOO_LARGE maps to
   CURLUE_TOO_LARGE, every other value maps to CURLUE_OUT_OF_MEMORY */
#define cc2cu(x) ((x) != CURLE_TOO_LARGE ? CURLUE_OUT_OF_MEMORY :       \
                  CURLUE_TOO_LARGE)

/*
 * Decide whether a character in a URL must be escaped: it must if it is
 * neither a control character, nor white space, nor a graphical character.
 */
#define urlchar_needs_escaping(c) \
  (!ISCNTRL(c) && !ISSPACE(c) && !ISGRAPH(c))

/* lowercase hexadecimal digits, indexed by nibble value */
static const char hexdigits[] = "0123456789abcdef";
140 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
141 * spaces in the source URL accordingly.
142 *
143 * URL encoding should be skipped for host names, otherwise IDN resolution
144 * will fail.
145 */
urlencode_str(struct dynbuf * o,const char * url,size_t len,bool relative,bool query)146 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
147 size_t len, bool relative,
148 bool query)
149 {
150 /* we must add this with whitespace-replacing */
151 bool left = !query;
152 const unsigned char *iptr;
153 const unsigned char *host_sep = (const unsigned char *) url;
154 CURLcode result;
155
156 if(!relative)
157 host_sep = (const unsigned char *) find_host_sep(url);
158
159 for(iptr = (unsigned char *)url; /* read from here */
160 len; iptr++, len--) {
161
162 if(iptr < host_sep) {
163 result = Curl_dyn_addn(o, iptr, 1);
164 if(result)
165 return cc2cu(result);
166 continue;
167 }
168
169 if(*iptr == ' ') {
170 if(left)
171 result = Curl_dyn_addn(o, "%20", 3);
172 else
173 result = Curl_dyn_addn(o, "+", 1);
174 if(result)
175 return cc2cu(result);
176 continue;
177 }
178
179 if(*iptr == '?')
180 left = FALSE;
181
182 if(urlchar_needs_escaping(*iptr)) {
183 char out[3]={'%'};
184 out[1] = hexdigits[*iptr>>4];
185 out[2] = hexdigits[*iptr & 0xf];
186 result = Curl_dyn_addn(o, out, 3);
187 }
188 else
189 result = Curl_dyn_addn(o, iptr, 1);
190 if(result)
191 return cc2cu(result);
192 }
193
194 return CURLUE_OK;
195 }
196
197 /*
198 * Returns the length of the scheme if the given URL is absolute (as opposed
199 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
200 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
201 *
202 * If 'guess_scheme' is TRUE, it means the URL might be provided without
203 * scheme.
204 */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen,bool guess_scheme)205 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
206 bool guess_scheme)
207 {
208 int i = 0;
209 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
210 (void)buflen; /* only used in debug-builds */
211 if(buf)
212 buf[0] = 0; /* always leave a defined value in buf */
213 #ifdef _WIN32
214 if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
215 return 0;
216 #endif
217 if(ISALPHA(url[0]))
218 for(i = 1; i < MAX_SCHEME_LEN; ++i) {
219 char s = url[i];
220 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
221 /* RFC 3986 3.1 explains:
222 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
223 */
224 }
225 else {
226 break;
227 }
228 }
229 if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
230 /* If this does not guess scheme, the scheme always ends with the colon so
231 that this also detects data: URLs etc. In guessing mode, data: could
232 be the host name "data" with a specified port number. */
233
234 /* the length of the scheme is the name part only */
235 size_t len = i;
236 if(buf) {
237 buf[i] = 0;
238 while(i--) {
239 buf[i] = Curl_raw_tolower(url[i]);
240 }
241 }
242 return len;
243 }
244 return 0;
245 }
246
247 /*
248 * Concatenate a relative URL to a base URL making it absolute.
249 * URL-encodes any spaces.
250 * The returned pointer must be freed by the caller unless NULL
251 * (returns NULL on out of memory).
252 *
253 * Note that this function destroys the 'base' string.
254 */
concat_url(char * base,const char * relurl,char ** newurl)255 static CURLcode concat_url(char *base, const char *relurl, char **newurl)
256 {
257 /***
258 TRY to append this new path to the old URL
259 to the right of the host part. Oh crap, this is doomed to cause
260 problems in the future...
261 */
262 struct dynbuf newest;
263 char *protsep;
264 char *pathsep;
265 bool host_changed = FALSE;
266 const char *useurl = relurl;
267 CURLcode result = CURLE_OK;
268 CURLUcode uc;
269 bool skip_slash = FALSE;
270 *newurl = NULL;
271
272 /* protsep points to the start of the host name */
273 protsep = strstr(base, "//");
274 if(!protsep)
275 protsep = base;
276 else
277 protsep += 2; /* pass the slashes */
278
279 if('/' != relurl[0]) {
280 int level = 0;
281
282 /* First we need to find out if there's a ?-letter in the URL,
283 and cut it and the right-side of that off */
284 pathsep = strchr(protsep, '?');
285 if(pathsep)
286 *pathsep = 0;
287
288 /* we have a relative path to append to the last slash if there's one
289 available, or the new URL is just a query string (starts with a '?') or
290 a fragment (starts with '#') we append the new one at the end of the
291 current URL */
292 if((useurl[0] != '?') && (useurl[0] != '#')) {
293 pathsep = strrchr(protsep, '/');
294 if(pathsep)
295 *pathsep = 0;
296
297 /* Check if there's any slash after the host name, and if so, remember
298 that position instead */
299 pathsep = strchr(protsep, '/');
300 if(pathsep)
301 protsep = pathsep + 1;
302 else
303 protsep = NULL;
304
305 /* now deal with one "./" or any amount of "../" in the newurl
306 and act accordingly */
307
308 if((useurl[0] == '.') && (useurl[1] == '/'))
309 useurl += 2; /* just skip the "./" */
310
311 while((useurl[0] == '.') &&
312 (useurl[1] == '.') &&
313 (useurl[2] == '/')) {
314 level++;
315 useurl += 3; /* pass the "../" */
316 }
317
318 if(protsep) {
319 while(level--) {
320 /* cut off one more level from the right of the original URL */
321 pathsep = strrchr(protsep, '/');
322 if(pathsep)
323 *pathsep = 0;
324 else {
325 *protsep = 0;
326 break;
327 }
328 }
329 }
330 }
331 else
332 skip_slash = TRUE;
333 }
334 else {
335 /* We got a new absolute path for this server */
336
337 if(relurl[1] == '/') {
338 /* the new URL starts with //, just keep the protocol part from the
339 original one */
340 *protsep = 0;
341 useurl = &relurl[2]; /* we keep the slashes from the original, so we
342 skip the new ones */
343 host_changed = TRUE;
344 }
345 else {
346 /* cut off the original URL from the first slash, or deal with URLs
347 without slash */
348 pathsep = strchr(protsep, '/');
349 if(pathsep) {
350 /* When people use badly formatted URLs, such as
351 "http://www.example.com?dir=/home/daniel" we must not use the first
352 slash, if there's a ?-letter before it! */
353 char *sep = strchr(protsep, '?');
354 if(sep && (sep < pathsep))
355 pathsep = sep;
356 *pathsep = 0;
357 }
358 else {
359 /* There was no slash. Now, since we might be operating on a badly
360 formatted URL, such as "http://www.example.com?id=2380" which
361 doesn't use a slash separator as it is supposed to, we need to check
362 for a ?-letter as well! */
363 pathsep = strchr(protsep, '?');
364 if(pathsep)
365 *pathsep = 0;
366 }
367 }
368 }
369
370 Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
371
372 /* copy over the root url part */
373 result = Curl_dyn_add(&newest, base);
374 if(result)
375 return result;
376
377 /* check if we need to append a slash */
378 if(('/' == useurl[0]) || (protsep && !*protsep) || skip_slash)
379 ;
380 else {
381 result = Curl_dyn_addn(&newest, "/", 1);
382 if(result)
383 return result;
384 }
385
386 /* then append the new piece on the right side */
387 uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
388 FALSE);
389 if(uc)
390 return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY;
391
392 *newurl = Curl_dyn_ptr(&newest);
393 return CURLE_OK;
394 }
395
396 /* scan for byte values <= 31, 127 and sometimes space */
junkscan(const char * url,size_t * urllen,unsigned int flags)397 static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
398 {
399 static const char badbytes[]={
400 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
401 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
402 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
403 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
404 0x7f, 0x00 /* null-terminate */
405 };
406 size_t n = strlen(url);
407 size_t nfine;
408
409 if(n > CURL_MAX_INPUT_LENGTH)
410 /* excessive input length */
411 return CURLUE_MALFORMED_INPUT;
412
413 nfine = strcspn(url, badbytes);
414 if((nfine != n) ||
415 (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
416 return CURLUE_MALFORMED_INPUT;
417
418 *urllen = n;
419 return CURLUE_OK;
420 }
421
422 /*
423 * parse_hostname_login()
424 *
425 * Parse the login details (user name, password and options) from the URL and
426 * strip them out of the host name
427 *
428 */
parse_hostname_login(struct Curl_URL * u,const char * login,size_t len,unsigned int flags,size_t * offset)429 static CURLUcode parse_hostname_login(struct Curl_URL *u,
430 const char *login,
431 size_t len,
432 unsigned int flags,
433 size_t *offset) /* to the host name */
434 {
435 CURLUcode result = CURLUE_OK;
436 CURLcode ccode;
437 char *userp = NULL;
438 char *passwdp = NULL;
439 char *optionsp = NULL;
440 const struct Curl_handler *h = NULL;
441
442 /* At this point, we assume all the other special cases have been taken
443 * care of, so the host is at most
444 *
445 * [user[:password][;options]]@]hostname
446 *
447 * We need somewhere to put the embedded details, so do that first.
448 */
449 char *ptr;
450
451 DEBUGASSERT(login);
452
453 *offset = 0;
454 ptr = memchr(login, '@', len);
455 if(!ptr)
456 goto out;
457
458 /* We will now try to extract the
459 * possible login information in a string like:
460 * ftp://user:password@ftp.my.site:8021/README */
461 ptr++;
462
463 /* if this is a known scheme, get some details */
464 if(u->scheme)
465 h = Curl_get_scheme_handler(u->scheme);
466
467 /* We could use the login information in the URL so extract it. Only parse
468 options if the handler says we should. Note that 'h' might be NULL! */
469 ccode = Curl_parse_login_details(login, ptr - login - 1,
470 &userp, &passwdp,
471 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
472 &optionsp:NULL);
473 if(ccode) {
474 result = CURLUE_BAD_LOGIN;
475 goto out;
476 }
477
478 if(userp) {
479 if(flags & CURLU_DISALLOW_USER) {
480 /* Option DISALLOW_USER is set and url contains username. */
481 result = CURLUE_USER_NOT_ALLOWED;
482 goto out;
483 }
484 free(u->user);
485 u->user = userp;
486 }
487
488 if(passwdp) {
489 free(u->password);
490 u->password = passwdp;
491 }
492
493 if(optionsp) {
494 free(u->options);
495 u->options = optionsp;
496 }
497
498 /* the host name starts at this offset */
499 *offset = ptr - login;
500 return CURLUE_OK;
501
502 out:
503
504 free(userp);
505 free(passwdp);
506 free(optionsp);
507 u->user = NULL;
508 u->password = NULL;
509 u->options = NULL;
510
511 return result;
512 }
513
Curl_parse_port(struct Curl_URL * u,struct dynbuf * host,bool has_scheme)514 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
515 bool has_scheme)
516 {
517 char *portptr;
518 char *hostname = Curl_dyn_ptr(host);
519 /*
520 * Find the end of an IPv6 address on the ']' ending bracket.
521 */
522 if(hostname[0] == '[') {
523 portptr = strchr(hostname, ']');
524 if(!portptr)
525 return CURLUE_BAD_IPV6;
526 portptr++;
527 /* this is a RFC2732-style specified IP-address */
528 if(*portptr) {
529 if(*portptr != ':')
530 return CURLUE_BAD_PORT_NUMBER;
531 }
532 else
533 portptr = NULL;
534 }
535 else
536 portptr = strchr(hostname, ':');
537
538 if(portptr) {
539 char *rest = NULL;
540 unsigned long port;
541 size_t keep = portptr - hostname;
542
543 /* Browser behavior adaptation. If there's a colon with no digits after,
544 just cut off the name there which makes us ignore the colon and just
545 use the default port. Firefox, Chrome and Safari all do that.
546
547 Don't do it if the URL has no scheme, to make something that looks like
548 a scheme not work!
549 */
550 Curl_dyn_setlen(host, keep);
551 portptr++;
552 if(!*portptr)
553 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
554
555 if(!ISDIGIT(*portptr))
556 return CURLUE_BAD_PORT_NUMBER;
557
558 errno = 0;
559 port = strtoul(portptr, &rest, 10); /* Port number must be decimal */
560
561 if(errno || (port > 0xffff) || *rest)
562 return CURLUE_BAD_PORT_NUMBER;
563
564 u->portnum = (unsigned short) port;
565 /* generate a new port number string to get rid of leading zeroes etc */
566 free(u->port);
567 u->port = aprintf("%ld", port);
568 if(!u->port)
569 return CURLUE_OUT_OF_MEMORY;
570 }
571
572 return CURLUE_OK;
573 }
574
575 /* this assumes 'hostname' now starts with [ */
ipv6_parse(struct Curl_URL * u,char * hostname,size_t hlen)576 static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
577 size_t hlen) /* length of hostname */
578 {
579 size_t len;
580 DEBUGASSERT(*hostname == '[');
581 if(hlen < 4) /* '[::]' is the shortest possible valid string */
582 return CURLUE_BAD_IPV6;
583 hostname++;
584 hlen -= 2;
585
586 /* only valid IPv6 letters are ok */
587 len = strspn(hostname, "0123456789abcdefABCDEF:.");
588
589 if(hlen != len) {
590 hlen = len;
591 if(hostname[len] == '%') {
592 /* this could now be '%[zone id]' */
593 char zoneid[16];
594 int i = 0;
595 char *h = &hostname[len + 1];
596 /* pass '25' if present and is a url encoded percent sign */
597 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
598 h += 2;
599 while(*h && (*h != ']') && (i < 15))
600 zoneid[i++] = *h++;
601 if(!i || (']' != *h))
602 return CURLUE_BAD_IPV6;
603 zoneid[i] = 0;
604 u->zoneid = strdup(zoneid);
605 if(!u->zoneid)
606 return CURLUE_OUT_OF_MEMORY;
607 hostname[len] = ']'; /* insert end bracket */
608 hostname[len + 1] = 0; /* terminate the hostname */
609 }
610 else
611 return CURLUE_BAD_IPV6;
612 /* hostname is fine */
613 }
614
615 /* Check the IPv6 address. */
616 {
617 char dest[16]; /* fits a binary IPv6 address */
618 char norm[MAX_IPADR_LEN];
619 hostname[hlen] = 0; /* end the address there */
620 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
621 return CURLUE_BAD_IPV6;
622
623 /* check if it can be done shorter */
624 if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
625 (strlen(norm) < hlen)) {
626 strcpy(hostname, norm);
627 hlen = strlen(norm);
628 hostname[hlen + 1] = 0;
629 }
630 hostname[hlen] = ']'; /* restore ending bracket */
631 }
632 return CURLUE_OK;
633 }
634
hostname_check(struct Curl_URL * u,char * hostname,size_t hlen)635 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
636 size_t hlen) /* length of hostname */
637 {
638 size_t len;
639 DEBUGASSERT(hostname);
640
641 if(!hlen)
642 return CURLUE_NO_HOST;
643 else if(hostname[0] == '[')
644 return ipv6_parse(u, hostname, hlen);
645 else {
646 /* letters from the second string are not ok */
647 len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
648 if(hlen != len)
649 /* hostname with bad content */
650 return CURLUE_BAD_HOSTNAME;
651 }
652 return CURLUE_OK;
653 }
654
/*
 * ipv4_normalize()
 *
 * Handle partial IPv4 numerical addresses and different bases, like
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
 *
 * If the content of 'host' is an IPv4 address in one of those forms, it is
 * replaced by the "normalized" version in plain quad decimal integers and
 * HOST_IPV4 is returned.
 *
 * If the input is syntactically wrong IPv4, or any part is too big for
 * example, 'host' is left untouched and HOST_NAME is returned. Input that
 * starts with '[' returns HOST_IPV6 without further checks. HOST_ERROR is
 * returned if the normalized address could not be stored.
 *
 * Returns the host type.
 */

#define HOST_ERROR   -1 /* out of memory */
#define HOST_BAD     -2 /* bad IPv4 address */

#define HOST_NAME    1
#define HOST_IPV4    2
#define HOST_IPV6    3

static int ipv4_normalize(struct dynbuf *host)
{
  int n = 0; /* index of the part being parsed, the last one when done */
  const char *c = Curl_dyn_ptr(host);
  unsigned long parts[4] = {0, 0, 0, 0};
  unsigned int octet[4];

  if(*c == '[')
    return HOST_IPV6;

  errno = 0; /* for strtoul */
  for(;;) {
    char *endp = NULL;
    unsigned long l;
    if(!ISDIGIT(*c))
      /* most importantly this doesn't allow a leading plus or minus */
      return HOST_NAME;
    /* base 0 makes strtoul accept decimal, octal and hex notation */
    l = strtoul(c, &endp, 0);
    if(errno)
      return HOST_NAME;
#if SIZEOF_LONG > 4
    /* a value larger than 32 bits */
    if(l > UINT_MAX)
      return HOST_NAME;
#endif

    parts[n] = l;
    c = endp;

    if(!*c)
      /* end of input, the last part has been read */
      break;

    /* the only thing that can follow a number is a dot leading to the next
       part, and there can be no more than four parts */
    if((*c != '.') || (n == 3))
      return HOST_NAME;
    n++;
    c++;
  }

  /* the last part fills all the bytes that the parts before it left over */
  switch(n) {
  case 0: /* a -- 32 bits */
    octet[0] = (unsigned int)(parts[0] >> 24);
    octet[1] = (unsigned int)((parts[0] >> 16) & 0xff);
    octet[2] = (unsigned int)((parts[0] >> 8) & 0xff);
    octet[3] = (unsigned int)(parts[0] & 0xff);
    break;
  case 1: /* a.b -- 8.24 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
      return HOST_NAME;
    octet[0] = (unsigned int)(parts[0]);
    octet[1] = (unsigned int)((parts[1] >> 16) & 0xff);
    octet[2] = (unsigned int)((parts[1] >> 8) & 0xff);
    octet[3] = (unsigned int)(parts[1] & 0xff);
    break;
  case 2: /* a.b.c -- 8.8.16 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
      return HOST_NAME;
    octet[0] = (unsigned int)(parts[0]);
    octet[1] = (unsigned int)(parts[1]);
    octet[2] = (unsigned int)((parts[2] >> 8) & 0xff);
    octet[3] = (unsigned int)(parts[2] & 0xff);
    break;
  default: /* 3: a.b.c.d -- 8.8.8.8 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
       (parts[3] > 0xff))
      return HOST_NAME;
    octet[0] = (unsigned int)(parts[0]);
    octet[1] = (unsigned int)(parts[1]);
    octet[2] = (unsigned int)(parts[2]);
    octet[3] = (unsigned int)(parts[3]);
    break;
  }

  /* the input has been fully parsed, it can now be overwritten */
  Curl_dyn_reset(host);
  if(Curl_dyn_addf(host, "%u.%u.%u.%u",
                   octet[0], octet[1], octet[2], octet[3]))
    return HOST_ERROR;
  return HOST_IPV4;
}
768
769 /* if necessary, replace the host content with a URL decoded version */
urldecode_host(struct dynbuf * host)770 static CURLUcode urldecode_host(struct dynbuf *host)
771 {
772 char *per = NULL;
773 const char *hostname = Curl_dyn_ptr(host);
774 per = strchr(hostname, '%');
775 if(!per)
776 /* nothing to decode */
777 return CURLUE_OK;
778 else {
779 /* encoded */
780 size_t dlen;
781 char *decoded;
782 CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
783 REJECT_CTRL);
784 if(result)
785 return CURLUE_BAD_HOSTNAME;
786 Curl_dyn_reset(host);
787 result = Curl_dyn_addn(host, decoded, dlen);
788 free(decoded);
789 if(result)
790 return cc2cu(result);
791 }
792
793 return CURLUE_OK;
794 }
795
parse_authority(struct Curl_URL * u,const char * auth,size_t authlen,unsigned int flags,struct dynbuf * host,bool has_scheme)796 static CURLUcode parse_authority(struct Curl_URL *u,
797 const char *auth, size_t authlen,
798 unsigned int flags,
799 struct dynbuf *host,
800 bool has_scheme)
801 {
802 size_t offset;
803 CURLUcode uc;
804 CURLcode result;
805
806 /*
807 * Parse the login details and strip them out of the host name.
808 */
809 uc = parse_hostname_login(u, auth, authlen, flags, &offset);
810 if(uc)
811 goto out;
812
813 result = Curl_dyn_addn(host, auth + offset, authlen - offset);
814 if(result) {
815 uc = cc2cu(result);
816 goto out;
817 }
818
819 uc = Curl_parse_port(u, host, has_scheme);
820 if(uc)
821 goto out;
822
823 if(!Curl_dyn_len(host))
824 return CURLUE_NO_HOST;
825
826 switch(ipv4_normalize(host)) {
827 case HOST_IPV4:
828 break;
829 case HOST_IPV6:
830 uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
831 break;
832 case HOST_NAME:
833 uc = urldecode_host(host);
834 if(!uc)
835 uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
836 break;
837 case HOST_ERROR:
838 uc = CURLUE_OUT_OF_MEMORY;
839 break;
840 case HOST_BAD:
841 default:
842 uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
843 break;
844 }
845
846 out:
847 return uc;
848 }
849
850 /* used for HTTP/2 server push */
Curl_url_set_authority(CURLU * u,const char * authority)851 CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
852 {
853 CURLUcode result;
854 struct dynbuf host;
855
856 DEBUGASSERT(authority);
857 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
858
859 result = parse_authority(u, authority, strlen(authority),
860 CURLU_DISALLOW_USER, &host, !!u->scheme);
861 if(result)
862 Curl_dyn_free(&host);
863 else {
864 free(u->host);
865 u->host = Curl_dyn_ptr(&host);
866 }
867 return result;
868 }
869
870 /*
871 * "Remove Dot Segments"
872 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
873 */
874
875 /*
876 * dedotdotify()
877 * @unittest: 1395
878 *
879 * This function gets a null-terminated path with dot and dotdot sequences
880 * passed in and strips them off according to the rules in RFC 3986 section
881 * 5.2.4.
882 *
883 * The function handles a query part ('?' + stuff) appended but it expects
884 * that fragments ('#' + stuff) have already been cut off.
885 *
886 * RETURNS
887 *
888 * Zero for success and 'out' set to an allocated dedotdotified string.
889 */
890 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
dedotdotify(const char * input,size_t clen,char ** outp)891 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
892 {
893 char *outptr;
894 const char *endp = &input[clen];
895 char *out;
896
897 *outp = NULL;
898 /* the path always starts with a slash, and a slash has not dot */
899 if((clen < 2) || !memchr(input, '.', clen))
900 return 0;
901
902 out = malloc(clen + 1);
903 if(!out)
904 return 1; /* out of memory */
905
906 *out = 0; /* null-terminates, for inputs like "./" */
907 outptr = out;
908
909 do {
910 bool dotdot = TRUE;
911 if(*input == '.') {
912 /* A. If the input buffer begins with a prefix of "../" or "./", then
913 remove that prefix from the input buffer; otherwise, */
914
915 if(!strncmp("./", input, 2)) {
916 input += 2;
917 clen -= 2;
918 }
919 else if(!strncmp("../", input, 3)) {
920 input += 3;
921 clen -= 3;
922 }
923 /* D. if the input buffer consists only of "." or "..", then remove
924 that from the input buffer; otherwise, */
925
926 else if(!strcmp(".", input) || !strcmp("..", input) ||
927 !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
928 *out = 0;
929 break;
930 }
931 else
932 dotdot = FALSE;
933 }
934 else if(*input == '/') {
935 /* B. if the input buffer begins with a prefix of "/./" or "/.", where
936 "." is a complete path segment, then replace that prefix with "/" in
937 the input buffer; otherwise, */
938 if(!strncmp("/./", input, 3)) {
939 input += 2;
940 clen -= 2;
941 }
942 else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
943 *outptr++ = '/';
944 *outptr = 0;
945 break;
946 }
947
948 /* C. if the input buffer begins with a prefix of "/../" or "/..",
949 where ".." is a complete path segment, then replace that prefix with
950 "/" in the input buffer and remove the last segment and its
951 preceding "/" (if any) from the output buffer; otherwise, */
952
953 else if(!strncmp("/../", input, 4)) {
954 input += 3;
955 clen -= 3;
956 /* remove the last segment from the output buffer */
957 while(outptr > out) {
958 outptr--;
959 if(*outptr == '/')
960 break;
961 }
962 *outptr = 0; /* null-terminate where it stops */
963 }
964 else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
965 /* remove the last segment from the output buffer */
966 while(outptr > out) {
967 outptr--;
968 if(*outptr == '/')
969 break;
970 }
971 *outptr++ = '/';
972 *outptr = 0; /* null-terminate where it stops */
973 break;
974 }
975 else
976 dotdot = FALSE;
977 }
978 else
979 dotdot = FALSE;
980
981 if(!dotdot) {
982 /* E. move the first path segment in the input buffer to the end of
983 the output buffer, including the initial "/" character (if any) and
984 any subsequent characters up to, but not including, the next "/"
985 character or the end of the input buffer. */
986
987 do {
988 *outptr++ = *input++;
989 clen--;
990 } while(*input && (*input != '/') && (*input != '?'));
991 *outptr = 0;
992 }
993
994 /* continue until end of path */
995 } while(input < endp);
996
997 *outp = out;
998 return 0; /* success */
999 }
1000
parseurl(const char * url,CURLU * u,unsigned int flags)1001 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1002 {
1003 const char *path;
1004 size_t pathlen;
1005 char *query = NULL;
1006 char *fragment = NULL;
1007 char schemebuf[MAX_SCHEME_LEN + 1];
1008 size_t schemelen = 0;
1009 size_t urllen;
1010 CURLUcode result = CURLUE_OK;
1011 size_t fraglen = 0;
1012 struct dynbuf host;
1013
1014 DEBUGASSERT(url);
1015
1016 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
1017
1018 result = junkscan(url, &urllen, flags);
1019 if(result)
1020 goto fail;
1021
1022 schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1023 flags & (CURLU_GUESS_SCHEME|
1024 CURLU_DEFAULT_SCHEME));
1025
1026 /* handle the file: scheme */
1027 if(schemelen && !strcmp(schemebuf, "file")) {
1028 bool uncpath = FALSE;
1029 if(urllen <= 6) {
1030 /* file:/ is not enough to actually be a complete file: URL */
1031 result = CURLUE_BAD_FILE_URL;
1032 goto fail;
1033 }
1034
1035 /* path has been allocated large enough to hold this */
1036 path = (char *)&url[5];
1037 pathlen = urllen - 5;
1038
1039 u->scheme = strdup("file");
1040 if(!u->scheme) {
1041 result = CURLUE_OUT_OF_MEMORY;
1042 goto fail;
1043 }
1044
1045 /* Extra handling URLs with an authority component (i.e. that start with
1046 * "file://")
1047 *
1048 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
1049 * RFC 8089, but not the (current) WHAT-WG URL spec.
1050 */
1051 if(path[0] == '/' && path[1] == '/') {
1052 /* swallow the two slashes */
1053 const char *ptr = &path[2];
1054
1055 /*
1056 * According to RFC 8089, a file: URL can be reliably dereferenced if:
1057 *
1058 * o it has no/blank hostname, or
1059 *
1060 * o the hostname matches "localhost" (case-insensitively), or
1061 *
1062 * o the hostname is a FQDN that resolves to this machine, or
1063 *
1064 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
1065 * Appendix E.3).
1066 *
1067 * For brevity, we only consider URLs with empty, "localhost", or
1068 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
1069 *
1070 * Additionally, there is an exception for URLs with a Windows drive
1071 * letter in the authority (which was accidentally omitted from RFC 8089
1072 * Appendix E, but believe me, it was meant to be there. --MK)
1073 */
1074 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1075 /* the URL includes a host name, it must match "localhost" or
1076 "127.0.0.1" to be valid */
1077 if(checkprefix("localhost/", ptr) ||
1078 checkprefix("127.0.0.1/", ptr)) {
1079 ptr += 9; /* now points to the slash after the host */
1080 }
1081 else {
1082 #if defined(_WIN32)
1083 size_t len;
1084
1085 /* the host name, NetBIOS computer name, can not contain disallowed
1086 chars, and the delimiting slash character must be appended to the
1087 host name */
1088 path = strpbrk(ptr, "/\\:*?\"<>|");
1089 if(!path || *path != '/') {
1090 result = CURLUE_BAD_FILE_URL;
1091 goto fail;
1092 }
1093
1094 len = path - ptr;
1095 if(len) {
1096 CURLcode code = Curl_dyn_addn(&host, ptr, len);
1097 if(code) {
1098 result = cc2cu(code);
1099 goto fail;
1100 }
1101 uncpath = TRUE;
1102 }
1103
1104 ptr -= 2; /* now points to the // before the host in UNC */
1105 #else
1106 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1107 none */
1108 result = CURLUE_BAD_FILE_URL;
1109 goto fail;
1110 #endif
1111 }
1112 }
1113
1114 path = ptr;
1115 pathlen = urllen - (ptr - url);
1116 }
1117
1118 if(!uncpath)
1119 /* no host for file: URLs by default */
1120 Curl_dyn_reset(&host);
1121
1122 #if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1123 /* Don't allow Windows drive letters when not in Windows.
1124 * This catches both "file:/c:" and "file:c:" */
1125 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1126 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1127 /* File drive letters are only accepted in MSDOS/Windows */
1128 result = CURLUE_BAD_FILE_URL;
1129 goto fail;
1130 }
1131 #else
1132 /* If the path starts with a slash and a drive letter, ditch the slash */
1133 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1134 /* This cannot be done with strcpy, as the memory chunks overlap! */
1135 path++;
1136 pathlen--;
1137 }
1138 #endif
1139
1140 }
1141 else {
1142 /* clear path */
1143 const char *schemep = NULL;
1144 const char *hostp;
1145 size_t hostlen;
1146
1147 if(schemelen) {
1148 int i = 0;
1149 const char *p = &url[schemelen + 1];
1150 while((*p == '/') && (i < 4)) {
1151 p++;
1152 i++;
1153 }
1154
1155 schemep = schemebuf;
1156 if(!Curl_get_scheme_handler(schemep) &&
1157 !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1158 result = CURLUE_UNSUPPORTED_SCHEME;
1159 goto fail;
1160 }
1161
1162 if((i < 1) || (i > 3)) {
1163 /* less than one or more than three slashes */
1164 result = CURLUE_BAD_SLASHES;
1165 goto fail;
1166 }
1167 hostp = p; /* host name starts here */
1168 }
1169 else {
1170 /* no scheme! */
1171
1172 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1173 result = CURLUE_BAD_SCHEME;
1174 goto fail;
1175 }
1176 if(flags & CURLU_DEFAULT_SCHEME)
1177 schemep = DEFAULT_SCHEME;
1178
1179 /*
1180 * The URL was badly formatted, let's try without scheme specified.
1181 */
1182 hostp = url;
1183 }
1184
1185 if(schemep) {
1186 u->scheme = strdup(schemep);
1187 if(!u->scheme) {
1188 result = CURLUE_OUT_OF_MEMORY;
1189 goto fail;
1190 }
1191 }
1192
1193 /* find the end of the host name + port number */
1194 hostlen = strcspn(hostp, "/?#");
1195 path = &hostp[hostlen];
1196
1197 /* this pathlen also contains the query and the fragment */
1198 pathlen = urllen - (path - url);
1199 if(hostlen) {
1200
1201 result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1202 if(result)
1203 goto fail;
1204
1205 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1206 const char *hostname = Curl_dyn_ptr(&host);
1207 /* legacy curl-style guess based on host name */
1208 if(checkprefix("ftp.", hostname))
1209 schemep = "ftp";
1210 else if(checkprefix("dict.", hostname))
1211 schemep = "dict";
1212 else if(checkprefix("ldap.", hostname))
1213 schemep = "ldap";
1214 else if(checkprefix("imap.", hostname))
1215 schemep = "imap";
1216 else if(checkprefix("smtp.", hostname))
1217 schemep = "smtp";
1218 else if(checkprefix("pop3.", hostname))
1219 schemep = "pop3";
1220 else
1221 schemep = "http";
1222
1223 u->scheme = strdup(schemep);
1224 if(!u->scheme) {
1225 result = CURLUE_OUT_OF_MEMORY;
1226 goto fail;
1227 }
1228 }
1229 }
1230 else if(flags & CURLU_NO_AUTHORITY) {
1231 /* allowed to be empty. */
1232 if(Curl_dyn_add(&host, "")) {
1233 result = CURLUE_OUT_OF_MEMORY;
1234 goto fail;
1235 }
1236 }
1237 else {
1238 result = CURLUE_NO_HOST;
1239 goto fail;
1240 }
1241 }
1242
1243 fragment = strchr(path, '#');
1244 if(fragment) {
1245 fraglen = pathlen - (fragment - path);
1246 u->fragment_present = TRUE;
1247 if(fraglen > 1) {
1248 /* skip the leading '#' in the copy but include the terminating null */
1249 if(flags & CURLU_URLENCODE) {
1250 struct dynbuf enc;
1251 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1252 result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1253 if(result)
1254 goto fail;
1255 u->fragment = Curl_dyn_ptr(&enc);
1256 }
1257 else {
1258 u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1259 if(!u->fragment) {
1260 result = CURLUE_OUT_OF_MEMORY;
1261 goto fail;
1262 }
1263 }
1264 }
1265 /* after this, pathlen still contains the query */
1266 pathlen -= fraglen;
1267 }
1268
1269 query = memchr(path, '?', pathlen);
1270 if(query) {
1271 size_t qlen = fragment ? (size_t)(fragment - query) :
1272 pathlen - (query - path);
1273 pathlen -= qlen;
1274 u->query_present = TRUE;
1275 if(qlen > 1) {
1276 if(flags & CURLU_URLENCODE) {
1277 struct dynbuf enc;
1278 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1279 /* skip the leading question mark */
1280 result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1281 if(result)
1282 goto fail;
1283 u->query = Curl_dyn_ptr(&enc);
1284 }
1285 else {
1286 u->query = Curl_memdup0(query + 1, qlen - 1);
1287 if(!u->query) {
1288 result = CURLUE_OUT_OF_MEMORY;
1289 goto fail;
1290 }
1291 }
1292 }
1293 else {
1294 /* single byte query */
1295 u->query = strdup("");
1296 if(!u->query) {
1297 result = CURLUE_OUT_OF_MEMORY;
1298 goto fail;
1299 }
1300 }
1301 }
1302
1303 if(pathlen && (flags & CURLU_URLENCODE)) {
1304 struct dynbuf enc;
1305 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1306 result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1307 if(result)
1308 goto fail;
1309 pathlen = Curl_dyn_len(&enc);
1310 path = u->path = Curl_dyn_ptr(&enc);
1311 }
1312
1313 if(pathlen <= 1) {
1314 /* there is no path left or just the slash, unset */
1315 path = NULL;
1316 }
1317 else {
1318 if(!u->path) {
1319 u->path = Curl_memdup0(path, pathlen);
1320 if(!u->path) {
1321 result = CURLUE_OUT_OF_MEMORY;
1322 goto fail;
1323 }
1324 path = u->path;
1325 }
1326 else if(flags & CURLU_URLENCODE)
1327 /* it might have encoded more than just the path so cut it */
1328 u->path[pathlen] = 0;
1329
1330 if(!(flags & CURLU_PATH_AS_IS)) {
1331 /* remove ../ and ./ sequences according to RFC3986 */
1332 char *dedot;
1333 int err = dedotdotify((char *)path, pathlen, &dedot);
1334 if(err) {
1335 result = CURLUE_OUT_OF_MEMORY;
1336 goto fail;
1337 }
1338 if(dedot) {
1339 free(u->path);
1340 u->path = dedot;
1341 }
1342 }
1343 }
1344
1345 u->host = Curl_dyn_ptr(&host);
1346
1347 return result;
1348 fail:
1349 Curl_dyn_free(&host);
1350 free_urlhandle(u);
1351 return result;
1352 }
1353
1354 /*
1355 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1356 */
parseurl_and_replace(const char * url,CURLU * u,unsigned int flags)1357 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1358 unsigned int flags)
1359 {
1360 CURLUcode result;
1361 CURLU tmpurl;
1362 memset(&tmpurl, 0, sizeof(tmpurl));
1363 result = parseurl(url, &tmpurl, flags);
1364 if(!result) {
1365 free_urlhandle(u);
1366 *u = tmpurl;
1367 }
1368 return result;
1369 }
1370
1371 /*
1372 */
curl_url(void)1373 CURLU *curl_url(void)
1374 {
1375 return calloc(1, sizeof(struct Curl_URL));
1376 }
1377
/*
 * curl_url_cleanup() releases every part stored in the handle and then the
 * handle itself. Passing NULL is allowed and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1385
/*
 * DUP() copies the string member 'name' from 'src' to 'dest' with strdup()
 * when the source member is set. If the allocation fails it jumps to a label
 * named 'fail', which the function using the macro has to provide.
 *
 * 'dest' and 'src' are parenthesized so that any pointer expression (for
 * example '&obj') can be passed, not only plain identifiers.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1394
curl_url_dup(const CURLU * in)1395 CURLU *curl_url_dup(const CURLU *in)
1396 {
1397 struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1398 if(u) {
1399 DUP(u, in, scheme);
1400 DUP(u, in, user);
1401 DUP(u, in, password);
1402 DUP(u, in, options);
1403 DUP(u, in, host);
1404 DUP(u, in, port);
1405 DUP(u, in, path);
1406 DUP(u, in, query);
1407 DUP(u, in, fragment);
1408 DUP(u, in, zoneid);
1409 u->portnum = in->portnum;
1410 u->fragment_present = in->fragment_present;
1411 u->query_present = in->query_present;
1412 }
1413 return u;
1414 fail:
1415 curl_url_cleanup(u);
1416 return NULL;
1417 }
1418
curl_url_get(const CURLU * u,CURLUPart what,char ** part,unsigned int flags)1419 CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1420 char **part, unsigned int flags)
1421 {
1422 const char *ptr;
1423 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1424 char portbuf[7];
1425 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1426 bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1427 bool punycode = FALSE;
1428 bool depunyfy = FALSE;
1429 bool plusdecode = FALSE;
1430 (void)flags;
1431 if(!u)
1432 return CURLUE_BAD_HANDLE;
1433 if(!part)
1434 return CURLUE_BAD_PARTPOINTER;
1435 *part = NULL;
1436
1437 switch(what) {
1438 case CURLUPART_SCHEME:
1439 ptr = u->scheme;
1440 ifmissing = CURLUE_NO_SCHEME;
1441 urldecode = FALSE; /* never for schemes */
1442 break;
1443 case CURLUPART_USER:
1444 ptr = u->user;
1445 ifmissing = CURLUE_NO_USER;
1446 break;
1447 case CURLUPART_PASSWORD:
1448 ptr = u->password;
1449 ifmissing = CURLUE_NO_PASSWORD;
1450 break;
1451 case CURLUPART_OPTIONS:
1452 ptr = u->options;
1453 ifmissing = CURLUE_NO_OPTIONS;
1454 break;
1455 case CURLUPART_HOST:
1456 ptr = u->host;
1457 ifmissing = CURLUE_NO_HOST;
1458 punycode = (flags & CURLU_PUNYCODE)?1:0;
1459 depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1460 break;
1461 case CURLUPART_ZONEID:
1462 ptr = u->zoneid;
1463 ifmissing = CURLUE_NO_ZONEID;
1464 break;
1465 case CURLUPART_PORT:
1466 ptr = u->port;
1467 ifmissing = CURLUE_NO_PORT;
1468 urldecode = FALSE; /* never for port */
1469 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1470 /* there's no stored port number, but asked to deliver
1471 a default one for the scheme */
1472 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1473 if(h) {
1474 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1475 ptr = portbuf;
1476 }
1477 }
1478 else if(ptr && u->scheme) {
1479 /* there is a stored port number, but ask to inhibit if
1480 it matches the default one for the scheme */
1481 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1482 if(h && (h->defport == u->portnum) &&
1483 (flags & CURLU_NO_DEFAULT_PORT))
1484 ptr = NULL;
1485 }
1486 break;
1487 case CURLUPART_PATH:
1488 ptr = u->path;
1489 if(!ptr)
1490 ptr = "/";
1491 break;
1492 case CURLUPART_QUERY:
1493 ptr = u->query;
1494 ifmissing = CURLUE_NO_QUERY;
1495 plusdecode = urldecode;
1496 if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
1497 /* there was a blank query and the user do not ask for it */
1498 ptr = NULL;
1499 break;
1500 case CURLUPART_FRAGMENT:
1501 ptr = u->fragment;
1502 ifmissing = CURLUE_NO_FRAGMENT;
1503 if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
1504 /* there was a blank fragment and the user asks for it */
1505 ptr = "";
1506 break;
1507 case CURLUPART_URL: {
1508 char *url;
1509 char *scheme;
1510 char *options = u->options;
1511 char *port = u->port;
1512 char *allochost = NULL;
1513 bool show_fragment =
1514 u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1515 bool show_query =
1516 (u->query && u->query[0]) ||
1517 (u->query_present && flags & CURLU_GET_EMPTY);
1518 punycode = (flags & CURLU_PUNYCODE)?1:0;
1519 depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1520 if(u->scheme && strcasecompare("file", u->scheme)) {
1521 url = aprintf("file://%s%s%s",
1522 u->path,
1523 show_fragment ? "#": "",
1524 u->fragment ? u->fragment : "");
1525 }
1526 else if(!u->host)
1527 return CURLUE_NO_HOST;
1528 else {
1529 const struct Curl_handler *h = NULL;
1530 if(u->scheme)
1531 scheme = u->scheme;
1532 else if(flags & CURLU_DEFAULT_SCHEME)
1533 scheme = (char *) DEFAULT_SCHEME;
1534 else
1535 return CURLUE_NO_SCHEME;
1536
1537 h = Curl_get_scheme_handler(scheme);
1538 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1539 /* there's no stored port number, but asked to deliver
1540 a default one for the scheme */
1541 if(h) {
1542 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1543 port = portbuf;
1544 }
1545 }
1546 else if(port) {
1547 /* there is a stored port number, but asked to inhibit if it matches
1548 the default one for the scheme */
1549 if(h && (h->defport == u->portnum) &&
1550 (flags & CURLU_NO_DEFAULT_PORT))
1551 port = NULL;
1552 }
1553
1554 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1555 options = NULL;
1556
1557 if(u->host[0] == '[') {
1558 if(u->zoneid) {
1559 /* make it '[ host %25 zoneid ]' */
1560 struct dynbuf enc;
1561 size_t hostlen = strlen(u->host);
1562 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1563 if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1564 u->zoneid))
1565 return CURLUE_OUT_OF_MEMORY;
1566 allochost = Curl_dyn_ptr(&enc);
1567 }
1568 }
1569 else if(urlencode) {
1570 allochost = curl_easy_escape(NULL, u->host, 0);
1571 if(!allochost)
1572 return CURLUE_OUT_OF_MEMORY;
1573 }
1574 else if(punycode) {
1575 if(!Curl_is_ASCII_name(u->host)) {
1576 #ifndef USE_IDN
1577 return CURLUE_LACKS_IDN;
1578 #else
1579 CURLcode result = Curl_idn_decode(u->host, &allochost);
1580 if(result)
1581 return (result == CURLE_OUT_OF_MEMORY) ?
1582 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1583 #endif
1584 }
1585 }
1586 else if(depunyfy) {
1587 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1588 #ifndef USE_IDN
1589 return CURLUE_LACKS_IDN;
1590 #else
1591 CURLcode result = Curl_idn_encode(u->host, &allochost);
1592 if(result)
1593 /* this is the most likely error */
1594 return (result == CURLE_OUT_OF_MEMORY) ?
1595 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1596 #endif
1597 }
1598 }
1599
1600 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1601 scheme,
1602 u->user ? u->user : "",
1603 u->password ? ":": "",
1604 u->password ? u->password : "",
1605 options ? ";" : "",
1606 options ? options : "",
1607 (u->user || u->password || options) ? "@": "",
1608 allochost ? allochost : u->host,
1609 port ? ":": "",
1610 port ? port : "",
1611 u->path ? u->path : "/",
1612 show_query ? "?": "",
1613 u->query ? u->query : "",
1614 show_fragment ? "#": "",
1615 u->fragment? u->fragment : "");
1616 free(allochost);
1617 }
1618 if(!url)
1619 return CURLUE_OUT_OF_MEMORY;
1620 *part = url;
1621 return CURLUE_OK;
1622 }
1623 default:
1624 ptr = NULL;
1625 break;
1626 }
1627 if(ptr) {
1628 size_t partlen = strlen(ptr);
1629 size_t i = 0;
1630 *part = Curl_memdup0(ptr, partlen);
1631 if(!*part)
1632 return CURLUE_OUT_OF_MEMORY;
1633 if(plusdecode) {
1634 /* convert + to space */
1635 char *plus = *part;
1636 for(i = 0; i < partlen; ++plus, i++) {
1637 if(*plus == '+')
1638 *plus = ' ';
1639 }
1640 }
1641 if(urldecode) {
1642 char *decoded;
1643 size_t dlen;
1644 /* this unconditional rejection of control bytes is documented
1645 API behavior */
1646 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1647 free(*part);
1648 if(res) {
1649 *part = NULL;
1650 return CURLUE_URLDECODE;
1651 }
1652 *part = decoded;
1653 partlen = dlen;
1654 }
1655 if(urlencode) {
1656 struct dynbuf enc;
1657 CURLUcode uc;
1658 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1659 uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
1660 if(uc)
1661 return uc;
1662 free(*part);
1663 *part = Curl_dyn_ptr(&enc);
1664 }
1665 else if(punycode) {
1666 if(!Curl_is_ASCII_name(u->host)) {
1667 #ifndef USE_IDN
1668 return CURLUE_LACKS_IDN;
1669 #else
1670 char *allochost;
1671 CURLcode result = Curl_idn_decode(*part, &allochost);
1672 if(result)
1673 return (result == CURLE_OUT_OF_MEMORY) ?
1674 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1675 free(*part);
1676 *part = allochost;
1677 #endif
1678 }
1679 }
1680 else if(depunyfy) {
1681 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1682 #ifndef USE_IDN
1683 return CURLUE_LACKS_IDN;
1684 #else
1685 char *allochost;
1686 CURLcode result = Curl_idn_encode(*part, &allochost);
1687 if(result)
1688 return (result == CURLE_OUT_OF_MEMORY) ?
1689 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1690 free(*part);
1691 *part = allochost;
1692 #endif
1693 }
1694 }
1695
1696 return CURLUE_OK;
1697 }
1698 else
1699 return ifmissing;
1700 }
1701
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1702 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1703 const char *part, unsigned int flags)
1704 {
1705 char **storep = NULL;
1706 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1707 bool plusencode = FALSE;
1708 bool urlskipslash = FALSE;
1709 bool leadingslash = FALSE;
1710 bool appendquery = FALSE;
1711 bool equalsencode = FALSE;
1712 size_t nalloc;
1713
1714 if(!u)
1715 return CURLUE_BAD_HANDLE;
1716 if(!part) {
1717 /* setting a part to NULL clears it */
1718 switch(what) {
1719 case CURLUPART_URL:
1720 break;
1721 case CURLUPART_SCHEME:
1722 storep = &u->scheme;
1723 break;
1724 case CURLUPART_USER:
1725 storep = &u->user;
1726 break;
1727 case CURLUPART_PASSWORD:
1728 storep = &u->password;
1729 break;
1730 case CURLUPART_OPTIONS:
1731 storep = &u->options;
1732 break;
1733 case CURLUPART_HOST:
1734 storep = &u->host;
1735 break;
1736 case CURLUPART_ZONEID:
1737 storep = &u->zoneid;
1738 break;
1739 case CURLUPART_PORT:
1740 u->portnum = 0;
1741 storep = &u->port;
1742 break;
1743 case CURLUPART_PATH:
1744 storep = &u->path;
1745 break;
1746 case CURLUPART_QUERY:
1747 storep = &u->query;
1748 u->query_present = FALSE;
1749 break;
1750 case CURLUPART_FRAGMENT:
1751 storep = &u->fragment;
1752 u->fragment_present = FALSE;
1753 break;
1754 default:
1755 return CURLUE_UNKNOWN_PART;
1756 }
1757 if(storep && *storep) {
1758 Curl_safefree(*storep);
1759 }
1760 else if(!storep) {
1761 free_urlhandle(u);
1762 memset(u, 0, sizeof(struct Curl_URL));
1763 }
1764 return CURLUE_OK;
1765 }
1766
1767 nalloc = strlen(part);
1768 if(nalloc > CURL_MAX_INPUT_LENGTH)
1769 /* excessive input length */
1770 return CURLUE_MALFORMED_INPUT;
1771
1772 switch(what) {
1773 case CURLUPART_SCHEME: {
1774 size_t plen = strlen(part);
1775 const char *s = part;
1776 if((plen > MAX_SCHEME_LEN) || (plen < 1))
1777 /* too long or too short */
1778 return CURLUE_BAD_SCHEME;
1779 /* verify that it is a fine scheme */
1780 if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
1781 return CURLUE_UNSUPPORTED_SCHEME;
1782 storep = &u->scheme;
1783 urlencode = FALSE; /* never */
1784 if(ISALPHA(*s)) {
1785 /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
1786 while(--plen) {
1787 if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
1788 s++; /* fine */
1789 else
1790 return CURLUE_BAD_SCHEME;
1791 }
1792 }
1793 else
1794 return CURLUE_BAD_SCHEME;
1795 break;
1796 }
1797 case CURLUPART_USER:
1798 storep = &u->user;
1799 break;
1800 case CURLUPART_PASSWORD:
1801 storep = &u->password;
1802 break;
1803 case CURLUPART_OPTIONS:
1804 storep = &u->options;
1805 break;
1806 case CURLUPART_HOST:
1807 storep = &u->host;
1808 Curl_safefree(u->zoneid);
1809 break;
1810 case CURLUPART_ZONEID:
1811 storep = &u->zoneid;
1812 break;
1813 case CURLUPART_PORT:
1814 if(!ISDIGIT(part[0]))
1815 /* not a number */
1816 return CURLUE_BAD_PORT_NUMBER;
1817 else {
1818 char *tmp;
1819 char *endp;
1820 unsigned long port;
1821 errno = 0;
1822 port = strtoul(part, &endp, 10); /* must be decimal */
1823 if(errno || (port > 0xffff) || *endp)
1824 /* weirdly provided number, not good! */
1825 return CURLUE_BAD_PORT_NUMBER;
1826 tmp = strdup(part);
1827 if(!tmp)
1828 return CURLUE_OUT_OF_MEMORY;
1829 free(u->port);
1830 u->port = tmp;
1831 u->portnum = (unsigned short)port;
1832 return CURLUE_OK;
1833 }
1834 case CURLUPART_PATH:
1835 urlskipslash = TRUE;
1836 leadingslash = TRUE; /* enforce */
1837 storep = &u->path;
1838 break;
1839 case CURLUPART_QUERY:
1840 plusencode = urlencode;
1841 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1842 equalsencode = appendquery;
1843 storep = &u->query;
1844 u->query_present = TRUE;
1845 break;
1846 case CURLUPART_FRAGMENT:
1847 storep = &u->fragment;
1848 u->fragment_present = TRUE;
1849 break;
1850 case CURLUPART_URL: {
1851 /*
1852 * Allow a new URL to replace the existing (if any) contents.
1853 *
1854 * If the existing contents is enough for a URL, allow a relative URL to
1855 * replace it.
1856 */
1857 CURLcode result;
1858 CURLUcode uc;
1859 char *oldurl;
1860 char *redired_url;
1861
1862 if(!nalloc)
1863 /* a blank URL is not a valid URL */
1864 return CURLUE_MALFORMED_INPUT;
1865
1866 /* if the new thing is absolute or the old one is not
1867 * (we could not get an absolute url in 'oldurl'),
1868 * then replace the existing with the new. */
1869 if(Curl_is_absolute_url(part, NULL, 0,
1870 flags & (CURLU_GUESS_SCHEME|
1871 CURLU_DEFAULT_SCHEME))
1872 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1873 return parseurl_and_replace(part, u, flags);
1874 }
1875
1876 /* apply the relative part to create a new URL
1877 * and replace the existing one with it. */
1878 result = concat_url(oldurl, part, &redired_url);
1879 free(oldurl);
1880 if(result)
1881 return cc2cu(result);
1882
1883 uc = parseurl_and_replace(redired_url, u, flags);
1884 free(redired_url);
1885 return uc;
1886 }
1887 default:
1888 return CURLUE_UNKNOWN_PART;
1889 }
1890 DEBUGASSERT(storep);
1891 {
1892 const char *newp;
1893 struct dynbuf enc;
1894 Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1895
1896 if(leadingslash && (part[0] != '/')) {
1897 CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1898 if(result)
1899 return cc2cu(result);
1900 }
1901 if(urlencode) {
1902 const unsigned char *i;
1903
1904 for(i = (const unsigned char *)part; *i; i++) {
1905 CURLcode result;
1906 if((*i == ' ') && plusencode) {
1907 result = Curl_dyn_addn(&enc, "+", 1);
1908 if(result)
1909 return CURLUE_OUT_OF_MEMORY;
1910 }
1911 else if(ISUNRESERVED(*i) ||
1912 ((*i == '/') && urlskipslash) ||
1913 ((*i == '=') && equalsencode)) {
1914 if((*i == '=') && equalsencode)
1915 /* only skip the first equals sign */
1916 equalsencode = FALSE;
1917 result = Curl_dyn_addn(&enc, i, 1);
1918 if(result)
1919 return cc2cu(result);
1920 }
1921 else {
1922 char out[3]={'%'};
1923 out[1] = hexdigits[*i>>4];
1924 out[2] = hexdigits[*i & 0xf];
1925 result = Curl_dyn_addn(&enc, out, 3);
1926 if(result)
1927 return cc2cu(result);
1928 }
1929 }
1930 }
1931 else {
1932 char *p;
1933 CURLcode result = Curl_dyn_add(&enc, part);
1934 if(result)
1935 return cc2cu(result);
1936 p = Curl_dyn_ptr(&enc);
1937 while(*p) {
1938 /* make sure percent encoded are lower case */
1939 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1940 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1941 p[1] = Curl_raw_tolower(p[1]);
1942 p[2] = Curl_raw_tolower(p[2]);
1943 p += 3;
1944 }
1945 else
1946 p++;
1947 }
1948 }
1949 newp = Curl_dyn_ptr(&enc);
1950
1951 if(appendquery && newp) {
1952 /* Append the 'newp' string onto the old query. Add a '&' separator if
1953 none is present at the end of the existing query already */
1954
1955 size_t querylen = u->query ? strlen(u->query) : 0;
1956 bool addamperand = querylen && (u->query[querylen -1] != '&');
1957 if(querylen) {
1958 struct dynbuf qbuf;
1959 Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1960
1961 if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1962 goto nomem;
1963
1964 if(addamperand) {
1965 if(Curl_dyn_addn(&qbuf, "&", 1))
1966 goto nomem;
1967 }
1968 if(Curl_dyn_add(&qbuf, newp))
1969 goto nomem;
1970 Curl_dyn_free(&enc);
1971 free(*storep);
1972 *storep = Curl_dyn_ptr(&qbuf);
1973 return CURLUE_OK;
1974 nomem:
1975 Curl_dyn_free(&enc);
1976 return CURLUE_OUT_OF_MEMORY;
1977 }
1978 }
1979
1980 else if(what == CURLUPART_HOST) {
1981 size_t n = Curl_dyn_len(&enc);
1982 if(!n && (flags & CURLU_NO_AUTHORITY)) {
1983 /* Skip hostname check, it's allowed to be empty. */
1984 }
1985 else {
1986 if(!n || hostname_check(u, (char *)newp, n)) {
1987 Curl_dyn_free(&enc);
1988 return CURLUE_BAD_HOSTNAME;
1989 }
1990 }
1991 }
1992
1993 free(*storep);
1994 *storep = (char *)newp;
1995 }
1996 return CURLUE_OK;
1997 }
1998