1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25 #include "curl_setup.h"
26
27 #include "urldata.h"
28 #include "urlapi-int.h"
29 #include "strcase.h"
30 #include "url.h"
31 #include "escape.h"
32 #include "curl_ctype.h"
33 #include "inet_pton.h"
34 #include "inet_ntop.h"
35 #include "strdup.h"
36 #include "idn.h"
37
38 /* The last 3 #include files should be in this order */
39 #include "curl_printf.h"
40 #include "curl_memory.h"
41 #include "memdebug.h"
42
/*
 * MS-DOS/Windows style drive prefix, e.g. "c:" in "c:foo".
 *
 * 'str' is evaluated more than once, so do not pass an expression with side
 * effects. (str)[1] is only read when (str)[0] is an ASCII letter, so a
 * null-terminated string is never read past its terminator.
 */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))

/*
 * MS-DOS/Windows style drive prefix, optionally with a '|' instead of ':',
 * followed by a slash, a backslash or the terminating null byte.
 *
 * 'str' is evaluated more than once, so do not pass an expression with side
 * effects.
 */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))

/* The scheme is not URL encoded. This is the longest scheme name, in bytes,
   that Curl_is_absolute_url() recognizes. */
#define MAX_SCHEME_LEN 40

/*
 * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
 * sure we have _some_ value for AF_INET6 without polluting our fake value
 * everywhere.
 */
#if !defined(USE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
68
69 /* Internal representation of CURLU. Point to URL-encoded strings. */
70 struct Curl_URL {
71 char *scheme;
72 char *user;
73 char *password;
74 char *options; /* IMAP only? */
75 char *host;
76 char *zoneid; /* for numerical IPv6 addresses */
77 char *port;
78 char *path;
79 char *query;
80 char *fragment;
81 unsigned short portnum; /* the numerical version (if 'port' is set) */
82 BIT(query_present); /* to support blank */
83 BIT(fragment_present); /* to support blank */
84 BIT(guessed_scheme); /* when a URL without scheme is parsed */
85 };
86
87 #define DEFAULT_SCHEME "https"
88
free_urlhandle(struct Curl_URL * u)89 static void free_urlhandle(struct Curl_URL *u)
90 {
91 free(u->scheme);
92 free(u->user);
93 free(u->password);
94 free(u->options);
95 free(u->host);
96 free(u->zoneid);
97 free(u->port);
98 free(u->path);
99 free(u->query);
100 free(u->fragment);
101 }
102
103 /*
104 * Find the separator at the end of the hostname, or the '?' in cases like
105 * http://www.example.com?id=2380
106 */
find_host_sep(const char * url)107 static const char *find_host_sep(const char *url)
108 {
109 const char *sep;
110 const char *query;
111
112 /* Find the start of the hostname */
113 sep = strstr(url, "//");
114 if(!sep)
115 sep = url;
116 else
117 sep += 2;
118
119 query = strchr(sep, '?');
120 sep = strchr(sep, '/');
121
122 if(!sep)
123 sep = url + strlen(url);
124
125 if(!query)
126 query = url + strlen(url);
127
128 return sep < query ? sep : query;
129 }
130
/* Convert a CURLcode to a CURLUcode. Anything that is not CURLE_TOO_LARGE is
   reported as out of memory. */
#define cc2cu(x) ((x) != CURLE_TOO_LARGE ? CURLUE_OUT_OF_MEMORY : \
                  CURLUE_TOO_LARGE)

/*
 * Decide whether a character in a URL must be escaped: that is the case when
 * it is neither a control, a space nor a graph character.
 */
#define urlchar_needs_escaping(c) \
  (!ISCNTRL(c) && !ISSPACE(c) && !ISGRAPH(c))

/* lowercase digits for writing %XX sequences */
static const char hexdigits[] = "0123456789abcdef";
140 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
141 * spaces in the source URL accordingly.
142 *
143 * URL encoding should be skipped for hostnames, otherwise IDN resolution
144 * will fail.
145 */
urlencode_str(struct dynbuf * o,const char * url,size_t len,bool relative,bool query)146 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
147 size_t len, bool relative,
148 bool query)
149 {
150 /* we must add this with whitespace-replacing */
151 bool left = !query;
152 const unsigned char *iptr;
153 const unsigned char *host_sep = (const unsigned char *) url;
154 CURLcode result;
155
156 if(!relative)
157 host_sep = (const unsigned char *) find_host_sep(url);
158
159 for(iptr = (unsigned char *)url; /* read from here */
160 len; iptr++, len--) {
161
162 if(iptr < host_sep) {
163 result = Curl_dyn_addn(o, iptr, 1);
164 if(result)
165 return cc2cu(result);
166 continue;
167 }
168
169 if(*iptr == ' ') {
170 if(left)
171 result = Curl_dyn_addn(o, "%20", 3);
172 else
173 result = Curl_dyn_addn(o, "+", 1);
174 if(result)
175 return cc2cu(result);
176 continue;
177 }
178
179 if(*iptr == '?')
180 left = FALSE;
181
182 if(urlchar_needs_escaping(*iptr)) {
183 char out[3]={'%'};
184 out[1] = hexdigits[*iptr >> 4];
185 out[2] = hexdigits[*iptr & 0xf];
186 result = Curl_dyn_addn(o, out, 3);
187 }
188 else
189 result = Curl_dyn_addn(o, iptr, 1);
190 if(result)
191 return cc2cu(result);
192 }
193
194 return CURLUE_OK;
195 }
196
197 /*
198 * Returns the length of the scheme if the given URL is absolute (as opposed
199 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
200 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
201 *
202 * If 'guess_scheme' is TRUE, it means the URL might be provided without
203 * scheme.
204 */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen,bool guess_scheme)205 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
206 bool guess_scheme)
207 {
208 size_t i = 0;
209 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
210 (void)buflen; /* only used in debug-builds */
211 if(buf)
212 buf[0] = 0; /* always leave a defined value in buf */
213 #ifdef _WIN32
214 if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
215 return 0;
216 #endif
217 if(ISALPHA(url[0]))
218 for(i = 1; i < MAX_SCHEME_LEN; ++i) {
219 char s = url[i];
220 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
221 /* RFC 3986 3.1 explains:
222 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
223 */
224 }
225 else {
226 break;
227 }
228 }
229 if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
230 /* If this does not guess scheme, the scheme always ends with the colon so
231 that this also detects data: URLs etc. In guessing mode, data: could
232 be the hostname "data" with a specified port number. */
233
234 /* the length of the scheme is the name part only */
235 size_t len = i;
236 if(buf) {
237 Curl_strntolower(buf, url, i);
238 buf[i] = 0;
239 }
240 return len;
241 }
242 return 0;
243 }
244
245 /*
246 * Concatenate a relative URL to a base URL making it absolute.
247 * URL-encodes any spaces.
248 * The returned pointer must be freed by the caller unless NULL
249 * (returns NULL on out of memory).
250 *
251 * Note that this function destroys the 'base' string.
252 */
concat_url(char * base,const char * relurl,char ** newurl)253 static CURLcode concat_url(char *base, const char *relurl, char **newurl)
254 {
255 /***
256 TRY to append this new path to the old URL
257 to the right of the host part. Oh crap, this is doomed to cause
258 problems in the future...
259 */
260 struct dynbuf newest;
261 char *protsep;
262 char *pathsep;
263 bool host_changed = FALSE;
264 const char *useurl = relurl;
265 CURLcode result = CURLE_OK;
266 CURLUcode uc;
267 bool skip_slash = FALSE;
268 *newurl = NULL;
269
270 /* protsep points to the start of the hostname */
271 protsep = strstr(base, "//");
272 if(!protsep)
273 protsep = base;
274 else
275 protsep += 2; /* pass the slashes */
276
277 if('/' != relurl[0]) {
278 int level = 0;
279
280 /* First we need to find out if there is a ?-letter in the URL,
281 and cut it and the right-side of that off */
282 pathsep = strchr(protsep, '?');
283 if(pathsep)
284 *pathsep = 0;
285
286 /* we have a relative path to append to the last slash if there is one
287 available, or the new URL is just a query string (starts with a '?') or
288 a fragment (starts with '#') we append the new one at the end of the
289 current URL */
290 if((useurl[0] != '?') && (useurl[0] != '#')) {
291 pathsep = strrchr(protsep, '/');
292 if(pathsep)
293 *pathsep = 0;
294
295 /* Check if there is any slash after the hostname, and if so, remember
296 that position instead */
297 pathsep = strchr(protsep, '/');
298 if(pathsep)
299 protsep = pathsep + 1;
300 else
301 protsep = NULL;
302
303 /* now deal with one "./" or any amount of "../" in the newurl
304 and act accordingly */
305
306 if((useurl[0] == '.') && (useurl[1] == '/'))
307 useurl += 2; /* just skip the "./" */
308
309 while((useurl[0] == '.') &&
310 (useurl[1] == '.') &&
311 (useurl[2] == '/')) {
312 level++;
313 useurl += 3; /* pass the "../" */
314 }
315
316 if(protsep) {
317 while(level--) {
318 /* cut off one more level from the right of the original URL */
319 pathsep = strrchr(protsep, '/');
320 if(pathsep)
321 *pathsep = 0;
322 else {
323 *protsep = 0;
324 break;
325 }
326 }
327 }
328 }
329 else
330 skip_slash = TRUE;
331 }
332 else {
333 /* We got a new absolute path for this server */
334
335 if(relurl[1] == '/') {
336 /* the new URL starts with //, just keep the protocol part from the
337 original one */
338 *protsep = 0;
339 useurl = &relurl[2]; /* we keep the slashes from the original, so we
340 skip the new ones */
341 host_changed = TRUE;
342 }
343 else {
344 /* cut off the original URL from the first slash, or deal with URLs
345 without slash */
346 pathsep = strchr(protsep, '/');
347 if(pathsep) {
348 /* When people use badly formatted URLs, such as
349 "http://www.example.com?dir=/home/daniel" we must not use the first
350 slash, if there is a ?-letter before it! */
351 char *sep = strchr(protsep, '?');
352 if(sep && (sep < pathsep))
353 pathsep = sep;
354 *pathsep = 0;
355 }
356 else {
357 /* There was no slash. Now, since we might be operating on a badly
358 formatted URL, such as "http://www.example.com?id=2380" which does
359 not use a slash separator as it is supposed to, we need to check
360 for a ?-letter as well! */
361 pathsep = strchr(protsep, '?');
362 if(pathsep)
363 *pathsep = 0;
364 }
365 }
366 }
367
368 Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
369
370 /* copy over the root URL part */
371 result = Curl_dyn_add(&newest, base);
372 if(result)
373 return result;
374
375 /* check if we need to append a slash */
376 if(('/' == useurl[0]) || (protsep && !*protsep) || skip_slash)
377 ;
378 else {
379 result = Curl_dyn_addn(&newest, "/", 1);
380 if(result)
381 return result;
382 }
383
384 /* then append the new piece on the right side */
385 uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
386 FALSE);
387 if(uc)
388 return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY;
389
390 *newurl = Curl_dyn_ptr(&newest);
391 return CURLE_OK;
392 }
393
394 /* scan for byte values <= 31, 127 and sometimes space */
junkscan(const char * url,size_t * urllen,unsigned int flags)395 static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
396 {
397 static const char badbytes[]={
398 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
399 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
400 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
401 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
402 0x7f, 0x00 /* null-terminate */
403 };
404 size_t n = strlen(url);
405 size_t nfine;
406
407 if(n > CURL_MAX_INPUT_LENGTH)
408 /* excessive input length */
409 return CURLUE_MALFORMED_INPUT;
410
411 nfine = strcspn(url, badbytes);
412 if((nfine != n) ||
413 (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
414 return CURLUE_MALFORMED_INPUT;
415
416 *urllen = n;
417 return CURLUE_OK;
418 }
419
420 /*
421 * parse_hostname_login()
422 *
423 * Parse the login details (username, password and options) from the URL and
424 * strip them out of the hostname
425 *
426 */
parse_hostname_login(struct Curl_URL * u,const char * login,size_t len,unsigned int flags,size_t * offset)427 static CURLUcode parse_hostname_login(struct Curl_URL *u,
428 const char *login,
429 size_t len,
430 unsigned int flags,
431 size_t *offset) /* to the hostname */
432 {
433 CURLUcode result = CURLUE_OK;
434 CURLcode ccode;
435 char *userp = NULL;
436 char *passwdp = NULL;
437 char *optionsp = NULL;
438 const struct Curl_handler *h = NULL;
439
440 /* At this point, we assume all the other special cases have been taken
441 * care of, so the host is at most
442 *
443 * [user[:password][;options]]@]hostname
444 *
445 * We need somewhere to put the embedded details, so do that first.
446 */
447 char *ptr;
448
449 DEBUGASSERT(login);
450
451 *offset = 0;
452 ptr = memchr(login, '@', len);
453 if(!ptr)
454 goto out;
455
456 /* We will now try to extract the
457 * possible login information in a string like:
458 * ftp://user:password@ftp.my.site:8021/README */
459 ptr++;
460
461 /* if this is a known scheme, get some details */
462 if(u->scheme)
463 h = Curl_get_scheme_handler(u->scheme);
464
465 /* We could use the login information in the URL so extract it. Only parse
466 options if the handler says we should. Note that 'h' might be NULL! */
467 ccode = Curl_parse_login_details(login, ptr - login - 1,
468 &userp, &passwdp,
469 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
470 &optionsp : NULL);
471 if(ccode) {
472 result = CURLUE_BAD_LOGIN;
473 goto out;
474 }
475
476 if(userp) {
477 if(flags & CURLU_DISALLOW_USER) {
478 /* Option DISALLOW_USER is set and URL contains username. */
479 result = CURLUE_USER_NOT_ALLOWED;
480 goto out;
481 }
482 free(u->user);
483 u->user = userp;
484 }
485
486 if(passwdp) {
487 free(u->password);
488 u->password = passwdp;
489 }
490
491 if(optionsp) {
492 free(u->options);
493 u->options = optionsp;
494 }
495
496 /* the hostname starts at this offset */
497 *offset = ptr - login;
498 return CURLUE_OK;
499
500 out:
501
502 free(userp);
503 free(passwdp);
504 free(optionsp);
505 u->user = NULL;
506 u->password = NULL;
507 u->options = NULL;
508
509 return result;
510 }
511
Curl_parse_port(struct Curl_URL * u,struct dynbuf * host,bool has_scheme)512 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
513 bool has_scheme)
514 {
515 char *portptr;
516 char *hostname = Curl_dyn_ptr(host);
517 /*
518 * Find the end of an IPv6 address on the ']' ending bracket.
519 */
520 if(hostname[0] == '[') {
521 portptr = strchr(hostname, ']');
522 if(!portptr)
523 return CURLUE_BAD_IPV6;
524 portptr++;
525 /* this is a RFC2732-style specified IP-address */
526 if(*portptr) {
527 if(*portptr != ':')
528 return CURLUE_BAD_PORT_NUMBER;
529 }
530 else
531 portptr = NULL;
532 }
533 else
534 portptr = strchr(hostname, ':');
535
536 if(portptr) {
537 char *rest = NULL;
538 unsigned long port;
539 size_t keep = portptr - hostname;
540
541 /* Browser behavior adaptation. If there is a colon with no digits after,
542 just cut off the name there which makes us ignore the colon and just
543 use the default port. Firefox, Chrome and Safari all do that.
544
545 Do not do it if the URL has no scheme, to make something that looks like
546 a scheme not work!
547 */
548 Curl_dyn_setlen(host, keep);
549 portptr++;
550 if(!*portptr)
551 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
552
553 if(!ISDIGIT(*portptr))
554 return CURLUE_BAD_PORT_NUMBER;
555
556 errno = 0;
557 port = strtoul(portptr, &rest, 10); /* Port number must be decimal */
558
559 if(errno || (port > 0xffff) || *rest)
560 return CURLUE_BAD_PORT_NUMBER;
561
562 u->portnum = (unsigned short) port;
563 /* generate a new port number string to get rid of leading zeroes etc */
564 free(u->port);
565 u->port = aprintf("%ld", port);
566 if(!u->port)
567 return CURLUE_OUT_OF_MEMORY;
568 }
569
570 return CURLUE_OK;
571 }
572
573 /* this assumes 'hostname' now starts with [ */
ipv6_parse(struct Curl_URL * u,char * hostname,size_t hlen)574 static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
575 size_t hlen) /* length of hostname */
576 {
577 size_t len;
578 DEBUGASSERT(*hostname == '[');
579 if(hlen < 4) /* '[::]' is the shortest possible valid string */
580 return CURLUE_BAD_IPV6;
581 hostname++;
582 hlen -= 2;
583
584 /* only valid IPv6 letters are ok */
585 len = strspn(hostname, "0123456789abcdefABCDEF:.");
586
587 if(hlen != len) {
588 hlen = len;
589 if(hostname[len] == '%') {
590 /* this could now be '%[zone id]' */
591 char zoneid[16];
592 int i = 0;
593 char *h = &hostname[len + 1];
594 /* pass '25' if present and is a URL encoded percent sign */
595 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
596 h += 2;
597 while(*h && (*h != ']') && (i < 15))
598 zoneid[i++] = *h++;
599 if(!i || (']' != *h))
600 return CURLUE_BAD_IPV6;
601 zoneid[i] = 0;
602 u->zoneid = strdup(zoneid);
603 if(!u->zoneid)
604 return CURLUE_OUT_OF_MEMORY;
605 hostname[len] = ']'; /* insert end bracket */
606 hostname[len + 1] = 0; /* terminate the hostname */
607 }
608 else
609 return CURLUE_BAD_IPV6;
610 /* hostname is fine */
611 }
612
613 /* Check the IPv6 address. */
614 {
615 char dest[16]; /* fits a binary IPv6 address */
616 char norm[MAX_IPADR_LEN];
617 hostname[hlen] = 0; /* end the address there */
618 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
619 return CURLUE_BAD_IPV6;
620
621 /* check if it can be done shorter */
622 if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
623 (strlen(norm) < hlen)) {
624 strcpy(hostname, norm);
625 hlen = strlen(norm);
626 hostname[hlen + 1] = 0;
627 }
628 hostname[hlen] = ']'; /* restore ending bracket */
629 }
630 return CURLUE_OK;
631 }
632
hostname_check(struct Curl_URL * u,char * hostname,size_t hlen)633 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
634 size_t hlen) /* length of hostname */
635 {
636 size_t len;
637 DEBUGASSERT(hostname);
638
639 if(!hlen)
640 return CURLUE_NO_HOST;
641 else if(hostname[0] == '[')
642 return ipv6_parse(u, hostname, hlen);
643 else {
644 /* letters from the second string are not ok */
645 len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
646 if(hlen != len)
647 /* hostname with bad content */
648 return CURLUE_BAD_HOSTNAME;
649 }
650 return CURLUE_OK;
651 }
652
/*
 * Handle partial IPv4 numerical addresses and different bases, like
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
 *
 * If the given input string is syntactically wrong IPv4 or any part for
 * example is too big, this function returns HOST_NAME and leaves 'host'
 * untouched. A string that starts with '[' returns HOST_IPV6, also untouched.
 *
 * Otherwise the content of 'host' is replaced with the "normalized" version
 * of the input string in plain quad decimal integers and HOST_IPV4 is
 * returned, or HOST_ERROR if writing to 'host' fails.
 *
 * Returns the host type.
 */

#define HOST_ERROR   -1 /* out of memory */

#define HOST_NAME    1
#define HOST_IPV4    2
#define HOST_IPV6    3

static int ipv4_normalize(struct dynbuf *host)
{
  const char *p = Curl_dyn_ptr(host);
  unsigned long parts[4] = {0, 0, 0, 0};
  unsigned int quad[4];
  int last = 0; /* index of the last part that was parsed */

  if(*p == '[')
    return HOST_IPV6;

  errno = 0; /* for strtoul */
  for(;;) {
    char *endp = NULL;
    unsigned long l;
    if(!ISDIGIT(*p))
      /* most importantly this does not allow a leading plus or minus */
      return HOST_NAME;
    l = strtoul(p, &endp, 0);
    if(errno)
      return HOST_NAME;
#if SIZEOF_LONG > 4
    /* a value larger than 32 bits */
    if(l > UINT_MAX)
      return HOST_NAME;
#endif

    parts[last] = l;
    p = endp;

    if(!*p)
      break; /* all parts are in */
    if((*p != '.') || (last == 3))
      /* junk after the number, or a fifth part */
      return HOST_NAME;
    last++;
    p++;
  }

  switch(last) {
  case 0: /* a -- 32 bits */
    quad[0] = (unsigned int)(parts[0] >> 24);
    quad[1] = (unsigned int)((parts[0] >> 16) & 0xff);
    quad[2] = (unsigned int)((parts[0] >> 8) & 0xff);
    quad[3] = (unsigned int)(parts[0] & 0xff);
    break;
  case 1: /* a.b -- 8.24 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
      return HOST_NAME;
    quad[0] = (unsigned int)(parts[0]);
    quad[1] = (unsigned int)((parts[1] >> 16) & 0xff);
    quad[2] = (unsigned int)((parts[1] >> 8) & 0xff);
    quad[3] = (unsigned int)(parts[1] & 0xff);
    break;
  case 2: /* a.b.c -- 8.8.16 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
      return HOST_NAME;
    quad[0] = (unsigned int)(parts[0]);
    quad[1] = (unsigned int)(parts[1]);
    quad[2] = (unsigned int)((parts[2] >> 8) & 0xff);
    quad[3] = (unsigned int)(parts[2] & 0xff);
    break;
  default: /* a.b.c.d -- 8.8.8.8 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
       (parts[3] > 0xff))
      return HOST_NAME;
    quad[0] = (unsigned int)(parts[0]);
    quad[1] = (unsigned int)(parts[1]);
    quad[2] = (unsigned int)(parts[2]);
    quad[3] = (unsigned int)(parts[3]);
    break;
  }

  /* the input is fine, now replace it */
  Curl_dyn_reset(host);
  if(Curl_dyn_addf(host, "%u.%u.%u.%u",
                   quad[0], quad[1], quad[2], quad[3]))
    return HOST_ERROR;
  return HOST_IPV4;
}
765
766 /* if necessary, replace the host content with a URL decoded version */
urldecode_host(struct dynbuf * host)767 static CURLUcode urldecode_host(struct dynbuf *host)
768 {
769 char *per = NULL;
770 const char *hostname = Curl_dyn_ptr(host);
771 per = strchr(hostname, '%');
772 if(!per)
773 /* nothing to decode */
774 return CURLUE_OK;
775 else {
776 /* encoded */
777 size_t dlen;
778 char *decoded;
779 CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
780 REJECT_CTRL);
781 if(result)
782 return CURLUE_BAD_HOSTNAME;
783 Curl_dyn_reset(host);
784 result = Curl_dyn_addn(host, decoded, dlen);
785 free(decoded);
786 if(result)
787 return cc2cu(result);
788 }
789
790 return CURLUE_OK;
791 }
792
parse_authority(struct Curl_URL * u,const char * auth,size_t authlen,unsigned int flags,struct dynbuf * host,bool has_scheme)793 static CURLUcode parse_authority(struct Curl_URL *u,
794 const char *auth, size_t authlen,
795 unsigned int flags,
796 struct dynbuf *host,
797 bool has_scheme)
798 {
799 size_t offset;
800 CURLUcode uc;
801 CURLcode result;
802
803 /*
804 * Parse the login details and strip them out of the hostname.
805 */
806 uc = parse_hostname_login(u, auth, authlen, flags, &offset);
807 if(uc)
808 goto out;
809
810 result = Curl_dyn_addn(host, auth + offset, authlen - offset);
811 if(result) {
812 uc = cc2cu(result);
813 goto out;
814 }
815
816 uc = Curl_parse_port(u, host, has_scheme);
817 if(uc)
818 goto out;
819
820 if(!Curl_dyn_len(host))
821 return CURLUE_NO_HOST;
822
823 switch(ipv4_normalize(host)) {
824 case HOST_IPV4:
825 break;
826 case HOST_IPV6:
827 uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
828 break;
829 case HOST_NAME:
830 uc = urldecode_host(host);
831 if(!uc)
832 uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
833 break;
834 case HOST_ERROR:
835 uc = CURLUE_OUT_OF_MEMORY;
836 break;
837 default:
838 uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
839 break;
840 }
841
842 out:
843 return uc;
844 }
845
846 /* used for HTTP/2 server push */
Curl_url_set_authority(CURLU * u,const char * authority)847 CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
848 {
849 CURLUcode result;
850 struct dynbuf host;
851
852 DEBUGASSERT(authority);
853 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
854
855 result = parse_authority(u, authority, strlen(authority),
856 CURLU_DISALLOW_USER, &host, !!u->scheme);
857 if(result)
858 Curl_dyn_free(&host);
859 else {
860 free(u->host);
861 u->host = Curl_dyn_ptr(&host);
862 }
863 return result;
864 }
865
866 /*
867 * "Remove Dot Segments"
868 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
869 */
870
871 /*
872 * dedotdotify()
873 * @unittest: 1395
874 *
875 * This function gets a null-terminated path with dot and dotdot sequences
876 * passed in and strips them off according to the rules in RFC 3986 section
877 * 5.2.4.
878 *
879 * The function handles a query part ('?' + stuff) appended but it expects
880 * that fragments ('#' + stuff) have already been cut off.
881 *
882 * RETURNS
883 *
884 * Zero for success and 'out' set to an allocated dedotdotified string.
885 */
886 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
dedotdotify(const char * input,size_t clen,char ** outp)887 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
888 {
889 char *outptr;
890 const char *endp = &input[clen];
891 char *out;
892
893 *outp = NULL;
894 /* the path always starts with a slash, and a slash has not dot */
895 if((clen < 2) || !memchr(input, '.', clen))
896 return 0;
897
898 out = malloc(clen + 1);
899 if(!out)
900 return 1; /* out of memory */
901
902 *out = 0; /* null-terminates, for inputs like "./" */
903 outptr = out;
904
905 do {
906 bool dotdot = TRUE;
907 if(*input == '.') {
908 /* A. If the input buffer begins with a prefix of "../" or "./", then
909 remove that prefix from the input buffer; otherwise, */
910
911 if(!strncmp("./", input, 2)) {
912 input += 2;
913 clen -= 2;
914 }
915 else if(!strncmp("../", input, 3)) {
916 input += 3;
917 clen -= 3;
918 }
919 /* D. if the input buffer consists only of "." or "..", then remove
920 that from the input buffer; otherwise, */
921
922 else if(!strcmp(".", input) || !strcmp("..", input) ||
923 !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
924 *out = 0;
925 break;
926 }
927 else
928 dotdot = FALSE;
929 }
930 else if(*input == '/') {
931 /* B. if the input buffer begins with a prefix of "/./" or "/.", where
932 "." is a complete path segment, then replace that prefix with "/" in
933 the input buffer; otherwise, */
934 if(!strncmp("/./", input, 3)) {
935 input += 2;
936 clen -= 2;
937 }
938 else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
939 *outptr++ = '/';
940 *outptr = 0;
941 break;
942 }
943
944 /* C. if the input buffer begins with a prefix of "/../" or "/..",
945 where ".." is a complete path segment, then replace that prefix with
946 "/" in the input buffer and remove the last segment and its
947 preceding "/" (if any) from the output buffer; otherwise, */
948
949 else if(!strncmp("/../", input, 4)) {
950 input += 3;
951 clen -= 3;
952 /* remove the last segment from the output buffer */
953 while(outptr > out) {
954 outptr--;
955 if(*outptr == '/')
956 break;
957 }
958 *outptr = 0; /* null-terminate where it stops */
959 }
960 else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
961 /* remove the last segment from the output buffer */
962 while(outptr > out) {
963 outptr--;
964 if(*outptr == '/')
965 break;
966 }
967 *outptr++ = '/';
968 *outptr = 0; /* null-terminate where it stops */
969 break;
970 }
971 else
972 dotdot = FALSE;
973 }
974 else
975 dotdot = FALSE;
976
977 if(!dotdot) {
978 /* E. move the first path segment in the input buffer to the end of
979 the output buffer, including the initial "/" character (if any) and
980 any subsequent characters up to, but not including, the next "/"
981 character or the end of the input buffer. */
982
983 do {
984 *outptr++ = *input++;
985 clen--;
986 } while(*input && (*input != '/') && (*input != '?'));
987 *outptr = 0;
988 }
989
990 /* continue until end of path */
991 } while(input < endp);
992
993 *outp = out;
994 return 0; /* success */
995 }
996
parseurl(const char * url,CURLU * u,unsigned int flags)997 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
998 {
999 const char *path;
1000 size_t pathlen;
1001 char *query = NULL;
1002 char *fragment = NULL;
1003 char schemebuf[MAX_SCHEME_LEN + 1];
1004 size_t schemelen = 0;
1005 size_t urllen;
1006 CURLUcode result = CURLUE_OK;
1007 size_t fraglen = 0;
1008 struct dynbuf host;
1009
1010 DEBUGASSERT(url);
1011
1012 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
1013
1014 result = junkscan(url, &urllen, flags);
1015 if(result)
1016 goto fail;
1017
1018 schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1019 flags & (CURLU_GUESS_SCHEME|
1020 CURLU_DEFAULT_SCHEME));
1021
1022 /* handle the file: scheme */
1023 if(schemelen && !strcmp(schemebuf, "file")) {
1024 bool uncpath = FALSE;
1025 if(urllen <= 6) {
1026 /* file:/ is not enough to actually be a complete file: URL */
1027 result = CURLUE_BAD_FILE_URL;
1028 goto fail;
1029 }
1030
1031 /* path has been allocated large enough to hold this */
1032 path = (char *)&url[5];
1033 pathlen = urllen - 5;
1034
1035 u->scheme = strdup("file");
1036 if(!u->scheme) {
1037 result = CURLUE_OUT_OF_MEMORY;
1038 goto fail;
1039 }
1040
1041 /* Extra handling URLs with an authority component (i.e. that start with
1042 * "file://")
1043 *
1044 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
1045 * RFC 8089, but not the (current) WHAT-WG URL spec.
1046 */
1047 if(path[0] == '/' && path[1] == '/') {
1048 /* swallow the two slashes */
1049 const char *ptr = &path[2];
1050
1051 /*
1052 * According to RFC 8089, a file: URL can be reliably dereferenced if:
1053 *
1054 * o it has no/blank hostname, or
1055 *
1056 * o the hostname matches "localhost" (case-insensitively), or
1057 *
1058 * o the hostname is a FQDN that resolves to this machine, or
1059 *
1060 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
1061 * Appendix E.3).
1062 *
1063 * For brevity, we only consider URLs with empty, "localhost", or
1064 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
1065 *
1066 * Additionally, there is an exception for URLs with a Windows drive
1067 * letter in the authority (which was accidentally omitted from RFC 8089
1068 * Appendix E, but believe me, it was meant to be there. --MK)
1069 */
1070 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1071 /* the URL includes a hostname, it must match "localhost" or
1072 "127.0.0.1" to be valid */
1073 if(checkprefix("localhost/", ptr) ||
1074 checkprefix("127.0.0.1/", ptr)) {
1075 ptr += 9; /* now points to the slash after the host */
1076 }
1077 else {
1078 #if defined(_WIN32)
1079 size_t len;
1080
1081 /* the hostname, NetBIOS computer name, can not contain disallowed
1082 chars, and the delimiting slash character must be appended to the
1083 hostname */
1084 path = strpbrk(ptr, "/\\:*?\"<>|");
1085 if(!path || *path != '/') {
1086 result = CURLUE_BAD_FILE_URL;
1087 goto fail;
1088 }
1089
1090 len = path - ptr;
1091 if(len) {
1092 CURLcode code = Curl_dyn_addn(&host, ptr, len);
1093 if(code) {
1094 result = cc2cu(code);
1095 goto fail;
1096 }
1097 uncpath = TRUE;
1098 }
1099
1100 ptr -= 2; /* now points to the // before the host in UNC */
1101 #else
1102 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1103 none */
1104 result = CURLUE_BAD_FILE_URL;
1105 goto fail;
1106 #endif
1107 }
1108 }
1109
1110 path = ptr;
1111 pathlen = urllen - (ptr - url);
1112 }
1113
1114 if(!uncpath)
1115 /* no host for file: URLs by default */
1116 Curl_dyn_reset(&host);
1117
1118 #if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1119 /* Do not allow Windows drive letters when not in Windows.
1120 * This catches both "file:/c:" and "file:c:" */
1121 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1122 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1123 /* File drive letters are only accepted in MS-DOS/Windows */
1124 result = CURLUE_BAD_FILE_URL;
1125 goto fail;
1126 }
1127 #else
1128 /* If the path starts with a slash and a drive letter, ditch the slash */
1129 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1130 /* This cannot be done with strcpy, as the memory chunks overlap! */
1131 path++;
1132 pathlen--;
1133 }
1134 #endif
1135
1136 }
1137 else {
1138 /* clear path */
1139 const char *schemep = NULL;
1140 const char *hostp;
1141 size_t hostlen;
1142
1143 if(schemelen) {
1144 int i = 0;
1145 const char *p = &url[schemelen + 1];
1146 while((*p == '/') && (i < 4)) {
1147 p++;
1148 i++;
1149 }
1150
1151 schemep = schemebuf;
1152 if(!Curl_get_scheme_handler(schemep) &&
1153 !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1154 result = CURLUE_UNSUPPORTED_SCHEME;
1155 goto fail;
1156 }
1157
1158 if((i < 1) || (i > 3)) {
1159 /* less than one or more than three slashes */
1160 result = CURLUE_BAD_SLASHES;
1161 goto fail;
1162 }
1163 hostp = p; /* hostname starts here */
1164 }
1165 else {
1166 /* no scheme! */
1167
1168 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1169 result = CURLUE_BAD_SCHEME;
1170 goto fail;
1171 }
1172 if(flags & CURLU_DEFAULT_SCHEME)
1173 schemep = DEFAULT_SCHEME;
1174
1175 /*
1176 * The URL was badly formatted, let's try without scheme specified.
1177 */
1178 hostp = url;
1179 }
1180
1181 if(schemep) {
1182 u->scheme = strdup(schemep);
1183 if(!u->scheme) {
1184 result = CURLUE_OUT_OF_MEMORY;
1185 goto fail;
1186 }
1187 }
1188
1189 /* find the end of the hostname + port number */
1190 hostlen = strcspn(hostp, "/?#");
1191 path = &hostp[hostlen];
1192
1193 /* this pathlen also contains the query and the fragment */
1194 pathlen = urllen - (path - url);
1195 if(hostlen) {
1196
1197 result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1198 if(result)
1199 goto fail;
1200
1201 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1202 const char *hostname = Curl_dyn_ptr(&host);
1203 /* legacy curl-style guess based on hostname */
1204 if(checkprefix("ftp.", hostname))
1205 schemep = "ftp";
1206 else if(checkprefix("dict.", hostname))
1207 schemep = "dict";
1208 else if(checkprefix("ldap.", hostname))
1209 schemep = "ldap";
1210 else if(checkprefix("imap.", hostname))
1211 schemep = "imap";
1212 else if(checkprefix("smtp.", hostname))
1213 schemep = "smtp";
1214 else if(checkprefix("pop3.", hostname))
1215 schemep = "pop3";
1216 else
1217 schemep = "http";
1218
1219 u->scheme = strdup(schemep);
1220 if(!u->scheme) {
1221 result = CURLUE_OUT_OF_MEMORY;
1222 goto fail;
1223 }
1224 u->guessed_scheme = TRUE;
1225 }
1226 }
1227 else if(flags & CURLU_NO_AUTHORITY) {
1228 /* allowed to be empty. */
1229 if(Curl_dyn_add(&host, "")) {
1230 result = CURLUE_OUT_OF_MEMORY;
1231 goto fail;
1232 }
1233 }
1234 else {
1235 result = CURLUE_NO_HOST;
1236 goto fail;
1237 }
1238 }
1239
1240 fragment = strchr(path, '#');
1241 if(fragment) {
1242 fraglen = pathlen - (fragment - path);
1243 u->fragment_present = TRUE;
1244 if(fraglen > 1) {
1245 /* skip the leading '#' in the copy but include the terminating null */
1246 if(flags & CURLU_URLENCODE) {
1247 struct dynbuf enc;
1248 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1249 result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1250 if(result)
1251 goto fail;
1252 u->fragment = Curl_dyn_ptr(&enc);
1253 }
1254 else {
1255 u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1256 if(!u->fragment) {
1257 result = CURLUE_OUT_OF_MEMORY;
1258 goto fail;
1259 }
1260 }
1261 }
1262 /* after this, pathlen still contains the query */
1263 pathlen -= fraglen;
1264 }
1265
1266 query = memchr(path, '?', pathlen);
1267 if(query) {
1268 size_t qlen = fragment ? (size_t)(fragment - query) :
1269 pathlen - (query - path);
1270 pathlen -= qlen;
1271 u->query_present = TRUE;
1272 if(qlen > 1) {
1273 if(flags & CURLU_URLENCODE) {
1274 struct dynbuf enc;
1275 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1276 /* skip the leading question mark */
1277 result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1278 if(result)
1279 goto fail;
1280 u->query = Curl_dyn_ptr(&enc);
1281 }
1282 else {
1283 u->query = Curl_memdup0(query + 1, qlen - 1);
1284 if(!u->query) {
1285 result = CURLUE_OUT_OF_MEMORY;
1286 goto fail;
1287 }
1288 }
1289 }
1290 else {
1291 /* single byte query */
1292 u->query = strdup("");
1293 if(!u->query) {
1294 result = CURLUE_OUT_OF_MEMORY;
1295 goto fail;
1296 }
1297 }
1298 }
1299
1300 if(pathlen && (flags & CURLU_URLENCODE)) {
1301 struct dynbuf enc;
1302 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1303 result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1304 if(result)
1305 goto fail;
1306 pathlen = Curl_dyn_len(&enc);
1307 path = u->path = Curl_dyn_ptr(&enc);
1308 }
1309
1310 if(pathlen <= 1) {
1311 /* there is no path left or just the slash, unset */
1312 path = NULL;
1313 }
1314 else {
1315 if(!u->path) {
1316 u->path = Curl_memdup0(path, pathlen);
1317 if(!u->path) {
1318 result = CURLUE_OUT_OF_MEMORY;
1319 goto fail;
1320 }
1321 path = u->path;
1322 }
1323 else if(flags & CURLU_URLENCODE)
1324 /* it might have encoded more than just the path so cut it */
1325 u->path[pathlen] = 0;
1326
1327 if(!(flags & CURLU_PATH_AS_IS)) {
1328 /* remove ../ and ./ sequences according to RFC3986 */
1329 char *dedot;
1330 int err = dedotdotify((char *)path, pathlen, &dedot);
1331 if(err) {
1332 result = CURLUE_OUT_OF_MEMORY;
1333 goto fail;
1334 }
1335 if(dedot) {
1336 free(u->path);
1337 u->path = dedot;
1338 }
1339 }
1340 }
1341
1342 u->host = Curl_dyn_ptr(&host);
1343
1344 return result;
1345 fail:
1346 Curl_dyn_free(&host);
1347 free_urlhandle(u);
1348 return result;
1349 }
1350
1351 /*
1352 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1353 */
parseurl_and_replace(const char * url,CURLU * u,unsigned int flags)1354 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1355 unsigned int flags)
1356 {
1357 CURLUcode result;
1358 CURLU tmpurl;
1359 memset(&tmpurl, 0, sizeof(tmpurl));
1360 result = parseurl(url, &tmpurl, flags);
1361 if(!result) {
1362 free_urlhandle(u);
1363 *u = tmpurl;
1364 }
1365 return result;
1366 }
1367
1368 /*
1369 */
curl_url(void)1370 CURLU *curl_url(void)
1371 {
1372 return calloc(1, sizeof(struct Curl_URL));
1373 }
1374
/*
 * Release a URL handle: first whatever free_urlhandle() frees for it, then
 * the handle struct itself. A NULL handle is silently ignored.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1382
1383 #define DUP(dest, src, name) \
1384 do { \
1385 if(src->name) { \
1386 dest->name = strdup(src->name); \
1387 if(!dest->name) \
1388 goto fail; \
1389 } \
1390 } while(0)
1391
curl_url_dup(const CURLU * in)1392 CURLU *curl_url_dup(const CURLU *in)
1393 {
1394 struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1395 if(u) {
1396 DUP(u, in, scheme);
1397 DUP(u, in, user);
1398 DUP(u, in, password);
1399 DUP(u, in, options);
1400 DUP(u, in, host);
1401 DUP(u, in, port);
1402 DUP(u, in, path);
1403 DUP(u, in, query);
1404 DUP(u, in, fragment);
1405 DUP(u, in, zoneid);
1406 u->portnum = in->portnum;
1407 u->fragment_present = in->fragment_present;
1408 u->query_present = in->query_present;
1409 }
1410 return u;
1411 fail:
1412 curl_url_cleanup(u);
1413 return NULL;
1414 }
1415
curl_url_get(const CURLU * u,CURLUPart what,char ** part,unsigned int flags)1416 CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1417 char **part, unsigned int flags)
1418 {
1419 const char *ptr;
1420 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1421 char portbuf[7];
1422 bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
1423 bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1424 bool punycode = FALSE;
1425 bool depunyfy = FALSE;
1426 bool plusdecode = FALSE;
1427 (void)flags;
1428 if(!u)
1429 return CURLUE_BAD_HANDLE;
1430 if(!part)
1431 return CURLUE_BAD_PARTPOINTER;
1432 *part = NULL;
1433
1434 switch(what) {
1435 case CURLUPART_SCHEME:
1436 ptr = u->scheme;
1437 ifmissing = CURLUE_NO_SCHEME;
1438 urldecode = FALSE; /* never for schemes */
1439 if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
1440 return CURLUE_NO_SCHEME;
1441 break;
1442 case CURLUPART_USER:
1443 ptr = u->user;
1444 ifmissing = CURLUE_NO_USER;
1445 break;
1446 case CURLUPART_PASSWORD:
1447 ptr = u->password;
1448 ifmissing = CURLUE_NO_PASSWORD;
1449 break;
1450 case CURLUPART_OPTIONS:
1451 ptr = u->options;
1452 ifmissing = CURLUE_NO_OPTIONS;
1453 break;
1454 case CURLUPART_HOST:
1455 ptr = u->host;
1456 ifmissing = CURLUE_NO_HOST;
1457 punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1458 depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1459 break;
1460 case CURLUPART_ZONEID:
1461 ptr = u->zoneid;
1462 ifmissing = CURLUE_NO_ZONEID;
1463 break;
1464 case CURLUPART_PORT:
1465 ptr = u->port;
1466 ifmissing = CURLUE_NO_PORT;
1467 urldecode = FALSE; /* never for port */
1468 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1469 /* there is no stored port number, but asked to deliver
1470 a default one for the scheme */
1471 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1472 if(h) {
1473 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1474 ptr = portbuf;
1475 }
1476 }
1477 else if(ptr && u->scheme) {
1478 /* there is a stored port number, but ask to inhibit if
1479 it matches the default one for the scheme */
1480 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1481 if(h && (h->defport == u->portnum) &&
1482 (flags & CURLU_NO_DEFAULT_PORT))
1483 ptr = NULL;
1484 }
1485 break;
1486 case CURLUPART_PATH:
1487 ptr = u->path;
1488 if(!ptr)
1489 ptr = "/";
1490 break;
1491 case CURLUPART_QUERY:
1492 ptr = u->query;
1493 ifmissing = CURLUE_NO_QUERY;
1494 plusdecode = urldecode;
1495 if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
1496 /* there was a blank query and the user do not ask for it */
1497 ptr = NULL;
1498 break;
1499 case CURLUPART_FRAGMENT:
1500 ptr = u->fragment;
1501 ifmissing = CURLUE_NO_FRAGMENT;
1502 if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
1503 /* there was a blank fragment and the user asks for it */
1504 ptr = "";
1505 break;
1506 case CURLUPART_URL: {
1507 char *url;
1508 char *scheme;
1509 char *options = u->options;
1510 char *port = u->port;
1511 char *allochost = NULL;
1512 bool show_fragment =
1513 u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1514 bool show_query =
1515 (u->query && u->query[0]) ||
1516 (u->query_present && flags & CURLU_GET_EMPTY);
1517 punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1518 depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1519 if(u->scheme && strcasecompare("file", u->scheme)) {
1520 url = aprintf("file://%s%s%s",
1521 u->path,
1522 show_fragment ? "#": "",
1523 u->fragment ? u->fragment : "");
1524 }
1525 else if(!u->host)
1526 return CURLUE_NO_HOST;
1527 else {
1528 const struct Curl_handler *h = NULL;
1529 char schemebuf[MAX_SCHEME_LEN + 5];
1530 if(u->scheme)
1531 scheme = u->scheme;
1532 else if(flags & CURLU_DEFAULT_SCHEME)
1533 scheme = (char *) DEFAULT_SCHEME;
1534 else
1535 return CURLUE_NO_SCHEME;
1536
1537 h = Curl_get_scheme_handler(scheme);
1538 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1539 /* there is no stored port number, but asked to deliver
1540 a default one for the scheme */
1541 if(h) {
1542 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1543 port = portbuf;
1544 }
1545 }
1546 else if(port) {
1547 /* there is a stored port number, but asked to inhibit if it matches
1548 the default one for the scheme */
1549 if(h && (h->defport == u->portnum) &&
1550 (flags & CURLU_NO_DEFAULT_PORT))
1551 port = NULL;
1552 }
1553
1554 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1555 options = NULL;
1556
1557 if(u->host[0] == '[') {
1558 if(u->zoneid) {
1559 /* make it '[ host %25 zoneid ]' */
1560 struct dynbuf enc;
1561 size_t hostlen = strlen(u->host);
1562 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1563 if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1564 u->zoneid))
1565 return CURLUE_OUT_OF_MEMORY;
1566 allochost = Curl_dyn_ptr(&enc);
1567 }
1568 }
1569 else if(urlencode) {
1570 allochost = curl_easy_escape(NULL, u->host, 0);
1571 if(!allochost)
1572 return CURLUE_OUT_OF_MEMORY;
1573 }
1574 else if(punycode) {
1575 if(!Curl_is_ASCII_name(u->host)) {
1576 #ifndef USE_IDN
1577 return CURLUE_LACKS_IDN;
1578 #else
1579 CURLcode result = Curl_idn_decode(u->host, &allochost);
1580 if(result)
1581 return (result == CURLE_OUT_OF_MEMORY) ?
1582 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1583 #endif
1584 }
1585 }
1586 else if(depunyfy) {
1587 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1588 #ifndef USE_IDN
1589 return CURLUE_LACKS_IDN;
1590 #else
1591 CURLcode result = Curl_idn_encode(u->host, &allochost);
1592 if(result)
1593 /* this is the most likely error */
1594 return (result == CURLE_OUT_OF_MEMORY) ?
1595 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1596 #endif
1597 }
1598 }
1599
1600 if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
1601 msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1602 else
1603 schemebuf[0] = 0;
1604
1605 url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1606 schemebuf,
1607 u->user ? u->user : "",
1608 u->password ? ":": "",
1609 u->password ? u->password : "",
1610 options ? ";" : "",
1611 options ? options : "",
1612 (u->user || u->password || options) ? "@": "",
1613 allochost ? allochost : u->host,
1614 port ? ":": "",
1615 port ? port : "",
1616 u->path ? u->path : "/",
1617 show_query ? "?": "",
1618 u->query ? u->query : "",
1619 show_fragment ? "#": "",
1620 u->fragment ? u->fragment : "");
1621 free(allochost);
1622 }
1623 if(!url)
1624 return CURLUE_OUT_OF_MEMORY;
1625 *part = url;
1626 return CURLUE_OK;
1627 }
1628 default:
1629 ptr = NULL;
1630 break;
1631 }
1632 if(ptr) {
1633 size_t partlen = strlen(ptr);
1634 size_t i = 0;
1635 *part = Curl_memdup0(ptr, partlen);
1636 if(!*part)
1637 return CURLUE_OUT_OF_MEMORY;
1638 if(plusdecode) {
1639 /* convert + to space */
1640 char *plus = *part;
1641 for(i = 0; i < partlen; ++plus, i++) {
1642 if(*plus == '+')
1643 *plus = ' ';
1644 }
1645 }
1646 if(urldecode) {
1647 char *decoded;
1648 size_t dlen;
1649 /* this unconditional rejection of control bytes is documented
1650 API behavior */
1651 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1652 free(*part);
1653 if(res) {
1654 *part = NULL;
1655 return CURLUE_URLDECODE;
1656 }
1657 *part = decoded;
1658 partlen = dlen;
1659 }
1660 if(urlencode) {
1661 struct dynbuf enc;
1662 CURLUcode uc;
1663 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1664 uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
1665 if(uc)
1666 return uc;
1667 free(*part);
1668 *part = Curl_dyn_ptr(&enc);
1669 }
1670 else if(punycode) {
1671 if(!Curl_is_ASCII_name(u->host)) {
1672 #ifndef USE_IDN
1673 return CURLUE_LACKS_IDN;
1674 #else
1675 char *allochost;
1676 CURLcode result = Curl_idn_decode(*part, &allochost);
1677 if(result)
1678 return (result == CURLE_OUT_OF_MEMORY) ?
1679 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1680 free(*part);
1681 *part = allochost;
1682 #endif
1683 }
1684 }
1685 else if(depunyfy) {
1686 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1687 #ifndef USE_IDN
1688 return CURLUE_LACKS_IDN;
1689 #else
1690 char *allochost;
1691 CURLcode result = Curl_idn_encode(*part, &allochost);
1692 if(result)
1693 return (result == CURLE_OUT_OF_MEMORY) ?
1694 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1695 free(*part);
1696 *part = allochost;
1697 #endif
1698 }
1699 }
1700
1701 return CURLUE_OK;
1702 }
1703 else
1704 return ifmissing;
1705 }
1706
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1707 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1708 const char *part, unsigned int flags)
1709 {
1710 char **storep = NULL;
1711 bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1712 bool plusencode = FALSE;
1713 bool urlskipslash = FALSE;
1714 bool leadingslash = FALSE;
1715 bool appendquery = FALSE;
1716 bool equalsencode = FALSE;
1717 size_t nalloc;
1718
1719 if(!u)
1720 return CURLUE_BAD_HANDLE;
1721 if(!part) {
1722 /* setting a part to NULL clears it */
1723 switch(what) {
1724 case CURLUPART_URL:
1725 break;
1726 case CURLUPART_SCHEME:
1727 storep = &u->scheme;
1728 u->guessed_scheme = FALSE;
1729 break;
1730 case CURLUPART_USER:
1731 storep = &u->user;
1732 break;
1733 case CURLUPART_PASSWORD:
1734 storep = &u->password;
1735 break;
1736 case CURLUPART_OPTIONS:
1737 storep = &u->options;
1738 break;
1739 case CURLUPART_HOST:
1740 storep = &u->host;
1741 break;
1742 case CURLUPART_ZONEID:
1743 storep = &u->zoneid;
1744 break;
1745 case CURLUPART_PORT:
1746 u->portnum = 0;
1747 storep = &u->port;
1748 break;
1749 case CURLUPART_PATH:
1750 storep = &u->path;
1751 break;
1752 case CURLUPART_QUERY:
1753 storep = &u->query;
1754 u->query_present = FALSE;
1755 break;
1756 case CURLUPART_FRAGMENT:
1757 storep = &u->fragment;
1758 u->fragment_present = FALSE;
1759 break;
1760 default:
1761 return CURLUE_UNKNOWN_PART;
1762 }
1763 if(storep && *storep) {
1764 Curl_safefree(*storep);
1765 }
1766 else if(!storep) {
1767 free_urlhandle(u);
1768 memset(u, 0, sizeof(struct Curl_URL));
1769 }
1770 return CURLUE_OK;
1771 }
1772
1773 nalloc = strlen(part);
1774 if(nalloc > CURL_MAX_INPUT_LENGTH)
1775 /* excessive input length */
1776 return CURLUE_MALFORMED_INPUT;
1777
1778 switch(what) {
1779 case CURLUPART_SCHEME: {
1780 size_t plen = strlen(part);
1781 const char *s = part;
1782 if((plen > MAX_SCHEME_LEN) || (plen < 1))
1783 /* too long or too short */
1784 return CURLUE_BAD_SCHEME;
1785 /* verify that it is a fine scheme */
1786 if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
1787 return CURLUE_UNSUPPORTED_SCHEME;
1788 storep = &u->scheme;
1789 urlencode = FALSE; /* never */
1790 if(ISALPHA(*s)) {
1791 /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
1792 while(--plen) {
1793 if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
1794 s++; /* fine */
1795 else
1796 return CURLUE_BAD_SCHEME;
1797 }
1798 }
1799 else
1800 return CURLUE_BAD_SCHEME;
1801 u->guessed_scheme = FALSE;
1802 break;
1803 }
1804 case CURLUPART_USER:
1805 storep = &u->user;
1806 break;
1807 case CURLUPART_PASSWORD:
1808 storep = &u->password;
1809 break;
1810 case CURLUPART_OPTIONS:
1811 storep = &u->options;
1812 break;
1813 case CURLUPART_HOST:
1814 storep = &u->host;
1815 Curl_safefree(u->zoneid);
1816 break;
1817 case CURLUPART_ZONEID:
1818 storep = &u->zoneid;
1819 break;
1820 case CURLUPART_PORT:
1821 if(!ISDIGIT(part[0]))
1822 /* not a number */
1823 return CURLUE_BAD_PORT_NUMBER;
1824 else {
1825 char *tmp;
1826 char *endp;
1827 unsigned long port;
1828 errno = 0;
1829 port = strtoul(part, &endp, 10); /* must be decimal */
1830 if(errno || (port > 0xffff) || *endp)
1831 /* weirdly provided number, not good! */
1832 return CURLUE_BAD_PORT_NUMBER;
1833 tmp = strdup(part);
1834 if(!tmp)
1835 return CURLUE_OUT_OF_MEMORY;
1836 free(u->port);
1837 u->port = tmp;
1838 u->portnum = (unsigned short)port;
1839 return CURLUE_OK;
1840 }
1841 case CURLUPART_PATH:
1842 urlskipslash = TRUE;
1843 leadingslash = TRUE; /* enforce */
1844 storep = &u->path;
1845 break;
1846 case CURLUPART_QUERY:
1847 plusencode = urlencode;
1848 appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
1849 equalsencode = appendquery;
1850 storep = &u->query;
1851 u->query_present = TRUE;
1852 break;
1853 case CURLUPART_FRAGMENT:
1854 storep = &u->fragment;
1855 u->fragment_present = TRUE;
1856 break;
1857 case CURLUPART_URL: {
1858 /*
1859 * Allow a new URL to replace the existing (if any) contents.
1860 *
1861 * If the existing contents is enough for a URL, allow a relative URL to
1862 * replace it.
1863 */
1864 CURLcode result;
1865 CURLUcode uc;
1866 char *oldurl;
1867 char *redired_url;
1868
1869 if(!nalloc)
1870 /* a blank URL is not a valid URL */
1871 return CURLUE_MALFORMED_INPUT;
1872
1873 /* if the new thing is absolute or the old one is not
1874 * (we could not get an absolute URL in 'oldurl'),
1875 * then replace the existing with the new. */
1876 if(Curl_is_absolute_url(part, NULL, 0,
1877 flags & (CURLU_GUESS_SCHEME|
1878 CURLU_DEFAULT_SCHEME))
1879 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1880 return parseurl_and_replace(part, u, flags);
1881 }
1882
1883 /* apply the relative part to create a new URL
1884 * and replace the existing one with it. */
1885 result = concat_url(oldurl, part, &redired_url);
1886 free(oldurl);
1887 if(result)
1888 return cc2cu(result);
1889
1890 uc = parseurl_and_replace(redired_url, u, flags);
1891 free(redired_url);
1892 return uc;
1893 }
1894 default:
1895 return CURLUE_UNKNOWN_PART;
1896 }
1897 DEBUGASSERT(storep);
1898 {
1899 const char *newp;
1900 struct dynbuf enc;
1901 Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1902
1903 if(leadingslash && (part[0] != '/')) {
1904 CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1905 if(result)
1906 return cc2cu(result);
1907 }
1908 if(urlencode) {
1909 const unsigned char *i;
1910
1911 for(i = (const unsigned char *)part; *i; i++) {
1912 CURLcode result;
1913 if((*i == ' ') && plusencode) {
1914 result = Curl_dyn_addn(&enc, "+", 1);
1915 if(result)
1916 return CURLUE_OUT_OF_MEMORY;
1917 }
1918 else if(ISUNRESERVED(*i) ||
1919 ((*i == '/') && urlskipslash) ||
1920 ((*i == '=') && equalsencode)) {
1921 if((*i == '=') && equalsencode)
1922 /* only skip the first equals sign */
1923 equalsencode = FALSE;
1924 result = Curl_dyn_addn(&enc, i, 1);
1925 if(result)
1926 return cc2cu(result);
1927 }
1928 else {
1929 char out[3]={'%'};
1930 out[1] = hexdigits[*i >> 4];
1931 out[2] = hexdigits[*i & 0xf];
1932 result = Curl_dyn_addn(&enc, out, 3);
1933 if(result)
1934 return cc2cu(result);
1935 }
1936 }
1937 }
1938 else {
1939 char *p;
1940 CURLcode result = Curl_dyn_add(&enc, part);
1941 if(result)
1942 return cc2cu(result);
1943 p = Curl_dyn_ptr(&enc);
1944 while(*p) {
1945 /* make sure percent encoded are lower case */
1946 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1947 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1948 p[1] = Curl_raw_tolower(p[1]);
1949 p[2] = Curl_raw_tolower(p[2]);
1950 p += 3;
1951 }
1952 else
1953 p++;
1954 }
1955 }
1956 newp = Curl_dyn_ptr(&enc);
1957
1958 if(appendquery && newp) {
1959 /* Append the 'newp' string onto the old query. Add a '&' separator if
1960 none is present at the end of the existing query already */
1961
1962 size_t querylen = u->query ? strlen(u->query) : 0;
1963 bool addamperand = querylen && (u->query[querylen -1] != '&');
1964 if(querylen) {
1965 struct dynbuf qbuf;
1966 Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1967
1968 if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1969 goto nomem;
1970
1971 if(addamperand) {
1972 if(Curl_dyn_addn(&qbuf, "&", 1))
1973 goto nomem;
1974 }
1975 if(Curl_dyn_add(&qbuf, newp))
1976 goto nomem;
1977 Curl_dyn_free(&enc);
1978 free(*storep);
1979 *storep = Curl_dyn_ptr(&qbuf);
1980 return CURLUE_OK;
1981 nomem:
1982 Curl_dyn_free(&enc);
1983 return CURLUE_OUT_OF_MEMORY;
1984 }
1985 }
1986
1987 else if(what == CURLUPART_HOST) {
1988 size_t n = Curl_dyn_len(&enc);
1989 if(!n && (flags & CURLU_NO_AUTHORITY)) {
1990 /* Skip hostname check, it is allowed to be empty. */
1991 }
1992 else {
1993 bool bad = FALSE;
1994 if(!n)
1995 bad = TRUE; /* empty hostname is not okay */
1996 else if(!urlencode) {
1997 /* if the host name part was not URL encoded here, it was set ready
1998 URL encoded so we need to decode it to check */
1999 size_t dlen;
2000 char *decoded = NULL;
2001 CURLcode result =
2002 Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
2003 if(result || hostname_check(u, decoded, dlen))
2004 bad = TRUE;
2005 free(decoded);
2006 }
2007 else if(hostname_check(u, (char *)newp, n))
2008 bad = TRUE;
2009 if(bad) {
2010 Curl_dyn_free(&enc);
2011 return CURLUE_BAD_HOSTNAME;
2012 }
2013 }
2014 }
2015
2016 free(*storep);
2017 *storep = (char *)newp;
2018 }
2019 return CURLUE_OK;
2020 }
2021