1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25 #include "curl_setup.h"
26
27 #include "urldata.h"
28 #include "urlapi-int.h"
29 #include "strcase.h"
30 #include "url.h"
31 #include "escape.h"
32 #include "curl_ctype.h"
33 #include "inet_pton.h"
34 #include "inet_ntop.h"
35 #include "strdup.h"
36 #include "idn.h"
37
38 /* The last 3 #include files should be in this order */
39 #include "curl_printf.h"
40 #include "curl_memory.h"
41 #include "memdebug.h"
42
/* MS-DOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when the string starts with an ASCII letter followed
 * by a colon. The argument is parenthesized in the expansion so that
 * expressions such as 'p + 1' can be passed safely. Note that the argument
 * is evaluated more than once.
 */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
48
/* URL flavored MS-DOS/Windows drive prefix: an ASCII letter, then ':' or
 * '|', then a slash, a backslash or the end of the string. The argument is
 * evaluated more than once. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((((str)[0] >= 'a' && (str)[0] <= 'z') || \
    ((str)[0] >= 'A' && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
56
57 /* scheme is not URL encoded, the longest libcurl supported ones are... */
58 #define MAX_SCHEME_LEN 40
59
60 /*
61 * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
62 * sure we have _some_ value for AF_INET6 without polluting our fake value
63 * everywhere.
64 */
65 #if !defined(USE_IPV6) && !defined(AF_INET6)
66 #define AF_INET6 (AF_INET + 1)
67 #endif
68
69 /* Internal representation of CURLU. Point to URL-encoded strings. */
70 struct Curl_URL {
71 char *scheme;
72 char *user;
73 char *password;
74 char *options; /* IMAP only? */
75 char *host;
76 char *zoneid; /* for numerical IPv6 addresses */
77 char *port;
78 char *path;
79 char *query;
80 char *fragment;
81 unsigned short portnum; /* the numerical version (if 'port' is set) */
82 BIT(query_present); /* to support blank */
83 BIT(fragment_present); /* to support blank */
84 BIT(guessed_scheme); /* when a URL without scheme is parsed */
85 };
86
87 #define DEFAULT_SCHEME "https"
88
free_urlhandle(struct Curl_URL * u)89 static void free_urlhandle(struct Curl_URL *u)
90 {
91 free(u->scheme);
92 free(u->user);
93 free(u->password);
94 free(u->options);
95 free(u->host);
96 free(u->zoneid);
97 free(u->port);
98 free(u->path);
99 free(u->query);
100 free(u->fragment);
101 }
102
/*
 * find_host_sep()
 *
 * Return a pointer to the separator that ends the hostname: the first '/' or
 * '?' found after the first "//" in 'url', or after the start of the string
 * when there is no "//". Accepting '?' makes URLs without a path slash work,
 * like http://www.example.com?id=2380
 *
 * If neither character is present, the returned pointer addresses the
 * terminating null byte of 'url'.
 */
static const char *find_host_sep(const char *url)
{
  /* the hostname starts right after the first "//", if there is one */
  const char *slashes = strstr(url, "//");
  const char *hoststart = slashes ? slashes + 2 : url;

  /* stop at whichever of '/' and '?' comes first, or at the end */
  return hoststart + strcspn(hoststart, "/?");
}
130
131 /* convert CURLcode to CURLUcode */
132 #define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE : \
133 CURLUE_OUT_OF_MEMORY)
134 /*
135 * Decide whether a character in a URL must be escaped.
136 */
137 #define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
138
139 static const char hexdigits[] = "0123456789abcdef";
140 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
141 * spaces in the source URL accordingly.
142 *
143 * URL encoding should be skipped for hostnames, otherwise IDN resolution
144 * will fail.
145 */
urlencode_str(struct dynbuf * o,const char * url,size_t len,bool relative,bool query)146 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
147 size_t len, bool relative,
148 bool query)
149 {
150 /* we must add this with whitespace-replacing */
151 bool left = !query;
152 const unsigned char *iptr;
153 const unsigned char *host_sep = (const unsigned char *) url;
154 CURLcode result;
155
156 if(!relative)
157 host_sep = (const unsigned char *) find_host_sep(url);
158
159 for(iptr = (unsigned char *)url; /* read from here */
160 len; iptr++, len--) {
161
162 if(iptr < host_sep) {
163 result = Curl_dyn_addn(o, iptr, 1);
164 if(result)
165 return cc2cu(result);
166 continue;
167 }
168
169 if(*iptr == ' ') {
170 if(left)
171 result = Curl_dyn_addn(o, "%20", 3);
172 else
173 result = Curl_dyn_addn(o, "+", 1);
174 if(result)
175 return cc2cu(result);
176 continue;
177 }
178
179 if(*iptr == '?')
180 left = FALSE;
181
182 if(urlchar_needs_escaping(*iptr)) {
183 char out[3]={'%'};
184 out[1] = hexdigits[*iptr >> 4];
185 out[2] = hexdigits[*iptr & 0xf];
186 result = Curl_dyn_addn(o, out, 3);
187 }
188 else
189 result = Curl_dyn_addn(o, iptr, 1);
190 if(result)
191 return cc2cu(result);
192 }
193
194 return CURLUE_OK;
195 }
196
197 /*
198 * Returns the length of the scheme if the given URL is absolute (as opposed
199 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
200 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
201 *
202 * If 'guess_scheme' is TRUE, it means the URL might be provided without
203 * scheme.
204 */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen,bool guess_scheme)205 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
206 bool guess_scheme)
207 {
208 size_t i = 0;
209 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
210 (void)buflen; /* only used in debug-builds */
211 if(buf)
212 buf[0] = 0; /* always leave a defined value in buf */
213 #ifdef _WIN32
214 if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
215 return 0;
216 #endif
217 if(ISALPHA(url[0]))
218 for(i = 1; i < MAX_SCHEME_LEN; ++i) {
219 char s = url[i];
220 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
221 /* RFC 3986 3.1 explains:
222 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
223 */
224 }
225 else {
226 break;
227 }
228 }
229 if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
230 /* If this does not guess scheme, the scheme always ends with the colon so
231 that this also detects data: URLs etc. In guessing mode, data: could
232 be the hostname "data" with a specified port number. */
233
234 /* the length of the scheme is the name part only */
235 size_t len = i;
236 if(buf) {
237 Curl_strntolower(buf, url, i);
238 buf[i] = 0;
239 }
240 return len;
241 }
242 return 0;
243 }
244
245 /*
246 * Concatenate a relative URL to a base URL making it absolute.
247 * URL-encodes any spaces.
248 * The returned pointer must be freed by the caller unless NULL
249 * (returns NULL on out of memory).
250 *
251 * Note that this function destroys the 'base' string.
252 */
concat_url(char * base,const char * relurl,char ** newurl)253 static CURLcode concat_url(char *base, const char *relurl, char **newurl)
254 {
255 /***
256 TRY to append this new path to the old URL
257 to the right of the host part. Oh crap, this is doomed to cause
258 problems in the future...
259 */
260 struct dynbuf newest;
261 char *protsep;
262 char *pathsep;
263 bool host_changed = FALSE;
264 const char *useurl = relurl;
265 CURLcode result = CURLE_OK;
266 CURLUcode uc;
267 bool skip_slash = FALSE;
268 *newurl = NULL;
269
270 /* protsep points to the start of the hostname */
271 protsep = strstr(base, "//");
272 if(!protsep)
273 protsep = base;
274 else
275 protsep += 2; /* pass the slashes */
276
277 if('/' != relurl[0]) {
278 int level = 0;
279
280 /* First we need to find out if there is a ?-letter in the URL,
281 and cut it and the right-side of that off */
282 pathsep = strchr(protsep, '?');
283 if(pathsep)
284 *pathsep = 0;
285
286 /* we have a relative path to append to the last slash if there is one
287 available, or the new URL is just a query string (starts with a '?') or
288 a fragment (starts with '#') we append the new one at the end of the
289 current URL */
290 if((useurl[0] != '?') && (useurl[0] != '#')) {
291 pathsep = strrchr(protsep, '/');
292 if(pathsep)
293 *pathsep = 0;
294
295 /* Check if there is any slash after the hostname, and if so, remember
296 that position instead */
297 pathsep = strchr(protsep, '/');
298 if(pathsep)
299 protsep = pathsep + 1;
300 else
301 protsep = NULL;
302
303 /* now deal with one "./" or any amount of "../" in the newurl
304 and act accordingly */
305
306 if((useurl[0] == '.') && (useurl[1] == '/'))
307 useurl += 2; /* just skip the "./" */
308
309 while((useurl[0] == '.') &&
310 (useurl[1] == '.') &&
311 (useurl[2] == '/')) {
312 level++;
313 useurl += 3; /* pass the "../" */
314 }
315
316 if(protsep) {
317 while(level--) {
318 /* cut off one more level from the right of the original URL */
319 pathsep = strrchr(protsep, '/');
320 if(pathsep)
321 *pathsep = 0;
322 else {
323 *protsep = 0;
324 break;
325 }
326 }
327 }
328 }
329 else
330 skip_slash = TRUE;
331 }
332 else {
333 /* We got a new absolute path for this server */
334
335 if(relurl[1] == '/') {
336 /* the new URL starts with //, just keep the protocol part from the
337 original one */
338 *protsep = 0;
339 useurl = &relurl[2]; /* we keep the slashes from the original, so we
340 skip the new ones */
341 host_changed = TRUE;
342 }
343 else {
344 /* cut off the original URL from the first slash, or deal with URLs
345 without slash */
346 pathsep = strchr(protsep, '/');
347 if(pathsep) {
348 /* When people use badly formatted URLs, such as
349 "http://www.example.com?dir=/home/daniel" we must not use the first
350 slash, if there is a ?-letter before it! */
351 char *sep = strchr(protsep, '?');
352 if(sep && (sep < pathsep))
353 pathsep = sep;
354 *pathsep = 0;
355 }
356 else {
357 /* There was no slash. Now, since we might be operating on a badly
358 formatted URL, such as "http://www.example.com?id=2380" which does
359 not use a slash separator as it is supposed to, we need to check
360 for a ?-letter as well! */
361 pathsep = strchr(protsep, '?');
362 if(pathsep)
363 *pathsep = 0;
364 }
365 }
366 }
367
368 Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
369
370 /* copy over the root URL part */
371 result = Curl_dyn_add(&newest, base);
372 if(result)
373 return result;
374
375 /* check if we need to append a slash */
376 if(('/' == useurl[0]) || (protsep && !*protsep) || skip_slash)
377 ;
378 else {
379 result = Curl_dyn_addn(&newest, "/", 1);
380 if(result)
381 return result;
382 }
383
384 /* then append the new piece on the right side */
385 uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
386 FALSE);
387 if(uc)
388 return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY;
389
390 *newurl = Curl_dyn_ptr(&newest);
391 return CURLE_OK;
392 }
393
394 /* scan for byte values <= 31, 127 and sometimes space */
junkscan(const char * url,size_t * urllen,unsigned int flags)395 static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
396 {
397 static const char badbytes[]={
398 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
399 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
400 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
401 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
402 0x7f, 0x00 /* null-terminate */
403 };
404 size_t n = strlen(url);
405 size_t nfine;
406
407 if(n > CURL_MAX_INPUT_LENGTH)
408 /* excessive input length */
409 return CURLUE_MALFORMED_INPUT;
410
411 nfine = strcspn(url, badbytes);
412 if((nfine != n) ||
413 (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
414 return CURLUE_MALFORMED_INPUT;
415
416 *urllen = n;
417 return CURLUE_OK;
418 }
419
420 /*
421 * parse_hostname_login()
422 *
423 * Parse the login details (username, password and options) from the URL and
424 * strip them out of the hostname
425 *
426 */
parse_hostname_login(struct Curl_URL * u,const char * login,size_t len,unsigned int flags,size_t * offset)427 static CURLUcode parse_hostname_login(struct Curl_URL *u,
428 const char *login,
429 size_t len,
430 unsigned int flags,
431 size_t *offset) /* to the hostname */
432 {
433 CURLUcode result = CURLUE_OK;
434 CURLcode ccode;
435 char *userp = NULL;
436 char *passwdp = NULL;
437 char *optionsp = NULL;
438 const struct Curl_handler *h = NULL;
439
440 /* At this point, we assume all the other special cases have been taken
441 * care of, so the host is at most
442 *
443 * [user[:password][;options]]@]hostname
444 *
445 * We need somewhere to put the embedded details, so do that first.
446 */
447 char *ptr;
448
449 DEBUGASSERT(login);
450
451 *offset = 0;
452 ptr = memchr(login, '@', len);
453 if(!ptr)
454 goto out;
455
456 /* We will now try to extract the
457 * possible login information in a string like:
458 * ftp://user:password@ftp.my.site:8021/README */
459 ptr++;
460
461 /* if this is a known scheme, get some details */
462 if(u->scheme)
463 h = Curl_get_scheme_handler(u->scheme);
464
465 /* We could use the login information in the URL so extract it. Only parse
466 options if the handler says we should. Note that 'h' might be NULL! */
467 ccode = Curl_parse_login_details(login, ptr - login - 1,
468 &userp, &passwdp,
469 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
470 &optionsp : NULL);
471 if(ccode) {
472 result = CURLUE_BAD_LOGIN;
473 goto out;
474 }
475
476 if(userp) {
477 if(flags & CURLU_DISALLOW_USER) {
478 /* Option DISALLOW_USER is set and URL contains username. */
479 result = CURLUE_USER_NOT_ALLOWED;
480 goto out;
481 }
482 free(u->user);
483 u->user = userp;
484 }
485
486 if(passwdp) {
487 free(u->password);
488 u->password = passwdp;
489 }
490
491 if(optionsp) {
492 free(u->options);
493 u->options = optionsp;
494 }
495
496 /* the hostname starts at this offset */
497 *offset = ptr - login;
498 return CURLUE_OK;
499
500 out:
501
502 free(userp);
503 free(passwdp);
504 free(optionsp);
505 u->user = NULL;
506 u->password = NULL;
507 u->options = NULL;
508
509 return result;
510 }
511
Curl_parse_port(struct Curl_URL * u,struct dynbuf * host,bool has_scheme)512 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
513 bool has_scheme)
514 {
515 char *portptr;
516 char *hostname = Curl_dyn_ptr(host);
517 /*
518 * Find the end of an IPv6 address on the ']' ending bracket.
519 */
520 if(hostname[0] == '[') {
521 portptr = strchr(hostname, ']');
522 if(!portptr)
523 return CURLUE_BAD_IPV6;
524 portptr++;
525 /* this is a RFC2732-style specified IP-address */
526 if(*portptr) {
527 if(*portptr != ':')
528 return CURLUE_BAD_PORT_NUMBER;
529 }
530 else
531 portptr = NULL;
532 }
533 else
534 portptr = strchr(hostname, ':');
535
536 if(portptr) {
537 char *rest = NULL;
538 unsigned long port;
539 size_t keep = portptr - hostname;
540
541 /* Browser behavior adaptation. If there is a colon with no digits after,
542 just cut off the name there which makes us ignore the colon and just
543 use the default port. Firefox, Chrome and Safari all do that.
544
545 Do not do it if the URL has no scheme, to make something that looks like
546 a scheme not work!
547 */
548 Curl_dyn_setlen(host, keep);
549 portptr++;
550 if(!*portptr)
551 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
552
553 if(!ISDIGIT(*portptr))
554 return CURLUE_BAD_PORT_NUMBER;
555
556 errno = 0;
557 port = strtoul(portptr, &rest, 10); /* Port number must be decimal */
558
559 if(errno || (port > 0xffff) || *rest)
560 return CURLUE_BAD_PORT_NUMBER;
561
562 u->portnum = (unsigned short) port;
563 /* generate a new port number string to get rid of leading zeroes etc */
564 free(u->port);
565 u->port = aprintf("%ld", port);
566 if(!u->port)
567 return CURLUE_OUT_OF_MEMORY;
568 }
569
570 return CURLUE_OK;
571 }
572
573 /* this assumes 'hostname' now starts with [ */
ipv6_parse(struct Curl_URL * u,char * hostname,size_t hlen)574 static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
575 size_t hlen) /* length of hostname */
576 {
577 size_t len;
578 DEBUGASSERT(*hostname == '[');
579 if(hlen < 4) /* '[::]' is the shortest possible valid string */
580 return CURLUE_BAD_IPV6;
581 hostname++;
582 hlen -= 2;
583
584 /* only valid IPv6 letters are ok */
585 len = strspn(hostname, "0123456789abcdefABCDEF:.");
586
587 if(hlen != len) {
588 hlen = len;
589 if(hostname[len] == '%') {
590 /* this could now be '%[zone id]' */
591 char zoneid[16];
592 int i = 0;
593 char *h = &hostname[len + 1];
594 /* pass '25' if present and is a URL encoded percent sign */
595 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
596 h += 2;
597 while(*h && (*h != ']') && (i < 15))
598 zoneid[i++] = *h++;
599 if(!i || (']' != *h))
600 return CURLUE_BAD_IPV6;
601 zoneid[i] = 0;
602 u->zoneid = strdup(zoneid);
603 if(!u->zoneid)
604 return CURLUE_OUT_OF_MEMORY;
605 hostname[len] = ']'; /* insert end bracket */
606 hostname[len + 1] = 0; /* terminate the hostname */
607 }
608 else
609 return CURLUE_BAD_IPV6;
610 /* hostname is fine */
611 }
612
613 /* Normalize the IPv6 address */
614 {
615 char dest[16]; /* fits a binary IPv6 address */
616 hostname[hlen] = 0; /* end the address there */
617 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
618 return CURLUE_BAD_IPV6;
619 if(Curl_inet_ntop(AF_INET6, dest, hostname, hlen)) {
620 hlen = strlen(hostname); /* might be shorter now */
621 hostname[hlen + 1] = 0;
622 }
623 hostname[hlen] = ']'; /* restore ending bracket */
624 }
625 return CURLUE_OK;
626 }
627
hostname_check(struct Curl_URL * u,char * hostname,size_t hlen)628 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
629 size_t hlen) /* length of hostname */
630 {
631 size_t len;
632 DEBUGASSERT(hostname);
633
634 if(!hlen)
635 return CURLUE_NO_HOST;
636 else if(hostname[0] == '[')
637 return ipv6_parse(u, hostname, hlen);
638 else {
639 /* letters from the second string are not ok */
640 len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
641 if(hlen != len)
642 /* hostname with bad content */
643 return CURLUE_BAD_HOSTNAME;
644 }
645 return CURLUE_OK;
646 }
647
648 /*
649 * Handle partial IPv4 numerical addresses and different bases, like
650 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
651 *
652 * If the given input string is syntactically wrong IPv4 or any part for
653 * example is too big, this function returns HOST_NAME.
654 *
655 * Output the "normalized" version of that input string in plain quad decimal
656 * integers.
657 *
658 * Returns the host type.
659 */
660
661 #define HOST_ERROR -1 /* out of memory */
662
663 #define HOST_NAME 1
664 #define HOST_IPV4 2
665 #define HOST_IPV6 3
666
ipv4_normalize(struct dynbuf * host)667 static int ipv4_normalize(struct dynbuf *host)
668 {
669 bool done = FALSE;
670 int n = 0;
671 const char *c = Curl_dyn_ptr(host);
672 unsigned long parts[4] = {0, 0, 0, 0};
673 CURLcode result = CURLE_OK;
674
675 if(*c == '[')
676 return HOST_IPV6;
677
678 errno = 0; /* for strtoul */
679 while(!done) {
680 char *endp = NULL;
681 unsigned long l;
682 if(!ISDIGIT(*c))
683 /* most importantly this does not allow a leading plus or minus */
684 return HOST_NAME;
685 l = strtoul(c, &endp, 0);
686 if(errno)
687 return HOST_NAME;
688 #if SIZEOF_LONG > 4
689 /* a value larger than 32 bits */
690 if(l > UINT_MAX)
691 return HOST_NAME;
692 #endif
693
694 parts[n] = l;
695 c = endp;
696
697 switch(*c) {
698 case '.':
699 if(n == 3)
700 return HOST_NAME;
701 n++;
702 c++;
703 break;
704
705 case '\0':
706 done = TRUE;
707 break;
708
709 default:
710 return HOST_NAME;
711 }
712 }
713
714 switch(n) {
715 case 0: /* a -- 32 bits */
716 Curl_dyn_reset(host);
717
718 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
719 (unsigned int)(parts[0] >> 24),
720 (unsigned int)((parts[0] >> 16) & 0xff),
721 (unsigned int)((parts[0] >> 8) & 0xff),
722 (unsigned int)(parts[0] & 0xff));
723 break;
724 case 1: /* a.b -- 8.24 bits */
725 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
726 return HOST_NAME;
727 Curl_dyn_reset(host);
728 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
729 (unsigned int)(parts[0]),
730 (unsigned int)((parts[1] >> 16) & 0xff),
731 (unsigned int)((parts[1] >> 8) & 0xff),
732 (unsigned int)(parts[1] & 0xff));
733 break;
734 case 2: /* a.b.c -- 8.8.16 bits */
735 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
736 return HOST_NAME;
737 Curl_dyn_reset(host);
738 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
739 (unsigned int)(parts[0]),
740 (unsigned int)(parts[1]),
741 (unsigned int)((parts[2] >> 8) & 0xff),
742 (unsigned int)(parts[2] & 0xff));
743 break;
744 case 3: /* a.b.c.d -- 8.8.8.8 bits */
745 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
746 (parts[3] > 0xff))
747 return HOST_NAME;
748 Curl_dyn_reset(host);
749 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
750 (unsigned int)(parts[0]),
751 (unsigned int)(parts[1]),
752 (unsigned int)(parts[2]),
753 (unsigned int)(parts[3]));
754 break;
755 }
756 if(result)
757 return HOST_ERROR;
758 return HOST_IPV4;
759 }
760
761 /* if necessary, replace the host content with a URL decoded version */
urldecode_host(struct dynbuf * host)762 static CURLUcode urldecode_host(struct dynbuf *host)
763 {
764 char *per = NULL;
765 const char *hostname = Curl_dyn_ptr(host);
766 per = strchr(hostname, '%');
767 if(!per)
768 /* nothing to decode */
769 return CURLUE_OK;
770 else {
771 /* encoded */
772 size_t dlen;
773 char *decoded;
774 CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
775 REJECT_CTRL);
776 if(result)
777 return CURLUE_BAD_HOSTNAME;
778 Curl_dyn_reset(host);
779 result = Curl_dyn_addn(host, decoded, dlen);
780 free(decoded);
781 if(result)
782 return cc2cu(result);
783 }
784
785 return CURLUE_OK;
786 }
787
parse_authority(struct Curl_URL * u,const char * auth,size_t authlen,unsigned int flags,struct dynbuf * host,bool has_scheme)788 static CURLUcode parse_authority(struct Curl_URL *u,
789 const char *auth, size_t authlen,
790 unsigned int flags,
791 struct dynbuf *host,
792 bool has_scheme)
793 {
794 size_t offset;
795 CURLUcode uc;
796 CURLcode result;
797
798 /*
799 * Parse the login details and strip them out of the hostname.
800 */
801 uc = parse_hostname_login(u, auth, authlen, flags, &offset);
802 if(uc)
803 goto out;
804
805 result = Curl_dyn_addn(host, auth + offset, authlen - offset);
806 if(result) {
807 uc = cc2cu(result);
808 goto out;
809 }
810
811 uc = Curl_parse_port(u, host, has_scheme);
812 if(uc)
813 goto out;
814
815 if(!Curl_dyn_len(host))
816 return CURLUE_NO_HOST;
817
818 switch(ipv4_normalize(host)) {
819 case HOST_IPV4:
820 break;
821 case HOST_IPV6:
822 uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
823 break;
824 case HOST_NAME:
825 uc = urldecode_host(host);
826 if(!uc)
827 uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
828 break;
829 case HOST_ERROR:
830 uc = CURLUE_OUT_OF_MEMORY;
831 break;
832 default:
833 uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
834 break;
835 }
836
837 out:
838 return uc;
839 }
840
841 /* used for HTTP/2 server push */
Curl_url_set_authority(CURLU * u,const char * authority)842 CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
843 {
844 CURLUcode result;
845 struct dynbuf host;
846
847 DEBUGASSERT(authority);
848 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
849
850 result = parse_authority(u, authority, strlen(authority),
851 CURLU_DISALLOW_USER, &host, !!u->scheme);
852 if(result)
853 Curl_dyn_free(&host);
854 else {
855 free(u->host);
856 u->host = Curl_dyn_ptr(&host);
857 }
858 return result;
859 }
860
861 /*
862 * "Remove Dot Segments"
863 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
864 */
865
866 /*
867 * dedotdotify()
868 * @unittest: 1395
869 *
870 * This function gets a null-terminated path with dot and dotdot sequences
871 * passed in and strips them off according to the rules in RFC 3986 section
872 * 5.2.4.
873 *
874 * The function handles a query part ('?' + stuff) appended but it expects
875 * that fragments ('#' + stuff) have already been cut off.
876 *
877 * RETURNS
878 *
879 * Zero for success and 'out' set to an allocated dedotdotified string.
880 */
881 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
dedotdotify(const char * input,size_t clen,char ** outp)882 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
883 {
884 char *outptr;
885 const char *endp = &input[clen];
886 char *out;
887
888 *outp = NULL;
889 /* the path always starts with a slash, and a slash has not dot */
890 if((clen < 2) || !memchr(input, '.', clen))
891 return 0;
892
893 out = malloc(clen + 1);
894 if(!out)
895 return 1; /* out of memory */
896
897 *out = 0; /* null-terminates, for inputs like "./" */
898 outptr = out;
899
900 do {
901 bool dotdot = TRUE;
902 if(*input == '.') {
903 /* A. If the input buffer begins with a prefix of "../" or "./", then
904 remove that prefix from the input buffer; otherwise, */
905
906 if(!strncmp("./", input, 2)) {
907 input += 2;
908 clen -= 2;
909 }
910 else if(!strncmp("../", input, 3)) {
911 input += 3;
912 clen -= 3;
913 }
914 /* D. if the input buffer consists only of "." or "..", then remove
915 that from the input buffer; otherwise, */
916
917 else if(!strcmp(".", input) || !strcmp("..", input) ||
918 !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
919 *out = 0;
920 break;
921 }
922 else
923 dotdot = FALSE;
924 }
925 else if(*input == '/') {
926 /* B. if the input buffer begins with a prefix of "/./" or "/.", where
927 "." is a complete path segment, then replace that prefix with "/" in
928 the input buffer; otherwise, */
929 if(!strncmp("/./", input, 3)) {
930 input += 2;
931 clen -= 2;
932 }
933 else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
934 *outptr++ = '/';
935 *outptr = 0;
936 break;
937 }
938
939 /* C. if the input buffer begins with a prefix of "/../" or "/..",
940 where ".." is a complete path segment, then replace that prefix with
941 "/" in the input buffer and remove the last segment and its
942 preceding "/" (if any) from the output buffer; otherwise, */
943
944 else if(!strncmp("/../", input, 4)) {
945 input += 3;
946 clen -= 3;
947 /* remove the last segment from the output buffer */
948 while(outptr > out) {
949 outptr--;
950 if(*outptr == '/')
951 break;
952 }
953 *outptr = 0; /* null-terminate where it stops */
954 }
955 else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
956 /* remove the last segment from the output buffer */
957 while(outptr > out) {
958 outptr--;
959 if(*outptr == '/')
960 break;
961 }
962 *outptr++ = '/';
963 *outptr = 0; /* null-terminate where it stops */
964 break;
965 }
966 else
967 dotdot = FALSE;
968 }
969 else
970 dotdot = FALSE;
971
972 if(!dotdot) {
973 /* E. move the first path segment in the input buffer to the end of
974 the output buffer, including the initial "/" character (if any) and
975 any subsequent characters up to, but not including, the next "/"
976 character or the end of the input buffer. */
977
978 do {
979 *outptr++ = *input++;
980 clen--;
981 } while(*input && (*input != '/') && (*input != '?'));
982 *outptr = 0;
983 }
984
985 /* continue until end of path */
986 } while(input < endp);
987
988 *outp = out;
989 return 0; /* success */
990 }
991
parseurl(const char * url,CURLU * u,unsigned int flags)992 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
993 {
994 const char *path;
995 size_t pathlen;
996 char *query = NULL;
997 char *fragment = NULL;
998 char schemebuf[MAX_SCHEME_LEN + 1];
999 size_t schemelen = 0;
1000 size_t urllen;
1001 CURLUcode result = CURLUE_OK;
1002 size_t fraglen = 0;
1003 struct dynbuf host;
1004
1005 DEBUGASSERT(url);
1006
1007 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
1008
1009 result = junkscan(url, &urllen, flags);
1010 if(result)
1011 goto fail;
1012
1013 schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1014 flags & (CURLU_GUESS_SCHEME|
1015 CURLU_DEFAULT_SCHEME));
1016
1017 /* handle the file: scheme */
1018 if(schemelen && !strcmp(schemebuf, "file")) {
1019 bool uncpath = FALSE;
1020 if(urllen <= 6) {
1021 /* file:/ is not enough to actually be a complete file: URL */
1022 result = CURLUE_BAD_FILE_URL;
1023 goto fail;
1024 }
1025
1026 /* path has been allocated large enough to hold this */
1027 path = (char *)&url[5];
1028 pathlen = urllen - 5;
1029
1030 u->scheme = strdup("file");
1031 if(!u->scheme) {
1032 result = CURLUE_OUT_OF_MEMORY;
1033 goto fail;
1034 }
1035
1036 /* Extra handling URLs with an authority component (i.e. that start with
1037 * "file://")
1038 *
1039 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
1040 * RFC 8089, but not the (current) WHAT-WG URL spec.
1041 */
1042 if(path[0] == '/' && path[1] == '/') {
1043 /* swallow the two slashes */
1044 const char *ptr = &path[2];
1045
1046 /*
1047 * According to RFC 8089, a file: URL can be reliably dereferenced if:
1048 *
1049 * o it has no/blank hostname, or
1050 *
1051 * o the hostname matches "localhost" (case-insensitively), or
1052 *
1053 * o the hostname is a FQDN that resolves to this machine, or
1054 *
1055 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
1056 * Appendix E.3).
1057 *
1058 * For brevity, we only consider URLs with empty, "localhost", or
1059 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
1060 *
1061 * Additionally, there is an exception for URLs with a Windows drive
1062 * letter in the authority (which was accidentally omitted from RFC 8089
1063 * Appendix E, but believe me, it was meant to be there. --MK)
1064 */
1065 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1066 /* the URL includes a hostname, it must match "localhost" or
1067 "127.0.0.1" to be valid */
1068 if(checkprefix("localhost/", ptr) ||
1069 checkprefix("127.0.0.1/", ptr)) {
1070 ptr += 9; /* now points to the slash after the host */
1071 }
1072 else {
1073 #if defined(_WIN32)
1074 size_t len;
1075
1076 /* the hostname, NetBIOS computer name, can not contain disallowed
1077 chars, and the delimiting slash character must be appended to the
1078 hostname */
1079 path = strpbrk(ptr, "/\\:*?\"<>|");
1080 if(!path || *path != '/') {
1081 result = CURLUE_BAD_FILE_URL;
1082 goto fail;
1083 }
1084
1085 len = path - ptr;
1086 if(len) {
1087 CURLcode code = Curl_dyn_addn(&host, ptr, len);
1088 if(code) {
1089 result = cc2cu(code);
1090 goto fail;
1091 }
1092 uncpath = TRUE;
1093 }
1094
1095 ptr -= 2; /* now points to the // before the host in UNC */
1096 #else
1097 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1098 none */
1099 result = CURLUE_BAD_FILE_URL;
1100 goto fail;
1101 #endif
1102 }
1103 }
1104
1105 path = ptr;
1106 pathlen = urllen - (ptr - url);
1107 }
1108
1109 if(!uncpath)
1110 /* no host for file: URLs by default */
1111 Curl_dyn_reset(&host);
1112
1113 #if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1114 /* Do not allow Windows drive letters when not in Windows.
1115 * This catches both "file:/c:" and "file:c:" */
1116 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1117 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1118 /* File drive letters are only accepted in MS-DOS/Windows */
1119 result = CURLUE_BAD_FILE_URL;
1120 goto fail;
1121 }
1122 #else
1123 /* If the path starts with a slash and a drive letter, ditch the slash */
1124 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1125 /* This cannot be done with strcpy, as the memory chunks overlap! */
1126 path++;
1127 pathlen--;
1128 }
1129 #endif
1130
1131 }
1132 else {
1133 /* clear path */
1134 const char *schemep = NULL;
1135 const char *hostp;
1136 size_t hostlen;
1137
1138 if(schemelen) {
1139 int i = 0;
1140 const char *p = &url[schemelen + 1];
1141 while((*p == '/') && (i < 4)) {
1142 p++;
1143 i++;
1144 }
1145
1146 schemep = schemebuf;
1147 if(!Curl_get_scheme_handler(schemep) &&
1148 !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1149 result = CURLUE_UNSUPPORTED_SCHEME;
1150 goto fail;
1151 }
1152
1153 if((i < 1) || (i > 3)) {
1154 /* less than one or more than three slashes */
1155 result = CURLUE_BAD_SLASHES;
1156 goto fail;
1157 }
1158 hostp = p; /* hostname starts here */
1159 }
1160 else {
1161 /* no scheme! */
1162
1163 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1164 result = CURLUE_BAD_SCHEME;
1165 goto fail;
1166 }
1167 if(flags & CURLU_DEFAULT_SCHEME)
1168 schemep = DEFAULT_SCHEME;
1169
1170 /*
1171 * The URL was badly formatted, let's try without scheme specified.
1172 */
1173 hostp = url;
1174 }
1175
1176 if(schemep) {
1177 u->scheme = strdup(schemep);
1178 if(!u->scheme) {
1179 result = CURLUE_OUT_OF_MEMORY;
1180 goto fail;
1181 }
1182 }
1183
1184 /* find the end of the hostname + port number */
1185 hostlen = strcspn(hostp, "/?#");
1186 path = &hostp[hostlen];
1187
1188 /* this pathlen also contains the query and the fragment */
1189 pathlen = urllen - (path - url);
1190 if(hostlen) {
1191
1192 result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1193 if(result)
1194 goto fail;
1195
1196 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1197 const char *hostname = Curl_dyn_ptr(&host);
1198 /* legacy curl-style guess based on hostname */
1199 if(checkprefix("ftp.", hostname))
1200 schemep = "ftp";
1201 else if(checkprefix("dict.", hostname))
1202 schemep = "dict";
1203 else if(checkprefix("ldap.", hostname))
1204 schemep = "ldap";
1205 else if(checkprefix("imap.", hostname))
1206 schemep = "imap";
1207 else if(checkprefix("smtp.", hostname))
1208 schemep = "smtp";
1209 else if(checkprefix("pop3.", hostname))
1210 schemep = "pop3";
1211 else
1212 schemep = "http";
1213
1214 u->scheme = strdup(schemep);
1215 if(!u->scheme) {
1216 result = CURLUE_OUT_OF_MEMORY;
1217 goto fail;
1218 }
1219 u->guessed_scheme = TRUE;
1220 }
1221 }
1222 else if(flags & CURLU_NO_AUTHORITY) {
1223 /* allowed to be empty. */
1224 if(Curl_dyn_add(&host, "")) {
1225 result = CURLUE_OUT_OF_MEMORY;
1226 goto fail;
1227 }
1228 }
1229 else {
1230 result = CURLUE_NO_HOST;
1231 goto fail;
1232 }
1233 }
1234
1235 fragment = strchr(path, '#');
1236 if(fragment) {
1237 fraglen = pathlen - (fragment - path);
1238 u->fragment_present = TRUE;
1239 if(fraglen > 1) {
1240 /* skip the leading '#' in the copy but include the terminating null */
1241 if(flags & CURLU_URLENCODE) {
1242 struct dynbuf enc;
1243 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1244 result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1245 if(result)
1246 goto fail;
1247 u->fragment = Curl_dyn_ptr(&enc);
1248 }
1249 else {
1250 u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1251 if(!u->fragment) {
1252 result = CURLUE_OUT_OF_MEMORY;
1253 goto fail;
1254 }
1255 }
1256 }
1257 /* after this, pathlen still contains the query */
1258 pathlen -= fraglen;
1259 }
1260
1261 query = memchr(path, '?', pathlen);
1262 if(query) {
1263 size_t qlen = fragment ? (size_t)(fragment - query) :
1264 pathlen - (query - path);
1265 pathlen -= qlen;
1266 u->query_present = TRUE;
1267 if(qlen > 1) {
1268 if(flags & CURLU_URLENCODE) {
1269 struct dynbuf enc;
1270 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1271 /* skip the leading question mark */
1272 result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1273 if(result)
1274 goto fail;
1275 u->query = Curl_dyn_ptr(&enc);
1276 }
1277 else {
1278 u->query = Curl_memdup0(query + 1, qlen - 1);
1279 if(!u->query) {
1280 result = CURLUE_OUT_OF_MEMORY;
1281 goto fail;
1282 }
1283 }
1284 }
1285 else {
1286 /* single byte query */
1287 u->query = strdup("");
1288 if(!u->query) {
1289 result = CURLUE_OUT_OF_MEMORY;
1290 goto fail;
1291 }
1292 }
1293 }
1294
1295 if(pathlen && (flags & CURLU_URLENCODE)) {
1296 struct dynbuf enc;
1297 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1298 result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1299 if(result)
1300 goto fail;
1301 pathlen = Curl_dyn_len(&enc);
1302 path = u->path = Curl_dyn_ptr(&enc);
1303 }
1304
1305 if(pathlen <= 1) {
1306 /* there is no path left or just the slash, unset */
1307 path = NULL;
1308 }
1309 else {
1310 if(!u->path) {
1311 u->path = Curl_memdup0(path, pathlen);
1312 if(!u->path) {
1313 result = CURLUE_OUT_OF_MEMORY;
1314 goto fail;
1315 }
1316 path = u->path;
1317 }
1318 else if(flags & CURLU_URLENCODE)
1319 /* it might have encoded more than just the path so cut it */
1320 u->path[pathlen] = 0;
1321
1322 if(!(flags & CURLU_PATH_AS_IS)) {
1323 /* remove ../ and ./ sequences according to RFC3986 */
1324 char *dedot;
1325 int err = dedotdotify((char *)path, pathlen, &dedot);
1326 if(err) {
1327 result = CURLUE_OUT_OF_MEMORY;
1328 goto fail;
1329 }
1330 if(dedot) {
1331 free(u->path);
1332 u->path = dedot;
1333 }
1334 }
1335 }
1336
1337 u->host = Curl_dyn_ptr(&host);
1338
1339 return result;
1340 fail:
1341 Curl_dyn_free(&host);
1342 free_urlhandle(u);
1343 return result;
1344 }
1345
1346 /*
1347 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1348 */
parseurl_and_replace(const char * url,CURLU * u,unsigned int flags)1349 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1350 unsigned int flags)
1351 {
1352 CURLUcode result;
1353 CURLU tmpurl;
1354 memset(&tmpurl, 0, sizeof(tmpurl));
1355 result = parseurl(url, &tmpurl, flags);
1356 if(!result) {
1357 free_urlhandle(u);
1358 *u = tmpurl;
1359 }
1360 return result;
1361 }
1362
1363 /*
1364 */
curl_url(void)1365 CURLU *curl_url(void)
1366 {
1367 return calloc(1, sizeof(struct Curl_URL));
1368 }
1369
/*
 * Release everything stored in the URL handle and then the handle itself.
 * Passing NULL is allowed and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u); /* the individual parts */
  free(u);           /* the handle struct */
}
1377
1378 #define DUP(dest, src, name) \
1379 do { \
1380 if(src->name) { \
1381 dest->name = strdup(src->name); \
1382 if(!dest->name) \
1383 goto fail; \
1384 } \
1385 } while(0)
1386
curl_url_dup(const CURLU * in)1387 CURLU *curl_url_dup(const CURLU *in)
1388 {
1389 struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1390 if(u) {
1391 DUP(u, in, scheme);
1392 DUP(u, in, user);
1393 DUP(u, in, password);
1394 DUP(u, in, options);
1395 DUP(u, in, host);
1396 DUP(u, in, port);
1397 DUP(u, in, path);
1398 DUP(u, in, query);
1399 DUP(u, in, fragment);
1400 DUP(u, in, zoneid);
1401 u->portnum = in->portnum;
1402 u->fragment_present = in->fragment_present;
1403 u->query_present = in->query_present;
1404 }
1405 return u;
1406 fail:
1407 curl_url_cleanup(u);
1408 return NULL;
1409 }
1410
curl_url_get(const CURLU * u,CURLUPart what,char ** part,unsigned int flags)1411 CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1412 char **part, unsigned int flags)
1413 {
1414 const char *ptr;
1415 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1416 char portbuf[7];
1417 bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
1418 bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1419 bool punycode = FALSE;
1420 bool depunyfy = FALSE;
1421 bool plusdecode = FALSE;
1422 (void)flags;
1423 if(!u)
1424 return CURLUE_BAD_HANDLE;
1425 if(!part)
1426 return CURLUE_BAD_PARTPOINTER;
1427 *part = NULL;
1428
1429 switch(what) {
1430 case CURLUPART_SCHEME:
1431 ptr = u->scheme;
1432 ifmissing = CURLUE_NO_SCHEME;
1433 urldecode = FALSE; /* never for schemes */
1434 if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
1435 return CURLUE_NO_SCHEME;
1436 break;
1437 case CURLUPART_USER:
1438 ptr = u->user;
1439 ifmissing = CURLUE_NO_USER;
1440 break;
1441 case CURLUPART_PASSWORD:
1442 ptr = u->password;
1443 ifmissing = CURLUE_NO_PASSWORD;
1444 break;
1445 case CURLUPART_OPTIONS:
1446 ptr = u->options;
1447 ifmissing = CURLUE_NO_OPTIONS;
1448 break;
1449 case CURLUPART_HOST:
1450 ptr = u->host;
1451 ifmissing = CURLUE_NO_HOST;
1452 punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1453 depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1454 break;
1455 case CURLUPART_ZONEID:
1456 ptr = u->zoneid;
1457 ifmissing = CURLUE_NO_ZONEID;
1458 break;
1459 case CURLUPART_PORT:
1460 ptr = u->port;
1461 ifmissing = CURLUE_NO_PORT;
1462 urldecode = FALSE; /* never for port */
1463 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1464 /* there is no stored port number, but asked to deliver
1465 a default one for the scheme */
1466 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1467 if(h) {
1468 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1469 ptr = portbuf;
1470 }
1471 }
1472 else if(ptr && u->scheme) {
1473 /* there is a stored port number, but ask to inhibit if
1474 it matches the default one for the scheme */
1475 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1476 if(h && (h->defport == u->portnum) &&
1477 (flags & CURLU_NO_DEFAULT_PORT))
1478 ptr = NULL;
1479 }
1480 break;
1481 case CURLUPART_PATH:
1482 ptr = u->path;
1483 if(!ptr)
1484 ptr = "/";
1485 break;
1486 case CURLUPART_QUERY:
1487 ptr = u->query;
1488 ifmissing = CURLUE_NO_QUERY;
1489 plusdecode = urldecode;
1490 if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
1491 /* there was a blank query and the user do not ask for it */
1492 ptr = NULL;
1493 break;
1494 case CURLUPART_FRAGMENT:
1495 ptr = u->fragment;
1496 ifmissing = CURLUE_NO_FRAGMENT;
1497 if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
1498 /* there was a blank fragment and the user asks for it */
1499 ptr = "";
1500 break;
1501 case CURLUPART_URL: {
1502 char *url;
1503 char *scheme;
1504 char *options = u->options;
1505 char *port = u->port;
1506 char *allochost = NULL;
1507 bool show_fragment =
1508 u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1509 bool show_query =
1510 (u->query && u->query[0]) ||
1511 (u->query_present && flags & CURLU_GET_EMPTY);
1512 punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1513 depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1514 if(u->scheme && strcasecompare("file", u->scheme)) {
1515 url = aprintf("file://%s%s%s",
1516 u->path,
1517 show_fragment ? "#": "",
1518 u->fragment ? u->fragment : "");
1519 }
1520 else if(!u->host)
1521 return CURLUE_NO_HOST;
1522 else {
1523 const struct Curl_handler *h = NULL;
1524 char schemebuf[MAX_SCHEME_LEN + 5];
1525 if(u->scheme)
1526 scheme = u->scheme;
1527 else if(flags & CURLU_DEFAULT_SCHEME)
1528 scheme = (char *) DEFAULT_SCHEME;
1529 else
1530 return CURLUE_NO_SCHEME;
1531
1532 h = Curl_get_scheme_handler(scheme);
1533 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1534 /* there is no stored port number, but asked to deliver
1535 a default one for the scheme */
1536 if(h) {
1537 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1538 port = portbuf;
1539 }
1540 }
1541 else if(port) {
1542 /* there is a stored port number, but asked to inhibit if it matches
1543 the default one for the scheme */
1544 if(h && (h->defport == u->portnum) &&
1545 (flags & CURLU_NO_DEFAULT_PORT))
1546 port = NULL;
1547 }
1548
1549 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1550 options = NULL;
1551
1552 if(u->host[0] == '[') {
1553 if(u->zoneid) {
1554 /* make it '[ host %25 zoneid ]' */
1555 struct dynbuf enc;
1556 size_t hostlen = strlen(u->host);
1557 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1558 if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1559 u->zoneid))
1560 return CURLUE_OUT_OF_MEMORY;
1561 allochost = Curl_dyn_ptr(&enc);
1562 }
1563 }
1564 else if(urlencode) {
1565 allochost = curl_easy_escape(NULL, u->host, 0);
1566 if(!allochost)
1567 return CURLUE_OUT_OF_MEMORY;
1568 }
1569 else if(punycode) {
1570 if(!Curl_is_ASCII_name(u->host)) {
1571 #ifndef USE_IDN
1572 return CURLUE_LACKS_IDN;
1573 #else
1574 CURLcode result = Curl_idn_decode(u->host, &allochost);
1575 if(result)
1576 return (result == CURLE_OUT_OF_MEMORY) ?
1577 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1578 #endif
1579 }
1580 }
1581 else if(depunyfy) {
1582 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1583 #ifndef USE_IDN
1584 return CURLUE_LACKS_IDN;
1585 #else
1586 CURLcode result = Curl_idn_encode(u->host, &allochost);
1587 if(result)
1588 /* this is the most likely error */
1589 return (result == CURLE_OUT_OF_MEMORY) ?
1590 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1591 #endif
1592 }
1593 }
1594
1595 if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
1596 msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1597 else
1598 schemebuf[0] = 0;
1599
1600 url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1601 schemebuf,
1602 u->user ? u->user : "",
1603 u->password ? ":": "",
1604 u->password ? u->password : "",
1605 options ? ";" : "",
1606 options ? options : "",
1607 (u->user || u->password || options) ? "@": "",
1608 allochost ? allochost : u->host,
1609 port ? ":": "",
1610 port ? port : "",
1611 u->path ? u->path : "/",
1612 show_query ? "?": "",
1613 u->query ? u->query : "",
1614 show_fragment ? "#": "",
1615 u->fragment ? u->fragment : "");
1616 free(allochost);
1617 }
1618 if(!url)
1619 return CURLUE_OUT_OF_MEMORY;
1620 *part = url;
1621 return CURLUE_OK;
1622 }
1623 default:
1624 ptr = NULL;
1625 break;
1626 }
1627 if(ptr) {
1628 size_t partlen = strlen(ptr);
1629 size_t i = 0;
1630 *part = Curl_memdup0(ptr, partlen);
1631 if(!*part)
1632 return CURLUE_OUT_OF_MEMORY;
1633 if(plusdecode) {
1634 /* convert + to space */
1635 char *plus = *part;
1636 for(i = 0; i < partlen; ++plus, i++) {
1637 if(*plus == '+')
1638 *plus = ' ';
1639 }
1640 }
1641 if(urldecode) {
1642 char *decoded;
1643 size_t dlen;
1644 /* this unconditional rejection of control bytes is documented
1645 API behavior */
1646 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1647 free(*part);
1648 if(res) {
1649 *part = NULL;
1650 return CURLUE_URLDECODE;
1651 }
1652 *part = decoded;
1653 partlen = dlen;
1654 }
1655 if(urlencode) {
1656 struct dynbuf enc;
1657 CURLUcode uc;
1658 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1659 uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
1660 if(uc)
1661 return uc;
1662 free(*part);
1663 *part = Curl_dyn_ptr(&enc);
1664 }
1665 else if(punycode) {
1666 if(!Curl_is_ASCII_name(u->host)) {
1667 #ifndef USE_IDN
1668 return CURLUE_LACKS_IDN;
1669 #else
1670 char *allochost;
1671 CURLcode result = Curl_idn_decode(*part, &allochost);
1672 if(result)
1673 return (result == CURLE_OUT_OF_MEMORY) ?
1674 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1675 free(*part);
1676 *part = allochost;
1677 #endif
1678 }
1679 }
1680 else if(depunyfy) {
1681 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1682 #ifndef USE_IDN
1683 return CURLUE_LACKS_IDN;
1684 #else
1685 char *allochost;
1686 CURLcode result = Curl_idn_encode(*part, &allochost);
1687 if(result)
1688 return (result == CURLE_OUT_OF_MEMORY) ?
1689 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1690 free(*part);
1691 *part = allochost;
1692 #endif
1693 }
1694 }
1695
1696 return CURLUE_OK;
1697 }
1698 else
1699 return ifmissing;
1700 }
1701
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1702 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1703 const char *part, unsigned int flags)
1704 {
1705 char **storep = NULL;
1706 bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1707 bool plusencode = FALSE;
1708 bool urlskipslash = FALSE;
1709 bool leadingslash = FALSE;
1710 bool appendquery = FALSE;
1711 bool equalsencode = FALSE;
1712 size_t nalloc;
1713
1714 if(!u)
1715 return CURLUE_BAD_HANDLE;
1716 if(!part) {
1717 /* setting a part to NULL clears it */
1718 switch(what) {
1719 case CURLUPART_URL:
1720 break;
1721 case CURLUPART_SCHEME:
1722 storep = &u->scheme;
1723 u->guessed_scheme = FALSE;
1724 break;
1725 case CURLUPART_USER:
1726 storep = &u->user;
1727 break;
1728 case CURLUPART_PASSWORD:
1729 storep = &u->password;
1730 break;
1731 case CURLUPART_OPTIONS:
1732 storep = &u->options;
1733 break;
1734 case CURLUPART_HOST:
1735 storep = &u->host;
1736 break;
1737 case CURLUPART_ZONEID:
1738 storep = &u->zoneid;
1739 break;
1740 case CURLUPART_PORT:
1741 u->portnum = 0;
1742 storep = &u->port;
1743 break;
1744 case CURLUPART_PATH:
1745 storep = &u->path;
1746 break;
1747 case CURLUPART_QUERY:
1748 storep = &u->query;
1749 u->query_present = FALSE;
1750 break;
1751 case CURLUPART_FRAGMENT:
1752 storep = &u->fragment;
1753 u->fragment_present = FALSE;
1754 break;
1755 default:
1756 return CURLUE_UNKNOWN_PART;
1757 }
1758 if(storep && *storep) {
1759 Curl_safefree(*storep);
1760 }
1761 else if(!storep) {
1762 free_urlhandle(u);
1763 memset(u, 0, sizeof(struct Curl_URL));
1764 }
1765 return CURLUE_OK;
1766 }
1767
1768 nalloc = strlen(part);
1769 if(nalloc > CURL_MAX_INPUT_LENGTH)
1770 /* excessive input length */
1771 return CURLUE_MALFORMED_INPUT;
1772
1773 switch(what) {
1774 case CURLUPART_SCHEME: {
1775 size_t plen = strlen(part);
1776 const char *s = part;
1777 if((plen > MAX_SCHEME_LEN) || (plen < 1))
1778 /* too long or too short */
1779 return CURLUE_BAD_SCHEME;
1780 /* verify that it is a fine scheme */
1781 if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
1782 return CURLUE_UNSUPPORTED_SCHEME;
1783 storep = &u->scheme;
1784 urlencode = FALSE; /* never */
1785 if(ISALPHA(*s)) {
1786 /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
1787 while(--plen) {
1788 if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
1789 s++; /* fine */
1790 else
1791 return CURLUE_BAD_SCHEME;
1792 }
1793 }
1794 else
1795 return CURLUE_BAD_SCHEME;
1796 u->guessed_scheme = FALSE;
1797 break;
1798 }
1799 case CURLUPART_USER:
1800 storep = &u->user;
1801 break;
1802 case CURLUPART_PASSWORD:
1803 storep = &u->password;
1804 break;
1805 case CURLUPART_OPTIONS:
1806 storep = &u->options;
1807 break;
1808 case CURLUPART_HOST:
1809 storep = &u->host;
1810 Curl_safefree(u->zoneid);
1811 break;
1812 case CURLUPART_ZONEID:
1813 storep = &u->zoneid;
1814 break;
1815 case CURLUPART_PORT:
1816 if(!ISDIGIT(part[0]))
1817 /* not a number */
1818 return CURLUE_BAD_PORT_NUMBER;
1819 else {
1820 char *tmp;
1821 char *endp;
1822 unsigned long port;
1823 errno = 0;
1824 port = strtoul(part, &endp, 10); /* must be decimal */
1825 if(errno || (port > 0xffff) || *endp)
1826 /* weirdly provided number, not good! */
1827 return CURLUE_BAD_PORT_NUMBER;
1828 tmp = strdup(part);
1829 if(!tmp)
1830 return CURLUE_OUT_OF_MEMORY;
1831 free(u->port);
1832 u->port = tmp;
1833 u->portnum = (unsigned short)port;
1834 return CURLUE_OK;
1835 }
1836 case CURLUPART_PATH:
1837 urlskipslash = TRUE;
1838 leadingslash = TRUE; /* enforce */
1839 storep = &u->path;
1840 break;
1841 case CURLUPART_QUERY:
1842 plusencode = urlencode;
1843 appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
1844 equalsencode = appendquery;
1845 storep = &u->query;
1846 u->query_present = TRUE;
1847 break;
1848 case CURLUPART_FRAGMENT:
1849 storep = &u->fragment;
1850 u->fragment_present = TRUE;
1851 break;
1852 case CURLUPART_URL: {
1853 /*
1854 * Allow a new URL to replace the existing (if any) contents.
1855 *
1856 * If the existing contents is enough for a URL, allow a relative URL to
1857 * replace it.
1858 */
1859 CURLcode result;
1860 CURLUcode uc;
1861 char *oldurl;
1862 char *redired_url;
1863
1864 if(!nalloc)
1865 /* a blank URL is not a valid URL */
1866 return CURLUE_MALFORMED_INPUT;
1867
1868 /* if the new thing is absolute or the old one is not
1869 * (we could not get an absolute URL in 'oldurl'),
1870 * then replace the existing with the new. */
1871 if(Curl_is_absolute_url(part, NULL, 0,
1872 flags & (CURLU_GUESS_SCHEME|
1873 CURLU_DEFAULT_SCHEME))
1874 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1875 return parseurl_and_replace(part, u, flags);
1876 }
1877
1878 /* apply the relative part to create a new URL
1879 * and replace the existing one with it. */
1880 result = concat_url(oldurl, part, &redired_url);
1881 free(oldurl);
1882 if(result)
1883 return cc2cu(result);
1884
1885 uc = parseurl_and_replace(redired_url, u, flags);
1886 free(redired_url);
1887 return uc;
1888 }
1889 default:
1890 return CURLUE_UNKNOWN_PART;
1891 }
1892 DEBUGASSERT(storep);
1893 {
1894 const char *newp;
1895 struct dynbuf enc;
1896 Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1897
1898 if(leadingslash && (part[0] != '/')) {
1899 CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1900 if(result)
1901 return cc2cu(result);
1902 }
1903 if(urlencode) {
1904 const unsigned char *i;
1905
1906 for(i = (const unsigned char *)part; *i; i++) {
1907 CURLcode result;
1908 if((*i == ' ') && plusencode) {
1909 result = Curl_dyn_addn(&enc, "+", 1);
1910 if(result)
1911 return CURLUE_OUT_OF_MEMORY;
1912 }
1913 else if(ISUNRESERVED(*i) ||
1914 ((*i == '/') && urlskipslash) ||
1915 ((*i == '=') && equalsencode)) {
1916 if((*i == '=') && equalsencode)
1917 /* only skip the first equals sign */
1918 equalsencode = FALSE;
1919 result = Curl_dyn_addn(&enc, i, 1);
1920 if(result)
1921 return cc2cu(result);
1922 }
1923 else {
1924 char out[3]={'%'};
1925 out[1] = hexdigits[*i >> 4];
1926 out[2] = hexdigits[*i & 0xf];
1927 result = Curl_dyn_addn(&enc, out, 3);
1928 if(result)
1929 return cc2cu(result);
1930 }
1931 }
1932 }
1933 else {
1934 char *p;
1935 CURLcode result = Curl_dyn_add(&enc, part);
1936 if(result)
1937 return cc2cu(result);
1938 p = Curl_dyn_ptr(&enc);
1939 while(*p) {
1940 /* make sure percent encoded are lower case */
1941 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1942 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1943 p[1] = Curl_raw_tolower(p[1]);
1944 p[2] = Curl_raw_tolower(p[2]);
1945 p += 3;
1946 }
1947 else
1948 p++;
1949 }
1950 }
1951 newp = Curl_dyn_ptr(&enc);
1952
1953 if(appendquery && newp) {
1954 /* Append the 'newp' string onto the old query. Add a '&' separator if
1955 none is present at the end of the existing query already */
1956
1957 size_t querylen = u->query ? strlen(u->query) : 0;
1958 bool addamperand = querylen && (u->query[querylen -1] != '&');
1959 if(querylen) {
1960 struct dynbuf qbuf;
1961 Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1962
1963 if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1964 goto nomem;
1965
1966 if(addamperand) {
1967 if(Curl_dyn_addn(&qbuf, "&", 1))
1968 goto nomem;
1969 }
1970 if(Curl_dyn_add(&qbuf, newp))
1971 goto nomem;
1972 Curl_dyn_free(&enc);
1973 free(*storep);
1974 *storep = Curl_dyn_ptr(&qbuf);
1975 return CURLUE_OK;
1976 nomem:
1977 Curl_dyn_free(&enc);
1978 return CURLUE_OUT_OF_MEMORY;
1979 }
1980 }
1981
1982 else if(what == CURLUPART_HOST) {
1983 size_t n = Curl_dyn_len(&enc);
1984 if(!n && (flags & CURLU_NO_AUTHORITY)) {
1985 /* Skip hostname check, it is allowed to be empty. */
1986 }
1987 else {
1988 bool bad = FALSE;
1989 if(!n)
1990 bad = TRUE; /* empty hostname is not okay */
1991 else if(!urlencode) {
1992 /* if the host name part was not URL encoded here, it was set ready
1993 URL encoded so we need to decode it to check */
1994 size_t dlen;
1995 char *decoded = NULL;
1996 CURLcode result =
1997 Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
1998 if(result || hostname_check(u, decoded, dlen))
1999 bad = TRUE;
2000 free(decoded);
2001 }
2002 else if(hostname_check(u, (char *)newp, n))
2003 bad = TRUE;
2004 if(bad) {
2005 Curl_dyn_free(&enc);
2006 return CURLUE_BAD_HOSTNAME;
2007 }
2008 }
2009 }
2010
2011 free(*storep);
2012 *storep = (char *)newp;
2013 }
2014 return CURLUE_OK;
2015 }
2016