1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24 #include "tool_setup.h"
25
26 #include "curlx.h"
27 #include "tool_cfgable.h"
28 #include "tool_doswin.h"
29 #include "tool_urlglob.h"
30 #include "tool_vms.h"
31 #include "dynbuf.h"
32
33 #include "memdebug.h" /* keep this as LAST include */
34
/*
 * GLOBERROR() records an error description and a column in the object that
 * the local variable 'glob' points to, and evaluates to 'code'. A variable
 * named 'glob' must therefore be in scope wherever the macro is used.
 *
 * The arguments and the whole expansion are parenthesized so that the macro
 * behaves as a single expression in any context, not only after 'return'.
 */
#define GLOBERROR(string, column, code) \
  (glob->error = (string), glob->pos = (column), (code))
37
glob_fixed(struct URLGlob * glob,char * fixed,size_t len)38 static CURLcode glob_fixed(struct URLGlob *glob, char *fixed, size_t len)
39 {
40 struct URLPattern *pat = &glob->pattern[glob->size];
41 pat->type = UPTSet;
42 pat->content.Set.size = 1;
43 pat->content.Set.ptr_s = 0;
44 pat->globindex = -1;
45
46 pat->content.Set.elements = malloc(sizeof(char *));
47
48 if(!pat->content.Set.elements)
49 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
50
51 pat->content.Set.elements[0] = malloc(len + 1);
52 if(!pat->content.Set.elements[0])
53 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
54
55 memcpy(pat->content.Set.elements[0], fixed, len);
56 pat->content.Set.elements[0][len] = 0;
57
58 return CURLE_OK;
59 }
60
61 /* multiply
62 *
63 * Multiplies and checks for overflow.
64 */
multiply(curl_off_t * amount,curl_off_t with)65 static int multiply(curl_off_t *amount, curl_off_t with)
66 {
67 curl_off_t sum;
68 DEBUGASSERT(*amount >= 0);
69 DEBUGASSERT(with >= 0);
70 if((with <= 0) || (*amount <= 0)) {
71 sum = 0;
72 }
73 else {
74 #if defined(__GNUC__) && \
75 ((__GNUC__ > 5) || ((__GNUC__ == 5) && (__GNUC_MINOR__ >= 1)))
76 if(__builtin_mul_overflow(*amount, with, &sum))
77 return 1;
78 #else
79 sum = *amount * with;
80 if(sum/with != *amount)
81 return 1; /* did not fit, bail out */
82 #endif
83 }
84 *amount = sum;
85 return 0;
86 }
87
glob_set(struct URLGlob * glob,char ** patternp,size_t * posp,curl_off_t * amount,int globindex)88 static CURLcode glob_set(struct URLGlob *glob, char **patternp,
89 size_t *posp, curl_off_t *amount,
90 int globindex)
91 {
92 /* processes a set expression with the point behind the opening '{'
93 ','-separated elements are collected until the next closing '}'
94 */
95 struct URLPattern *pat;
96 bool done = FALSE;
97 char *buf = glob->glob_buffer;
98 char *pattern = *patternp;
99 char *opattern = pattern;
100 size_t opos = *posp-1;
101
102 pat = &glob->pattern[glob->size];
103 /* patterns 0,1,2,... correspond to size=1,3,5,... */
104 pat->type = UPTSet;
105 pat->content.Set.size = 0;
106 pat->content.Set.ptr_s = 0;
107 pat->content.Set.elements = NULL;
108 pat->globindex = globindex;
109
110 while(!done) {
111 switch(*pattern) {
112 case '\0': /* URL ended while set was still open */
113 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
114
115 case '{':
116 case '[': /* no nested expressions at this time */
117 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
118
119 case '}': /* set element completed */
120 if(opattern == pattern)
121 return GLOBERROR("empty string within braces", *posp,
122 CURLE_URL_MALFORMAT);
123
124 /* add 1 to size since it will be incremented below */
125 if(multiply(amount, pat->content.Set.size + 1))
126 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
127
128 FALLTHROUGH();
129 case ',':
130
131 *buf = '\0';
132 if(pat->content.Set.elements) {
133 char **new_arr = realloc(pat->content.Set.elements,
134 (size_t)(pat->content.Set.size + 1) *
135 sizeof(char *));
136 if(!new_arr)
137 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
138
139 pat->content.Set.elements = new_arr;
140 }
141 else
142 pat->content.Set.elements = malloc(sizeof(char *));
143
144 if(!pat->content.Set.elements)
145 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
146
147 pat->content.Set.elements[pat->content.Set.size] =
148 strdup(glob->glob_buffer);
149 if(!pat->content.Set.elements[pat->content.Set.size])
150 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
151 ++pat->content.Set.size;
152
153 if(*pattern == '}') {
154 pattern++; /* pass the closing brace */
155 done = TRUE;
156 continue;
157 }
158
159 buf = glob->glob_buffer;
160 ++pattern;
161 ++(*posp);
162 break;
163
164 case ']': /* illegal closing bracket */
165 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
166
167 case '\\': /* escaped character, skip '\' */
168 if(pattern[1]) {
169 ++pattern;
170 ++(*posp);
171 }
172 FALLTHROUGH();
173 default:
174 *buf++ = *pattern++; /* copy character to set element */
175 ++(*posp);
176 }
177 }
178
179 *patternp = pattern; /* return with the new position */
180 return CURLE_OK;
181 }
182
glob_range(struct URLGlob * glob,char ** patternp,size_t * posp,curl_off_t * amount,int globindex)183 static CURLcode glob_range(struct URLGlob *glob, char **patternp,
184 size_t *posp, curl_off_t *amount,
185 int globindex)
186 {
187 /* processes a range expression with the point behind the opening '['
188 - char range: e.g. "a-z]", "B-Q]"
189 - num range: e.g. "0-9]", "17-2000]"
190 - num range with leading zeros: e.g. "001-999]"
191 expression is checked for well-formedness and collected until the next ']'
192 */
193 struct URLPattern *pat;
194 int rc;
195 char *pattern = *patternp;
196 char *c;
197
198 pat = &glob->pattern[glob->size];
199 pat->globindex = globindex;
200
201 if(ISALPHA(*pattern)) {
202 /* character range detected */
203 char min_c;
204 char max_c;
205 char end_c;
206 unsigned long step = 1;
207
208 pat->type = UPTCharRange;
209
210 rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
211
212 if(rc == 3) {
213 if(end_c == ':') {
214 char *endp;
215 errno = 0;
216 step = strtoul(&pattern[4], &endp, 10);
217 if(errno || &pattern[4] == endp || *endp != ']')
218 step = 0;
219 else
220 pattern = endp + 1;
221 }
222 else if(end_c != ']')
223 /* then this is wrong */
224 rc = 0;
225 else
226 /* end_c == ']' */
227 pattern += 4;
228 }
229
230 *posp += (pattern - *patternp);
231
232 if(rc != 3 || !step || step > (unsigned)INT_MAX ||
233 (min_c == max_c && step != 1) ||
234 (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) ||
235 (max_c - min_c) > ('z' - 'a'))))
236 /* the pattern is not well-formed */
237 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
238
239 /* if there was a ":[num]" thing, use that as step or else use 1 */
240 pat->content.CharRange.step = (int)step;
241 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
242 pat->content.CharRange.max_c = max_c;
243
244 if(multiply(amount, ((pat->content.CharRange.max_c -
245 pat->content.CharRange.min_c) /
246 pat->content.CharRange.step + 1)))
247 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
248 }
249 else if(ISDIGIT(*pattern)) {
250 /* numeric range detected */
251 unsigned long min_n;
252 unsigned long max_n = 0;
253 unsigned long step_n = 0;
254 char *endp;
255
256 pat->type = UPTNumRange;
257 pat->content.NumRange.padlength = 0;
258
259 if(*pattern == '0') {
260 /* leading zero specified, count them! */
261 c = pattern;
262 while(ISDIGIT(*c)) {
263 c++;
264 ++pat->content.NumRange.padlength; /* padding length is set for all
265 instances of this pattern */
266 }
267 }
268
269 errno = 0;
270 min_n = strtoul(pattern, &endp, 10);
271 if(errno || (endp == pattern))
272 endp = NULL;
273 else {
274 if(*endp != '-')
275 endp = NULL;
276 else {
277 pattern = endp + 1;
278 while(*pattern && ISBLANK(*pattern))
279 pattern++;
280 if(!ISDIGIT(*pattern)) {
281 endp = NULL;
282 goto fail;
283 }
284 errno = 0;
285 max_n = strtoul(pattern, &endp, 10);
286 if(errno)
287 /* overflow */
288 endp = NULL;
289 else if(*endp == ':') {
290 pattern = endp + 1;
291 errno = 0;
292 step_n = strtoul(pattern, &endp, 10);
293 if(errno)
294 /* over/underflow situation */
295 endp = NULL;
296 }
297 else
298 step_n = 1;
299 if(endp && (*endp == ']')) {
300 pattern = endp + 1;
301 }
302 else
303 endp = NULL;
304 }
305 }
306
307 fail:
308 *posp += (pattern - *patternp);
309
310 if(!endp || !step_n ||
311 (min_n == max_n && step_n != 1) ||
312 (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n))))
313 /* the pattern is not well-formed */
314 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
315
316 /* typecasting to ints are fine here since we make sure above that we
317 are within 31 bits */
318 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
319 pat->content.NumRange.max_n = max_n;
320 pat->content.NumRange.step = step_n;
321
322 if(multiply(amount, ((pat->content.NumRange.max_n -
323 pat->content.NumRange.min_n) /
324 pat->content.NumRange.step + 1)))
325 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
326 }
327 else
328 return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
329
330 *patternp = pattern;
331 return CURLE_OK;
332 }
333
334 #define MAX_IP6LEN 128
335
peek_ipv6(const char * str,size_t * skip)336 static bool peek_ipv6(const char *str, size_t *skip)
337 {
338 /*
339 * Scan for a potential IPv6 literal.
340 * - Valid globs contain a hyphen and <= 1 colon.
341 * - IPv6 literals contain no hyphens and >= 2 colons.
342 */
343 char hostname[MAX_IP6LEN];
344 CURLU *u;
345 char *endbr = strchr(str, ']');
346 size_t hlen;
347 CURLUcode rc;
348 if(!endbr)
349 return FALSE;
350
351 hlen = endbr - str + 1;
352 if(hlen >= MAX_IP6LEN)
353 return FALSE;
354
355 u = curl_url();
356 if(!u)
357 return FALSE;
358
359 memcpy(hostname, str, hlen);
360 hostname[hlen] = 0;
361
362 /* ask to "guess scheme" as then it works without an https:// prefix */
363 rc = curl_url_set(u, CURLUPART_URL, hostname, CURLU_GUESS_SCHEME);
364
365 curl_url_cleanup(u);
366 if(!rc)
367 *skip = hlen;
368 return rc ? FALSE : TRUE;
369 }
370
glob_parse(struct URLGlob * glob,char * pattern,size_t pos,curl_off_t * amount)371 static CURLcode glob_parse(struct URLGlob *glob, char *pattern,
372 size_t pos, curl_off_t *amount)
373 {
374 /* processes a literal string component of a URL
375 special characters '{' and '[' branch to set/range processing functions
376 */
377 CURLcode res = CURLE_OK;
378 int globindex = 0; /* count "actual" globs */
379
380 *amount = 1;
381
382 while(*pattern && !res) {
383 char *buf = glob->glob_buffer;
384 size_t sublen = 0;
385 while(*pattern && *pattern != '{') {
386 if(*pattern == '[') {
387 /* skip over IPv6 literals and [] */
388 size_t skip = 0;
389 if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']'))
390 skip = 2;
391 if(skip) {
392 memcpy(buf, pattern, skip);
393 buf += skip;
394 pattern += skip;
395 sublen += skip;
396 continue;
397 }
398 break;
399 }
400 if(*pattern == '}' || *pattern == ']')
401 return GLOBERROR("unmatched close brace/bracket", pos,
402 CURLE_URL_MALFORMAT);
403
404 /* only allow \ to escape known "special letters" */
405 if(*pattern == '\\' &&
406 (*(pattern + 1) == '{' || *(pattern + 1) == '[' ||
407 *(pattern + 1) == '}' || *(pattern + 1) == ']') ) {
408
409 /* escape character, skip '\' */
410 ++pattern;
411 ++pos;
412 }
413 *buf++ = *pattern++; /* copy character to literal */
414 ++pos;
415 sublen++;
416 }
417 if(sublen) {
418 /* we got a literal string, add it as a single-item list */
419 *buf = '\0';
420 res = glob_fixed(glob, glob->glob_buffer, sublen);
421 }
422 else {
423 switch(*pattern) {
424 case '\0': /* done */
425 break;
426
427 case '{':
428 /* process set pattern */
429 pattern++;
430 pos++;
431 res = glob_set(glob, &pattern, &pos, amount, globindex++);
432 break;
433
434 case '[':
435 /* process range pattern */
436 pattern++;
437 pos++;
438 res = glob_range(glob, &pattern, &pos, amount, globindex++);
439 break;
440 }
441 }
442
443 if(++glob->size >= GLOB_PATTERN_NUM)
444 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
445 }
446 return res;
447 }
448
glob_url(struct URLGlob ** glob,char * url,curl_off_t * urlnum,FILE * error)449 CURLcode glob_url(struct URLGlob **glob, char *url, curl_off_t *urlnum,
450 FILE *error)
451 {
452 /*
453 * We can deal with any-size, just make a buffer with the same length
454 * as the specified URL!
455 */
456 struct URLGlob *glob_expand;
457 curl_off_t amount = 0;
458 char *glob_buffer;
459 CURLcode res;
460
461 *glob = NULL;
462
463 glob_buffer = malloc(strlen(url) + 1);
464 if(!glob_buffer)
465 return CURLE_OUT_OF_MEMORY;
466 glob_buffer[0] = 0;
467
468 glob_expand = calloc(1, sizeof(struct URLGlob));
469 if(!glob_expand) {
470 Curl_safefree(glob_buffer);
471 return CURLE_OUT_OF_MEMORY;
472 }
473 glob_expand->urllen = strlen(url);
474 glob_expand->glob_buffer = glob_buffer;
475
476 res = glob_parse(glob_expand, url, 1, &amount);
477 if(!res)
478 *urlnum = amount;
479 else {
480 if(error && glob_expand->error) {
481 char text[512];
482 const char *t;
483 if(glob_expand->pos) {
484 msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^",
485 glob_expand->error,
486 glob_expand->pos, url, (int)glob_expand->pos - 1, " ");
487 t = text;
488 }
489 else
490 t = glob_expand->error;
491
492 /* send error description to the error-stream */
493 fprintf(error, "curl: (%d) %s\n", res, t);
494 }
495 /* it failed, we cleanup */
496 glob_cleanup(glob_expand);
497 *urlnum = 1;
498 return res;
499 }
500
501 *glob = glob_expand;
502 return CURLE_OK;
503 }
504
glob_cleanup(struct URLGlob * glob)505 void glob_cleanup(struct URLGlob *glob)
506 {
507 size_t i;
508 curl_off_t elem;
509
510 if(!glob)
511 return;
512
513 for(i = 0; i < glob->size; i++) {
514 if((glob->pattern[i].type == UPTSet) &&
515 (glob->pattern[i].content.Set.elements)) {
516 for(elem = glob->pattern[i].content.Set.size - 1;
517 elem >= 0;
518 --elem) {
519 Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
520 }
521 Curl_safefree(glob->pattern[i].content.Set.elements);
522 }
523 }
524 Curl_safefree(glob->glob_buffer);
525 Curl_safefree(glob);
526 }
527
glob_next_url(char ** globbed,struct URLGlob * glob)528 CURLcode glob_next_url(char **globbed, struct URLGlob *glob)
529 {
530 struct URLPattern *pat;
531 size_t i;
532 size_t len;
533 size_t buflen = glob->urllen + 1;
534 char *buf = glob->glob_buffer;
535
536 *globbed = NULL;
537
538 if(!glob->beenhere)
539 glob->beenhere = 1;
540 else {
541 bool carry = TRUE;
542
543 /* implement a counter over the index ranges of all patterns, starting
544 with the rightmost pattern */
545 for(i = 0; carry && (i < glob->size); i++) {
546 carry = FALSE;
547 pat = &glob->pattern[glob->size - 1 - i];
548 switch(pat->type) {
549 case UPTSet:
550 if((pat->content.Set.elements) &&
551 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
552 pat->content.Set.ptr_s = 0;
553 carry = TRUE;
554 }
555 break;
556 case UPTCharRange:
557 pat->content.CharRange.ptr_c =
558 (char)(pat->content.CharRange.step +
559 (int)((unsigned char)pat->content.CharRange.ptr_c));
560 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
561 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
562 carry = TRUE;
563 }
564 break;
565 case UPTNumRange:
566 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
567 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
568 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
569 carry = TRUE;
570 }
571 break;
572 default:
573 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
574 return CURLE_FAILED_INIT;
575 }
576 }
577 if(carry) { /* first pattern ptr has run into overflow, done! */
578 return CURLE_OK;
579 }
580 }
581
582 for(i = 0; i < glob->size; ++i) {
583 pat = &glob->pattern[i];
584 switch(pat->type) {
585 case UPTSet:
586 if(pat->content.Set.elements) {
587 msnprintf(buf, buflen, "%s",
588 pat->content.Set.elements[pat->content.Set.ptr_s]);
589 len = strlen(buf);
590 buf += len;
591 buflen -= len;
592 }
593 break;
594 case UPTCharRange:
595 if(buflen) {
596 *buf++ = pat->content.CharRange.ptr_c;
597 *buf = '\0';
598 buflen--;
599 }
600 break;
601 case UPTNumRange:
602 msnprintf(buf, buflen, "%0*" CURL_FORMAT_CURL_OFF_T,
603 pat->content.NumRange.padlength,
604 pat->content.NumRange.ptr_n);
605 len = strlen(buf);
606 buf += len;
607 buflen -= len;
608 break;
609 default:
610 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
611 return CURLE_FAILED_INIT;
612 }
613 }
614
615 *globbed = strdup(glob->glob_buffer);
616 if(!*globbed)
617 return CURLE_OUT_OF_MEMORY;
618
619 return CURLE_OK;
620 }
621
622 #define MAX_OUTPUT_GLOB_LENGTH (10*1024)
623
glob_match_url(char ** result,char * filename,struct URLGlob * glob)624 CURLcode glob_match_url(char **result, char *filename, struct URLGlob *glob)
625 {
626 char numbuf[18];
627 char *appendthis = (char *)"";
628 size_t appendlen = 0;
629 struct curlx_dynbuf dyn;
630
631 *result = NULL;
632
633 /* We cannot use the glob_buffer for storage since the filename may be
634 * longer than the URL we use.
635 */
636 curlx_dyn_init(&dyn, MAX_OUTPUT_GLOB_LENGTH);
637
638 while(*filename) {
639 if(*filename == '#' && ISDIGIT(filename[1])) {
640 char *ptr = filename;
641 unsigned long num = strtoul(&filename[1], &filename, 10);
642 struct URLPattern *pat = NULL;
643
644 if(num && (num < glob->size)) {
645 unsigned long i;
646 num--; /* make it zero based */
647 /* find the correct glob entry */
648 for(i = 0; i < glob->size; i++) {
649 if(glob->pattern[i].globindex == (int)num) {
650 pat = &glob->pattern[i];
651 break;
652 }
653 }
654 }
655
656 if(pat) {
657 switch(pat->type) {
658 case UPTSet:
659 if(pat->content.Set.elements) {
660 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
661 appendlen =
662 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
663 }
664 break;
665 case UPTCharRange:
666 numbuf[0] = pat->content.CharRange.ptr_c;
667 numbuf[1] = 0;
668 appendthis = numbuf;
669 appendlen = 1;
670 break;
671 case UPTNumRange:
672 msnprintf(numbuf, sizeof(numbuf), "%0*" CURL_FORMAT_CURL_OFF_T,
673 pat->content.NumRange.padlength,
674 pat->content.NumRange.ptr_n);
675 appendthis = numbuf;
676 appendlen = strlen(numbuf);
677 break;
678 default:
679 fprintf(tool_stderr, "internal error: invalid pattern type (%d)\n",
680 (int)pat->type);
681 curlx_dyn_free(&dyn);
682 return CURLE_FAILED_INIT;
683 }
684 }
685 else {
686 /* #[num] out of range, use the #[num] in the output */
687 filename = ptr;
688 appendthis = filename++;
689 appendlen = 1;
690 }
691 }
692 else {
693 appendthis = filename++;
694 appendlen = 1;
695 }
696 if(curlx_dyn_addn(&dyn, appendthis, appendlen))
697 return CURLE_OUT_OF_MEMORY;
698 }
699
700 if(curlx_dyn_addn(&dyn, "", 0))
701 return CURLE_OUT_OF_MEMORY;
702
703 #if defined(_WIN32) || defined(MSDOS)
704 {
705 char *sanitized;
706 SANITIZEcode sc = sanitize_file_name(&sanitized, curlx_dyn_ptr(&dyn),
707 (SANITIZE_ALLOW_PATH |
708 SANITIZE_ALLOW_RESERVED));
709 curlx_dyn_free(&dyn);
710 if(sc)
711 return CURLE_URL_MALFORMAT;
712 *result = sanitized;
713 return CURLE_OK;
714 }
715 #else
716 *result = curlx_dyn_ptr(&dyn);
717 return CURLE_OK;
718 #endif /* _WIN32 || MSDOS */
719 }
720