1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24 #include "tool_setup.h"
25
26 #include "curlx.h"
27 #include "tool_cfgable.h"
28 #include "tool_doswin.h"
29 #include "tool_urlglob.h"
30 #include "tool_vms.h"
31 #include "dynbuf.h"
32
33 #include "memdebug.h" /* keep this as LAST include */
34
/*
 * GLOBERROR() stores an error message and the URL column it refers to in the
 * 'glob' variable that must be in scope where the macro is used, and then
 * evaluates to 'code'. It is written to be used as "return GLOBERROR(...);".
 *
 * The expansion is a bare, unparenthesized comma expression, so it must not
 * be embedded in a larger expression.
 */
#define GLOBERROR(string, column, code) \
  glob->error = string, glob->pos = column, code
37
glob_fixed(struct URLGlob * glob,char * fixed,size_t len)38 static CURLcode glob_fixed(struct URLGlob *glob, char *fixed, size_t len)
39 {
40 struct URLPattern *pat = &glob->pattern[glob->size];
41 pat->type = UPTSet;
42 pat->content.Set.size = 1;
43 pat->content.Set.ptr_s = 0;
44 pat->globindex = -1;
45
46 pat->content.Set.elements = malloc(sizeof(char *));
47
48 if(!pat->content.Set.elements)
49 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
50
51 pat->content.Set.elements[0] = malloc(len + 1);
52 if(!pat->content.Set.elements[0])
53 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
54
55 memcpy(pat->content.Set.elements[0], fixed, len);
56 pat->content.Set.elements[0][len] = 0;
57
58 return CURLE_OK;
59 }
60
61 /* multiply
62 *
63 * Multiplies and checks for overflow.
64 */
multiply(curl_off_t * amount,curl_off_t with)65 static int multiply(curl_off_t *amount, curl_off_t with)
66 {
67 curl_off_t sum;
68 DEBUGASSERT(*amount >= 0);
69 DEBUGASSERT(with >= 0);
70 if((with <= 0) || (*amount <= 0)) {
71 sum = 0;
72 }
73 else {
74 #if defined(__GNUC__) && \
75 ((__GNUC__ > 5) || ((__GNUC__ == 5) && (__GNUC_MINOR__ >= 1)))
76 if(__builtin_mul_overflow(*amount, with, &sum))
77 return 1;
78 #else
79 sum = *amount * with;
80 if(sum/with != *amount)
81 return 1; /* did not fit, bail out */
82 #endif
83 }
84 *amount = sum;
85 return 0;
86 }
87
glob_set(struct URLGlob * glob,char ** patternp,size_t * posp,curl_off_t * amount,int globindex)88 static CURLcode glob_set(struct URLGlob *glob, char **patternp,
89 size_t *posp, curl_off_t *amount,
90 int globindex)
91 {
92 /* processes a set expression with the point behind the opening '{'
93 ','-separated elements are collected until the next closing '}'
94 */
95 struct URLPattern *pat;
96 bool done = FALSE;
97 char *buf = glob->glob_buffer;
98 char *pattern = *patternp;
99 char *opattern = pattern;
100 size_t opos = *posp-1;
101
102 pat = &glob->pattern[glob->size];
103 /* patterns 0,1,2,... correspond to size=1,3,5,... */
104 pat->type = UPTSet;
105 pat->content.Set.size = 0;
106 pat->content.Set.ptr_s = 0;
107 pat->content.Set.elements = NULL;
108 pat->globindex = globindex;
109
110 while(!done) {
111 switch(*pattern) {
112 case '\0': /* URL ended while set was still open */
113 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
114
115 case '{':
116 case '[': /* no nested expressions at this time */
117 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
118
119 case '}': /* set element completed */
120 if(opattern == pattern)
121 return GLOBERROR("empty string within braces", *posp,
122 CURLE_URL_MALFORMAT);
123
124 /* add 1 to size since it will be incremented below */
125 if(multiply(amount, pat->content.Set.size + 1))
126 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
127
128 FALLTHROUGH();
129 case ',':
130
131 *buf = '\0';
132 if(pat->content.Set.elements) {
133 char **new_arr = realloc(pat->content.Set.elements,
134 (size_t)(pat->content.Set.size + 1) *
135 sizeof(char *));
136 if(!new_arr)
137 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
138
139 pat->content.Set.elements = new_arr;
140 }
141 else
142 pat->content.Set.elements = malloc(sizeof(char *));
143
144 if(!pat->content.Set.elements)
145 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
146
147 pat->content.Set.elements[pat->content.Set.size] =
148 strdup(glob->glob_buffer);
149 if(!pat->content.Set.elements[pat->content.Set.size])
150 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
151 ++pat->content.Set.size;
152
153 if(*pattern == '}') {
154 pattern++; /* pass the closing brace */
155 done = TRUE;
156 continue;
157 }
158
159 buf = glob->glob_buffer;
160 ++pattern;
161 ++(*posp);
162 break;
163
164 case ']': /* illegal closing bracket */
165 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
166
167 case '\\': /* escaped character, skip '\' */
168 if(pattern[1]) {
169 ++pattern;
170 ++(*posp);
171 }
172 FALLTHROUGH();
173 default:
174 *buf++ = *pattern++; /* copy character to set element */
175 ++(*posp);
176 }
177 }
178
179 *patternp = pattern; /* return with the new position */
180 return CURLE_OK;
181 }
182
/*
 * glob_range() parses a range expression into the next free pattern slot.
 *
 * On entry '*patternp' points at the character right after the opening '['.
 * A range that starts with a letter is parsed as a character range, one that
 * starts with a digit as a numeric range. Both accept an optional ":step"
 * before the closing ']'.
 *
 * On success '*patternp' points past the closing ']', '*posp' has been
 * advanced by the number of characters consumed and '*amount' has been
 * multiplied by the number of values in the range.
 *
 * Returns CURLE_OK or CURLE_URL_MALFORMAT. For errors, the message and
 * column are recorded in 'glob'.
 */
static CURLcode glob_range(struct URLGlob *glob, char **patternp,
                           size_t *posp, curl_off_t *amount,
                           int globindex)
{
  /* processes a range expression with the point behind the opening '['
     - char range: e.g. "a-z]", "B-Q]"
     - num range: e.g. "0-9]", "17-2000]"
     - num range with leading zeros: e.g. "001-999]"
     expression is checked for well-formedness and collected until the next ']'
  */
  struct URLPattern *pat;
  int rc;
  char *pattern = *patternp;
  char *c;

  pat = &glob->pattern[glob->size];
  pat->globindex = globindex;

  if(ISALPHA(*pattern)) {
    /* character range detected */
    char min_c;
    char max_c;
    char end_c;
    unsigned long step = 1;

    pat->type = UPTCharRange;

    /* expect "<min>-<max>" followed by either ']' or ':' */
    rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);

    if(rc == 3) {
      if(end_c == ':') {
        /* the step number starts at offset 4, right after "x-y:" */
        char *endp;
        errno = 0;
        step = strtoul(&pattern[4], &endp, 10);
        if(errno || &pattern[4] == endp || *endp != ']')
          step = 0; /* a zero step is rejected below */
        else
          pattern = endp + 1;
      }
      else if(end_c != ']')
        /* then this is wrong */
        rc = 0;
      else
        /* end_c == ']' */
        pattern += 4;
    }

    *posp += (pattern - *patternp);

    /* min_c, max_c and end_c are only set when rc is 3; the rc test comes
       first so they are never read otherwise */
    if(rc != 3 || !step || step > (unsigned)INT_MAX ||
       (min_c == max_c && step != 1) ||
       (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) ||
                           (max_c - min_c) > ('z' - 'a'))))
      /* the pattern is not well-formed */
      return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);

    /* if there was a ":[num]" thing, use that as step or else use 1 */
    pat->content.CharRange.step = (int)step;
    pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
    pat->content.CharRange.max_c = max_c;

    if(multiply(amount, ((pat->content.CharRange.max_c -
                          pat->content.CharRange.min_c) /
                         pat->content.CharRange.step + 1)))
      return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
  }
  else if(ISDIGIT(*pattern)) {
    /* numeric range detected */
    unsigned long min_n;
    unsigned long max_n = 0;
    unsigned long step_n = 0;
    char *endp; /* set to NULL below to flag a malformed range */

    pat->type = UPTNumRange;
    pat->content.NumRange.padlength = 0;

    if(*pattern == '0') {
      /* leading zero specified, count them! */
      c = pattern;
      while(ISDIGIT(*c)) {
        c++;
        ++pat->content.NumRange.padlength; /* padding length is set for all
                                              instances of this pattern */
      }
    }

    errno = 0;
    min_n = strtoul(pattern, &endp, 10);
    if(errno || (endp == pattern))
      endp = NULL;
    else {
      if(*endp != '-')
        endp = NULL;
      else {
        pattern = endp + 1;
        /* blanks are accepted between the '-' and the upper bound */
        while(*pattern && ISBLANK(*pattern))
          pattern++;
        if(!ISDIGIT(*pattern)) {
          endp = NULL;
          goto fail;
        }
        errno = 0;
        max_n = strtoul(pattern, &endp, 10);
        if(errno)
          /* overflow */
          endp = NULL;
        else if(*endp == ':') {
          pattern = endp + 1;
          errno = 0;
          step_n = strtoul(pattern, &endp, 10);
          if(errno)
            /* over/underflow situation */
            endp = NULL;
        }
        else
          step_n = 1;
        if(endp && (*endp == ']')) {
          pattern = endp + 1;
        }
        else
          endp = NULL;
      }
    }

fail:
    *posp += (pattern - *patternp);

    if(!endp || !step_n ||
       (min_n == max_n && step_n != 1) ||
       (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n))))
      /* the pattern is not well-formed */
      return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);

    /* NOTE(review): min_n, max_n and step_n are unsigned long and nothing
       above limits them beyond what strtoul() accepts. The NumRange fields
       they are stored in are declared outside this file - confirm that
       every accepted value fits them, and that the element count computed
       below cannot overflow. */
    pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
    pat->content.NumRange.max_n = max_n;
    pat->content.NumRange.step = step_n;

    if(multiply(amount, ((pat->content.NumRange.max_n -
                          pat->content.NumRange.min_n) /
                         pat->content.NumRange.step + 1)))
      return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
  }
  else
    return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);

  *patternp = pattern;
  return CURLE_OK;
}
333
334 #define MAX_IP6LEN 128
335
peek_ipv6(const char * str,size_t * skip)336 static bool peek_ipv6(const char *str, size_t *skip)
337 {
338 /*
339 * Scan for a potential IPv6 literal.
340 * - Valid globs contain a hyphen and <= 1 colon.
341 * - IPv6 literals contain no hyphens and >= 2 colons.
342 */
343 char hostname[MAX_IP6LEN];
344 CURLU *u;
345 char *endbr = strchr(str, ']');
346 size_t hlen;
347 CURLUcode rc;
348 if(!endbr)
349 return FALSE;
350
351 hlen = endbr - str + 1;
352 if(hlen >= MAX_IP6LEN)
353 return FALSE;
354
355 u = curl_url();
356 if(!u)
357 return FALSE;
358
359 memcpy(hostname, str, hlen);
360 hostname[hlen] = 0;
361
362 /* ask to "guess scheme" as then it works without an https:// prefix */
363 rc = curl_url_set(u, CURLUPART_URL, hostname, CURLU_GUESS_SCHEME);
364
365 curl_url_cleanup(u);
366 if(!rc)
367 *skip = hlen;
368 return rc ? FALSE : TRUE;
369 }
370
glob_parse(struct URLGlob * glob,char * pattern,size_t pos,curl_off_t * amount)371 static CURLcode glob_parse(struct URLGlob *glob, char *pattern,
372 size_t pos, curl_off_t *amount)
373 {
374 /* processes a literal string component of a URL
375 special characters '{' and '[' branch to set/range processing functions
376 */
377 CURLcode res = CURLE_OK;
378 int globindex = 0; /* count "actual" globs */
379
380 *amount = 1;
381
382 while(*pattern && !res) {
383 char *buf = glob->glob_buffer;
384 size_t sublen = 0;
385 while(*pattern && *pattern != '{') {
386 if(*pattern == '[') {
387 /* skip over IPv6 literals and [] */
388 size_t skip = 0;
389 if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']'))
390 skip = 2;
391 if(skip) {
392 memcpy(buf, pattern, skip);
393 buf += skip;
394 pattern += skip;
395 sublen += skip;
396 continue;
397 }
398 break;
399 }
400 if(*pattern == '}' || *pattern == ']')
401 return GLOBERROR("unmatched close brace/bracket", pos,
402 CURLE_URL_MALFORMAT);
403
404 /* only allow \ to escape known "special letters" */
405 if(*pattern == '\\' &&
406 (*(pattern + 1) == '{' || *(pattern + 1) == '[' ||
407 *(pattern + 1) == '}' || *(pattern + 1) == ']') ) {
408
409 /* escape character, skip '\' */
410 ++pattern;
411 ++pos;
412 }
413 *buf++ = *pattern++; /* copy character to literal */
414 ++pos;
415 sublen++;
416 }
417 if(sublen) {
418 /* we got a literal string, add it as a single-item list */
419 *buf = '\0';
420 res = glob_fixed(glob, glob->glob_buffer, sublen);
421 }
422 else {
423 switch(*pattern) {
424 case '\0': /* done */
425 break;
426
427 case '{':
428 /* process set pattern */
429 pattern++;
430 pos++;
431 res = glob_set(glob, &pattern, &pos, amount, globindex++);
432 break;
433
434 case '[':
435 /* process range pattern */
436 pattern++;
437 pos++;
438 res = glob_range(glob, &pattern, &pos, amount, globindex++);
439 break;
440 }
441 }
442
443 if(++glob->size >= GLOB_PATTERN_NUM)
444 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
445 }
446 return res;
447 }
448
glob_url(struct URLGlob ** glob,char * url,curl_off_t * urlnum,FILE * error)449 CURLcode glob_url(struct URLGlob **glob, char *url, curl_off_t *urlnum,
450 FILE *error)
451 {
452 /*
453 * We can deal with any-size, just make a buffer with the same length
454 * as the specified URL!
455 */
456 struct URLGlob *glob_expand;
457 curl_off_t amount = 0;
458 char *glob_buffer;
459 CURLcode res;
460
461 *glob = NULL;
462
463 glob_buffer = malloc(strlen(url) + 1);
464 if(!glob_buffer)
465 return CURLE_OUT_OF_MEMORY;
466 glob_buffer[0] = 0;
467
468 glob_expand = calloc(1, sizeof(struct URLGlob));
469 if(!glob_expand) {
470 Curl_safefree(glob_buffer);
471 return CURLE_OUT_OF_MEMORY;
472 }
473 glob_expand->urllen = strlen(url);
474 glob_expand->glob_buffer = glob_buffer;
475
476 res = glob_parse(glob_expand, url, 1, &amount);
477 if(!res)
478 *urlnum = amount;
479 else {
480 if(error && glob_expand->error) {
481 char text[512];
482 const char *t;
483 if(glob_expand->pos) {
484 msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^",
485 glob_expand->error,
486 glob_expand->pos, url, (int)glob_expand->pos - 1, " ");
487 t = text;
488 }
489 else
490 t = glob_expand->error;
491
492 /* send error description to the error-stream */
493 fprintf(error, "curl: (%d) %s\n", res, t);
494 }
495 /* it failed, we cleanup */
496 glob_cleanup(&glob_expand);
497 *urlnum = 1;
498 return res;
499 }
500
501 *glob = glob_expand;
502 return CURLE_OK;
503 }
504
glob_cleanup(struct URLGlob ** globp)505 void glob_cleanup(struct URLGlob **globp)
506 {
507 size_t i;
508 curl_off_t elem;
509 struct URLGlob *glob = *globp;
510
511 if(!glob)
512 return;
513
514 for(i = 0; i < glob->size; i++) {
515 if((glob->pattern[i].type == UPTSet) &&
516 (glob->pattern[i].content.Set.elements)) {
517 for(elem = glob->pattern[i].content.Set.size - 1;
518 elem >= 0;
519 --elem) {
520 Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
521 }
522 Curl_safefree(glob->pattern[i].content.Set.elements);
523 }
524 }
525 Curl_safefree(glob->glob_buffer);
526 Curl_safefree(glob);
527 *globp = NULL;
528 }
529
glob_next_url(char ** globbed,struct URLGlob * glob)530 CURLcode glob_next_url(char **globbed, struct URLGlob *glob)
531 {
532 struct URLPattern *pat;
533 size_t i;
534 size_t len;
535 size_t buflen = glob->urllen + 1;
536 char *buf = glob->glob_buffer;
537
538 *globbed = NULL;
539
540 if(!glob->beenhere)
541 glob->beenhere = 1;
542 else {
543 bool carry = TRUE;
544
545 /* implement a counter over the index ranges of all patterns, starting
546 with the rightmost pattern */
547 for(i = 0; carry && (i < glob->size); i++) {
548 carry = FALSE;
549 pat = &glob->pattern[glob->size - 1 - i];
550 switch(pat->type) {
551 case UPTSet:
552 if((pat->content.Set.elements) &&
553 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
554 pat->content.Set.ptr_s = 0;
555 carry = TRUE;
556 }
557 break;
558 case UPTCharRange:
559 pat->content.CharRange.ptr_c =
560 (char)(pat->content.CharRange.step +
561 (int)((unsigned char)pat->content.CharRange.ptr_c));
562 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
563 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
564 carry = TRUE;
565 }
566 break;
567 case UPTNumRange:
568 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
569 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
570 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
571 carry = TRUE;
572 }
573 break;
574 default:
575 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
576 return CURLE_FAILED_INIT;
577 }
578 }
579 if(carry) { /* first pattern ptr has run into overflow, done! */
580 return CURLE_OK;
581 }
582 }
583
584 for(i = 0; i < glob->size; ++i) {
585 pat = &glob->pattern[i];
586 switch(pat->type) {
587 case UPTSet:
588 if(pat->content.Set.elements) {
589 msnprintf(buf, buflen, "%s",
590 pat->content.Set.elements[pat->content.Set.ptr_s]);
591 len = strlen(buf);
592 buf += len;
593 buflen -= len;
594 }
595 break;
596 case UPTCharRange:
597 if(buflen) {
598 *buf++ = pat->content.CharRange.ptr_c;
599 *buf = '\0';
600 buflen--;
601 }
602 break;
603 case UPTNumRange:
604 msnprintf(buf, buflen, "%0*" CURL_FORMAT_CURL_OFF_T,
605 pat->content.NumRange.padlength,
606 pat->content.NumRange.ptr_n);
607 len = strlen(buf);
608 buf += len;
609 buflen -= len;
610 break;
611 default:
612 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
613 return CURLE_FAILED_INIT;
614 }
615 }
616
617 *globbed = strdup(glob->glob_buffer);
618 if(!*globbed)
619 return CURLE_OUT_OF_MEMORY;
620
621 return CURLE_OK;
622 }
623
624 #define MAX_OUTPUT_GLOB_LENGTH (10*1024)
625
glob_match_url(char ** result,char * filename,struct URLGlob * glob)626 CURLcode glob_match_url(char **result, char *filename, struct URLGlob *glob)
627 {
628 char numbuf[18];
629 char *appendthis = (char *)"";
630 size_t appendlen = 0;
631 struct curlx_dynbuf dyn;
632
633 *result = NULL;
634
635 /* We cannot use the glob_buffer for storage since the filename may be
636 * longer than the URL we use.
637 */
638 curlx_dyn_init(&dyn, MAX_OUTPUT_GLOB_LENGTH);
639
640 while(*filename) {
641 if(*filename == '#' && ISDIGIT(filename[1])) {
642 char *ptr = filename;
643 unsigned long num = strtoul(&filename[1], &filename, 10);
644 struct URLPattern *pat = NULL;
645
646 if(num && (num < glob->size)) {
647 unsigned long i;
648 num--; /* make it zero based */
649 /* find the correct glob entry */
650 for(i = 0; i < glob->size; i++) {
651 if(glob->pattern[i].globindex == (int)num) {
652 pat = &glob->pattern[i];
653 break;
654 }
655 }
656 }
657
658 if(pat) {
659 switch(pat->type) {
660 case UPTSet:
661 if(pat->content.Set.elements) {
662 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
663 appendlen =
664 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
665 }
666 break;
667 case UPTCharRange:
668 numbuf[0] = pat->content.CharRange.ptr_c;
669 numbuf[1] = 0;
670 appendthis = numbuf;
671 appendlen = 1;
672 break;
673 case UPTNumRange:
674 msnprintf(numbuf, sizeof(numbuf), "%0*" CURL_FORMAT_CURL_OFF_T,
675 pat->content.NumRange.padlength,
676 pat->content.NumRange.ptr_n);
677 appendthis = numbuf;
678 appendlen = strlen(numbuf);
679 break;
680 default:
681 fprintf(tool_stderr, "internal error: invalid pattern type (%d)\n",
682 (int)pat->type);
683 curlx_dyn_free(&dyn);
684 return CURLE_FAILED_INIT;
685 }
686 }
687 else {
688 /* #[num] out of range, use the #[num] in the output */
689 filename = ptr;
690 appendthis = filename++;
691 appendlen = 1;
692 }
693 }
694 else {
695 appendthis = filename++;
696 appendlen = 1;
697 }
698 if(curlx_dyn_addn(&dyn, appendthis, appendlen))
699 return CURLE_OUT_OF_MEMORY;
700 }
701
702 if(curlx_dyn_addn(&dyn, "", 0))
703 return CURLE_OUT_OF_MEMORY;
704
705 #if defined(_WIN32) || defined(MSDOS)
706 {
707 char *sanitized;
708 SANITIZEcode sc = sanitize_file_name(&sanitized, curlx_dyn_ptr(&dyn),
709 (SANITIZE_ALLOW_PATH |
710 SANITIZE_ALLOW_RESERVED));
711 curlx_dyn_free(&dyn);
712 if(sc)
713 return CURLE_URL_MALFORMAT;
714 *result = sanitized;
715 return CURLE_OK;
716 }
717 #else
718 *result = curlx_dyn_ptr(&dyn);
719 return CURLE_OK;
720 #endif /* _WIN32 || MSDOS */
721 }
722