xref: /curl/lib/file.c (revision 51a3b9f8)
1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * SPDX-License-Identifier: curl
22  *
23  ***************************************************************************/
24 
25 #include "curl_setup.h"
26 
27 #ifndef CURL_DISABLE_FILE
28 
29 #ifdef HAVE_NETINET_IN_H
30 #include <netinet/in.h>
31 #endif
32 #ifdef HAVE_NETDB_H
33 #include <netdb.h>
34 #endif
35 #ifdef HAVE_ARPA_INET_H
36 #include <arpa/inet.h>
37 #endif
38 #ifdef HAVE_NET_IF_H
39 #include <net/if.h>
40 #endif
41 #ifdef HAVE_SYS_IOCTL_H
42 #include <sys/ioctl.h>
43 #endif
44 
45 #ifdef HAVE_SYS_PARAM_H
46 #include <sys/param.h>
47 #endif
48 
49 #ifdef HAVE_FCNTL_H
50 #include <fcntl.h>
51 #endif
52 
53 #ifdef HAVE_SYS_TYPES_H
54 #include <sys/types.h>
55 #endif
56 
57 #ifdef HAVE_DIRENT_H
58 #include <dirent.h>
59 #endif
60 
61 #include "strtoofft.h"
62 #include "urldata.h"
63 #include <curl/curl.h>
64 #include "progress.h"
65 #include "sendf.h"
66 #include "escape.h"
67 #include "file.h"
68 #include "speedcheck.h"
69 #include "getinfo.h"
70 #include "multiif.h"
71 #include "transfer.h"
72 #include "url.h"
73 #include "parsedate.h" /* for the week day and month names */
74 #include "warnless.h"
75 #include "curl_range.h"
76 /* The last 3 #include files should be in this order */
77 #include "curl_printf.h"
78 #include "curl_memory.h"
79 #include "memdebug.h"
80 
81 #if defined(_WIN32) || defined(MSDOS) || defined(__EMX__)
82 #define DOS_FILESYSTEM 1
83 #elif defined(__amigaos4__)
84 #define AMIGA_FILESYSTEM 1
85 #endif
86 
87 #ifdef OPEN_NEEDS_ARG3
88 #  define open_readonly(p,f) open((p),(f),(0))
89 #else
90 #  define open_readonly(p,f) open((p),(f))
91 #endif
92 
93 /*
94  * Forward declarations.
95  */
96 
97 static CURLcode file_do(struct Curl_easy *data, bool *done);
98 static CURLcode file_done(struct Curl_easy *data,
99                           CURLcode status, bool premature);
100 static CURLcode file_connect(struct Curl_easy *data, bool *done);
101 static CURLcode file_disconnect(struct Curl_easy *data,
102                                 struct connectdata *conn,
103                                 bool dead_connection);
104 static CURLcode file_setup_connection(struct Curl_easy *data,
105                                       struct connectdata *conn);
106 
107 /*
108  * FILE scheme handler.
109  */
110 
111 const struct Curl_handler Curl_handler_file = {
112   "FILE",                               /* scheme */
113   file_setup_connection,                /* setup_connection */
114   file_do,                              /* do_it */
115   file_done,                            /* done */
116   ZERO_NULL,                            /* do_more */
117   file_connect,                         /* connect_it */
118   ZERO_NULL,                            /* connecting */
119   ZERO_NULL,                            /* doing */
120   ZERO_NULL,                            /* proto_getsock */
121   ZERO_NULL,                            /* doing_getsock */
122   ZERO_NULL,                            /* domore_getsock */
123   ZERO_NULL,                            /* perform_getsock */
124   file_disconnect,                      /* disconnect */
125   ZERO_NULL,                            /* write_resp */
126   ZERO_NULL,                            /* write_resp_hd */
127   ZERO_NULL,                            /* connection_check */
128   ZERO_NULL,                            /* attach connection */
129   0,                                    /* defport */
130   CURLPROTO_FILE,                       /* protocol */
131   CURLPROTO_FILE,                       /* family */
132   PROTOPT_NONETWORK | PROTOPT_NOURLQUERY /* flags */
133 };
134 
135 
file_setup_connection(struct Curl_easy * data,struct connectdata * conn)136 static CURLcode file_setup_connection(struct Curl_easy *data,
137                                       struct connectdata *conn)
138 {
139   (void)conn;
140   /* allocate the FILE specific struct */
141   data->req.p.file = calloc(1, sizeof(struct FILEPROTO));
142   if(!data->req.p.file)
143     return CURLE_OUT_OF_MEMORY;
144 
145   return CURLE_OK;
146 }
147 
148 /*
149  * file_connect() gets called from Curl_protocol_connect() to allow us to
150  * do protocol-specific actions at connect-time.  We emulate a
151  * connect-then-transfer protocol and "connect" to the file here
152  */
file_connect(struct Curl_easy * data,bool * done)153 static CURLcode file_connect(struct Curl_easy *data, bool *done)
154 {
155   char *real_path;
156   struct FILEPROTO *file = data->req.p.file;
157   int fd;
158 #ifdef DOS_FILESYSTEM
159   size_t i;
160   char *actual_path;
161 #endif
162   size_t real_path_len;
163   CURLcode result;
164 
165   if(file->path) {
166     /* already connected.
167      * the handler->connect_it() is normally only called once, but
168      * FILE does a special check on setting up the connection which
169      * calls this explicitly. */
170     *done = TRUE;
171     return CURLE_OK;
172   }
173 
174   result = Curl_urldecode(data->state.up.path, 0, &real_path,
175                           &real_path_len, REJECT_ZERO);
176   if(result)
177     return result;
178 
179 #ifdef DOS_FILESYSTEM
180   /* If the first character is a slash, and there's
181      something that looks like a drive at the beginning of
182      the path, skip the slash.  If we remove the initial
183      slash in all cases, paths without drive letters end up
184      relative to the current directory which isn't how
185      browsers work.
186 
187      Some browsers accept | instead of : as the drive letter
188      separator, so we do too.
189 
190      On other platforms, we need the slash to indicate an
191      absolute pathname.  On Windows, absolute paths start
192      with a drive letter.
193   */
194   actual_path = real_path;
195   if((actual_path[0] == '/') &&
196       actual_path[1] &&
197      (actual_path[2] == ':' || actual_path[2] == '|')) {
198     actual_path[2] = ':';
199     actual_path++;
200     real_path_len--;
201   }
202 
203   /* change path separators from '/' to '\\' for DOS, Windows and OS/2 */
204   for(i = 0; i < real_path_len; ++i)
205     if(actual_path[i] == '/')
206       actual_path[i] = '\\';
207     else if(!actual_path[i]) { /* binary zero */
208       Curl_safefree(real_path);
209       return CURLE_URL_MALFORMAT;
210     }
211 
212   fd = open_readonly(actual_path, O_RDONLY|O_BINARY);
213   file->path = actual_path;
214 #else
215   if(memchr(real_path, 0, real_path_len)) {
216     /* binary zeroes indicate foul play */
217     Curl_safefree(real_path);
218     return CURLE_URL_MALFORMAT;
219   }
220 
221   #ifdef AMIGA_FILESYSTEM
222   /*
223    * A leading slash in an AmigaDOS path denotes the parent
224    * directory, and hence we block this as it is relative.
225    * Absolute paths start with 'volumename:', so we check for
226    * this first. Failing that, we treat the path as a real unix
227    * path, but only if the application was compiled with -lunix.
228    */
229   fd = -1;
230   file->path = real_path;
231 
232   if(real_path[0] == '/') {
233     extern int __unix_path_semantics;
234     if(strchr(real_path + 1, ':')) {
235       /* Amiga absolute path */
236       fd = open_readonly(real_path + 1, O_RDONLY);
237       file->path++;
238     }
239     else if(__unix_path_semantics) {
240       /* -lunix fallback */
241       fd = open_readonly(real_path, O_RDONLY);
242     }
243   }
244   #else
245   fd = open_readonly(real_path, O_RDONLY);
246   file->path = real_path;
247   #endif
248 #endif
249   Curl_safefree(file->freepath);
250   file->freepath = real_path; /* free this when done */
251 
252   file->fd = fd;
253   if(!data->state.upload && (fd == -1)) {
254     failf(data, "Couldn't open file %s", data->state.up.path);
255     file_done(data, CURLE_FILE_COULDNT_READ_FILE, FALSE);
256     return CURLE_FILE_COULDNT_READ_FILE;
257   }
258   *done = TRUE;
259 
260   return CURLE_OK;
261 }
262 
/*
 * file_done() releases the per-transfer FILE resources: the decoded path
 * buffer is freed, the path pointer is cleared and the descriptor is closed
 * unless it is already -1. 'status' and 'premature' are ignored.
 *
 * Always returns CURLE_OK, also when no FILE state has been allocated.
 */
static CURLcode file_done(struct Curl_easy *data,
                          CURLcode status, bool premature)
{
  struct FILEPROTO *file = data->req.p.file;
  (void)status; /* not used */
  (void)premature; /* not used */

  if(!file)
    return CURLE_OK;

  Curl_safefree(file->freepath);
  file->path = NULL;

  if(file->fd != -1) {
    close(file->fd);
    file->fd = -1;
  }

  return CURLE_OK;
}
280 
file_disconnect(struct Curl_easy * data,struct connectdata * conn,bool dead_connection)281 static CURLcode file_disconnect(struct Curl_easy *data,
282                                 struct connectdata *conn,
283                                 bool dead_connection)
284 {
285   (void)dead_connection; /* not used */
286   (void)conn;
287   return file_done(data, CURLE_OK, FALSE);
288 }
289 
290 #ifdef DOS_FILESYSTEM
291 #define DIRSEP '\\'
292 #else
293 #define DIRSEP '/'
294 #endif
295 
file_upload(struct Curl_easy * data)296 static CURLcode file_upload(struct Curl_easy *data)
297 {
298   struct FILEPROTO *file = data->req.p.file;
299   const char *dir = strchr(file->path, DIRSEP);
300   int fd;
301   int mode;
302   CURLcode result = CURLE_OK;
303   char *xfer_ulbuf;
304   size_t xfer_ulblen;
305   curl_off_t bytecount = 0;
306   struct_stat file_stat;
307   const char *sendbuf;
308   bool eos = FALSE;
309 
310   /*
311    * Since FILE: doesn't do the full init, we need to provide some extra
312    * assignments here.
313    */
314 
315   if(!dir)
316     return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
317 
318   if(!dir[1])
319     return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
320 
321 #ifdef O_BINARY
322 #define MODE_DEFAULT O_WRONLY|O_CREAT|O_BINARY
323 #else
324 #define MODE_DEFAULT O_WRONLY|O_CREAT
325 #endif
326 
327   if(data->state.resume_from)
328     mode = MODE_DEFAULT|O_APPEND;
329   else
330     mode = MODE_DEFAULT|O_TRUNC;
331 
332   fd = open(file->path, mode, data->set.new_file_perms);
333   if(fd < 0) {
334     failf(data, "Can't open %s for writing", file->path);
335     return CURLE_WRITE_ERROR;
336   }
337 
338   if(-1 != data->state.infilesize)
339     /* known size of data to "upload" */
340     Curl_pgrsSetUploadSize(data, data->state.infilesize);
341 
342   /* treat the negative resume offset value as the case of "-" */
343   if(data->state.resume_from < 0) {
344     if(fstat(fd, &file_stat)) {
345       close(fd);
346       failf(data, "Can't get the size of %s", file->path);
347       return CURLE_WRITE_ERROR;
348     }
349     data->state.resume_from = (curl_off_t)file_stat.st_size;
350   }
351 
352   result = Curl_multi_xfer_ulbuf_borrow(data, &xfer_ulbuf, &xfer_ulblen);
353   if(result)
354     goto out;
355 
356   while(!result && !eos) {
357     size_t nread;
358     ssize_t nwrite;
359     size_t readcount;
360 
361     result = Curl_client_read(data, xfer_ulbuf, xfer_ulblen, &readcount, &eos);
362     if(result)
363       break;
364 
365     if(!readcount)
366       break;
367 
368     nread = readcount;
369 
370     /* skip bytes before resume point */
371     if(data->state.resume_from) {
372       if((curl_off_t)nread <= data->state.resume_from) {
373         data->state.resume_from -= nread;
374         nread = 0;
375         sendbuf = xfer_ulbuf;
376       }
377       else {
378         sendbuf = xfer_ulbuf + data->state.resume_from;
379         nread -= (size_t)data->state.resume_from;
380         data->state.resume_from = 0;
381       }
382     }
383     else
384       sendbuf = xfer_ulbuf;
385 
386     /* write the data to the target */
387     nwrite = write(fd, sendbuf, nread);
388     if((size_t)nwrite != nread) {
389       result = CURLE_SEND_ERROR;
390       break;
391     }
392 
393     bytecount += nread;
394 
395     Curl_pgrsSetUploadCounter(data, bytecount);
396 
397     if(Curl_pgrsUpdate(data))
398       result = CURLE_ABORTED_BY_CALLBACK;
399     else
400       result = Curl_speedcheck(data, Curl_now());
401   }
402   if(!result && Curl_pgrsUpdate(data))
403     result = CURLE_ABORTED_BY_CALLBACK;
404 
405 out:
406   close(fd);
407   Curl_multi_xfer_ulbuf_release(data, xfer_ulbuf);
408 
409   return result;
410 }
411 
412 /*
413  * file_do() is the protocol-specific function for the do-phase, separated
414  * from the connect-phase above. Other protocols merely setup the transfer in
415  * the do-phase, to have it done in the main transfer loop but since some
416  * platforms we support don't allow select()ing etc on file handles (as
417  * opposed to sockets) we instead perform the whole do-operation in this
418  * function.
419  */
file_do(struct Curl_easy * data,bool * done)420 static CURLcode file_do(struct Curl_easy *data, bool *done)
421 {
422   /* This implementation ignores the host name in conformance with
423      RFC 1738. Only local files (reachable via the standard file system)
424      are supported. This means that files on remotely mounted directories
425      (via NFS, Samba, NT sharing) can be accessed through a file:// URL
426   */
427   CURLcode result = CURLE_OK;
428   struct_stat statbuf; /* struct_stat instead of struct stat just to allow the
429                           Windows version to have a different struct without
430                           having to redefine the simple word 'stat' */
431   curl_off_t expected_size = -1;
432   bool size_known;
433   bool fstated = FALSE;
434   int fd;
435   struct FILEPROTO *file;
436   char *xfer_buf;
437   size_t xfer_blen;
438 
439   *done = TRUE; /* unconditionally */
440 
441   if(data->state.upload)
442     return file_upload(data);
443 
444   file = data->req.p.file;
445 
446   /* get the fd from the connection phase */
447   fd = file->fd;
448 
449   /* VMS: This only works reliable for STREAMLF files */
450   if(-1 != fstat(fd, &statbuf)) {
451     if(!S_ISDIR(statbuf.st_mode))
452       expected_size = statbuf.st_size;
453     /* and store the modification time */
454     data->info.filetime = statbuf.st_mtime;
455     fstated = TRUE;
456   }
457 
458   if(fstated && !data->state.range && data->set.timecondition &&
459      !Curl_meets_timecondition(data, data->info.filetime))
460     return CURLE_OK;
461 
462   if(fstated) {
463     time_t filetime;
464     struct tm buffer;
465     const struct tm *tm = &buffer;
466     char header[80];
467     int headerlen;
468     char accept_ranges[24]= { "Accept-ranges: bytes\r\n" };
469     if(expected_size >= 0) {
470       headerlen = msnprintf(header, sizeof(header),
471                 "Content-Length: %" CURL_FORMAT_CURL_OFF_T "\r\n",
472                 expected_size);
473       result = Curl_client_write(data, CLIENTWRITE_HEADER, header, headerlen);
474       if(result)
475         return result;
476 
477       result = Curl_client_write(data, CLIENTWRITE_HEADER,
478                                  accept_ranges, strlen(accept_ranges));
479       if(result != CURLE_OK)
480         return result;
481     }
482 
483     filetime = (time_t)statbuf.st_mtime;
484     result = Curl_gmtime(filetime, &buffer);
485     if(result)
486       return result;
487 
488     /* format: "Tue, 15 Nov 1994 12:45:26 GMT" */
489     headerlen = msnprintf(header, sizeof(header),
490               "Last-Modified: %s, %02d %s %4d %02d:%02d:%02d GMT\r\n%s",
491               Curl_wkday[tm->tm_wday?tm->tm_wday-1:6],
492               tm->tm_mday,
493               Curl_month[tm->tm_mon],
494               tm->tm_year + 1900,
495               tm->tm_hour,
496               tm->tm_min,
497               tm->tm_sec,
498               data->req.no_body ? "": "\r\n");
499     result = Curl_client_write(data, CLIENTWRITE_HEADER, header, headerlen);
500     if(result)
501       return result;
502     /* set the file size to make it available post transfer */
503     Curl_pgrsSetDownloadSize(data, expected_size);
504     if(data->req.no_body)
505       return result;
506   }
507 
508   /* Check whether file range has been specified */
509   result = Curl_range(data);
510   if(result)
511     return result;
512 
513   /* Adjust the start offset in case we want to get the N last bytes
514    * of the stream if the filesize could be determined */
515   if(data->state.resume_from < 0) {
516     if(!fstated) {
517       failf(data, "Can't get the size of file.");
518       return CURLE_READ_ERROR;
519     }
520     data->state.resume_from += (curl_off_t)statbuf.st_size;
521   }
522 
523   if(data->state.resume_from > 0) {
524     /* We check explicitly if we have a start offset, because
525      * expected_size may be -1 if we don't know how large the file is,
526      * in which case we should not adjust it. */
527     if(data->state.resume_from <= expected_size)
528       expected_size -= data->state.resume_from;
529     else {
530       failf(data, "failed to resume file:// transfer");
531       return CURLE_BAD_DOWNLOAD_RESUME;
532     }
533   }
534 
535   /* A high water mark has been specified so we obey... */
536   if(data->req.maxdownload > 0)
537     expected_size = data->req.maxdownload;
538 
539   if(!fstated || (expected_size <= 0))
540     size_known = FALSE;
541   else
542     size_known = TRUE;
543 
544   /* The following is a shortcut implementation of file reading
545      this is both more efficient than the former call to download() and
546      it avoids problems with select() and recv() on file descriptors
547      in Winsock */
548   if(size_known)
549     Curl_pgrsSetDownloadSize(data, expected_size);
550 
551   if(data->state.resume_from) {
552     if(!S_ISDIR(statbuf.st_mode)) {
553       if(data->state.resume_from !=
554           lseek(fd, data->state.resume_from, SEEK_SET))
555         return CURLE_BAD_DOWNLOAD_RESUME;
556     }
557     else {
558       return CURLE_BAD_DOWNLOAD_RESUME;
559     }
560   }
561 
562   result = Curl_multi_xfer_buf_borrow(data, &xfer_buf, &xfer_blen);
563   if(result)
564     goto out;
565 
566   if(!S_ISDIR(statbuf.st_mode)) {
567     while(!result) {
568       ssize_t nread;
569       /* Don't fill a whole buffer if we want less than all data */
570       size_t bytestoread;
571 
572       if(size_known) {
573         bytestoread = (expected_size < (curl_off_t)(xfer_blen-1)) ?
574           curlx_sotouz(expected_size) : (xfer_blen-1);
575       }
576       else
577         bytestoread = xfer_blen-1;
578 
579       nread = read(fd, xfer_buf, bytestoread);
580 
581       if(nread > 0)
582         xfer_buf[nread] = 0;
583 
584       if(nread <= 0 || (size_known && (expected_size == 0)))
585         break;
586 
587       if(size_known)
588         expected_size -= nread;
589 
590       result = Curl_client_write(data, CLIENTWRITE_BODY, xfer_buf, nread);
591       if(result)
592         goto out;
593 
594       if(Curl_pgrsUpdate(data))
595         result = CURLE_ABORTED_BY_CALLBACK;
596       else
597         result = Curl_speedcheck(data, Curl_now());
598       if(result)
599         goto out;
600     }
601   }
602   else {
603 #ifdef HAVE_OPENDIR
604     DIR *dir = opendir(file->path);
605     struct dirent *entry;
606 
607     if(!dir) {
608       result = CURLE_READ_ERROR;
609       goto out;
610     }
611     else {
612       while((entry = readdir(dir))) {
613         if(entry->d_name[0] != '.') {
614           result = Curl_client_write(data, CLIENTWRITE_BODY,
615                    entry->d_name, strlen(entry->d_name));
616           if(result)
617             break;
618           result = Curl_client_write(data, CLIENTWRITE_BODY, "\n", 1);
619           if(result)
620             break;
621         }
622       }
623       closedir(dir);
624     }
625 #else
626     failf(data, "Directory listing not yet implemented on this platform.");
627     result = CURLE_READ_ERROR;
628 #endif
629   }
630 
631   if(Curl_pgrsUpdate(data))
632     result = CURLE_ABORTED_BY_CALLBACK;
633 
634 out:
635   Curl_multi_xfer_buf_release(data, xfer_buf);
636   return result;
637 }
638 
639 #endif
640