xref: /curl/lib/file.c (revision fbf5d507)
1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * SPDX-License-Identifier: curl
22  *
23  ***************************************************************************/
24 
25 #include "curl_setup.h"
26 
27 #ifndef CURL_DISABLE_FILE
28 
29 #ifdef HAVE_NETINET_IN_H
30 #include <netinet/in.h>
31 #endif
32 #ifdef HAVE_NETDB_H
33 #include <netdb.h>
34 #endif
35 #ifdef HAVE_ARPA_INET_H
36 #include <arpa/inet.h>
37 #endif
38 #ifdef HAVE_NET_IF_H
39 #include <net/if.h>
40 #endif
41 #ifdef HAVE_SYS_IOCTL_H
42 #include <sys/ioctl.h>
43 #endif
44 
45 #ifdef HAVE_SYS_PARAM_H
46 #include <sys/param.h>
47 #endif
48 
49 #ifdef HAVE_FCNTL_H
50 #include <fcntl.h>
51 #endif
52 
53 #ifdef HAVE_SYS_TYPES_H
54 #include <sys/types.h>
55 #endif
56 
57 #ifdef HAVE_DIRENT_H
58 #include <dirent.h>
59 #endif
60 
61 #include "strtoofft.h"
62 #include "urldata.h"
63 #include <curl/curl.h>
64 #include "progress.h"
65 #include "sendf.h"
66 #include "escape.h"
67 #include "file.h"
68 #include "speedcheck.h"
69 #include "getinfo.h"
70 #include "multiif.h"
71 #include "transfer.h"
72 #include "url.h"
73 #include "parsedate.h" /* for the week day and month names */
74 #include "warnless.h"
75 #include "curl_range.h"
76 /* The last 3 #include files should be in this order */
77 #include "curl_printf.h"
78 #include "curl_memory.h"
79 #include "memdebug.h"
80 
/* Select the path conventions used by this file: DOS style for Windows,
   MS-DOS and EMX builds, Amiga style for AmigaOS 4 builds. When neither
   macro ends up defined the code below uses plain '/' separated paths. */
#if defined(_WIN32) || defined(MSDOS) || defined(__EMX__)
#  define DOS_FILESYSTEM 1
#elif defined(__amigaos4__)
#  define AMIGA_FILESYSTEM 1
#endif

/* open_readonly(p,f) wraps open(). Builds that define OPEN_NEEDS_ARG3 get an
   explicit zero passed as the third argument, all others use the
   two-argument form. */
#ifndef OPEN_NEEDS_ARG3
#  define open_readonly(p,f) open((p),(f))
#else
#  define open_readonly(p,f) open((p),(f),(0))
#endif
92 
93 /*
94  * Forward declarations.
95  */
96 
97 static CURLcode file_do(struct Curl_easy *data, bool *done);
98 static CURLcode file_done(struct Curl_easy *data,
99                           CURLcode status, bool premature);
100 static CURLcode file_connect(struct Curl_easy *data, bool *done);
101 static CURLcode file_disconnect(struct Curl_easy *data,
102                                 struct connectdata *conn,
103                                 bool dead_connection);
104 static CURLcode file_setup_connection(struct Curl_easy *data,
105                                       struct connectdata *conn);
106 
107 /*
108  * FILE scheme handler.
109  */
110 
111 const struct Curl_handler Curl_handler_file = {
112   "file",                               /* scheme */
113   file_setup_connection,                /* setup_connection */
114   file_do,                              /* do_it */
115   file_done,                            /* done */
116   ZERO_NULL,                            /* do_more */
117   file_connect,                         /* connect_it */
118   ZERO_NULL,                            /* connecting */
119   ZERO_NULL,                            /* doing */
120   ZERO_NULL,                            /* proto_getsock */
121   ZERO_NULL,                            /* doing_getsock */
122   ZERO_NULL,                            /* domore_getsock */
123   ZERO_NULL,                            /* perform_getsock */
124   file_disconnect,                      /* disconnect */
125   ZERO_NULL,                            /* write_resp */
126   ZERO_NULL,                            /* write_resp_hd */
127   ZERO_NULL,                            /* connection_check */
128   ZERO_NULL,                            /* attach connection */
129   0,                                    /* defport */
130   CURLPROTO_FILE,                       /* protocol */
131   CURLPROTO_FILE,                       /* family */
132   PROTOPT_NONETWORK | PROTOPT_NOURLQUERY /* flags */
133 };
134 
135 
file_setup_connection(struct Curl_easy * data,struct connectdata * conn)136 static CURLcode file_setup_connection(struct Curl_easy *data,
137                                       struct connectdata *conn)
138 {
139   (void)conn;
140   /* allocate the FILE specific struct */
141   data->req.p.file = calloc(1, sizeof(struct FILEPROTO));
142   if(!data->req.p.file)
143     return CURLE_OUT_OF_MEMORY;
144 
145   return CURLE_OK;
146 }
147 
148 /*
149  * file_connect() gets called from Curl_protocol_connect() to allow us to
150  * do protocol-specific actions at connect-time. We emulate a
151  * connect-then-transfer protocol and "connect" to the file here
152  */
file_connect(struct Curl_easy * data,bool * done)153 static CURLcode file_connect(struct Curl_easy *data, bool *done)
154 {
155   char *real_path;
156   struct FILEPROTO *file = data->req.p.file;
157   int fd;
158 #ifdef DOS_FILESYSTEM
159   size_t i;
160   char *actual_path;
161 #endif
162   size_t real_path_len;
163   CURLcode result;
164 
165   if(file->path) {
166     /* already connected.
167      * the handler->connect_it() is normally only called once, but
168      * FILE does a special check on setting up the connection which
169      * calls this explicitly. */
170     *done = TRUE;
171     return CURLE_OK;
172   }
173 
174   result = Curl_urldecode(data->state.up.path, 0, &real_path,
175                           &real_path_len, REJECT_ZERO);
176   if(result)
177     return result;
178 
179 #ifdef DOS_FILESYSTEM
180   /* If the first character is a slash, and there is
181      something that looks like a drive at the beginning of
182      the path, skip the slash. If we remove the initial
183      slash in all cases, paths without drive letters end up
184      relative to the current directory which is not how
185      browsers work.
186 
187      Some browsers accept | instead of : as the drive letter
188      separator, so we do too.
189 
190      On other platforms, we need the slash to indicate an
191      absolute pathname. On Windows, absolute paths start
192      with a drive letter.
193   */
194   actual_path = real_path;
195   if((actual_path[0] == '/') &&
196       actual_path[1] &&
197      (actual_path[2] == ':' || actual_path[2] == '|')) {
198     actual_path[2] = ':';
199     actual_path++;
200     real_path_len--;
201   }
202 
203   /* change path separators from '/' to '\\' for DOS, Windows and OS/2 */
204   for(i = 0; i < real_path_len; ++i)
205     if(actual_path[i] == '/')
206       actual_path[i] = '\\';
207     else if(!actual_path[i]) { /* binary zero */
208       Curl_safefree(real_path);
209       return CURLE_URL_MALFORMAT;
210     }
211 
212   fd = open_readonly(actual_path, O_RDONLY|O_BINARY);
213   file->path = actual_path;
214 #else
215   if(memchr(real_path, 0, real_path_len)) {
216     /* binary zeroes indicate foul play */
217     Curl_safefree(real_path);
218     return CURLE_URL_MALFORMAT;
219   }
220 
221   #ifdef AMIGA_FILESYSTEM
222   /*
223    * A leading slash in an AmigaDOS path denotes the parent
224    * directory, and hence we block this as it is relative.
225    * Absolute paths start with 'volumename:', so we check for
226    * this first. Failing that, we treat the path as a real Unix
227    * path, but only if the application was compiled with -lunix.
228    */
229   fd = -1;
230   file->path = real_path;
231 
232   if(real_path[0] == '/') {
233     extern int __unix_path_semantics;
234     if(strchr(real_path + 1, ':')) {
235       /* Amiga absolute path */
236       fd = open_readonly(real_path + 1, O_RDONLY);
237       file->path++;
238     }
239     else if(__unix_path_semantics) {
240       /* -lunix fallback */
241       fd = open_readonly(real_path, O_RDONLY);
242     }
243   }
244   #else
245   fd = open_readonly(real_path, O_RDONLY);
246   file->path = real_path;
247   #endif
248 #endif
249   Curl_safefree(file->freepath);
250   file->freepath = real_path; /* free this when done */
251 
252   file->fd = fd;
253   if(!data->state.upload && (fd == -1)) {
254     failf(data, "Couldn't open file %s", data->state.up.path);
255     file_done(data, CURLE_FILE_COULDNT_READ_FILE, FALSE);
256     return CURLE_FILE_COULDNT_READ_FILE;
257   }
258   *done = TRUE;
259 
260   return CURLE_OK;
261 }
262 
/*
 * file_done() releases what file_connect() set up: it frees the stored path
 * buffer, clears the path pointer and closes the descriptor unless it is
 * already -1. It does nothing when no FILE specific struct is present.
 *
 * 'status' and 'premature' are unused. Always returns CURLE_OK.
 */
static CURLcode file_done(struct Curl_easy *data,
                          CURLcode status, bool premature)
{
  struct FILEPROTO *fp = data->req.p.file;
  (void)status; /* not used */
  (void)premature; /* not used */

  if(!fp)
    return CURLE_OK;

  Curl_safefree(fp->freepath);
  fp->path = NULL;

  if(fp->fd != -1) {
    close(fp->fd);
    fp->fd = -1;
  }

  return CURLE_OK;
}
280 
file_disconnect(struct Curl_easy * data,struct connectdata * conn,bool dead_connection)281 static CURLcode file_disconnect(struct Curl_easy *data,
282                                 struct connectdata *conn,
283                                 bool dead_connection)
284 {
285   (void)dead_connection; /* not used */
286   (void)conn;
287   return file_done(data, CURLE_OK, FALSE);
288 }
289 
/* Directory separator character for the filesystem flavour in use */
#ifndef DOS_FILESYSTEM
#define DIRSEP '/'
#else
#define DIRSEP '\\'
#endif
295 
file_upload(struct Curl_easy * data)296 static CURLcode file_upload(struct Curl_easy *data)
297 {
298   struct FILEPROTO *file = data->req.p.file;
299   const char *dir = strchr(file->path, DIRSEP);
300   int fd;
301   int mode;
302   CURLcode result = CURLE_OK;
303   char *xfer_ulbuf;
304   size_t xfer_ulblen;
305   curl_off_t bytecount = 0;
306   struct_stat file_stat;
307   const char *sendbuf;
308   bool eos = FALSE;
309 
310   /*
311    * Since FILE: does not do the full init, we need to provide some extra
312    * assignments here.
313    */
314 
315   if(!dir)
316     return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
317 
318   if(!dir[1])
319     return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
320 
321 #ifdef O_BINARY
322 #define MODE_DEFAULT O_WRONLY|O_CREAT|O_BINARY
323 #else
324 #define MODE_DEFAULT O_WRONLY|O_CREAT
325 #endif
326 
327   if(data->state.resume_from)
328     mode = MODE_DEFAULT|O_APPEND;
329   else
330     mode = MODE_DEFAULT|O_TRUNC;
331 
332   fd = open(file->path, mode, data->set.new_file_perms);
333   if(fd < 0) {
334     failf(data, "cannot open %s for writing", file->path);
335     return CURLE_WRITE_ERROR;
336   }
337 
338   if(-1 != data->state.infilesize)
339     /* known size of data to "upload" */
340     Curl_pgrsSetUploadSize(data, data->state.infilesize);
341 
342   /* treat the negative resume offset value as the case of "-" */
343   if(data->state.resume_from < 0) {
344     if(fstat(fd, &file_stat)) {
345       close(fd);
346       failf(data, "cannot get the size of %s", file->path);
347       return CURLE_WRITE_ERROR;
348     }
349     data->state.resume_from = (curl_off_t)file_stat.st_size;
350   }
351 
352   result = Curl_multi_xfer_ulbuf_borrow(data, &xfer_ulbuf, &xfer_ulblen);
353   if(result)
354     goto out;
355 
356   while(!result && !eos) {
357     size_t nread;
358     ssize_t nwrite;
359     size_t readcount;
360 
361     result = Curl_client_read(data, xfer_ulbuf, xfer_ulblen, &readcount, &eos);
362     if(result)
363       break;
364 
365     if(!readcount)
366       break;
367 
368     nread = readcount;
369 
370     /* skip bytes before resume point */
371     if(data->state.resume_from) {
372       if((curl_off_t)nread <= data->state.resume_from) {
373         data->state.resume_from -= nread;
374         nread = 0;
375         sendbuf = xfer_ulbuf;
376       }
377       else {
378         sendbuf = xfer_ulbuf + data->state.resume_from;
379         nread -= (size_t)data->state.resume_from;
380         data->state.resume_from = 0;
381       }
382     }
383     else
384       sendbuf = xfer_ulbuf;
385 
386     /* write the data to the target */
387     nwrite = write(fd, sendbuf, nread);
388     if((size_t)nwrite != nread) {
389       result = CURLE_SEND_ERROR;
390       break;
391     }
392 
393     bytecount += nread;
394 
395     Curl_pgrsSetUploadCounter(data, bytecount);
396 
397     if(Curl_pgrsUpdate(data))
398       result = CURLE_ABORTED_BY_CALLBACK;
399     else
400       result = Curl_speedcheck(data, Curl_now());
401   }
402   if(!result && Curl_pgrsUpdate(data))
403     result = CURLE_ABORTED_BY_CALLBACK;
404 
405 out:
406   close(fd);
407   Curl_multi_xfer_ulbuf_release(data, xfer_ulbuf);
408 
409   return result;
410 }
411 
412 /*
413  * file_do() is the protocol-specific function for the do-phase, separated
414  * from the connect-phase above. Other protocols merely setup the transfer in
415  * the do-phase, to have it done in the main transfer loop but since some
416  * platforms we support do not allow select()ing etc on file handles (as
417  * opposed to sockets) we instead perform the whole do-operation in this
418  * function.
419  */
file_do(struct Curl_easy * data,bool * done)420 static CURLcode file_do(struct Curl_easy *data, bool *done)
421 {
422   /* This implementation ignores the hostname in conformance with
423      RFC 1738. Only local files (reachable via the standard file system)
424      are supported. This means that files on remotely mounted directories
425      (via NFS, Samba, NT sharing) can be accessed through a file:// URL
426   */
427   CURLcode result = CURLE_OK;
428   struct_stat statbuf; /* struct_stat instead of struct stat just to allow the
429                           Windows version to have a different struct without
430                           having to redefine the simple word 'stat' */
431   curl_off_t expected_size = -1;
432   bool size_known;
433   bool fstated = FALSE;
434   int fd;
435   struct FILEPROTO *file;
436   char *xfer_buf;
437   size_t xfer_blen;
438 
439   *done = TRUE; /* unconditionally */
440 
441   if(data->state.upload)
442     return file_upload(data);
443 
444   file = data->req.p.file;
445 
446   /* get the fd from the connection phase */
447   fd = file->fd;
448 
449   /* VMS: This only works reliable for STREAMLF files */
450   if(-1 != fstat(fd, &statbuf)) {
451     if(!S_ISDIR(statbuf.st_mode))
452       expected_size = statbuf.st_size;
453     /* and store the modification time */
454     data->info.filetime = statbuf.st_mtime;
455     fstated = TRUE;
456   }
457 
458   if(fstated && !data->state.range && data->set.timecondition &&
459      !Curl_meets_timecondition(data, data->info.filetime))
460     return CURLE_OK;
461 
462   if(fstated) {
463     time_t filetime;
464     struct tm buffer;
465     const struct tm *tm = &buffer;
466     char header[80];
467     int headerlen;
468     static const char accept_ranges[]= { "Accept-ranges: bytes\r\n" };
469     if(expected_size >= 0) {
470       headerlen =
471         msnprintf(header, sizeof(header), "Content-Length: %" FMT_OFF_T "\r\n",
472                   expected_size);
473       result = Curl_client_write(data, CLIENTWRITE_HEADER, header, headerlen);
474       if(result)
475         return result;
476 
477       result = Curl_client_write(data, CLIENTWRITE_HEADER,
478                                  accept_ranges, sizeof(accept_ranges) - 1);
479       if(result != CURLE_OK)
480         return result;
481     }
482 
483     filetime = (time_t)statbuf.st_mtime;
484     result = Curl_gmtime(filetime, &buffer);
485     if(result)
486       return result;
487 
488     /* format: "Tue, 15 Nov 1994 12:45:26 GMT" */
489     headerlen =
490       msnprintf(header, sizeof(header),
491                 "Last-Modified: %s, %02d %s %4d %02d:%02d:%02d GMT\r\n",
492                 Curl_wkday[tm->tm_wday ? tm->tm_wday-1 : 6],
493                 tm->tm_mday,
494                 Curl_month[tm->tm_mon],
495                 tm->tm_year + 1900,
496                 tm->tm_hour,
497                 tm->tm_min,
498                 tm->tm_sec);
499     result = Curl_client_write(data, CLIENTWRITE_HEADER, header, headerlen);
500     if(!result)
501       /* end of headers */
502       result = Curl_client_write(data, CLIENTWRITE_HEADER, "\r\n", 2);
503     if(result)
504       return result;
505     /* set the file size to make it available post transfer */
506     Curl_pgrsSetDownloadSize(data, expected_size);
507     if(data->req.no_body)
508       return CURLE_OK;
509   }
510 
511   /* Check whether file range has been specified */
512   result = Curl_range(data);
513   if(result)
514     return result;
515 
516   /* Adjust the start offset in case we want to get the N last bytes
517    * of the stream if the filesize could be determined */
518   if(data->state.resume_from < 0) {
519     if(!fstated) {
520       failf(data, "cannot get the size of file.");
521       return CURLE_READ_ERROR;
522     }
523     data->state.resume_from += (curl_off_t)statbuf.st_size;
524   }
525 
526   if(data->state.resume_from > 0) {
527     /* We check explicitly if we have a start offset, because
528      * expected_size may be -1 if we do not know how large the file is,
529      * in which case we should not adjust it. */
530     if(data->state.resume_from <= expected_size)
531       expected_size -= data->state.resume_from;
532     else {
533       failf(data, "failed to resume file:// transfer");
534       return CURLE_BAD_DOWNLOAD_RESUME;
535     }
536   }
537 
538   /* A high water mark has been specified so we obey... */
539   if(data->req.maxdownload > 0)
540     expected_size = data->req.maxdownload;
541 
542   if(!fstated || (expected_size <= 0))
543     size_known = FALSE;
544   else
545     size_known = TRUE;
546 
547   /* The following is a shortcut implementation of file reading
548      this is both more efficient than the former call to download() and
549      it avoids problems with select() and recv() on file descriptors
550      in Winsock */
551   if(size_known)
552     Curl_pgrsSetDownloadSize(data, expected_size);
553 
554   if(data->state.resume_from) {
555     if(!S_ISDIR(statbuf.st_mode)) {
556       if(data->state.resume_from !=
557           lseek(fd, data->state.resume_from, SEEK_SET))
558         return CURLE_BAD_DOWNLOAD_RESUME;
559     }
560     else {
561       return CURLE_BAD_DOWNLOAD_RESUME;
562     }
563   }
564 
565   result = Curl_multi_xfer_buf_borrow(data, &xfer_buf, &xfer_blen);
566   if(result)
567     goto out;
568 
569   if(!S_ISDIR(statbuf.st_mode)) {
570     while(!result) {
571       ssize_t nread;
572       /* Do not fill a whole buffer if we want less than all data */
573       size_t bytestoread;
574 
575       if(size_known) {
576         bytestoread = (expected_size < (curl_off_t)(xfer_blen-1)) ?
577           curlx_sotouz(expected_size) : (xfer_blen-1);
578       }
579       else
580         bytestoread = xfer_blen-1;
581 
582       nread = read(fd, xfer_buf, bytestoread);
583 
584       if(nread > 0)
585         xfer_buf[nread] = 0;
586 
587       if(nread <= 0 || (size_known && (expected_size == 0)))
588         break;
589 
590       if(size_known)
591         expected_size -= nread;
592 
593       result = Curl_client_write(data, CLIENTWRITE_BODY, xfer_buf, nread);
594       if(result)
595         goto out;
596 
597       if(Curl_pgrsUpdate(data))
598         result = CURLE_ABORTED_BY_CALLBACK;
599       else
600         result = Curl_speedcheck(data, Curl_now());
601       if(result)
602         goto out;
603     }
604   }
605   else {
606 #ifdef HAVE_OPENDIR
607     DIR *dir = opendir(file->path);
608     struct dirent *entry;
609 
610     if(!dir) {
611       result = CURLE_READ_ERROR;
612       goto out;
613     }
614     else {
615       while((entry = readdir(dir))) {
616         if(entry->d_name[0] != '.') {
617           result = Curl_client_write(data, CLIENTWRITE_BODY,
618                    entry->d_name, strlen(entry->d_name));
619           if(result)
620             break;
621           result = Curl_client_write(data, CLIENTWRITE_BODY, "\n", 1);
622           if(result)
623             break;
624         }
625       }
626       closedir(dir);
627     }
628 #else
629     failf(data, "Directory listing not yet implemented on this platform.");
630     result = CURLE_READ_ERROR;
631 #endif
632   }
633 
634   if(Curl_pgrsUpdate(data))
635     result = CURLE_ABORTED_BY_CALLBACK;
636 
637 out:
638   Curl_multi_xfer_buf_release(data, xfer_buf);
639   return result;
640 }
641 
642 #endif
643