/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "uv.h"
#include "internal.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <sys/sysctl.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#if defined(__FreeBSD__)
#include <sys/user.h>
#endif
#include <unistd.h>
#include <fcntl.h>
#include <time.h>

/*
 * Required on
 * - Until at least FreeBSD 11.0
 * - Older versions of Mac OS X
 *
 * http://www.boost.org/doc/libs/1_61_0/boost/asio/detail/kqueue_reactor.hpp
 */
#ifndef EV_OOBAND
#define EV_OOBAND  EV_FLAG1
#endif

static void uv__fs_event(uv_loop_t* loop, uv__io_t* w, unsigned int fflags);


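/* Create the kqueue descriptor that backs this loop's I/O polling and mark it
 * close-on-exec so it is not leaked into child processes. */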
int uv__kqueue_init(uv_loop_t* loop) {
  loop->backend_fd = kqueue();
  if (loop->backend_fd == -1)
    return UV__ERR(errno);

  uv__cloexec(loop->backend_fd, 1);

  return 0;
}


#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
static _Atomic int uv__has_forked_with_cfrunloop;
#endif

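/* Called in the child after fork(). kqueue descriptors are not inherited
 * across fork(), so the backend is recreated from scratch; any CFRunLoop
 * state inherited from the parent is abandoned (see the comment below). */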
int uv__io_fork(uv_loop_t* loop) {
  int err;
  loop->backend_fd = -1;
  err = uv__kqueue_init(loop);
  if (err)
    return err;

#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  if (loop->cf_state != NULL) {
    /* We cannot start another CFRunloop and/or thread in the child
       process; CF aborts if you try or if you try to touch the thread
       at all to kill it. So the best we can do is ignore it from now
       on. This means we can't watch directories in the same way
       anymore (like other BSDs). It also means we cannot properly
       clean up the allocated resources; calling
       uv__fsevents_loop_delete from uv_loop_close will crash the
       process. So we sidestep the issue by pretending like we never
       started it in the first place.
    */
    atomic_store_explicit(&uv__has_forked_with_cfrunloop,
                          1,
                          memory_order_relaxed);
    uv__free(loop->cf_state);
    loop->cf_state = NULL;
  }
#endif /* #if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070 */
  return err;
}


int uv__io_check_fd(uv_loop_t* loop, int fd) {
  struct kevent ev;
  int rc;
  struct stat sb;
#ifdef __APPLE__
  char path[MAXPATHLEN];
#endif

  if (uv__fstat(fd, &sb))
    return UV__ERR(errno);

  /* On FreeBSD, kqueue only supports EVFILT_READ notification for regular files
   * and always reports ready events for writing, resulting in busy-looping.
   *
   * On Darwin, DragonFlyBSD, NetBSD and OpenBSD, kqueue reports ready events for
   * regular files as readable and writable only once, acting like an EV_ONESHOT.
   *
   * Neither of the above cases should be added to the kqueue.
   */
  if (S_ISREG(sb.st_mode) || S_ISDIR(sb.st_mode))
    return UV_EINVAL;

#ifdef __APPLE__
  /* On Darwin (both macOS and iOS), in addition to regular files, FIFOs also don't
   * work properly with kqueue: the disconnection from the last writer won't trigger
   * an event for kqueue in spite of what the man pages say. Thus, we also disallow
   * the case of S_IFIFO. */
  if (S_ISFIFO(sb.st_mode)) {
    /* File descriptors of FIFO, pipe and kqueue share the same type of file,
     * therefore there is no way to tell them apart via stat.st_mode&S_IFMT.
     * Fortunately, FIFO is the only one that has a persisted file on filesystem,
     * from which we're able to make the distinction for it. */
    if (!fcntl(fd, F_GETPATH, path))
      return UV_EINVAL;
  }
#endif

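  /* Probe whether kqueue will actually accept this descriptor: temporarily
   * register an EVFILT_READ filter and, if that succeeds, remove it again
   * right away. A failure is reported to the caller here instead of being
   * discovered later inside uv__io_poll(). */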
  rc = 0;
  EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, 0);
  if (kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL))
    rc = UV__ERR(errno);

  EV_SET(&ev, fd, EVFILT_READ, EV_DELETE, 0, 0, 0);
  if (rc == 0)
    if (kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL))
      abort();

  return rc;
}


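/* Remove a single event from the kqueue. EBADF and ENOENT are tolerated
 * because the descriptor may already have been closed or the event may
 * already have been deleted; any other failure is treated as fatal. */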
static void uv__kqueue_delete(int kqfd, const struct kevent *ev) {
  struct kevent change;

  EV_SET(&change, ev->ident, ev->filter, EV_DELETE, 0, 0, 0);

  if (0 == kevent(kqfd, &change, 1, NULL, 0, NULL))
    return;

  if (errno == EBADF || errno == ENOENT)
    return;

  abort();
}


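/* Run one iteration of the I/O poller: flush pending watcher changes to the
 * kqueue as a changelist, wait up to `timeout` milliseconds for events,
 * dispatch them to their watchers, and keep loop->time and the idle-time
 * metrics up to date. Signal watchers are always dispatched last. */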
void uv__io_poll(uv_loop_t* loop, int timeout) {
  uv__loop_internal_fields_t* lfields;
  struct kevent events[1024];
  struct kevent* ev;
  struct timespec spec;
  unsigned int nevents;
  unsigned int revents;
  struct uv__queue* q;
  uv__io_t* w;
  uv_process_t* process;
  sigset_t* pset;
  sigset_t set;
  uint64_t base;
  uint64_t diff;
  int have_signals;
  int filter;
  int fflags;
  int count;
  int nfds;
  int fd;
  int op;
  int i;
  int user_timeout;
  int reset_timeout;

  if (loop->nfds == 0) {
    assert(uv__queue_empty(&loop->watcher_queue));
    return;
  }

  lfields = uv__get_internal_fields(loop);
  nevents = 0;

  while (!uv__queue_empty(&loop->watcher_queue)) {
    q = uv__queue_head(&loop->watcher_queue);
    uv__queue_remove(q);
    uv__queue_init(q);

    w = uv__queue_data(q, uv__io_t, watcher_queue);
    assert(w->pevents != 0);
    assert(w->fd >= 0);
    assert(w->fd < (int) loop->nwatchers);

    if ((w->events & POLLIN) == 0 && (w->pevents & POLLIN) != 0) {
      filter = EVFILT_READ;
      fflags = 0;
      op = EV_ADD;

      if (w->cb == uv__fs_event) {
        filter = EVFILT_VNODE;
        fflags = NOTE_ATTRIB | NOTE_WRITE  | NOTE_RENAME
               | NOTE_DELETE | NOTE_EXTEND | NOTE_REVOKE;
        op = EV_ADD | EV_ONESHOT; /* Stop the event from firing repeatedly. */
      }

      EV_SET(events + nevents, w->fd, filter, op, fflags, 0, 0);

      if (++nevents == ARRAY_SIZE(events)) {
        if (kevent(loop->backend_fd, events, nevents, NULL, 0, NULL))
          abort();
        nevents = 0;
      }
    }

    if ((w->events & POLLOUT) == 0 && (w->pevents & POLLOUT) != 0) {
      EV_SET(events + nevents, w->fd, EVFILT_WRITE, EV_ADD, 0, 0, 0);

      if (++nevents == ARRAY_SIZE(events)) {
        if (kevent(loop->backend_fd, events, nevents, NULL, 0, NULL))
          abort();
        nevents = 0;
      }
    }

    if ((w->events & UV__POLLPRI) == 0 && (w->pevents & UV__POLLPRI) != 0) {
      EV_SET(events + nevents, w->fd, EV_OOBAND, EV_ADD, 0, 0, 0);

      if (++nevents == ARRAY_SIZE(events)) {
        if (kevent(loop->backend_fd, events, nevents, NULL, 0, NULL))
          abort();
        nevents = 0;
      }
    }

    w->events = w->pevents;
  }

  pset = NULL;
  if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
    pset = &set;
    sigemptyset(pset);
    sigaddset(pset, SIGPROF);
  }

  assert(timeout >= -1);
  base = loop->time;
  count = 48; /* Benchmarks suggest this gives the best throughput. */

  if (lfields->flags & UV_METRICS_IDLE_TIME) {
    reset_timeout = 1;
    user_timeout = timeout;
    timeout = 0;
  } else {
    reset_timeout = 0;
  }

  for (;; nevents = 0) {
    /* Only need to set the provider_entry_time if timeout != 0. The function
     * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME.
     */
    if (timeout != 0)
      uv__metrics_set_provider_entry_time(loop);

    if (timeout != -1) {
      spec.tv_sec = timeout / 1000;
      spec.tv_nsec = (timeout % 1000) * 1000000;
    }

    if (pset != NULL)
      pthread_sigmask(SIG_BLOCK, pset, NULL);

    /* Store the current timeout in a location that's globally accessible so
     * other locations like uv__work_done() can determine whether the queue
     * of events in the callback were waiting when poll was called.
     */
    lfields->current_timeout = timeout;

    nfds = kevent(loop->backend_fd,
                  events,
                  nevents,
                  events,
                  ARRAY_SIZE(events),
                  timeout == -1 ? NULL : &spec);

    if (nfds == -1)
      assert(errno == EINTR);
    else if (nfds == 0)
      /* Unlimited timeout should only return with events or signal. */
      assert(timeout != -1);

    if (pset != NULL)
      pthread_sigmask(SIG_UNBLOCK, pset, NULL);

    /* Update loop->time unconditionally. It's tempting to skip the update when
     * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the
     * operating system didn't reschedule our process while in the syscall.
     */
    uv__update_time(loop);

    if (nfds == 0 || nfds == -1) {
      /* If kqueue is empty or interrupted, we might still have children ready
       * to reap immediately. */
      if (loop->flags & UV_LOOP_REAP_CHILDREN) {
        loop->flags &= ~UV_LOOP_REAP_CHILDREN;
        uv__wait_children(loop);
        assert((reset_timeout == 0 ? timeout : user_timeout) == 0);
        return; /* Equivalent to fall-through behavior. */
      }

      if (reset_timeout != 0) {
        timeout = user_timeout;
        reset_timeout = 0;
      } else if (nfds == 0) {
        return;
      }

      /* Interrupted by a signal. Update timeout and poll again. */
      goto update_timeout;
    }

    have_signals = 0;
    nevents = 0;

    assert(loop->watchers != NULL);
    loop->watchers[loop->nwatchers] = (void*) events;
    loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds;
    for (i = 0; i < nfds; i++) {
      ev = events + i;
      fd = ev->ident;

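      /* For EVFILT_PROC events, ev->ident holds a process id rather than a
       * file descriptor, which is also why uv__platform_invalidate_fd()
       * leaves them alone. */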
      /* Handle kevent NOTE_EXIT results */
      if (ev->filter == EVFILT_PROC) {
        uv__queue_foreach(q, &loop->process_handles) {
          process = uv__queue_data(q, uv_process_t, queue);
          if (process->pid == fd) {
            process->flags |= UV_HANDLE_REAP;
            loop->flags |= UV_LOOP_REAP_CHILDREN;
            break;
          }
        }
        nevents++;
        continue;
      }

      /* Skip invalidated events, see uv__platform_invalidate_fd */
      if (fd == -1)
        continue;
      w = loop->watchers[fd];

      if (w == NULL) {
        /* File descriptor that we've stopped watching, disarm it. */
        uv__kqueue_delete(loop->backend_fd, ev);
        continue;
      }

      if (ev->filter == EVFILT_VNODE) {
        assert(w->events == POLLIN);
        assert(w->pevents == POLLIN);
        uv__metrics_update_idle_time(loop);
        w->cb(loop, w, ev->fflags); /* XXX always uv__fs_event() */
        nevents++;
        continue;
      }

      revents = 0;

      if (ev->filter == EVFILT_READ) {
        if (w->pevents & POLLIN)
          revents |= POLLIN;
        else
          uv__kqueue_delete(loop->backend_fd, ev);

        if ((ev->flags & EV_EOF) && (w->pevents & UV__POLLRDHUP))
          revents |= UV__POLLRDHUP;
      }

      if (ev->filter == EV_OOBAND) {
        if (w->pevents & UV__POLLPRI)
          revents |= UV__POLLPRI;
        else
          uv__kqueue_delete(loop->backend_fd, ev);
      }

      if (ev->filter == EVFILT_WRITE) {
        if (w->pevents & POLLOUT)
          revents |= POLLOUT;
        else
          uv__kqueue_delete(loop->backend_fd, ev);
      }

      if (ev->flags & EV_ERROR)
        revents |= POLLERR;

      if (revents == 0)
        continue;

      /* Run signal watchers last.  This also affects child process watchers
       * because those are implemented in terms of signal watchers.
       */
      if (w == &loop->signal_io_watcher) {
        have_signals = 1;
      } else {
        uv__metrics_update_idle_time(loop);
        w->cb(loop, w, revents);
      }

      nevents++;
    }

    if (loop->flags & UV_LOOP_REAP_CHILDREN) {
      loop->flags &= ~UV_LOOP_REAP_CHILDREN;
      uv__wait_children(loop);
    }

    uv__metrics_inc_events(loop, nevents);
    if (reset_timeout != 0) {
      timeout = user_timeout;
      reset_timeout = 0;
      uv__metrics_inc_events_waiting(loop, nevents);
    }

    if (have_signals != 0) {
      uv__metrics_update_idle_time(loop);
      loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);
    }

    loop->watchers[loop->nwatchers] = NULL;
    loop->watchers[loop->nwatchers + 1] = NULL;

    if (have_signals != 0)
      return;  /* Event loop should cycle now so don't poll again. */

    if (nevents != 0) {
      if (nfds == ARRAY_SIZE(events) && --count != 0) {
        /* Poll for more events but don't block this time. */
        timeout = 0;
        continue;
      }
      return;
    }

update_timeout:
    if (timeout == 0)
      return;

    if (timeout == -1)
      continue;

    assert(timeout > 0);

    diff = loop->time - base;
    if (diff >= (uint64_t) timeout)
      return;

    timeout -= diff;
  }
}


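/* Called when a file descriptor is closed while uv__io_poll() is still
 * iterating over the eventlist: mark the remaining events for that fd so the
 * dispatch loop above skips them. EVFILT_PROC events are excluded because
 * their ident is a process id, not a file descriptor. */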
void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) {
  struct kevent* events;
  uintptr_t i;
  uintptr_t nfds;

  assert(loop->watchers != NULL);
  assert(fd >= 0);

  events = (struct kevent*) loop->watchers[loop->nwatchers];
  nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1];
  if (events == NULL)
    return;

  /* Invalidate events with same file descriptor */
  for (i = 0; i < nfds; i++)
    if ((int) events[i].ident == fd && events[i].filter != EVFILT_PROC)
      events[i].ident = -1;
}


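/* EVFILT_VNODE callback for uv_fs_event_t handles. Maps the kernel's fflags
 * to UV_CHANGE/UV_RENAME, recovers the file name where the platform allows it
 * and, because the event was registered with EV_ONESHOT, re-arms the watcher
 * before returning. */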
static void uv__fs_event(uv_loop_t* loop, uv__io_t* w, unsigned int fflags) {
  uv_fs_event_t* handle;
  struct kevent ev;
  int events;
  const char* path;
#if defined(F_GETPATH)
  /* MAXPATHLEN == PATH_MAX but the former is what XNU calls it internally. */
  char pathbuf[MAXPATHLEN];
#endif

  handle = container_of(w, uv_fs_event_t, event_watcher);

  if (fflags & (NOTE_ATTRIB | NOTE_EXTEND))
    events = UV_CHANGE;
  else
    events = UV_RENAME;

  path = NULL;
#if defined(F_GETPATH)
  /* Also works when the file has been unlinked from the file system. Passing
   * in the path when the file has been deleted is arguably a little strange
   * but it's consistent with what the inotify backend does.
   */
  if (fcntl(handle->event_watcher.fd, F_GETPATH, pathbuf) == 0)
    path = uv__basename_r(pathbuf);
#elif defined(F_KINFO)
  /* We try to get the file info from the file descriptor. The struct's
   * kf_structsize field must be initialised beforehand, either with the
   * KINFO_FILE_SIZE constant or as done below.
   */
  struct stat statbuf;
  struct kinfo_file kf;

  if (handle->event_watcher.fd != -1 &&
     (!uv__fstat(handle->event_watcher.fd, &statbuf) && !(statbuf.st_mode & S_IFDIR))) {
     /* We purposely avoid KINFO_FILE_SIZE here: it is not available on
      * non-Intel architectures, and on Intel sizeof(kf) yields the same
      * value (1392) anyway. The man page also allows proceeding this way.
      */
     kf.kf_structsize = sizeof(kf);
     if (fcntl(handle->event_watcher.fd, F_KINFO, &kf) == 0)
       path = uv__basename_r(kf.kf_path);
  }
#endif
  handle->cb(handle, path, events, 0);

  if (handle->event_watcher.fd == -1)
    return;

  /* Watcher operates in one-shot mode, re-arm it. */
  fflags = NOTE_ATTRIB | NOTE_WRITE  | NOTE_RENAME
         | NOTE_DELETE | NOTE_EXTEND | NOTE_REVOKE;

  EV_SET(&ev, w->fd, EVFILT_VNODE, EV_ADD | EV_ONESHOT, fflags, 0, 0);

  if (kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL))
    abort();
}


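/* A minimal caller-side sketch of the public API implemented below (not part
 * of libuv itself; `on_fs_event` is a hypothetical user callback and `loop`
 * an already initialised uv_loop_t*):
 *
 *   static void on_fs_event(uv_fs_event_t* h, const char* filename,
 *                           int events, int status) {
 *     if (events & UV_RENAME) ...;
 *     if (events & UV_CHANGE) ...;
 *   }
 *
 *   uv_fs_event_t handle;
 *   uv_fs_event_init(loop, &handle);
 *   uv_fs_event_start(&handle, on_fs_event, "/some/path", 0);
 *   ...
 *   uv_fs_event_stop(&handle);
 */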
int uv_fs_event_init(uv_loop_t* loop, uv_fs_event_t* handle) {
  uv__handle_init(loop, (uv_handle_t*)handle, UV_FS_EVENT);
  return 0;
}


int uv_fs_event_start(uv_fs_event_t* handle,
                      uv_fs_event_cb cb,
                      const char* path,
                      unsigned int flags) {
  int fd;
#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  struct stat statbuf;
#endif

  if (uv__is_active(handle))
    return UV_EINVAL;

  handle->cb = cb;
  handle->path = uv__strdup(path);
  if (handle->path == NULL)
    return UV_ENOMEM;

  /* TODO open asynchronously - but how do we report back errors? */
  fd = open(handle->path, O_RDONLY);
  if (fd == -1) {
    uv__free(handle->path);
    handle->path = NULL;
    return UV__ERR(errno);
  }

#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  /* Nullify field to perform checks later */
  handle->cf_cb = NULL;
  handle->realpath = NULL;
  handle->realpath_len = 0;
  handle->cf_flags = flags;

  if (uv__fstat(fd, &statbuf))
    goto fallback;
  /* FSEvents works only with directories */
  if (!(statbuf.st_mode & S_IFDIR))
    goto fallback;

  if (0 == atomic_load_explicit(&uv__has_forked_with_cfrunloop,
                                memory_order_relaxed)) {
    int r;
    /* The fallback fd is no longer needed */
    uv__close_nocheckstdio(fd);
    handle->event_watcher.fd = -1;
    r = uv__fsevents_init(handle);
    if (r == 0) {
      uv__handle_start(handle);
    } else {
      uv__free(handle->path);
      handle->path = NULL;
    }
    return r;
  }
fallback:
#endif /* #if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070 */

  uv__handle_start(handle);
  uv__io_init(&handle->event_watcher, uv__fs_event, fd);
  uv__io_start(handle->loop, &handle->event_watcher, POLLIN);

  return 0;
}


int uv_fs_event_stop(uv_fs_event_t* handle) {
  int r;
  r = 0;

  if (!uv__is_active(handle))
    return 0;

  uv__handle_stop(handle);

#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  if (0 == atomic_load_explicit(&uv__has_forked_with_cfrunloop,
                                memory_order_relaxed))
    if (handle->cf_cb != NULL)
      r = uv__fsevents_close(handle);
#endif

  if (handle->event_watcher.fd != -1) {
    uv__io_close(handle->loop, &handle->event_watcher);
    uv__close(handle->event_watcher.fd);
    handle->event_watcher.fd = -1;
  }

  uv__free(handle->path);
  handle->path = NULL;

  return r;
}


void uv__fs_event_close(uv_fs_event_t* handle) {
  uv_fs_event_stop(handle);
}
658