xref: /libuv/src/unix/kqueue.c (revision a407b232)
/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "uv.h"
#include "internal.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <sys/sysctl.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#if defined(__FreeBSD__)
#include <sys/user.h>
#endif
#include <unistd.h>
#include <fcntl.h>
#include <time.h>

/*
 * This fallback definition is required on:
 * - FreeBSD, until at least 11.0
 * - Older versions of Mac OS X
 *
 * http://www.boost.org/doc/libs/1_61_0/boost/asio/detail/kqueue_reactor.hpp
 */
#ifndef EV_OOBAND
#define EV_OOBAND  EV_FLAG1
#endif

static void uv__fs_event(uv_loop_t* loop, uv__io_t* w, unsigned int fflags);


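/* Create the kqueue descriptor that backs this event loop and mark it
 * close-on-exec so it is not leaked across exec into spawned processes.
 */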
int uv__kqueue_init(uv_loop_t* loop) {
  loop->backend_fd = kqueue();
  if (loop->backend_fd == -1)
    return UV__ERR(errno);

  uv__cloexec(loop->backend_fd, 1);

  return 0;
}


#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
static _Atomic int uv__has_forked_with_cfrunloop;
#endif

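/* Re-create the backend kqueue after fork(); kqueue descriptors are not
 * inherited by the child process.
 */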
int uv__io_fork(uv_loop_t* loop) {
  int err;
  loop->backend_fd = -1;
  err = uv__kqueue_init(loop);
  if (err)
    return err;

#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  if (loop->cf_state != NULL) {
    /* We cannot start another CFRunloop and/or thread in the child
       process; CF aborts if you try or if you try to touch the thread
       at all to kill it. So the best we can do is ignore it from now
       on. This means we can't watch directories in the same way
       anymore (like other BSDs). It also means we cannot properly
       clean up the allocated resources; calling
       uv__fsevents_loop_delete from uv_loop_close will crash the
       process. So we sidestep the issue by pretending like we never
       started it in the first place.
    */
    atomic_store_explicit(&uv__has_forked_with_cfrunloop,
                          1,
                          memory_order_relaxed);
    uv__free(loop->cf_state);
    loop->cf_state = NULL;
  }
#endif /* #if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070 */
  return err;
}

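/* Check that the file descriptor can be monitored with kqueue by registering
 * a read filter for it and immediately removing it again.
 */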
int uv__io_check_fd(uv_loop_t* loop, int fd) {
  struct kevent ev;
  int rc;

  rc = 0;
  EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, 0);
  if (kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL))
    rc = UV__ERR(errno);

  EV_SET(&ev, fd, EVFILT_READ, EV_DELETE, 0, 0, 0);
  if (rc == 0)
    if (kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL))
      abort();

  return rc;
}

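/* Remove a previously registered event from the kqueue. EBADF and ENOENT
 * mean the file descriptor was already closed or the event was never
 * registered, which is fine; any other failure is fatal.
 */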
static void uv__kqueue_delete(int kqfd, const struct kevent *ev) {
  struct kevent change;

  EV_SET(&change, ev->ident, ev->filter, EV_DELETE, 0, 0, 0);

  if (0 == kevent(kqfd, &change, 1, NULL, 0, NULL))
    return;

  if (errno == EBADF || errno == ENOENT)
    return;

  abort();
}

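/* Run one iteration of the I/O poll phase: translate pending watcher changes
 * into kevent filters, wait for events with kevent(), then dispatch the
 * results to the matching watcher callbacks.
 */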
void uv__io_poll(uv_loop_t* loop, int timeout) {
  uv__loop_internal_fields_t* lfields;
  struct kevent events[1024];
  struct kevent* ev;
  struct timespec spec;
  unsigned int nevents;
  unsigned int revents;
  struct uv__queue* q;
  uv__io_t* w;
  uv_process_t* process;
  sigset_t* pset;
  sigset_t set;
  uint64_t base;
  uint64_t diff;
  int have_signals;
  int filter;
  int fflags;
  int count;
  int nfds;
  int fd;
  int op;
  int i;
  int user_timeout;
  int reset_timeout;

  if (loop->nfds == 0) {
    assert(uv__queue_empty(&loop->watcher_queue));
    return;
  }

  lfields = uv__get_internal_fields(loop);
  nevents = 0;

  while (!uv__queue_empty(&loop->watcher_queue)) {
    q = uv__queue_head(&loop->watcher_queue);
    uv__queue_remove(q);
    uv__queue_init(q);

    w = uv__queue_data(q, uv__io_t, watcher_queue);
    assert(w->pevents != 0);
    assert(w->fd >= 0);
    assert(w->fd < (int) loop->nwatchers);

    if ((w->events & POLLIN) == 0 && (w->pevents & POLLIN) != 0) {
      filter = EVFILT_READ;
      fflags = 0;
      op = EV_ADD;

      if (w->cb == uv__fs_event) {
        filter = EVFILT_VNODE;
        fflags = NOTE_ATTRIB | NOTE_WRITE  | NOTE_RENAME
               | NOTE_DELETE | NOTE_EXTEND | NOTE_REVOKE;
        op = EV_ADD | EV_ONESHOT; /* Stop the event from firing repeatedly. */
      }

      EV_SET(events + nevents, w->fd, filter, op, fflags, 0, 0);

      if (++nevents == ARRAY_SIZE(events)) {
        if (kevent(loop->backend_fd, events, nevents, NULL, 0, NULL))
          abort();
        nevents = 0;
      }
    }

    if ((w->events & POLLOUT) == 0 && (w->pevents & POLLOUT) != 0) {
      EV_SET(events + nevents, w->fd, EVFILT_WRITE, EV_ADD, 0, 0, 0);

      if (++nevents == ARRAY_SIZE(events)) {
        if (kevent(loop->backend_fd, events, nevents, NULL, 0, NULL))
          abort();
        nevents = 0;
      }
    }

    if ((w->events & UV__POLLPRI) == 0 && (w->pevents & UV__POLLPRI) != 0) {
      EV_SET(events + nevents, w->fd, EV_OOBAND, EV_ADD, 0, 0, 0);

      if (++nevents == ARRAY_SIZE(events)) {
        if (kevent(loop->backend_fd, events, nevents, NULL, 0, NULL))
          abort();
        nevents = 0;
      }
    }

    w->events = w->pevents;
  }

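  /* If the loop was configured with UV_LOOP_BLOCK_SIGPROF, block SIGPROF
   * around the kevent() call below so profiler signals cannot interrupt the
   * wait.
   */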
  pset = NULL;
  if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
    pset = &set;
    sigemptyset(pset);
    sigaddset(pset, SIGPROF);
  }

  assert(timeout >= -1);
  base = loop->time;
  count = 48; /* Benchmarks suggest this gives the best throughput. */

  if (lfields->flags & UV_METRICS_IDLE_TIME) {
    reset_timeout = 1;
    user_timeout = timeout;
    timeout = 0;
  } else {
    reset_timeout = 0;
  }

  for (;; nevents = 0) {
    /* Only need to set the provider_entry_time if timeout != 0. The function
     * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME.
     */
    if (timeout != 0)
      uv__metrics_set_provider_entry_time(loop);

    if (timeout != -1) {
      spec.tv_sec = timeout / 1000;
      spec.tv_nsec = (timeout % 1000) * 1000000;
    }

    if (pset != NULL)
      pthread_sigmask(SIG_BLOCK, pset, NULL);

    /* Store the current timeout in a location that's globally accessible so
     * other locations like uv__work_done() can determine whether the queue
     * of events in the callback were waiting when poll was called.
     */
    lfields->current_timeout = timeout;

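    /* A single kevent() call both submits the queued filter changes and waits
     * (up to the computed timeout) for new events, which are written back
     * into the same events array.
     */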
    nfds = kevent(loop->backend_fd,
                  events,
                  nevents,
                  events,
                  ARRAY_SIZE(events),
                  timeout == -1 ? NULL : &spec);

    if (nfds == -1)
      assert(errno == EINTR);
    else if (nfds == 0)
      /* Unlimited timeout should only return with events or signal. */
      assert(timeout != -1);

    if (pset != NULL)
      pthread_sigmask(SIG_UNBLOCK, pset, NULL);

    /* Update loop->time unconditionally. It's tempting to skip the update when
     * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the
     * operating system didn't reschedule our process while in the syscall.
     */
    uv__update_time(loop);

    if (nfds == 0 || nfds == -1) {
      /* If kqueue is empty or interrupted, we might still have children ready
       * to reap immediately. */
      if (loop->flags & UV_LOOP_REAP_CHILDREN) {
        loop->flags &= ~UV_LOOP_REAP_CHILDREN;
        uv__wait_children(loop);
        assert((reset_timeout == 0 ? timeout : user_timeout) == 0);
        return; /* Equivalent to fall-through behavior. */
      }

      if (reset_timeout != 0) {
        timeout = user_timeout;
        reset_timeout = 0;
      } else if (nfds == 0) {
        return;
      }

      /* Interrupted by a signal. Update timeout and poll again. */
      goto update_timeout;
    }

    have_signals = 0;
    nevents = 0;

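    /* Stash the events array and its length where uv__platform_invalidate_fd()
     * can find them, so a callback that stops a watcher can invalidate events
     * for that fd that are still pending in this batch.
     */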
    assert(loop->watchers != NULL);
    loop->watchers[loop->nwatchers] = (void*) events;
    loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds;
    for (i = 0; i < nfds; i++) {
      ev = events + i;
      fd = ev->ident;

      /* Handle kevent NOTE_EXIT results */
      if (ev->filter == EVFILT_PROC) {
        uv__queue_foreach(q, &loop->process_handles) {
          process = uv__queue_data(q, uv_process_t, queue);
          if (process->pid == fd) {
            process->flags |= UV_HANDLE_REAP;
            loop->flags |= UV_LOOP_REAP_CHILDREN;
            break;
          }
        }
        nevents++;
        continue;
      }

      /* Skip invalidated events, see uv__platform_invalidate_fd */
      if (fd == -1)
        continue;
      w = loop->watchers[fd];

      if (w == NULL) {
        /* File descriptor that we've stopped watching, disarm it. */
        uv__kqueue_delete(loop->backend_fd, ev);
        continue;
      }

      if (ev->filter == EVFILT_VNODE) {
        assert(w->events == POLLIN);
        assert(w->pevents == POLLIN);
        uv__metrics_update_idle_time(loop);
        w->cb(loop, w, ev->fflags); /* XXX always uv__fs_event() */
        nevents++;
        continue;
      }

      revents = 0;

      if (ev->filter == EVFILT_READ) {
        if (w->pevents & POLLIN)
          revents |= POLLIN;
        else
          uv__kqueue_delete(loop->backend_fd, ev);

        if ((ev->flags & EV_EOF) && (w->pevents & UV__POLLRDHUP))
          revents |= UV__POLLRDHUP;
      }

      if (ev->filter == EV_OOBAND) {
        if (w->pevents & UV__POLLPRI)
          revents |= UV__POLLPRI;
        else
          uv__kqueue_delete(loop->backend_fd, ev);
      }

      if (ev->filter == EVFILT_WRITE) {
        if (w->pevents & POLLOUT)
          revents |= POLLOUT;
        else
          uv__kqueue_delete(loop->backend_fd, ev);
      }

      if (ev->flags & EV_ERROR)
        revents |= POLLERR;

      if (revents == 0)
        continue;

      /* Run signal watchers last.  This also affects child process watchers
       * because those are implemented in terms of signal watchers.
       */
      if (w == &loop->signal_io_watcher) {
        have_signals = 1;
      } else {
        uv__metrics_update_idle_time(loop);
        w->cb(loop, w, revents);
      }

      nevents++;
    }

    if (loop->flags & UV_LOOP_REAP_CHILDREN) {
      loop->flags &= ~UV_LOOP_REAP_CHILDREN;
      uv__wait_children(loop);
    }

    uv__metrics_inc_events(loop, nevents);
    if (reset_timeout != 0) {
      timeout = user_timeout;
      reset_timeout = 0;
      uv__metrics_inc_events_waiting(loop, nevents);
    }

    if (have_signals != 0) {
      uv__metrics_update_idle_time(loop);
      loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);
    }

    loop->watchers[loop->nwatchers] = NULL;
    loop->watchers[loop->nwatchers + 1] = NULL;

    if (have_signals != 0)
      return;  /* Event loop should cycle now so don't poll again. */

    if (nevents != 0) {
      if (nfds == ARRAY_SIZE(events) && --count != 0) {
        /* Poll for more events but don't block this time. */
        timeout = 0;
        continue;
      }
      return;
    }

update_timeout:
    if (timeout == 0)
      return;

    if (timeout == -1)
      continue;

    assert(timeout > 0);

    diff = loop->time - base;
    if (diff >= (uint64_t) timeout)
      return;

    timeout -= diff;
  }
}


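/* Called when a watcher is stopped while events for its fd may still be
 * sitting in the events array of the current uv__io_poll() iteration; mark
 * those entries so the dispatch loop skips them.
 */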
void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) {
  struct kevent* events;
  uintptr_t i;
  uintptr_t nfds;

  assert(loop->watchers != NULL);
  assert(fd >= 0);

  events = (struct kevent*) loop->watchers[loop->nwatchers];
  nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1];
  if (events == NULL)
    return;

  /* Invalidate events with same file descriptor */
  for (i = 0; i < nfds; i++)
    if ((int) events[i].ident == fd && events[i].filter != EVFILT_PROC)
      events[i].ident = -1;
}


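/* EVFILT_VNODE callback for fs event watchers: map the kernel fflags to
 * UV_CHANGE/UV_RENAME, recover the file name where the platform allows it,
 * invoke the user callback and re-arm the one-shot kevent.
 */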
static void uv__fs_event(uv_loop_t* loop, uv__io_t* w, unsigned int fflags) {
  uv_fs_event_t* handle;
  struct kevent ev;
  int events;
  const char* path;
#if defined(F_GETPATH)
  /* MAXPATHLEN == PATH_MAX but the former is what XNU calls it internally. */
  char pathbuf[MAXPATHLEN];
#endif

  handle = container_of(w, uv_fs_event_t, event_watcher);

  if (fflags & (NOTE_ATTRIB | NOTE_EXTEND))
    events = UV_CHANGE;
  else
    events = UV_RENAME;

  path = NULL;
#if defined(F_GETPATH)
  /* Also works when the file has been unlinked from the file system. Passing
   * in the path when the file has been deleted is arguably a little strange
   * but it's consistent with what the inotify backend does.
   */
  if (fcntl(handle->event_watcher.fd, F_GETPATH, pathbuf) == 0)
    path = uv__basename_r(pathbuf);
#elif defined(F_KINFO)
  /* Retrieve the file's path through its kinfo_file record. The struct's
   * kf_structsize field must be initialised before the fcntl() call, either
   * with the KINFO_FILE_SIZE constant or with sizeof() as done below.
   */
  struct stat statbuf;
  struct kinfo_file kf;

  if (handle->event_watcher.fd != -1 &&
      (!uv__fstat(handle->event_watcher.fd, &statbuf) &&
       !(statbuf.st_mode & S_IFDIR))) {
    /* KINFO_FILE_SIZE is deliberately not used here: it is not available on
     * non-Intel architectures, and on Intel it yields 1392, the same value
     * obtained with sizeof() here. The man page also states that proceeding
     * this way is acceptable.
     */
    kf.kf_structsize = sizeof(kf);
    if (fcntl(handle->event_watcher.fd, F_KINFO, &kf) == 0)
      path = uv__basename_r(kf.kf_path);
  }
#endif
  handle->cb(handle, path, events, 0);

  if (handle->event_watcher.fd == -1)
    return;

  /* Watcher operates in one-shot mode, re-arm it. */
  fflags = NOTE_ATTRIB | NOTE_WRITE  | NOTE_RENAME
         | NOTE_DELETE | NOTE_EXTEND | NOTE_REVOKE;

  EV_SET(&ev, w->fd, EVFILT_VNODE, EV_ADD | EV_ONESHOT, fflags, 0, 0);

  if (kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL))
    abort();
}


int uv_fs_event_init(uv_loop_t* loop, uv_fs_event_t* handle) {
  uv__handle_init(loop, (uv_handle_t*)handle, UV_FS_EVENT);
  return 0;
}


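/* Start watching a path. On macOS, directories are handed to the FSEvents
 * backend unless the process has forked while a CFRunLoop was active; plain
 * files, and every other platform, use the kqueue EVFILT_VNODE fallback at
 * the bottom of this function.
 */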
int uv_fs_event_start(uv_fs_event_t* handle,
                      uv_fs_event_cb cb,
                      const char* path,
                      unsigned int flags) {
  int fd;
#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  struct stat statbuf;
#endif

  if (uv__is_active(handle))
    return UV_EINVAL;

  handle->cb = cb;
  handle->path = uv__strdup(path);
  if (handle->path == NULL)
    return UV_ENOMEM;

  /* TODO open asynchronously - but how do we report back errors? */
  fd = open(handle->path, O_RDONLY);
  if (fd == -1) {
    uv__free(handle->path);
    handle->path = NULL;
    return UV__ERR(errno);
  }

#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  /* Nullify fields so they can be checked later. */
  handle->cf_cb = NULL;
  handle->realpath = NULL;
  handle->realpath_len = 0;
  handle->cf_flags = flags;

  if (uv__fstat(fd, &statbuf))
    goto fallback;
  /* FSEvents works only with directories */
  if (!(statbuf.st_mode & S_IFDIR))
    goto fallback;

  if (0 == atomic_load_explicit(&uv__has_forked_with_cfrunloop,
                                memory_order_relaxed)) {
    int r;
    /* The fallback fd is no longer needed */
    uv__close_nocheckstdio(fd);
    handle->event_watcher.fd = -1;
    r = uv__fsevents_init(handle);
    if (r == 0) {
      uv__handle_start(handle);
    } else {
      uv__free(handle->path);
      handle->path = NULL;
    }
    return r;
  }
fallback:
#endif /* #if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070 */

  uv__handle_start(handle);
  uv__io_init(&handle->event_watcher, uv__fs_event, fd);
  uv__io_start(handle->loop, &handle->event_watcher, POLLIN);

  return 0;
}


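/* Stop watching: tear down either the FSEvents state or the kqueue watcher,
 * whichever uv_fs_event_start() set up, and release the saved path.
 */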
int uv_fs_event_stop(uv_fs_event_t* handle) {
  int r;
  r = 0;

  if (!uv__is_active(handle))
    return 0;

  uv__handle_stop(handle);

#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  if (0 == atomic_load_explicit(&uv__has_forked_with_cfrunloop,
                                memory_order_relaxed))
    if (handle->cf_cb != NULL)
      r = uv__fsevents_close(handle);
#endif

  if (handle->event_watcher.fd != -1) {
    uv__io_close(handle->loop, &handle->event_watcher);
    uv__close(handle->event_watcher.fd);
    handle->event_watcher.fd = -1;
  }

  uv__free(handle->path);
  handle->path = NULL;

  return r;
}


void uv__fs_event_close(uv_fs_event_t* handle) {
  uv_fs_event_stop(handle);
}