/*
 * Copyright 2019-2023 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <internal/thread_arch.h>

#if defined(OPENSSL_THREADS_WINNT)
# include <process.h>
# include <windows.h>

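/*
 * Thread entry point thunk: run the caller-provided routine, then record the
 * return value and mark the thread as finished under statelock so that a
 * joining thread can observe completion via the condvar.
 */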
static unsigned __stdcall thread_start_thunk(LPVOID vthread)
{
    CRYPTO_THREAD *thread;
    CRYPTO_THREAD_RETVAL ret;

    thread = (CRYPTO_THREAD *)vthread;

    thread->thread_id = GetCurrentThreadId();

    ret = thread->routine(thread->data);
    ossl_crypto_mutex_lock(thread->statelock);
    CRYPTO_THREAD_SET_STATE(thread, CRYPTO_THREAD_FINISHED);
    thread->retval = ret;
    ossl_crypto_condvar_signal(thread->condvar);
    ossl_crypto_mutex_unlock(thread->statelock);

    return 0;
}

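/*
 * Spawn a new thread using _beginthreadex(), the CRT-aware counterpart of
 * CreateThread(). Returns 1 on success; on failure returns 0 and leaves
 * thread->handle NULL.
 */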
int ossl_crypto_thread_native_spawn(CRYPTO_THREAD *thread)
{
    HANDLE *handle;

    handle = OPENSSL_zalloc(sizeof(*handle));
    if (handle == NULL)
        goto fail;

    *handle = (HANDLE)_beginthreadex(NULL, 0, &thread_start_thunk, thread, 0, NULL);
    if (*handle == NULL)
        goto fail;

    thread->handle = handle;
    return 1;

fail:
    thread->handle = NULL;
    OPENSSL_free(handle);
    return 0;
}

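/*
 * Block until the thread has terminated, check that it exited with an exit
 * code of 0 and release its OS handle. Returns 1 on success, 0 otherwise.
 */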
int ossl_crypto_thread_native_perform_join(CRYPTO_THREAD *thread, CRYPTO_THREAD_RETVAL *retval)
{
    DWORD thread_retval;
    HANDLE *handle;

    if (thread == NULL || thread->handle == NULL)
        return 0;

    handle = (HANDLE *) thread->handle;
    if (WaitForSingleObject(*handle, INFINITE) != WAIT_OBJECT_0)
        return 0;

    if (GetExitCodeThread(*handle, &thread_retval) == 0)
        return 0;

    /*
     * The GetExitCodeThread call followed by this check ensures that the
     * thread exited properly. In particular, thread_retval may be non-zero
     * if the thread exited via an explicit ExitThread/TerminateThread call,
     * or if the thread is still active (in which case the exit code is
     * STILL_ACTIVE (259)).
     */
    if (thread_retval != 0)
        return 0;

    if (CloseHandle(*handle) == 0)
        return 0;

    return 1;
}

int ossl_crypto_thread_native_exit(void)
{
    _endthreadex(0);
    return 1;
}

int ossl_crypto_thread_native_is_self(CRYPTO_THREAD *thread)
{
    return thread->thread_id == GetCurrentThreadId();
}

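/*
 * CRYPTO_MUTEX is backed by a Win32 CRITICAL_SECTION, which only synchronises
 * threads within a single process.
 */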
CRYPTO_MUTEX *ossl_crypto_mutex_new(void)
{
    CRITICAL_SECTION *mutex;

    if ((mutex = OPENSSL_zalloc(sizeof(*mutex))) == NULL)
        return NULL;
    InitializeCriticalSection(mutex);
    return (CRYPTO_MUTEX *)mutex;
}

void ossl_crypto_mutex_lock(CRYPTO_MUTEX *mutex)
{
    CRITICAL_SECTION *mutex_p;

    mutex_p = (CRITICAL_SECTION *)mutex;
    EnterCriticalSection(mutex_p);
}

int ossl_crypto_mutex_try_lock(CRYPTO_MUTEX *mutex)
{
    CRITICAL_SECTION *mutex_p;

    mutex_p = (CRITICAL_SECTION *)mutex;
    if (TryEnterCriticalSection(mutex_p))
        return 1;

    return 0;
}

void ossl_crypto_mutex_unlock(CRYPTO_MUTEX *mutex)
{
    CRITICAL_SECTION *mutex_p;

    mutex_p = (CRITICAL_SECTION *)mutex;
    LeaveCriticalSection(mutex_p);
}

void ossl_crypto_mutex_free(CRYPTO_MUTEX **mutex)
{
    CRITICAL_SECTION **mutex_p;

    mutex_p = (CRITICAL_SECTION **)mutex;
    if (*mutex_p != NULL)
        DeleteCriticalSection(*mutex_p);
    OPENSSL_free(*mutex_p);
    *mutex = NULL;
}

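/*
 * Convert an absolute OSSL_TIME deadline into a relative Win32 timeout in
 * milliseconds. Returns 0 if the deadline has already arrived; otherwise
 * writes the timeout (clamped to INFINITE - 1) to *w_timeout_p and returns 1.
 */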
static int determine_timeout(OSSL_TIME deadline, DWORD *w_timeout_p)
{
    OSSL_TIME now, delta;
    uint64_t ms;

    if (ossl_time_is_infinite(deadline)) {
        *w_timeout_p = INFINITE;
        return 1;
    }

    now = ossl_time_now();
    delta = ossl_time_subtract(deadline, now);

    if (ossl_time_is_zero(delta))
        return 0;

    ms = ossl_time2ms(delta);

    /*
     * The amount of time we want to wait is too long for the 32-bit argument
     * to the Win32 API, so just wait as long as possible.
     */
    if (ms > (uint64_t)(INFINITE - 1))
        *w_timeout_p = INFINITE - 1;
    else
        *w_timeout_p = (DWORD)ms;

    return 1;
}

# if defined(OPENSSL_THREADS_WINNT_LEGACY)
#  include <assert.h>

/*
 * Win32, before Vista, did not provide an OS-level condition variable
 * primitive, so we must construct our own in order to support Windows XP.
 *
 * It is difficult to build a condition variable from the OS-provided
 * primitives in a way that is both correct (avoiding race conditions where
 * broadcasts get lost) and fair.
 *
 * CORRECTNESS:
 *   A blocked thread is a thread which is calling wait(), between the
 *   precise instant at which the external mutex passed to wait() is
 *   unlocked and the instant at which it is relocked.
 *
 *   a)
 *     - If broadcast() is called, ALL blocked threads MUST be unblocked.
 *     - If signal() is called, at least one blocked thread MUST be unblocked.
 *
 *     (i.e.: a signal or broadcast must never get 'lost')
 *
 *   b)
 *     - If broadcast() or signal() is called, this must not cause a thread
 *       which is not blocked to return immediately from a subsequent
 *       call to wait().
 *
 * FAIRNESS:
 *   If broadcast() is called at time T1, all blocked threads must be unblocked
 *   before any thread which subsequently calls wait() at time T2 > T1 is
 *   unblocked.
 *
 *   An example of an implementation which lacks fairness is as follows:
 *
 *     t1 enters wait()
 *     t2 enters wait()
 *
 *     tZ calls broadcast()
 *
 *     t1 exits wait()
 *     t1 enters wait()
 *
 *     tZ calls broadcast()
 *
 *     t1 exits wait()
 *
 * IMPLEMENTATION:
 *
 *   The most suitable primitives available to us in Windows XP are semaphores,
 *   auto-reset events and manual-reset events. A solution based on semaphores
 *   is chosen.
 *
 *   PROBLEM. Designing a solution based on semaphores is non-trivial because,
 *   while it is easy to track the number of waiters in an interlocked data
 *   structure and then add that number to the semaphore, this does not
 *   guarantee fairness or correctness. Consider the following situation:
 *
 *     - t1 enters wait(), adding 1 to the wait counter & blocks on the semaphore
 *     - t2 enters wait(), adding 1 to the wait counter & blocks on the semaphore
 *     - tZ calls broadcast(), finds the wait counter is 2, adds 2 to the semaphore
 *
 *     - t1 exits wait()
 *     - t1 immediately reenters wait() and blocks on the semaphore
 *     - The semaphore is still positive due to also having been signalled
 *       for t2, therefore it is decremented
 *     - t1 exits wait() immediately; t2 is never woken
 *
 *   GENERATION COUNTERS. One naive solution to this is to use a generation
 *   counter. Each broadcast() invocation increments a generation counter. If
 *   the generation counter has not changed during a semaphore wait operation
 *   inside wait(), this indicates that no broadcast() call has been made in
 *   the meantime; therefore, the successful semaphore decrement must have
 *   'stolen' a wakeup from another thread which was waiting to wake up from
 *   the prior broadcast() call but which had not yet had a chance to do so.
 *   The semaphore can then be reincremented and the wait() operation repeated.
 *
 *   However, this suffers from the obvious problem that, without OS guarantees
 *   as to how semaphore readiness events are distributed amongst threads,
 *   there is no particular guarantee that the semaphore readiness event will
 *   not be immediately redistributed back to the same thread t1.
 *
 *   SOLUTION. A solution is chosen as follows. In its initial state, a
 *   condition variable can accept waiters, who wait for the semaphore
 *   normally. However, once broadcast() is called, the condition
 *   variable becomes 'closed'. Any existing blocked threads are unblocked,
 *   but any new calls to wait() will instead enter a blocking pre-wait stage.
 *   Pre-wait threads are not considered to be waiting (and the external
 *   mutex remains held). A call to wait() in pre-wait cannot progress
 *   to waiting until all threads due to be unblocked by the prior broadcast()
 *   call have returned and had a chance to execute.
 *
 *   This pre-wait does not affect a thread if it does not call wait()
 *   again until after all threads have had a chance to execute.
 *
 * RESOURCE USAGE:
 *
 *   Aside from an allocation for the condition variable structure, this
 *   solution uses two Win32 semaphores.
 *
 * FUTURE OPTIMISATIONS:
 *
 *   An optimised multi-generation implementation is possible at the cost of
 *   higher Win32 resource usage. Multiple 'buckets' could be defined, with
 *   usage rotating between buckets internally as buckets become closed.
 *   This would avoid the need for the prewait in more cases, depending
 *   on intensity of usage.
 */
typedef struct legacy_condvar_st {
    CRYPTO_MUTEX    *int_m;       /* internal mutex */
    HANDLE          sema;         /* main wait semaphore */
    HANDLE          prewait_sema; /* prewait semaphore */
    /*
     * All of the following fields are protected by int_m.
     *
     * num_wake only ever increases by virtue of a corresponding decrease in
     * num_wait. num_wait can decrease for other reasons (for example due to a
     * wait operation timing out).
     */
    size_t          num_wait;     /* Num. threads currently blocked */
    size_t          num_wake;     /* Num. threads due to wake up */
    size_t          num_prewait;  /* Num. threads in prewait */
    size_t          gen;          /* Prewait generation */
    int             closed;       /* Is closed? */
} LEGACY_CONDVAR;

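/*
 * Usage sketch (illustrative only; the names 'm', 'cv' and the 'ready'
 * predicate are hypothetical): as with any condition variable, a waiter
 * should hold the external mutex and re-check its predicate in a loop, e.g.:
 *
 *     ossl_crypto_mutex_lock(m);
 *     while (!ready)
 *         ossl_crypto_condvar_wait(cv, m);
 *     ossl_crypto_mutex_unlock(m);
 *
 * The waking side updates the predicate under the same mutex and then calls
 * ossl_crypto_condvar_signal() or ossl_crypto_condvar_broadcast().
 */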
CRYPTO_CONDVAR *ossl_crypto_condvar_new(void)
{
    LEGACY_CONDVAR *cv;

    if ((cv = OPENSSL_malloc(sizeof(LEGACY_CONDVAR))) == NULL)
        return NULL;

    if ((cv->int_m = ossl_crypto_mutex_new()) == NULL) {
        OPENSSL_free(cv);
        return NULL;
    }

    if ((cv->sema = CreateSemaphoreA(NULL, 0, LONG_MAX, NULL)) == NULL) {
        ossl_crypto_mutex_free(&cv->int_m);
        OPENSSL_free(cv);
        return NULL;
    }

    if ((cv->prewait_sema = CreateSemaphoreA(NULL, 0, LONG_MAX, NULL)) == NULL) {
        CloseHandle(cv->sema);
        ossl_crypto_mutex_free(&cv->int_m);
        OPENSSL_free(cv);
        return NULL;
    }

    cv->num_wait      = 0;
    cv->num_wake      = 0;
    cv->num_prewait   = 0;
    cv->gen           = 0;
    cv->closed        = 0;

    return (CRYPTO_CONDVAR *)cv;
}

void ossl_crypto_condvar_free(CRYPTO_CONDVAR **cv_p)
{
    if (*cv_p != NULL) {
        LEGACY_CONDVAR *cv = *(LEGACY_CONDVAR **)cv_p;

        CloseHandle(cv->sema);
        CloseHandle(cv->prewait_sema);
        ossl_crypto_mutex_free(&cv->int_m);
        OPENSSL_free(cv);
    }

    *cv_p = NULL;
}

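/*
 * Wait on a Win32 handle until the given absolute deadline. If the deadline
 * has already arrived we still wait for 1ms so the object is polled at least
 * once.
 */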
static uint32_t obj_wait(HANDLE h, OSSL_TIME deadline)
{
    DWORD timeout;

    if (!determine_timeout(deadline, &timeout))
        timeout = 1;

    return WaitForSingleObject(h, timeout);
}

void ossl_crypto_condvar_wait_timeout(CRYPTO_CONDVAR *cv_, CRYPTO_MUTEX *ext_m,
                                      OSSL_TIME deadline)
{
    LEGACY_CONDVAR *cv = (LEGACY_CONDVAR *)cv_;
    int closed, set_prewait = 0, have_orig_gen = 0;
    uint32_t rc;
    size_t orig_gen;

    /* Admission control - prewait until we can enter our actual wait phase. */
    do {
        ossl_crypto_mutex_lock(cv->int_m);

        closed = cv->closed;

        /*
         * Once prewait is over, the prewait semaphore is signalled and
         * num_prewait is set to 0. Use a generation counter to track whether
         * we need to remove a value we added to num_prewait when exiting
         * (e.g. due to timeout or failure of WaitForSingleObject).
         */
        if (!have_orig_gen) {
            orig_gen = cv->gen;
            have_orig_gen = 1;
        } else if (cv->gen != orig_gen) {
            set_prewait = 0;
            orig_gen = cv->gen;
        }

        if (!closed) {
            /* We can now be admitted. */
            ++cv->num_wait;
            if (set_prewait) {
                --cv->num_prewait;
                set_prewait = 0;
            }
        } else if (!set_prewait) {
            ++cv->num_prewait;
            set_prewait = 1;
        }

        ossl_crypto_mutex_unlock(cv->int_m);

        if (closed)
            if (obj_wait(cv->prewait_sema, deadline) != WAIT_OBJECT_0) {
                /*
                 * If we got WAIT_OBJECT_0 we are safe - num_prewait has been
                 * set to 0 and the semaphore has been consumed. On the other
                 * hand, if we timed out, there may be a residual posting that
                 * was made just after we timed out. However, in the worst case
                 * this will just cause an internal spurious wakeup here in the
                 * future, so we do not care too much about it. We treat the
                 * failure and timeout cases the same, and simply exit in this
                 * case.
                 */
                ossl_crypto_mutex_lock(cv->int_m);
                if (set_prewait && cv->gen == orig_gen)
                    --cv->num_prewait;
                ossl_crypto_mutex_unlock(cv->int_m);
                return;
            }
    } while (closed);

    /*
     * Unlock the external mutex. Do not do this until we have been admitted,
     * as we must guarantee we wake if broadcast is called at any time after
     * ext_m is unlocked.
     */
    ossl_crypto_mutex_unlock(ext_m);

    for (;;) {
        /* Wait. */
        rc = obj_wait(cv->sema, deadline);

        /* Reacquire internal mutex and probe state. */
        ossl_crypto_mutex_lock(cv->int_m);

        if (cv->num_wake > 0) {
            /*
             * A wake token is available, so we can wake up. Consume the token
             * and get out of here. We don't care what WaitForSingleObject
             * returned here; if it timed out coincidentally, a signal might be
             * left in the semaphore, causing a future WaitForSingleObject call
             * to return immediately, but in that case we will just loop again.
             */
            --cv->num_wake;
            if (cv->num_wake == 0 && cv->closed) {
                /*
                 * We consumed the last wake token, so we can now open the
                 * condition variable for new admissions.
                 */
                cv->closed = 0;
                if (cv->num_prewait > 0) {
                    ReleaseSemaphore(cv->prewait_sema, (LONG)cv->num_prewait, NULL);
                    cv->num_prewait = 0;
                    ++cv->gen;
                }
            }
        } else if (rc == WAIT_OBJECT_0) {
            /*
             * We got a wakeup from the semaphore but we did not have any wake
             * tokens. This ideally does not happen, but might if during a
             * previous wait() call the semaphore is posted just after
             * WaitForSingleObject returns due to a timeout (such that the
             * num_wake > 0 case is taken above). Just spin again. (It is worth
             * noting that repeated WaitForSingleObject calls are the only
             * documented method of decrementing a Win32 semaphore, so this is
             * basically the best possible strategy.)
             */
            ossl_crypto_mutex_unlock(cv->int_m);
            continue;
        } else {
            /*
             * Assume we timed out. The WaitForSingleObject call may also have
             * failed for some other reason, which we treat as a timeout.
             */
            assert(cv->num_wait > 0);
            --cv->num_wait;
        }

        break;
    }

    ossl_crypto_mutex_unlock(cv->int_m);
    ossl_crypto_mutex_lock(ext_m);
}

void ossl_crypto_condvar_wait(CRYPTO_CONDVAR *cv, CRYPTO_MUTEX *ext_m)
{
    ossl_crypto_condvar_wait_timeout(cv, ext_m, ossl_time_infinite());
}

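/*
 * Wake every thread currently waiting: convert all waiters into wake tokens,
 * close the condition variable so new waiters are held in prewait until every
 * token has been consumed, and post the semaphore once per token.
 */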
void ossl_crypto_condvar_broadcast(CRYPTO_CONDVAR *cv_)
{
    LEGACY_CONDVAR *cv = (LEGACY_CONDVAR *)cv_;
    size_t num_wake;

    ossl_crypto_mutex_lock(cv->int_m);

    num_wake = cv->num_wait;
    if (num_wake == 0) {
        ossl_crypto_mutex_unlock(cv->int_m);
        return;
    }

    cv->num_wake  += num_wake;
    cv->num_wait  -= num_wake;
    cv->closed     = 1;

    ossl_crypto_mutex_unlock(cv->int_m);
    ReleaseSemaphore(cv->sema, num_wake, NULL);
}

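/*
 * Wake at most one waiting thread by converting a single waiter into a wake
 * token and posting the main semaphore once.
 */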
void ossl_crypto_condvar_signal(CRYPTO_CONDVAR *cv_)
{
    LEGACY_CONDVAR *cv = (LEGACY_CONDVAR *)cv_;

    ossl_crypto_mutex_lock(cv->int_m);

    if (cv->num_wait == 0) {
        ossl_crypto_mutex_unlock(cv->int_m);
        return;
    }

    /*
     * We do not close the condition variable when merely signalling, as there
     * are no guaranteed fairness semantics here, unlike for a broadcast.
     */
    --cv->num_wait;
    ++cv->num_wake;

    ossl_crypto_mutex_unlock(cv->int_m);
    ReleaseSemaphore(cv->sema, 1, NULL);
}

# else

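/*
 * On Windows Vista and later the OS provides CONDITION_VARIABLE, which we use
 * directly together with the CRITICAL_SECTION backing CRYPTO_MUTEX.
 */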
CRYPTO_CONDVAR *ossl_crypto_condvar_new(void)
{
    CONDITION_VARIABLE *cv_p;

    if ((cv_p = OPENSSL_zalloc(sizeof(*cv_p))) == NULL)
        return NULL;
    InitializeConditionVariable(cv_p);
    return (CRYPTO_CONDVAR *)cv_p;
}

void ossl_crypto_condvar_wait(CRYPTO_CONDVAR *cv, CRYPTO_MUTEX *mutex)
{
    CONDITION_VARIABLE *cv_p;
    CRITICAL_SECTION *mutex_p;

    cv_p = (CONDITION_VARIABLE *)cv;
    mutex_p = (CRITICAL_SECTION *)mutex;
    SleepConditionVariableCS(cv_p, mutex_p, INFINITE);
}

void ossl_crypto_condvar_wait_timeout(CRYPTO_CONDVAR *cv, CRYPTO_MUTEX *mutex,
                                      OSSL_TIME deadline)
{
    DWORD timeout;
    CONDITION_VARIABLE *cv_p = (CONDITION_VARIABLE *)cv;
    CRITICAL_SECTION *mutex_p = (CRITICAL_SECTION *)mutex;

    if (!determine_timeout(deadline, &timeout))
        timeout = 1;

    SleepConditionVariableCS(cv_p, mutex_p, timeout);
}

void ossl_crypto_condvar_broadcast(CRYPTO_CONDVAR *cv)
{
    CONDITION_VARIABLE *cv_p;

    cv_p = (CONDITION_VARIABLE *)cv;
    WakeAllConditionVariable(cv_p);
}

void ossl_crypto_condvar_signal(CRYPTO_CONDVAR *cv)
{
    CONDITION_VARIABLE *cv_p;

    cv_p = (CONDITION_VARIABLE *)cv;
    WakeConditionVariable(cv_p);
}

void ossl_crypto_condvar_free(CRYPTO_CONDVAR **cv)
{
    CONDITION_VARIABLE **cv_p;

    cv_p = (CONDITION_VARIABLE **)cv;
    OPENSSL_free(*cv_p);
    *cv_p = NULL;
}

# endif

void ossl_crypto_mem_barrier(void)
{
    MemoryBarrier();
}

#endif