/*
 * Copyright 2019-2023 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <internal/thread_arch.h>

#if defined(OPENSSL_THREADS_WINNT)
# include <process.h>
# include <windows.h>

static unsigned __stdcall thread_start_thunk(LPVOID vthread)
{
    CRYPTO_THREAD *thread;
    CRYPTO_THREAD_RETVAL ret;

    thread = (CRYPTO_THREAD *)vthread;

    thread->thread_id = GetCurrentThreadId();

    ret = thread->routine(thread->data);
    ossl_crypto_mutex_lock(thread->statelock);
    CRYPTO_THREAD_SET_STATE(thread, CRYPTO_THREAD_FINISHED);
    thread->retval = ret;
    ossl_crypto_condvar_signal(thread->condvar);
    ossl_crypto_mutex_unlock(thread->statelock);

    return 0;
}

int ossl_crypto_thread_native_spawn(CRYPTO_THREAD *thread)
{
    HANDLE *handle;

    handle = OPENSSL_zalloc(sizeof(*handle));
    if (handle == NULL)
        goto fail;

    *handle = (HANDLE)_beginthreadex(NULL, 0, &thread_start_thunk,
                                     thread, 0, NULL);
    if (*handle == NULL)
        goto fail;

    thread->handle = handle;
    return 1;

fail:
    thread->handle = NULL;
    OPENSSL_free(handle);
    return 0;
}

int ossl_crypto_thread_native_perform_join(CRYPTO_THREAD *thread,
                                           CRYPTO_THREAD_RETVAL *retval)
{
    DWORD thread_retval;
    HANDLE *handle;

    if (thread == NULL || thread->handle == NULL)
        return 0;

    handle = (HANDLE *) thread->handle;
    if (WaitForSingleObject(*handle, INFINITE) != WAIT_OBJECT_0)
        return 0;

    if (GetExitCodeThread(*handle, &thread_retval) == 0)
        return 0;

    /*
     * The GetExitCodeThread call followed by this check makes sure that the
     * thread exited properly. In particular, thread_retval may be non-zero
     * if the thread exited via an explicit ExitThread/TerminateThread call,
     * or if the thread is still active (in which case the exit code is
     * reported as STILL_ACTIVE (259)).
     */
    if (thread_retval != 0)
        return 0;

    if (CloseHandle(*handle) == 0)
        return 0;

    return 1;
}
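
/*
 * Illustrative call sequence (a hedged sketch, not code from this file; how
 * the CRYPTO_THREAD object and its routine/data/statelock/condvar fields are
 * set up is assumed to be handled by the platform-independent thread_arch
 * layer before these native functions are called):
 *
 *     CRYPTO_THREAD_RETVAL rv;
 *
 *     if (!ossl_crypto_thread_native_spawn(thread))
 *         ...handle the error...
 *     ...
 *     if (!ossl_crypto_thread_native_perform_join(thread, &rv))
 *         ...handle the error...
 */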

int ossl_crypto_thread_native_exit(void)
{
    _endthreadex(0);
    return 1;
}

int ossl_crypto_thread_native_is_self(CRYPTO_THREAD *thread)
{
    return thread->thread_id == GetCurrentThreadId();
}

CRYPTO_MUTEX *ossl_crypto_mutex_new(void)
{
    CRITICAL_SECTION *mutex;

    if ((mutex = OPENSSL_zalloc(sizeof(*mutex))) == NULL)
        return NULL;
    InitializeCriticalSection(mutex);
    return (CRYPTO_MUTEX *)mutex;
}

void ossl_crypto_mutex_lock(CRYPTO_MUTEX *mutex)
{
    CRITICAL_SECTION *mutex_p;

    mutex_p = (CRITICAL_SECTION *)mutex;
    EnterCriticalSection(mutex_p);
}

int ossl_crypto_mutex_try_lock(CRYPTO_MUTEX *mutex)
{
    CRITICAL_SECTION *mutex_p;

    mutex_p = (CRITICAL_SECTION *)mutex;
    if (TryEnterCriticalSection(mutex_p))
        return 1;

    return 0;
}

void ossl_crypto_mutex_unlock(CRYPTO_MUTEX *mutex)
{
    CRITICAL_SECTION *mutex_p;

    mutex_p = (CRITICAL_SECTION *)mutex;
    LeaveCriticalSection(mutex_p);
}

void ossl_crypto_mutex_free(CRYPTO_MUTEX **mutex)
{
    CRITICAL_SECTION **mutex_p;

    mutex_p = (CRITICAL_SECTION **)mutex;
    if (*mutex_p != NULL)
        DeleteCriticalSection(*mutex_p);
    OPENSSL_free(*mutex_p);
    *mutex = NULL;
}

static int determine_timeout(OSSL_TIME deadline, DWORD *w_timeout_p)
{
    OSSL_TIME now, delta;
    uint64_t ms;

    if (ossl_time_is_infinite(deadline)) {
        *w_timeout_p = INFINITE;
        return 1;
    }

    now = ossl_time_now();
    delta = ossl_time_subtract(deadline, now);

    if (ossl_time_is_zero(delta))
        return 0;

    ms = ossl_time2ms(delta);

    /*
     * If the amount of time we want to wait is too long for the 32-bit
     * timeout argument to the Win32 API, just wait as long as possible.
     */
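    /*
     * (Worked example, added for illustration: INFINITE - 1 is 0xFFFFFFFE
     * milliseconds, roughly 49.7 days, so this clamp only takes effect for
     * very distant deadlines.)
     */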
    if (ms > (uint64_t)(INFINITE - 1))
        *w_timeout_p = INFINITE - 1;
    else
        *w_timeout_p = (DWORD)ms;

    return 1;
}

# if defined(OPENSSL_THREADS_WINNT_LEGACY)
#  include <assert.h>

/*
 * Win32, before Vista, did not have an OS-provided condition variable
 * construct. This means that in order to support Windows XP we have to
 * construct our own.
 *
 * It is difficult to build a condition variable from the OS-provided
 * primitives in a way that is both correct (avoiding race conditions where
 * broadcasts get lost) and fair.
 *
 * CORRECTNESS:
 *   A blocked thread is a thread which is calling wait(), between the
 *   precise instants at which the external mutex passed to wait() is
 *   unlocked and the instant at which it is relocked.
 *
 *   a)
 *     - If broadcast() is called, ALL blocked threads MUST be unblocked.
 *     - If signal() is called, at least one blocked thread MUST be unblocked.
 *
 *     (i.e.: a signal or broadcast must never get 'lost')
 *
 *   b)
 *     - If broadcast() or signal() is called, this must not cause a thread
 *       which is not blocked to return immediately from a subsequent
 *       call to wait().
 *
 * FAIRNESS:
 *   If broadcast() is called at time T1, all blocked threads must be unblocked
 *   before any thread which subsequently calls wait() at time T2 > T1 is
 *   unblocked.
 *
 *   An example of an implementation which lacks fairness is as follows:
 *
 *     t1 enters wait()
 *     t2 enters wait()
 *
 *     tZ calls broadcast()
 *
 *     t1 exits wait()
 *     t1 enters wait()
 *
 *     tZ calls broadcast()
 *
 *     t1 exits wait()
 *
 * IMPLEMENTATION:
 *
 *   The most suitable primitives available to us in Windows XP are semaphores,
 *   auto-reset events and manual-reset events. A solution based on semaphores
 *   is chosen.
 *
 *   PROBLEM. Designing a solution based on semaphores is non-trivial because,
 *   while it is easy to track the number of waiters in an interlocked data
 *   structure and then add that number to the semaphore, this does not
 *   guarantee fairness or correctness. Consider the following situation:
 *
 *   - t1 enters wait(), adding 1 to the wait counter & blocks on the semaphore
 *   - t2 enters wait(), adding 1 to the wait counter & blocks on the semaphore
 *   - tZ calls broadcast(), finds the wait counter is 2, adds 2 to the semaphore
 *
 *   - t1 exits wait()
 *   - t1 immediately reenters wait() and blocks on the semaphore
 *   - The semaphore is still positive due to also having been signalled
 *     for t2, therefore it is decremented
 *   - t1 exits wait() immediately; t2 is never woken
 *
 *   GENERATION COUNTERS. One naive solution to this is to use a generation
 *   counter. Each broadcast() invocation increments a generation counter. If
 *   the generation counter has not changed during a semaphore wait operation
 *   inside wait(), this indicates that no broadcast() call has been made in
 *   the meantime; therefore, the successful semaphore decrement must have
 *   'stolen' a wakeup from another thread which was waiting to wakeup from the
 *   prior broadcast() call but which had not yet had a chance to do so. The
 *   semaphore can then be reincremented and the wait() operation repeated.
 *
 *   However, this suffers from the obvious problem that without OS guarantees
 *   as to how semaphore readiness events are distributed amongst threads,
 *   there is no particular guarantee that the semaphore readiness event will
 *   not be immediately redistributed back to the same thread t1.
 *
 *   SOLUTION. A solution is chosen as follows. In its initial state, a
 *   condition variable can accept waiters, who wait for the semaphore
 *   normally. However, once broadcast() is called, the condition
 *   variable becomes 'closed'. Any existing blocked threads are unblocked,
 *   but any new calls to wait() will instead enter a blocking pre-wait stage.
 *   Pre-wait threads are not considered to be waiting (and the external
 *   mutex remains held). A call to wait() in pre-wait cannot progress
 *   to waiting until all threads due to be unblocked by the prior broadcast()
 *   call have returned and had a chance to execute.
 *
 *   This pre-wait does not affect a thread if it does not call wait()
 *   again until after all threads have had a chance to execute.
 *
 * RESOURCE USAGE. Aside from an allocation for the condition variable
 * structure, this solution uses two Win32 semaphores.
 *
 * FUTURE OPTIMISATIONS:
 *
 *   An optimised multi-generation implementation is possible at the cost of
 *   higher Win32 resource usage. Multiple 'buckets' could be defined, with
 *   usage rotating between buckets internally as buckets become closed.
 *   This would avoid the need for the prewait in more cases, depending
 *   on intensity of usage.
 */
typedef struct legacy_condvar_st {
    CRYPTO_MUTEX *int_m;        /* internal mutex */
    HANDLE       sema;          /* main wait semaphore */
    HANDLE       prewait_sema;  /* prewait semaphore */
    /*
     * All of the following fields are protected by int_m.
     *
     * num_wake only ever increases by virtue of a corresponding decrease in
     * num_wait. num_wait can decrease for other reasons (for example due to a
     * wait operation timing out).
     */
    size_t       num_wait;      /* Num. threads currently blocked */
    size_t       num_wake;      /* Num. threads due to wake up */
    size_t       num_prewait;   /* Num. threads in prewait */
    size_t       gen;           /* Prewait generation */
    int          closed;        /* Is closed? */
} LEGACY_CONDVAR;
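
/*
 * Illustrative usage of the condition variable API implemented below (a
 * hedged sketch, not code from this file; the mutex m, the condvar cv and
 * the 'ready' flag are hypothetical caller state):
 *
 *   Waiting side:
 *
 *     ossl_crypto_mutex_lock(m);
 *     while (!ready)
 *         ossl_crypto_condvar_wait(cv, m);
 *     ...consume the state protected by m...
 *     ossl_crypto_mutex_unlock(m);
 *
 *   Waking side:
 *
 *     ossl_crypto_mutex_lock(m);
 *     ready = 1;
 *     ossl_crypto_condvar_broadcast(cv);
 *     ossl_crypto_mutex_unlock(m);
 *
 * As with any condition variable, the predicate must be rechecked in a loop
 * after each wakeup: spurious wakeups are possible and are explicitly
 * tolerated by the implementation below.
 */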

CRYPTO_CONDVAR *ossl_crypto_condvar_new(void)
{
    LEGACY_CONDVAR *cv;

    if ((cv = OPENSSL_malloc(sizeof(LEGACY_CONDVAR))) == NULL)
        return NULL;

    if ((cv->int_m = ossl_crypto_mutex_new()) == NULL) {
        OPENSSL_free(cv);
        return NULL;
    }

    if ((cv->sema = CreateSemaphoreA(NULL, 0, LONG_MAX, NULL)) == NULL) {
        ossl_crypto_mutex_free(&cv->int_m);
        OPENSSL_free(cv);
        return NULL;
    }

    if ((cv->prewait_sema = CreateSemaphoreA(NULL, 0, LONG_MAX, NULL)) == NULL) {
        CloseHandle(cv->sema);
        ossl_crypto_mutex_free(&cv->int_m);
        OPENSSL_free(cv);
        return NULL;
    }

    cv->num_wait = 0;
    cv->num_wake = 0;
    cv->num_prewait = 0;
    cv->gen = 0;
    cv->closed = 0;

    return (CRYPTO_CONDVAR *)cv;
}

void ossl_crypto_condvar_free(CRYPTO_CONDVAR **cv_p)
{
    if (*cv_p != NULL) {
        LEGACY_CONDVAR *cv = *(LEGACY_CONDVAR **)cv_p;

        CloseHandle(cv->sema);
        CloseHandle(cv->prewait_sema);
        ossl_crypto_mutex_free(&cv->int_m);
        OPENSSL_free(cv);
    }

    *cv_p = NULL;
}

static uint32_t obj_wait(HANDLE h, OSSL_TIME deadline)
{
    DWORD timeout;

    if (!determine_timeout(deadline, &timeout))
        timeout = 1;

    return WaitForSingleObject(h, timeout);
}

void ossl_crypto_condvar_wait_timeout(CRYPTO_CONDVAR *cv_, CRYPTO_MUTEX *ext_m,
                                      OSSL_TIME deadline)
{
    LEGACY_CONDVAR *cv = (LEGACY_CONDVAR *)cv_;
    int closed, set_prewait = 0, have_orig_gen = 0;
    uint32_t rc;
    size_t orig_gen;

    /* Admission control - prewait until we can enter our actual wait phase. */
    do {
        ossl_crypto_mutex_lock(cv->int_m);

        closed = cv->closed;

        /*
         * Once prewait is over, the prewait semaphore is signalled and
         * num_prewait is reset to 0. We use a generation counter to track
         * whether we still need to undo our increment of num_prewait when
         * exiting early (e.g. due to a timeout or a failure of
         * WaitForSingleObject).
         */
        if (!have_orig_gen) {
            orig_gen = cv->gen;
            have_orig_gen = 1;
        } else if (cv->gen != orig_gen) {
            set_prewait = 0;
            orig_gen = cv->gen;
        }

        if (!closed) {
            /* We can now be admitted. */
            ++cv->num_wait;
            if (set_prewait) {
                --cv->num_prewait;
                set_prewait = 0;
            }
        } else if (!set_prewait) {
            ++cv->num_prewait;
            set_prewait = 1;
        }

        ossl_crypto_mutex_unlock(cv->int_m);

        if (closed)
            if (obj_wait(cv->prewait_sema, deadline) != WAIT_OBJECT_0) {
                /*
                 * If we got WAIT_OBJECT_0 we are safe - num_prewait has been
                 * set to 0 and the semaphore has been consumed. On the other
                 * hand if we timed out, there may be a residual posting that
                 * was made just after we timed out. However in the worst case
                 * this will just cause an internal spurious wakeup here in the
                 * future, so we do not care too much about this. We treat
                 * failure and timeout cases as the same, and simply exit in
                 * this case.
                 */
                ossl_crypto_mutex_lock(cv->int_m);
                if (set_prewait && cv->gen == orig_gen)
                    --cv->num_prewait;
                ossl_crypto_mutex_unlock(cv->int_m);
                return;
            }
    } while (closed);

    /*
     * Unlock external mutex. Do not do this until we have been admitted, as we
     * must guarantee we wake if broadcast is called at any time after ext_m is
     * unlocked.
     */
    ossl_crypto_mutex_unlock(ext_m);

    for (;;) {
        /* Wait. */
        rc = obj_wait(cv->sema, deadline);

        /* Reacquire internal mutex and probe state. */
        ossl_crypto_mutex_lock(cv->int_m);

        if (cv->num_wake > 0) {
            /*
             * A wake token is available, so we can wake up. Consume the token
             * and get out of here. We do not care what WaitForSingleObject
             * returned here (e.g. it may have timed out coincidentally). In
             * that case a posting may be left in the semaphore, which will
             * cause a future WaitForSingleObject call to return immediately;
             * that future call will simply loop again.
             */
            --cv->num_wake;
            if (cv->num_wake == 0 && cv->closed) {
                /*
                 * We consumed the last wake token, so we can now open the
                 * condition variable for new admissions.
                 */
                cv->closed = 0;
                if (cv->num_prewait > 0) {
                    ReleaseSemaphore(cv->prewait_sema, (LONG)cv->num_prewait,
                                     NULL);
                    cv->num_prewait = 0;
                    ++cv->gen;
                }
            }
        } else if (rc == WAIT_OBJECT_0) {
            /*
             * We got a wakeup from the semaphore but we did not have any wake
             * tokens. This ideally does not happen, but might if during a
             * previous wait() call the semaphore is posted just after
             * WaitForSingleObject returns due to a timeout (such that the
             * num_wake > 0 case is taken above). Just spin again. (It is
             * worth noting that making repeated WaitForSingleObject calls is
             * the only documented way to decrement a Win32 semaphore, so this
             * is basically the best possible strategy.)
             */
            ossl_crypto_mutex_unlock(cv->int_m);
            continue;
        } else {
            /*
             * Assume we timed out. The WaitForSingleObject call may also have
             * failed for some other reason, which we treat as a timeout.
             */
            assert(cv->num_wait > 0);
            --cv->num_wait;
        }

        break;
    }

    ossl_crypto_mutex_unlock(cv->int_m);
    ossl_crypto_mutex_lock(ext_m);
}

void ossl_crypto_condvar_wait(CRYPTO_CONDVAR *cv, CRYPTO_MUTEX *ext_m)
{
    ossl_crypto_condvar_wait_timeout(cv, ext_m, ossl_time_infinite());
}

void ossl_crypto_condvar_broadcast(CRYPTO_CONDVAR *cv_)
{
    LEGACY_CONDVAR *cv = (LEGACY_CONDVAR *)cv_;
    size_t num_wake;

    ossl_crypto_mutex_lock(cv->int_m);

    num_wake = cv->num_wait;
    if (num_wake == 0) {
        ossl_crypto_mutex_unlock(cv->int_m);
        return;
    }

    cv->num_wake += num_wake;
    cv->num_wait -= num_wake;
    cv->closed = 1;

    ossl_crypto_mutex_unlock(cv->int_m);
    ReleaseSemaphore(cv->sema, num_wake, NULL);
}

void ossl_crypto_condvar_signal(CRYPTO_CONDVAR *cv_)
{
    LEGACY_CONDVAR *cv = (LEGACY_CONDVAR *)cv_;

    ossl_crypto_mutex_lock(cv->int_m);

    if (cv->num_wait == 0) {
        ossl_crypto_mutex_unlock(cv->int_m);
        return;
    }

    /*
     * We do not close the condition variable when merely signalling, as there
     * are no guaranteed fairness semantics here, unlike for a broadcast.
     */
    --cv->num_wait;
    ++cv->num_wake;

    ossl_crypto_mutex_unlock(cv->int_m);
    ReleaseSemaphore(cv->sema, 1, NULL);
}

# else

CRYPTO_CONDVAR *ossl_crypto_condvar_new(void)
{
    CONDITION_VARIABLE *cv_p;

    if ((cv_p = OPENSSL_zalloc(sizeof(*cv_p))) == NULL)
        return NULL;
    InitializeConditionVariable(cv_p);
    return (CRYPTO_CONDVAR *)cv_p;
}

void ossl_crypto_condvar_wait(CRYPTO_CONDVAR *cv, CRYPTO_MUTEX *mutex)
{
    CONDITION_VARIABLE *cv_p;
    CRITICAL_SECTION *mutex_p;

    cv_p = (CONDITION_VARIABLE *)cv;
    mutex_p = (CRITICAL_SECTION *)mutex;
    SleepConditionVariableCS(cv_p, mutex_p, INFINITE);
}

void ossl_crypto_condvar_wait_timeout(CRYPTO_CONDVAR *cv, CRYPTO_MUTEX *mutex,
                                      OSSL_TIME deadline)
{
    DWORD timeout;
    CONDITION_VARIABLE *cv_p = (CONDITION_VARIABLE *)cv;
    CRITICAL_SECTION *mutex_p = (CRITICAL_SECTION *)mutex;

    if (!determine_timeout(deadline, &timeout))
        timeout = 1;

    SleepConditionVariableCS(cv_p, mutex_p, timeout);
}

void ossl_crypto_condvar_broadcast(CRYPTO_CONDVAR *cv)
{
    CONDITION_VARIABLE *cv_p;

    cv_p = (CONDITION_VARIABLE *)cv;
    WakeAllConditionVariable(cv_p);
}

void ossl_crypto_condvar_signal(CRYPTO_CONDVAR *cv)
{
    CONDITION_VARIABLE *cv_p;

    cv_p = (CONDITION_VARIABLE *)cv;
    WakeConditionVariable(cv_p);
}

void ossl_crypto_condvar_free(CRYPTO_CONDVAR **cv)
{
    CONDITION_VARIABLE **cv_p;

    cv_p = (CONDITION_VARIABLE **)cv;
    OPENSSL_free(*cv_p);
    *cv_p = NULL;
}

# endif

void ossl_crypto_mem_barrier(void)
{
    MemoryBarrier();
}

#endif