/*
 * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/* We need to use the OPENSSL_fork_*() deprecated APIs */
#define OPENSSL_SUPPRESS_DEPRECATED

#include <openssl/crypto.h>
#include <crypto/cryptlib.h>
#include "internal/cryptlib.h"
#include "internal/rcu.h"
#include "rcu_internal.h"

#if defined(__clang__) && defined(__has_feature)
# if __has_feature(thread_sanitizer)
#  define __SANITIZE_THREAD__
# endif
#endif

#if defined(__SANITIZE_THREAD__)
# include <sanitizer/tsan_interface.h>
# define TSAN_FAKE_UNLOCK(x)   __tsan_mutex_pre_unlock((x), 0); \
                               __tsan_mutex_post_unlock((x), 0)

# define TSAN_FAKE_LOCK(x)  __tsan_mutex_pre_lock((x), 0); \
                            __tsan_mutex_post_lock((x), 0, 0)
#else
# define TSAN_FAKE_UNLOCK(x)
# define TSAN_FAKE_LOCK(x)
#endif

#if defined(__sun)
# include <atomic.h>
#endif

#if defined(__apple_build_version__) && __apple_build_version__ < 6000000
/*
 * OS X 10.7 and 10.8 shipped a version of clang which defines __ATOMIC_ACQUIRE
 * and __ATOMIC_ACQ_REL but whose __atomic_is_lock_free() expects only one
 * parameter rather than two, i.e. it has the signature
 * __atomic_is_lock_free(sizeof(_Atomic(T))). That makes it impossible to use
 * __atomic_is_lock_free here.
 *
 * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
 */
# define BROKEN_CLANG_ATOMICS
#endif

#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)

# if defined(OPENSSL_SYS_UNIX)
#  include <sys/types.h>
#  include <unistd.h>
# endif

# include <assert.h>

/*
 * The Non-Stop KLT thread model currently seems broken in its rwlock
 * implementation
 */
# if defined(PTHREAD_RWLOCK_INITIALIZER) && !defined(_KLT_MODEL_)
#  define USE_RWLOCK
# endif

/*
 * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
 * other compilers.
 *
 * Unfortunately, we can't do that with some "generic type", because there's no
 * guarantee that the chosen generic type is large enough to cover all cases.
 * Therefore, we implement fallbacks for each applicable type, with composed
 * names that include the type they handle.
 *
 * (an anecdote: we previously tried to use |void *| as the generic type, with
 * the thought that the pointer itself is the largest type. However, this is
 * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
 *
 * All applicable ATOMIC_ macros take the intended type as first parameter, so
 * they can map to the correct fallback function. In the GNU/clang case, that
 * parameter is simply ignored.
 */

/*
 * Internal types used with the ATOMIC_ macros, to make it possible to compose
 * fallback function names.
 */
typedef void *pvoid;
typedef struct rcu_cb_item *prcu_cb_item;
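
/*
 * For example, ATOMIC_LOAD_N(uint64_t, p, __ATOMIC_ACQUIRE) expands to
 * __atomic_load_n(p, __ATOMIC_ACQUIRE) when the GNU/clang builtins are
 * usable, and to fallback_atomic_load_n_uint64_t(p) otherwise, in which case
 * the memory order argument is simply dropped.
 */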

# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
    && !defined(USE_ATOMIC_FALLBACKS)
#  if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__)
/*
 * For pointers, the Apple M1 virtualized cpu seems to have some problem using
 * the ldapr instruction (see https://github.com/openssl/openssl/pull/23974).
 * When using the native apple clang compiler, this instruction is emitted for
 * atomic loads, which is bad. So, if
 * 1) We are building on a target that defines __APPLE__ AND
 * 2) We are building on a target using clang (__clang__) AND
 * 3) We are building for an M1 processor (__aarch64__)
 * then we should not use __atomic_load_n and instead implement our own
 * function to issue the ldar instruction, which produces the proper
 * sequencing guarantees.
 */
static inline void *apple_atomic_load_n_pvoid(void **p,
                                              ossl_unused int memorder)
{
    void *ret;

    __asm volatile("ldar %0, [%1]" : "=r" (ret): "r" (p):);

    return ret;
}

/* For uint32_t and uint64_t, we should be fine, though */
#   define apple_atomic_load_n_uint32_t(p, o) __atomic_load_n(p, o)
#   define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o)

#   define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o)
#  else
#   define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
#  endif
#  define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o)
#  define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o)
#  define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o)
#  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
#  define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o)
#  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
#  define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o)
#  define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o)
# else
static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;
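
/*
 * Note that all of the fallbacks below serialise through this single mutex,
 * which also provides full memory ordering; that is why the memory order
 * argument of the ATOMIC_ macros can safely be ignored in this branch.
 */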

#  define IMPL_fallback_atomic_load_n(t)                        \
    static ossl_inline t fallback_atomic_load_n_##t(t *p)      \
    {                                                           \
        t ret;                                                  \
                                                                \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        ret = *p;                                               \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
        return ret;                                             \
    }
IMPL_fallback_atomic_load_n(uint32_t)
IMPL_fallback_atomic_load_n(uint64_t)
IMPL_fallback_atomic_load_n(pvoid)

#  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)

#  define IMPL_fallback_atomic_store_n(t)                       \
    static ossl_inline t fallback_atomic_store_n_##t(t *p, t v) \
    {                                                           \
        t ret;                                                  \
                                                                \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        ret = *p;                                               \
        *p = v;                                                 \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
        return ret;                                             \
    }
IMPL_fallback_atomic_store_n(uint32_t)
IMPL_fallback_atomic_store_n(uint64_t)

#  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)

#  define IMPL_fallback_atomic_store(t)                         \
    static ossl_inline void fallback_atomic_store_##t(t *p, t *v) \
    {                                                           \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        *p = *v;                                                \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
    }
IMPL_fallback_atomic_store(uint64_t)
IMPL_fallback_atomic_store(pvoid)

#  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)

#  define IMPL_fallback_atomic_exchange_n(t)                    \
    static ossl_inline t fallback_atomic_exchange_n_##t(t *p, t v) \
    {                                                           \
        t ret;                                                  \
                                                                \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        ret = *p;                                               \
        *p = v;                                                 \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
        return ret;                                             \
    }
IMPL_fallback_atomic_exchange_n(uint64_t)
IMPL_fallback_atomic_exchange_n(prcu_cb_item)

#  define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v)

/*
 * The fallbacks that follow don't need any per type implementation, as
 * they are designed for uint64_t only. If there comes a time when multiple
 * types need to be covered, it's relatively easy to refactor them the same
 * way as the fallbacks above.
 */

static ossl_inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p += v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    ret = *p;
    *p += v;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v)

static ossl_inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p -= v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p &= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p |= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v)
# endif

/*
 * users is broken up into 2 parts
 *     bits 0-15  current readers
 *     bits 32-63 ID
 */
# define READER_SHIFT 0
# define ID_SHIFT 32
/* TODO: READER_SIZE 32 in threads_win.c */
# define READER_SIZE 16
# define ID_SIZE 32

# define READER_MASK (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK (((uint64_t)1 << ID_SIZE) - 1)
# define READER_COUNT(x) ((uint32_t)(((uint64_t)(x) >> READER_SHIFT) & \
                          READER_MASK))
# define ID_VAL(x) ((uint32_t)(((uint64_t)(x) >> ID_SHIFT) & ID_MASK))
# define VAL_READER ((uint64_t)1 << READER_SHIFT)
# define VAL_ID(x) ((uint64_t)(x) << ID_SHIFT)
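
/*
 * Worked example: users == ((uint64_t)5 << ID_SHIFT) | 3 encodes qp id 5 with
 * 3 active readers, so READER_COUNT(users) == 3 and ID_VAL(users) == 5.
 */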

/*
 * This is the core of an rcu lock. It tracks the readers and writers for the
 * current quiescence point for a given lock. Users is the 64 bit value that
 * stores the READERS/ID as defined above
 */
struct rcu_qp {
    uint64_t users;
};

struct thread_qp {
    struct rcu_qp *qp;
    unsigned int depth;
    CRYPTO_RCU_LOCK *lock;
};

# define MAX_QPS 10
/*
 * This is the per thread tracking data
 * that is assigned to each thread participating
 * in an rcu qp
 *
 * qp points to the qp that it last acquired
 */
struct rcu_thr_data {
    struct thread_qp thread_qps[MAX_QPS];
};
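
/*
 * A thread can therefore hold read side locks on at most MAX_QPS distinct
 * rcu locks at any one time; ossl_rcu_read_lock() asserts if that limit is
 * exceeded.
 */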

/*
 * This is the internal version of a CRYPTO_RCU_LOCK;
 * it is cast from CRYPTO_RCU_LOCK
 */
struct rcu_lock_st {
    /* Callbacks to call for next ossl_synchronize_rcu */
    struct rcu_cb_item *cb_items;

    /* The context we are being created against */
    OSSL_LIB_CTX *ctx;

    /* rcu generation counter for in-order retirement */
    uint32_t id_ctr;

    /* TODO: can be moved before id_ctr for better alignment */
    /* Array of quiescent points for synchronization */
    struct rcu_qp *qp_group;

    /* Number of elements in qp_group array */
    uint32_t group_count;

    /* Index of the current qp in the qp_group array */
    uint32_t reader_idx;

    /* The next id_ctr value to be retired */
    uint32_t next_to_retire;

    /* Index of the next free rcu_qp in the qp_group */
    uint32_t current_alloc_idx;

    /* Number of qp's in qp_group array currently being retired */
    uint32_t writers_alloced;

    /* Lock protecting write side operations */
    pthread_mutex_t write_lock;

    /* Lock protecting updates to writers_alloced/current_alloc_idx */
    pthread_mutex_t alloc_lock;

    /* Signal to wake threads waiting on alloc_lock */
    pthread_cond_t alloc_signal;

    /* Lock to enforce in-order retirement */
    pthread_mutex_t prior_lock;

    /* Signal to wake threads waiting on prior_lock */
    pthread_cond_t prior_signal;
};
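
/*
 * A minimal usage sketch (illustrative only; |shared| and |new_conf| are
 * hypothetical application pointers):
 *
 * Reader:
 *     ossl_rcu_read_lock(lock);
 *     conf = ossl_rcu_uptr_deref(&shared);
 *     ... use conf; it cannot be freed out from under us ...
 *     ossl_rcu_read_unlock(lock);
 *
 * Writer:
 *     ossl_rcu_write_lock(lock);
 *     old = shared;
 *     ossl_rcu_assign_uptr(&shared, (void **)&new_conf);
 *     ossl_rcu_write_unlock(lock);
 *     ossl_synchronize_rcu(lock);
 *     OPENSSL_free(old);
 */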

/* Read side acquisition of the current qp */
static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
{
    uint32_t qp_idx;

    /* get the current qp index */
    for (;;) {
        /*
         * Notes on use of __ATOMIC_ACQUIRE
         * We need to ensure the following:
         * 1) That subsequent operations aren't optimized by hoisting them above
         *    this operation. Specifically, we don't want the below re-load of
         *    qp_idx to get optimized away
         * 2) We want to ensure that any updating of reader_idx on the write side
         *    of the lock is flushed from a local cpu cache so that we see any
         *    updates prior to the load. This is a non-issue on cache coherent
         *    systems like x86, but is relevant on other arches
         * Note: This applies to the reload below as well
         */
        qp_idx = ATOMIC_LOAD_N(uint32_t, &lock->reader_idx, __ATOMIC_ACQUIRE);

        /*
         * Notes on use of __ATOMIC_RELEASE
         * This counter is only read by the write side of the lock, and so we
         * specify __ATOMIC_RELEASE here to ensure that the write side of the
         * lock sees this during the spin loop read of users, as it waits for
         * the reader count to approach zero
         */
        ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);

        /* if the idx hasn't changed, we're good, else try again */
        if (qp_idx == ATOMIC_LOAD_N(uint32_t, &lock->reader_idx,
                                    __ATOMIC_ACQUIRE))
            break;

        /*
         * Notes on use of __ATOMIC_RELEASE
         * As with the add above, we want to ensure that this decrement is
         * seen by the write side of the lock as soon as it happens to prevent
         * undue spinning waiting for write side completion
         */
        ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);
    }

    return &lock->qp_group[qp_idx];
}

static void ossl_rcu_free_local_data(void *arg)
{
    OSSL_LIB_CTX *ctx = arg;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);

    OPENSSL_free(data);
    CRYPTO_THREAD_set_local(lkey, NULL);
}

void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_thr_data *data;
    int i, available_qp = -1;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);

    /*
     * we're going to access current_qp here so ask the
     * processor to fetch it
     */
    data = CRYPTO_THREAD_get_local(lkey);

    if (data == NULL) {
        data = OPENSSL_zalloc(sizeof(*data));
        OPENSSL_assert(data != NULL);
        CRYPTO_THREAD_set_local(lkey, data);
        ossl_init_thread_start(NULL, lock->ctx, ossl_rcu_free_local_data);
    }

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].qp == NULL && available_qp == -1)
            available_qp = i;
        /* If we have a hold on this lock already, we're good */
        if (data->thread_qps[i].lock == lock) {
            data->thread_qps[i].depth++;
            return;
        }
    }

    /*
     * if we get here, then we don't have a hold on this lock yet
     */
    assert(available_qp != -1);

    data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
    data->thread_qps[available_qp].depth = 1;
    data->thread_qps[available_qp].lock = lock;
}

void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
    int i;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
    uint64_t ret;

    assert(data != NULL);

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].lock == lock) {
            /*
             * As with read side acquisition, we use __ATOMIC_RELEASE here
             * to ensure that the decrement is published immediately
             * to any write side waiters
             */
            data->thread_qps[i].depth--;
            if (data->thread_qps[i].depth == 0) {
                ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users,
                                       VAL_READER, __ATOMIC_RELEASE);
                OPENSSL_assert(ret != UINT64_MAX);
                data->thread_qps[i].qp = NULL;
                data->thread_qps[i].lock = NULL;
            }
            return;
        }
    }
    /*
     * If we get here, we're trying to unlock a lock that we never acquired -
     * that's fatal.
     */
    assert(0);
}

/*
 * Write side allocation routine to get the current qp
 * and replace it with a new one
 */
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
    uint64_t new_id;
    uint32_t current_idx;

    pthread_mutex_lock(&lock->alloc_lock);

    /*
     * we need at least one qp to be available with one
     * left over, so that readers can start working on
     * one that isn't yet being waited on
     */
    while (lock->group_count - lock->writers_alloced < 2)
        /* we have to wait for one to be free */
        pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);

    current_idx = lock->current_alloc_idx;

    /* Allocate the qp */
    lock->writers_alloced++;

    /* increment the allocation index */
    lock->current_alloc_idx =
        (lock->current_alloc_idx + 1) % lock->group_count;

    /* get and insert a new id */
    new_id = VAL_ID(lock->id_ctr);
    lock->id_ctr++;

    /*
     * Even though we are under a write side lock here,
     * we need to use atomic instructions to ensure that the results
     * of this update are published to the read side prior to updating the
     * reader idx below
     */
    ATOMIC_AND_FETCH(&lock->qp_group[current_idx].users, ID_MASK,
                     __ATOMIC_RELEASE);
    ATOMIC_OR_FETCH(&lock->qp_group[current_idx].users, new_id,
                    __ATOMIC_RELEASE);

    /*
     * Update the reader index to point at the next qp in the group, so that
     * new readers stop using the one we are about to wait on.
     * Note the use of __ATOMIC_RELEASE here is based on the corresponding use
     * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication
     * of this value to be seen on the read side immediately after it happens
     */
    ATOMIC_STORE_N(uint32_t, &lock->reader_idx, lock->current_alloc_idx,
                   __ATOMIC_RELEASE);

    /* wake up any waiters */
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
    return &lock->qp_group[current_idx];
}
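
/*
 * The qp returned above is the one just taken out of service; the caller
 * (ossl_synchronize_rcu) spins on its reader count before retiring it.
 */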

static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    pthread_mutex_lock(&lock->alloc_lock);
    lock->writers_alloced--;
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
}

/*
 * TODO: count should be unsigned, e.g. uint32_t;
 * a negative value could result in unexpected behaviour
 */
static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
                                            int count)
{
    struct rcu_qp *new =
        OPENSSL_zalloc(sizeof(*new) * count);

    lock->group_count = count;
    return new;
}

void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_lock(&lock->write_lock);
    TSAN_FAKE_UNLOCK(&lock->write_lock);
}

void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    TSAN_FAKE_LOCK(&lock->write_lock);
    pthread_mutex_unlock(&lock->write_lock);
}

void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_qp *qp;
    uint64_t count;
    struct rcu_cb_item *cb_items, *tmpcb;

    pthread_mutex_lock(&lock->write_lock);
    cb_items = lock->cb_items;
    lock->cb_items = NULL;
    pthread_mutex_unlock(&lock->write_lock);

    qp = update_qp(lock);

    /*
     * wait for the reader count to reach zero
     * Note the use of __ATOMIC_ACQUIRE here to ensure that any
     * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
     * is visible prior to our read
     */
    do {
        count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
    } while (READER_COUNT(count) != 0);

    /* retire in order */
    pthread_mutex_lock(&lock->prior_lock);
    while (lock->next_to_retire != ID_VAL(count))
        pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
    lock->next_to_retire++;
    pthread_cond_broadcast(&lock->prior_signal);
    pthread_mutex_unlock(&lock->prior_lock);

    retire_qp(lock, qp);

    /* handle any callbacks that we have */
    while (cb_items != NULL) {
        tmpcb = cb_items;
        cb_items = cb_items->next;
        tmpcb->fn(tmpcb->data);
        OPENSSL_free(tmpcb);
    }
}
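
/*
 * To summarise ossl_synchronize_rcu(): it swaps in a fresh qp, waits for the
 * reader count on the old one to drain to zero, enforces in-order retirement
 * against concurrent synchronisations, and finally runs (and frees) any
 * callbacks queued via ossl_rcu_call() on the calling thread.
 */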

int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
    struct rcu_cb_item *new =
        OPENSSL_zalloc(sizeof(*new));

    if (new == NULL)
        return 0;

    new->data = data;
    new->fn = cb;
    /*
     * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this
     * list are visible to us prior to reading, and publish the new value
     * immediately
     */
    new->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, new,
                                  __ATOMIC_ACQ_REL);

    return 1;
}
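
/*
 * Note that the callback registered above is not invoked immediately; it is
 * queued on lock->cb_items and run by the next ossl_synchronize_rcu() on this
 * lock.
 */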

void *ossl_rcu_uptr_deref(void **p)
{
    return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
}

void ossl_rcu_assign_uptr(void **p, void **v)
{
    ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
}
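
/*
 * These two helpers are the load-acquire/store-release primitives that rcu
 * pointer access builds on: a reader that observes a pointer published with
 * ossl_rcu_assign_uptr() also observes all writes made before it was
 * published.
 */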

CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
{
    struct rcu_lock_st *new;

    if (num_writers < 1)
        num_writers = 1;

    ctx = ossl_lib_ctx_get_concrete(ctx);
    if (ctx == NULL)
        return NULL;

    new = OPENSSL_zalloc(sizeof(*new));
    if (new == NULL)
        return NULL;

    new->ctx = ctx;
    pthread_mutex_init(&new->write_lock, NULL);
    pthread_mutex_init(&new->prior_lock, NULL);
    pthread_mutex_init(&new->alloc_lock, NULL);
    pthread_cond_init(&new->prior_signal, NULL);
    pthread_cond_init(&new->alloc_signal, NULL);
    new->qp_group = allocate_new_qp_group(new, num_writers + 1);
    if (new->qp_group == NULL) {
        OPENSSL_free(new);
        new = NULL;
    }
    return new;
}

void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;

    if (lock == NULL)
        return;

    /* make sure we're synchronized */
    ossl_synchronize_rcu(rlock);

    OPENSSL_free(rlock->qp_group);
    /* There should only be a single qp left now */
    OPENSSL_free(rlock);
}

CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
{
# ifdef USE_RWLOCK
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    if (pthread_rwlock_init(lock, NULL) != 0) {
        OPENSSL_free(lock);
        return NULL;
    }
# else
    pthread_mutexattr_t attr;
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    /*
     * We don't use recursive mutexes, but try to catch errors if we do.
     */
    pthread_mutexattr_init(&attr);
#  if !defined (__TANDEM) && !defined (_SPT_MODEL_)
#   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#   endif
#  else
    /* The SPT Thread Library does not define MUTEX attributes. */
#  endif

    if (pthread_mutex_init(lock, &attr) != 0) {
        pthread_mutexattr_destroy(&attr);
        OPENSSL_free(lock);
        return NULL;
    }

    pthread_mutexattr_destroy(&attr);
# endif

    return lock;
}

__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_rdlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_wrlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_unlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_unlock(lock) != 0) {
        assert(errno != EPERM);
        return 0;
    }
# endif

    return 1;
}

void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
    if (lock == NULL)
        return;

# ifdef USE_RWLOCK
    pthread_rwlock_destroy(lock);
# else
    pthread_mutex_destroy(lock);
# endif
    OPENSSL_free(lock);

    return;
}

int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
    if (pthread_once(once, init) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
    if (pthread_key_create(key, cleanup) != 0)
        return 0;

    return 1;
}

void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_getspecific(*key);
}

int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
    if (pthread_setspecific(*key, val) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
    if (pthread_key_delete(*key) != 0)
        return 0;

    return 1;
}

CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
{
    return pthread_self();
}

int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    return pthread_equal(a, b);
}

int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val += amount;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}
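
/*
 * Illustrative use (the variables are hypothetical): given "int refcount" and
 * "int result", CRYPTO_atomic_add(&refcount, 1, &result, lock) increments
 * refcount and leaves the post-increment value in result, falling back to the
 * supplied CRYPTO_RWLOCK when no lock-free atomic is available.
 */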

int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
                        CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *val += op;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_and_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_and_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *val &= op;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                     CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *val |= op;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;
    *ret = *val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_store(uint64_t *dst, uint64_t val, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*dst), dst)) {
        __atomic_store(dst, &val, __ATOMIC_RELEASE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (dst != NULL) {
        atomic_swap_64(dst, val);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *dst = val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;
    *ret = *val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

# ifndef FIPS_MODULE
int openssl_init_fork_handlers(void)
{
    return 1;
}
# endif /* FIPS_MODULE */

int openssl_get_fork_id(void)
{
    return getpid();
}
#endif
