/*
 * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/* We need to use the OPENSSL_fork_*() deprecated APIs */
#define OPENSSL_SUPPRESS_DEPRECATED

#include <openssl/crypto.h>
#include <crypto/cryptlib.h>
#include "internal/cryptlib.h"
#include "internal/rcu.h"
#include "rcu_internal.h"

#if defined(__clang__) && defined(__has_feature)
# if __has_feature(thread_sanitizer)
#  define __SANITIZE_THREAD__
# endif
#endif

#if defined(__SANITIZE_THREAD__)
# include <sanitizer/tsan_interface.h>
# define TSAN_FAKE_UNLOCK(x)   __tsan_mutex_pre_unlock((x), 0); \
                               __tsan_mutex_post_unlock((x), 0)

# define TSAN_FAKE_LOCK(x)  __tsan_mutex_pre_lock((x), 0); \
                            __tsan_mutex_post_lock((x), 0, 0)
#else
# define TSAN_FAKE_UNLOCK(x)
# define TSAN_FAKE_LOCK(x)
#endif

#if defined(__sun)
# include <atomic.h>
#endif

#if defined(__apple_build_version__) && __apple_build_version__ < 6000000
/*
 * OS/X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE
 * and __ATOMIC_ACQ_REL, but which expects only one parameter for
 * __atomic_is_lock_free() rather than two, i.e. it has the signature
 * __atomic_is_lock_free(sizeof(_Atomic(T))). All of this makes it impossible
 * to use __atomic_is_lock_free here.
 *
 * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
 */
# define BROKEN_CLANG_ATOMICS
#endif

#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)

# if defined(OPENSSL_SYS_UNIX)
#  include <sys/types.h>
#  include <unistd.h>
# endif

# include <assert.h>

/*
 * The Non-Stop KLT thread model currently seems broken in its rwlock
 * implementation
 */
# if defined(PTHREAD_RWLOCK_INITIALIZER) && !defined(_KLT_MODEL_)
#  define USE_RWLOCK
# endif

/*
 * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
 * other compilers.
 *
 * Unfortunately, we can't do that with some "generic type", because there's no
 * guarantee that the chosen generic type is large enough to cover all cases.
 * Therefore, we implement fallbacks for each applicable type, with composed
 * names that include the type they handle.
 *
 * (an anecdote: we previously tried to use |void *| as the generic type, with
 * the thought that the pointer itself is the largest type. However, this is
 * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
 *
 * All applicable ATOMIC_ macros take the intended type as first parameter, so
 * they can map to the correct fallback function. In the GNU/clang case, that
 * parameter is simply ignored.
 */

/*
 * Internal types used with the ATOMIC_ macros, to make it possible to compose
 * fallback function names.
 */
typedef void *pvoid;
typedef struct rcu_cb_item *prcu_cb_item;

# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
     && !defined(USE_ATOMIC_FALLBACKS)
#  if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__) && defined(__LP64__)
/*
 * For pointers, the Apple M1 virtualized cpu seems to have some problem using
 * the ldapr instruction (see https://github.com/openssl/openssl/pull/23974).
 * When using the native apple clang compiler, this instruction is emitted for
 * atomic loads, which is bad. So, if
 * 1) We are building on a target that defines __APPLE__ AND
 * 2) We are building on a target using clang (__clang__) AND
 * 3) We are building for an M1 processor (__aarch64__) AND
 * 4) We are building with 64 bit pointers
 * Then we should not use __atomic_load_n and instead implement our own
 * function to issue the ldar instruction instead, which produces the proper
 * sequencing guarantees
 */
static inline void *apple_atomic_load_n_pvoid(void **p,
                                              ossl_unused int memorder)
{
    void *ret;

    __asm volatile("ldar %0, [%1]" : "=r" (ret): "r" (p):);

    return ret;
}

/* For uint32_t and uint64_t, we should be fine, though */
#   define apple_atomic_load_n_uint32_t(p, o) __atomic_load_n(p, o)
#   define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o)

#   define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o)
#  else
#   define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
#  endif
#  define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o)
#  define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o)
#  define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o)
#  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
#  define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o)
#  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
#  define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o)
#  define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o)
# else
static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;

#  define IMPL_fallback_atomic_load_n(t)                          \
    static ossl_inline t fallback_atomic_load_n_##t(t *p)         \
    {                                                             \
        t ret;                                                    \
                                                                  \
        pthread_mutex_lock(&atomic_sim_lock);                     \
        ret = *p;                                                 \
        pthread_mutex_unlock(&atomic_sim_lock);                   \
        return ret;                                               \
    }
IMPL_fallback_atomic_load_n(uint32_t)
IMPL_fallback_atomic_load_n(uint64_t)
IMPL_fallback_atomic_load_n(pvoid)

#  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)

#  define IMPL_fallback_atomic_store_n(t)                         \
    static ossl_inline t fallback_atomic_store_n_##t(t *p, t v)   \
    {                                                             \
        t ret;                                                    \
                                                                  \
        pthread_mutex_lock(&atomic_sim_lock);                     \
        ret = *p;                                                 \
        *p = v;                                                   \
        pthread_mutex_unlock(&atomic_sim_lock);                   \
        return ret;                                               \
    }
IMPL_fallback_atomic_store_n(uint32_t)
IMPL_fallback_atomic_store_n(uint64_t)

#  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)

#  define IMPL_fallback_atomic_store(t)                           \
    static ossl_inline void fallback_atomic_store_##t(t *p, t *v) \
    {                                                             \
        pthread_mutex_lock(&atomic_sim_lock);                     \
        *p = *v;                                                  \
        pthread_mutex_unlock(&atomic_sim_lock);                   \
    }
IMPL_fallback_atomic_store(uint64_t)
IMPL_fallback_atomic_store(pvoid)

#  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)

#  define IMPL_fallback_atomic_exchange_n(t)                          \
    static ossl_inline t fallback_atomic_exchange_n_##t(t *p, t v)    \
    {                                                                 \
        t ret;                                                        \
                                                                      \
        pthread_mutex_lock(&atomic_sim_lock);                         \
        ret = *p;                                                     \
        *p = v;                                                       \
        pthread_mutex_unlock(&atomic_sim_lock);                       \
        return ret;                                                   \
    }
IMPL_fallback_atomic_exchange_n(uint64_t)
IMPL_fallback_atomic_exchange_n(prcu_cb_item)

#  define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v)

/*
 * The fallbacks that follow don't need any per type implementation, as
 * they are designed for uint64_t only. If there comes a time when multiple
 * types need to be covered, it's relatively easy to refactor them the same
 * way as the fallbacks above.
 */

static ossl_inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p += v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    ret = *p;
    *p += v;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v)

static ossl_inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p -= v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p &= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p |= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v)
# endif

/*
 * users is broken up into 2 parts
 * bits 0-15  current readers
 * bits 32-63 ID
 */
# define READER_SHIFT 0
# define ID_SHIFT 32
/* TODO: READER_SIZE 32 in threads_win.c */
# define READER_SIZE 16
# define ID_SIZE 32

# define READER_MASK     (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK         (((uint64_t)1 << ID_SIZE) - 1)
# define READER_COUNT(x) ((uint32_t)(((uint64_t)(x) >> READER_SHIFT) & \
                          READER_MASK))
# define ID_VAL(x)       ((uint32_t)(((uint64_t)(x) >> ID_SHIFT) & ID_MASK))
# define VAL_READER      ((uint64_t)1 << READER_SHIFT)
# define VAL_ID(x)       ((uint64_t)x << ID_SHIFT)
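
/*
 * For illustration: with the layout above, a users value of
 * (VAL_ID(5) + 3 * VAL_READER) decomposes as READER_COUNT(users) == 3 and
 * ID_VAL(users) == 5, i.e. three readers currently hold the qp that was
 * stamped with generation id 5.
 */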

/*
 * This is the core of an rcu lock. It tracks the readers and writers for the
 * current quiescence point for a given lock. Users is the 64 bit value that
 * stores the READERS/ID as defined above
 */
struct rcu_qp {
    uint64_t users;
};

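/*
 * Per-thread record of a hold on one qp of one lock: which qp was acquired,
 * how many nested read acquisitions are outstanding on it (depth), and which
 * lock it belongs to.
 */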
struct thread_qp {
    struct rcu_qp *qp;
    unsigned int depth;
    CRYPTO_RCU_LOCK *lock;
};

# define MAX_QPS 10
/*
 * This is the per thread tracking data
 * that is assigned to each thread participating
 * in an rcu qp
 *
 * qp points to the qp that it last acquired
 */
struct rcu_thr_data {
    struct thread_qp thread_qps[MAX_QPS];
};

/*
 * This is the internal version of a CRYPTO_RCU_LOCK
 * it is cast from CRYPTO_RCU_LOCK
 */
struct rcu_lock_st {
    /* Callbacks to call for next ossl_synchronize_rcu */
    struct rcu_cb_item *cb_items;

    /* The context we are being created against */
    OSSL_LIB_CTX *ctx;

    /* rcu generation counter for in-order retirement */
    uint32_t id_ctr;

    /* TODO: can be moved before id_ctr for better alignment */
    /* Array of quiescent points for synchronization */
    struct rcu_qp *qp_group;

    /* Number of elements in qp_group array */
    uint32_t group_count;

    /* Index of the current qp in the qp_group array */
    uint32_t reader_idx;

    /* value of the next id_ctr value to be retired */
    uint32_t next_to_retire;

    /* index of the next free rcu_qp in the qp_group */
    uint32_t current_alloc_idx;

    /* number of qp's in qp_group array currently being retired */
    uint32_t writers_alloced;

    /* lock protecting write side operations */
    pthread_mutex_t write_lock;

    /* lock protecting updates to writers_alloced/current_alloc_idx */
    pthread_mutex_t alloc_lock;

    /* signal to wake threads waiting on alloc_lock */
    pthread_cond_t alloc_signal;

    /* lock to enforce in-order retirement */
    pthread_mutex_t prior_lock;

    /* signal to wake threads waiting on prior_lock */
    pthread_cond_t prior_signal;
};

/* Read side acquisition of the current qp */
static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
{
    uint32_t qp_idx;

    /* get the current qp index */
    for (;;) {
        /*
         * Notes on use of __ATOMIC_ACQUIRE
         * We need to ensure the following:
         * 1) That subsequent operations aren't optimized by hoisting them above
         *    this operation. Specifically, we don't want the below re-load of
         *    qp_idx to get optimized away
         * 2) We want to ensure that any updating of reader_idx on the write side
         *    of the lock is flushed from a local cpu cache so that we see any
         *    updates prior to the load. This is a non-issue on cache coherent
         *    systems like x86, but is relevant on other arches
         * Note: This applies to the reload below as well
         */
        qp_idx = ATOMIC_LOAD_N(uint32_t, &lock->reader_idx, __ATOMIC_ACQUIRE);

        /*
         * Notes on use of __ATOMIC_RELEASE
         * This counter is only read by the write side of the lock, and so we
         * specify __ATOMIC_RELEASE here to ensure that the write side of the
         * lock sees this during the spin loop read of users, as it waits for
         * the reader count to approach zero
         */
        ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);

        /* if the idx hasn't changed, we're good, else try again */
        if (qp_idx == ATOMIC_LOAD_N(uint32_t, &lock->reader_idx,
                                    __ATOMIC_ACQUIRE))
            break;

        /*
         * Notes on use of __ATOMIC_RELEASE
         * As with the add above, we want to ensure that this decrement is
         * seen by the write side of the lock as soon as it happens to prevent
         * undue spinning waiting for write side completion
         */
        ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);
    }

    return &lock->qp_group[qp_idx];
}

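/* Thread-exit cleanup handler: frees this thread's rcu_thr_data for ctx */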
static void ossl_rcu_free_local_data(void *arg)
{
    OSSL_LIB_CTX *ctx = arg;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);

    OPENSSL_free(data);
    CRYPTO_THREAD_set_local(lkey, NULL);
}

void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_thr_data *data;
    int i, available_qp = -1;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);

    /*
     * we're going to access current_qp here so ask the
     * processor to fetch it
     */
    data = CRYPTO_THREAD_get_local(lkey);

    if (data == NULL) {
        data = OPENSSL_zalloc(sizeof(*data));
        OPENSSL_assert(data != NULL);
        CRYPTO_THREAD_set_local(lkey, data);
        ossl_init_thread_start(NULL, lock->ctx, ossl_rcu_free_local_data);
    }

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].qp == NULL && available_qp == -1)
            available_qp = i;
        /* If we have a hold on this lock already, we're good */
        if (data->thread_qps[i].lock == lock) {
            data->thread_qps[i].depth++;
            return;
        }
    }

    /*
     * if we get here, then we don't have a hold on this lock yet
     */
    assert(available_qp != -1);

    data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
    data->thread_qps[available_qp].depth = 1;
    data->thread_qps[available_qp].lock = lock;
}

void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
    int i;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
    uint64_t ret;

    assert(data != NULL);

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].lock == lock) {
            /*
             * As with read side acquisition, we use __ATOMIC_RELEASE here
             * to ensure that the decrement is published immediately
             * to any write side waiters
             */
            data->thread_qps[i].depth--;
            if (data->thread_qps[i].depth == 0) {
                ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users,
                                       VAL_READER, __ATOMIC_RELEASE);
                OPENSSL_assert(ret != UINT64_MAX);
                data->thread_qps[i].qp = NULL;
                data->thread_qps[i].lock = NULL;
            }
            return;
        }
    }
    /*
     * If we get here, we're trying to unlock a lock that we never acquired -
     * that's fatal.
     */
    assert(0);
}

/*
 * Write side allocation routine to get the current qp
 * and replace it with a new one
 */
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
    uint64_t new_id;
    uint32_t current_idx;

    pthread_mutex_lock(&lock->alloc_lock);

    /*
     * we need at least one qp to be available with one
     * left over, so that readers can start working on
     * one that isn't yet being waited on
     */
    while (lock->group_count - lock->writers_alloced < 2)
        /* we have to wait for one to be free */
        pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);

    current_idx = lock->current_alloc_idx;

    /* Allocate the qp */
    lock->writers_alloced++;

    /* increment the allocation index */
    lock->current_alloc_idx =
        (lock->current_alloc_idx + 1) % lock->group_count;

    /* get and insert a new id */
    new_id = VAL_ID(lock->id_ctr);
    lock->id_ctr++;

    /*
     * Even though we are under a write side lock here
     * We need to use atomic instructions to ensure that the results
     * of this update are published to the read side prior to updating the
     * reader idx below
     */
    ATOMIC_AND_FETCH(&lock->qp_group[current_idx].users, ID_MASK,
                     __ATOMIC_RELEASE);
    ATOMIC_OR_FETCH(&lock->qp_group[current_idx].users, new_id,
                    __ATOMIC_RELEASE);

    /*
     * Update the reader index to be the prior qp.
     * Note the use of __ATOMIC_RELEASE here is based on the corresponding use
     * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication
     * of this value to be seen on the read side immediately after it happens
     */
    ATOMIC_STORE_N(uint32_t, &lock->reader_idx, lock->current_alloc_idx,
                   __ATOMIC_RELEASE);

    /* wake up any waiters */
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
    return &lock->qp_group[current_idx];
}

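/*
 * Return a qp allocated by update_qp() to the group and wake any writer
 * waiting in update_qp() for a free qp.
 */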
static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    pthread_mutex_lock(&lock->alloc_lock);
    lock->writers_alloced--;
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
}

/*
 * TODO: count should be unsigned, e.g. uint32_t;
 * a negative value could result in unexpected behaviour
 */
static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
                                            int count)
{
    struct rcu_qp *new =
        OPENSSL_zalloc(sizeof(*new) * count);

    lock->group_count = count;
    return new;
}

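/*
 * Note: the TSAN_FAKE_UNLOCK/TSAN_FAKE_LOCK annotations below make the
 * write_lock critical section invisible to ThreadSanitizer even though the
 * mutex really is held between ossl_rcu_write_lock() and
 * ossl_rcu_write_unlock(); presumably this suppresses false-positive reports
 * about the writer's (potentially long) critical section.
 */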
void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_lock(&lock->write_lock);
    TSAN_FAKE_UNLOCK(&lock->write_lock);
}

void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    TSAN_FAKE_LOCK(&lock->write_lock);
    pthread_mutex_unlock(&lock->write_lock);
}

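/*
 * Writer side synchronization: swap in a fresh qp, spin until the reader
 * count on the old qp drains to zero, retire qps in generation order, then
 * run any callbacks queued via ossl_rcu_call().
 */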
void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_qp *qp;
    uint64_t count;
    struct rcu_cb_item *cb_items, *tmpcb;

    pthread_mutex_lock(&lock->write_lock);
    cb_items = lock->cb_items;
    lock->cb_items = NULL;
    pthread_mutex_unlock(&lock->write_lock);

    qp = update_qp(lock);

    /*
     * wait for the reader count to reach zero
     * Note the use of __ATOMIC_ACQUIRE here to ensure that any
     * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
     * is visible prior to our read
     */
    do {
        count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
    } while (READER_COUNT(count) != 0);

    /* retire in order */
    pthread_mutex_lock(&lock->prior_lock);
    while (lock->next_to_retire != ID_VAL(count))
        pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
    lock->next_to_retire++;
    pthread_cond_broadcast(&lock->prior_signal);
    pthread_mutex_unlock(&lock->prior_lock);

    retire_qp(lock, qp);

    /* handle any callbacks that we have */
    while (cb_items != NULL) {
        tmpcb = cb_items;
        cb_items = cb_items->next;
        tmpcb->fn(tmpcb->data);
        OPENSSL_free(tmpcb);
    }
}

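/*
 * Queue (cb, data) to be run by the next ossl_synchronize_rcu() on this lock.
 * Returns 1 on success, 0 on allocation failure.
 */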
int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
    struct rcu_cb_item *new =
        OPENSSL_zalloc(sizeof(*new));

    if (new == NULL)
        return 0;

    new->data = data;
    new->fn = cb;
    /*
     * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this
     * list are visible to us prior to reading, and publish the new value
     * immediately
     */
    new->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, new,
                                  __ATOMIC_ACQ_REL);

    return 1;
}

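/* Read side dereference of an RCU protected pointer, with acquire semantics */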
void *ossl_rcu_uptr_deref(void **p)
{
    return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
}

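/* Publish a new value for an RCU protected pointer, with release semantics */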
void ossl_rcu_assign_uptr(void **p, void **v)
{
    ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
}

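/*
 * Create a new rcu lock in the given library context.  num_writers + 1 qps
 * are allocated so that readers always have a qp available that is not yet
 * being waited on (see update_qp()).
 */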
CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
{
    struct rcu_lock_st *new;

    if (num_writers < 1)
        num_writers = 1;

    ctx = ossl_lib_ctx_get_concrete(ctx);
    if (ctx == NULL)
        return NULL;

    new = OPENSSL_zalloc(sizeof(*new));
    if (new == NULL)
        return NULL;

    new->ctx = ctx;
    pthread_mutex_init(&new->write_lock, NULL);
    pthread_mutex_init(&new->prior_lock, NULL);
    pthread_mutex_init(&new->alloc_lock, NULL);
    pthread_cond_init(&new->prior_signal, NULL);
    pthread_cond_init(&new->alloc_signal, NULL);
    new->qp_group = allocate_new_qp_group(new, num_writers + 1);
    if (new->qp_group == NULL) {
        OPENSSL_free(new);
        new = NULL;
    }
    return new;
}

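/*
 * Free an rcu lock, first synchronizing so that any callbacks queued via
 * ossl_rcu_call() are run before the lock state is released.
 */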
void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;

    if (lock == NULL)
        return;

    /* make sure we're synchronized */
    ossl_synchronize_rcu(rlock);

    OPENSSL_free(rlock->qp_group);
    /* There should only be a single qp left now */
    OPENSSL_free(rlock);
}

CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
{
# ifdef USE_RWLOCK
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    if (pthread_rwlock_init(lock, NULL) != 0) {
        OPENSSL_free(lock);
        return NULL;
    }
# else
    pthread_mutexattr_t attr;
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    /*
     * We don't use recursive mutexes, but try to catch errors if we do.
     */
    pthread_mutexattr_init(&attr);
#  if !defined (__TANDEM) && !defined (_SPT_MODEL_)
#   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#   endif
#  else
    /* The SPT Thread Library does not define MUTEX attributes. */
#  endif

    if (pthread_mutex_init(lock, &attr) != 0) {
        pthread_mutexattr_destroy(&attr);
        OPENSSL_free(lock);
        return NULL;
    }

    pthread_mutexattr_destroy(&attr);
# endif

    return lock;
}

__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_rdlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_wrlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_unlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_unlock(lock) != 0) {
        assert(errno != EPERM);
        return 0;
    }
# endif

    return 1;
}

void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
    if (lock == NULL)
        return;

# ifdef USE_RWLOCK
    pthread_rwlock_destroy(lock);
# else
    pthread_mutex_destroy(lock);
# endif
    OPENSSL_free(lock);

    return;
}

int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
    if (pthread_once(once, init) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
    if (pthread_key_create(key, cleanup) != 0)
        return 0;

    return 1;
}

void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_getspecific(*key);
}

int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
    if (pthread_setspecific(*key, val) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
    if (pthread_key_delete(*key) != 0)
        return 0;

    return 1;
}

CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
{
    return pthread_self();
}

int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    return pthread_equal(a, b);
}

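/*
 * The CRYPTO_atomic_* functions below share a common pattern: use the
 * GCC/clang __atomic builtins when they are lock-free for the operand, fall
 * back to the Solaris atomic_*_nv operations on SunOS 5.10/5.11, and
 * otherwise emulate the operation under the caller-supplied CRYPTO_RWLOCK.
 */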
int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val += amount;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
                        CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *val += op;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_and_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_and_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *val &= op;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                     CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *val |= op;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;
    *ret = *val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_store(uint64_t *dst, uint64_t val, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*dst), dst)) {
        __atomic_store(dst, &val, __ATOMIC_RELEASE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (dst != NULL) {
        atomic_swap_64(dst, val);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *dst = val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;
    *ret = *val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

# ifndef FIPS_MODULE
int openssl_init_fork_handlers(void)
{
    return 1;
}
# endif /* FIPS_MODULE */

int openssl_get_fork_id(void)
{
    return getpid();
}
#endif