xref: /openssl/crypto/threads_pthread.c (revision 9f4d8c63)
1 /*
2  * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 /* We need to use the OPENSSL_fork_*() deprecated APIs */
11 #define OPENSSL_SUPPRESS_DEPRECATED
12 
13 #include <openssl/crypto.h>
14 #include <crypto/cryptlib.h>
15 #include "internal/cryptlib.h"
16 #include "internal/rcu.h"
17 #include "rcu_internal.h"
18 
19 #if defined(__clang__) && defined(__has_feature)
20 # if __has_feature(thread_sanitizer)
21 #  define __SANITIZE_THREAD__
22 # endif
23 #endif
24 
#if defined(__SANITIZE_THREAD__)
# include <sanitizer/tsan_interface.h>
/*
 * Annotations that report a mutex unlock/lock to ThreadSanitizer without
 * touching the mutex itself.  Each macro expands to a single statement
 * (do/while(0)) so that it stays correct inside an unbraced if/else.
 */
# define TSAN_FAKE_UNLOCK(x)                    \
    do {                                        \
        __tsan_mutex_pre_unlock((x), 0);        \
        __tsan_mutex_post_unlock((x), 0);       \
    } while (0)

# define TSAN_FAKE_LOCK(x)                      \
    do {                                        \
        __tsan_mutex_pre_lock((x), 0);          \
        __tsan_mutex_post_lock((x), 0, 0);      \
    } while (0)
#else
/* Without ThreadSanitizer the annotations are no-op statements */
# define TSAN_FAKE_UNLOCK(x) do { } while (0)
# define TSAN_FAKE_LOCK(x) do { } while (0)
#endif
36 
37 #if defined(__sun)
38 # include <atomic.h>
39 #endif
40 
41 #if defined(__apple_build_version__) && __apple_build_version__ < 6000000
42 /*
43  * OS/X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE and
44  * __ATOMIC_ACQ_REL but which expects only one parameter for __atomic_is_lock_free()
45  * rather than two which has signature __atomic_is_lock_free(sizeof(_Atomic(T))).
46  * All of this makes impossible to use __atomic_is_lock_free here.
47  *
48  * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
49  */
50 # define BROKEN_CLANG_ATOMICS
51 #endif
52 
53 #if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)
54 
55 # if defined(OPENSSL_SYS_UNIX)
56 #  include <sys/types.h>
57 #  include <unistd.h>
58 # endif
59 
60 # include <assert.h>
61 
62 /*
63  * The Non-Stop KLT thread model currently seems broken in its rwlock
64  * implementation
65  */
66 # if defined(PTHREAD_RWLOCK_INITIALIZER) && !defined(_KLT_MODEL_)
67 #  define USE_RWLOCK
68 # endif
69 
70 /*
71  * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
72  * other compilers.
73 
74  * Unfortunately, we can't do that with some "generic type", because there's no
75  * guarantee that the chosen generic type is large enough to cover all cases.
76  * Therefore, we implement fallbacks for each applicable type, with composed
77  * names that include the type they handle.
78  *
79  * (an anecdote: we previously tried to use |void *| as the generic type, with
80  * the thought that the pointer itself is the largest type.  However, this is
81  * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
82  *
83  * All applicable ATOMIC_ macros take the intended type as first parameter, so
84  * they can map to the correct fallback function.  In the GNU/clang case, that
85  * parameter is simply ignored.
86  */
87 
88 /*
89  * Internal types used with the ATOMIC_ macros, to make it possible to compose
90  * fallback function names.
91  */
92 typedef void *pvoid;
93 typedef struct rcu_cb_item *prcu_cb_item;
94 
95 # if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
96     && !defined(USE_ATOMIC_FALLBACKS)
97 #  if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__)
98 /*
99  * For pointers, Apple M1 virtualized cpu seems to have some problem using the
100  * ldapr instruction (see https://github.com/openssl/openssl/pull/23974)
101  * When using the native apple clang compiler, this instruction is emitted for
102  * atomic loads, which is bad.  So, if
103  * 1) We are building on a target that defines __APPLE__ AND
104  * 2) We are building on a target using clang (__clang__) AND
105  * 3) We are building for an M1 processor (__aarch64__)
106  * Then we should not use __atomic_load_n and instead implement our own
107  * function to issue the ldar instruction instead, which produces the proper
108  * sequencing guarantees
109  */
/*
 * Load the pointer stored at |p| using an explicit ldar instruction.
 * |memorder| is accepted only so the call shape matches __atomic_load_n();
 * it is ignored.
 *
 * The "memory" clobber makes the asm a compiler-level barrier, so that
 * memory accesses that follow the load in program order are not moved in
 * front of it by the optimizer.
 */
static inline void *apple_atomic_load_n_pvoid(void **p,
                                              ossl_unused int memorder)
{
    void *ret;

    __asm volatile("ldar %0, [%1]" : "=r" (ret) : "r" (p) : "memory");

    return ret;
}
119 
/* For uint32_t and uint64_t, we should be fine, though */
121 #   define apple_atomic_load_n_uint32_t(p, o) __atomic_load_n(p, o)
122 #   define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o)
123 
124 #   define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o)
125 #  else
126 #   define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
127 #  endif
128 #  define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o)
129 #  define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o)
130 #  define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o)
131 #  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
132 #  define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o)
133 #  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
134 #  define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o)
135 #  define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o)
136 # else
137 static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;
138 
139 #  define IMPL_fallback_atomic_load_n(t)                        \
140     static ossl_inline t fallback_atomic_load_n_##t(t *p)            \
141     {                                                           \
142         t ret;                                                  \
143                                                                 \
144         pthread_mutex_lock(&atomic_sim_lock);                   \
145         ret = *p;                                               \
146         pthread_mutex_unlock(&atomic_sim_lock);                 \
147         return ret;                                             \
148     }
149 IMPL_fallback_atomic_load_n(uint32_t)
IMPL_fallback_atomic_load_n(uint64_t)150 IMPL_fallback_atomic_load_n(uint64_t)
151 IMPL_fallback_atomic_load_n(pvoid)
152 
153 #  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)
154 
155 #  define IMPL_fallback_atomic_store_n(t)                       \
156     static ossl_inline t fallback_atomic_store_n_##t(t *p, t v)      \
157     {                                                           \
158         t ret;                                                  \
159                                                                 \
160         pthread_mutex_lock(&atomic_sim_lock);                   \
161         ret = *p;                                               \
162         *p = v;                                                 \
163         pthread_mutex_unlock(&atomic_sim_lock);                 \
164         return ret;                                             \
165     }
166 IMPL_fallback_atomic_store_n(uint32_t)
167 IMPL_fallback_atomic_store_n(uint64_t)
168 
169 #  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)
170 
171 #  define IMPL_fallback_atomic_store(t)                         \
172     static ossl_inline void fallback_atomic_store_##t(t *p, t *v)    \
173     {                                                           \
174         pthread_mutex_lock(&atomic_sim_lock);                   \
175         *p = *v;                                                \
176         pthread_mutex_unlock(&atomic_sim_lock);                 \
177     }
178 IMPL_fallback_atomic_store(uint64_t)
179 IMPL_fallback_atomic_store(pvoid)
180 
181 #  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)
182 
183 #  define IMPL_fallback_atomic_exchange_n(t)                            \
184     static ossl_inline t fallback_atomic_exchange_n_##t(t *p, t v)           \
185     {                                                                   \
186         t ret;                                                          \
187                                                                         \
188         pthread_mutex_lock(&atomic_sim_lock);                           \
189         ret = *p;                                                       \
190         *p = v;                                                         \
191         pthread_mutex_unlock(&atomic_sim_lock);                         \
192         return ret;                                                     \
193     }
194 IMPL_fallback_atomic_exchange_n(uint64_t)
195 IMPL_fallback_atomic_exchange_n(prcu_cb_item)
196 
197 #  define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v)
198 
199 /*
200  * The fallbacks that follow don't need any per type implementation, as
201  * they are designed for uint64_t only.  If there comes a time when multiple
202  * types need to be covered, it's relatively easy to refactor them the same
203  * way as the fallbacks above.
204  */
205 
206 static ossl_inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
207 {
208     uint64_t ret;
209 
210     pthread_mutex_lock(&atomic_sim_lock);
211     *p += v;
212     ret = *p;
213     pthread_mutex_unlock(&atomic_sim_lock);
214     return ret;
215 }
216 
217 #  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)
218 
fallback_atomic_fetch_add(uint64_t * p,uint64_t v)219 static ossl_inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v)
220 {
221     uint64_t ret;
222 
223     pthread_mutex_lock(&atomic_sim_lock);
224     ret = *p;
225     *p += v;
226     pthread_mutex_unlock(&atomic_sim_lock);
227     return ret;
228 }
229 
230 #  define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v)
231 
fallback_atomic_sub_fetch(uint64_t * p,uint64_t v)232 static ossl_inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
233 {
234     uint64_t ret;
235 
236     pthread_mutex_lock(&atomic_sim_lock);
237     *p -= v;
238     ret = *p;
239     pthread_mutex_unlock(&atomic_sim_lock);
240     return ret;
241 }
242 
243 #  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)
244 
fallback_atomic_and_fetch(uint64_t * p,uint64_t m)245 static ossl_inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m)
246 {
247     uint64_t ret;
248 
249     pthread_mutex_lock(&atomic_sim_lock);
250     *p &= m;
251     ret = *p;
252     pthread_mutex_unlock(&atomic_sim_lock);
253     return ret;
254 }
255 
256 #  define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v)
257 
fallback_atomic_or_fetch(uint64_t * p,uint64_t m)258 static ossl_inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m)
259 {
260     uint64_t ret;
261 
262     pthread_mutex_lock(&atomic_sim_lock);
263     *p |= m;
264     ret = *p;
265     pthread_mutex_unlock(&atomic_sim_lock);
266     return ret;
267 }
268 
269 #  define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v)
270 # endif
271 
272 /*
273  * users is broken up into 2 parts
274  * bits 0-15 current readers
275  * bit 32-63 ID
276  */
/* Bit position and width of the reader-count and ID fields of |users| */
# define READER_SHIFT 0
# define ID_SHIFT 32
/* TODO: READER_SIZE 32 in threads_win.c */
# define READER_SIZE 16
# define ID_SIZE 32

/* Field masks, expressed relative to bit 0 (i.e. after shifting down) */
# define READER_MASK     (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK         (((uint64_t)1 << ID_SIZE) - 1)
/* Extract the reader count / the ID from a 64 bit |users| value */
# define READER_COUNT(x) ((uint32_t)(((uint64_t)(x) >> READER_SHIFT) & \
                                     READER_MASK))
# define ID_VAL(x)       ((uint32_t)(((uint64_t)(x) >> ID_SHIFT) & ID_MASK))
/* The increment that represents exactly one reader */
# define VAL_READER      ((uint64_t)1 << READER_SHIFT)
/*
 * Move an ID into its field position.  The argument is parenthesised so
 * that the widening cast applies to the whole expression passed in.
 */
# define VAL_ID(x)       ((uint64_t)(x) << ID_SHIFT)
290 
291 /*
292  * This is the core of an rcu lock. It tracks the readers and writers for the
293  * current quiescence point for a given lock. Users is the 64 bit value that
294  * stores the READERS/ID as defined above
295  *
296  */
struct rcu_qp {
    /* Packed 64 bit word: reader count and ID, see READER_COUNT()/ID_VAL() */
    uint64_t users;
};
300 
301 struct thread_qp {
302     struct rcu_qp *qp;
303     unsigned int depth;
304     CRYPTO_RCU_LOCK *lock;
305 };
306 
307 # define MAX_QPS 10
308 /*
309  * This is the per thread tracking data
310  * that is assigned to each thread participating
311  * in an rcu qp
312  *
313  * qp points to the qp that it last acquired
314  *
315  */
316 struct rcu_thr_data {
317     struct thread_qp thread_qps[MAX_QPS];
318 };
319 
320 /*
321  * This is the internal version of a CRYPTO_RCU_LOCK
322  * it is cast from CRYPTO_RCU_LOCK
323  */
324 struct rcu_lock_st {
325     /* Callbacks to call for next ossl_synchronize_rcu */
326     struct rcu_cb_item *cb_items;
327 
328     /* The context we are being created against */
329     OSSL_LIB_CTX *ctx;
330 
331     /* rcu generation counter for in-order retirement */
332     uint32_t id_ctr;
333 
334     /* TODO: can be moved before id_ctr for better alignment */
335     /* Array of quiescent points for synchronization */
336     struct rcu_qp *qp_group;
337 
338     /* Number of elements in qp_group array */
339     uint32_t group_count;
340 
341     /* Index of the current qp in the qp_group array */
342     uint32_t reader_idx;
343 
344     /* value of the next id_ctr value to be retired */
345     uint32_t next_to_retire;
346 
347     /* index of the next free rcu_qp in the qp_group */
348     uint32_t current_alloc_idx;
349 
350     /* number of qp's in qp_group array currently being retired */
351     uint32_t writers_alloced;
352 
353     /* lock protecting write side operations */
354     pthread_mutex_t write_lock;
355 
356     /* lock protecting updates to writers_alloced/current_alloc_idx */
357     pthread_mutex_t alloc_lock;
358 
359     /* signal to wake threads waiting on alloc_lock */
360     pthread_cond_t alloc_signal;
361 
362     /* lock to enforce in-order retirement */
363     pthread_mutex_t prior_lock;
364 
365     /* signal to wake threads waiting on prior_lock */
366     pthread_cond_t prior_signal;
367 };
368 
369 /* Read side acquisition of the current qp */
get_hold_current_qp(struct rcu_lock_st * lock)370 static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
371 {
372     uint32_t qp_idx;
373 
374     /* get the current qp index */
375     for (;;) {
376         /*
377          * Notes on use of __ATOMIC_ACQUIRE
378          * We need to ensure the following:
379          * 1) That subsequent operations aren't optimized by hoisting them above
380          * this operation.  Specifically, we don't want the below re-load of
381          * qp_idx to get optimized away
382          * 2) We want to ensure that any updating of reader_idx on the write side
383          * of the lock is flushed from a local cpu cache so that we see any
384          * updates prior to the load.  This is a non-issue on cache coherent
385          * systems like x86, but is relevant on other arches
386          * Note: This applies to the reload below as well
387          */
388         qp_idx = ATOMIC_LOAD_N(uint32_t, &lock->reader_idx, __ATOMIC_ACQUIRE);
389 
390         /*
391          * Notes of use of __ATOMIC_RELEASE
392          * This counter is only read by the write side of the lock, and so we
393          * specify __ATOMIC_RELEASE here to ensure that the write side of the
394          * lock see this during the spin loop read of users, as it waits for the
395          * reader count to approach zero
396          */
397         ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
398                          __ATOMIC_RELEASE);
399 
400         /* if the idx hasn't changed, we're good, else try again */
401         if (qp_idx == ATOMIC_LOAD_N(uint32_t, &lock->reader_idx,
402                                     __ATOMIC_ACQUIRE))
403             break;
404 
405         /*
406          * Notes on use of __ATOMIC_RELEASE
407          * As with the add above, we want to ensure that this decrement is
408          * seen by the write side of the lock as soon as it happens to prevent
409          * undue spinning waiting for write side completion
410          */
411         ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
412                          __ATOMIC_RELEASE);
413     }
414 
415     return &lock->qp_group[qp_idx];
416 }
417 
ossl_rcu_free_local_data(void * arg)418 static void ossl_rcu_free_local_data(void *arg)
419 {
420     OSSL_LIB_CTX *ctx = arg;
421     CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(ctx);
422     struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
423 
424     OPENSSL_free(data);
425     CRYPTO_THREAD_set_local(lkey, NULL);
426 }
427 
/*
 * Take a read-side hold on |lock| for the calling thread.
 *
 * Holds are tracked per thread in a table of MAX_QPS slots.  Taking the
 * same lock again only increments the nesting depth of its slot; the first
 * acquisition claims a free slot and registers the thread as a reader on
 * the lock's current qp.
 *
 * The function aborts (OPENSSL_assert) if the per thread table cannot be
 * allocated or if all MAX_QPS slots are already in use.
 */
void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_thr_data *data;
    int i, available_qp = -1;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);

    /* Fetch this thread's tracking table, creating it on first use */
    data = CRYPTO_THREAD_get_local(lkey);

    if (data == NULL) {
        data = OPENSSL_zalloc(sizeof(*data));
        OPENSSL_assert(data != NULL);
        CRYPTO_THREAD_set_local(lkey, data);
        /* Arrange for the table to be freed when the thread stops */
        ossl_init_thread_start(NULL, lock->ctx, ossl_rcu_free_local_data);
    }

    for (i = 0; i < MAX_QPS; i++) {
        /* Remember the first unused slot in case we need a new hold */
        if (data->thread_qps[i].qp == NULL && available_qp == -1)
            available_qp = i;
        /* If we have a hold on this lock already, we're good */
        if (data->thread_qps[i].lock == lock) {
            data->thread_qps[i].depth++;
            return;
        }
    }

    /*
     * if we get here, then we don't have a hold on this lock yet.
     * A free slot is mandatory: OPENSSL_assert is used rather than assert
     * so that builds with NDEBUG do not go on to index the table with -1.
     */
    OPENSSL_assert(available_qp != -1);

    data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
    data->thread_qps[available_qp].depth = 1;
    data->thread_qps[available_qp].lock = lock;
}
466 
/*
 * Drop one level of the calling thread's read-side hold on |lock|.  When
 * the nesting depth reaches zero the thread is removed from the reader
 * count of the qp it was registered on and its slot is cleared.
 */
void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
    struct thread_qp *slot;
    uint64_t remaining;
    int i;

    assert(data != NULL);

    for (i = 0; i < MAX_QPS; i++) {
        slot = &data->thread_qps[i];
        if (slot->lock != lock)
            continue;

        if (--slot->depth == 0) {
            /*
             * Outermost unlock.  __ATOMIC_RELEASE, as on acquisition, so
             * that write side waiters see the decrement promptly.
             */
            remaining = ATOMIC_SUB_FETCH(&slot->qp->users, VAL_READER,
                                         __ATOMIC_RELEASE);
            /* A result of UINT64_MAX means the count went below zero */
            OPENSSL_assert(remaining != UINT64_MAX);
            slot->qp = NULL;
            slot->lock = NULL;
        }
        return;
    }

    /* Unlocking a lock that this thread never acquired is fatal */
    assert(0);
}
500 
501 /*
502  * Write side allocation routine to get the current qp
503  * and replace it with a new one
504  */
/*
 * Write side: claim the qp at current_alloc_idx for retirement, stamp it
 * with a fresh ID and redirect new readers to the next qp in the group.
 * Blocks on alloc_signal while fewer than two qps are unclaimed.  Returns
 * the claimed qp.
 */
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_qp *claimed;
    uint64_t id_bits;
    uint32_t idx;

    pthread_mutex_lock(&lock->alloc_lock);

    /*
     * One qp is needed for us and one must remain for readers to attach
     * to, so wait until at least two are not being retired.
     */
    while (lock->group_count - lock->writers_alloced < 2)
        pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);

    idx = lock->current_alloc_idx;
    claimed = &lock->qp_group[idx];

    /* Claim the qp and advance the allocation index, wrapping around */
    lock->writers_alloced++;
    lock->current_alloc_idx = (idx + 1) % lock->group_count;

    /* Take the next generation id */
    id_bits = VAL_ID(lock->id_ctr);
    lock->id_ctr++;

    /*
     * Clear the old ID bits and insert the new ones.  Atomics are used even
     * though alloc_lock is held, so the result is published to the read
     * side before reader_idx changes below.
     */
    ATOMIC_AND_FETCH(&claimed->users, ID_MASK, __ATOMIC_RELEASE);
    ATOMIC_OR_FETCH(&claimed->users, id_bits, __ATOMIC_RELEASE);

    /*
     * Point readers at the next qp.  __ATOMIC_RELEASE pairs with the
     * __ATOMIC_ACQUIRE loads of reader_idx in get_hold_current_qp.
     */
    ATOMIC_STORE_N(uint32_t, &lock->reader_idx, lock->current_alloc_idx,
                   __ATOMIC_RELEASE);

    /* wake up any waiters */
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);

    return claimed;
}
559 
/*
 * Give a qp claimed by update_qp() back: decrement writers_alloced under
 * alloc_lock and signal alloc_signal.  |qp| itself is not inspected.
 */
static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    (void)qp;

    pthread_mutex_lock(&lock->alloc_lock);
    --lock->writers_alloced;
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
}
567 
568 /* TODO: count should be unsigned, e.g uint32_t */
569 /* a negative value could result in unexpected behaviour */
allocate_new_qp_group(CRYPTO_RCU_LOCK * lock,int count)570 static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
571                                             int count)
572 {
573     struct rcu_qp *new =
574         OPENSSL_zalloc(sizeof(*new) * count);
575 
576     lock->group_count = count;
577     return new;
578 }
579 
/*
 * Acquire the write side mutex of |lock|.  Immediately afterwards the
 * thread sanitizer (when enabled) is told that the mutex was released
 * again; the real mutex stays held until ossl_rcu_write_unlock().
 */
void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_t *wlock = &lock->write_lock;

    pthread_mutex_lock(wlock);
    TSAN_FAKE_UNLOCK(wlock);
}
585 
/*
 * Release the write side mutex of |lock|.  The thread sanitizer (when
 * enabled) is first told that the mutex is held, mirroring the fake unlock
 * done in ossl_rcu_write_lock().
 */
void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_t *wlock = &lock->write_lock;

    TSAN_FAKE_LOCK(wlock);
    pthread_mutex_unlock(wlock);
}
591 
/*
 * Wait for a grace period on |lock| and then run the pending callbacks.
 *
 * Steps:
 *  1) detach the list of callbacks queued with ossl_rcu_call()
 *  2) claim the current qp and redirect new readers (update_qp)
 *  3) spin until the claimed qp has no readers left
 *  4) wait for our turn so that qps are retired in ID order
 *  5) hand the qp back (retire_qp) and invoke and free each callback
 */
void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_qp *qp;
    uint64_t count;
    struct rcu_cb_item *cb_items, *tmpcb;

    pthread_mutex_lock(&lock->write_lock);
    /*
     * ossl_rcu_call() pushes onto this list with an atomic exchange and
     * does not itself take write_lock, so the list is detached with an
     * atomic exchange as well.  A separate read followed by a write could
     * drop an item pushed in between.
     */
    cb_items = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, NULL,
                                 __ATOMIC_ACQ_REL);
    pthread_mutex_unlock(&lock->write_lock);

    qp = update_qp(lock);

    /*
     * wait for the reader count to reach zero
     * Note the use of __ATOMIC_ACQUIRE here to ensure that any
     * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
     * is visible prior to our read
     */
    do {
        count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
    } while (READER_COUNT(count) != 0);

    /* retire in order: wait until the ID stamped on our qp is next */
    pthread_mutex_lock(&lock->prior_lock);
    while (lock->next_to_retire != ID_VAL(count))
        pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
    lock->next_to_retire++;
    pthread_cond_broadcast(&lock->prior_signal);
    pthread_mutex_unlock(&lock->prior_lock);

    retire_qp(lock, qp);

    /* handle any callbacks that we have */
    while (cb_items != NULL) {
        tmpcb = cb_items;
        cb_items = cb_items->next;
        tmpcb->fn(tmpcb->data);
        OPENSSL_free(tmpcb);
    }
}
633 
/*
 * Queue |cb| to be called with |data| by a later ossl_synchronize_rcu()
 * on |lock|.  Returns 1 on success, 0 if the list item cannot be allocated.
 */
int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
    struct rcu_cb_item *item = OPENSSL_zalloc(sizeof(*item));

    if (item == NULL)
        return 0;

    item->fn = cb;
    item->data = data;

    /*
     * Push onto the head of the list.  __ATOMIC_ACQ_REL: earlier writes to
     * the list are visible before we read the old head, and the new head
     * is published immediately.
     */
    item->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, item,
                                   __ATOMIC_ACQ_REL);

    return 1;
}
654 
ossl_rcu_uptr_deref(void ** p)655 void *ossl_rcu_uptr_deref(void **p)
656 {
657     return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
658 }
659 
ossl_rcu_assign_uptr(void ** p,void ** v)660 void ossl_rcu_assign_uptr(void **p, void **v)
661 {
662     ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
663 }
664 
ossl_rcu_lock_new(int num_writers,OSSL_LIB_CTX * ctx)665 CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
666 {
667     struct rcu_lock_st *new;
668 
669     if (num_writers < 1)
670         num_writers = 1;
671 
672     ctx = ossl_lib_ctx_get_concrete(ctx);
673     if (ctx == NULL)
674         return 0;
675 
676     new = OPENSSL_zalloc(sizeof(*new));
677     if (new == NULL)
678         return NULL;
679 
680     new->ctx = ctx;
681     pthread_mutex_init(&new->write_lock, NULL);
682     pthread_mutex_init(&new->prior_lock, NULL);
683     pthread_mutex_init(&new->alloc_lock, NULL);
684     pthread_cond_init(&new->prior_signal, NULL);
685     pthread_cond_init(&new->alloc_signal, NULL);
686     new->qp_group = allocate_new_qp_group(new, num_writers + 1);
687     if (new->qp_group == NULL) {
688         OPENSSL_free(new);
689         new = NULL;
690     }
691     return new;
692 }
693 
/*
 * Destroy an rcu lock created by ossl_rcu_lock_new().  NULL is accepted
 * and ignored.  A final ossl_synchronize_rcu() is run first, then the qp
 * array, the mutexes and condition variables, and the lock are released.
 */
void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;

    if (lock == NULL)
        return;

    /* make sure we're synchronized */
    ossl_synchronize_rcu(rlock);

    OPENSSL_free(rlock->qp_group);

    /* Release the pthread objects that ossl_rcu_lock_new() initialised */
    pthread_cond_destroy(&rlock->alloc_signal);
    pthread_cond_destroy(&rlock->prior_signal);
    pthread_mutex_destroy(&rlock->alloc_lock);
    pthread_mutex_destroy(&rlock->prior_lock);
    pthread_mutex_destroy(&rlock->write_lock);

    OPENSSL_free(rlock);
}
708 
CRYPTO_THREAD_lock_new(void)709 CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
710 {
711 # ifdef USE_RWLOCK
712     CRYPTO_RWLOCK *lock;
713 
714     if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
715         /* Don't set error, to avoid recursion blowup. */
716         return NULL;
717 
718     if (pthread_rwlock_init(lock, NULL) != 0) {
719         OPENSSL_free(lock);
720         return NULL;
721     }
722 # else
723     pthread_mutexattr_t attr;
724     CRYPTO_RWLOCK *lock;
725 
726     if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
727         /* Don't set error, to avoid recursion blowup. */
728         return NULL;
729 
730     /*
731      * We don't use recursive mutexes, but try to catch errors if we do.
732      */
733     pthread_mutexattr_init(&attr);
734 #  if !defined (__TANDEM) && !defined (_SPT_MODEL_)
735 #   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
736     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
737 #   endif
738 #  else
739     /* The SPT Thread Library does not define MUTEX attributes. */
740 #  endif
741 
742     if (pthread_mutex_init(lock, &attr) != 0) {
743         pthread_mutexattr_destroy(&attr);
744         OPENSSL_free(lock);
745         return NULL;
746     }
747 
748     pthread_mutexattr_destroy(&attr);
749 # endif
750 
751     return lock;
752 }
753 
CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK * lock)754 __owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
755 {
756 # ifdef USE_RWLOCK
757     if (pthread_rwlock_rdlock(lock) != 0)
758         return 0;
759 # else
760     if (pthread_mutex_lock(lock) != 0) {
761         assert(errno != EDEADLK && errno != EBUSY);
762         return 0;
763     }
764 # endif
765 
766     return 1;
767 }
768 
CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK * lock)769 __owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
770 {
771 # ifdef USE_RWLOCK
772     if (pthread_rwlock_wrlock(lock) != 0)
773         return 0;
774 # else
775     if (pthread_mutex_lock(lock) != 0) {
776         assert(errno != EDEADLK && errno != EBUSY);
777         return 0;
778     }
779 # endif
780 
781     return 1;
782 }
783 
/*
 * Release |lock|.  Returns 1 on success, 0 on failure.
 *
 * pthread_mutex_unlock() reports failure through its return value, not
 * through errno, so the debug assertion checks the returned code.
 */
int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_unlock(lock) != 0)
        return 0;
# else
    int rc = pthread_mutex_unlock(lock);

    if (rc != 0) {
        assert(rc != EPERM);
        return 0;
    }
# endif

    return 1;
}
798 
/* Destroy and free a lock from CRYPTO_THREAD_lock_new(); NULL is ignored */
void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
    if (lock != NULL) {
# ifdef USE_RWLOCK
        pthread_rwlock_destroy(lock);
# else
        pthread_mutex_destroy(lock);
# endif
        OPENSSL_free(lock);
    }
}
813 
/* Run |init| via pthread_once(); returns 1 on success and 0 on failure */
int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
    return pthread_once(once, init) == 0;
}
821 
/*
 * Create the thread local key |key| with destructor |cleanup| via
 * pthread_key_create(); returns 1 on success and 0 on failure.
 */
int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
    return pthread_key_create(key, cleanup) == 0;
}
829 
/* Return the calling thread's value for |key|, via pthread_getspecific() */
void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    void *value = pthread_getspecific(*key);

    return value;
}
834 
/*
 * Set the calling thread's value for |key| to |val| via
 * pthread_setspecific(); returns 1 on success and 0 on failure.
 */
int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
    return pthread_setspecific(*key, val) == 0;
}
842 
/*
 * Delete the thread local key |key| via pthread_key_delete(); returns 1 on
 * success and 0 on failure.
 */
int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_key_delete(*key) == 0;
}
850 
CRYPTO_THREAD_get_current_id(void)851 CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
852 {
853     return pthread_self();
854 }
855 
/* Compare two thread identifiers; returns the result of pthread_equal() */
int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    int same = pthread_equal(a, b);

    return same;
}
860 
/*
 * Add |amount| to *val and store the new value in *ret.
 *
 * Uses the compiler's atomic builtin when *val is lock free (or the Solaris
 * atomic when available); otherwise the update is done under |lock|.
 * Returns 1 on success, 0 if the locked path is needed and |lock| is NULL
 * or cannot be taken or released.
 */
int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
        return 1;
    }
# endif
    /* Locked fallback */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_write_lock(lock))
        return 0;

    *ret = (*val += amount);

    return CRYPTO_THREAD_unlock(lock) ? 1 : 0;
}
886 
/*
 * Add |op| to the 64 bit *val and store the new value in *ret.
 *
 * Uses the compiler's atomic builtin when *val is lock free (or the Solaris
 * atomic when available); otherwise the update is done under |lock|.
 * Returns 1 on success, 0 if the locked path is needed and |lock| is NULL
 * or cannot be taken or released.
 */
int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
                        CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_64_nv(val, op);
        return 1;
    }
# endif
    /* Locked fallback */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_write_lock(lock))
        return 0;

    *ret = (*val += op);

    return CRYPTO_THREAD_unlock(lock) ? 1 : 0;
}
912 
/*
 * Bitwise-AND |op| into the 64-bit value |*val| and store the updated value
 * in |*ret|.
 *
 * With GCC-style __atomic builtins, the operation is done atomically with
 * acquire-release ordering when the object is lock-free. On Solaris 10/11
 * atomic_and_64_nv() is used when |ret| is non-NULL. In all remaining cases
 * the update is performed while holding |lock| for writing.
 *
 * Returns 1 on success. Returns 0 if the fallback is needed and |lock| is
 * NULL, or if taking or releasing the lock fails.
 */
int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_and_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_and_64_nv(val, op);
        return 1;
    }
# endif
    /* Locked fallback: a usable lock is mandatory from here on. */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val &= op;
    *ret = *val;

    return CRYPTO_THREAD_unlock(lock) ? 1 : 0;
}
938 
/*
 * Bitwise-OR |op| into the 64-bit value |*val| and store the updated value
 * in |*ret|.
 *
 * With GCC-style __atomic builtins, the operation is done atomically with
 * acquire-release ordering when the object is lock-free. On Solaris 10/11
 * atomic_or_64_nv() is used when |ret| is non-NULL. In all remaining cases
 * the update is performed while holding |lock| for writing.
 *
 * Returns 1 on success. Returns 0 if the fallback is needed and |lock| is
 * NULL, or if taking or releasing the lock fails.
 */
int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                     CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, op);
        return 1;
    }
# endif
    /* Locked fallback: a usable lock is mandatory from here on. */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val |= op;
    *ret = *val;

    return CRYPTO_THREAD_unlock(lock) ? 1 : 0;
}
964 
/*
 * Read the 64-bit value |*val| into |*ret|.
 *
 * With GCC-style __atomic builtins, the load is done atomically with acquire
 * ordering when the object is lock-free. On Solaris 10/11 the value is
 * obtained via atomic_or_64_nv() with a zero operand when |ret| is non-NULL.
 * In all remaining cases the value is copied while holding |lock| for
 * reading.
 *
 * Returns 1 on success. Returns 0 if the fallback is needed and |lock| is
 * NULL, or if taking or releasing the lock fails.
 */
int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, 0);
        return 1;
    }
# endif
    /* Locked fallback: a usable lock is mandatory from here on. */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    return CRYPTO_THREAD_unlock(lock) ? 1 : 0;
}
987 
/*
 * Write the 64-bit value |val| to |*dst|.
 *
 * With GCC-style __atomic builtins, the store is done atomically with release
 * ordering when the object is lock-free. On Solaris 10/11 atomic_swap_64() is
 * used when |dst| is non-NULL. In all remaining cases the value is written
 * while holding |lock| for writing.
 *
 * Returns 1 on success. Returns 0 if the fallback is needed and |lock| is
 * NULL, or if taking or releasing the lock fails.
 */
int CRYPTO_atomic_store(uint64_t *dst, uint64_t val, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*dst), dst)) {
        __atomic_store(dst, &val, __ATOMIC_RELEASE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    /*
     * Guard on |dst|: this function has no |ret| parameter, so testing |ret|
     * here (as the sibling load/add helpers do) would not compile.
     */
    if (dst != NULL) {
        atomic_swap_64(dst, val);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *dst  = val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}
1010 
/*
 * Read the int value |*val| into |*ret|.
 *
 * With GCC-style __atomic builtins, the load is done atomically with acquire
 * ordering when the object is lock-free. On Solaris 10/11 the value is
 * obtained via atomic_or_uint_nv() with a zero operand when |ret| is
 * non-NULL. In all remaining cases the value is copied while holding |lock|
 * for reading.
 *
 * Returns 1 on success. Returns 0 if the fallback is needed and |lock| is
 * NULL, or if taking or releasing the lock fails.
 */
int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
        return 1;
    }
# endif
    /* Locked fallback: a usable lock is mandatory from here on. */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    return CRYPTO_THREAD_unlock(lock) ? 1 : 0;
}
1033 
1034 # ifndef FIPS_MODULE
/*
 * Fork handler initialisation hook. This implementation performs no work and
 * unconditionally reports success by returning 1.
 */
int openssl_init_fork_handlers(void)
{
    const int ok = 1;

    return ok;
}
1039 # endif /* FIPS_MODULE */
1040 
/*
 * Return an identifier for the current process, taken from getpid() and
 * converted to int.
 */
int openssl_get_fork_id(void)
{
    int id = getpid();

    return id;
}
1045 #endif
1046