xref: /openssl/crypto/threads_pthread.c (revision 79c9cbbe)
1 /*
2  * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 /* We need to use the OPENSSL_fork_*() deprecated APIs */
11 #define OPENSSL_SUPPRESS_DEPRECATED
12 
13 #include <openssl/crypto.h>
14 #include <crypto/cryptlib.h>
15 #include "internal/cryptlib.h"
16 #include "internal/rcu.h"
17 #include "rcu_internal.h"
18 
19 #if defined(__clang__) && defined(__has_feature)
20 # if __has_feature(thread_sanitizer)
21 #  define __SANITIZE_THREAD__
22 # endif
23 #endif
24 
#if defined(__SANITIZE_THREAD__)
# include <sanitizer/tsan_interface.h>
/*
 * Tell ThreadSanitizer that mutex |x| has been released / acquired without
 * touching the mutex itself.  Each macro expands to exactly one statement so
 * that it stays correct when used as the body of an unbraced if/else.
 */
# define TSAN_FAKE_UNLOCK(x)                \
    do {                                    \
        __tsan_mutex_pre_unlock((x), 0);    \
        __tsan_mutex_post_unlock((x), 0);   \
    } while (0)

# define TSAN_FAKE_LOCK(x)                  \
    do {                                    \
        __tsan_mutex_pre_lock((x), 0);      \
        __tsan_mutex_post_lock((x), 0, 0);  \
    } while (0)
#else
/* Without ThreadSanitizer the annotations expand to nothing */
# define TSAN_FAKE_UNLOCK(x)
# define TSAN_FAKE_LOCK(x)
#endif
36 
37 #if defined(__sun)
38 # include <atomic.h>
39 #endif
40 
41 #if defined(__apple_build_version__) && __apple_build_version__ < 6000000
/*
 * OS/X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE and
 * __ATOMIC_ACQ_REL but which expects only one parameter for
 * __atomic_is_lock_free() rather than two, i.e. it has the signature
 * __atomic_is_lock_free(sizeof(_Atomic(T))).
 * All of this makes it impossible to use __atomic_is_lock_free here.
 *
 * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
 */
50 # define BROKEN_CLANG_ATOMICS
51 #endif
52 
53 #if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)
54 
55 # if defined(OPENSSL_SYS_UNIX)
56 #  include <sys/types.h>
57 #  include <unistd.h>
58 # endif
59 
60 # include <assert.h>
61 
62 /*
63  * The Non-Stop KLT thread model currently seems broken in its rwlock
64  * implementation
65  */
66 # if defined(PTHREAD_RWLOCK_INITIALIZER) && !defined(_KLT_MODEL_)
67 #  define USE_RWLOCK
68 # endif
69 
/*
 * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
 * other compilers.
 *
 * Unfortunately, we can't do that with some "generic type", because there's no
 * guarantee that the chosen generic type is large enough to cover all cases.
 * Therefore, we implement fallbacks for each applicable type, with composed
 * names that include the type they handle.
 *
 * (an anecdote: we previously tried to use |void *| as the generic type, with
 * the thought that the pointer itself is the largest type.  However, this is
 * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
 *
 * All applicable ATOMIC_ macros take the intended type as first parameter, so
 * they can map to the correct fallback function.  In the GNU/clang case, that
 * parameter is simply ignored.
 */
87 
/*
 * The ATOMIC_ macros compose fallback function names by pasting the type name
 * onto a prefix, so every type handed to them has to be spelled as a single
 * identifier.  These aliases provide such spellings for the pointer types.
 */
typedef void *pvoid;
typedef struct rcu_cb_item *prcu_cb_item;
94 
95 # if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
96     && !defined(USE_ATOMIC_FALLBACKS)
#  if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__) && defined(__LP64__)
/*
 * For pointers, Apple M1 virtualized cpu seems to have some problem using the
 * ldapr instruction (see https://github.com/openssl/openssl/pull/23974)
 * When using the native apple clang compiler, this instruction is emitted for
 * atomic loads, which is bad.  So, if
 * 1) We are building on a target that defines __APPLE__ AND
 * 2) We are building on a target using clang (__clang__) AND
 * 3) We are building for an M1 processor (__aarch64__) AND
 * 4) We are building with 64 bit pointers
 * Then we should not use __atomic_load_n and instead implement our own
 * function to issue the ldar instruction instead, which produces the proper
 * sequencing guarantees
 *
 * |p| is the address to load from.  |memorder| is accepted only so that the
 * signature matches __atomic_load_n; it is not used.
 * Returns the pointer value read from |*p|.
 */
static inline void *apple_atomic_load_n_pvoid(void **p,
                                              ossl_unused int memorder)
{
    void *ret;

    /*
     * The asm reads |*p|, which is not listed as an operand, so the "memory"
     * clobber is required: it tells the compiler that memory is accessed
     * here and stops it from caching values or moving other memory accesses
     * across the load.
     */
    __asm volatile("ldar %0, [%1]" : "=r" (ret) : "r" (p) : "memory");

    return ret;
}

/* For the integer types, we should be fine, though */
#   define apple_atomic_load_n_uint32_t(p, o) __atomic_load_n(p, o)
#   define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o)

#   define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o)
#  else
#   define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
#  endif
/*
 * Every other operation maps directly onto the matching compiler builtin.
 * Where a type parameter |t| is present it is not used by these definitions.
 */
#  define ATOMIC_STORE_N(t, p, v, o)    __atomic_store_n(p, v, o)
#  define ATOMIC_STORE(t, p, v, o)      __atomic_store(p, v, o)
#  define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o)
#  define ATOMIC_ADD_FETCH(p, v, o)     __atomic_add_fetch(p, v, o)
#  define ATOMIC_FETCH_ADD(p, v, o)     __atomic_fetch_add(p, v, o)
#  define ATOMIC_SUB_FETCH(p, v, o)     __atomic_sub_fetch(p, v, o)
#  define ATOMIC_AND_FETCH(p, m, o)     __atomic_and_fetch(p, m, o)
#  define ATOMIC_OR_FETCH(p, m, o)      __atomic_or_fetch(p, m, o)
137 # else
138 static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;
139 
140 #  define IMPL_fallback_atomic_load_n(t)                        \
141     static ossl_inline t fallback_atomic_load_n_##t(t *p)            \
142     {                                                           \
143         t ret;                                                  \
144                                                                 \
145         pthread_mutex_lock(&atomic_sim_lock);                   \
146         ret = *p;                                               \
147         pthread_mutex_unlock(&atomic_sim_lock);                 \
148         return ret;                                             \
149     }
150 IMPL_fallback_atomic_load_n(uint32_t)
IMPL_fallback_atomic_load_n(uint64_t)151 IMPL_fallback_atomic_load_n(uint64_t)
152 IMPL_fallback_atomic_load_n(pvoid)
153 
154 #  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)
155 
156 #  define IMPL_fallback_atomic_store_n(t)                       \
157     static ossl_inline t fallback_atomic_store_n_##t(t *p, t v)      \
158     {                                                           \
159         t ret;                                                  \
160                                                                 \
161         pthread_mutex_lock(&atomic_sim_lock);                   \
162         ret = *p;                                               \
163         *p = v;                                                 \
164         pthread_mutex_unlock(&atomic_sim_lock);                 \
165         return ret;                                             \
166     }
167 IMPL_fallback_atomic_store_n(uint32_t)
168 IMPL_fallback_atomic_store_n(uint64_t)
169 
170 #  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)
171 
172 #  define IMPL_fallback_atomic_store(t)                         \
173     static ossl_inline void fallback_atomic_store_##t(t *p, t *v)    \
174     {                                                           \
175         pthread_mutex_lock(&atomic_sim_lock);                   \
176         *p = *v;                                                \
177         pthread_mutex_unlock(&atomic_sim_lock);                 \
178     }
179 IMPL_fallback_atomic_store(uint64_t)
180 IMPL_fallback_atomic_store(pvoid)
181 
182 #  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)
183 
184 #  define IMPL_fallback_atomic_exchange_n(t)                            \
185     static ossl_inline t fallback_atomic_exchange_n_##t(t *p, t v)           \
186     {                                                                   \
187         t ret;                                                          \
188                                                                         \
189         pthread_mutex_lock(&atomic_sim_lock);                           \
190         ret = *p;                                                       \
191         *p = v;                                                         \
192         pthread_mutex_unlock(&atomic_sim_lock);                         \
193         return ret;                                                     \
194     }
195 IMPL_fallback_atomic_exchange_n(uint64_t)
196 IMPL_fallback_atomic_exchange_n(prcu_cb_item)
197 
198 #  define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v)
199 
200 /*
201  * The fallbacks that follow don't need any per type implementation, as
202  * they are designed for uint64_t only.  If there comes a time when multiple
203  * types need to be covered, it's relatively easy to refactor them the same
204  * way as the fallbacks above.
205  */
206 
207 static ossl_inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
208 {
209     uint64_t ret;
210 
211     pthread_mutex_lock(&atomic_sim_lock);
212     *p += v;
213     ret = *p;
214     pthread_mutex_unlock(&atomic_sim_lock);
215     return ret;
216 }
217 
218 #  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)
219 
fallback_atomic_fetch_add(uint64_t * p,uint64_t v)220 static ossl_inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v)
221 {
222     uint64_t ret;
223 
224     pthread_mutex_lock(&atomic_sim_lock);
225     ret = *p;
226     *p += v;
227     pthread_mutex_unlock(&atomic_sim_lock);
228     return ret;
229 }
230 
231 #  define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v)
232 
fallback_atomic_sub_fetch(uint64_t * p,uint64_t v)233 static ossl_inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
234 {
235     uint64_t ret;
236 
237     pthread_mutex_lock(&atomic_sim_lock);
238     *p -= v;
239     ret = *p;
240     pthread_mutex_unlock(&atomic_sim_lock);
241     return ret;
242 }
243 
244 #  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)
245 
fallback_atomic_and_fetch(uint64_t * p,uint64_t m)246 static ossl_inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m)
247 {
248     uint64_t ret;
249 
250     pthread_mutex_lock(&atomic_sim_lock);
251     *p &= m;
252     ret = *p;
253     pthread_mutex_unlock(&atomic_sim_lock);
254     return ret;
255 }
256 
257 #  define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v)
258 
fallback_atomic_or_fetch(uint64_t * p,uint64_t m)259 static ossl_inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m)
260 {
261     uint64_t ret;
262 
263     pthread_mutex_lock(&atomic_sim_lock);
264     *p |= m;
265     ret = *p;
266     pthread_mutex_unlock(&atomic_sim_lock);
267     return ret;
268 }
269 
270 #  define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v)
271 # endif
272 
/*
 * users is broken up into 2 parts
 * bits 0-15 current readers
 * bits 32-63 ID
 *
 * READER_MASK and ID_MASK are applied after the field has been shifted down
 * to bit 0, so they describe the width of a field, not its position.
 */
# define READER_SHIFT 0
# define ID_SHIFT 32
/* TODO: READER_SIZE 32 in threads_win.c */
# define READER_SIZE 16
# define ID_SIZE 32

# define READER_MASK     (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK         (((uint64_t)1 << ID_SIZE) - 1)
/* Extract the reader count from a users value */
# define READER_COUNT(x) ((uint32_t)(((uint64_t)(x) >> READER_SHIFT) & \
                                     READER_MASK))
/* Extract the ID from a users value */
# define ID_VAL(x)       ((uint32_t)(((uint64_t)(x) >> ID_SHIFT) & ID_MASK))
/* The users increment that represents one reader */
# define VAL_READER      ((uint64_t)1 << READER_SHIFT)
/*
 * Place ID |x| into the ID field of a users value.  The argument is
 * parenthesized so the cast applies to the whole expression passed in.
 */
# define VAL_ID(x)       ((uint64_t)(x) << ID_SHIFT)
291 
/*
 * A quiescence point: the core of an rcu lock.  Its single 64 bit field,
 * users, packs the reader count and the ID as laid out by the READER_ and
 * ID_ macros.
 */
struct rcu_qp {
    uint64_t users;
};
301 
302 struct thread_qp {
303     struct rcu_qp *qp;
304     unsigned int depth;
305     CRYPTO_RCU_LOCK *lock;
306 };
307 
308 # define MAX_QPS 10
309 /*
310  * This is the per thread tracking data
311  * that is assigned to each thread participating
312  * in an rcu qp
313  *
314  * qp points to the qp that it last acquired
315  *
316  */
317 struct rcu_thr_data {
318     struct thread_qp thread_qps[MAX_QPS];
319 };
320 
321 /*
322  * This is the internal version of a CRYPTO_RCU_LOCK
323  * it is cast from CRYPTO_RCU_LOCK
324  */
325 struct rcu_lock_st {
326     /* Callbacks to call for next ossl_synchronize_rcu */
327     struct rcu_cb_item *cb_items;
328 
329     /* The context we are being created against */
330     OSSL_LIB_CTX *ctx;
331 
332     /* rcu generation counter for in-order retirement */
333     uint32_t id_ctr;
334 
335     /* TODO: can be moved before id_ctr for better alignment */
336     /* Array of quiescent points for synchronization */
337     struct rcu_qp *qp_group;
338 
339     /* Number of elements in qp_group array */
340     uint32_t group_count;
341 
342     /* Index of the current qp in the qp_group array */
343     uint32_t reader_idx;
344 
345     /* value of the next id_ctr value to be retired */
346     uint32_t next_to_retire;
347 
348     /* index of the next free rcu_qp in the qp_group */
349     uint32_t current_alloc_idx;
350 
351     /* number of qp's in qp_group array currently being retired */
352     uint32_t writers_alloced;
353 
354     /* lock protecting write side operations */
355     pthread_mutex_t write_lock;
356 
357     /* lock protecting updates to writers_alloced/current_alloc_idx */
358     pthread_mutex_t alloc_lock;
359 
360     /* signal to wake threads waiting on alloc_lock */
361     pthread_cond_t alloc_signal;
362 
363     /* lock to enforce in-order retirement */
364     pthread_mutex_t prior_lock;
365 
366     /* signal to wake threads waiting on prior_lock */
367     pthread_cond_t prior_signal;
368 };
369 
370 /* Read side acquisition of the current qp */
get_hold_current_qp(struct rcu_lock_st * lock)371 static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
372 {
373     uint32_t qp_idx;
374 
375     /* get the current qp index */
376     for (;;) {
377         /*
378          * Notes on use of __ATOMIC_ACQUIRE
379          * We need to ensure the following:
380          * 1) That subsequent operations aren't optimized by hoisting them above
381          * this operation.  Specifically, we don't want the below re-load of
382          * qp_idx to get optimized away
383          * 2) We want to ensure that any updating of reader_idx on the write side
384          * of the lock is flushed from a local cpu cache so that we see any
385          * updates prior to the load.  This is a non-issue on cache coherent
386          * systems like x86, but is relevant on other arches
387          * Note: This applies to the reload below as well
388          */
389         qp_idx = ATOMIC_LOAD_N(uint32_t, &lock->reader_idx, __ATOMIC_ACQUIRE);
390 
391         /*
392          * Notes of use of __ATOMIC_RELEASE
393          * This counter is only read by the write side of the lock, and so we
394          * specify __ATOMIC_RELEASE here to ensure that the write side of the
395          * lock see this during the spin loop read of users, as it waits for the
396          * reader count to approach zero
397          */
398         ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
399                          __ATOMIC_RELEASE);
400 
401         /* if the idx hasn't changed, we're good, else try again */
402         if (qp_idx == ATOMIC_LOAD_N(uint32_t, &lock->reader_idx,
403                                     __ATOMIC_ACQUIRE))
404             break;
405 
406         /*
407          * Notes on use of __ATOMIC_RELEASE
408          * As with the add above, we want to ensure that this decrement is
409          * seen by the write side of the lock as soon as it happens to prevent
410          * undue spinning waiting for write side completion
411          */
412         ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
413                          __ATOMIC_RELEASE);
414     }
415 
416     return &lock->qp_group[qp_idx];
417 }
418 
ossl_rcu_free_local_data(void * arg)419 static void ossl_rcu_free_local_data(void *arg)
420 {
421     OSSL_LIB_CTX *ctx = arg;
422     CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(ctx);
423     struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
424 
425     OPENSSL_free(data);
426     CRYPTO_THREAD_set_local(lkey, NULL);
427 }
428 
/*
 * Take a read side hold on |lock| for the calling thread.
 *
 * The thread's tracking data is created on first use.  If the thread already
 * holds |lock|, only its nesting depth is incremented.  Otherwise a free slot
 * is claimed and a hold on the current qp is recorded in it.
 *
 * The function cannot report failure: running out of memory, or holding more
 * than MAX_QPS different rcu locks at once, aborts via OPENSSL_assert().
 */
void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_thr_data *data;
    int i, available_qp = -1;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);

    data = CRYPTO_THREAD_get_local(lkey);

    if (data == NULL) {
        /* first rcu read lock on this thread: set up its tracking data */
        data = OPENSSL_zalloc(sizeof(*data));
        OPENSSL_assert(data != NULL);
        CRYPTO_THREAD_set_local(lkey, data);
        ossl_init_thread_start(NULL, lock->ctx, ossl_rcu_free_local_data);
    }

    for (i = 0; i < MAX_QPS; i++) {
        /* remember the first free slot in case we need one below */
        if (data->thread_qps[i].qp == NULL && available_qp == -1)
            available_qp = i;
        /* If we have a hold on this lock already, we're good */
        if (data->thread_qps[i].lock == lock) {
            data->thread_qps[i].depth++;
            return;
        }
    }

    /*
     * if we get here, then we don't have a hold on this lock yet.
     * This check must stay active in NDEBUG builds too: continuing without a
     * free slot would write to thread_qps[-1].
     */
    OPENSSL_assert(available_qp != -1);

    data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
    data->thread_qps[available_qp].depth = 1;
    data->thread_qps[available_qp].lock = lock;
}
467 
/*
 * Release one level of the calling thread's read side hold on |lock|.  When
 * the outermost level is released, the reader count of the held qp is
 * decremented and the thread's slot is freed.
 */
void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
    struct thread_qp *slot;
    uint64_t remaining;
    int i;

    assert(data != NULL);

    for (i = 0; i < MAX_QPS; i++) {
        slot = &data->thread_qps[i];
        if (slot->lock != lock)
            continue;

        if (--slot->depth == 0) {
            /*
             * As with read side acquisition, we use __ATOMIC_RELEASE here
             * to ensure that the decrement is published immediately
             * to any write side waiters
             */
            remaining = ATOMIC_SUB_FETCH(&slot->qp->users, VAL_READER,
                                         __ATOMIC_RELEASE);
            OPENSSL_assert(remaining != UINT64_MAX);
            slot->qp = NULL;
            slot->lock = NULL;
        }
        return;
    }

    /*
     * If we get here, we're trying to unlock a lock that we never acquired -
     * that's fatal.
     */
    assert(0);
}
501 
502 /*
503  * Write side allocation routine to get the current qp
504  * and replace it with a new one
505  */
update_qp(CRYPTO_RCU_LOCK * lock)506 static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
507 {
508     uint64_t new_id;
509     uint32_t current_idx;
510 
511     pthread_mutex_lock(&lock->alloc_lock);
512 
513     /*
514      * we need at least one qp to be available with one
515      * left over, so that readers can start working on
516      * one that isn't yet being waited on
517      */
518     while (lock->group_count - lock->writers_alloced < 2)
519         /* we have to wait for one to be free */
520         pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);
521 
522     current_idx = lock->current_alloc_idx;
523 
524     /* Allocate the qp */
525     lock->writers_alloced++;
526 
527     /* increment the allocation index */
528     lock->current_alloc_idx =
529         (lock->current_alloc_idx + 1) % lock->group_count;
530 
531     /* get and insert a new id */
532     new_id = VAL_ID(lock->id_ctr);
533     lock->id_ctr++;
534 
535     /*
536      * Even though we are under a write side lock here
537      * We need to use atomic instructions to ensure that the results
538      * of this update are published to the read side prior to updating the
539      * reader idx below
540      */
541     ATOMIC_AND_FETCH(&lock->qp_group[current_idx].users, ID_MASK,
542                      __ATOMIC_RELEASE);
543     ATOMIC_OR_FETCH(&lock->qp_group[current_idx].users, new_id,
544                     __ATOMIC_RELEASE);
545 
546     /*
547      * Update the reader index to be the prior qp.
548      * Note the use of __ATOMIC_RELEASE here is based on the corresponding use
549      * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication
550      * of this value to be seen on the read side immediately after it happens
551      */
552     ATOMIC_STORE_N(uint32_t, &lock->reader_idx, lock->current_alloc_idx,
553                    __ATOMIC_RELEASE);
554 
555     /* wake up any waiters */
556     pthread_cond_signal(&lock->alloc_signal);
557     pthread_mutex_unlock(&lock->alloc_lock);
558     return &lock->qp_group[current_idx];
559 }
560 
/*
 * Give back a qp claimed by update_qp(): decrement the count of qps being
 * retired and wake a thread waiting on alloc_signal.  |qp| itself is not
 * touched.
 */
static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    pthread_mutex_lock(&lock->alloc_lock);

    lock->writers_alloced--;
    pthread_cond_signal(&lock->alloc_signal);

    pthread_mutex_unlock(&lock->alloc_lock);
}
568 
569 /* TODO: count should be unsigned, e.g uint32_t */
570 /* a negative value could result in unexpected behaviour */
allocate_new_qp_group(CRYPTO_RCU_LOCK * lock,int count)571 static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
572                                             int count)
573 {
574     struct rcu_qp *new =
575         OPENSSL_zalloc(sizeof(*new) * count);
576 
577     lock->group_count = count;
578     return new;
579 }
580 
/*
 * Take the write side mutex of |lock|.  When built with ThreadSanitizer the
 * mutex is then reported to the sanitizer as released, although it stays
 * held.
 */
void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_t *wlock = &lock->write_lock;

    pthread_mutex_lock(wlock);
    TSAN_FAKE_UNLOCK(wlock);
}
586 
/*
 * Release the write side mutex of |lock|.  When built with ThreadSanitizer
 * the mutex is first reported to the sanitizer as acquired, mirroring
 * ossl_rcu_write_lock().
 */
void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_t *wlock = &lock->write_lock;

    TSAN_FAKE_LOCK(wlock);
    pthread_mutex_unlock(wlock);
}
592 
/*
 * Wait until every reader of the qp that was current on entry has dropped its
 * hold, retire that qp in ID order, then run and free the callbacks that were
 * queued on |lock| when this call started.
 */
void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_cb_item *pending, *next;
    struct rcu_qp *qp;
    uint64_t users;

    /* detach the callback list while holding the write lock */
    pthread_mutex_lock(&lock->write_lock);
    pending = lock->cb_items;
    lock->cb_items = NULL;
    pthread_mutex_unlock(&lock->write_lock);

    qp = update_qp(lock);

    /*
     * wait for the reader count to reach zero
     * Note the use of __ATOMIC_ACQUIRE here to ensure that any
     * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
     * is visible prior to our read
     */
    for (;;) {
        users = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
        if (READER_COUNT(users) == 0)
            break;
    }

    /* retire in order: wait until our ID is the next one due */
    pthread_mutex_lock(&lock->prior_lock);
    while (lock->next_to_retire != ID_VAL(users))
        pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
    lock->next_to_retire++;
    pthread_cond_broadcast(&lock->prior_signal);
    pthread_mutex_unlock(&lock->prior_lock);

    retire_qp(lock, qp);

    /* handle any callbacks that we have */
    for (; pending != NULL; pending = next) {
        next = pending->next;
        pending->fn(pending->data);
        OPENSSL_free(pending);
    }
}
634 
/*
 * Queue callback |cb| with argument |data| on |lock|, to be run by a later
 * ossl_synchronize_rcu().
 * Returns 1 on success, 0 if the list item could not be allocated.
 */
int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
    struct rcu_cb_item *item = OPENSSL_zalloc(sizeof(*item));

    if (item == NULL)
        return 0;

    item->fn = cb;
    item->data = data;

    /*
     * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this
     * list are visible to us prior to reading, and publish the new value
     * immediately.  The previous list head becomes our successor.
     */
    item->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, item,
                                   __ATOMIC_ACQ_REL);

    return 1;
}
655 
ossl_rcu_uptr_deref(void ** p)656 void *ossl_rcu_uptr_deref(void **p)
657 {
658     return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
659 }
660 
ossl_rcu_assign_uptr(void ** p,void ** v)661 void ossl_rcu_assign_uptr(void **p, void **v)
662 {
663     ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
664 }
665 
ossl_rcu_lock_new(int num_writers,OSSL_LIB_CTX * ctx)666 CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
667 {
668     struct rcu_lock_st *new;
669 
670     if (num_writers < 1)
671         num_writers = 1;
672 
673     ctx = ossl_lib_ctx_get_concrete(ctx);
674     if (ctx == NULL)
675         return 0;
676 
677     new = OPENSSL_zalloc(sizeof(*new));
678     if (new == NULL)
679         return NULL;
680 
681     new->ctx = ctx;
682     pthread_mutex_init(&new->write_lock, NULL);
683     pthread_mutex_init(&new->prior_lock, NULL);
684     pthread_mutex_init(&new->alloc_lock, NULL);
685     pthread_cond_init(&new->prior_signal, NULL);
686     pthread_cond_init(&new->alloc_signal, NULL);
687     new->qp_group = allocate_new_qp_group(new, num_writers + 1);
688     if (new->qp_group == NULL) {
689         OPENSSL_free(new);
690         new = NULL;
691     }
692     return new;
693 }
694 
/*
 * Destroy an rcu lock created by ossl_rcu_lock_new().  A NULL |lock| is
 * ignored.
 *
 * The lock is synchronized first, then its condition variables and mutexes
 * are destroyed and its memory is released.
 * NOTE(review): this assumes no other thread is still using |lock| when it
 * is freed - confirm against callers.
 */
void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;

    if (lock == NULL)
        return;

    /* make sure we're synchronized */
    ossl_synchronize_rcu(rlock);

    /* release whatever the pthread implementation holds for these objects */
    pthread_cond_destroy(&rlock->alloc_signal);
    pthread_cond_destroy(&rlock->prior_signal);
    pthread_mutex_destroy(&rlock->alloc_lock);
    pthread_mutex_destroy(&rlock->prior_lock);
    pthread_mutex_destroy(&rlock->write_lock);

    OPENSSL_free(rlock->qp_group);
    OPENSSL_free(rlock);
}
709 
CRYPTO_THREAD_lock_new(void)710 CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
711 {
712 # ifdef USE_RWLOCK
713     CRYPTO_RWLOCK *lock;
714 
715     if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
716         /* Don't set error, to avoid recursion blowup. */
717         return NULL;
718 
719     if (pthread_rwlock_init(lock, NULL) != 0) {
720         OPENSSL_free(lock);
721         return NULL;
722     }
723 # else
724     pthread_mutexattr_t attr;
725     CRYPTO_RWLOCK *lock;
726 
727     if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
728         /* Don't set error, to avoid recursion blowup. */
729         return NULL;
730 
731     /*
732      * We don't use recursive mutexes, but try to catch errors if we do.
733      */
734     pthread_mutexattr_init(&attr);
735 #  if !defined (__TANDEM) && !defined (_SPT_MODEL_)
736 #   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
737     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
738 #   endif
739 #  else
740     /* The SPT Thread Library does not define MUTEX attributes. */
741 #  endif
742 
743     if (pthread_mutex_init(lock, &attr) != 0) {
744         pthread_mutexattr_destroy(&attr);
745         OPENSSL_free(lock);
746         return NULL;
747     }
748 
749     pthread_mutexattr_destroy(&attr);
750 # endif
751 
752     return lock;
753 }
754 
CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK * lock)755 __owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
756 {
757 # ifdef USE_RWLOCK
758     if (pthread_rwlock_rdlock(lock) != 0)
759         return 0;
760 # else
761     if (pthread_mutex_lock(lock) != 0) {
762         assert(errno != EDEADLK && errno != EBUSY);
763         return 0;
764     }
765 # endif
766 
767     return 1;
768 }
769 
CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK * lock)770 __owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
771 {
772 # ifdef USE_RWLOCK
773     if (pthread_rwlock_wrlock(lock) != 0)
774         return 0;
775 # else
776     if (pthread_mutex_lock(lock) != 0) {
777         assert(errno != EDEADLK && errno != EBUSY);
778         return 0;
779     }
780 # endif
781 
782     return 1;
783 }
784 
/*
 * Release |lock|, whether it was taken for reading or for writing.
 * Returns 1 on success, 0 on failure.
 */
int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_unlock(lock) != 0)
        return 0;
# else
    /* pthread functions return the error number; they do not set errno */
    int rc = pthread_mutex_unlock(lock);

    if (rc != 0) {
        assert(rc != EPERM);
        return 0;
    }
# endif

    return 1;
}
799 
/*
 * Destroy a lock created by CRYPTO_THREAD_lock_new() and free its memory.
 * A NULL |lock| is ignored.
 */
void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
    if (lock != NULL) {
# ifdef USE_RWLOCK
        pthread_rwlock_destroy(lock);
# else
        pthread_mutex_destroy(lock);
# endif
        OPENSSL_free(lock);
    }
}
814 
/*
 * Run |init| through pthread_once() using control object |once|.
 * Returns 1 if pthread_once() succeeds, 0 otherwise.
 */
int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
    return pthread_once(once, init) == 0;
}
822 
/*
 * Create thread local storage key |key|, with |cleanup| passed to
 * pthread_key_create() as the destructor.
 * Returns 1 on success, 0 on failure.
 */
int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
    return pthread_key_create(key, cleanup) == 0;
}
830 
/* Return the calling thread's value for thread local key |key| */
void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    void *val = pthread_getspecific(*key);

    return val;
}
835 
/*
 * Set the calling thread's value for thread local key |key| to |val|.
 * Returns 1 on success, 0 on failure.
 */
int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
    return pthread_setspecific(*key, val) == 0;
}
843 
/*
 * Delete thread local storage key |key|.
 * Returns 1 on success, 0 on failure.
 */
int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_key_delete(*key) == 0;
}
851 
CRYPTO_THREAD_get_current_id(void)852 CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
853 {
854     return pthread_self();
855 }
856 
/*
 * Compare thread identifiers |a| and |b|.
 * Returns the result of pthread_equal(): non-zero if they are equal, 0 if not.
 */
int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    int same = pthread_equal(a, b);

    return same;
}
861 
/*
 * Add |amount| to |*val| and store the new value in |*ret|.
 *
 * A compiler or platform atomic is used when one is available; otherwise the
 * update is done while holding |lock| for writing.
 * Returns 1 on success, 0 if the locked path is needed and |lock| is NULL or
 * cannot be taken or released.
 */
int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
        return 1;
    }
# endif
    /* no usable atomic: fall back to the caller supplied lock */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_write_lock(lock))
        return 0;

    *ret = (*val += amount);

    return CRYPTO_THREAD_unlock(lock) != 0;
}
887 
/*
 * Add |op| to the 64 bit value |*val| and store the new value in |*ret|.
 *
 * A compiler or platform atomic is used when one is available; otherwise the
 * update is done while holding |lock| for writing.
 * Returns 1 on success, 0 if the locked path is needed and |lock| is NULL or
 * cannot be taken or released.
 */
int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
                        CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_64_nv(val, op);
        return 1;
    }
# endif
    /* no usable atomic: fall back to the caller supplied lock */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_write_lock(lock))
        return 0;

    *ret = (*val += op);

    return CRYPTO_THREAD_unlock(lock) != 0;
}
913 
/*
 * Atomically AND the 64-bit value |*val| with |op| and report the updated
 * value in |*ret|.
 *
 * Uses the compiler __atomic builtins (acquire-release) when |*val| is
 * lock-free, atomic_and_64_nv() on the Solaris builds matched below when
 * |ret| is non-NULL, and otherwise performs the update under |lock| held
 * for writing.
 *
 * Returns 1 on success.  Returns 0 when the locked path is reached and
 * |lock| is NULL, or when acquiring or releasing |lock| fails.
 */
int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
                      CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        const uint64_t updated = __atomic_and_fetch(val, op, __ATOMIC_ACQ_REL);

        *ret = updated;
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_and_64_nv(val, op);
        return 1;
    }
# endif
    /* Locked fallback: nothing can be done without a usable lock. */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val &= op;
    *ret = *val;

    /* The update is already applied; only the unlock result is reported. */
    return CRYPTO_THREAD_unlock(lock) ? 1 : 0;
}
939 
/*
 * Atomically OR the 64-bit value |*val| with |op| and report the updated
 * value in |*ret|.
 *
 * Uses the compiler __atomic builtins (acquire-release) when |*val| is
 * lock-free, atomic_or_64_nv() on the Solaris builds matched below when
 * |ret| is non-NULL, and otherwise performs the update under |lock| held
 * for writing.
 *
 * Returns 1 on success.  Returns 0 when the locked path is reached and
 * |lock| is NULL, or when acquiring or releasing |lock| fails.
 */
int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                     CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        const uint64_t updated = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);

        *ret = updated;
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, op);
        return 1;
    }
# endif
    /* Locked fallback: nothing can be done without a usable lock. */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val |= op;
    *ret = *val;

    /* The update is already applied; only the unlock result is reported. */
    return CRYPTO_THREAD_unlock(lock) ? 1 : 0;
}
965 
/*
 * Atomically read the 64-bit value |*val| into |*ret|.
 *
 * Uses the compiler __atomic builtins with acquire ordering when |*val| is
 * lock-free.  On the Solaris builds matched below, when |ret| is non-NULL,
 * the value is obtained by OR-ing zero into |*val| via atomic_or_64_nv().
 * Otherwise the value is copied while |lock| is held for reading.
 *
 * Returns 1 on success.  Returns 0 when the locked path is reached and
 * |lock| is NULL, or when acquiring or releasing |lock| fails.
 */
int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, 0);
        return 1;
    }
# endif
    /* Locked fallback: nothing can be done without a usable lock. */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    /* The value is already copied; only the unlock result is reported. */
    return CRYPTO_THREAD_unlock(lock) ? 1 : 0;
}
988 
/*
 * Atomically write the 64-bit value |val| to |*dst|.
 *
 * Uses the compiler __atomic builtins with release ordering when |*dst| is
 * lock-free, atomic_swap_64() on the Solaris builds matched below when
 * |dst| is non-NULL, and otherwise performs the write under |lock| held
 * for writing.
 *
 * Returns 1 on success.  Returns 0 when the locked path is reached and
 * |lock| is NULL, or when acquiring or releasing |lock| fails.
 */
int CRYPTO_atomic_store(uint64_t *dst, uint64_t val, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*dst), dst)) {
        __atomic_store(dst, &val, __ATOMIC_RELEASE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (dst != NULL) {
        /* The previous value returned by the swap is not needed. */
        atomic_swap_64(dst, val);
        return 1;
    }
# endif
    /* Locked fallback: nothing can be done without a usable lock. */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_write_lock(lock))
        return 0;

    *dst = val;

    /* The write is already applied; only the unlock result is reported. */
    return CRYPTO_THREAD_unlock(lock) ? 1 : 0;
}
1011 
/*
 * Atomically read the int |*val| into |*ret|.
 *
 * Uses the compiler __atomic builtins with acquire ordering when |*val| is
 * lock-free.  On the Solaris builds matched below, when |ret| is non-NULL,
 * the value is obtained by OR-ing zero into |*val| via atomic_or_uint_nv().
 * Otherwise the value is copied while |lock| is held for reading.
 *
 * Returns 1 on success.  Returns 0 when the locked path is reached and
 * |lock| is NULL, or when acquiring or releasing |lock| fails.
 */
int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
        return 1;
    }
# endif
    /* Locked fallback: nothing can be done without a usable lock. */
    if (lock == NULL)
        return 0;
    if (!CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    /* The value is already copied; only the unlock result is reported. */
    return CRYPTO_THREAD_unlock(lock) ? 1 : 0;
}
1034 
# ifndef FIPS_MODULE
/*
 * Fork-handler initialisation hook for this threading backend.
 *
 * No work is performed here; the function always reports success (1).
 * It is only compiled when FIPS_MODULE is not defined.
 */
int openssl_init_fork_handlers(void)
{
    return 1;
}
# endif /* FIPS_MODULE */
1041 
/*
 * Return the fork identifier for the current process, which is the value of
 * getpid() converted to int.
 */
int openssl_get_fork_id(void)
{
    return (int)getpid();
}
1046 #endif
1047