/*
 * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include "internal/quic_ackm.h"
#include "internal/common.h"
#include <assert.h>

/*
 * TX Packet History
 * *****************
 *
 * The TX Packet History object tracks information about packets which have been
 * sent and for which we later expect to receive an ACK. It is essentially a
 * simple database keeping a list of packet information structures in packet
 * number order which can also be looked up directly by packet number.
 *
 * We currently only allow packets to be appended to the list (i.e. the packet
 * numbers of the packets appended to the list must monotonically increase), as
 * we should not currently need more general functionality such as a sorted list
 * insert.
 */
struct tx_pkt_history_st {
    /* A linked list of all our packets. */
    OSSL_ACKM_TX_PKT *head, *tail;

    /* Number of packets in the list. */
    size_t num_packets;

    /*
     * Mapping from packet numbers (uint64_t) to (OSSL_ACKM_TX_PKT *)
     *
     * Invariant: A packet is in this map if and only if it is in the linked
     *            list.
     */
    LHASH_OF(OSSL_ACKM_TX_PKT) *map;

    /*
     * The lowest packet number which may currently be added to the history list
     * (inclusive). We do not allow packet numbers to be added to the history
     * list non-monotonically, so packet numbers must be greater than or equal
     * to this value.
     */
    uint64_t watermark;

    /*
     * Packet number of the highest packet info structure we have yet appended
     * to the list. This is usually one less than watermark, except when we have
     * not added any packet yet.
     */
    uint64_t highest_sent;
};

DEFINE_LHASH_OF_EX(OSSL_ACKM_TX_PKT);

static unsigned long tx_pkt_info_hash(const OSSL_ACKM_TX_PKT *pkt)
{
    return pkt->pkt_num;
}

static int tx_pkt_info_compare(const OSSL_ACKM_TX_PKT *a,
                               const OSSL_ACKM_TX_PKT *b)
{
    if (a->pkt_num < b->pkt_num)
        return -1;
    if (a->pkt_num > b->pkt_num)
        return 1;
    return 0;
}

static int
tx_pkt_history_init(struct tx_pkt_history_st *h)
{
    h->head = h->tail = NULL;
    h->num_packets  = 0;
    h->watermark    = 0;
    h->highest_sent = 0;

    h->map = lh_OSSL_ACKM_TX_PKT_new(tx_pkt_info_hash, tx_pkt_info_compare);
    if (h->map == NULL)
        return 0;

    return 1;
}

static void
tx_pkt_history_destroy(struct tx_pkt_history_st *h)
{
    lh_OSSL_ACKM_TX_PKT_free(h->map);
    h->map = NULL;
    h->head = h->tail = NULL;
}

static int
tx_pkt_history_add_actual(struct tx_pkt_history_st *h,
                          OSSL_ACKM_TX_PKT *pkt)
{
    OSSL_ACKM_TX_PKT *existing;

    /*
     * There should not be any existing packet with this number
     * in our mapping.
     */
    existing = lh_OSSL_ACKM_TX_PKT_retrieve(h->map, pkt);
    if (!ossl_assert(existing == NULL))
        return 0;

    /* Should not already be in a list. */
    if (!ossl_assert(pkt->next == NULL && pkt->prev == NULL))
        return 0;

    lh_OSSL_ACKM_TX_PKT_insert(h->map, pkt);

    pkt->next = NULL;
    pkt->prev = h->tail;
    if (h->tail != NULL)
        h->tail->next = pkt;
    h->tail = pkt;
    if (h->head == NULL)
        h->head = h->tail;

    ++h->num_packets;
    return 1;
}

/* Adds a packet information structure to the history list. */
static int
tx_pkt_history_add(struct tx_pkt_history_st *h,
                   OSSL_ACKM_TX_PKT *pkt)
{
    if (!ossl_assert(pkt->pkt_num >= h->watermark))
        return 0;

    if (tx_pkt_history_add_actual(h, pkt) < 1)
        return 0;

    h->watermark    = pkt->pkt_num + 1;
    h->highest_sent = pkt->pkt_num;
    return 1;
}

/* Retrieve a packet information structure by packet number. */
static OSSL_ACKM_TX_PKT *
tx_pkt_history_by_pkt_num(struct tx_pkt_history_st *h, uint64_t pkt_num)
{
    OSSL_ACKM_TX_PKT key;

    key.pkt_num = pkt_num;

    return lh_OSSL_ACKM_TX_PKT_retrieve(h->map, &key);
}

/* Remove a packet information structure from the history log. */
static int
tx_pkt_history_remove(struct tx_pkt_history_st *h, uint64_t pkt_num)
{
    OSSL_ACKM_TX_PKT key, *pkt;
    key.pkt_num = pkt_num;

    pkt = tx_pkt_history_by_pkt_num(h, pkt_num);
    if (pkt == NULL)
        return 0;

    if (pkt->prev != NULL)
        pkt->prev->next = pkt->next;
    if (pkt->next != NULL)
        pkt->next->prev = pkt->prev;
    if (h->head == pkt)
        h->head = pkt->next;
    if (h->tail == pkt)
        h->tail = pkt->prev;

    pkt->prev = pkt->next = NULL;

    lh_OSSL_ACKM_TX_PKT_delete(h->map, &key);
    --h->num_packets;
    return 1;
}

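/*
 * Illustrative sketch (not part of the implementation): a caller-provided
 * OSSL_ACKM_TX_PKT is appended with a monotonically increasing packet number,
 * can be looked up by that number, and is removed again once it is ACKed or
 * declared lost. Fields other than pkt_num are omitted for brevity.
 *
 *   struct tx_pkt_history_st h;
 *   OSSL_ACKM_TX_PKT pkt = {0};
 *
 *   if (!tx_pkt_history_init(&h))
 *       return 0;
 *
 *   pkt.pkt_num = 7;
 *   if (!tx_pkt_history_add(&h, &pkt))      // PNs below h.watermark are rejected
 *       return 0;
 *
 *   assert(tx_pkt_history_by_pkt_num(&h, 7) == &pkt);
 *   tx_pkt_history_remove(&h, 7);           // e.g. on ACK or loss
 *   tx_pkt_history_destroy(&h);
 */
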
/*
 * RX Packet Number Tracking
 * *************************
 *
 * **Background.** The RX side of the ACK manager must track packets we have
 * received for which we have to generate ACK frames. Broadly, this means we
 * store a set of packet numbers which we have received but which we do not know
 * for a fact that the transmitter knows we have received.
 *
 * This must handle various situations:
 *
 *   1. We receive a packet but have not sent an ACK yet, so the transmitter
 *      does not know whether we have received it or not yet.
 *
 *   2. We receive a packet and send an ACK which is lost. We do not
 *      immediately know that the ACK was lost and the transmitter does not know
 *      that we have received the packet.
 *
 *   3. We receive a packet and send an ACK which is received by the
 *      transmitter. The transmitter does not immediately respond with an ACK,
 *      or responds with an ACK which is lost. The transmitter knows that we
 *      have received the packet, but we do not know for sure that it knows,
 *      because the ACK we sent could have been lost.
 *
 *   4. We receive a packet and send an ACK which is received by the
 *      transmitter. The transmitter subsequently sends us an ACK which confirms
 *      its receipt of the ACK we sent, and we successfully receive that ACK, so
 *      we know that the transmitter knows that we received the original
 *      packet.
 *
 * Only when we reach case (4) are we relieved of any need to track a given
 * packet number we have received, because only in this case do we know for sure
 * that the peer knows we have received the packet. Having reached case (4) we
 * will never again need to generate an ACK containing the PN in question, but
 * until we reach that point, we must keep track of the PN as not having been
 * provably ACKed, as we may have to keep generating ACKs for the given PN not
 * just until the transmitter receives one, but until we know that it has
 * received one. This will be referred to herein as "provably ACKed".
 *
 * **Duplicate handling.** The above discusses the case where we have received a
 * packet with a given PN but are at best unsure whether the sender knows we
 * have received it or not. However, we must also handle the case where we have
 * yet to receive a packet with a given PN in the first place. The reason for
 * this is the requirement expressed by RFC 9000 s. 12.3:
 *
 *   "A receiver MUST discard a newly unprotected packet unless it is certain
 *    that it has not processed another packet with the same packet number from
 *    the same packet number space."
 *
 * We must ensure we never process a duplicate PN. As such, each possible PN we
 * can receive must exist in one of the following logical states:
 *
 *   - We have never processed this PN before
 *     (so if we receive such a PN, it can be processed)
 *
 *   - We have processed this PN but it has not yet been provably ACKed
 *     (and should therefore be in any future ACK frame generated;
 *      if we receive such a PN again, it must be ignored)
 *
 *   - We have processed this PN and it has been provably ACKed
 *     (if we receive such a PN again, it must be ignored)
 *
 * However, if we were to track this state for every PN ever used in the history
 * of a connection, the amount of state required would increase unboundedly as
 * the connection goes on (for example, we would have to store a set of every PN
 * ever received.)
 *
 * RFC 9000 s. 12.3 continues:
 *
 *   "Endpoints that track all individual packets for the purposes of detecting
 *    duplicates are at risk of accumulating excessive state. The data required
 *    for detecting duplicates can be limited by maintaining a minimum packet
 *    number below which all packets are immediately dropped."
 *
 * Moreover, RFC 9000 s. 13.2.3 states that:
 *
 *   "A receiver MUST retain an ACK Range unless it can ensure that it will not
 *    subsequently accept packets with numbers in that range. Maintaining a
 *    minimum packet number that increases as ranges are discarded is one way to
 *    achieve this with minimal state."
 *
 * This touches on a subtlety of the original requirement quoted above: the
 * receiver MUST discard a packet unless it is certain that it has not processed
 * another packet with the same PN. However, this does not forbid the receiver
 * from also discarding some PNs even though it has not yet processed them. In
 * other words, implementations must be conservative and err in the direction of
 * assuming a packet is a duplicate, but it is acceptable for this to come at
 * the cost of falsely identifying some packets as duplicates.
 *
 * This allows us to bound the amount of state we must keep, and we adopt the
 * suggested strategy quoted above to do so. We define a watermark PN below
 * which all PNs are in the same state. This watermark is only ever increased.
 * Thus the set of PNs whose state needs to be explicitly tracked is limited to
 * only a small number of recent PNs, and all older PNs have an assumed state.
 *
 * Any given PN thus falls into one of the following states:
 *
 *   - (A) The PN is above the watermark but we have not yet received it.
 *
 *         If we receive such a PN, we should process it and record the PN as
 *         received.
 *
 *   - (B) The PN is above the watermark and we have received it.
 *
 *         The PN should be included in any future ACK frame we generate.
 *         If we receive such a PN again, we should ignore it.
 *
 *   - (C) The PN is below the watermark.
 *
 *         We do not know whether a packet with the given PN was received or
 *         not. To be safe, if we receive such a packet, it is not processed.
 *
 * Note that state (C) corresponds both to the "we have processed this PN and
 * it has been provably ACKed" logical state and to a subset of the PNs in the
 * "we have never processed this PN before" logical state (namely all PNs which
 * were lost and never received, but which are not recent enough to be above
 * the watermark). The reason we can merge these states and avoid tracking
 * states for the PNs in this state is that the provably ACKed and
 * never-received states are functionally identical in terms of how we need to
 * handle them: we don't need to do anything for PNs in either of these states,
 * so we don't have to care about PNs in this state nor do we have to care
 * about distinguishing the two states for a given PN.
 *
 * Note that under this scheme provably ACKed PNs are by definition always below
 * the watermark; therefore, it follows that when a PN becomes provably ACKed,
 * the watermark must be immediately increased to exceed it (otherwise we would
 * keep reporting it in future ACK frames).
 *
 * This is in line with RFC 9000 s. 13.2.4's suggested strategy on when
 * to advance the watermark:
 *
 *   "When a packet containing an ACK frame is sent, the Largest Acknowledged
 *    field in that frame can be saved. When a packet containing an ACK frame is
 *    acknowledged, the receiver can stop acknowledging packets less than or
 *    equal to the Largest Acknowledged field in the sent ACK frame."
 *
 * This is where our scheme's false positives arise. When a packet containing an
 * ACK frame is itself ACK'd, PNs referenced in that ACK frame become provably
 * ACKed, and the watermark is bumped accordingly. However, the Largest
 * Acknowledged field does not imply that all lower PNs have been received,
 * because there may be gaps in the ranges of PNs expressed by that and previous
 * ACK frames. Thus, some unreceived PNs may be moved below the watermark, and
 * we may subsequently reject those PNs as possibly being duplicates even though
 * we have not actually received those PNs. Since we bump the watermark when a
 * PN becomes provably ACKed, it follows that an unreceived PN falls below the
 * watermark (and thus becomes a false positive for the purposes of duplicate
 * detection) when a higher-numbered PN becomes provably ACKed.
 *
 * Thus, when PN n becomes provably ACKed, any unreceived PNs in the range [0,
 * n) will no longer be processed. Although datagrams may be reordered in the
 * network, a PN we receive can only become provably ACKed after our own
 * subsequently generated ACK frame is sent in a future TX packet, and then we
 * receive another RX PN acknowledging that TX packet. This means that a given
 * RX PN can only become provably ACKed at least 1 RTT after it is received; it
 * is unlikely that any reordered datagrams will still be "in the network" (and
 * not lost) by this time. If this does occur for whatever reason and a late PN
 * is received, the packet will be discarded unprocessed and the PN is simply
 * handled as though lost (a "written off" PN).
 *
 * **Data structure.** Our state for the RX handling side of the ACK manager, as
 * discussed above, mainly comprises:
 *
 *   a) a logical set of PNs, and
 *   b) a monotonically increasing PN counter (the watermark).
 *
 * For (a), we define a data structure which stores a logical set of PNs, which
 * we use to keep track of which PNs we have received but which have not yet
 * been provably ACKed, and for which we will thus later need to generate an
 * ACK frame.
 *
 * The correspondence with the logical states discussed above is as follows. A
 * PN is in state (C) if it is below the watermark; otherwise it is in state (B)
 * if it is in the logical set of PNs, and in state (A) otherwise.
 *
 * Note that PNs are only removed from the PN set (when they become provably
 * ACKed or written off) by virtue of advancement of the watermark. Removing PNs
 * from the PN set any other way would be ambiguous as it would be
 * indistinguishable from a PN we have not yet received and risk us processing a
 * duplicate packet. In other words, for a given PN:
 *
 *   - State (A) can transition to state (B) or (C)
 *   - State (B) can transition to state (C) only
 *   - State (C) is the terminal state
 *
 * We can query the logical set data structure for PNs which have been received
 * but which have not been provably ACKed when we want to generate ACK frames.
 * Since ACK frames can be lost and/or we might not know that the peer has
 * successfully received them, we might generate multiple ACK frames covering a
 * given PN until that PN becomes provably ACKed and we finally remove it from
 * our set (by bumping the watermark) as no longer being our concern.
 *
 * The data structure supports the following operations:
 *
 *   Insert Range: Adds an inclusive range of packet numbers [start, end]
 *                 to the set. Equivalent to Insert for each number
 *                 in the range. (Used when we receive a new PN.)
 *
 *   Remove Range: Removes an inclusive range of packet numbers [start, end]
 *                 from the set. Not all of the range need already be in
 *                 the set, but any part of the range in the set is removed.
 *                 (Used when bumping the watermark.)
 *
 *   Query:        Is a PN in the data structure?
 *
 * The data structure can be iterated.
 *
 * For greater efficiency in tracking large numbers of contiguous PNs, we track
 * PN ranges rather than individual PNs. The data structure manages a list of PN
 * ranges [[start, end]...]. Internally this is implemented as a doubly linked
 * sorted list of range structures, which are automatically split and merged as
 * necessary.
 *
 * This data structure requires O(n) traversal of the list for insertion,
 * removal and query when we are not adding/removing ranges which are near the
 * beginning or end of the set of ranges. It is expected that the number of PN
 * ranges needed at any given time will generally be small and that most
 * operations will be close to the beginning or end of the range.
 *
 * Invariant: The data structure is always sorted in ascending order by PN.
 *
 * Invariant: No two adjacent ranges ever 'border' one another (have no
 *            numerical gap between them) as the data structure always ensures
 *            such ranges are merged.
 *
 * Invariant: No two ranges ever overlap.
 *
 * Invariant: No range [a, b] ever has a > b.
 *
 * Invariant: Since ranges are represented using inclusive bounds, no range
 *            item inside the data structure can represent a span of zero PNs.
 *
 * **Possible duplicates.** A PN is considered a possible duplicate when either:
 *
 *  a) its PN is already in the PN set (i.e. has already been received), or
 *  b) its PN is below the watermark (i.e. was provably ACKed or written off).
 *
 * A packet with a given PN is considered 'processable' when that PN is not
 * considered a possible duplicate (see ossl_ackm_is_rx_pn_processable).
 *
 * **TX/RX interaction.** The watermark is bumped whenever an RX packet becomes
 * provably ACKed. This occurs when an ACK frame is received by the TX side of
 * the ACK manager; thus, there is necessary interaction between the TX and RX
 * sides of the ACK manager.
 *
 * This is implemented as follows. When a packet is queued as sent in the TX
 * side of the ACK manager, it may optionally have a Largest Acked value set on
 * it. The user of the ACK manager should do this if the packet being
 * transmitted contains an ACK frame, by setting the field to the Largest Acked
 * field of that frame. Otherwise, this field should be set to QUIC_PN_INVALID.
 * When a TX packet is eventually acknowledged which has this field set, it is
 * used to update the state of the RX side of the ACK manager by bumping the
 * watermark accordingly.
 */
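/*
 * Illustrative sketch only: how the logical states (A), (B) and (C) described
 * above map onto the watermark and the PN set, using the rx_pkt_history_st
 * structure and pn_set_query helper defined below. The helper shown here is
 * hypothetical and does not exist in this file; the real processability check
 * is ossl_ackm_is_rx_pn_processable.
 *
 *   // Returns 'C' (below watermark), 'B' (received, not provably ACKed)
 *   // or 'A' (not yet received).
 *   static char classify_pn(const struct rx_pkt_history_st *h, QUIC_PN pn)
 *   {
 *       if (pn < h->watermark)
 *           return 'C';     // never processed again; treated as a duplicate
 *       if (pn_set_query(&h->set, pn))
 *           return 'B';     // will appear in future ACK frames
 *       return 'A';         // processable on receipt
 *   }
 */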
struct pn_set_item_st {
    struct pn_set_item_st *prev, *next;
    OSSL_QUIC_ACK_RANGE    range;
};

struct pn_set_st {
    struct pn_set_item_st *head, *tail;

    /* Number of ranges (not PNs) in the set. */
    size_t                 num_ranges;
};

static void pn_set_init(struct pn_set_st *s)
{
    s->head = s->tail = NULL;
    s->num_ranges = 0;
}

static void pn_set_destroy(struct pn_set_st *s)
{
    struct pn_set_item_st *x, *xnext;

    for (x = s->head; x != NULL; x = xnext) {
        xnext = x->next;
        OPENSSL_free(x);
    }
}

/* Possible merge of x, x->prev */
static void pn_set_merge_adjacent(struct pn_set_st *s, struct pn_set_item_st *x)
{
    struct pn_set_item_st *xprev = x->prev;

    if (xprev == NULL)
        return;

    if (x->range.start - 1 != xprev->range.end)
        return;

    x->range.start = xprev->range.start;
    x->prev = xprev->prev;
    if (x->prev != NULL)
        x->prev->next = x;

    if (s->head == xprev)
        s->head = x;

    OPENSSL_free(xprev);
    --s->num_ranges;
}

/* Returns 1 if there exists a PN x which falls within both ranges a and b. */
static int pn_range_overlaps(const OSSL_QUIC_ACK_RANGE *a,
                             const OSSL_QUIC_ACK_RANGE *b)
{
    return ossl_quic_pn_min(a->end, b->end)
        >= ossl_quic_pn_max(a->start, b->start);
}

/*
 * Insert a range into a PN set. Returns 0 on allocation failure, in which case
 * the PN set is in a valid but undefined state. Otherwise, returns 1. Ranges
 * can overlap existing ranges without limitation. If a range is a subset of
 * an existing range in the set, this is a no-op and returns 1.
 */
static int pn_set_insert(struct pn_set_st *s, const OSSL_QUIC_ACK_RANGE *range)
{
    struct pn_set_item_st *x, *z, *xnext, *f, *fnext;
    QUIC_PN start = range->start, end = range->end;

    if (!ossl_assert(start <= end))
        return 0;

    if (s->head == NULL) {
        /* Nothing in the set yet, so just add this range. */
        x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
        if (x == NULL)
            return 0;

        x->range.start = start;
        x->range.end   = end;
        s->head = s->tail = x;
        ++s->num_ranges;
        return 1;
    }

    if (start > s->tail->range.end) {
        /*
         * Range is after the latest range in the set, so append.
         *
         * Note: The case where the range is before the earliest range in the
         * set is handled as a degenerate case of the final case below. See
         * optimization note (*) below.
         */
        if (s->tail->range.end + 1 == start) {
            s->tail->range.end = end;
            return 1;
        }

        x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
        if (x == NULL)
            return 0;

        x->range.start = start;
        x->range.end   = end;
        x->prev        = s->tail;
        if (s->tail != NULL)
            s->tail->next = x;
        s->tail = x;
        ++s->num_ranges;
        return 1;
    }

    if (start <= s->head->range.start && end >= s->tail->range.end) {
        /*
         * New range dwarfs all ranges in our set.
         *
         * Free everything except the first range in the set, which we scavenge
         * and reuse.
         */
        for (x = s->head->next; x != NULL; x = xnext) {
            xnext = x->next;
            OPENSSL_free(x);
        }

        s->head->range.start = start;
        s->head->range.end   = end;
        s->head->next = s->head->prev = NULL;
        s->tail = s->head;
        s->num_ranges = 1;
        return 1;
    }

    /*
     * Walk backwards since we will most often be inserting at the end. As an
     * optimization, test the head node first and skip iterating over the
     * entire list if we are inserting at the start. The assumption is that
     * insertion at the start and end of the space will be the most common
     * operations. (*)
     */
    z = end < s->head->range.start ? s->head : s->tail;

    for (; z != NULL; z = z->prev) {
        /* An existing range dwarfs our new range (optimisation). */
        if (z->range.start <= start && z->range.end >= end)
            return 1;

        if (pn_range_overlaps(&z->range, range)) {
            /*
             * Our new range overlaps an existing range, or possibly several
             * existing ranges.
             */
            struct pn_set_item_st *ovend = z;
            OSSL_QUIC_ACK_RANGE t;
            size_t n = 0;

            t.end = ossl_quic_pn_max(end, z->range.end);

            /* Get earliest overlapping range. */
            for (; z->prev != NULL && pn_range_overlaps(&z->prev->range, range);
                   z = z->prev);

            t.start = ossl_quic_pn_min(start, z->range.start);

            /* Replace sequence of nodes z..ovend with ovend only. */
            ovend->range = t;
            ovend->prev = z->prev;
            if (z->prev != NULL)
                z->prev->next = ovend;
            if (s->head == z)
                s->head = ovend;

            /* Free now unused nodes. */
            for (f = z; f != ovend; f = fnext, ++n) {
                fnext = f->next;
                OPENSSL_free(f);
            }

            s->num_ranges -= n;
            break;
        } else if (end < z->range.start
                    && (z->prev == NULL || start > z->prev->range.end)) {
            if (z->range.start == end + 1) {
                /* We can extend the following range backwards. */
                z->range.start = start;

                /*
                 * If this closes a gap we now need to merge
                 * consecutive nodes.
                 */
                pn_set_merge_adjacent(s, z);
            } else if (z->prev != NULL && z->prev->range.end + 1 == start) {
                /* We can extend the preceding range forwards. */
                z->prev->range.end = end;

                /*
                 * If this closes a gap we now need to merge
                 * consecutive nodes.
                 */
                pn_set_merge_adjacent(s, z);
            } else {
                /*
                 * The new interval is between intervals without overlapping or
                 * touching them, so insert between, preserving sort.
                 */
                x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
                if (x == NULL)
                    return 0;

                x->range.start = start;
                x->range.end   = end;

                x->next = z;
                x->prev = z->prev;
                if (x->prev != NULL)
                    x->prev->next = x;
                z->prev = x;
                if (s->head == z)
                    s->head = x;

                ++s->num_ranges;
            }
            break;
        }
    }

    return 1;
}

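/*
 * Illustrative example (not executed): starting from an empty set,
 *
 *   insert [ 5, 10]   ->  { [5, 10] }
 *   insert [12, 15]   ->  { [5, 10], [12, 15] }   (gap of one PN, no merge)
 *   insert [11, 11]   ->  { [5, 15] }             (closes the gap; ranges merge)
 *   insert [ 7,  9]   ->  { [5, 15] }             (subset; no-op)
 *
 * maintaining the invariants that ranges never overlap, never border one
 * another and are kept sorted in ascending order.
 */
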
/*
 * Remove a range from the set. Returns 0 on allocation failure, in which case
 * the PN set is unchanged. Otherwise, returns 1. Ranges which are not already
 * in the set can be removed without issue. If a passed range is not in the PN
 * set at all, this is a no-op and returns 1.
 */
static int pn_set_remove(struct pn_set_st *s, const OSSL_QUIC_ACK_RANGE *range)
{
    struct pn_set_item_st *z, *zprev, *y;
    QUIC_PN start = range->start, end = range->end;

    if (!ossl_assert(start <= end))
        return 0;

    /* Walk backwards since we will most often be removing at the end. */
    for (z = s->tail; z != NULL; z = zprev) {
        zprev = z->prev;

        if (start > z->range.end)
            /* No overlapping ranges can exist beyond this point, so stop. */
            break;

        if (start <= z->range.start && end >= z->range.end) {
            /*
             * The range being removed dwarfs this range, so it should be
             * removed.
             */
            if (z->next != NULL)
                z->next->prev = z->prev;
            if (z->prev != NULL)
                z->prev->next = z->next;
            if (s->head == z)
                s->head = z->next;
            if (s->tail == z)
                s->tail = z->prev;

            OPENSSL_free(z);
            --s->num_ranges;
        } else if (start <= z->range.start) {
            /*
             * The range being removed includes the start of this range, but
             * does not cover the entire range (as this would be caught by the
             * case above). Shorten the range.
             */
            assert(end < z->range.end);
            z->range.start = end + 1;
        } else if (end >= z->range.end) {
            /*
             * The range being removed includes the end of this range, but does
             * not cover the entire range (as this would be caught by the case
             * above). Shorten the range. We can also stop iterating.
             */
            assert(start > z->range.start);
            assert(start > 0);
            z->range.end = start - 1;
            break;
        } else if (start > z->range.start && end < z->range.end) {
            /*
             * The range being removed falls entirely in this range, so cut it
             * into two. Cases where a zero-length range would be created are
             * handled by the above cases.
             */
            y = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
            if (y == NULL)
                return 0;

            y->range.end   = z->range.end;
            y->range.start = end + 1;
            y->next = z->next;
            y->prev = z;
            if (y->next != NULL)
                y->next->prev = y;

            z->range.end = start - 1;
            z->next = y;

            if (s->tail == z)
                s->tail = y;

            ++s->num_ranges;
            break;
        } else {
            /* Assert no partial overlap; all cases should be covered above. */
            assert(!pn_range_overlaps(&z->range, range));
        }
    }

    return 1;
}

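/*
 * Illustrative example (not executed): removing a strict middle portion of an
 * existing range splits that range in two, e.g.
 *
 *   set { [1, 10] },  remove [4, 6]   ->  { [1, 3], [7, 10] }
 *
 * whereas removing a prefix or suffix merely shortens the range, and removing
 * a covering range deletes it entirely.
 */
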
/* Returns 1 iff the given PN is in the PN set. */
static int pn_set_query(const struct pn_set_st *s, QUIC_PN pn)
{
    struct pn_set_item_st *x;

    if (s->head == NULL)
        return 0;

    for (x = s->tail; x != NULL; x = x->prev)
        if (x->range.start <= pn && x->range.end >= pn)
            return 1;
        else if (x->range.end < pn)
            return 0;

    return 0;
}

struct rx_pkt_history_st {
    struct pn_set_st set;

    /*
     * Invariant: PNs below this are not in the set.
     * Invariant: This is monotonic and only ever increases.
     */
    QUIC_PN watermark;
};

static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h,
                                         QUIC_PN watermark);

static void rx_pkt_history_init(struct rx_pkt_history_st *h)
{
    pn_set_init(&h->set);
    h->watermark = 0;
}

static void rx_pkt_history_destroy(struct rx_pkt_history_st *h)
{
    pn_set_destroy(&h->set);
}

/*
 * Limit the number of ACK ranges we store to prevent resource consumption DoS
 * attacks.
 */
#define MAX_RX_ACK_RANGES   32

static void rx_pkt_history_trim_range_count(struct rx_pkt_history_st *h)
{
    QUIC_PN highest = QUIC_PN_INVALID;

    while (h->set.num_ranges > MAX_RX_ACK_RANGES) {
        OSSL_QUIC_ACK_RANGE r = h->set.head->range;

        highest = (highest == QUIC_PN_INVALID)
            ? r.end : ossl_quic_pn_max(highest, r.end);

        pn_set_remove(&h->set, &r);
    }

    /*
     * Bump watermark to cover all PNs we removed to avoid accidental
     * reprocessing of packets.
     */
    if (highest != QUIC_PN_INVALID)
        rx_pkt_history_bump_watermark(h, highest + 1);
}

static int rx_pkt_history_add_pn(struct rx_pkt_history_st *h,
                                 QUIC_PN pn)
{
    OSSL_QUIC_ACK_RANGE r;

    r.start = pn;
    r.end   = pn;

    if (pn < h->watermark)
        return 1; /* consider this a success case */

    if (pn_set_insert(&h->set, &r) != 1)
        return 0;

    rx_pkt_history_trim_range_count(h);
    return 1;
}

static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h,
                                         QUIC_PN watermark)
{
    OSSL_QUIC_ACK_RANGE r;

    if (watermark <= h->watermark)
        return 1;

    /* Remove existing PNs below the watermark. */
    r.start = 0;
    r.end   = watermark - 1;
    if (pn_set_remove(&h->set, &r) != 1)
        return 0;

    h->watermark = watermark;
    return 1;
}

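/*
 * Illustrative example (not executed): bumping the watermark discards tracked
 * PNs below it in one range removal, e.g. with
 *
 *   set { [2, 8] }, watermark 0
 *
 * a call to rx_pkt_history_bump_watermark(h, 5) removes [0, 4], leaving
 * { [5, 8] } with watermark 5; any subsequently received PN below 5 is then
 * treated as a possible duplicate and reported as a success by
 * rx_pkt_history_add_pn without being re-inserted.
 */
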
/*
 * ACK Manager Implementation
 * **************************
 * Implementation of the ACK manager proper.
 */

/* Constants used by the ACK manager; see RFC 9002. */
#define K_GRANULARITY           (1 * OSSL_TIME_MS)
#define K_PKT_THRESHOLD         3
#define K_TIME_THRESHOLD_NUM    9
#define K_TIME_THRESHOLD_DEN    8

/* The maximum number of times we allow PTO to be doubled. */
#define MAX_PTO_COUNT          16

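/*
 * Worked example (illustrative only): with K_TIME_THRESHOLD_NUM/DEN = 9/8 and
 * max(latest_rtt, smoothed_rtt) = 40ms, packets older than 9/8 * 40ms = 45ms
 * are declared lost by the time threshold; with RTTs below roughly 0.9ms the
 * K_GRANULARITY floor of 1ms applies instead. Independently, the packet
 * threshold K_PKT_THRESHOLD declares a packet lost once a packet numbered at
 * least 3 higher has been acknowledged.
 */
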
struct ossl_ackm_st {
    /* Our list of transmitted packets. Corresponds to RFC 9002 sent_packets. */
    struct tx_pkt_history_st tx_history[QUIC_PN_SPACE_NUM];

    /* Our list of received PNs which are not yet provably acked. */
    struct rx_pkt_history_st rx_history[QUIC_PN_SPACE_NUM];

    /* Polymorphic dependencies that we consume. */
    OSSL_TIME             (*now)(void *arg);
    void                   *now_arg;
    OSSL_STATM             *statm;
    const OSSL_CC_METHOD   *cc_method;
    OSSL_CC_DATA           *cc_data;

    /* RFC 9002 variables. */
    uint32_t        pto_count;
    QUIC_PN         largest_acked_pkt[QUIC_PN_SPACE_NUM];
    OSSL_TIME       time_of_last_ack_eliciting_pkt[QUIC_PN_SPACE_NUM];
    OSSL_TIME       loss_time[QUIC_PN_SPACE_NUM];
    OSSL_TIME       loss_detection_deadline;

    /* Lowest PN which is still not known to be ACKed. */
    QUIC_PN         lowest_unacked_pkt[QUIC_PN_SPACE_NUM];

    /* Time at which we got our first RTT sample, or 0. */
    OSSL_TIME       first_rtt_sample;

    /*
     * A packet's num_bytes are added to this if it is inflight,
     * and removed again once ack'd/lost/discarded.
     */
    uint64_t        bytes_in_flight;

    /*
     * A packet's num_bytes are added to this if it is both inflight and
     * ack-eliciting, and removed again once ack'd/lost/discarded.
     */
    uint64_t        ack_eliciting_bytes_in_flight[QUIC_PN_SPACE_NUM];

    /* Count of ECN-CE events. */
    uint64_t        peer_ecnce[QUIC_PN_SPACE_NUM];

    /* Set to 1 when the handshake is confirmed. */
    char            handshake_confirmed;

    /* Set to 1 when the peer has completed address validation. */
    char            peer_completed_addr_validation;

    /* Set to 1 when a PN space has been discarded. */
    char            discarded[QUIC_PN_SPACE_NUM];

    /* Set to 1 when we think an ACK frame should be generated. */
    char            rx_ack_desired[QUIC_PN_SPACE_NUM];

    /* Set to 1 if an ACK frame has ever been generated. */
    char            rx_ack_generated[QUIC_PN_SPACE_NUM];

    /* Probe request counts for reporting to the user. */
    OSSL_ACKM_PROBE_INFO    pending_probe;

    /* Generated ACK frames for each PN space. */
    OSSL_QUIC_FRAME_ACK     ack[QUIC_PN_SPACE_NUM];
    OSSL_QUIC_ACK_RANGE     ack_ranges[QUIC_PN_SPACE_NUM][MAX_RX_ACK_RANGES];

    /* Other RX state. */
    /* Largest PN we have RX'd. */
    QUIC_PN         rx_largest_pn[QUIC_PN_SPACE_NUM];

    /* Time at which the PN in rx_largest_pn was RX'd. */
    OSSL_TIME       rx_largest_time[QUIC_PN_SPACE_NUM];

    /*
     * ECN event counters. Each time we receive a packet with a given ECN label,
     * the corresponding ECN counter here is incremented.
     */
    uint64_t        rx_ect0[QUIC_PN_SPACE_NUM];
    uint64_t        rx_ect1[QUIC_PN_SPACE_NUM];
    uint64_t        rx_ecnce[QUIC_PN_SPACE_NUM];

    /*
     * Number of ACK-eliciting packets since last ACK. We use this to defer
     * emitting ACK frames until a threshold number of ACK-eliciting packets
     * have been received.
     */
    uint32_t        rx_ack_eliciting_pkts_since_last_ack[QUIC_PN_SPACE_NUM];

    /*
     * The ACK frame coalescing deadline at which we should flush any unsent ACK
     * frames.
     */
    OSSL_TIME       rx_ack_flush_deadline[QUIC_PN_SPACE_NUM];

    /* Callbacks for deadline updates. */
    void (*loss_detection_deadline_cb)(OSSL_TIME deadline, void *arg);
    void *loss_detection_deadline_cb_arg;

    void (*ack_deadline_cb)(OSSL_TIME deadline, int pkt_space, void *arg);
    void *ack_deadline_cb_arg;
};

static ossl_inline uint32_t min_u32(uint32_t x, uint32_t y)
{
    return x < y ? x : y;
}

/*
 * Get TX history for a given packet number space. Must not have been
 * discarded.
 */
static struct tx_pkt_history_st *get_tx_history(OSSL_ACKM *ackm, int pkt_space)
{
    assert(!ackm->discarded[pkt_space]);

    return &ackm->tx_history[pkt_space];
}

/*
 * Get RX history for a given packet number space. Must not have been
 * discarded.
 */
static struct rx_pkt_history_st *get_rx_history(OSSL_ACKM *ackm, int pkt_space)
{
    assert(!ackm->discarded[pkt_space]);

    return &ackm->rx_history[pkt_space];
}

/* Does the newly-acknowledged list contain any ack-eliciting packet? */
static int ack_includes_ack_eliciting(OSSL_ACKM_TX_PKT *pkt)
{
    for (; pkt != NULL; pkt = pkt->anext)
        if (pkt->is_ack_eliciting)
            return 1;

    return 0;
}

/* Return number of ACK-eliciting bytes in flight across all PN spaces. */
static uint64_t ackm_ack_eliciting_bytes_in_flight(OSSL_ACKM *ackm)
{
    int i;
    uint64_t total = 0;

    for (i = 0; i < QUIC_PN_SPACE_NUM; ++i)
        total += ackm->ack_eliciting_bytes_in_flight[i];

    return total;
}

/* Return 1 if the range contains the given PN. */
static int range_contains(const OSSL_QUIC_ACK_RANGE *range, QUIC_PN pn)
{
    return pn >= range->start && pn <= range->end;
}

/*
 * Given a logical representation of an ACK frame 'ack', create a singly-linked
 * list of the newly ACK'd packets; that is, of packets which are matched by the
 * list of PN ranges contained in the ACK frame. The packet structures in the
 * list returned are removed from the TX history list. Returns a pointer to the
 * list head (or NULL if empty).
 */
static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_newly_acked_pkts(OSSL_ACKM *ackm,
                                                                 const OSSL_QUIC_FRAME_ACK *ack,
                                                                 int pkt_space)
{
    OSSL_ACKM_TX_PKT *acked_pkts = NULL, **fixup = &acked_pkts, *pkt, *pprev;
    struct tx_pkt_history_st *h;
    size_t ridx = 0;

    assert(ack->num_ack_ranges > 0);

    /*
     * Our history list is a list of packets sorted in ascending order
     * by packet number.
     *
     * ack->ack_ranges is a list of packet number ranges in descending order.
     *
     * Walk through our history list from the end in order to efficiently detect
     * membership in the specified ack ranges. As an optimization, we use our
     * hashtable to try and skip to the first matching packet. This may fail if
     * the ACK ranges given include nonexistent packets.
     */
    h = get_tx_history(ackm, pkt_space);

    pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end);
    if (pkt == NULL)
        pkt = h->tail;

    for (; pkt != NULL; pkt = pprev) {
        /*
         * Save prev value as it will be zeroed if we remove the packet from the
         * history list below.
         */
        pprev = pkt->prev;

        for (;; ++ridx) {
            if (ridx >= ack->num_ack_ranges) {
                /*
                 * We have exhausted all ranges so stop here, even if there are
                 * more packets to look at.
                 */
                goto stop;
            }

            if (range_contains(&ack->ack_ranges[ridx], pkt->pkt_num)) {
                /* We have matched this range. */
                tx_pkt_history_remove(h, pkt->pkt_num);

                *fixup = pkt;
                fixup = &pkt->anext;
                *fixup = NULL;
                break;
            } else if (pkt->pkt_num > ack->ack_ranges[ridx].end) {
                /*
                 * We have not reached this range yet in our list, so do not
                 * advance ridx.
                 */
                break;
            } else {
                /*
                 * We have moved beyond this range, so advance to the next range
                 * and try matching again.
                 */
                assert(pkt->pkt_num < ack->ack_ranges[ridx].start);
                continue;
            }
        }
    }
stop:

    return acked_pkts;
}

/*
 * Create a singly-linked list of newly detected-lost packets in the given
 * packet number space. Returns the head of the list or NULL if no packets were
 * detected lost. The packets in the list are removed from the TX history list.
 */
static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_lost_pkts(OSSL_ACKM *ackm,
                                                          int pkt_space)
{
    OSSL_ACKM_TX_PKT *lost_pkts = NULL, **fixup = &lost_pkts, *pkt, *pnext;
    OSSL_TIME loss_delay, lost_send_time, now;
    OSSL_RTT_INFO rtt;
    struct tx_pkt_history_st *h;

    assert(ackm->largest_acked_pkt[pkt_space] != QUIC_PN_INVALID);

    ossl_statm_get_rtt_info(ackm->statm, &rtt);

    ackm->loss_time[pkt_space] = ossl_time_zero();

    loss_delay = ossl_time_multiply(ossl_time_max(rtt.latest_rtt,
                                                  rtt.smoothed_rtt),
                                    K_TIME_THRESHOLD_NUM);
    loss_delay = ossl_time_divide(loss_delay, K_TIME_THRESHOLD_DEN);

    /* Minimum time of K_GRANULARITY before packets are deemed lost. */
    loss_delay = ossl_time_max(loss_delay, ossl_ticks2time(K_GRANULARITY));

    /* Packets sent before this time are deemed lost. */
    now = ackm->now(ackm->now_arg);
    lost_send_time = ossl_time_subtract(now, loss_delay);

    h   = get_tx_history(ackm, pkt_space);
    pkt = h->head;

    for (; pkt != NULL; pkt = pnext) {
        assert(pkt_space == pkt->pkt_space);

        /*
         * Save next value as it will be zeroed if we remove the packet from the
         * history list below.
         */
        pnext = pkt->next;

        if (pkt->pkt_num > ackm->largest_acked_pkt[pkt_space])
            continue;

        /*
         * Mark packet as lost, or set time when it should be marked.
         */
        if (ossl_time_compare(pkt->time, lost_send_time) <= 0
                || ackm->largest_acked_pkt[pkt_space]
                >= pkt->pkt_num + K_PKT_THRESHOLD) {
            tx_pkt_history_remove(h, pkt->pkt_num);

            *fixup = pkt;
            fixup = &pkt->lnext;
            *fixup = NULL;
        } else {
            if (ossl_time_is_zero(ackm->loss_time[pkt_space]))
                ackm->loss_time[pkt_space] =
                    ossl_time_add(pkt->time, loss_delay);
            else
                ackm->loss_time[pkt_space] =
                    ossl_time_min(ackm->loss_time[pkt_space],
                                  ossl_time_add(pkt->time, loss_delay));
        }
    }

    return lost_pkts;
}

static OSSL_TIME ackm_get_loss_time_and_space(OSSL_ACKM *ackm, int *pspace)
{
    OSSL_TIME time = ackm->loss_time[QUIC_PN_SPACE_INITIAL];
    int i, space = QUIC_PN_SPACE_INITIAL;

    for (i = space + 1; i < QUIC_PN_SPACE_NUM; ++i)
        if (ossl_time_is_zero(time)
            || ossl_time_compare(ackm->loss_time[i], time) == -1) {
            time    = ackm->loss_time[i];
            space   = i;
        }

    *pspace = space;
    return time;
}

static OSSL_TIME ackm_get_pto_time_and_space(OSSL_ACKM *ackm, int *space)
{
    OSSL_RTT_INFO rtt;
    OSSL_TIME duration;
    OSSL_TIME pto_timeout = ossl_time_infinite(), t;
    int pto_space = QUIC_PN_SPACE_INITIAL, i;

    ossl_statm_get_rtt_info(ackm->statm, &rtt);

    duration
        = ossl_time_add(rtt.smoothed_rtt,
                        ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4),
                                      ossl_ticks2time(K_GRANULARITY)));

    duration
        = ossl_time_multiply(duration, 1U << min_u32(ackm->pto_count,
                                                     MAX_PTO_COUNT));

    /* Anti-deadlock PTO starts from the current time. */
    if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) {
        assert(!ackm->peer_completed_addr_validation);

        *space = ackm->discarded[QUIC_PN_SPACE_INITIAL]
                    ? QUIC_PN_SPACE_HANDSHAKE
                    : QUIC_PN_SPACE_INITIAL;
        return ossl_time_add(ackm->now(ackm->now_arg), duration);
    }

    for (i = QUIC_PN_SPACE_INITIAL; i < QUIC_PN_SPACE_NUM; ++i) {
        if (ackm->ack_eliciting_bytes_in_flight[i] == 0)
            continue;

        if (i == QUIC_PN_SPACE_APP) {
            /* Skip application data until handshake confirmed. */
            if (!ackm->handshake_confirmed)
                break;

            /* Include max_ack_delay and backoff for app data. */
            if (!ossl_time_is_infinite(rtt.max_ack_delay))
                duration
                    = ossl_time_add(duration,
                                    ossl_time_multiply(rtt.max_ack_delay,
                                                       1U << min_u32(ackm->pto_count,
                                                                     MAX_PTO_COUNT)));
        }

        t = ossl_time_add(ackm->time_of_last_ack_eliciting_pkt[i], duration);
        if (ossl_time_compare(t, pto_timeout) < 0) {
            pto_timeout = t;
            pto_space   = i;
        }
    }

    *space = pto_space;
    return pto_timeout;
}

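/*
 * Worked example (illustrative only): with smoothed_rtt = 50ms, rtt_variance =
 * 10ms and pto_count = 0, the base PTO duration is 50ms + max(4 * 10ms, 1ms) =
 * 90ms, measured from the time of the last ACK-eliciting packet in the chosen
 * space (plus max_ack_delay, similarly backed off, for the application space).
 * Each unanswered probe doubles the duration (pto_count = 1 gives 180ms, and
 * so on), with the exponent capped at MAX_PTO_COUNT.
 */
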
ackm_set_loss_detection_timer_actual(OSSL_ACKM * ackm,OSSL_TIME deadline)1254 static void ackm_set_loss_detection_timer_actual(OSSL_ACKM *ackm,
1255                                                  OSSL_TIME deadline)
1256 {
1257     ackm->loss_detection_deadline = deadline;
1258 
1259     if (ackm->loss_detection_deadline_cb != NULL)
1260         ackm->loss_detection_deadline_cb(deadline,
1261                                          ackm->loss_detection_deadline_cb_arg);
1262 }
1263 
ackm_set_loss_detection_timer(OSSL_ACKM * ackm)1264 static int ackm_set_loss_detection_timer(OSSL_ACKM *ackm)
1265 {
1266     int space;
1267     OSSL_TIME earliest_loss_time, timeout;
1268 
1269     earliest_loss_time = ackm_get_loss_time_and_space(ackm, &space);
1270     if (!ossl_time_is_zero(earliest_loss_time)) {
1271         /* Time threshold loss detection. */
1272         ackm_set_loss_detection_timer_actual(ackm, earliest_loss_time);
1273         return 1;
1274     }
1275 
1276     if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0
1277             && ackm->peer_completed_addr_validation) {
1278         /*
1279          * Nothing to detect lost, so no timer is set. However, the client
1280          * needs to arm the timer if the server might be blocked by the
1281          * anti-amplification limit.
1282          */
1283         ackm_set_loss_detection_timer_actual(ackm, ossl_time_zero());
1284         return 1;
1285     }
1286 
1287     timeout = ackm_get_pto_time_and_space(ackm, &space);
1288     ackm_set_loss_detection_timer_actual(ackm, timeout);
1289     return 1;
1290 }
1291 
ackm_in_persistent_congestion(OSSL_ACKM * ackm,const OSSL_ACKM_TX_PKT * lpkt)1292 static int ackm_in_persistent_congestion(OSSL_ACKM *ackm,
1293                                          const OSSL_ACKM_TX_PKT *lpkt)
1294 {
1295     /* Persistent congestion not currently implemented. */
1296     return 0;
1297 }
1298 
ackm_on_pkts_lost(OSSL_ACKM * ackm,int pkt_space,const OSSL_ACKM_TX_PKT * lpkt)1299 static void ackm_on_pkts_lost(OSSL_ACKM *ackm, int pkt_space,
1300                               const OSSL_ACKM_TX_PKT *lpkt)
1301 {
1302     const OSSL_ACKM_TX_PKT *p, *pnext;
1303     OSSL_RTT_INFO rtt;
1304     QUIC_PN largest_pn_lost = 0;
1305     uint64_t num_bytes = 0;
1306 
1307     for (p = lpkt; p != NULL; p = pnext) {
1308         pnext = p->lnext;
1309 
1310         if (p->is_inflight) {
1311             ackm->bytes_in_flight -= p->num_bytes;
1312             if (p->is_ack_eliciting)
1313                 ackm->ack_eliciting_bytes_in_flight[p->pkt_space]
1314                     -= p->num_bytes;
1315 
1316             if (p->pkt_num > largest_pn_lost)
1317                 largest_pn_lost = p->pkt_num;
1318 
1319             num_bytes += p->num_bytes;
1320         }
1321 
1322         p->on_lost(p->cb_arg);
1323     }
1324 
1325     /*
1326      * Only consider lost packets with regards to congestion after getting an
1327      * RTT sample.
1328      */
1329     ossl_statm_get_rtt_info(ackm->statm, &rtt);
1330 
1331     if (ossl_time_is_zero(ackm->first_rtt_sample))
1332         return;
1333 
1334     ackm->cc_method->on_data_lost(ackm->cc_data,
1335         largest_pn_lost,
1336         ackm->tx_history[pkt_space].highest_sent,
1337         num_bytes,
1338         ackm_in_persistent_congestion(ackm, lpkt));
1339 }
1340 
ackm_on_pkts_acked(OSSL_ACKM * ackm,const OSSL_ACKM_TX_PKT * apkt)1341 static void ackm_on_pkts_acked(OSSL_ACKM *ackm, const OSSL_ACKM_TX_PKT *apkt)
1342 {
1343     const OSSL_ACKM_TX_PKT *anext;
1344     OSSL_TIME now;
1345     uint64_t num_retransmittable_bytes = 0;
1346     QUIC_PN last_pn_acked = 0;
1347 
1348     now = ackm->now(ackm->now_arg);
1349 
1350     for (; apkt != NULL; apkt = anext) {
1351         if (apkt->is_inflight) {
1352             ackm->bytes_in_flight -= apkt->num_bytes;
1353             if (apkt->is_ack_eliciting)
1354                 ackm->ack_eliciting_bytes_in_flight[apkt->pkt_space]
1355                     -= apkt->num_bytes;
1356 
1357             num_retransmittable_bytes += apkt->num_bytes;
1358             if (apkt->pkt_num > last_pn_acked)
1359                 last_pn_acked = apkt->pkt_num;
1360 
1361             if (apkt->largest_acked != QUIC_PN_INVALID)
1362                 /*
1363                  * This can fail, but it is monotonic; worst case we try again
1364                  * next time.
1365                  */
1366                 rx_pkt_history_bump_watermark(get_rx_history(ackm,
1367                                                              apkt->pkt_space),
1368                                               apkt->largest_acked + 1);
1369         }
1370 
1371         anext = apkt->anext;
1372         apkt->on_acked(apkt->cb_arg); /* may free apkt */
1373     }
1374 
1375     ackm->cc_method->on_data_acked(ackm->cc_data, now,
1376         last_pn_acked, num_retransmittable_bytes);
1377 }
1378 
ossl_ackm_new(OSSL_TIME (* now)(void * arg),void * now_arg,OSSL_STATM * statm,const OSSL_CC_METHOD * cc_method,OSSL_CC_DATA * cc_data)1379 OSSL_ACKM *ossl_ackm_new(OSSL_TIME (*now)(void *arg),
1380                          void *now_arg,
1381                          OSSL_STATM *statm,
1382                          const OSSL_CC_METHOD *cc_method,
1383                          OSSL_CC_DATA *cc_data)
1384 {
1385     OSSL_ACKM *ackm;
1386     int i;
1387 
1388     ackm = OPENSSL_zalloc(sizeof(OSSL_ACKM));
1389     if (ackm == NULL)
1390         return NULL;
1391 
1392     for (i = 0; i < (int)OSSL_NELEM(ackm->tx_history); ++i) {
1393         ackm->largest_acked_pkt[i] = QUIC_PN_INVALID;
1394         ackm->rx_ack_flush_deadline[i] = ossl_time_infinite();
1395         if (tx_pkt_history_init(&ackm->tx_history[i]) < 1)
1396             goto err;
1397     }
1398 
1399     for (i = 0; i < (int)OSSL_NELEM(ackm->rx_history); ++i)
1400         rx_pkt_history_init(&ackm->rx_history[i]);
1401 
1402     ackm->now       = now;
1403     ackm->now_arg   = now_arg;
1404     ackm->statm     = statm;
1405     ackm->cc_method = cc_method;
1406     ackm->cc_data   = cc_data;
1407     return ackm;
1408 
1409 err:
1410     while (--i >= 0)
1411         tx_pkt_history_destroy(&ackm->tx_history[i]);
1412 
1413     OPENSSL_free(ackm);
1414     return NULL;
1415 }
1416 
1417 void ossl_ackm_free(OSSL_ACKM *ackm)
1418 {
1419     size_t i;
1420 
1421     if (ackm == NULL)
1422         return;
1423 
1424     for (i = 0; i < OSSL_NELEM(ackm->tx_history); ++i)
1425         if (!ackm->discarded[i]) {
1426             tx_pkt_history_destroy(&ackm->tx_history[i]);
1427             rx_pkt_history_destroy(&ackm->rx_history[i]);
1428         }
1429 
1430     OPENSSL_free(ackm);
1431 }
1432 
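/*
 * Inform the ACKM that a packet has been transmitted. The caller allocates and
 * fills in an OSSL_ACKM_TX_PKT, which the ACKM retains until the packet is
 * later reported as acked, lost or discarded via its callbacks. largest_acked
 * is left as QUIC_PN_INVALID when the packet carries no ACK frame.
 *
 * A minimal illustrative sketch of a caller (hypothetical; the field values
 * and callback names are examples only, not taken from real calling code):
 *
 *     OSSL_ACKM_TX_PKT *pkt = OPENSSL_zalloc(sizeof(*pkt));
 *
 *     pkt->pkt_num          = pn;
 *     pkt->pkt_space        = QUIC_PN_SPACE_APP;
 *     pkt->num_bytes        = encoded_len;
 *     pkt->time             = now;
 *     pkt->is_inflight      = 1;
 *     pkt->is_ack_eliciting = 1;
 *     pkt->largest_acked    = QUIC_PN_INVALID;
 *     pkt->on_acked         = my_on_acked;
 *     pkt->on_lost          = my_on_lost;
 *     pkt->on_discarded     = my_on_discarded;
 *     pkt->cb_arg           = pkt;
 *
 *     if (!ossl_ackm_on_tx_packet(ackm, pkt))
 *         ...handle failure; the packet was not added to the history...
 */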
1433 int ossl_ackm_on_tx_packet(OSSL_ACKM *ackm, OSSL_ACKM_TX_PKT *pkt)
1434 {
1435     struct tx_pkt_history_st *h = get_tx_history(ackm, pkt->pkt_space);
1436 
1437     /* Time must be set and not move backwards. */
1438     if (ossl_time_is_zero(pkt->time)
1439         || ossl_time_compare(ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space],
1440                              pkt->time) > 0)
1441         return 0;
1442 
1443     /* Must have non-zero number of bytes. */
1444     if (pkt->num_bytes == 0)
1445         return 0;
1446 
1447     if (tx_pkt_history_add(h, pkt) == 0)
1448         return 0;
1449 
1450     if (pkt->is_inflight) {
1451         if (pkt->is_ack_eliciting) {
1452             ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space] = pkt->time;
1453             ackm->ack_eliciting_bytes_in_flight[pkt->pkt_space]
1454                 += pkt->num_bytes;
1455         }
1456 
1457         ackm->bytes_in_flight += pkt->num_bytes;
1458         ackm_set_loss_detection_timer(ackm);
1459 
1460         ackm->cc_method->on_data_sent(ackm->cc_data, pkt->num_bytes);
1461     }
1462 
1463     return 1;
1464 }
1465 
1466 int ossl_ackm_on_rx_datagram(OSSL_ACKM *ackm, size_t num_bytes)
1467 {
1468     /* No-op on the client; only a server needs this for anti-amplification. */
1469     return 1;
1470 }
1471 
1472 static void ackm_on_congestion(OSSL_ACKM *ackm, OSSL_TIME send_time)
1473 {
1474     /* Not currently implemented. */
1475 }
1476 
1477 static void ackm_process_ecn(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack,
1478                              int pkt_space)
1479 {
1480     struct tx_pkt_history_st *h;
1481     OSSL_ACKM_TX_PKT *pkt;
1482 
1483     /*
1484      * If the ECN-CE counter reported by the peer has increased, this could
1485      * be a new congestion event.
1486      */
1487     if (ack->ecnce > ackm->peer_ecnce[pkt_space]) {
1488         ackm->peer_ecnce[pkt_space] = ack->ecnce;
1489 
1490         h = get_tx_history(ackm, pkt_space);
1491         pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end);
1492         if (pkt == NULL)
1493             return;
1494 
1495         ackm_on_congestion(ackm, pkt->time);
1496     }
1497 }
1498 
1499 int ossl_ackm_on_rx_ack_frame(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack,
1500                               int pkt_space, OSSL_TIME rx_time)
1501 {
1502     OSSL_ACKM_TX_PKT *na_pkts, *lost_pkts;
1503     int must_set_timer = 0;
1504 
1505     if (ackm->largest_acked_pkt[pkt_space] == QUIC_PN_INVALID)
1506         ackm->largest_acked_pkt[pkt_space] = ack->ack_ranges[0].end;
1507     else
1508         ackm->largest_acked_pkt[pkt_space]
1509             = ossl_quic_pn_max(ackm->largest_acked_pkt[pkt_space],
1510                                ack->ack_ranges[0].end);
1511 
1512     /*
1513      * If we get an ACK in the Handshake PN space, the peer has completed
1514      * address validation. Update the timer even if no packets were ACK'd.
1515      */
1516     if (!ackm->peer_completed_addr_validation
1517             && pkt_space == QUIC_PN_SPACE_HANDSHAKE) {
1518         ackm->peer_completed_addr_validation = 1;
1519         must_set_timer = 1;
1520     }
1521 
1522     /*
1523      * Find packets that are newly acknowledged and remove them from the list.
1524      */
1525     na_pkts = ackm_detect_and_remove_newly_acked_pkts(ackm, ack, pkt_space);
1526     if (na_pkts == NULL) {
1527         if (must_set_timer)
1528             ackm_set_loss_detection_timer(ackm);
1529 
1530         return 1;
1531     }
1532 
1533     /*
1534      * Update the RTT if the largest acknowledged packet was newly acked and
1535      * at least one of the newly acked packets was ACK-eliciting.
1536      *
1537      * The first packet in the list is always the one with the largest PN.
1538      */
1539     if (na_pkts->pkt_num == ack->ack_ranges[0].end &&
1540         ack_includes_ack_eliciting(na_pkts)) {
1541         OSSL_TIME now = ackm->now(ackm->now_arg), ack_delay;
1542         if (ossl_time_is_zero(ackm->first_rtt_sample))
1543             ackm->first_rtt_sample = now;
1544 
1545         /* Enforce maximum ACK delay. */
1546         ack_delay = ack->delay_time;
1547         if (ackm->handshake_confirmed) {
1548             OSSL_RTT_INFO rtt;
1549 
1550             ossl_statm_get_rtt_info(ackm->statm, &rtt);
1551             ack_delay = ossl_time_min(ack_delay, rtt.max_ack_delay);
1552         }
1553 
1554         ossl_statm_update_rtt(ackm->statm, ack_delay,
1555                               ossl_time_subtract(now, na_pkts->time));
1556     }
1557 
1558     /* Process ECN information if present. */
1559     if (ack->ecn_present)
1560         ackm_process_ecn(ackm, ack, pkt_space);
1561 
1562     /* Handle inferred loss. */
1563     lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space);
1564     if (lost_pkts != NULL)
1565         ackm_on_pkts_lost(ackm, pkt_space, lost_pkts);
1566 
1567     ackm_on_pkts_acked(ackm, na_pkts);
1568 
1569     /*
1570      * Reset pto_count unless the client is still unsure whether the server
1571      * has validated the client's address.
1572      */
1573     if (ackm->peer_completed_addr_validation)
1574         ackm->pto_count = 0;
1575 
1576     ackm_set_loss_detection_timer(ackm);
1577     return 1;
1578 }
1579 
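/*
 * Called when an entire packet number space (Initial or Handshake) is
 * discarded. Every packet still tracked in that space has its on_discarded
 * callback invoked, any in-flight bytes are invalidated with the congestion
 * controller, the per-space TX/RX histories are freed and the loss detection
 * timer is re-armed. Returns 0 if the space has already been discarded.
 */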
1580 int ossl_ackm_on_pkt_space_discarded(OSSL_ACKM *ackm, int pkt_space)
1581 {
1582     OSSL_ACKM_TX_PKT *pkt, *pnext;
1583     uint64_t num_bytes_invalidated = 0;
1584 
1585     assert(pkt_space < QUIC_PN_SPACE_APP);
1586 
1587     if (ackm->discarded[pkt_space])
1588         return 0;
1589 
1590     if (pkt_space == QUIC_PN_SPACE_HANDSHAKE)
1591         ackm->peer_completed_addr_validation = 1;
1592 
1593     for (pkt = get_tx_history(ackm, pkt_space)->head; pkt != NULL; pkt = pnext) {
1594         pnext = pkt->next;
1595         if (pkt->is_inflight) {
1596             ackm->bytes_in_flight -= pkt->num_bytes;
1597             num_bytes_invalidated += pkt->num_bytes;
1598         }
1599 
1600         pkt->on_discarded(pkt->cb_arg); /* may free pkt */
1601     }
1602 
1603     tx_pkt_history_destroy(&ackm->tx_history[pkt_space]);
1604     rx_pkt_history_destroy(&ackm->rx_history[pkt_space]);
1605 
1606     if (num_bytes_invalidated > 0)
1607         ackm->cc_method->on_data_invalidated(ackm->cc_data,
1608                                              num_bytes_invalidated);
1609 
1610     ackm->time_of_last_ack_eliciting_pkt[pkt_space] = ossl_time_zero();
1611     ackm->loss_time[pkt_space] = ossl_time_zero();
1612     ackm->pto_count = 0;
1613     ackm->discarded[pkt_space] = 1;
1614     ackm->ack_eliciting_bytes_in_flight[pkt_space] = 0;
1615     ackm_set_loss_detection_timer(ackm);
1616     return 1;
1617 }
1618 
1619 int ossl_ackm_on_handshake_confirmed(OSSL_ACKM *ackm)
1620 {
1621     ackm->handshake_confirmed               = 1;
1622     ackm->peer_completed_addr_validation    = 1;
1623     ackm_set_loss_detection_timer(ackm);
1624     return 1;
1625 }
1626 
1627 static void ackm_queue_probe_handshake(OSSL_ACKM *ackm)
1628 {
1629     ++ackm->pending_probe.handshake;
1630 }
1631 
1632 static void ackm_queue_probe_padded_initial(OSSL_ACKM *ackm)
1633 {
1634     ++ackm->pending_probe.padded_initial;
1635 }
1636 
1637 static void ackm_queue_probe(OSSL_ACKM *ackm, int pkt_space)
1638 {
1639     ++ackm->pending_probe.pto[pkt_space];
1640 }
1641 
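/*
 * Intended to be called by the user of the ACKM when the loss detection
 * deadline (see ossl_ackm_get_loss_detection_deadline) expires. Depending on
 * state this either performs time-threshold loss detection or queues probe
 * requests (an anti-deadlock padded Initial or Handshake probe, or a PTO
 * probe), then bumps pto_count and re-arms the timer.
 */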
1642 int ossl_ackm_on_timeout(OSSL_ACKM *ackm)
1643 {
1644     int pkt_space;
1645     OSSL_TIME earliest_loss_time;
1646     OSSL_ACKM_TX_PKT *lost_pkts;
1647 
1648     earliest_loss_time = ackm_get_loss_time_and_space(ackm, &pkt_space);
1649     if (!ossl_time_is_zero(earliest_loss_time)) {
1650         /* Time threshold loss detection. */
1651         lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space);
1652         assert(lost_pkts != NULL);
1653         ackm_on_pkts_lost(ackm, pkt_space, lost_pkts);
1654         ackm_set_loss_detection_timer(ackm);
1655         return 1;
1656     }
1657 
1658     if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) {
1659         assert(!ackm->peer_completed_addr_validation);
1660         /*
1661          * Client sends an anti-deadlock packet: Initial is padded to earn more
1662          * anti-amplification credit. A handshake packet proves address
1663          * ownership.
1664          */
1665         if (ackm->discarded[QUIC_PN_SPACE_INITIAL])
1666             ackm_queue_probe_handshake(ackm);
1667         else
1668             ackm_queue_probe_padded_initial(ackm);
1669     } else {
1670         /*
1671          * PTO. The user of the ACKM should send new data if available, else
1672          * retransmit old data, or if neither is available, send a single PING
1673          * frame.
1674          */
1675         ackm_get_pto_time_and_space(ackm, &pkt_space);
1676         ackm_queue_probe(ackm, pkt_space);
1677     }
1678 
1679     ++ackm->pto_count;
1680     ackm_set_loss_detection_timer(ackm);
1681     return 1;
1682 }
1683 
1684 OSSL_TIME ossl_ackm_get_loss_detection_deadline(OSSL_ACKM *ackm)
1685 {
1686     return ackm->loss_detection_deadline;
1687 }
1688 
1689 int ossl_ackm_get_probe_request(OSSL_ACKM *ackm, int clear,
1690                                 OSSL_ACKM_PROBE_INFO *info)
1691 {
1692     *info = ackm->pending_probe;
1693 
1694     if (clear != 0)
1695         memset(&ackm->pending_probe, 0, sizeof(ackm->pending_probe));
1696 
1697     return 1;
1698 }
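
/*
 * Illustrative sketch of how a caller might consume probe requests after a
 * timeout (hypothetical; the surrounding transmission logic is an assumption,
 * not something this file provides):
 *
 *     OSSL_ACKM_PROBE_INFO probe;
 *
 *     ossl_ackm_on_timeout(ackm);
 *     ossl_ackm_get_probe_request(ackm, 1, &probe);
 *
 *     if (probe.padded_initial > 0)
 *         ...send a padded Initial packet...
 *     if (probe.handshake > 0)
 *         ...send a Handshake packet...
 *     if (probe.pto[QUIC_PN_SPACE_APP] > 0)
 *         ...send new or retransmitted data, or a single PING frame...
 */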
1699 
1700 int ossl_ackm_get_largest_unacked(OSSL_ACKM *ackm, int pkt_space, QUIC_PN *pn)
1701 {
1702     struct tx_pkt_history_st *h;
1703 
1704     h = get_tx_history(ackm, pkt_space);
1705     if (h->tail != NULL) {
1706         *pn = h->tail->pkt_num;
1707         return 1;
1708     }
1709 
1710     return 0;
1711 }
1712 
1713 /* Number of ACK-eliciting packets RX'd before we always emit an ACK. */
1714 #define PKTS_BEFORE_ACK     2
1715 /* Maximum amount of time to leave an ACK-eliciting packet un-ACK'd. */
1716 #define MAX_ACK_DELAY       (OSSL_TIME_MS * 25)
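/*
 * For example, with the values above and assuming no missing-PN condition
 * applies: the first ACK-eliciting packet received after an ACK frame was last
 * generated arms the flush deadline at rx_time + 25ms, while a second such
 * packet reaches PKTS_BEFORE_ACK and causes an ACK frame to be requested
 * immediately.
 */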
1717 
1718 /*
1719  * Return 1 if emission of an ACK frame is currently desired.
1720  *
1721  * This occurs when one or more of the following conditions is met:
1722  *
1723  *   - We have flagged that we want to send an ACK frame
1724  *     (for example, because the packet threshold count has been exceeded), or
1725  *
1726  *   - We have passed the ACK flush deadline, meaning that we received at
1727  *     least one ACK-eliciting packet but held off on sending an ACK frame
1728  *     immediately in the hope that more ACK-eliciting packets would arrive;
1729  *     not enough did, so we are now requesting transmission of an ACK frame
1730  *     anyway.
1731  *
1732  */
1733 int ossl_ackm_is_ack_desired(OSSL_ACKM *ackm, int pkt_space)
1734 {
1735     return ackm->rx_ack_desired[pkt_space]
1736         || (!ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space])
1737             && ossl_time_compare(ackm->now(ackm->now_arg),
1738                                  ackm->rx_ack_flush_deadline[pkt_space]) >= 0);
1739 }
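
/*
 * Illustrative polling pattern (hypothetical caller code, shown only as a
 * sketch):
 *
 *     if (ossl_ackm_is_ack_desired(ackm, QUIC_PN_SPACE_APP)) {
 *         const OSSL_QUIC_FRAME_ACK *ack
 *             = ossl_ackm_get_ack_frame(ackm, QUIC_PN_SPACE_APP);
 *
 *         ...encode *ack into the next packet sent in that PN space...
 *     }
 */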
1740 
1741 /*
1742  * Returns 1 if the given PN falls within one of the ACK frame's ranges.
1743  */
1744 static int ack_contains(const OSSL_QUIC_FRAME_ACK *ack, QUIC_PN pkt_num)
1745 {
1746     size_t i;
1747 
1748     for (i = 0; i < ack->num_ack_ranges; ++i)
1749         if (range_contains(&ack->ack_ranges[i], pkt_num))
1750             return 1;
1751 
1752     return 0;
1753 }
1754 
1755 /*
1756  * Returns 1 iff a PN (which we have just received) was previously reported as
1757  * implied missing (by us, in an ACK frame we previously generated).
1758  */
1759 static int ackm_is_missing(OSSL_ACKM *ackm, int pkt_space, QUIC_PN pkt_num)
1760 {
1761     /*
1762      * A PN is implied missing if it is not greater than the highest PN in our
1763      * generated ACK frame, but is not matched by the frame.
1764      */
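    /*
     * For example, if the last ACK frame we generated covered the ranges
     * [5..7] and [10..12] and we now receive PN 8, then 8 <= 12 but 8 falls in
     * neither range, so it was implied missing.
     */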
1765     return ackm->ack[pkt_space].num_ack_ranges > 0
1766         && pkt_num <= ackm->ack[pkt_space].ack_ranges[0].end
1767         && !ack_contains(&ackm->ack[pkt_space], pkt_num);
1768 }
1769 
1770 /*
1771  * Returns 1 iff our RX of a PN newly establishes the implication of missing
1772  * packets.
1773  */
1774 static int ackm_has_newly_missing(OSSL_ACKM *ackm, int pkt_space)
1775 {
1776     struct rx_pkt_history_st *h;
1777 
1778     h = get_rx_history(ackm, pkt_space);
1779 
1780     if (h->set.tail == NULL)
1781         return 0;
1782 
1783     /*
1784      * The second condition here establishes that the highest PN range in our RX
1785      * history comprises only a single PN. If there is more than one, then this
1786      * function will have returned 1 during a previous call to
1787      * ossl_ackm_on_rx_packet assuming the third condition below was met. Thus
1788      * we only return 1 when the missing PN condition is newly established.
1789      *
1790      * The third condition here establishes that the highest PN range in our RX
1791      * history is beyond (and does not border) the highest PN we have yet
1792      * reported in any ACK frame. Thus there is a gap of at least one PN between
1793      * the PNs we have ACK'd previously and the PN we have just received.
1794      */
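    /*
     * For example, if the highest PN we have reported in any ACK frame is 12
     * and we now receive PN 15 (so the tail range of our RX history is exactly
     * [15..15]), PNs 13 and 14 are newly implied missing and we return 1.
     * Receiving PN 16 next would merely extend the tail range to [15..16], so
     * we would not return 1 again for the same gap.
     */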
1795     return ackm->ack[pkt_space].num_ack_ranges > 0
1796         && h->set.tail->range.start == h->set.tail->range.end
1797         && h->set.tail->range.start
1798             > ackm->ack[pkt_space].ack_ranges[0].end + 1;
1799 }
1800 
1801 static void ackm_set_flush_deadline(OSSL_ACKM *ackm, int pkt_space,
1802                                     OSSL_TIME deadline)
1803 {
1804     ackm->rx_ack_flush_deadline[pkt_space] = deadline;
1805 
1806     if (ackm->ack_deadline_cb != NULL)
1807         ackm->ack_deadline_cb(ossl_ackm_get_ack_deadline(ackm, pkt_space),
1808                               pkt_space, ackm->ack_deadline_cb_arg);
1809 }
1810 
1811 /* Explicitly flags that we want to generate an ACK frame. */
1812 static void ackm_queue_ack(OSSL_ACKM *ackm, int pkt_space)
1813 {
1814     ackm->rx_ack_desired[pkt_space] = 1;
1815 
1816     /* Cancel deadline. */
1817     ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite());
1818 }
1819 
1820 static void ackm_on_rx_ack_eliciting(OSSL_ACKM *ackm,
1821                                      OSSL_TIME rx_time, int pkt_space,
1822                                      int was_missing)
1823 {
1824     if (ackm->rx_ack_desired[pkt_space])
1825         /* ACK generation already requested so nothing to do. */
1826         return;
1827 
1828     ++ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space];
1829 
1830     if (!ackm->rx_ack_generated[pkt_space]
1831             || was_missing
1832             || ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space]
1833                 >= PKTS_BEFORE_ACK
1834             || ackm_has_newly_missing(ackm, pkt_space)) {
1835         /*
1836          * Either:
1837          *
1838          *   - We have never yet generated an ACK frame, meaning that this
1839          *     is the first ever packet received, which we should always
1840          *     acknowledge immediately, or
1841          *
1842          *   - We previously reported the PN that we have just received as
1843          *     missing in a previous ACK frame (meaning that we should report
1844          *     the fact that we now have it to the peer immediately), or
1845          *
1846          *   - We have exceeded the ACK-eliciting packet threshold count
1847          *     for the purposes of ACK coalescing, so request transmission
1848          *     of an ACK frame, or
1849          *
1850          *   - The PN we just received and added to our PN RX history
1851          *     newly implies one or more missing PNs, in which case we should
1852          *     inform the peer by sending an ACK frame immediately.
1853          *
1854          * We do not test the ACK flush deadline here because it is tested
1855          * separately in ossl_ackm_is_ack_desired.
1856          */
1857         ackm_queue_ack(ackm, pkt_space);
1858         return;
1859     }
1860 
1861     /*
1862      * Not emitting an ACK yet.
1863      *
1864      * Update the ACK flush deadline.
1865      */
1866     if (ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space]))
1867         ackm_set_flush_deadline(ackm, pkt_space,
1868                                 ossl_time_add(rx_time,
1869                                               ossl_ticks2time(MAX_ACK_DELAY)));
1870     else
1871         ackm_set_flush_deadline(ackm, pkt_space,
1872                                 ossl_time_min(ackm->rx_ack_flush_deadline[pkt_space],
1873                                               ossl_time_add(rx_time,
1874                                                             ossl_ticks2time(MAX_ACK_DELAY))));
1875 }
1876 
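/*
 * Inform the ACKM that a packet has been received. A minimal illustrative
 * sketch of a caller (hypothetical; the field values are examples only):
 *
 *     OSSL_ACKM_RX_PKT rpkt = {0};
 *
 *     rpkt.pkt_num          = pn;
 *     rpkt.pkt_space        = QUIC_PN_SPACE_APP;
 *     rpkt.time             = rx_time;
 *     rpkt.is_ack_eliciting = 1;
 *
 *     ossl_ackm_on_rx_packet(ackm, &rpkt);
 *
 * rpkt.ecn may additionally be set to one of the OSSL_ACKM_ECN_* values if the
 * datagram carried an ECN marking.
 */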
1877 int ossl_ackm_on_rx_packet(OSSL_ACKM *ackm, const OSSL_ACKM_RX_PKT *pkt)
1878 {
1879     struct rx_pkt_history_st *h = get_rx_history(ackm, pkt->pkt_space);
1880     int was_missing;
1881 
1882     if (ossl_ackm_is_rx_pn_processable(ackm, pkt->pkt_num, pkt->pkt_space) != 1)
1883         /* PN has already been processed or written off, no-op. */
1884         return 1;
1885 
1886     /*
1887      * Record the largest PN we have RX'd and the time we received it.
1888      * We use this to calculate the ACK delay field of ACK frames.
1889      */
1890     if (pkt->pkt_num > ackm->rx_largest_pn[pkt->pkt_space]) {
1891         ackm->rx_largest_pn[pkt->pkt_space]   = pkt->pkt_num;
1892         ackm->rx_largest_time[pkt->pkt_space] = pkt->time;
1893     }
1894 
1895     /*
1896      * If the PN we just received was previously implied missing by virtue of
1897      * being omitted from a previously generated ACK frame, we skip any packet
1898      * count thresholds or coalescing delays and emit a new ACK frame
1899      * immediately.
1900      */
1901     was_missing = ackm_is_missing(ackm, pkt->pkt_space, pkt->pkt_num);
1902 
1903     /*
1904      * Add the packet number to our RX history list of PNs for which the peer
1905      * has not yet provably seen an ACK from us.
1906      */
1907     if (rx_pkt_history_add_pn(h, pkt->pkt_num) != 1)
1908         return 0;
1909 
1910     /*
1911      * Receiving this packet may or may not cause us to emit an ACK frame.
1912      * We may not emit an ACK frame yet if we have not yet received a threshold
1913      * number of packets.
1914      */
1915     if (pkt->is_ack_eliciting)
1916         ackm_on_rx_ack_eliciting(ackm, pkt->time, pkt->pkt_space, was_missing);
1917 
1918     /* Update the ECN counters according to which ECN signal we got, if any. */
1919     switch (pkt->ecn) {
1920     case OSSL_ACKM_ECN_ECT0:
1921         ++ackm->rx_ect0[pkt->pkt_space];
1922         break;
1923     case OSSL_ACKM_ECN_ECT1:
1924         ++ackm->rx_ect1[pkt->pkt_space];
1925         break;
1926     case OSSL_ACKM_ECN_ECNCE:
1927         ++ackm->rx_ecnce[pkt->pkt_space];
1928         break;
1929     default:
1930         break;
1931     }
1932 
1933     return 1;
1934 }
1935 
1936 static void ackm_fill_rx_ack_ranges(OSSL_ACKM *ackm, int pkt_space,
1937                                     OSSL_QUIC_FRAME_ACK *ack)
1938 {
1939     struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space);
1940     struct pn_set_item_st *x;
1941     size_t i = 0;
1942 
1943     /*
1944      * Copy out ranges from the PN set, starting at the end, until we reach our
1945      * maximum number of ranges.
1946      */
1947     for (x = h->set.tail;
1948          x != NULL && i < OSSL_NELEM(ackm->ack_ranges);
1949          x = x->prev, ++i)
1950         ackm->ack_ranges[pkt_space][i] = x->range;
1951 
1952     ack->ack_ranges     = ackm->ack_ranges[pkt_space];
1953     ack->num_ack_ranges = i;
1954 }
1955 
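/*
 * Generate an ACK frame for the given PN space. The returned pointer refers to
 * state held inside the OSSL_ACKM (ackm->ack[pkt_space] and its range array),
 * so it remains valid only until the next call for the same PN space and must
 * not be freed by the caller. Calling this also clears the ACK-desired flag,
 * resets the ACK-eliciting packet count and cancels the flush deadline for
 * that space.
 */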
1956 const OSSL_QUIC_FRAME_ACK *ossl_ackm_get_ack_frame(OSSL_ACKM *ackm,
1957                                                    int pkt_space)
1958 {
1959     OSSL_QUIC_FRAME_ACK *ack = &ackm->ack[pkt_space];
1960     OSSL_TIME now = ackm->now(ackm->now_arg);
1961 
1962     ackm_fill_rx_ack_ranges(ackm, pkt_space, ack);
1963 
1964     if (!ossl_time_is_zero(ackm->rx_largest_time[pkt_space])
1965             && ossl_time_compare(now, ackm->rx_largest_time[pkt_space]) > 0
1966             && pkt_space == QUIC_PN_SPACE_APP)
1967         ack->delay_time =
1968             ossl_time_subtract(now, ackm->rx_largest_time[pkt_space]);
1969     else
1970         ack->delay_time = ossl_time_zero();
1971 
1972     ack->ect0              = ackm->rx_ect0[pkt_space];
1973     ack->ect1              = ackm->rx_ect1[pkt_space];
1974     ack->ecnce             = ackm->rx_ecnce[pkt_space];
1975     ack->ecn_present       = 1;
1976 
1977     ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space] = 0;
1978 
1979     ackm->rx_ack_generated[pkt_space]       = 1;
1980     ackm->rx_ack_desired[pkt_space]         = 0;
1981     ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite());
1982     return ack;
1983 }
1984 
1985 
1986 OSSL_TIME ossl_ackm_get_ack_deadline(OSSL_ACKM *ackm, int pkt_space)
1987 {
1988     if (ackm->rx_ack_desired[pkt_space])
1989         /* Already desired, deadline is now. */
1990         return ossl_time_zero();
1991 
1992     return ackm->rx_ack_flush_deadline[pkt_space];
1993 }
1994 
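/*
 * Returns 1 if the given RX PN is still of interest: it is at or above the RX
 * history watermark (i.e. it has not been written off) and has not already
 * been received.
 */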
1995 int ossl_ackm_is_rx_pn_processable(OSSL_ACKM *ackm, QUIC_PN pn, int pkt_space)
1996 {
1997     struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space);
1998 
1999     return pn >= h->watermark && pn_set_query(&h->set, pn) == 0;
2000 }
2001 
2002 void ossl_ackm_set_loss_detection_deadline_callback(OSSL_ACKM *ackm,
2003                                                     void (*fn)(OSSL_TIME deadline,
2004                                                                void *arg),
2005                                                     void *arg)
2006 {
2007     ackm->loss_detection_deadline_cb      = fn;
2008     ackm->loss_detection_deadline_cb_arg  = arg;
2009 }
2010 
2011 void ossl_ackm_set_ack_deadline_callback(OSSL_ACKM *ackm,
2012                                          void (*fn)(OSSL_TIME deadline,
2013                                                     int pkt_space,
2014                                                     void *arg),
2015                                          void *arg)
2016 {
2017     ackm->ack_deadline_cb     = fn;
2018     ackm->ack_deadline_cb_arg = arg;
2019 }
2020