// subspace_proof_of_space/chiapos/table.rs

1#[cfg(test)]
2mod tests;
3pub(super) mod types;
4
5#[cfg(not(feature = "std"))]
6extern crate alloc;
7
8use crate::chiapos::constants::{PARAM_B, PARAM_BC, PARAM_C, PARAM_EXT, PARAM_M};
9use crate::chiapos::table::types::{Metadata, Position, X, Y};
10use crate::chiapos::utils::EvaluatableUsize;
11use crate::chiapos::Seed;
12#[cfg(not(feature = "std"))]
13use alloc::vec;
14#[cfg(not(feature = "std"))]
15use alloc::vec::Vec;
16use chacha20::cipher::{KeyIvInit, StreamCipher, StreamCipherSeek};
17use chacha20::{ChaCha8, Key, Nonce};
18use core::mem;
19use core::simd::num::SimdUint;
20use core::simd::Simd;
21#[cfg(all(feature = "std", any(feature = "parallel", test)))]
22use parking_lot::Mutex;
23#[cfg(any(feature = "parallel", test))]
24use rayon::prelude::*;
25use seq_macro::seq;
26#[cfg(all(not(feature = "std"), any(feature = "parallel", test)))]
27use spin::Mutex;
28use static_assertions::const_assert;
29use subspace_core_primitives::hashes::{blake3_hash, blake3_hash_list};
30
/// Number of `y` values computed per invocation of [`compute_f1_simd`]
pub(super) const COMPUTE_F1_SIMD_FACTOR: usize = 8;
/// Manual unroll factor for the inner target loop in [`find_matches`]; must divide `PARAM_M`
/// evenly (enforced by `const_assert!` at the use site)
pub(super) const FIND_MATCHES_AND_COMPUTE_UNROLL_FACTOR: usize = 8;
/// Manual unroll factor for the target loop in [`has_match`]; must divide `PARAM_M` evenly
/// (enforced by `const_assert!` at the use site)
pub(super) const HAS_MATCH_UNROLL_FACTOR: usize = 8;
34
35/// Compute the size of `y` in bits
36pub(super) const fn y_size_bits(k: u8) -> usize {
37    k as usize + PARAM_EXT as usize
38}
39
40/// Metadata size in bytes
41pub const fn metadata_size_bytes(k: u8, table_number: u8) -> usize {
42    metadata_size_bits(k, table_number).div_ceil(u8::BITS as usize)
43}
44
45/// Metadata size in bits
46pub(super) const fn metadata_size_bits(k: u8, table_number: u8) -> usize {
47    k as usize
48        * match table_number {
49            1 => 1,
50            2 => 2,
51            3 | 4 => 4,
52            5 => 3,
53            6 => 2,
54            7 => 0,
55            _ => unreachable!(),
56        }
57}
58
59/// ChaCha8 [`Vec`] sufficient for the whole first table for [`K`].
60/// Prefer [`partial_y`] if you need partial y just for a single `x`.
61fn partial_ys<const K: u8>(seed: Seed) -> Vec<u8> {
62    let output_len_bits = usize::from(K) * (1 << K);
63    let mut output = vec![0; output_len_bits.div_ceil(u8::BITS as usize)];
64
65    let key = Key::from(seed);
66    let nonce = Nonce::default();
67
68    let mut cipher = ChaCha8::new(&key, &nonce);
69
70    cipher.apply_keystream(&mut output);
71
72    output
73}
74
75/// ChaCha8 byte for a single `y` at `x` in the first table for [`K`], returns bytes and offset (in
76/// bits) within those bytes at which data start.
77/// Prefer [`partial_ys`] if you process the whole first table.
78pub(super) fn partial_y<const K: u8>(
79    seed: Seed,
80    x: X,
81) -> ([u8; (K as usize * 2).div_ceil(u8::BITS as usize)], usize) {
82    let skip_bits = usize::from(K) * usize::from(x);
83    let skip_bytes = skip_bits / u8::BITS as usize;
84    let skip_bits = skip_bits % u8::BITS as usize;
85
86    let mut output = [0; (K as usize * 2).div_ceil(u8::BITS as usize)];
87
88    let key = Key::from(seed);
89    let nonce = Nonce::default();
90
91    let mut cipher = ChaCha8::new(&key, &nonce);
92
93    cipher.seek(skip_bytes);
94    cipher.apply_keystream(&mut output);
95
96    (output, skip_bits)
97}
98
/// Precomputed right-bucket match targets used by [`find_matches`], stored as one flat vector
/// laid out as `[parity][r][m]` with `parity` in `0..=1`, `r` in `0..PARAM_BC` and `m` in
/// `0..PARAM_M` (see [`calculate_left_targets`])
#[derive(Debug, Clone)]
struct LeftTargets {
    left_targets: Vec<Position>,
}
103
104fn calculate_left_targets() -> LeftTargets {
105    let mut left_targets = Vec::with_capacity(2 * usize::from(PARAM_BC) * usize::from(PARAM_M));
106
107    let param_b = u32::from(PARAM_B);
108    let param_c = u32::from(PARAM_C);
109
110    for parity in 0..=1u32 {
111        for r in 0..u32::from(PARAM_BC) {
112            let c = r / param_c;
113
114            for m in 0..u32::from(PARAM_M) {
115                let target = ((c + m) % param_b) * param_c
116                    + (((2 * m + parity) * (2 * m + parity) + r) % param_c);
117                left_targets.push(Position::from(target));
118            }
119        }
120    }
121
122    LeftTargets { left_targets }
123}
124
125fn calculate_left_target_on_demand(parity: u32, r: u32, m: u32) -> u32 {
126    let param_b = u32::from(PARAM_B);
127    let param_c = u32::from(PARAM_C);
128
129    let c = r / param_c;
130
131    ((c + m) % param_b) * param_c + (((2 * m + parity) * (2 * m + parity) + r) % param_c)
132}
133
/// Caches that can be used to optimize creation of multiple [`Tables`](super::Tables).
#[derive(Debug, Clone)]
pub struct TablesCache<const K: u8> {
    // Reusable storage for bucket boundaries derived from the previous table's `y` values
    buckets: Vec<Bucket>,
    // Scratch space reused by `find_matches` between calls to avoid reallocation
    rmap_scratch: Vec<RmapItem>,
    // Precomputed match targets, see `calculate_left_targets`
    left_targets: LeftTargets,
}
141
impl<const K: u8> Default for TablesCache<K> {
    /// Create new instance
    ///
    /// Left targets are precomputed eagerly here, while buckets and scratch space start out empty
    /// and are filled during table creation
    fn default() -> Self {
        Self {
            buckets: Vec::new(),
            rmap_scratch: Vec::new(),
            left_targets: calculate_left_targets(),
        }
    }
}
152
/// A single match between an entry of the left bucket and an entry of the right bucket
#[derive(Debug)]
struct Match {
    // Position of the left entry in the parent table
    left_position: Position,
    // `y` value of the left entry
    left_y: Y,
    // Position of the right entry in the parent table
    right_position: Position,
}
159
/// A contiguous run of table entries whose `y` values fall into the same `PARAM_BC`-sized bucket
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
struct Bucket {
    /// Bucket index (`y / PARAM_BC`)
    bucket_index: u32,
    /// Start position of this bucket in the table
    start_position: Position,
    /// Size of this bucket
    size: Position,
}
169
/// Entry of the right-bucket lookup map used by [`find_matches`]: equal `y` (and hence `r`)
/// values occupy consecutive slots, so the first position plus a count describes them all
#[derive(Debug, Default, Copy, Clone)]
pub(super) struct RmapItem {
    // Number of consecutive entries sharing this `r` value
    count: Position,
    // Position of the first entry with this `r` value
    start_position: Position,
}
175
/// Compute the `f1()` function (first table `y`) for a single `x`.
///
/// `partial_y` contains ChaCha8 keystream bytes holding the `K` bits for this `x` and
/// `partial_y_offset` is the offset in bits at which those bits start within `partial_y`.
pub(super) fn compute_f1<const K: u8>(x: X, partial_y: &[u8], partial_y_offset: usize) -> Y {
    // Number of bytes containing the `K` bits of interest, accounting for the sub-byte offset
    let partial_y_length =
        (partial_y_offset % u8::BITS as usize + usize::from(K)).div_ceil(u8::BITS as usize);
    // Copy the relevant bytes into the most significant end of a big-endian `u64`
    let mut pre_y_bytes = 0u64.to_be_bytes();
    pre_y_bytes[..partial_y_length]
        .copy_from_slice(&partial_y[partial_y_offset / u8::BITS as usize..][..partial_y_length]);
    // Contains `K` desired bits of `partial_y` in the final offset of eventual `y` with the rest
    // of bits being in undefined state
    let pre_y = u64::from_be_bytes(pre_y_bytes)
        >> (u64::BITS as usize - usize::from(K + PARAM_EXT) - partial_y_offset % u8::BITS as usize);
    let pre_y = pre_y as u32;
    // Mask for clearing the rest of bits of `pre_y`.
    let pre_y_mask = (u32::MAX << usize::from(PARAM_EXT))
        & (u32::MAX >> (u32::BITS as usize - usize::from(K + PARAM_EXT)));

    // Extract `PARAM_EXT` most significant bits from `x` and store in the final offset of
    // eventual `y` with the rest of bits being in undefined state.
    let pre_ext = u32::from(x) >> (usize::from(K - PARAM_EXT));
    // Mask for clearing the rest of bits of `pre_ext`.
    let pre_ext_mask = u32::MAX >> (u32::BITS as usize - usize::from(PARAM_EXT));

    // Combine all of the bits together:
    // [padding zero bits][`K` bits from `partial_y`][`PARAM_EXT` bits from `x`]
    Y::from((pre_y & pre_y_mask) | (pre_ext & pre_ext_mask))
}
202
/// SIMD variant of [`compute_f1`]: computes [`COMPUTE_F1_SIMD_FACTOR`] `y` values at once.
///
/// `partial_ys` must contain the tightly packed `K`-bit keystream chunks corresponding to `xs`.
pub(super) fn compute_f1_simd<const K: u8>(
    xs: [X; COMPUTE_F1_SIMD_FACTOR],
    partial_ys: &[u8; K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize],
) -> [Y; COMPUTE_F1_SIMD_FACTOR] {
    // Each element contains `K` desired bits of `partial_ys` in the final offset of eventual `ys`
    // with the rest of bits being in undefined state
    let pre_ys_bytes: Simd<_, COMPUTE_F1_SIMD_FACTOR> = Simd::from(seq!(N in 0..8 {
        [
        #(
        {
            #[allow(clippy::erasing_op, clippy::identity_op)]
            let partial_y_offset = N * usize::from(K);
            let partial_y_length =
                (partial_y_offset % u8::BITS as usize + usize::from(K)).div_ceil(u8::BITS as usize);
            let mut pre_y_bytes = 0u64.to_be_bytes();
            pre_y_bytes[..partial_y_length].copy_from_slice(
                &partial_ys[partial_y_offset / u8::BITS as usize..][..partial_y_length],
            );

            u64::from_be_bytes(pre_y_bytes)
        },
        )*
        ]
    }));
    // Per-lane right-shift amount that moves the `K + PARAM_EXT` meaningful bits to the least
    // significant end
    let pre_ys_right_offset: Simd<_, COMPUTE_F1_SIMD_FACTOR> = Simd::from(seq!(N in 0..8 {
        [
        #(
        {
            #[allow(clippy::erasing_op, clippy::identity_op)]
            let partial_y_offset = N * u32::from(K);
            u64::from(u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset % u8::BITS)
        },
        )*
        ]
    }));
    // TODO: both this and above operations are most likely possible on x86-64 with a special
    //  intrinsic in a more efficient way
    let pre_ys = pre_ys_bytes >> pre_ys_right_offset;

    // Mask for clearing the rest of bits of `pre_ys`.
    let pre_ys_mask = Simd::splat(
        (u32::MAX << usize::from(PARAM_EXT))
            & (u32::MAX >> (u32::BITS as usize - usize::from(K + PARAM_EXT))),
    );

    // SAFETY: `X` is `#[repr(transparent)]` and guaranteed to have the same memory layout as `u32`
    let xs =
        unsafe { mem::transmute::<[X; COMPUTE_F1_SIMD_FACTOR], [u32; COMPUTE_F1_SIMD_FACTOR]>(xs) };
    // Extract `PARAM_EXT` most significant bits from `xs` and store in the final offset of
    // eventual `ys` with the rest of bits being in undefined state.
    let pre_exts = Simd::from(xs) >> Simd::splat(u32::from(K - PARAM_EXT));

    // Mask for clearing the rest of bits of `pre_exts`.
    let pre_exts_mask = Simd::splat(u32::MAX >> (u32::BITS as usize - usize::from(PARAM_EXT)));

    // Combine all of the bits together:
    // [padding zero bits][`K` bits from `partial_y`][`PARAM_EXT` bits from `x`]
    // NOTE: `pre_exts_mask` is unnecessary here and makes no difference, but it allows compiler to
    // generate faster code 🤷‍
    let ys = (pre_ys.cast() & pre_ys_mask) | (pre_exts & pre_exts_mask);

    // SAFETY: `Y` is `#[repr(transparent)]` and guaranteed to have the same memory layout as `u32`
    unsafe { mem::transmute(ys.to_array()) }
}
267
/// Find all matches between the left and right buckets and push `map(Match)` for each into
/// `output`.
///
/// `rmap_scratch` is just an optimization to reuse allocations between calls.
///
/// For verification purposes use [`has_match`] instead.
///
/// Does nothing if either of the buckets is empty.
#[allow(clippy::too_many_arguments)]
fn find_matches<T, Map>(
    left_bucket_ys: &[Y],
    left_bucket_start_position: Position,
    right_bucket_ys: &[Y],
    right_bucket_start_position: Position,
    rmap_scratch: &mut Vec<RmapItem>,
    left_targets: &LeftTargets,
    map: Map,
    output: &mut Vec<T>,
) where
    Map: Fn(Match) -> T,
{
    // Clear and set to correct size with zero values
    rmap_scratch.clear();
    rmap_scratch.resize_with(usize::from(PARAM_BC), RmapItem::default);
    let rmap = rmap_scratch;

    // Both left and right buckets can be empty
    let Some(&first_left_bucket_y) = left_bucket_ys.first() else {
        return;
    };
    let Some(&first_right_bucket_y) = right_bucket_ys.first() else {
        return;
    };
    // Since all entries in a bucket are obtained after division by `PARAM_BC`, we can compute
    // quotient more efficiently by subtracting base value rather than computing remainder of
    // division
    let base = (usize::from(first_right_bucket_y) / usize::from(PARAM_BC)) * usize::from(PARAM_BC);
    // Build a map from `r` value to the run of right-bucket entries having that `r`
    for (&y, right_position) in right_bucket_ys.iter().zip(right_bucket_start_position..) {
        let r = usize::from(y) - base;

        // Same `y` and as the result `r` can appear in the table multiple times, in which case
        // they'll all occupy consecutive slots in `right_bucket` and all we need to store is just
        // the first position and number of elements.
        if rmap[r].count == Position::ZERO {
            rmap[r].start_position = right_position;
        }
        rmap[r].count += Position::ONE;
    }
    let rmap = rmap.as_slice();

    // Same idea as above, but avoids division by leveraging the fact that each bucket is exactly
    // `PARAM_BC` away from the previous one in terms of divisor by `PARAM_BC`
    let base = base - usize::from(PARAM_BC);
    // Precomputed targets differ depending on whether the left bucket index is even or odd
    let parity = (usize::from(first_left_bucket_y) / usize::from(PARAM_BC)) % 2;
    let left_targets_parity = {
        let (a, b) = left_targets
            .left_targets
            .split_at(left_targets.left_targets.len() / 2);
        if parity == 0 {
            a
        } else {
            b
        }
    };

    for (&y, left_position) in left_bucket_ys.iter().zip(left_bucket_start_position..) {
        let r = usize::from(y) - base;
        // The `PARAM_M` precomputed targets for this left entry's `r` value
        let left_targets_r = left_targets_parity
            .chunks_exact(left_targets_parity.len() / usize::from(PARAM_BC))
            .nth(r)
            .expect("r is valid");

        const_assert!(PARAM_M as usize % FIND_MATCHES_AND_COMPUTE_UNROLL_FACTOR == 0);

        // Manually unrolled iteration over all `PARAM_M` targets
        for r_targets in left_targets_r
            .array_chunks::<{ FIND_MATCHES_AND_COMPUTE_UNROLL_FACTOR }>()
            .take(usize::from(PARAM_M) / FIND_MATCHES_AND_COMPUTE_UNROLL_FACTOR)
        {
            let _: [(); FIND_MATCHES_AND_COMPUTE_UNROLL_FACTOR] = seq!(N in 0..8 {
                [
                #(
                {
                    let rmap_item = rmap[usize::from(r_targets[N])];

                    // Every right-bucket entry with this target `r` matches the left entry
                    for right_position in
                        rmap_item.start_position..rmap_item.start_position + rmap_item.count
                    {
                        let m = Match {
                            left_position,
                            left_y: y,
                            right_position,
                        };
                        output.push(map(m));
                    }
                },
                )*
                ]
            });
        }
    }
}
366
/// Simplified version of [`find_matches`] for verification purposes.
///
/// Checks whether a pair of `y` values constitutes a match by comparing the right `y`'s remainder
/// against all `PARAM_M` targets computed on demand for the left `y`.
pub(super) fn has_match(left_y: Y, right_y: Y) -> bool {
    let right_r = u32::from(right_y) % u32::from(PARAM_BC);
    let parity = (u32::from(left_y) / u32::from(PARAM_BC)) % 2;
    let left_r = u32::from(left_y) % u32::from(PARAM_BC);

    const_assert!(PARAM_M as usize % HAS_MATCH_UNROLL_FACTOR == 0);

    // Manually unrolled iteration over all `PARAM_M` targets, returning early on first match
    for m in 0..u32::from(PARAM_M) / HAS_MATCH_UNROLL_FACTOR as u32 {
        let _: [(); HAS_MATCH_UNROLL_FACTOR] = seq!(N in 0..8 {
            [
            #(
            {
                #[allow(clippy::identity_op)]
                let r_target = calculate_left_target_on_demand(parity, left_r, m * HAS_MATCH_UNROLL_FACTOR as u32 + N);
                if r_target == right_r {
                    return true;
                }
            },
            )*
            ]
        });
    }

    false
}
393
394pub(super) fn compute_fn<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
395    y: Y,
396    left_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
397    right_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
398) -> (Y, Metadata<K, TABLE_NUMBER>)
399where
400    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
401    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
402{
403    let left_metadata = u128::from(left_metadata);
404    let right_metadata = u128::from(right_metadata);
405
406    let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);
407
408    // Only supports `K` from 15 to 25 (otherwise math will not be correct when concatenating y,
409    // left metadata and right metadata)
410    let hash = {
411        // Take only bytes where bits were set
412        let num_bytes_with_data = (y_size_bits(K) + metadata_size_bits(K, PARENT_TABLE_NUMBER) * 2)
413            .div_ceil(u8::BITS as usize);
414
415        // Collect `K` most significant bits of `y` at the final offset of eventual `input_a`
416        let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));
417
418        // Move bits of `left_metadata` at the final offset of eventual `input_a`
419        let left_metadata_bits =
420            left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));
421
422        // Part of the `right_bits` at the final offset of eventual `input_a`
423        let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
424        let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;
425
426        // If `right_metadata` bits start to the left of the desired position in `input_a` move
427        // bits right, else move left
428        if right_bits_start_offset < y_and_left_bits {
429            let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
430            // Collect bits of `right_metadata` that will fit into `input_a` at the final offset in
431            // eventual `input_a`
432            let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
433            let input_a = y_bits | left_metadata_bits | right_bits_a;
434            // Collect bits of `right_metadata` that will spill over into `input_b`
435            let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);
436
437            blake3_hash_list(&[
438                &input_a.to_be_bytes(),
439                &input_b.to_be_bytes()
440                    [..right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize)],
441            ])
442        } else {
443            let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
444            let input_a = y_bits | left_metadata_bits | right_bits_a;
445
446            blake3_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
447        }
448    };
449
450    let y_output = Y::from(
451        u32::from_be_bytes(
452            hash[..mem::size_of::<u32>()]
453                .try_into()
454                .expect("Hash if statically guaranteed to have enough bytes; qed"),
455        ) >> (u32::BITS as usize - y_size_bits(K)),
456    );
457
458    let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);
459
460    let metadata = if TABLE_NUMBER < 4 {
461        (left_metadata << parent_metadata_bits) | right_metadata
462    } else if metadata_size_bits > 0 {
463        // For K under 25 it is guaranteed that metadata + bit offset will always fit into u128.
464        // We collect bytes necessary, potentially with extra bits at the start and end of the bytes
465        // that will be taken care of later.
466        let metadata = u128::from_be_bytes(
467            hash[y_size_bits(K) / u8::BITS as usize..][..mem::size_of::<u128>()]
468                .try_into()
469                .expect("Always enough bits for any K; qed"),
470        );
471        // Remove extra bits at the beginning
472        let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
473        // Move bits into correct location
474        metadata >> (u128::BITS as usize - metadata_size_bits)
475    } else {
476        0
477    };
478
479    (y_output, Metadata::from(metadata))
480}
481
482fn match_to_result<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
483    last_table: &Table<K, PARENT_TABLE_NUMBER>,
484    m: Match,
485) -> (Y, [Position; 2], Metadata<K, TABLE_NUMBER>)
486where
487    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
488    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
489{
490    let left_metadata = last_table
491        .metadata(m.left_position)
492        .expect("Position resulted from matching is correct; qed");
493    let right_metadata = last_table
494        .metadata(m.right_position)
495        .expect("Position resulted from matching is correct; qed");
496
497    let (y, metadata) =
498        compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(m.left_y, left_metadata, right_metadata);
499
500    (y, [m.left_position, m.right_position], metadata)
501}
502
503fn match_and_compute_fn<'a, const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
504    last_table: &'a Table<K, PARENT_TABLE_NUMBER>,
505    left_bucket: Bucket,
506    right_bucket: Bucket,
507    rmap_scratch: &'a mut Vec<RmapItem>,
508    left_targets: &'a LeftTargets,
509    results_table: &mut Vec<(Y, [Position; 2], Metadata<K, TABLE_NUMBER>)>,
510) where
511    EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
512    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
513{
514    find_matches(
515        &last_table.ys()[usize::from(left_bucket.start_position)..]
516            [..usize::from(left_bucket.size)],
517        left_bucket.start_position,
518        &last_table.ys()[usize::from(right_bucket.start_position)..]
519            [..usize::from(right_bucket.size)],
520        right_bucket.start_position,
521        rmap_scratch,
522        left_targets,
523        |m| match_to_result(last_table, m),
524        results_table,
525    )
526}
527
/// A single plot table, parameterized by its number (tables 1 through 7)
#[derive(Debug)]
pub(super) enum Table<const K: u8, const TABLE_NUMBER: u8>
where
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
{
    /// First table with contents of entries split into separate vectors for more efficient access
    First {
        /// Derived values computed from `x`
        ys: Vec<Y>,
        /// X values
        xs: Vec<X>,
    },
    /// Other tables
    Other {
        /// Derived values computed from previous table
        ys: Vec<Y>,
        /// Left and right entry positions in a previous table encoded into bits
        positions: Vec<[Position; 2]>,
        /// Metadata corresponding to each entry (empty for the last table, which has none)
        metadatas: Vec<Metadata<K, TABLE_NUMBER>>,
    },
}
550
impl<const K: u8> Table<K, 1>
where
    EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
{
    /// Create the table
    pub(super) fn create(seed: Seed) -> Self
    where
        EvaluatableUsize<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
    {
        // ChaCha8 keystream containing `K` bits for every `x`
        let partial_ys = partial_ys::<K>(seed);

        let mut t_1 = Vec::with_capacity(1_usize << K);
        // Process `x`s in chunks of `COMPUTE_F1_SIMD_FACTOR`, paired with the matching chunk of
        // keystream bytes
        for (x_start, partial_ys) in X::all::<K>().step_by(COMPUTE_F1_SIMD_FACTOR).zip(
            partial_ys
                .array_chunks::<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
                .copied(),
        ) {
            // `[x_start, x_start + 1, ..., x_start + COMPUTE_F1_SIMD_FACTOR - 1]`
            let xs: [_; COMPUTE_F1_SIMD_FACTOR] = seq!(N in 0..8 {
                [
                #(
                #[allow(clippy::erasing_op, clippy::identity_op)]
                {
                    x_start + X::from(N)
                },
                )*
                ]
            });

            let ys = compute_f1_simd::<K>(xs, &partial_ys);
            t_1.extend(ys.into_iter().zip(xs));
        }

        // Sort by `y` (with `x` as a tie-breaker, since tuples sort lexicographically)
        t_1.sort_unstable();

        let (ys, xs) = t_1.into_iter().unzip();

        Self::First { ys, xs }
    }

    /// Create the table, leverages available parallelism
    #[cfg(any(feature = "parallel", test))]
    pub(super) fn create_parallel(seed: Seed) -> Self
    where
        EvaluatableUsize<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
    {
        let partial_ys = partial_ys::<K>(seed);

        let mut t_1 = Vec::with_capacity(1_usize << K);
        for (x_start, partial_ys) in X::all::<K>().step_by(COMPUTE_F1_SIMD_FACTOR).zip(
            partial_ys
                .array_chunks::<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
                .copied(),
        ) {
            let xs: [_; COMPUTE_F1_SIMD_FACTOR] = seq!(N in 0..8 {
                [
                #(
                #[allow(clippy::erasing_op, clippy::identity_op)]
                {
                    x_start + X::from(N)
                },
                )*
                ]
            });

            let ys = compute_f1_simd::<K>(xs, &partial_ys);
            t_1.extend(ys.into_iter().zip(xs));
        }

        // The only difference from `create()`: rayon's parallel sort
        t_1.par_sort_unstable();

        let (ys, xs) = t_1.into_iter().unzip();

        Self::First { ys, xs }
    }

    /// `x` values of the first table, in the same order as the corresponding `y`s returned by
    /// [`Self::ys()`]
    pub(super) fn xs(&self) -> &[X] {
        match self {
            Table::First { xs, .. } => xs,
            _ => {
                // `Table<K, 1>` is only ever constructed as the `First` variant above
                unreachable!()
            }
        }
    }
}
636
mod private {
    /// Sealed marker trait implemented only for tables 2-7, which are the tables created from a
    /// parent table (the first table is instead created directly from the seed)
    pub(in super::super) trait SupportedOtherTables {}
}
640
// Tables 2 through 7 are all created by matching entries of the previous table, hence they all
// get the generic `Table::create()`/`Table::create_parallel()` implementations below
impl<const K: u8> private::SupportedOtherTables for Table<K, 2> where
    EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized
{
}

impl<const K: u8> private::SupportedOtherTables for Table<K, 3> where
    EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized
{
}

impl<const K: u8> private::SupportedOtherTables for Table<K, 4> where
    EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized
{
}

impl<const K: u8> private::SupportedOtherTables for Table<K, 5> where
    EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized
{
}

impl<const K: u8> private::SupportedOtherTables for Table<K, 6> where
    EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized
{
}

impl<const K: u8> private::SupportedOtherTables for Table<K, 7> where
    EvaluatableUsize<{ metadata_size_bytes(K, 7) }>: Sized
{
}
670
impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
where
    Self: private::SupportedOtherTables,
    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
{
    /// Creates new [`TABLE_NUMBER`] table. There also exists [`Self::create_parallel()`] that
    /// trades CPU efficiency and memory usage for lower latency.
    pub(super) fn create<const PARENT_TABLE_NUMBER: u8>(
        last_table: &Table<K, PARENT_TABLE_NUMBER>,
        cache: &mut TablesCache<K>,
    ) -> Self
    where
        EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    {
        let buckets = &mut cache.buckets;
        let rmap_scratch = &mut cache.rmap_scratch;
        let left_targets = &cache.left_targets;

        // Bucket currently being accumulated while scanning the parent table's `y` values
        let mut bucket = Bucket {
            bucket_index: 0,
            start_position: Position::ZERO,
            size: Position::ZERO,
        };

        let last_y = *last_table
            .ys()
            .last()
            .expect("List of y values is never empty; qed");
        buckets.clear();
        buckets.reserve(1 + usize::from(last_y) / usize::from(PARAM_BC));
        // Split the parent table's sorted `y` values into buckets of `PARAM_BC` range each
        last_table
            .ys()
            .iter()
            .zip(Position::ZERO..)
            .for_each(|(&y, position)| {
                let bucket_index = u32::from(y) / u32::from(PARAM_BC);

                if bucket_index == bucket.bucket_index {
                    bucket.size += Position::ONE;
                    return;
                }

                buckets.push(bucket);

                bucket = Bucket {
                    bucket_index,
                    start_position: position,
                    size: Position::ONE,
                };
            });
        // Iteration stopped, but we did not store the last bucket yet
        buckets.push(bucket);

        let num_values = 1 << K;
        let mut t_n = Vec::with_capacity(num_values);
        // Match entries of every pair of adjacent buckets
        buckets
            .array_windows::<2>()
            .for_each(|&[left_bucket, right_bucket]| {
                match_and_compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(
                    last_table,
                    left_bucket,
                    right_bucket,
                    rmap_scratch,
                    left_targets,
                    &mut t_n,
                );
            });

        // Tuples sort lexicographically, so this sorts by `y` first
        t_n.sort_unstable();

        let mut ys = Vec::with_capacity(t_n.len());
        let mut positions = Vec::with_capacity(t_n.len());
        let mut metadatas = Vec::with_capacity(t_n.len());

        for (y, [left_position, right_position], metadata) in t_n {
            ys.push(y);
            positions.push([left_position, right_position]);
            // Last table doesn't have metadata
            if metadata_size_bits(K, TABLE_NUMBER) > 0 {
                metadatas.push(metadata);
            }
        }

        Self::Other {
            ys,
            positions,
            metadatas,
        }
    }

    /// Almost the same as [`Self::create()`], but uses parallelism internally for better
    /// performance (though not efficiency of CPU and memory usage), if you create multiple tables
    /// in parallel, prefer [`Self::create()`] for better overall performance.
    #[cfg(any(feature = "parallel", test))]
    pub(super) fn create_parallel<const PARENT_TABLE_NUMBER: u8>(
        last_table: &Table<K, PARENT_TABLE_NUMBER>,
        cache: &mut TablesCache<K>,
    ) -> Self
    where
        EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
    {
        let left_targets = &cache.left_targets;

        // Scan the very first bucket up front; workers discover the rest incrementally
        let mut first_bucket = Bucket {
            bucket_index: u32::from(last_table.ys()[0]) / u32::from(PARAM_BC),
            start_position: Position::ZERO,
            size: Position::ZERO,
        };
        for &y in last_table.ys() {
            let bucket_index = u32::from(y) / u32::from(PARAM_BC);

            if bucket_index == first_bucket.bucket_index {
                first_bucket.size += Position::ONE;
            } else {
                break;
            }
        }

        let previous_bucket = Mutex::new(first_bucket);

        let t_n = rayon::broadcast(|_ctx| {
            let mut entries = Vec::new();
            let mut rmap_scratch = Vec::new();

            loop {
                let left_bucket;
                let right_bucket;
                {
                    // Under the lock: take the previously discovered bucket as the left one and
                    // scan the following bucket to serve as the right one
                    let mut previous_bucket = previous_bucket.lock();

                    let right_bucket_start_position =
                        previous_bucket.start_position + previous_bucket.size;
                    let right_bucket_index = match last_table
                        .ys()
                        .get(usize::from(right_bucket_start_position))
                    {
                        Some(&y) => u32::from(y) / u32::from(PARAM_BC),
                        None => {
                            // No more buckets, this worker is done
                            break;
                        }
                    };
                    let mut right_bucket_size = Position::ZERO;

                    for &y in &last_table.ys()[usize::from(right_bucket_start_position)..] {
                        let bucket_index = u32::from(y) / u32::from(PARAM_BC);

                        if bucket_index == right_bucket_index {
                            right_bucket_size += Position::ONE;
                        } else {
                            break;
                        }
                    }

                    right_bucket = Bucket {
                        bucket_index: right_bucket_index,
                        start_position: right_bucket_start_position,
                        size: right_bucket_size,
                    };

                    left_bucket = *previous_bucket;
                    *previous_bucket = right_bucket;
                }

                // Matching itself happens outside of the lock, concurrently with other workers
                match_and_compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(
                    last_table,
                    left_bucket,
                    right_bucket,
                    &mut rmap_scratch,
                    left_targets,
                    &mut entries,
                );
            }

            entries
        });

        // Combine per-worker results, then sort (by `y` first, tuples sort lexicographically)
        let mut t_n = t_n.into_iter().flatten().collect::<Vec<_>>();
        t_n.par_sort_unstable();

        let mut ys = Vec::with_capacity(t_n.len());
        let mut positions = Vec::with_capacity(t_n.len());
        let mut metadatas = Vec::with_capacity(t_n.len());

        for (y, [left_position, right_position], metadata) in t_n.drain(..) {
            ys.push(y);
            positions.push([left_position, right_position]);
            // Last table doesn't have metadata
            if metadata_size_bits(K, TABLE_NUMBER) > 0 {
                metadatas.push(metadata);
            }
        }

        // Drop from a background thread, which typically helps with overall concurrency
        rayon::spawn(move || {
            drop(t_n);
        });

        Self::Other {
            ys,
            positions,
            metadatas,
        }
    }
}
875
876impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
877where
878    EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
879{
880    /// All `y`s as [`BitSlice`], for individual `x`s needs to be slices into [`K`] bits slices
881    pub(super) fn ys(&self) -> &[Y] {
882        let (Table::First { ys, .. } | Table::Other { ys, .. }) = self;
883        ys
884    }
885
886    /// Returns `None` on invalid position or first table, `Some(left_position, right_position)` in
887    /// previous table on success
888    pub(super) fn position(&self, position: Position) -> Option<[Position; 2]> {
889        match self {
890            Table::First { .. } => None,
891            Table::Other { positions, .. } => positions.get(usize::from(position)).copied(),
892        }
893    }
894
895    /// Returns `None` on invalid position or for table number 7
896    pub(super) fn metadata(&self, position: Position) -> Option<Metadata<K, TABLE_NUMBER>> {
897        match self {
898            Table::First { xs, .. } => xs.get(usize::from(position)).map(|&x| Metadata::from(x)),
899            Table::Other { metadatas, .. } => metadatas.get(usize::from(position)).copied(),
900        }
901    }
902}