1#[cfg(any(feature = "alloc", test))]
4mod rmap;
5#[cfg(test)]
6mod tests;
7pub(super) mod types;
8
9use crate::chiapos::Seed;
10use crate::chiapos::constants::{PARAM_B, PARAM_BC, PARAM_C, PARAM_EXT, PARAM_M};
11#[cfg(feature = "alloc")]
12use crate::chiapos::table::rmap::Rmap;
13use crate::chiapos::table::types::{Metadata, X, Y};
14#[cfg(feature = "alloc")]
15use crate::chiapos::table::types::{Position, R};
16use crate::chiapos::utils::EvaluatableUsize;
17use ab_chacha8::{ChaCha8Block, ChaCha8State};
18#[cfg(feature = "alloc")]
19use alloc::boxed::Box;
20#[cfg(feature = "alloc")]
21use alloc::vec;
22#[cfg(feature = "alloc")]
23use alloc::vec::Vec;
24#[cfg(feature = "alloc")]
25use chacha20::cipher::{Iv, KeyIvInit, StreamCipher};
26#[cfg(feature = "alloc")]
27use chacha20::{ChaCha8, Key};
28use core::array;
29#[cfg(feature = "parallel")]
30use core::cell::SyncUnsafeCell;
31#[cfg(feature = "alloc")]
32use core::mem;
33#[cfg(feature = "alloc")]
34use core::mem::MaybeUninit;
35#[cfg(any(feature = "alloc", test))]
36use core::simd::prelude::*;
37#[cfg(feature = "parallel")]
38use core::sync::atomic::{AtomicUsize, Ordering};
39#[cfg(feature = "alloc")]
40use derive_more::Deref;
41#[cfg(any(feature = "alloc", test))]
42use seq_macro::seq;
43#[cfg(feature = "alloc")]
45use alloc::sync::Arc;
46use subspace_core_primitives::hashes::blake3_hash;
47
48pub(super) const COMPUTE_F1_SIMD_FACTOR: usize = 8;
49#[cfg(any(feature = "alloc", test))]
50const COMPUTE_FN_SIMD_FACTOR: usize = 16;
51#[allow(dead_code, reason = "unused when crate is compiled separately")]
52const MAX_BUCKET_SIZE: usize = 512;
53#[cfg(feature = "alloc")]
54const BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS: u8 = 128;
55#[allow(dead_code, reason = "unused when crate is compiled separately")]
62pub(crate) const REDUCED_BUCKET_SIZE: usize = MAX_BUCKET_SIZE;
63#[allow(dead_code, reason = "unused when crate is compiled separately")]
68const REDUCED_MATCHES_COUNT: usize = MAX_BUCKET_SIZE;
69#[cfg(feature = "parallel")]
70const CACHE_LINE_SIZE: usize = 64;
71
72const _: () = {
73 debug_assert!(REDUCED_BUCKET_SIZE <= MAX_BUCKET_SIZE);
74 debug_assert!(REDUCED_MATCHES_COUNT <= MAX_BUCKET_SIZE);
75};
76
77pub(super) const fn y_size_bits(k: u8) -> usize {
79 k as usize + PARAM_EXT as usize
80}
81
82pub const fn metadata_size_bytes(k: u8, table_number: u8) -> usize {
84 metadata_size_bits(k, table_number).div_ceil(u8::BITS as usize)
85}
86
87pub(super) const fn metadata_size_bits(k: u8, table_number: u8) -> usize {
89 k as usize
90 * match table_number {
91 1 => 1,
92 2 => 2,
93 3 | 4 => 4,
94 5 => 3,
95 6 => 2,
96 7 => 0,
97 _ => unreachable!(),
98 }
99}
100
101pub const fn num_buckets(k: u8) -> usize {
103 2_usize
104 .pow(y_size_bits(k) as u32)
105 .div_ceil(PARAM_BC as usize)
106}
107
108#[cfg(feature = "alloc")]
111const fn max_table_entries(k: u8) -> usize {
112 (num_buckets(k) - 1) * REDUCED_MATCHES_COUNT
113}
114
115#[cfg(feature = "parallel")]
116#[inline(always)]
117fn strip_sync_unsafe_cell<const N: usize, T>(value: Box<[SyncUnsafeCell<T>; N]>) -> Box<[T; N]> {
118 unsafe { Box::from_raw(Box::into_raw(value).cast()) }
120}
121
122#[cfg(feature = "alloc")]
125fn partial_ys<const K: u8>(seed: Seed) -> Vec<u8> {
126 let output_len_bits = usize::from(K) * (1 << K);
127 let mut output = vec![0; output_len_bits.div_ceil(u8::BITS as usize)];
128
129 let key = Key::from(seed);
130 let iv = Iv::<ChaCha8>::default();
131
132 let mut cipher = ChaCha8::new(&key, &iv);
133
134 cipher.apply_keystream(&mut output);
135
136 output
137}
138
139#[cfg(feature = "alloc")]
147const fn bucket_size_upper_bound(k: u8, security_bits: u8) -> usize {
148 const LAMBDA: u64 = PARAM_BC as u64 / 2u64.pow(PARAM_EXT as u32);
151 const LN2_NUM: u128 = 693147;
154 const LN2_DEN: u128 = 1000000;
155
156 let ks = k as u128 + security_bits as u128;
158 let num = 3u128 * LAMBDA as u128 * ks * LN2_NUM;
161 let den = LN2_DEN;
163
164 let ceil_div: u128 = num.div_ceil(den);
165
166 let mut low = 0u64;
171 let mut high = u64::MAX;
172 while low < high {
173 let mid = low + (high - low) / 2;
174 let left = (mid as u128) * (mid as u128);
175 if left >= ceil_div {
176 high = mid;
177 } else {
178 low = mid + 1;
179 }
180 }
181 let add_term = low;
182
183 (LAMBDA + add_term) as usize
184}
185
186#[cfg(feature = "alloc")]
187fn group_by_buckets<const K: u8>(
188 ys: &[Y],
189) -> Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>
190where
191 [(); num_buckets(K)]:,
192{
193 let mut bucket_offsets = [0_u16; num_buckets(K)];
194 let mut buckets = unsafe {
196 Box::<[[MaybeUninit<(Position, Y)>; REDUCED_BUCKET_SIZE]; num_buckets(K)]>::new_uninit()
197 .assume_init()
198 };
199
200 for (&y, position) in ys.iter().zip(Position::ZERO..) {
201 let bucket_index = (u32::from(y) / u32::from(PARAM_BC)) as usize;
202
203 let bucket_offset = unsafe { bucket_offsets.get_unchecked_mut(bucket_index) };
205 let bucket = unsafe { buckets.get_unchecked_mut(bucket_index) };
207
208 if *bucket_offset < REDUCED_BUCKET_SIZE as u16 {
209 bucket[*bucket_offset as usize].write((position, y));
210 *bucket_offset += 1;
211 }
212 }
213
214 for (bucket, initialized) in buckets.iter_mut().zip(bucket_offsets) {
215 bucket[usize::from(initialized)..].write_filled((Position::SENTINEL, Y::SENTINEL));
216 }
217
218 unsafe { Box::from_raw(Box::into_raw(buckets).cast()) }
220}
221
222#[cfg(feature = "parallel")]
228unsafe fn group_by_buckets_from_buckets<'a, const K: u8, I>(
229 iter: I,
230) -> Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>
231where
232 I: Iterator<Item = (&'a [MaybeUninit<Y>; REDUCED_MATCHES_COUNT], usize)> + 'a,
233 [(); num_buckets(K)]:,
234{
235 let mut bucket_offsets = [0_u16; num_buckets(K)];
236 let mut buckets = unsafe {
238 Box::<[[MaybeUninit<(Position, Y)>; REDUCED_BUCKET_SIZE]; num_buckets(K)]>::new_uninit()
239 .assume_init()
240 };
241
242 for ((ys, count), batch_start) in iter.zip((Position::ZERO..).step_by(REDUCED_MATCHES_COUNT)) {
243 let ys = unsafe { ys[..count].assume_init_ref() };
245 for (&y, position) in ys.iter().zip(batch_start..) {
246 let bucket_index = (u32::from(y) / u32::from(PARAM_BC)) as usize;
247
248 let bucket_offset = unsafe { bucket_offsets.get_unchecked_mut(bucket_index) };
250 let bucket = unsafe { buckets.get_unchecked_mut(bucket_index) };
252
253 if *bucket_offset < REDUCED_BUCKET_SIZE as u16 {
254 bucket[*bucket_offset as usize].write((position, y));
255 *bucket_offset += 1;
256 }
257 }
258 }
259
260 for (bucket, initialized) in buckets.iter_mut().zip(bucket_offsets) {
261 bucket[usize::from(initialized)..].write_filled((Position::SENTINEL, Y::SENTINEL));
262 }
263
264 unsafe { Box::from_raw(Box::into_raw(buckets).cast()) }
266}
267
268#[cfg(feature = "alloc")]
271fn sort_buckets<const K: u8>(buckets: &mut [[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)])
272where
273 [(); num_buckets(K)]:,
274{
275 for bucket in buckets.iter_mut() {
276 let len = bucket
277 .iter()
278 .take_while(|&&(pos, _)| pos != Position::SENTINEL)
279 .count();
280 bucket[..len].sort_unstable_by_key(|&(pos, y)| (u32::from(y), u32::from(pos)));
281 }
282}
283
284#[cfg(feature = "alloc")]
285#[derive(Debug, Copy, Clone, Deref)]
286#[repr(align(64))]
287struct CacheLineAligned<T>(T);
288
289#[cfg(feature = "alloc")]
291type LeftTargets = [[CacheLineAligned<[R; PARAM_M as usize]>; PARAM_BC as usize]; 2];
292
293#[cfg(feature = "alloc")]
294fn calculate_left_targets() -> Arc<LeftTargets> {
295 let mut left_targets = Arc::<LeftTargets>::new_uninit();
296 let left_targets_slice = unsafe {
298 mem::transmute::<
299 &mut MaybeUninit<[[CacheLineAligned<[R; PARAM_M as usize]>; PARAM_BC as usize]; 2]>,
300 &mut [[MaybeUninit<CacheLineAligned<[R; PARAM_M as usize]>>; PARAM_BC as usize]; 2],
301 >(Arc::get_mut_unchecked(&mut left_targets))
302 };
303
304 for parity in 0..=1 {
305 for r in 0..PARAM_BC {
306 let c = r / PARAM_C;
307
308 let mut arr = array::from_fn(|m| {
309 let m = m as u16;
310 R::from(
311 ((c + m) % PARAM_B) * PARAM_C
312 + (((2 * m + parity) * (2 * m + parity) + r) % PARAM_C),
313 )
314 });
315 arr.sort_unstable();
316 left_targets_slice[parity as usize][r as usize].write(CacheLineAligned(arr));
317 }
318 }
319
320 unsafe { left_targets.assume_init() }
322}
323
324fn calculate_left_target_on_demand(parity: u32, r: u32, m: u32) -> u32 {
325 let param_b = u32::from(PARAM_B);
326 let param_c = u32::from(PARAM_C);
327
328 ((r / param_c + m) % param_b) * param_c + (((2 * m + parity) * (2 * m + parity) + r) % param_c)
329}
330
331#[cfg(feature = "alloc")]
333#[derive(Debug, Clone)]
334pub struct TablesCache {
335 left_targets: Arc<LeftTargets>,
336}
337
338#[cfg(feature = "alloc")]
339impl Default for TablesCache {
340 fn default() -> Self {
342 Self {
343 left_targets: calculate_left_targets(),
344 }
345 }
346}
347
348#[cfg(feature = "alloc")]
349#[derive(Debug, Copy, Clone)]
350struct Match {
351 left_position: Position,
352 left_y: Y,
353 right_position: Position,
354}
355
356pub(super) fn compute_f1<const K: u8>(x: X, seed: &Seed) -> Y {
358 let skip_bits = u32::from(K) * u32::from(x);
359 let skip_u32s = skip_bits / u32::BITS;
360 let partial_y_offset = skip_bits % u32::BITS;
361
362 const U32S_PER_BLOCK: usize = size_of::<ChaCha8Block>() / size_of::<u32>();
363
364 let initial_state = ChaCha8State::init(seed, &[0; _]);
365 let first_block_counter = skip_u32s / U32S_PER_BLOCK as u32;
366 let u32_in_first_block = skip_u32s as usize % U32S_PER_BLOCK;
367
368 let first_block = initial_state.compute_block(first_block_counter);
369 let hi = first_block[u32_in_first_block].to_be();
370
371 let lo = if u32_in_first_block + 1 == U32S_PER_BLOCK {
373 let second_block = initial_state.compute_block(first_block_counter + 1);
375 second_block[0].to_be()
376 } else {
377 first_block[u32_in_first_block + 1].to_be()
378 };
379
380 let partial_y = (u64::from(hi) << u32::BITS) | u64::from(lo);
381
382 let pre_y = partial_y >> (u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset);
383 let pre_y = pre_y as u32;
384 let pre_y_mask = (u32::MAX << PARAM_EXT) & (u32::MAX >> (u32::BITS - u32::from(K + PARAM_EXT)));
386
387 let pre_ext = u32::from(x) >> (K - PARAM_EXT);
390
391 Y::from((pre_y & pre_y_mask) | pre_ext)
394}
395
396#[cfg(any(feature = "alloc", test))]
397pub(super) fn compute_f1_simd<const K: u8>(
398 xs: Simd<u32, COMPUTE_F1_SIMD_FACTOR>,
399 partial_ys: &[u8; K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize],
400) -> [Y; COMPUTE_F1_SIMD_FACTOR] {
401 let pre_ys_bytes = array::from_fn(|i| {
404 let partial_y_offset = i * usize::from(K);
405 let partial_y_length =
406 (partial_y_offset % u8::BITS as usize + usize::from(K)).div_ceil(u8::BITS as usize);
407 let mut pre_y_bytes = 0u64.to_be_bytes();
408 pre_y_bytes[..partial_y_length].copy_from_slice(
409 &partial_ys[partial_y_offset / u8::BITS as usize..][..partial_y_length],
410 );
411
412 u64::from_be_bytes(pre_y_bytes)
413 });
414 let pre_ys_right_offset = array::from_fn(|i| {
415 let partial_y_offset = i as u32 * u32::from(K);
416 u64::from(u64::BITS - u32::from(K + PARAM_EXT) - partial_y_offset % u8::BITS)
417 });
418 let pre_ys = Simd::from_array(pre_ys_bytes) >> Simd::from_array(pre_ys_right_offset);
419
420 let pre_ys_mask = Simd::splat(
422 (u32::MAX << usize::from(PARAM_EXT))
423 & (u32::MAX >> (u32::BITS as usize - usize::from(K + PARAM_EXT))),
424 );
425
426 let pre_exts = xs >> Simd::splat(u32::from(K - PARAM_EXT));
429
430 let ys = (pre_ys.cast() & pre_ys_mask) | pre_exts;
433
434 Y::array_from_repr(ys.to_array())
435}
436
437#[cfg(feature = "alloc")]
444unsafe fn find_matches_in_buckets<'a>(
445 left_bucket_index: u32,
446 left_bucket: &[(Position, Y); REDUCED_BUCKET_SIZE],
447 right_bucket: &[(Position, Y); REDUCED_BUCKET_SIZE],
448 matches: &'a mut [MaybeUninit<Match>; REDUCED_MATCHES_COUNT + PARAM_M as usize * 2],
451 left_targets: &LeftTargets,
452) -> &'a [Match] {
453 let left_base = left_bucket_index * u32::from(PARAM_BC);
454 let right_base = left_base + u32::from(PARAM_BC);
455
456 let mut rmap = Rmap::new();
457 for &(right_position, y) in right_bucket {
458 if right_position == Position::SENTINEL {
459 break;
460 }
461 let r = R::from((u32::from(y) - right_base) as u16);
462 unsafe {
465 rmap.add(r, right_position);
466 }
467 }
468
469 let parity = left_base % 2;
470 let left_targets_parity = &left_targets[parity as usize];
471 let mut next_match_index = 0;
472
473 for &(left_position, y) in left_bucket {
476 if left_position == Position::SENTINEL || next_match_index >= REDUCED_MATCHES_COUNT {
478 break;
480 }
481
482 let r = R::from((u32::from(y) - left_base) as u16);
483 let left_targets_r = unsafe { left_targets_parity.get_unchecked(usize::from(r)) };
485
486 for &r_target in left_targets_r.iter() {
487 let right_positions = unsafe { rmap.get(r_target) };
489
490 for &right_position in right_positions {
491 if next_match_index >= matches.len() {
492 break;
493 }
494 unsafe { matches.get_unchecked_mut(next_match_index) }.write(Match {
496 left_position,
497 left_y: y,
498 right_position,
499 });
500 next_match_index += 1;
501 }
502 }
503 }
504
505 unsafe { matches[..next_match_index].assume_init_ref() }
507}
508
509pub(super) fn has_match(left_y: Y, right_y: Y) -> bool {
511 let right_r = u32::from(right_y) % u32::from(PARAM_BC);
512 let parity = (u32::from(left_y) / u32::from(PARAM_BC)) % 2;
513 let left_r = u32::from(left_y) % u32::from(PARAM_BC);
514
515 let r_targets = array::from_fn::<_, { PARAM_M as usize }, _>(|i| {
516 calculate_left_target_on_demand(parity, left_r, i as u32)
517 });
518
519 r_targets.contains(&right_r)
520}
521
522#[inline(always)]
523pub(super) fn compute_fn<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
524 y: Y,
525 left_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
526 right_metadata: Metadata<K, PARENT_TABLE_NUMBER>,
527) -> (Y, Metadata<K, TABLE_NUMBER>)
528where
529 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
530 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
531{
532 let left_metadata = u128::from(left_metadata);
533 let right_metadata = u128::from(right_metadata);
534
535 let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);
536
537 let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
539 let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;
540
541 let num_bytes_with_data =
543 (y_size_bits(K) + parent_metadata_bits * 2).div_ceil(u8::BITS as usize);
544
545 let hash = {
548 let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));
550
551 let left_metadata_bits =
553 left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));
554
555 if right_bits_start_offset < y_and_left_bits {
558 let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
559 let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
562 let input_a = y_bits | left_metadata_bits | right_bits_a;
563 let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);
565
566 let input = [input_a.to_be_bytes(), input_b.to_be_bytes()];
567 let input_len =
568 size_of::<u128>() + right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize);
569 blake3_hash(&input.as_flattened()[..input_len])
570 } else {
571 let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
572 let input_a = y_bits | left_metadata_bits | right_bits_a;
573
574 blake3_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
575 }
576 };
577
578 let y_output = Y::from(
579 u32::from_be_bytes([hash[0], hash[1], hash[2], hash[3]])
580 >> (u32::BITS as usize - y_size_bits(K)),
581 );
582
583 let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);
584
585 let metadata = if TABLE_NUMBER < 4 {
586 (left_metadata << parent_metadata_bits) | right_metadata
587 } else if metadata_size_bits > 0 {
588 let metadata = u128::from_be_bytes(
592 hash[y_size_bits(K) / u8::BITS as usize..][..size_of::<u128>()]
593 .try_into()
594 .expect("Always enough bits for any K; qed"),
595 );
596 let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
598 metadata >> (u128::BITS as usize - metadata_size_bits)
600 } else {
601 0
602 };
603
604 (y_output, Metadata::from(metadata))
605}
606
607#[cfg(any(feature = "alloc", test))]
611fn compute_fn_simd<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
612 left_ys: [Y; COMPUTE_FN_SIMD_FACTOR],
613 left_metadatas: [Metadata<K, PARENT_TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
614 right_metadatas: [Metadata<K, PARENT_TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
615) -> (
616 [Y; COMPUTE_FN_SIMD_FACTOR],
617 [Metadata<K, TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
618)
619where
620 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
621 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
622{
623 let parent_metadata_bits = metadata_size_bits(K, PARENT_TABLE_NUMBER);
624 let metadata_size_bits = metadata_size_bits(K, TABLE_NUMBER);
625
626 let left_metadatas: [u128; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
629 [
630 #(
631 u128::from(left_metadatas[N]),
632 )*
633 ]
634 });
635 let right_metadatas: [u128; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
636 [
637 #(
638 u128::from(right_metadatas[N]),
639 )*
640 ]
641 });
642
643 let y_and_left_bits = y_size_bits(K) + parent_metadata_bits;
645 let right_bits_start_offset = u128::BITS as usize - parent_metadata_bits;
646
647 let num_bytes_with_data =
649 (y_size_bits(K) + parent_metadata_bits * 2).div_ceil(u8::BITS as usize);
650
651 let hashes: [_; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
656 [
657 #(
658 {
659 let y = left_ys[N];
660 let left_metadata = left_metadatas[N];
661 let right_metadata = right_metadatas[N];
662
663 let y_bits = u128::from(y) << (u128::BITS as usize - y_size_bits(K));
666
667 let left_metadata_bits =
669 left_metadata << (u128::BITS as usize - parent_metadata_bits - y_size_bits(K));
670
671 if right_bits_start_offset < y_and_left_bits {
674 let right_bits_pushed_into_input_b = y_and_left_bits - right_bits_start_offset;
675 let right_bits_a = right_metadata >> right_bits_pushed_into_input_b;
678 let input_a = y_bits | left_metadata_bits | right_bits_a;
679 let input_b = right_metadata << (u128::BITS as usize - right_bits_pushed_into_input_b);
681
682 let input = [input_a.to_be_bytes(), input_b.to_be_bytes()];
683 let input_len =
684 size_of::<u128>() + right_bits_pushed_into_input_b.div_ceil(u8::BITS as usize);
685 blake3_hash(&input.as_flattened()[..input_len])
686 } else {
687 let right_bits_a = right_metadata << (right_bits_start_offset - y_and_left_bits);
688 let input_a = y_bits | left_metadata_bits | right_bits_a;
689
690 blake3_hash(&input_a.to_be_bytes()[..num_bytes_with_data])
691 }
692 },
693 )*
694 ]
695 });
696
697 let y_outputs = Simd::from_array(
698 hashes.map(|hash| u32::from_be_bytes([hash[0], hash[1], hash[2], hash[3]])),
699 ) >> (u32::BITS - y_size_bits(K) as u32);
700 let y_outputs = Y::array_from_repr(y_outputs.to_array());
701
702 let metadatas = if TABLE_NUMBER < 4 {
703 seq!(N in 0..16 {
704 [
705 #(
706 Metadata::from((left_metadatas[N] << parent_metadata_bits) | right_metadatas[N]),
707 )*
708 ]
709 })
710 } else if metadata_size_bits > 0 {
711 seq!(N in 0..16 {
715 [
716 #(
717 {
718 let metadata = u128::from_be_bytes(
719 hashes[N][y_size_bits(K) / u8::BITS as usize..][..size_of::<u128>()]
720 .try_into()
721 .expect("Always enough bits for any K; qed"),
722 );
723 let metadata = metadata << (y_size_bits(K) % u8::BITS as usize);
725 Metadata::from(metadata >> (u128::BITS as usize - metadata_size_bits))
727 },
728 )*
729 ]
730 })
731 } else {
732 [Metadata::default(); _]
733 };
734
735 (y_outputs, metadatas)
736}
737
738#[cfg(feature = "alloc")]
741unsafe fn match_to_result<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
742 parent_table: &Table<K, PARENT_TABLE_NUMBER>,
743 m: &Match,
744) -> (Y, [Position; 2], Metadata<K, TABLE_NUMBER>)
745where
746 Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
747 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
748 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
749 [(); 1 << K]:,
750 [(); num_buckets(K)]:,
751 [(); num_buckets(K) - 1]:,
752{
753 let left_metadata = unsafe { parent_table.metadata(m.left_position) };
755 let right_metadata = unsafe { parent_table.metadata(m.right_position) };
757
758 let (y, metadata) =
759 compute_fn::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(m.left_y, left_metadata, right_metadata);
760
761 (y, [m.left_position, m.right_position], metadata)
762}
763
764#[cfg(feature = "alloc")]
767#[inline(always)]
768unsafe fn match_to_result_simd<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
769 parent_table: &Table<K, PARENT_TABLE_NUMBER>,
770 matches: &[Match; COMPUTE_FN_SIMD_FACTOR],
771) -> (
772 [Y; COMPUTE_FN_SIMD_FACTOR],
773 [[Position; 2]; COMPUTE_FN_SIMD_FACTOR],
774 [Metadata<K, TABLE_NUMBER>; COMPUTE_FN_SIMD_FACTOR],
775)
776where
777 Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
778 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
779 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
780 [(); 1 << K]:,
781 [(); num_buckets(K)]:,
782 [(); num_buckets(K) - 1]:,
783{
784 let left_ys: [_; COMPUTE_FN_SIMD_FACTOR] = seq!(N in 0..16 {
785 [
786 #(
787 matches[N].left_y,
788 )*
789 ]
790 });
791 let left_metadatas: [_; COMPUTE_FN_SIMD_FACTOR] = unsafe {
793 seq!(N in 0..16 {
794 [
795 #(
796 parent_table.metadata(matches[N].left_position),
797 )*
798 ]
799 })
800 };
801 let right_metadatas: [_; COMPUTE_FN_SIMD_FACTOR] = unsafe {
803 seq!(N in 0..16 {
804 [
805 #(
806 parent_table.metadata(matches[N].right_position),
807 )*
808 ]
809 })
810 };
811
812 let (y_outputs, metadatas) = compute_fn_simd::<K, TABLE_NUMBER, PARENT_TABLE_NUMBER>(
813 left_ys,
814 left_metadatas,
815 right_metadatas,
816 );
817
818 let positions = seq!(N in 0..16 {
819 [
820 #(
821 [
822 matches[N].left_position,
823 matches[N].right_position,
824 ],
825 )*
826 ]
827 });
828
829 (y_outputs, positions, metadatas)
830}
831
832#[cfg(feature = "alloc")]
836#[inline(always)]
837unsafe fn matches_to_results<const K: u8, const TABLE_NUMBER: u8, const PARENT_TABLE_NUMBER: u8>(
838 parent_table: &Table<K, PARENT_TABLE_NUMBER>,
839 matches: &[Match],
840 ys: &mut [MaybeUninit<Y>],
841 positions: &mut [MaybeUninit<[Position; 2]>],
842 metadatas: &mut [MaybeUninit<Metadata<K, TABLE_NUMBER>>],
843) where
844 Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
845 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
846 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
847 [(); 1 << K]:,
848 [(); num_buckets(K)]:,
849 [(); num_buckets(K) - 1]:,
850{
851 let (grouped_matches, other_matches) = matches.as_chunks::<COMPUTE_FN_SIMD_FACTOR>();
852 let (grouped_ys, other_ys) = ys.split_at_mut(grouped_matches.as_flattened().len());
853 let grouped_ys = grouped_ys.as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>().0;
854 let (grouped_positions, other_positions) =
855 positions.split_at_mut(grouped_matches.as_flattened().len());
856 let grouped_positions = grouped_positions
857 .as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>()
858 .0;
859 let (grouped_metadatas, other_metadatas) =
860 metadatas.split_at_mut(grouped_matches.as_flattened().len());
861 let grouped_metadatas = grouped_metadatas
862 .as_chunks_mut::<COMPUTE_FN_SIMD_FACTOR>()
863 .0;
864
865 for (((grouped_matches, grouped_ys), grouped_positions), grouped_metadatas) in grouped_matches
866 .iter()
867 .zip(grouped_ys)
868 .zip(grouped_positions)
869 .zip(grouped_metadatas)
870 {
871 let (ys_group, positions_group, metadatas_group) =
873 unsafe { match_to_result_simd(parent_table, grouped_matches) };
874 grouped_ys.write_copy_of_slice(&ys_group);
875 grouped_positions.write_copy_of_slice(&positions_group);
876
877 if metadata_size_bits(K, TABLE_NUMBER) > 0 {
879 grouped_metadatas.write_copy_of_slice(&metadatas_group);
880 }
881 }
882 for (((other_match, other_y), other_positions), other_metadata) in other_matches
883 .iter()
884 .zip(other_ys)
885 .zip(other_positions)
886 .zip(other_metadatas)
887 {
888 let (y, p, metadata) = unsafe { match_to_result(parent_table, other_match) };
890 other_y.write(y);
891 other_positions.write(p);
892 if metadata_size_bits(K, TABLE_NUMBER) > 0 {
894 other_metadata.write(metadata);
895 }
896 }
897}
898
899#[cfg(feature = "alloc")]
901#[derive(Debug)]
902pub(super) enum PrunedTable<const K: u8, const TABLE_NUMBER: u8>
903where
904 [(); 1 << K]:,
905 [(); num_buckets(K) - 1]:,
906{
907 First,
908 Other {
910 positions: Box<[MaybeUninit<[Position; 2]>]>,
912 },
913 #[cfg(feature = "parallel")]
915 OtherBuckets {
916 positions: Box<[[MaybeUninit<[Position; 2]>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1]>,
920 },
921}
922
923#[cfg(feature = "alloc")]
924impl<const K: u8, const TABLE_NUMBER: u8> PrunedTable<K, TABLE_NUMBER>
925where
926 [(); 1 << K]:,
927 [(); num_buckets(K) - 1]:,
928{
929 #[inline(always)]
936 pub(super) unsafe fn position(&self, position: Position) -> [Position; 2] {
937 match self {
938 Self::First => {
939 unreachable!("Not the first table");
940 }
941 Self::Other { positions, .. } => {
942 unsafe { positions.get_unchecked(usize::from(position)).assume_init() }
944 }
945 #[cfg(feature = "parallel")]
946 Self::OtherBuckets { positions, .. } => {
947 unsafe {
949 positions
950 .as_flattened()
951 .get_unchecked(usize::from(position))
952 .assume_init()
953 }
954 }
955 }
956 }
957}
958
959#[cfg(feature = "alloc")]
960#[derive(Debug)]
961pub(super) enum Table<const K: u8, const TABLE_NUMBER: u8>
962where
963 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
964 [(); 1 << K]:,
965 [(); num_buckets(K)]:,
966 [(); num_buckets(K) - 1]:,
967{
968 First {
970 buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
974 },
975 Other {
977 positions: Box<[MaybeUninit<[Position; 2]>]>,
979 metadatas: Box<[MaybeUninit<Metadata<K, TABLE_NUMBER>>]>,
981 buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
985 },
986 #[cfg(feature = "parallel")]
988 OtherBuckets {
989 positions: Box<[[MaybeUninit<[Position; 2]>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1]>,
993 metadatas: Box<
997 [[MaybeUninit<Metadata<K, TABLE_NUMBER>>; REDUCED_MATCHES_COUNT]; num_buckets(K) - 1],
998 >,
999 buckets: Box<[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)]>,
1003 },
1004}
1005
1006#[cfg(feature = "alloc")]
1007impl<const K: u8> Table<K, 1>
1008where
1009 EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
1010 [(); 1 << K]:,
1011 [(); num_buckets(K)]:,
1012 [(); num_buckets(K) - 1]:,
1013{
1014 pub(super) fn create(seed: Seed) -> Self
1016 where
1017 EvaluatableUsize<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
1018 {
1019 debug_assert!(
1022 MAX_BUCKET_SIZE >= bucket_size_upper_bound(K, BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS),
1023 "Max bucket size is not sufficiently large"
1024 );
1025
1026 let partial_ys = partial_ys::<K>(seed);
1027
1028 let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };
1030
1031 for ((ys, xs_batch_start), partial_ys) in ys
1032 .as_chunks_mut::<COMPUTE_F1_SIMD_FACTOR>()
1033 .0
1034 .iter_mut()
1035 .zip((X::ZERO..).step_by(COMPUTE_F1_SIMD_FACTOR))
1036 .zip(
1037 partial_ys
1038 .as_chunks::<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
1039 .0,
1040 )
1041 {
1042 let xs = Simd::splat(u32::from(xs_batch_start))
1043 + Simd::from_array(array::from_fn(|i| i as u32));
1044 let ys_batch = compute_f1_simd::<K>(xs, partial_ys);
1045
1046 ys.write_copy_of_slice(&ys_batch);
1047 }
1048
1049 let ys = unsafe {
1052 let ys_len = ys.len();
1053 let ys = Box::into_raw(ys);
1054 Vec::from_raw_parts(ys.cast(), ys_len, ys_len)
1055 };
1056
1057 let mut buckets = group_by_buckets::<K>(&ys);
1059 sort_buckets::<K>(&mut buckets);
1060
1061 Self::First { buckets }
1062 }
1063
1064 #[cfg(feature = "parallel")]
1066 pub(super) fn create_parallel(seed: Seed) -> Self
1067 where
1068 EvaluatableUsize<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>: Sized,
1069 {
1070 debug_assert!(
1073 MAX_BUCKET_SIZE >= bucket_size_upper_bound(K, BUCKET_SIZE_UPPER_BOUND_SECURITY_BITS),
1074 "Max bucket size is not sufficiently large"
1075 );
1076
1077 let partial_ys = partial_ys::<K>(seed);
1078
1079 let mut ys = unsafe { Box::<[MaybeUninit<Y>; 1 << K]>::new_uninit().assume_init() };
1081
1082 for ((ys, xs_batch_start), partial_ys) in ys
1084 .as_chunks_mut::<COMPUTE_F1_SIMD_FACTOR>()
1085 .0
1086 .iter_mut()
1087 .zip((X::ZERO..).step_by(COMPUTE_F1_SIMD_FACTOR))
1088 .zip(
1089 partial_ys
1090 .as_chunks::<{ K as usize * COMPUTE_F1_SIMD_FACTOR / u8::BITS as usize }>()
1091 .0,
1092 )
1093 {
1094 let xs = Simd::splat(u32::from(xs_batch_start))
1095 + Simd::from_array(array::from_fn(|i| i as u32));
1096 let ys_batch = compute_f1_simd::<K>(xs, partial_ys);
1097
1098 ys.write_copy_of_slice(&ys_batch);
1099 }
1100
1101 let ys = unsafe {
1104 let ys_len = ys.len();
1105 let ys = Box::into_raw(ys);
1106 Vec::from_raw_parts(ys.cast(), ys_len, ys_len)
1107 };
1108
1109 let mut buckets = group_by_buckets::<K>(&ys);
1111 sort_buckets::<K>(&mut buckets);
1112
1113 Self::First { buckets }
1114 }
1115}
1116
1117#[cfg(feature = "alloc")]
1118mod private {
1119 pub(in super::super) trait SupportedOtherTables {}
1120 pub(in super::super) trait NotLastTable {}
1121}
1122
1123#[cfg(feature = "alloc")]
1124impl<const K: u8> private::SupportedOtherTables for Table<K, 2>
1125where
1126 EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized,
1127 [(); 1 << K]:,
1128 [(); num_buckets(K)]:,
1129 [(); num_buckets(K) - 1]:,
1130{
1131}
1132#[cfg(feature = "alloc")]
1133impl<const K: u8> private::SupportedOtherTables for Table<K, 3>
1134where
1135 EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized,
1136 [(); 1 << K]:,
1137 [(); num_buckets(K)]:,
1138 [(); num_buckets(K) - 1]:,
1139{
1140}
1141#[cfg(feature = "alloc")]
1142impl<const K: u8> private::SupportedOtherTables for Table<K, 4>
1143where
1144 EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized,
1145 [(); 1 << K]:,
1146 [(); num_buckets(K)]:,
1147 [(); num_buckets(K) - 1]:,
1148{
1149}
1150#[cfg(feature = "alloc")]
1151impl<const K: u8> private::SupportedOtherTables for Table<K, 5>
1152where
1153 EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized,
1154 [(); 1 << K]:,
1155 [(); num_buckets(K)]:,
1156 [(); num_buckets(K) - 1]:,
1157{
1158}
1159#[cfg(feature = "alloc")]
1160impl<const K: u8> private::SupportedOtherTables for Table<K, 6>
1161where
1162 EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
1163 [(); 1 << K]:,
1164 [(); num_buckets(K)]:,
1165 [(); num_buckets(K) - 1]:,
1166{
1167}
1168#[cfg(feature = "alloc")]
1169impl<const K: u8> private::SupportedOtherTables for Table<K, 7>
1170where
1171 EvaluatableUsize<{ metadata_size_bytes(K, 7) }>: Sized,
1172 [(); 1 << K]:,
1173 [(); num_buckets(K)]:,
1174 [(); num_buckets(K) - 1]:,
1175{
1176}
1177
1178#[cfg(feature = "alloc")]
1179impl<const K: u8> private::NotLastTable for Table<K, 1>
1180where
1181 EvaluatableUsize<{ metadata_size_bytes(K, 1) }>: Sized,
1182 [(); 1 << K]:,
1183 [(); num_buckets(K)]:,
1184 [(); num_buckets(K) - 1]:,
1185{
1186}
1187#[cfg(feature = "alloc")]
1188impl<const K: u8> private::NotLastTable for Table<K, 2>
1189where
1190 EvaluatableUsize<{ metadata_size_bytes(K, 2) }>: Sized,
1191 [(); 1 << K]:,
1192 [(); num_buckets(K)]:,
1193 [(); num_buckets(K) - 1]:,
1194{
1195}
1196#[cfg(feature = "alloc")]
1197impl<const K: u8> private::NotLastTable for Table<K, 3>
1198where
1199 EvaluatableUsize<{ metadata_size_bytes(K, 3) }>: Sized,
1200 [(); 1 << K]:,
1201 [(); num_buckets(K)]:,
1202 [(); num_buckets(K) - 1]:,
1203{
1204}
1205#[cfg(feature = "alloc")]
1206impl<const K: u8> private::NotLastTable for Table<K, 4>
1207where
1208 EvaluatableUsize<{ metadata_size_bytes(K, 4) }>: Sized,
1209 [(); 1 << K]:,
1210 [(); num_buckets(K)]:,
1211 [(); num_buckets(K) - 1]:,
1212{
1213}
1214#[cfg(feature = "alloc")]
1215impl<const K: u8> private::NotLastTable for Table<K, 5>
1216where
1217 EvaluatableUsize<{ metadata_size_bytes(K, 5) }>: Sized,
1218 [(); 1 << K]:,
1219 [(); num_buckets(K)]:,
1220 [(); num_buckets(K) - 1]:,
1221{
1222}
1223#[cfg(feature = "alloc")]
1224impl<const K: u8> private::NotLastTable for Table<K, 6>
1225where
1226 EvaluatableUsize<{ metadata_size_bytes(K, 6) }>: Sized,
1227 [(); 1 << K]:,
1228 [(); num_buckets(K)]:,
1229 [(); num_buckets(K) - 1]:,
1230{
1231}
1232
1233#[cfg(feature = "alloc")]
1234impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
1235where
1236 Self: private::SupportedOtherTables,
1237 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
1238 [(); 1 << K]:,
1239 [(); num_buckets(K)]:,
1240 [(); num_buckets(K) - 1]:,
1241{
1242 pub(super) fn create<const PARENT_TABLE_NUMBER: u8>(
1246 parent_table: Table<K, PARENT_TABLE_NUMBER>,
1247 cache: &TablesCache,
1248 ) -> (Self, PrunedTable<K, PARENT_TABLE_NUMBER>)
1249 where
1250 Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
1251 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
1252 {
1253 let left_targets = &*cache.left_targets;
1254 let mut initialized_elements = 0_usize;
1255 let mut ys: Box<[MaybeUninit<Y>]> = Box::new_uninit_slice(max_table_entries(K));
1256 let mut positions: Box<[MaybeUninit<[Position; 2]>]> =
1257 Box::new_uninit_slice(max_table_entries(K));
1258 let mut metadatas: Box<[MaybeUninit<Metadata<K, TABLE_NUMBER>>]> =
1260 Box::new_uninit_slice(max_table_entries(K));
1261
1262 for ([left_bucket, right_bucket], left_bucket_index) in
1263 parent_table.buckets().array_windows().zip(0..)
1264 {
1265 let mut matches = [MaybeUninit::uninit(); _];
1266 let matches = unsafe {
1269 find_matches_in_buckets(
1270 left_bucket_index,
1271 left_bucket,
1272 right_bucket,
1273 &mut matches,
1274 left_targets,
1275 )
1276 };
1277 let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];
1279 let (ys, positions, metadatas) = unsafe {
1281 (
1282 ys.split_at_mut_unchecked(initialized_elements).1,
1283 positions.split_at_mut_unchecked(initialized_elements).1,
1284 metadatas.split_at_mut_unchecked(initialized_elements).1,
1285 )
1286 };
1287
1288 let (ys, positions, metadatas) = unsafe {
1291 (
1292 ys.split_at_mut_unchecked(matches.len()).0,
1293 positions.split_at_mut_unchecked(matches.len()).0,
1294 metadatas.split_at_mut_unchecked(matches.len()).0,
1295 )
1296 };
1297
1298 unsafe {
1301 matches_to_results(&parent_table, matches, ys, positions, metadatas);
1302 }
1303
1304 initialized_elements += matches.len();
1305 }
1306
1307 let parent_table = parent_table.prune();
1308
1309 let ys = unsafe {
1312 let ys_len = ys.len();
1313 let ys = Box::into_raw(ys);
1314 Vec::from_raw_parts(ys.cast(), initialized_elements, ys_len)
1315 };
1316
1317 let mut buckets = group_by_buckets::<K>(&ys);
1319 sort_buckets::<K>(&mut buckets);
1320
1321 let table = Self::Other {
1322 positions,
1323 metadatas,
1324 buckets,
1325 };
1326
1327 (table, parent_table)
1328 }
1329
1330 #[cfg(feature = "parallel")]
1334 pub(super) fn create_parallel<const PARENT_TABLE_NUMBER: u8>(
1335 parent_table: Table<K, PARENT_TABLE_NUMBER>,
1336 cache: &TablesCache,
1337 ) -> (Self, PrunedTable<K, PARENT_TABLE_NUMBER>)
1338 where
1339 Table<K, PARENT_TABLE_NUMBER>: private::NotLastTable,
1340 EvaluatableUsize<{ metadata_size_bytes(K, PARENT_TABLE_NUMBER) }>: Sized,
1341 {
1342 let ys = unsafe {
1344 Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
1345 };
1346 let positions = unsafe {
1348 Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
1349 };
1350 let metadatas = unsafe {
1352 Box::<[SyncUnsafeCell<[MaybeUninit<_>; REDUCED_MATCHES_COUNT]>; num_buckets(K) - 1]>::new_uninit().assume_init()
1353 };
1354 let global_results_counts =
1355 array::from_fn::<_, { num_buckets(K) - 1 }, _>(|_| SyncUnsafeCell::new(0u16));
1356
1357 let left_targets = &*cache.left_targets;
1358
1359 let buckets = parent_table.buckets();
1360 let bucket_batch_size = CACHE_LINE_SIZE / size_of::<u16>();
1363 let bucket_batch_index = AtomicUsize::new(0);
1364
1365 rayon::broadcast(|_ctx| {
1366 loop {
1367 let bucket_batch_index = bucket_batch_index.fetch_add(1, Ordering::Relaxed);
1368
1369 let buckets_batch = buckets
1370 .array_windows::<2>()
1371 .enumerate()
1372 .skip(bucket_batch_index * bucket_batch_size)
1373 .take(bucket_batch_size);
1374
1375 if buckets_batch.is_empty() {
1376 break;
1377 }
1378
1379 for (left_bucket_index, [left_bucket, right_bucket]) in buckets_batch {
1380 let mut matches = [MaybeUninit::uninit(); _];
1381 let matches = unsafe {
1384 find_matches_in_buckets(
1385 left_bucket_index as u32,
1386 left_bucket,
1387 right_bucket,
1388 &mut matches,
1389 left_targets,
1390 )
1391 };
1392 let matches = &matches[..matches.len().min(REDUCED_MATCHES_COUNT)];
1394
1395 let ys = unsafe { &mut *ys.get_unchecked(left_bucket_index).get() };
1398 let positions =
1401 unsafe { &mut *positions.get_unchecked(left_bucket_index).get() };
1402 let metadatas =
1405 unsafe { &mut *metadatas.get_unchecked(left_bucket_index).get() };
1406 let count = unsafe {
1409 &mut *global_results_counts.get_unchecked(left_bucket_index).get()
1410 };
1411
1412 unsafe {
1415 matches_to_results::<_, TABLE_NUMBER, _>(
1416 &parent_table,
1417 matches,
1418 ys,
1419 positions,
1420 metadatas,
1421 )
1422 };
1423 *count = matches.len() as u16;
1424 }
1425 }
1426 });
1427
1428 let parent_table = parent_table.prune();
1429
1430 let ys = strip_sync_unsafe_cell(ys);
1431 let positions = strip_sync_unsafe_cell(positions);
1432 let metadatas = strip_sync_unsafe_cell(metadatas);
1433
1434 let mut buckets = unsafe {
1437 group_by_buckets_from_buckets::<K, _>(
1438 ys.iter().zip(
1439 global_results_counts
1440 .into_iter()
1441 .map(|count| usize::from(count.into_inner())),
1442 ),
1443 )
1444 };
1445 sort_buckets::<K>(&mut buckets);
1446
1447 let table = Self::OtherBuckets {
1448 positions,
1449 metadatas,
1450 buckets,
1451 };
1452
1453 (table, parent_table)
1454 }
1455
1456 #[inline(always)]
1463 pub(super) unsafe fn position(&self, position: Position) -> [Position; 2] {
1464 match self {
1465 Self::First { .. } => {
1466 unreachable!("Not the first table");
1467 }
1468 Self::Other { positions, .. } => {
1469 unsafe { positions.get_unchecked(usize::from(position)).assume_init() }
1471 }
1472 #[cfg(feature = "parallel")]
1473 Self::OtherBuckets { positions, .. } => {
1474 unsafe {
1476 positions
1477 .as_flattened()
1478 .get_unchecked(usize::from(position))
1479 .assume_init()
1480 }
1481 }
1482 }
1483 }
1484}
1485
1486#[cfg(feature = "alloc")]
1487impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
1488where
1489 Self: private::NotLastTable,
1490 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
1491 [(); 1 << K]:,
1492 [(); num_buckets(K)]:,
1493 [(); num_buckets(K) - 1]:,
1494{
1495 #[inline(always)]
1500 unsafe fn metadata(&self, position: Position) -> Metadata<K, TABLE_NUMBER> {
1501 match self {
1502 Self::First { .. } => {
1503 Metadata::from(X::from(u32::from(position)))
1505 }
1506 Self::Other { metadatas, .. } => {
1507 unsafe { metadatas.get_unchecked(usize::from(position)).assume_init() }
1509 }
1510 #[cfg(feature = "parallel")]
1511 Self::OtherBuckets { metadatas, .. } => {
1512 unsafe {
1514 metadatas
1515 .as_flattened()
1516 .get_unchecked(usize::from(position))
1517 .assume_init()
1518 }
1519 }
1520 }
1521 }
1522}
1523
1524#[cfg(feature = "alloc")]
1525impl<const K: u8, const TABLE_NUMBER: u8> Table<K, TABLE_NUMBER>
1526where
1527 EvaluatableUsize<{ metadata_size_bytes(K, TABLE_NUMBER) }>: Sized,
1528 [(); 1 << K]:,
1529 [(); num_buckets(K)]:,
1530 [(); num_buckets(K) - 1]:,
1531{
1532 #[inline(always)]
1533 fn prune(self) -> PrunedTable<K, TABLE_NUMBER> {
1534 match self {
1535 Self::First { .. } => PrunedTable::First,
1536 Self::Other { positions, .. } => PrunedTable::Other { positions },
1537 #[cfg(feature = "parallel")]
1538 Self::OtherBuckets { positions, .. } => PrunedTable::OtherBuckets { positions },
1539 }
1540 }
1541
1542 #[inline(always)]
1544 pub(super) fn buckets(&self) -> &[[(Position, Y); REDUCED_BUCKET_SIZE]; num_buckets(K)] {
1545 match self {
1546 Self::First { buckets, .. } => buckets,
1547 Self::Other { buckets, .. } => buckets,
1548 #[cfg(feature = "parallel")]
1549 Self::OtherBuckets { buckets, .. } => buckets,
1550 }
1551 }
1552}