subspace_farmer_components/
sector.rs

//! Sector-related data structures
//!
//! Sectors and corresponding metadata created by functions in the [`plotting`](crate::plotting)
//! module have a specific structure, represented by the data structures in this module.
//!
//! It is typically not necessary to construct these data structures explicitly outside of this
//! crate; instead they are returned as the result of certain operations (like plotting).

use bitvec::prelude::*;
use parity_scale_codec::{Decode, Encode};
use rayon::prelude::*;
use std::mem::ManuallyDrop;
use std::ops::{Deref, DerefMut};
use std::{mem, slice};
use subspace_core_primitives::checksum::Blake3Checksummed;
use subspace_core_primitives::hashes::{Blake3Hash, blake3_hash};
use subspace_core_primitives::pieces::{PieceOffset, Record, RecordCommitment, RecordWitness};
use subspace_core_primitives::sectors::{SBucket, SectorIndex};
use subspace_core_primitives::segments::{HistorySize, SegmentIndex};
use thiserror::Error;
use tracing::debug;

/// Size of the part of the plot containing record chunks (s-buckets).
///
/// Total size of the plot can be computed with [`sector_size()`].
#[inline]
pub const fn sector_record_chunks_size(pieces_in_sector: u16) -> usize {
    pieces_in_sector as usize * Record::SIZE
}

/// Size of the part of the plot containing record metadata.
///
/// Total size of the plot can be computed with [`sector_size()`].
#[inline]
pub const fn sector_record_metadata_size(pieces_in_sector: u16) -> usize {
    pieces_in_sector as usize * RecordMetadata::encoded_size()
}

/// Exact size of a sector plot (sector contents map, record chunks, record metadata and
/// checksum).
///
/// This function simply combines [`SectorContentsMap::encoded_size()`],
/// [`sector_record_chunks_size()`] and [`sector_record_metadata_size()`] (plus a checksum) for
/// convenience.
///
/// NOTE: Each sector also has corresponding fixed-size metadata stored separately from the plot,
/// whose size can be obtained with [`SectorMetadataChecksummed::encoded_size()`].
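///
/// A usage sketch (illustrative, not from the original docs; the `1000` pieces-per-sector value
/// below is an arbitrary example, not a protocol constant):
///
/// ```ignore
/// let pieces_in_sector = 1000u16;
/// // Number of bytes a single plotted sector occupies on disk
/// let plot_bytes = sector_size(pieces_in_sector);
/// assert_eq!(
///     plot_bytes,
///     sector_record_chunks_size(pieces_in_sector)
///         + sector_record_metadata_size(pieces_in_sector)
///         + SectorContentsMap::encoded_size(pieces_in_sector)
///         + Blake3Hash::SIZE,
/// );
/// ```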
#[inline]
pub const fn sector_size(pieces_in_sector: u16) -> usize {
    sector_record_chunks_size(pieces_in_sector)
        + sector_record_metadata_size(pieces_in_sector)
        + SectorContentsMap::encoded_size(pieces_in_sector)
        + Blake3Hash::SIZE
}

/// Metadata of the plotted sector
#[derive(Debug, Encode, Decode, Clone)]
pub struct SectorMetadata {
    /// Sector index
    pub sector_index: SectorIndex,
    /// Number of pieces stored in this sector
    pub pieces_in_sector: u16,
    /// S-bucket sizes in a sector
    pub s_bucket_sizes: Box<[u16; Record::NUM_S_BUCKETS]>,
    /// Size of the blockchain history at the time of sector creation
    pub history_size: HistorySize,
}

impl SectorMetadata {
    /// Returns offsets of each s-bucket relative to the beginning of the sector (in chunks)
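    ///
    /// Offsets are a running prefix sum of [`SectorMetadata::s_bucket_sizes`]; a sketch of the
    /// relationship (illustrative, not from the original docs):
    ///
    /// ```ignore
    /// let offsets = sector_metadata.s_bucket_offsets();
    /// assert_eq!(offsets[0], 0);
    /// assert_eq!(offsets[1], u32::from(sector_metadata.s_bucket_sizes[0]));
    /// ```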
    pub fn s_bucket_offsets(&self) -> Box<[u32; Record::NUM_S_BUCKETS]> {
        let s_bucket_offsets = self
            .s_bucket_sizes
            .iter()
            .map({
                let mut base_offset = 0;

                move |s_bucket_size| {
                    let offset = base_offset;
                    base_offset += u32::from(*s_bucket_size);
                    offset
                }
            })
            .collect::<Box<_>>();

        assert_eq!(s_bucket_offsets.len(), Record::NUM_S_BUCKETS);
        let mut s_bucket_offsets = ManuallyDrop::new(s_bucket_offsets);
        // SAFETY: Original memory is not dropped, number of elements checked above
        unsafe { Box::from_raw(s_bucket_offsets.as_mut_ptr() as *mut [u32; Record::NUM_S_BUCKETS]) }
    }
}

/// Same as [`SectorMetadata`], but with checksums verified during SCALE encoding/decoding
#[derive(Debug, Clone, Encode, Decode)]
pub struct SectorMetadataChecksummed(Blake3Checksummed<SectorMetadata>);

impl From<SectorMetadata> for SectorMetadataChecksummed {
    #[inline]
    fn from(value: SectorMetadata) -> Self {
        Self(Blake3Checksummed(value))
    }
}

impl Deref for SectorMetadataChecksummed {
    type Target = SectorMetadata;

    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.0.0
    }
}

impl DerefMut for SectorMetadataChecksummed {
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0.0
    }
}

impl SectorMetadataChecksummed {
    /// Size of encoded checksummed sector metadata.
    ///
    /// For sector plot size use [`sector_size()`].
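    ///
    /// A usage sketch (illustrative, not from the original docs): this is the fixed number of
    /// bytes to reserve per sector for metadata, which is stored separately from the plot itself:
    ///
    /// ```ignore
    /// let metadata_bytes = SectorMetadataChecksummed::encoded_size();
    /// ```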
    #[inline]
    pub fn encoded_size() -> usize {
        let default = SectorMetadataChecksummed::from(SectorMetadata {
            sector_index: 0,
            pieces_in_sector: 0,
            // TODO: Should have been just `::new()`, but https://github.com/rust-lang/rust/issues/53827
            // SAFETY: Data structure filled with zeroes is a valid invariant
            s_bucket_sizes: unsafe { Box::new_zeroed().assume_init() },
            history_size: HistorySize::from(SegmentIndex::ZERO),
        });

        default.encoded_size()
    }
}

/// Commitment and witness corresponding to the same record
#[derive(Debug, Default, Clone, Encode, Decode)]
pub(crate) struct RecordMetadata {
    /// Record commitment
    pub(crate) commitment: RecordCommitment,
    /// Record witness
    pub(crate) witness: RecordWitness,
    /// Checksum (hash) of the whole piece
    pub(crate) piece_checksum: Blake3Hash,
}

impl RecordMetadata {
    pub(crate) const fn encoded_size() -> usize {
        RecordWitness::SIZE + RecordCommitment::SIZE + Blake3Hash::SIZE
    }
}

/// Raw sector before it is transformed and written to the plot, used during plotting
#[derive(Debug, Clone)]
pub(crate) struct RawSector {
    /// List of records, likely downloaded from the network
    pub(crate) records: Vec<Record>,
    /// Metadata (commitment and witness) corresponding to the same record
    pub(crate) metadata: Vec<RecordMetadata>,
}

impl RawSector {
    /// Create a new raw sector with internal vectors allocated and filled with default values
    pub(crate) fn new(pieces_in_sector: u16) -> Self {
        Self {
            records: Record::new_zero_vec(usize::from(pieces_in_sector)),
            metadata: vec![RecordMetadata::default(); usize::from(pieces_in_sector)],
        }
    }
}

// Bit array containing space for bits equal to the number of s-buckets in a record
type SingleRecordBitArray = BitArray<[u8; Record::NUM_S_BUCKETS / u8::BITS as usize]>;

const SINGLE_RECORD_BIT_ARRAY_SIZE: usize = mem::size_of::<SingleRecordBitArray>();
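// Illustrative sizing note (not from the original source): the bit array packs
// `Record::NUM_S_BUCKETS` bits into `Record::NUM_S_BUCKETS / 8` bytes, so
// `SINGLE_RECORD_BIT_ARRAY_SIZE` is exactly the per-record footprint of the encoded sector
// contents map (excluding the trailing checksum).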

// TODO: I really tried to avoid `count_ones()`, but wasn't able to with safe Rust due to lifetimes
/// Wrapper data structure that allows iterating mutably over encoded chunk bitfields, while
/// maintaining an up-to-date number of encoded chunks
///
/// ## Panics
/// Panics on drop if too many chunks are encoded
#[derive(Debug)]
pub struct EncodedChunksUsed<'a> {
    encoded_record_chunks_used: &'a mut SingleRecordBitArray,
    num_encoded_record_chunks: &'a mut SBucket,
    potentially_updated: bool,
}

impl Drop for EncodedChunksUsed<'_> {
    fn drop(&mut self) {
        if self.potentially_updated {
            let num_encoded_record_chunks = self.encoded_record_chunks_used.count_ones();
            assert!(num_encoded_record_chunks <= SBucket::MAX.into());
            *self.num_encoded_record_chunks = SBucket::try_from(num_encoded_record_chunks)
                .expect("Checked with explicit assert above; qed");
        }
    }
}

impl EncodedChunksUsed<'_> {
    /// Produces an iterator over encoded chunk bitfields.
    pub fn iter(&self) -> impl ExactSizeIterator<Item = impl Deref<Target = bool> + '_> + '_ {
        self.encoded_record_chunks_used.iter()
    }

    /// Produces a mutable iterator over encoded chunk bitfields.
    pub fn iter_mut(
        &mut self,
    ) -> impl ExactSizeIterator<Item = impl DerefMut<Target = bool> + '_> + '_ {
        self.potentially_updated = true;
        self.encoded_record_chunks_used.iter_mut()
    }
}

/// Error happening when trying to create [`SectorContentsMap`] from bytes
#[derive(Debug, Error, Copy, Clone, Eq, PartialEq)]
pub enum SectorContentsMapFromBytesError {
    /// Invalid bytes length
    #[error("Invalid bytes length, expected {expected}, actual {actual}")]
    InvalidBytesLength {
        /// Expected length
        expected: usize,
        /// Actual length
        actual: usize,
    },
    /// Invalid number of encoded record chunks
    #[error("Invalid number of encoded record chunks: {actual}, max {max}")]
    InvalidEncodedRecordChunks {
        /// Actual number of encoded record chunks
        actual: usize,
        /// Max supported
        max: usize,
    },
    /// Checksum mismatch
    #[error("Checksum mismatch")]
    ChecksumMismatch,
}

/// Error happening when trying to encode [`SectorContentsMap`] into bytes
#[derive(Debug, Error, Copy, Clone, Eq, PartialEq)]
pub enum SectorContentsMapEncodeIntoError {
    /// Invalid bytes length
    #[error("Invalid bytes length, expected {expected}, actual {actual}")]
    InvalidBytesLength {
        /// Expected length
        expected: usize,
        /// Actual length
        actual: usize,
    },
}

/// Error happening when trying to iterate over [`SectorContentsMap`]
#[derive(Debug, Error, Copy, Clone, Eq, PartialEq)]
pub enum SectorContentsMapIterationError {
    /// S-bucket provided is out of range
    #[error("S-bucket provided {provided} is out of range, max {max}")]
    SBucketOutOfRange {
        /// Provided s-bucket
        provided: usize,
        /// Max s-bucket
        max: usize,
    },
}

/// Map of sector contents.
///
/// An abstraction on top of bitfields that makes sense of sector contents containing both encoded
/// chunks (erasure coded and encoded with existing PoSpace quality) and unencoded chunks (just
/// erasure coded), used at the same time both in the record format (before writing to the plot)
/// and in the s-bucket format (as written into the plot).
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct SectorContentsMap {
    /// Number of encoded chunks used in each record.
    ///
    /// This is technically redundant, but allows drastically decreasing the amount of work in
    /// [`Self::iter_s_bucket_records()`] and other places, which become unusably slow otherwise.
    num_encoded_record_chunks: Vec<SBucket>,
    /// Bitfields for each record, where each bit is `true` if the encoded chunk at the
    /// corresponding position was used
    encoded_record_chunks_used: Vec<SingleRecordBitArray>,
}

impl SectorContentsMap {
    /// Create new sector contents map initialized with zeroes to store data for `pieces_in_sector`
    /// records
    pub fn new(pieces_in_sector: u16) -> Self {
        Self {
            num_encoded_record_chunks: vec![SBucket::default(); usize::from(pieces_in_sector)],
            encoded_record_chunks_used: vec![
                SingleRecordBitArray::default();
                usize::from(pieces_in_sector)
            ],
        }
    }

    /// Reconstruct sector contents map from bytes.
    ///
    /// Returns an error if the length of the byte slice doesn't match [`Self::encoded_size()`] for
    /// `pieces_in_sector`, or if the checksum or contents are invalid.
    pub fn from_bytes(
        bytes: &[u8],
        pieces_in_sector: u16,
    ) -> Result<Self, SectorContentsMapFromBytesError> {
        if bytes.len() != Self::encoded_size(pieces_in_sector) {
            return Err(SectorContentsMapFromBytesError::InvalidBytesLength {
                expected: Self::encoded_size(pieces_in_sector),
                actual: bytes.len(),
            });
        }

        let (single_records_bit_arrays, expected_checksum) =
            bytes.split_at(bytes.len() - Blake3Hash::SIZE);
        // Verify checksum
        let actual_checksum = blake3_hash(single_records_bit_arrays);
        if *actual_checksum != *expected_checksum {
            debug!(
                actual_checksum = %hex::encode(actual_checksum),
                expected_checksum = %hex::encode(expected_checksum),
                "Hash doesn't match, corrupted bytes"
            );

            return Err(SectorContentsMapFromBytesError::ChecksumMismatch);
        }

        let mut encoded_record_chunks_used =
            vec![SingleRecordBitArray::default(); pieces_in_sector.into()];

        let num_encoded_record_chunks = encoded_record_chunks_used
            .iter_mut()
            .zip(single_records_bit_arrays.array_chunks::<{ SINGLE_RECORD_BIT_ARRAY_SIZE }>())
            .map(|(encoded_record_chunks_used, bytes)| {
                encoded_record_chunks_used
                    .as_raw_mut_slice()
                    .copy_from_slice(bytes);
                let num_encoded_record_chunks = encoded_record_chunks_used.count_ones();
                if num_encoded_record_chunks > Record::NUM_CHUNKS {
                    return Err(
                        SectorContentsMapFromBytesError::InvalidEncodedRecordChunks {
                            actual: num_encoded_record_chunks,
                            max: Record::NUM_CHUNKS,
                        },
                    );
                }
                Ok(SBucket::try_from(num_encoded_record_chunks).expect("Verified above; qed"))
            })
            .collect::<Result<Vec<_>, _>>()?;

        Ok(Self {
            num_encoded_record_chunks,
            encoded_record_chunks_used,
        })
    }

    /// Size of sector contents map when encoded and stored in the plot for specified number of
    /// pieces in sector
    pub const fn encoded_size(pieces_in_sector: u16) -> usize {
        SINGLE_RECORD_BIT_ARRAY_SIZE * pieces_in_sector as usize + Blake3Hash::SIZE
    }

    /// Encode internal contents into `output`
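    ///
    /// A minimal round-trip sketch (illustrative, not from the original docs):
    ///
    /// ```ignore
    /// let pieces_in_sector = 1000u16;
    /// let map = SectorContentsMap::new(pieces_in_sector);
    /// let mut buffer = vec![0u8; SectorContentsMap::encoded_size(pieces_in_sector)];
    /// map.encode_into(&mut buffer).unwrap();
    /// assert_eq!(map, SectorContentsMap::from_bytes(&buffer, pieces_in_sector).unwrap());
    /// ```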
    pub fn encode_into(&self, output: &mut [u8]) -> Result<(), SectorContentsMapEncodeIntoError> {
        if output.len() != Self::encoded_size(self.encoded_record_chunks_used.len() as u16) {
            return Err(SectorContentsMapEncodeIntoError::InvalidBytesLength {
                expected: Self::encoded_size(self.encoded_record_chunks_used.len() as u16),
                actual: output.len(),
            });
        }

        let slice = self.encoded_record_chunks_used.as_slice();
        // SAFETY: `BitArray` is a transparent data structure containing array of bytes
        let slice = unsafe {
            slice::from_raw_parts(
                slice.as_ptr() as *const u8,
                slice.len() * SINGLE_RECORD_BIT_ARRAY_SIZE,
            )
        };

        // Write data and checksum
        output[..slice.len()].copy_from_slice(slice);
        output[slice.len()..].copy_from_slice(blake3_hash(slice).as_ref());

        Ok(())
    }

    /// Number of encoded chunks in each record
    pub fn num_encoded_record_chunks(&self) -> &[SBucket] {
        &self.num_encoded_record_chunks
    }

    /// Iterate over individual record bitfields
    pub fn iter_record_bitfields(&self) -> &[SingleRecordBitArray] {
        &self.encoded_record_chunks_used
    }

    /// Iterate mutably over individual record bitfields
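    ///
    /// A sketch of how plotting code might mark chunks as encoded (illustrative;
    /// `should_encode` is a hypothetical predicate, not part of this crate):
    ///
    /// ```ignore
    /// for mut record_bitfield in sector_contents_map.iter_record_bitfields_mut() {
    ///     for (s_bucket, mut encoded) in record_bitfield.iter_mut().enumerate() {
    ///         *encoded = should_encode(s_bucket);
    ///     }
    ///     // Dropping `record_bitfield` recounts its number of encoded chunks
    /// }
    /// ```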
    pub fn iter_record_bitfields_mut(
        &mut self,
    ) -> impl ExactSizeIterator<Item = EncodedChunksUsed<'_>> + '_ {
        self.encoded_record_chunks_used
            .iter_mut()
            .zip(&mut self.num_encoded_record_chunks)
            .map(
                |(encoded_record_chunks_used, num_encoded_record_chunks)| EncodedChunksUsed {
                    encoded_record_chunks_used,
                    num_encoded_record_chunks,
                    potentially_updated: false,
                },
            )
    }

    /// Returns sizes of each s-bucket
    pub fn s_bucket_sizes(&self) -> Box<[u16; Record::NUM_S_BUCKETS]> {
        // Rayon doesn't support iteration over custom types yet
        let s_bucket_sizes = (u16::from(SBucket::ZERO)..=u16::from(SBucket::MAX))
            .into_par_iter()
            .map(SBucket::from)
            .map(|s_bucket| {
                self.iter_s_bucket_records(s_bucket)
                    .expect("S-bucket guaranteed to be in range; qed")
                    .count() as u16
            })
            .collect::<Box<_>>();

        assert_eq!(s_bucket_sizes.len(), Record::NUM_S_BUCKETS);
        let mut s_bucket_sizes = ManuallyDrop::new(s_bucket_sizes);
        // SAFETY: Original memory is not dropped, number of elements checked above
        unsafe { Box::from_raw(s_bucket_sizes.as_mut_ptr() as *mut [u16; Record::NUM_S_BUCKETS]) }
    }

    /// Creates an iterator of `(s_bucket, encoded_chunk_used, chunk_location)`, where `s_bucket`
    /// is the position of the chunk in the erasure coded record, `encoded_chunk_used` indicates
    /// whether it was encoded and `chunk_location` is the offset of the chunk in the plot (across
    /// all s-buckets).
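    ///
    /// A usage sketch (illustrative, not from the original docs):
    ///
    /// ```ignore
    /// for (s_bucket, encoded_chunk_used, chunk_location) in
    ///     sector_contents_map.iter_record_chunk_to_plot(PieceOffset::ZERO)
    /// {
    ///     // `chunk_location` is a chunk index into the plotted sector as a whole
    /// }
    /// ```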
    pub fn iter_record_chunk_to_plot(
        &self,
        piece_offset: PieceOffset,
    ) -> impl Iterator<Item = (SBucket, bool, usize)> + '_ {
        // Iterate over all s-buckets
        (SBucket::ZERO..=SBucket::MAX)
            // In each s-bucket map all records used
            .flat_map(|s_bucket| {
                self.iter_s_bucket_records(s_bucket)
                    .expect("S-bucket guaranteed to be in range; qed")
                    .map(move |(current_piece_offset, encoded_chunk_used)| {
                        (s_bucket, current_piece_offset, encoded_chunk_used)
                    })
            })
            // We've got contents of all s-buckets in a flat iterator, enumerate them so it is
            // possible to find chunks in the plot later if desired
            .enumerate()
            // Retain only the piece offset we care about
            .filter_map(
                move |(chunk_location, (s_bucket, current_piece_offset, encoded_chunk_used))| {
                    // In case the record for `piece_offset` is found, return necessary information
                    (current_piece_offset == piece_offset).then_some((
                        s_bucket,
                        encoded_chunk_used,
                        chunk_location,
                    ))
                },
            )
            // Tiny optimization in case we have already found all chunks of this record
            .take(Record::NUM_CHUNKS)
    }

    /// Creates an iterator of `Option<(chunk_offset, encoded_chunk_used)>`, where each entry
    /// corresponds to the s-bucket/position of the chunk in the erasure coded record,
    /// `encoded_chunk_used` indicates whether it was encoded and `chunk_offset` is the offset of
    /// the chunk in the corresponding s-bucket.
    ///
    /// Similar to [`Self::iter_record_chunk_to_plot()`], but runs in parallel, returns entries for
    /// all s-buckets, and offsets are within corresponding s-buckets rather than the whole plot.
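    ///
    /// A usage sketch (illustrative, not from the original docs):
    ///
    /// ```ignore
    /// sector_contents_map
    ///     .par_iter_record_chunk_to_plot(piece_offset)
    ///     .enumerate()
    ///     .for_each(|(s_bucket, maybe_chunk)| {
    ///         if let Some((chunk_offset, encoded_chunk_used)) = maybe_chunk {
    ///             // This record's chunk lives at `chunk_offset` within s-bucket `s_bucket`
    ///         }
    ///     });
    /// ```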
    pub fn par_iter_record_chunk_to_plot(
        &self,
        piece_offset: PieceOffset,
    ) -> impl IndexedParallelIterator<Item = Option<(usize, bool)>> + '_ {
        let piece_offset = usize::from(piece_offset);
        (u16::from(SBucket::ZERO)..=u16::from(SBucket::MAX))
            .into_par_iter()
            .map(SBucket::from)
            // In each s-bucket map all records used
            .map(move |s_bucket| {
                let encoded_chunk_used = record_has_s_bucket_chunk(
                    s_bucket.into(),
                    &self.encoded_record_chunks_used[piece_offset],
                    usize::from(self.num_encoded_record_chunks[piece_offset]),
                )?;

                // How many chunks of other records are in this s-bucket before the piece offset
                // we care about
                let chunk_offset = self
                    .encoded_record_chunks_used
                    .iter()
                    .zip(&self.num_encoded_record_chunks)
                    .take(piece_offset)
                    .filter(move |(record_bitfields, num_encoded_record_chunks)| {
                        record_has_s_bucket_chunk(
                            s_bucket.into(),
                            record_bitfields,
                            usize::from(**num_encoded_record_chunks),
                        )
                        .is_some()
                    })
                    .count();

                Some((chunk_offset, encoded_chunk_used))
            })
    }

    /// Creates an iterator of `(piece_offset, encoded_chunk_used)`, where `piece_offset`
    /// corresponds to the record to which the chunk belongs and `encoded_chunk_used` indicates
    /// whether it was encoded.
    ///
    /// Returns an error if `s_bucket` is outside of the [`Record::NUM_S_BUCKETS`] range.
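    ///
    /// A usage sketch (illustrative, not from the original docs):
    ///
    /// ```ignore
    /// for (piece_offset, encoded_chunk_used) in
    ///     sector_contents_map.iter_s_bucket_records(SBucket::ZERO)?
    /// {
    ///     // One entry per record that has a chunk in this s-bucket
    /// }
    /// ```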
    pub fn iter_s_bucket_records(
        &self,
        s_bucket: SBucket,
    ) -> Result<impl Iterator<Item = (PieceOffset, bool)> + '_, SectorContentsMapIterationError>
    {
        let s_bucket = usize::from(s_bucket);

        if s_bucket >= Record::NUM_S_BUCKETS {
            return Err(SectorContentsMapIterationError::SBucketOutOfRange {
                provided: s_bucket,
                max: Record::NUM_S_BUCKETS,
            });
        }

        Ok((PieceOffset::ZERO..)
            .zip(
                self.encoded_record_chunks_used
                    .iter()
                    .zip(&self.num_encoded_record_chunks),
            )
            .filter_map(
                move |(piece_offset, (record_bitfields, num_encoded_record_chunks))| {
                    let encoded_chunk_used = record_has_s_bucket_chunk(
                        s_bucket,
                        record_bitfields,
                        usize::from(*num_encoded_record_chunks),
                    )?;

                    Some((piece_offset, encoded_chunk_used))
                },
            ))
    }

    /// Iterate over chunks of an s-bucket, indicating whether an encoded chunk is used at the
    /// corresponding position
    ///
    /// Returns an error if `s_bucket` is outside of the [`Record::NUM_S_BUCKETS`] range.
    pub fn iter_s_bucket_encoded_record_chunks_used(
        &self,
        s_bucket: SBucket,
    ) -> Result<impl Iterator<Item = bool> + '_, SectorContentsMapIterationError> {
        let s_bucket = usize::from(s_bucket);

        if s_bucket >= Record::NUM_S_BUCKETS {
            return Err(SectorContentsMapIterationError::SBucketOutOfRange {
                provided: s_bucket,
                max: Record::NUM_S_BUCKETS,
            });
        }

        Ok(self
            .encoded_record_chunks_used
            .iter()
            .map(move |record_bitfields| record_bitfields[s_bucket]))
    }
}

/// Checks if the record has a chunk at the corresponding s-bucket: returns `Some(true)` if it does
/// and the chunk is encoded, `Some(false)` if it does and the chunk is unencoded, and `None` if
/// the record has no chunk at this s-bucket.
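///
/// The placement rule for unencoded chunks (a worked example, not from the original docs): a
/// record always occupies exactly `Record::NUM_CHUNKS` s-buckets in total. Encoded chunks sit at
/// the s-buckets whose bits are set, and the remaining unencoded chunks fill the lowest unset
/// s-buckets in order. With (hypothetically) `Record::NUM_CHUNKS == 4` and encoded chunks at
/// s-buckets 1 and 4, the two unencoded chunks land at s-buckets 0 and 2, while s-buckets 3 and 5
/// hold no chunk for this record.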
fn record_has_s_bucket_chunk(
    s_bucket: usize,
    record_bitfields: &SingleRecordBitArray,
    num_encoded_record_chunks: usize,
) -> Option<bool> {
    if record_bitfields[s_bucket] {
        // Bit is explicitly set to `true`, easy case
        Some(true)
    } else if num_encoded_record_chunks == Record::NUM_CHUNKS {
        // Every chunk of this record is encoded, so an unset bit means no chunk here
        None
    } else {
        // Count how many encoded chunks we have before the current offset
        let encoded_before = record_bitfields[..s_bucket].count_ones();
        let unencoded_before = s_bucket - encoded_before;
        // And how many unencoded chunks we have in total (the total number of used chunks is
        // always `Record::NUM_CHUNKS`)
        let unencoded_total = Record::NUM_CHUNKS.saturating_sub(num_encoded_record_chunks);

        if unencoded_before < unencoded_total {
            // Have not yet seen all unencoded chunks before the current offset, hence the
            // current offset qualifies
            Some(false)
        } else {
            None
        }
    }
}