subspace_data_retrieval/object_fetcher/
segment_header.rs

1//! Object fetching internals for segment headers.
2//!
3//! This module implements segment header stripping. Segment headers are stripped from pieces
4//! before they are used to reconstruct objects.
5
6use crate::object_fetcher::{decode_data_length, Error};
7use parity_scale_codec::{Decode, Encode, Input, IoReader};
8use std::io::Cursor;
9use subspace_archiving::archiver::SegmentItem;
10use subspace_core_primitives::hashes::Blake3Hash;
11use subspace_core_primitives::objects::GlobalObject;
12use subspace_core_primitives::segments::{
13    ArchivedBlockProgress, LastArchivedBlock, SegmentCommitment, SegmentHeader, SegmentIndex,
14};
15
/// The largest number of padding bytes a segment can contain.
///
/// Padding equals the gap between the SCALE compact-encoded lengths of the smallest and largest
/// block sizes in the consensus chain. As of January 2025, the minimum block size is
/// (potentially) 63 or less, and the maximum block size is in the range 2^14 to 2^30 - 1.
/// <https://docs.substrate.io/reference/scale-codec/#fn-1>
pub const MAX_SEGMENT_PADDING: usize = 3;

/// The maximum block length for non-`Normal` extrinsics: 5 MiB.
///
/// Duplicated from the constant of the same purpose in `subspace_runtime_primitives`.
pub const MAX_BLOCK_LENGTH: u32 = 5 * 1024 * 1024;

/// The only segment version variant this module can parse.
const SEGMENT_VERSION_VARIANT: u8 = 0;

/// The segment item variant that marks a block continuation.
const BLOCK_CONTINUATION_VARIANT: u8 = 3;
33
34/// The minimum size of a segment header.
35#[inline]
36pub fn min_segment_header_encoded_size() -> usize {
37    let min_segment_header = SegmentHeader::V0 {
38        segment_index: 0.into(),
39        segment_commitment: SegmentCommitment::default(),
40        prev_segment_header_hash: Blake3Hash::default(),
41        last_archived_block: LastArchivedBlock {
42            number: 0,
43            archived_progress: ArchivedBlockProgress::Complete,
44        },
45    };
46
47    min_segment_header.encoded_size()
48}
49
50/// The maximum size of the segment header.
51#[inline]
52pub fn max_segment_header_encoded_size() -> usize {
53    let max_segment_header = SegmentHeader::V0 {
54        segment_index: u64::MAX.into(),
55        segment_commitment: SegmentCommitment::default(),
56        prev_segment_header_hash: Blake3Hash::default(),
57        last_archived_block: LastArchivedBlock {
58            number: u32::MAX,
59            archived_progress: ArchivedBlockProgress::Partial(u32::MAX),
60        },
61    };
62
63    max_segment_header.encoded_size()
64}
65
66/// Removes the segment header from the start of a piece, and returns the remaining data.
67/// Also returns the maximum remaining bytes in the object.
68///
69/// The maximum remaining bytes is the length of the data in the block continuation containing the
70/// object. This block continuation might span multiple pieces, and can contain multiple objects
71/// (or other data).
72///
73/// Returns an error if the data is too short to contain a segment header, or if the header is
74/// invalid.
75///
76/// The segment index and mapping are only used for error reporting.
77pub fn strip_segment_header(
78    piece_data: Vec<u8>,
79    segment_index: SegmentIndex,
80    mapping: GlobalObject,
81) -> Result<(Vec<u8>, usize), Error> {
82    let mut piece_data = IoReader(Cursor::new(piece_data));
83
84    // Decode::decode() wants to read the entire segment here, so we have to decode it manually.
85    // In SCALE encoding, variants are always one byte.
86    let segment_variant = piece_data
87        .read_byte()
88        .map_err(|source| Error::SegmentDecoding {
89            source,
90            segment_index,
91            mapping,
92        })?;
93    // We only know how to decode variant 0.
94    if segment_variant != SEGMENT_VERSION_VARIANT {
95        return Err(Error::UnknownSegmentVariant {
96            segment_variant,
97            segment_index,
98            mapping,
99        });
100    }
101
102    // Variant 0 consists of a list of items, with no length prefix.
103    let segment_item =
104        SegmentItem::decode(&mut piece_data).map_err(|source| Error::SegmentDecoding {
105            source,
106            segment_index,
107            mapping,
108        })?;
109
110    // The parent segment header is always first.
111    let SegmentItem::ParentSegmentHeader(_) = segment_item else {
112        return Err(Error::UnexpectedSegmentItem {
113            segment_progress: piece_data.0.position() as usize,
114            segment_index,
115            segment_item: Box::new(segment_item),
116            mapping,
117        });
118    };
119
120    // Since we're reading a continuing object, the next item must be a block continuation.
121    // We want to discard its header and keep its data. But the block continuation might span
122    // multiple pieces. So we need to read its header manually, too.
123    let segment_item_variant = piece_data
124        .read_byte()
125        .map_err(|source| Error::SegmentDecoding {
126            source,
127            segment_index,
128            mapping,
129        })?;
130
131    // Now strip off the header so we can read the block continuation length.
132    let header_bytes = piece_data.0.position() as usize;
133    let mut piece_data = piece_data.0.into_inner().split_off(header_bytes);
134    let segment_item_lengths = decode_data_length(&piece_data, MAX_BLOCK_LENGTH as usize, mapping)?;
135
136    // Block continuations are variant 3
137    if segment_item_variant != BLOCK_CONTINUATION_VARIANT || segment_item_lengths.is_none() {
138        return Err(Error::UnexpectedSegmentItemVariant {
139            segment_progress: header_bytes,
140            segment_index,
141            segment_item_variant,
142            segment_item_lengths,
143            mapping,
144        });
145    }
146
147    let (segment_item_prefix_len, segment_item_data_len) =
148        segment_item_lengths.expect("just checked length is Some; qed");
149    // Now strip off the length prefix, and any bytes that aren't in the block continuation.
150    let mut piece_data = piece_data.split_off(segment_item_prefix_len);
151    piece_data.truncate(segment_item_data_len);
152
153    Ok((piece_data, segment_item_data_len))
154}
155
#[cfg(test)]
mod test {
    use super::*;
    use parity_scale_codec::{Compact, CompactLen};
    use subspace_archiving::archiver::Segment;
    use subspace_core_primitives::objects::BlockObjectMapping;

    /// The padding constant must match the difference between the compact-encoded lengths of
    /// the maximum block length and a one-byte length.
    #[test]
    fn max_segment_padding_constant() {
        let longest_prefix = Compact::<u32>::compact_len(&MAX_BLOCK_LENGTH);
        let shortest_prefix = Compact::<u32>::compact_len(&1);

        assert_eq!(MAX_SEGMENT_PADDING, longest_prefix - shortest_prefix);
    }

    /// The minimum header size must be strictly smaller than the maximum header size.
    #[test]
    fn segment_header_length_constants() {
        let min_size = min_segment_header_encoded_size();
        let max_size = max_segment_header_encoded_size();

        assert!(
            min_size < max_size,
            "min_segment_header_encoded_size: {} must be less than max_segment_header_encoded_size: {}",
            min_size,
            max_size
        );
    }

    /// The first encoded byte of a `Segment::V0` must be the version variant constant.
    #[test]
    fn segment_version_variant_constant() {
        let encoded_segment = Segment::V0 { items: Vec::new() }.encode();

        assert_eq!(encoded_segment[0], SEGMENT_VERSION_VARIANT);
    }

    /// The first encoded byte of a block continuation must be the block continuation variant
    /// constant.
    #[test]
    fn block_continuation_variant_constant() {
        let encoded_item = SegmentItem::BlockContinuation {
            bytes: Vec::new(),
            object_mapping: BlockObjectMapping::default(),
        }
        .encode();

        assert_eq!(encoded_item[0], BLOCK_CONTINUATION_VARIANT);
    }
}