subspace_data_retrieval/object_fetcher/
partial_object.rs

1//! Object fetching internals for partial objects.
2//!
3//! Partial objects can contain potential segment padding at the start, middle, or end of the
4//! object. They never contain segment headers, which are stripped before reconstruction.
5//!
6//! Objects are reconstructed from 3 kinds of pieces:
7//! - pieces at the start of a segment, which have a variable-length segment header,
8//! - pieces in the middle of a segment, which have no header or padding, and
9//! - pieces at the end of a segment, which have variable-length padding.
10//!
11//! This module provides a uniform interface for handling these different kinds of pieces:
//! - the `segment_data_length` field of `RawPieceData` is used to track how much partial object
//!   data is available in the current segment. It is `None` while a segment header still has to
//!   be stripped.
14//! - `RawPieceData` is used to store the partial object data (including potential segment padding)
15//!   before the object length is known, and before each new piece is added to the partial object.
16//! - `PartialObject` is used to store the partial object data after the object length is known.
17
18use crate::object_fetcher::segment_header::{strip_segment_header, MAX_SEGMENT_PADDING};
19use crate::object_fetcher::{decode_data_length, Error, MAX_ENCODED_LENGTH_SIZE};
20use parity_scale_codec::{Decode, Input};
21use std::cmp::min;
22use std::collections::BTreeSet;
23use std::fmt;
24use std::fmt::Formatter;
25use subspace_core_primitives::hashes::{blake3_hash, Blake3Hash};
26use subspace_core_primitives::objects::GlobalObject;
27use subspace_core_primitives::pieces::{PieceIndex, RawRecord};
28use subspace_core_primitives::segments::RecordedHistorySegment;
29use tracing::{debug, trace};
30
/// The fixed value of every padding byte.
///
/// Trailing bytes with this value are counted as potential segment padding when a partial object
/// is analysed.
pub(crate) const PADDING_BYTE_VALUE: u8 = 0;
33
/// The data before an object's length is known.
///
/// The bytes are kept in three parts: the data before any potential segment padding, the
/// potential padding itself, and the data after the potential padding.
#[derive(Clone, Eq, PartialEq)]
pub struct RawPieceData {
    /// The available data for the object in the current segment.
    ///
    /// This is `None` when the next piece to be added is the first piece in a segment, because the
    /// segment header has to be stripped before the available length can be worked out.
    segment_data_length: Option<usize>,

    /// The object data before segment padding, starting with the encoded object length.
    ///
    /// This can be empty for an object which starts in segment padding.
    prefix_data: Vec<u8>,

    /// The potential padding data. The bytes in this data can have any values.
    maybe_padding_data: Vec<u8>,

    /// The partial object data after segment padding.
    /// For objects which don't overlap the end of a segment, this is empty.
    ///
    /// The encoded object length might overlap with the start of the suffix data.
    suffix_data: Vec<u8>,
}
54
55impl fmt::Debug for RawPieceData {
56    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
57        f.debug_struct("RawPieceData")
58            .field("segment_data_length", &self.segment_data_length)
59            .field("prefix_data", &hex::encode(&self.prefix_data))
60            .field("maybe_padding_data", &hex::encode(&self.maybe_padding_data))
61            .field("suffix_data", &hex::encode(&self.suffix_data))
62            .finish()
63    }
64}
65
66impl RawPieceData {
67    /// Create a new empty `RawPieceData` object for the first piece in an object.
68    pub fn new_for_first_piece(mapping: GlobalObject) -> Self {
69        let GlobalObject {
70            piece_index,
71            offset,
72            hash: _,
73        } = mapping;
74
75        let data_shards = RecordedHistorySegment::NUM_RAW_RECORDS;
76        let source_position_in_segment = piece_index.source_position() as usize;
77
78        // How much bytes are definitely available starting at `piece_index` and `offset` without
79        // crossing a segment boundary.
80        let bytes_available_in_segment =
81            (data_shards - source_position_in_segment) * RawRecord::SIZE - offset as usize;
82
83        Self {
84            segment_data_length: Some(bytes_available_in_segment),
85            prefix_data: Vec::new(),
86            maybe_padding_data: Vec::new(),
87            suffix_data: Vec::new(),
88        }
89    }
90
91    /// Create a new empty `RawPieceData` object for the next piece in an object.
92    pub fn new_for_next_piece(remaining_data_length: usize, piece_index: PieceIndex) -> Self {
93        let source_position_in_segment = piece_index.source_position() as usize;
94        let first_data_piece_in_segment = source_position_in_segment == 0;
95
96        if first_data_piece_in_segment {
97            Self {
98                // We have to strip the segment header to work out the segment data length
99                segment_data_length: None,
100                prefix_data: Vec::new(),
101                maybe_padding_data: Vec::new(),
102                suffix_data: Vec::new(),
103            }
104        } else {
105            let data_shards = RecordedHistorySegment::NUM_RAW_RECORDS;
106            // How much bytes are definitely available starting at `piece_index` and `offset` without
107            // crossing a segment boundary.
108            let bytes_available_in_segment =
109                (data_shards - source_position_in_segment) * RawRecord::SIZE;
110            let remaining_data_length = min(remaining_data_length, bytes_available_in_segment);
111
112            Self {
113                segment_data_length: Some(remaining_data_length),
114                prefix_data: Vec::new(),
115                maybe_padding_data: Vec::new(),
116                suffix_data: Vec::new(),
117            }
118        }
119    }
120
121    /// Adds the supplied `piece_data` to this `RawPieceData` object.
122    pub fn add_piece_data(
123        &mut self,
124        piece_index: PieceIndex,
125        mut piece_data: Vec<u8>,
126        mapping: GlobalObject,
127    ) -> Result<(), Error> {
128        trace!(
129            ?self,
130            ?piece_index,
131            ?mapping,
132            piece_data_len = %piece_data.len(),
133            "about to add piece data",
134        );
135
136        let data_shards = RecordedHistorySegment::NUM_RAW_RECORDS;
137        let source_position_in_segment = piece_index.source_position() as usize;
138
139        // The last few bytes might contain padding if a piece is the last piece in the segment
140        let last_data_piece_in_segment = source_position_in_segment == data_shards - 1;
141
142        if self.segment_data_length.is_none() {
143            let (piece_data, max_remaining_object_bytes) =
144                strip_segment_header(piece_data, piece_index.segment_index(), mapping)?;
145
146            // Objects are currently much smaller than segments, so we know there's no further
147            // headers or segment padding in an object that's near the start of a segment.
148            // TODO: support objects that span multiple segments
149            self.segment_data_length = Some(max_remaining_object_bytes);
150
151            self.add_piece_data_without_padding(piece_data);
152        } else if last_data_piece_in_segment {
153            let segment_data_length = self
154                .segment_data_length
155                .expect("already checked for None; qed");
156
157            // The offset has already been skipped, but the available segment data could still
158            // truncate bytes from the end of the piece. So each byte we truncate removes one byte
159            // of possible padding.
160            let truncated_byte_len = piece_data.len().saturating_sub(segment_data_length);
161            let possible_padding_data_length =
162                MAX_SEGMENT_PADDING.saturating_sub(truncated_byte_len);
163
164            // There's no header in this piece, so we can just truncate the data.
165            piece_data.truncate(segment_data_length);
166
167            // Now split off the padding, if any.
168            let maybe_padding_data = piece_data.split_off(
169                piece_data
170                    .len()
171                    .saturating_sub(possible_padding_data_length),
172            );
173
174            // Segment headers are variable length, so we don't know how much data is in the next
175            // segment
176            self.segment_data_length = None;
177
178            self.add_piece_data_with_padding(&piece_data, &maybe_padding_data);
179        } else {
180            let segment_data_length = self
181                .segment_data_length
182                .as_mut()
183                .expect("already checked for None; qed");
184
185            // There's no header and no padding in this piece - the typical case.
186            // (If we started at the first piece in a segment, the offset has already skipped the header.)
187            piece_data.truncate(*segment_data_length);
188            *segment_data_length = segment_data_length.saturating_sub(piece_data.len());
189
190            self.add_piece_data_without_padding(piece_data);
191        }
192
193        trace!(?self, ?piece_index, ?mapping, "added piece data");
194
195        Ok(())
196    }
197
198    /// Add data with potential padding to the end of this partial data.
199    /// This method can also handle data that doesn't actually have any padding.
200    ///
201    /// Panics if the object already has padding.
202    fn add_piece_data_with_padding(
203        &mut self,
204        new_prefix_data: &[u8],
205        new_maybe_padding_data: &[u8],
206    ) {
207        // Doesn't actually have padding
208        if new_maybe_padding_data.is_empty() {
209            self.add_piece_data_without_padding(new_prefix_data.to_vec());
210            return;
211        }
212
213        trace!(
214            new_prefix_data = ?hex::encode(new_prefix_data),
215            maybe_padding_data = ?hex::encode(new_maybe_padding_data),
216            "adding piece data with padding",
217        );
218
219        assert!(
220            !self.has_padding(),
221            "add_piece_data_with_padding() can only be called once: {self:?}, \
222             {new_prefix_data:?}, {new_maybe_padding_data:?}"
223        );
224
225        self.prefix_data.extend_from_slice(new_prefix_data);
226        // The padding was empty, so this does assignment (but without allocating another vector)
227        self.maybe_padding_data
228            .extend_from_slice(new_maybe_padding_data);
229    }
230
231    /// Add padding-less `piece_data` to the end of this partial object.
232    fn add_piece_data_without_padding(&mut self, mut piece_data: Vec<u8>) {
233        trace!(
234            piece_data = ?hex::encode(&piece_data),
235            "adding piece data without padding",
236        );
237
238        if self.has_padding() {
239            // If there might be padding, or there is suffix data, new data must be added at the end
240            // of the suffix data.
241            self.suffix_data.append(&mut piece_data);
242        } else {
243            // Otherwise, the data belongs to the existing prefix data.
244            self.prefix_data.append(&mut piece_data);
245        }
246    }
247
248    /// Returns true if this object has padding, or might have padding.
249    fn has_padding(&self) -> bool {
250        !self.maybe_padding_data.is_empty() || !self.suffix_data.is_empty()
251    }
252
    /// Returns the available object data in the current segment.
    ///
    /// Returns `None` if the length isn't known yet, because a segment header still has to be
    /// stripped.
    pub fn segment_data_length(&self) -> Option<usize> {
        self.segment_data_length
    }
257}
258
/// A possible data length and segment padding length for an object.
/// Sorts by data length first, then ignored padding length.
///
/// This sorting order allows us to check objects that contain all the potential padding bytes,
/// then download more pieces if needed.
///
/// The sort order comes from the derived `Ord`, which compares the fields in declaration order,
/// so the order of the fields below must not be changed.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
struct ObjectLength {
    /// The length of the object data, including the encoded length.
    data_length: usize,

    /// The number of ignored zero-valued bytes at the end of the prefix data.
    /// These bytes could be segment padding, or part of the object data.
    ignored_padding_length: usize,
}
273
/// The state of a single object that is being fetched.
/// Each object is contained within a single block.
///
/// An object at the end of a segment needs to ignore any segment padding. To avoid reconstructing
/// the entire segment, we try each possible padding against the hash instead. We don't need to
/// store the padding bytes, because padding is always zero-valued.
///
/// Objects also need to ignore the parent segment header and `BlockContinuation` header at the
/// start of a segment.
#[derive(Clone, Eq, PartialEq)]
pub struct PartialObject {
    /// The object data, starting with the encoded object length, and maybe ending with segment
    /// padding.
    ///
    /// This can be empty for an object which starts in segment padding.
    /// (Such objects are always zero length, because padding is always zero-valued.)
    prefix_data: Vec<u8>,

    /// The partial object data after segment padding.
    /// For objects which don't overlap the end of a segment, this is empty.
    ///
    /// The encoded object length might overlap with the start of the suffix data.
    suffix_data: Vec<u8>,

    /// The possible object lengths and their corresponding padding lengths.
    /// For objects which aren't at the end of a segment, there is only one data length, and the
    /// padding length is zero.
    ///
    /// Lengths are removed from this set when the object data they select doesn't match the
    /// mapping hash. Several methods panic if this set is empty.
    lengths: BTreeSet<ObjectLength>,
}
303
304impl fmt::Debug for PartialObject {
305    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
306        f.debug_struct("PartialObject")
307            .field("prefix_data", &hex::encode(&self.prefix_data))
308            .field("suffix_data", &hex::encode(&self.suffix_data))
309            .field("lengths", &self.lengths)
310            .finish()
311    }
312}
313
314impl PartialObject {
    /// Given prefix data, potential padding data, and suffix data, returns a new `PartialObject`.
    /// Returns `Ok(None)` if more data is needed, or an error if object retrieval failed.
    ///
    /// This method can also handle data that doesn't actually have any padding.
    ///
    /// The encoded object length is decoded once for each number of trailing padding bytes that
    /// could be ignored, and every valid length is kept as a candidate. If none of the candidates
    /// decode to a valid length, the last decoding error is returned.
    ///
    /// The mapping is used for length decoding, and for the early hash check when some lengths
    /// can't be decoded yet.
    ///
    /// Panics if an object has a suffix, but no padding.
    pub fn new_with_padding(
        raw_data: &RawPieceData,
        max_object_len: usize,
        mapping: GlobalObject,
    ) -> Result<Option<Self>, Error> {
        let RawPieceData {
            segment_data_length,
            prefix_data,
            maybe_padding_data,
            suffix_data,
        } = raw_data;

        // No padding, so the data is continuous
        if maybe_padding_data.is_empty() {
            assert!(
                suffix_data.is_empty(),
                "suffix_data can only have bytes if there might be padding bytes"
            );
            return Self::new_without_padding(prefix_data, max_object_len, mapping);
        }

        trace!(
            ?max_object_len,
            ?mapping,
            ?segment_data_length,
            prefix_data = ?hex::encode(prefix_data),
            maybe_padding_data = ?hex::encode(maybe_padding_data),
            suffix_data = ?hex::encode(suffix_data),
            "trying to create new partial object with padding",
        );

        // Count the padding bytes
        let object_max_segment_padding_length = Self::count_padding_bytes(maybe_padding_data);

        trace!(?object_max_segment_padding_length, "analysed padding");

        // Keep the remaining non-padding and potential padding bytes
        // (This shadows the borrowed prefix with an owned copy that also holds the potential
        // padding.)
        let mut prefix_data = prefix_data.to_vec();
        prefix_data.extend_from_slice(maybe_padding_data);

        // Find the possible lengths for the object, for each potential padding byte.
        let mut lengths = BTreeSet::new();
        // The most recent length decoding error, returned if no length is valid.
        let mut last_error = None;

        for ignored_padding_length in 0..=object_max_segment_padding_length {
            // Only the bytes that could hold the encoded length are needed here.
            let (length_prefix, length_suffix) = padded_data(
                &prefix_data,
                ignored_padding_length,
                suffix_data,
                MAX_ENCODED_LENGTH_SIZE,
            );
            let mut length_data = ObjectDataInput::new_from_parts(length_prefix, length_suffix);

            match decode_data_length(&length_data.remaining_data(), max_object_len, mapping) {
                // A valid length
                Ok(Some((length_prefix_len, data_length))) => {
                    lengths.insert(ObjectLength {
                        data_length: length_prefix_len + data_length,
                        ignored_padding_length,
                    });
                }
                // Not enough data to decode all the lengths yet (this is a rare edge case).
                Ok(None) => {
                    if lengths.is_empty() {
                        // We need more data to decode a length.
                        return Ok(None);
                    }

                    // Optimisation: try any lengths we already have.
                    // This optimisation avoids trying to fetch a useless piece if there is a
                    // zero-length object in potential segment padding.
                    //
                    // Moving `prefix_data` and `lengths` inside the loop is fine, because both
                    // branches below return.
                    let mut partial_object = Self {
                        prefix_data,
                        suffix_data: suffix_data.to_vec(),
                        lengths,
                    };

                    if let Ok(Some(_)) = partial_object.try_reconstruct_object(mapping) {
                        // The other lengths don't matter, because we've found the object.
                        // Return the completed object to the caller, so it can find that object.
                        return Ok(Some(partial_object));
                    } else {
                        // We need more data to decode the correct length.
                        return Ok(None);
                    }
                }
                Err(err) => {
                    // This length is invalid and needs to be skipped.
                    last_error = Some(err);
                    continue;
                }
            }
        }

        trace!(?lengths, ?last_error, "analysed possible lengths");

        if lengths.is_empty() {
            // All lengths were invalid
            // (The loop runs at least once, and every iteration that gets here without inserting
            // a length has set `last_error`.)
            Err(last_error.expect("last_error is set if lengths is empty; qed"))
        } else {
            Ok(Some(Self {
                prefix_data,
                suffix_data: suffix_data.to_vec(),
                lengths,
            }))
        }
    }
428
429    /// Count the potential padding bytes in `maybe_padding_data`.
430    /// These bytes have the padding byte value, but could still be part of the object.
431    /// Returns the number of potential padding bytes.
432    fn count_padding_bytes(maybe_padding_data: &[u8]) -> usize {
433        maybe_padding_data
434            .iter()
435            .rev()
436            .take_while(|byte| **byte == PADDING_BYTE_VALUE)
437            .count()
438    }
439
440    /// Given the data for an object without any potential segment padding, returns a new
441    /// `PartialObject`.
442    ///
443    /// Returns `Ok(None)` if more data is needed, or an error if object retrieval failed.
444    ///
445    /// The mapping is only used for error reporting.
446    fn new_without_padding(
447        data: &[u8],
448        max_object_len: usize,
449        mapping: GlobalObject,
450    ) -> Result<Option<Self>, Error> {
451        trace!(
452            ?max_object_len,
453            ?mapping,
454            data = ?hex::encode(data),
455            "trying to create new partial object without padding",
456        );
457
458        let Some((length_prefix_len, data_length)) =
459            decode_data_length(data, max_object_len, mapping)?
460        else {
461            // Not enough data yet (this is a rare edge case).
462            return Ok(None);
463        };
464
465        Ok(Some(Self {
466            prefix_data: data.to_vec(),
467            suffix_data: Vec::new(),
468            lengths: [ObjectLength {
469                data_length: length_prefix_len + data_length,
470                ignored_padding_length: 0,
471            }]
472            .into(),
473        }))
474    }
475
476    /// Add `piece_data` with padding to the end of this partial object.
477    /// This method can also handle data that doesn't actually have any padding.
478    pub fn add_piece_data_with_padding(&mut self, mut new_data: RawPieceData) {
479        // Doesn't actually have padding
480        if new_data.maybe_padding_data.is_empty() {
481            let mut piece_data = new_data.prefix_data.to_vec();
482            piece_data.extend_from_slice(&new_data.suffix_data);
483
484            self.add_piece_data_without_padding(piece_data);
485            return;
486        }
487
488        assert!(
489            !self.has_padding(),
490            "add_piece_data_with_padding() can only be called once: {self:?}, {new_data:?}"
491        );
492
493        // Count the padding bytes
494        let object_max_segment_padding_length =
495            Self::count_padding_bytes(&new_data.maybe_padding_data);
496
497        // Add the new data to the existing data
498        self.prefix_data.append(&mut new_data.prefix_data);
499        // Keep the remaining non-padding and potential padding bytes
500        self.prefix_data.append(&mut new_data.maybe_padding_data);
501        self.suffix_data = new_data.suffix_data;
502
503        // There's only one data length, but now we have multiple padding lengths
504        let data_length = self.shortest_object_length();
505        // Avoid a range panic if there's actually no padding, by overwriting the existing length
506        for ignored_padding_length in 0..=object_max_segment_padding_length {
507            self.lengths.insert(ObjectLength {
508                data_length,
509                ignored_padding_length,
510            });
511        }
512    }
513
514    /// Add padding-less `piece_data` to the end of this partial object.
515    pub fn add_piece_data_without_padding(&mut self, mut piece_data: Vec<u8>) {
516        if self.has_padding() {
517            // If there might be padding, or there is suffix data, new data must be added at the end
518            // of the suffix data.
519            self.suffix_data.append(&mut piece_data);
520        } else {
521            // For performance, add the data to the existing prefix data.
522            self.prefix_data.append(&mut piece_data);
523        }
524    }
525
526    /// Returns true if this object has padding, or might have padding.
527    ///
528    /// Objects can have no padding in three different ways:
529    /// - they aren't at the end of a segment
530    /// - they are at the end of a segment, but all potential padding bytes are non-zero
531    /// - we've checked the non-zero ignored padding lengths, and they were all invalid
532    ///
533    /// Panics if the object has no valid lengths left.
534    fn has_padding(&self) -> bool {
535        !self.suffix_data.is_empty()
536            || self.lengths.len() > 1
537            || self.largest_ignored_padding_length() > 0
538    }
539
540    /// Returns the longest padding length the object could possibly have.
541    /// Adding padding can change the object length, but this is always returns the longest
542    /// possible amount of padding bytes.
543    ///
544    /// Panics if the object has no valid lengths left.
545    fn largest_ignored_padding_length(&self) -> usize {
546        self.lengths
547            .iter()
548            .map(|length| length.ignored_padding_length)
549            .max()
550            .expect("other methods return an error if lengths becomes empty; qed")
551    }
552
553    /// Check the hash against all possible objects with enough data.
554    /// If a possible object's data doesn't match the mapping hash, its length is removed.
555    ///
556    /// Returns valid object data, `Ok(None)` if we need more data, or an error if all possible
557    /// objects have invalid hashes.
558    ///
559    /// Panics if it is called after there are no possible lengths left.
560    /// (The previous call returned an error, so the caller should have stopped checking.)
561    pub fn try_reconstruct_object(
562        &mut self,
563        mapping: GlobalObject,
564    ) -> Result<Option<Vec<u8>>, Error> {
565        trace!(?mapping, ?self, "checking available objects");
566
567        // Try to decode the shortest object(s), until we don't have enough data.
568        loop {
569            let outcome = Vec::<u8>::decode(&mut self.shortest_object_data());
570
571            trace!(
572                checked_length = ?self.lengths.first(),
573                data = ?hex::encode(self.shortest_object_data().remaining_data()),
574                outcome = ?outcome.as_ref().map(hex::encode),
575                "checking object with length",
576            );
577
578            let Ok(data) = outcome else {
579                // Tell the caller we need more data, because the remaining lengths are longer.
580                return Ok(None);
581            };
582
583            let data_hash = blake3_hash(&data);
584
585            if data_hash == mapping.hash {
586                return Ok(Some(data));
587            } else {
588                // If we've run out of lengths to try, return a hash mismatch error.
589                // Otherwise, move on to the next longest object or next largest ignored padding.
590                self.mark_shortest_object_hash_invalid(data_hash, mapping, data)?;
591            }
592        }
593    }
594
595    /// Returns the maximum amount of data that still needs to be downloaded.
596    pub fn max_remaining_download_length(&self) -> usize {
597        // We add the ignored padding length, because if we ignore those padding bytes, we need to
598        // download extra bytes in another piece.
599        let longest_download = self
600            .lengths
601            .iter()
602            .map(|length| length.data_length + length.ignored_padding_length)
603            .max()
604            .expect("other methods return an error if lengths becomes empty; qed");
605
606        longest_download.saturating_sub(self.fetched_data_length())
607    }
608
609    /// Returns the shortest possible length for the object, based on potential segment padding.
610    ///
611    /// If the segment padding is outside the length prefix, the object length is known, and only
612    /// the object data varies.
613    ///
614    /// Panics if the object has no valid lengths left.
615    fn shortest_object_length(&self) -> usize {
616        self.lengths
617            .first()
618            .expect("other methods return an error if lengths becomes empty; qed")
619            .data_length
620    }
621
622    /// Returns the longest possible amount of data that has already been fetched for the object,
623    /// based on potential segment padding.
624    pub fn fetched_data_length(&self) -> usize {
625        self.prefix_data.len() + self.suffix_data.len()
626    }
627
628    /// Returns the shortest available data for the object, based on potential segment padding.
629    ///
630    /// If the segment padding is outside the length prefix, the object length is known, and only
631    /// the object data varies. These ties are broken using the smallest ignored padding length.
632    ///
633    /// Panics if the object has no valid lengths left.
634    fn shortest_object_data(&self) -> ObjectDataInput {
635        let length = self
636            .lengths
637            .first()
638            .expect("other methods return an error if lengths becomes empty; qed");
639
640        ObjectDataInput::new(self, *length)
641    }
642
643    /// Remove the shortest data, because it has an invalid hash.
644    /// Call this method if the shortest data has an incorrect hash.
645    ///
646    /// The mapping and data are only used for error reporting.
647    ///
648    /// Returns an error if there are no object lengths left to try.
649    fn mark_shortest_object_hash_invalid(
650        &mut self,
651        data_hash: Blake3Hash,
652        mapping: GlobalObject,
653        data: Vec<u8>,
654    ) -> Result<(), Error> {
655        trace!(data = %hex::encode(&data), "Invalid object data");
656
657        if self.lengths.len() > 1 {
658            // We still have more lengths to try.
659            let ignored_err = Error::InvalidDataHash {
660                data_hash,
661                data_length: self.shortest_object_length(),
662                mapping,
663                #[cfg(test)]
664                data: hex::encode(&data),
665            };
666            debug!(
667                ?ignored_err,
668                "Invalid data hash, trying next padding length"
669            );
670
671            self.lengths.pop_first();
672            Ok(())
673        } else {
674            // There are no lengths left, so we know all the possible data/padding combinations are invalid.
675            Err(Error::InvalidDataHash {
676                data_hash,
677                data_length: self.shortest_object_length(),
678                mapping,
679                #[cfg(test)]
680                data: hex::encode(&data),
681            })
682        }
683    }
684}
685
/// A wrapper struct which impls Input for a PartialObject's data.
///
/// The input is the prefix data followed by the suffix data, read as one continuous stream.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ObjectDataInput<'obj> {
    /// A sub-slice of the partial object's prefix data, with the exact amount of padding required.
    prefix_data: &'obj [u8],

    /// The partial object's suffix data.
    suffix_data: &'obj [u8],

    /// The number of bytes read from the input so far.
    /// The next read starts at this offset into the combined prefix and suffix data.
    read_bytes: usize,
}
698
699impl Input for ObjectDataInput<'_> {
700    fn remaining_len(&mut self) -> Result<Option<usize>, parity_scale_codec::Error> {
701        Ok(Some(self.remaining_bytes()))
702    }
703
704    fn read(&mut self, buf: &mut [u8]) -> Result<(), parity_scale_codec::Error> {
705        let buf_len = buf.len();
706        if buf_len > self.remaining_bytes() {
707            return Err("Not enough data to fill buffer".into());
708        }
709
710        self.prefix_data
711            .iter()
712            .chain(self.suffix_data)
713            .skip(self.read_bytes)
714            .zip(buf)
715            .for_each(|(input_byte, buf_byte)| {
716                *buf_byte = *input_byte;
717            });
718
719        self.read_bytes += buf_len;
720
721        Ok(())
722    }
723}
724
725impl<'obj> ObjectDataInput<'obj> {
726    /// Creates a new `ObjectDataInput` object from a `PartialObject` and an object length.
727    fn new(partial_object: &'obj PartialObject, object_length: ObjectLength) -> Self {
728        let (prefix_data, suffix_data) = padded_data(
729            &partial_object.prefix_data,
730            object_length.ignored_padding_length,
731            &partial_object.suffix_data,
732            object_length.data_length,
733        );
734
735        Self::new_from_parts(prefix_data, suffix_data)
736    }
737
738    /// Creates a new `ObjectDataInput` object from a slice pair.
739    fn new_from_parts(prefix_data: &'obj [u8], suffix_data: &'obj [u8]) -> Self {
740        Self {
741            prefix_data,
742            suffix_data,
743            read_bytes: 0,
744        }
745    }
746
747    /// Returns the total length of the data.
748    fn len(&self) -> usize {
749        self.prefix_data.len() + self.suffix_data.len()
750    }
751
752    /// Returns true if the data is empty.
753    #[allow(dead_code)]
754    fn is_empty(&self) -> bool {
755        self.prefix_data.is_empty() && self.suffix_data.is_empty()
756    }
757
758    /// Returns the total length of the data that has not been read.
759    fn remaining_bytes(&self) -> usize {
760        self.len().saturating_sub(self.read_bytes)
761    }
762
763    /// Reads `bytes` of unread data, and returns it as a vector.
764    fn data(&mut self, bytes: usize) -> Result<Vec<u8>, parity_scale_codec::Error> {
765        let mut data = vec![0; bytes];
766
767        self.read(&mut data).map(|()| data)
768    }
769
770    /// Reads and returns all the unread data, as a vector.
771    fn remaining_data(&mut self) -> Vec<u8> {
772        self.data(self.remaining_bytes())
773            .expect("vec has exact capacity needed; qed")
774    }
775}
776
/// Selects the object data from `prefix_data` and `suffix_data`, as a pair of subslices.
///
/// The last `ignored_padding_length` bytes of the prefix data are skipped, and the suffix data
/// follows directly after the remaining prefix data.
///
/// The combined length of the returned slices is at most `data_length`. It is shorter if there
/// isn't enough data available.
fn padded_data<'data>(
    prefix_data: &'data [u8],
    ignored_padding_length: usize,
    suffix_data: &'data [u8],
    data_length: usize,
) -> (&'data [u8], &'data [u8]) {
    // The prefix comes first, so it is limited by both the ignored padding and the total length.
    let unpadded_prefix_len = prefix_data.len().saturating_sub(ignored_padding_length);
    let kept_prefix_len = min(unpadded_prefix_len, data_length);

    // The suffix gets whatever length is left over. This can't underflow, because the kept
    // prefix length is never longer than `data_length`.
    let kept_suffix_len = min(data_length - kept_prefix_len, suffix_data.len());

    (
        &prefix_data[..kept_prefix_len],
        &suffix_data[..kept_suffix_len],
    )
}
797
#[cfg(test)]
mod test {
    use super::*;
    use parity_scale_codec::Encode;
    use subspace_archiving::archiver::SegmentItem;

    /// Checks that `PADDING_BYTE_VALUE` matches the encoding of `SegmentItem::Padding`, which is
    /// expected to be exactly one byte.
    #[test]
    fn padding_byte_value_constant() {
        let padding_byte = SegmentItem::Padding.encode();
        assert_eq!(padding_byte, vec![PADDING_BYTE_VALUE]);
    }
}
809}