subspace_farmer/single_disk_farm/
metrics.rs

1//! Metrics for single disk farm
2
3use crate::farm::{FarmId, FarmingError, ProvingResult};
4use prometheus_client::metrics::counter::Counter;
5use prometheus_client::metrics::family::Family;
6use prometheus_client::metrics::gauge::Gauge;
7use prometheus_client::metrics::histogram::{exponential_buckets, Histogram};
8use prometheus_client::registry::{Registry, Unit};
9use std::fmt;
10use std::sync::atomic::{AtomicI64, AtomicU64};
11use std::time::Duration;
12use subspace_core_primitives::sectors::SectorIndex;
13
/// State of a sector as tracked by the farm metrics.
///
/// The [`fmt::Display`] representation of a variant is used as the value of the `state` label on
/// the `sectors_total` gauge family.
#[derive(Debug, Copy, Clone)]
pub(super) enum SectorState {
    /// Sector that is not plotted; its gauge is decremented first when a sector becomes
    /// [`SectorState::Plotted`] (initial plotting)
    NotPlotted,
    /// Sector that is plotted; its gauge is decremented when a sector becomes
    /// [`SectorState::AboutToExpire`] or [`SectorState::Expired`]
    Plotted,
    /// Sector that is about to expire; its gauge is decremented when a newly plotted sector
    /// replaced neither a not plotted nor an expired one
    AboutToExpire,
    /// Sector that has expired; its gauge is decremented when a newly plotted sector replaces
    /// an expired one
    Expired,
}
21
22impl fmt::Display for SectorState {
23    #[inline]
24    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25        f.write_str(match self {
26            Self::NotPlotted => "NotPlotted",
27            Self::Plotted => "Plotted",
28            Self::AboutToExpire => "AboutToExpire",
29            Self::Expired => "Expired",
30        })
31    }
32}
33
/// Metrics for single disk farm
///
/// Fields without `pub(super)` visibility are label-keyed metric families that are updated
/// through dedicated methods of this type rather than directly.
#[derive(Debug)]
pub(super) struct SingleDiskFarmMetrics {
    /// Auditing time histogram (registered as `auditing_time`, in seconds)
    pub(super) auditing_time: Histogram,
    /// Counter of completely skipped slots (not even auditing)
    pub(super) skipped_slots: Counter<u64, AtomicU64>,
    /// Proving time histograms (in seconds), keyed by the `result` label
    proving_time: Family<Vec<(&'static str, String)>, Histogram>,
    /// Counters of non-fatal farming errors, keyed by the `error` label
    farming_errors: Family<Vec<(&'static str, String)>, Counter<u64, AtomicU64>>,
    /// Sector downloading time histogram (in seconds)
    pub(super) sector_downloading_time: Histogram,
    /// Sector encoding time histogram (in seconds)
    pub(super) sector_encoding_time: Histogram,
    /// Sector writing time histogram (in seconds)
    pub(super) sector_writing_time: Histogram,
    /// Sector plotting time histogram (in seconds)
    pub(super) sector_plotting_time: Histogram,
    /// Total number of sectors with corresponding state, keyed by the `state` label
    sectors_total: Family<Vec<(&'static str, String)>, Gauge<i64, AtomicI64>>,
    /// Number of sectors being downloaded
    pub(super) sector_downloading: Counter<u64, AtomicU64>,
    /// Number of downloaded sectors
    pub(super) sector_downloaded: Counter<u64, AtomicU64>,
    /// Number of sectors being encoded
    pub(super) sector_encoding: Counter<u64, AtomicU64>,
    /// Number of encoded sectors
    pub(super) sector_encoded: Counter<u64, AtomicU64>,
    /// Number of sectors being written
    pub(super) sector_writing: Counter<u64, AtomicU64>,
    /// Number of written sectors
    pub(super) sector_written: Counter<u64, AtomicU64>,
    /// Number of sectors being plotted
    pub(super) sector_plotting: Counter<u64, AtomicU64>,
    /// Number of plotted sectors
    pub(super) sector_plotted: Counter<u64, AtomicU64>,
    /// Number of sector plotting failures
    pub(super) sector_plotting_error: Counter<u64, AtomicU64>,
}
56
57impl SingleDiskFarmMetrics {
58    /// Create new instance for specified farm
59    pub(super) fn new(
60        registry: &mut Registry,
61        farm_id: &FarmId,
62        total_sectors_count: SectorIndex,
63        plotted_sectors_count: SectorIndex,
64    ) -> Self {
65        let sub_registry = registry
66            .sub_registry_with_prefix("farm")
67            .sub_registry_with_label(("farm_id".into(), farm_id.to_string().into()));
68
69        let auditing_time = Histogram::new(exponential_buckets(0.0002, 2.0, 15));
70        sub_registry.register_with_unit(
71            "auditing_time",
72            "Auditing time",
73            Unit::Seconds,
74            auditing_time.clone(),
75        );
76
77        let skipped_slots = Counter::default();
78        sub_registry.register(
79            "skipped_slots",
80            "Completely skipped slots (not even auditing)",
81            skipped_slots.clone(),
82        );
83
84        let proving_time = Family::<_, _>::new_with_constructor(|| {
85            Histogram::new(exponential_buckets(0.0002, 2.0, 15))
86        });
87        sub_registry.register_with_unit(
88            "proving_time",
89            "Proving time",
90            Unit::Seconds,
91            proving_time.clone(),
92        );
93
94        let farming_errors = Family::default();
95        sub_registry.register(
96            "farming_errors",
97            "Non-fatal farming errors",
98            farming_errors.clone(),
99        );
100
101        let sector_downloading_time = Histogram::new(exponential_buckets(0.1, 2.0, 15));
102        sub_registry.register_with_unit(
103            "sector_downloading_time",
104            "Sector downloading time",
105            Unit::Seconds,
106            sector_downloading_time.clone(),
107        );
108
109        let sector_encoding_time = Histogram::new(exponential_buckets(0.1, 2.0, 15));
110        sub_registry.register_with_unit(
111            "sector_encoding_time",
112            "Sector encoding time",
113            Unit::Seconds,
114            sector_encoding_time.clone(),
115        );
116
117        let sector_writing_time = Histogram::new(exponential_buckets(0.0002, 2.0, 15));
118        sub_registry.register_with_unit(
119            "sector_writing_time",
120            "Sector writing time",
121            Unit::Seconds,
122            sector_writing_time.clone(),
123        );
124
125        let sector_plotting_time = Histogram::new(exponential_buckets(0.1, 2.0, 15));
126        sub_registry.register_with_unit(
127            "sector_plotting_time",
128            "Sector plotting time",
129            Unit::Seconds,
130            sector_plotting_time.clone(),
131        );
132
133        let sectors_total = Family::default();
134        sub_registry.register_with_unit(
135            "sectors_total",
136            "Total number of sectors with corresponding state",
137            Unit::Other("Sectors".to_string()),
138            sectors_total.clone(),
139        );
140
141        let sector_downloading = Counter::default();
142        sub_registry.register_with_unit(
143            "sector_downloading_counter",
144            "Number of sectors being downloaded",
145            Unit::Other("Sectors".to_string()),
146            sector_downloading.clone(),
147        );
148
149        let sector_downloaded = Counter::default();
150        sub_registry.register_with_unit(
151            "sector_downloaded_counter",
152            "Number of downloaded sectors",
153            Unit::Other("Sectors".to_string()),
154            sector_downloaded.clone(),
155        );
156
157        let sector_encoding = Counter::default();
158        sub_registry.register_with_unit(
159            "sector_encoding_counter",
160            "Number of sectors being encoded",
161            Unit::Other("Sectors".to_string()),
162            sector_encoding.clone(),
163        );
164
165        let sector_encoded = Counter::default();
166        sub_registry.register_with_unit(
167            "sector_encoded_counter",
168            "Number of encoded sectors",
169            Unit::Other("Sectors".to_string()),
170            sector_encoded.clone(),
171        );
172
173        let sector_writing = Counter::default();
174        sub_registry.register_with_unit(
175            "sector_writing_counter",
176            "Number of sectors being written",
177            Unit::Other("Sectors".to_string()),
178            sector_writing.clone(),
179        );
180
181        let sector_written = Counter::default();
182        sub_registry.register_with_unit(
183            "sector_written_counter",
184            "Number of written sectors",
185            Unit::Other("Sectors".to_string()),
186            sector_written.clone(),
187        );
188
189        let sector_plotting = Counter::default();
190        sub_registry.register_with_unit(
191            "sector_plotting_counter",
192            "Number of sectors being plotted",
193            Unit::Other("Sectors".to_string()),
194            sector_plotting.clone(),
195        );
196
197        let sector_plotted = Counter::default();
198        sub_registry.register_with_unit(
199            "sector_plotted_counter",
200            "Number of plotted sectors",
201            Unit::Other("Sectors".to_string()),
202            sector_plotted.clone(),
203        );
204
205        let sector_plotting_error = Counter::default();
206        sub_registry.register_with_unit(
207            "sector_plotting_error_counter",
208            "Number of sector plotting failures",
209            Unit::Other("Sectors".to_string()),
210            sector_plotting_error.clone(),
211        );
212
213        let metrics = Self {
214            auditing_time,
215            skipped_slots,
216            proving_time,
217            farming_errors,
218            sector_downloading_time,
219            sector_encoding_time,
220            sector_writing_time,
221            sector_plotting_time,
222            sectors_total,
223            sector_downloading,
224            sector_downloaded,
225            sector_encoding,
226            sector_encoded,
227            sector_writing,
228            sector_written,
229            sector_plotting,
230            sector_plotted,
231            sector_plotting_error,
232        };
233
234        metrics.update_sectors_total(
235            total_sectors_count - plotted_sectors_count,
236            SectorState::NotPlotted,
237        );
238        metrics.update_sectors_total(plotted_sectors_count, SectorState::Plotted);
239
240        metrics
241    }
242
243    pub(super) fn observe_proving_time(&self, time: &Duration, result: ProvingResult) {
244        self.proving_time
245            .get_or_create(&vec![("result", result.to_string())])
246            .observe(time.as_secs_f64());
247    }
248
249    pub(super) fn note_farming_error(&self, error: &FarmingError) {
250        self.farming_errors
251            .get_or_create(&vec![("error", error.str_variant().to_string())])
252            .inc();
253    }
254
255    pub(super) fn update_sectors_total(&self, sectors: SectorIndex, state: SectorState) {
256        self.sectors_total
257            .get_or_create(&vec![("state", state.to_string())])
258            .set(i64::from(sectors));
259    }
260
261    pub(super) fn update_sector_state(&self, state: SectorState) {
262        self.sectors_total
263            .get_or_create(&vec![("state", state.to_string())])
264            .inc();
265        match state {
266            SectorState::NotPlotted => {
267                // Never called, doesn't make sense
268            }
269            SectorState::Plotted => {
270                // Separate blocks in because of mutex guard returned by `get_or_create` resulting
271                // in deadlock otherwise
272                {
273                    let not_plotted_sectors = self
274                        .sectors_total
275                        .get_or_create(&vec![("state", SectorState::NotPlotted.to_string())]);
276                    if not_plotted_sectors.get() > 0 {
277                        // Initial plotting
278                        not_plotted_sectors.dec();
279                        return;
280                    }
281                }
282                {
283                    let expired_sectors = self
284                        .sectors_total
285                        .get_or_create(&vec![("state", SectorState::Expired.to_string())]);
286                    if expired_sectors.get() > 0 {
287                        // Replaced expired sector
288                        expired_sectors.dec();
289                        return;
290                    }
291                }
292                // Replaced about to expire sector
293                self.sectors_total
294                    .get_or_create(&vec![("state", SectorState::AboutToExpire.to_string())])
295                    .dec();
296            }
297            SectorState::AboutToExpire | SectorState::Expired => {
298                self.sectors_total
299                    .get_or_create(&vec![("state", SectorState::Plotted.to_string())])
300                    .dec();
301            }
302        }
303    }
304}