subspace_metrics/
lib.rs

1#![warn(missing_docs)]
//! This module bridges the two Prometheus metrics libraries in use: `prometheus-client`
//! (the official library) and TiKV's `prometheus` client (used by Substrate).
//! It exposes a web server endpoint at "/metrics" that outputs metrics in Prometheus
//! format, adapting metrics from either or both of those libraries.
6
7use actix_web::http::StatusCode;
8use actix_web::web::Data;
9use actix_web::{get, App, HttpResponse, HttpServer};
10use prometheus::{Registry as SubstrateRegistry, TextEncoder};
11use prometheus_client::encoding::text::encode;
12use prometheus_client::registry::Registry as PrometheusClientRegistry;
13use std::error::Error;
14use std::future::Future;
15use std::io::ErrorKind;
16use std::net::SocketAddr;
17use tracing::{error, info, warn};
18
19/// Metrics registry adapter for prometheus-client and Substrate frameworks.
20/// It specifies which metrics registry or registries are in use.
21pub enum RegistryAdapter {
22    /// Uses only the prometheus-client metrics registry.
23    PrometheusClient(PrometheusClientRegistry),
24    /// Uses only the Substrate metrics registry.
25    Substrate(SubstrateRegistry),
26    /// We use both Substrate and prometheus-client metrics registries.
27    Both(PrometheusClientRegistry, SubstrateRegistry),
28}
29
30#[get("/metrics")]
31async fn metrics(registry: Data<RegistryAdapter>) -> Result<HttpResponse, Box<dyn Error>> {
32    let mut encoded_metrics = String::new();
33
34    match &**registry {
35        RegistryAdapter::PrometheusClient(libp2p_registry) => {
36            encode(&mut encoded_metrics, libp2p_registry)?;
37        }
38        RegistryAdapter::Substrate(substrate_registry) => {
39            TextEncoder::new().encode_utf8(&substrate_registry.gather(), &mut encoded_metrics)?;
40        }
41        RegistryAdapter::Both(libp2p_registry, substrate_registry) => {
42            // We combine outputs of both metrics registries in one string.
43            TextEncoder::new().encode_utf8(&substrate_registry.gather(), &mut encoded_metrics)?;
44            // prometheus-client string contains #EOF, order is important here
45            encode(&mut encoded_metrics, libp2p_registry)?;
46        }
47    }
48
49    let resp = HttpResponse::build(StatusCode::OK)
50        .content_type("application/openmetrics-text; version=1.0.0; charset=utf-8")
51        .body(encoded_metrics);
52
53    Ok(resp)
54}
55
56/// Start prometheus metrics server on the provided address.
57pub fn start_prometheus_metrics_server(
58    mut endpoints: Vec<SocketAddr>,
59    registry: RegistryAdapter,
60) -> std::io::Result<impl Future<Output = std::io::Result<()>>> {
61    let data = Data::new(registry);
62
63    let app_factory = move || App::new().app_data(data.clone()).service(metrics);
64    let result = HttpServer::new(app_factory.clone())
65        .workers(2)
66        .bind(endpoints.as_slice());
67
68    let server = match result {
69        Ok(server) => server,
70        Err(error) => {
71            if error.kind() != ErrorKind::AddrInUse {
72                error!(?error, "Failed to start metrics server.");
73
74                return Err(error);
75            }
76
77            // Trying to recover from "address in use" error.
78            warn!(
79                ?error,
80                "Failed to start metrics server. Falling back to the random port...",
81            );
82
83            endpoints.iter_mut().for_each(|endpoint| {
84                endpoint.set_port(0);
85            });
86
87            let result = HttpServer::new(app_factory)
88                .workers(2)
89                .bind(endpoints.as_slice());
90
91            match result {
92                Ok(server) => server,
93                Err(error) => {
94                    error!(?error, "Failed to start metrics server on the random port.");
95
96                    return Err(error);
97                }
98            }
99        }
100    };
101
102    info!(endpoints = ?server.addrs(), "Metrics server started.",);
103
104    Ok(server.run())
105}