Compare commits

..

No commits in common. "main" and "82f7101f54e01ac65fc94db77b65a49bfe301138" have entirely different histories.

12 changed files with 388 additions and 1115 deletions

4
.gitignore vendored
View file

@ -1,5 +1,3 @@
/target
**/*.rs.bk
.idea
/complete
/man
.idea

View file

@ -5,27 +5,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [v0.5.1] - 2022-04-03
### Fixed
- shell completion for config file now completes files
## [v0.5.0] - 2022-04-02
### Added
- shell completion generation
- man page generation
- allow configuring timeout
- allow configuring ping bucket sizes
### Changed
- replace prometheus crate with metrics
- replace raw hyper with axum
## [v0.4.1] - 2021-04-27
### Changed
- code cleanup
- switch over to tokio-ping fork, tokio-icmp-echo
## [v0.4.0] - 2021-04-23
### Changed
- update dependencies, including stable tokio this time.
- migrate error handling to anyhow

1024
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,33 +1,23 @@
[package]
name = "peshming"
version = "0.5.1"
license = "AGPL-3.0-only"
version = "0.3.0"
authors = ["Jan Christian Grünhage <jan.christian@gruenhage.xyz>"]
repository = "https://git.jcg.re/jcgruenhage/peshming"
keywords = ["ping", "icmp", "prometheus"]
edition = "2021"
edition = "2018"
description = "Pings configured hosts in a configurable intervals and exposes metrics for prometheus."
build = "build.rs"
[dependencies]
toml = "0.5"
futures = "0.3"
tokio = { version = "1", features = ["rt-multi-thread", "macros", "time"] }
clap = { version = "3", features = ["derive", "wrap_help"] }
fern = "0.6"
log = { version = "0.4", features = ["serde"] }
chrono = "0.4"
serde = { version = "1", features = ["derive"] }
serde_with = "1"
tokio-icmp-echo = "0.4"
futures-util = "0.3"
anyhow = "1"
async-anyhow-logger = "0.1"
axum = "0.5"
metrics = "0.18"
metrics-exporter-prometheus = { version = "0.9", default-features = false }
[build-dependencies]
clap = { version = "3", features = ["derive", "wrap_help"] }
clap_mangen = "0.1"
clap_complete = "3"
prometheus = "0.12.0"
toml = "0.5.8"
hyper = { version = "0.14.7", features = ["full"] }
lazy_static = "1.4.0"
futures = "0.3.14"
tokio = { version = "1.5.0", features = ["rt-multi-thread", "macros", "time"] }
clap = "2.33.3"
fern = "0.6.0"
log = "0.4.14"
chrono = "0.4.19"
serde = { version = "1.0.125", features = ["derive"] }
tokio-ping = { git = "https://github.com/jcgruenhage/tokio-ping", branch = "main" }
futures-util = "0.3.14"
anyhow = "1.0.40"
async-anyhow-logger = "0.1.0"

View file

@ -33,9 +33,3 @@ For configuration options, see the included sample config file.
There are two endpoints available:
- `/metrics`, which serves the metrics
- `/health`, which should always return a 200 status code
### Packaging Notes
If you're packaging peshming, you might be interested in the environment
variables `PESHMING_MAN_DIR` and `PESHMING_COMPLETIONS_DIR`. When set, they make
the build script generate a man page and shell completions, respectively, into
the directories they point to.

View file

@ -1,60 +0,0 @@
/********************************************************************************
* Prometheus exporter for monitoring network connectivity using icmp pings *
* *
* Copyright (C) 2019-2022 Jan Christian Grünhage *
* Copyright (C) 2020-2021 Famedly GmbH *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License as *
* published by the Free Software Foundation, either version 3 of the *
* License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Affero General Public License for more details. *
* *
* You should have received a copy of the GNU Affero General Public License *
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
********************************************************************************/
use std::path::PathBuf;
use clap::{ArgEnum, CommandFactory};
use clap_complete::{generate_to, Shell};
use cli::Cli;
#[path = "src/cli.rs"] mod cli;
/// Build-script entry point: optionally emits shell completions and a man page.
///
/// Nothing is generated unless the corresponding environment variable is set:
/// - `PESHMING_COMPLETIONS_DIR`: completions for every shell clap_complete knows.
/// - `PESHMING_MAN_DIR`: the `peshming.1` man page.
///
/// Failing to create an output folder or to generate completions panics;
/// failing to render or write the man page is returned as an I/O error.
fn main() -> std::io::Result<()> {
    let mut command = Cli::command();

    if let Some(out_dir) = std::env::var_os("PESHMING_COMPLETIONS_DIR").map(PathBuf::from) {
        std::fs::create_dir_all(&out_dir)
            .expect("Could not create shell completions output folder.");
        for &shell in Shell::value_variants() {
            if let Err(err) = generate_to(shell, &mut command, "peshming", &out_dir) {
                panic!(
                    "Failed to generate shell completions for {}: {}.",
                    shell, err
                );
            }
        }
    }

    if let Some(out_dir) = std::env::var_os("PESHMING_MAN_DIR").map(PathBuf::from) {
        std::fs::create_dir_all(&out_dir).expect("Could not create man page output folder.");
        // The command is consumed here, so the man page has to be generated last.
        let mut rendered: Vec<u8> = Vec::new();
        clap_mangen::Man::new(command).render(&mut rendered)?;
        std::fs::write(out_dir.join("peshming.1"), rendered)?;
    }

    Ok(())
}

View file

@ -2,20 +2,10 @@
# Takes the format IPv4:port or [IPv6]:port
listener = "[::]:9898"
# Configuration of peshming's pinging behaviour
[ping]
# The timeout is specified in milliseconds, with a default of 3 seconds.
# timeout = 3000
# Table of hosts to ping. Currently only plain IPs are supported, no DNS names.
# The format here is `"host" = interval in milliseconds`, so these examples
# will ping the primary and secondary IP of cloudflare's 1.1.1.1 DNS service
# every 500ms, or twice per second.
[ping.hosts]
[hosts]
"1.1.1.1" = 500
"1.0.0.1" = 500
# Logging can also be configured here instead of via the CLI. If both are
# specified, the more verbose of the two will be used.
# [log]
# level = "ERROR"

View file

@ -1,33 +0,0 @@
/********************************************************************************
* Prometheus exporter for monitoring network connectivity using icmp pings *
* *
* Copyright (C) 2019-2022 Jan Christian Grünhage *
* Copyright (C) 2020-2021 Famedly GmbH *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License as *
* published by the Free Software Foundation, either version 3 of the *
* License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Affero General Public License for more details. *
* *
* You should have received a copy of the GNU Affero General Public License *
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
********************************************************************************/
use clap::ValueHint;
// Command-line interface of peshming, declared with clap's derive API.
// This file is also pulled into the build script (`#[path = "src/cli.rs"] mod cli;`),
// which generates shell completions and the man page from the same definition.
// NOTE: the `///` comments below are not just documentation; clap turns them into
// the help text shown to users, so edit them as user-facing strings.
#[derive(clap::Parser)]
#[clap(author, version)]
/// Pings configured hosts in a configurable intervals and exposes metrics for prometheus.
pub struct Cli {
// Positional argument: path of the config file. The `FilePath` hint is what lets
// generated shell completions complete file names for this argument.
/// Set config file
#[clap(value_hint = ValueHint::FilePath)]
pub config: std::path::PathBuf,
// `-v` / `--verbose`; `from_occurrences` stores how many times the flag was given.
#[clap(short, long, parse(from_occurrences))]
/// Be verbose (you can add this up to 4 times for more logs).
/// By default, only errors are logged, so no output is a good thing.
pub verbose: usize,
}

View file

@ -1,8 +1,8 @@
/********************************************************************************
* Prometheus exporter for monitoring network connectivity using icmp pings *
* *
* Copyright (C) 2019-2022 Jan Christian Grünhage *
* Copyright (C) 2020-2021 Famedly GmbH *
* Copyright (C) 2019-2020 Jan Christian Grünhage *
* Copyright (C) 2020 Famedly GmbH *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License as *
@ -18,71 +18,38 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
********************************************************************************/
use anyhow::{Context, Result};
use clap::Parser;
use clap::{clap_app, crate_authors, crate_description, crate_name, crate_version};
use log::info;
use metrics::{describe_histogram, register_histogram, Unit};
use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle};
use serde::Deserialize;
use serde_with::{serde_as, DurationMilliSeconds};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::{collections::HashMap, time::Duration};
use crate::cli::Cli;
/// Start-up state produced by `setup_app`.
pub(crate) struct App {
/// Settings deserialized from the config file given on the command line.
pub(crate) config: Config,
/// Handle obtained when the Prometheus recorder was installed.
pub(crate) handle: PrometheusHandle,
}
#[derive(Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone)]
pub(crate) struct Config {
pub(crate) listener: std::net::SocketAddr,
pub(crate) ping: PingConfig,
#[serde(default)]
pub(crate) log: LogConfig,
}
#[serde_as]
#[derive(Deserialize, Clone)]
pub(crate) struct PingConfig {
#[serde_as(as = "DurationMilliSeconds<f64>")]
#[serde(default = "default_timeout")]
pub(crate) timeout: Duration,
#[serde(default = "default_buckets")]
pub(crate) bucket_sizes: Vec<f64>,
pub(crate) hosts: HashMap<std::net::IpAddr, u64>,
}
fn default_timeout() -> Duration {
Duration::from_secs(3)
pub(crate) fn setup_clap() -> clap::ArgMatches<'static> {
clap_app!(myapp =>
(name: crate_name!())
(version: crate_version!())
(author: crate_authors!())
(about: crate_description!())
(@arg config: +required "Set config file")
(@arg v: -v --verbose ... "Be verbose (you can add this up to 4 times for more logs).
By default, only errors are logged, so no output is a good thing.")
)
.get_matches()
}
/// Histogram bucket boundaries used when the config file omits `bucket_sizes`.
///
/// The values feed the `ping_rtt_milliseconds` histogram, so they are
/// round-trip times in milliseconds, ascending from 0.5 to 2000.
fn default_buckets() -> Vec<f64> {
    const BOUNDS: [f64; 30] = [
        0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 50.0, 75.0, 100.0, //
        150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 450.0, 500.0, 550.0, 600.0, //
        650.0, 700.0, 750.0, 800.0, 900.0, 1000.0, 1250.0, 1500.0, 1750.0, 2000.0,
    ];
    BOUNDS.to_vec()
}
#[derive(Deserialize, Clone)]
pub(crate) struct LogConfig {
pub(crate) level: log::LevelFilter,
}
impl Default for LogConfig {
fn default() -> Self {
Self {
level: log::LevelFilter::Error,
}
}
}
fn setup_clap() -> Cli {
Cli::parse()
}
fn setup_fern(level: log::LevelFilter) {
pub(crate) fn setup_fern(level: u64) {
let level = match level {
0 => log::LevelFilter::Error,
1 => log::LevelFilter::Warn,
2 => log::LevelFilter::Info,
3 => log::LevelFilter::Debug,
_ => log::LevelFilter::Trace,
};
match fern::Dispatch::new()
.format(|out, message, record| {
out.finish(format_args!(
@ -103,48 +70,7 @@ fn setup_fern(level: log::LevelFilter) {
}
}
fn read_config(path: &std::path::Path) -> Result<Config> {
pub(crate) fn read_config(path: &str) -> Result<Config> {
let config_file_content = std::fs::read_to_string(path).context("Couldn't read config file")?;
toml::from_str(&config_file_content).context("Couldn't parse config file")
}
pub(crate) fn setup_app() -> Result<App> {
let cli = setup_clap();
let config = read_config(&cli.config).context("Couldn't read config file!")?;
setup_fern(determine_level(cli.verbose, config.log.level));
let handle = setup_prometheus(&config)?;
Ok(App { config, handle })
}
/// Installs the Prometheus recorder and prepares the ping histogram.
///
/// The `ping_rtt_milliseconds` metric gets the bucket boundaries from
/// `config.ping.bucket_sizes`; one histogram per configured host is registered
/// up front (labelled `target`), and the metric is given a unit and description.
///
/// # Errors
/// Returns an error if the bucket configuration is rejected or the recorder
/// cannot be installed.
pub(crate) fn setup_prometheus(config: &Config) -> Result<PrometheusHandle> {
    let ping = &config.ping;

    let rtt_matcher = Matcher::Full("ping_rtt_milliseconds".into());
    let builder =
        PrometheusBuilder::new().set_buckets_for_metric(rtt_matcher, &ping.bucket_sizes)?;
    let recorder_handle = builder.install_recorder()?;

    ping.hosts.keys().for_each(|host| {
        register_histogram!("ping_rtt_milliseconds", "target" => host.to_string());
    });

    describe_histogram!(
        "ping_rtt_milliseconds",
        Unit::Milliseconds,
        "The ping round trip time in milliseconds"
    );

    Ok(recorder_handle)
}
fn determine_level(verbose_occurrences: usize, config_level: log::LevelFilter) -> log::LevelFilter {
let cli_level = match verbose_occurrences {
0 => log::LevelFilter::Error,
1 => log::LevelFilter::Warn,
2 => log::LevelFilter::Info,
3 => log::LevelFilter::Debug,
_ => log::LevelFilter::Trace,
};
Ord::max(cli_level, config_level)
Ok(toml::from_str(&config_file_content).context("Couldn't parse config file")?)
}

View file

@ -1,8 +1,8 @@
/********************************************************************************
* Prometheus exporter for monitoring network connectivity using icmp pings *
* *
* Copyright (C) 2019-2022 Jan Christian Grünhage *
* Copyright (C) 2020-2021 Famedly GmbH *
* Copyright (C) 2019-2020 Jan Christian Grünhage *
* Copyright (C) 2020 Famedly GmbH *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License as *
@ -17,26 +17,25 @@
* You should have received a copy of the GNU Affero General Public License *
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
********************************************************************************/
use anyhow::Result;
use async_anyhow_logger::catch;
use anyhow::{Context, Result};
mod cli;
mod config;
mod metrics;
mod ping;
use crate::{
config::{setup_app, App},
metrics::start_serving_metrics,
ping::start_pinging_hosts,
};
use crate::config::{read_config, setup_clap, setup_fern};
use crate::metrics::start_serving_metrics;
use crate::ping::start_pinging_hosts;
#[tokio::main]
async fn main() -> Result<()> {
let App { config, handle } = setup_app()?;
let clap = setup_clap();
setup_fern(clap.occurrences_of("v"));
let config =
read_config(clap.value_of("config").unwrap()).context("Couldn't read config file!")?;
let ping_fut = catch(start_pinging_hosts(&config));
let serve_fut = catch(start_serving_metrics(&config, handle));
let ping_fut = start_pinging_hosts(config.clone());
let serve_fut = start_serving_metrics(config.clone());
futures::pin_mut!(ping_fut);
futures::pin_mut!(serve_fut);

View file

@ -1,8 +1,8 @@
/********************************************************************************
* Prometheus exporter for monitoring network connectivity using icmp pings *
* *
* Copyright (C) 2019-2022 Jan Christian Grünhage *
* Copyright (C) 2020-2021 Famedly GmbH *
* Copyright (C) 2019-2020 Jan Christian Grünhage *
* Copyright (C) 2020 Famedly GmbH *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License as *
@ -18,26 +18,87 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
********************************************************************************/
use crate::config::Config;
use anyhow::Result;
use axum::{response::IntoResponse, routing::get, Router, Server};
use anyhow::{Context, Result};
use hyper::{
header::CONTENT_TYPE,
server::Server,
service::{make_service_fn, service_fn},
Body, Request, Response,
};
use lazy_static::lazy_static;
use log::info;
use metrics_exporter_prometheus::PrometheusHandle;
use prometheus::*;
use prometheus::{Counter, Gauge, HistogramVec, TextEncoder};
async fn metrics(handle: PrometheusHandle) -> impl IntoResponse {
handle.render()
// Self-monitoring metrics of the exporter's own HTTP endpoint.
// Each static is created and registered on first access; the `.unwrap()` calls
// panic at that point if registration fails.
lazy_static! {
// Counter incremented once per `/metrics` request (see `serve_metrics`).
static ref HTTP_COUNTER: Counter = register_counter!(opts!(
"http_requests_total",
"Total number of HTTP requests made.",
labels! {"handler" => "all",}
))
.unwrap();
// Gauge set to the byte length of the most recently encoded metrics body.
static ref HTTP_BODY_GAUGE: Gauge = register_gauge!(opts!(
"http_response_size_bytes",
"The HTTP response sizes in bytes.",
labels! {"handler" => "all",}
))
.unwrap();
// Histogram of request handling time, labelled by `handler`.
static ref HTTP_REQ_HISTOGRAM: HistogramVec = register_histogram_vec!(
"http_request_duration_seconds",
"The HTTP request latencies in seconds.",
&["handler"]
)
.unwrap();
}
pub(crate) async fn start_serving_metrics(config: &Config, handle: PrometheusHandle) -> Result<()> {
let app = Router::new()
.route(
"/metrics",
get({
let handle = handle.clone();
move || metrics(handle)
}),
)
.route("/health", get(|| async { "" }));
let serve_future = Server::bind(&config.listener).serve(app.into_make_service());
/// Routes a request by its URI path.
///
/// `/metrics` and `/health` are served by their handlers; every other path
/// gets the not-found response. The HTTP method is not inspected.
async fn serve_req(req: Request<Body>) -> Result<Response<Body>> {
    let path = req.uri().path();
    if path == "/metrics" {
        serve_metrics().await
    } else if path == "/health" {
        serve_health_check().await
    } else {
        serve_not_found().await
    }
}
/// Builds the `/metrics` response from everything `prometheus::gather()` returns.
///
/// Also updates the endpoint's own metrics: the request counter is incremented,
/// the body-size gauge is set to the encoded length, and the handling time is
/// recorded in the request-duration histogram under the `all` handler label.
///
/// # Errors
/// Returns an error if the metrics cannot be encoded or the response cannot be
/// built. Encoding failures used to panic via `unwrap()`; they are now
/// propagated like every other failure in this handler.
async fn serve_metrics() -> Result<Response<Body>> {
    let encoder = TextEncoder::new();

    HTTP_COUNTER.inc();
    let timer = HTTP_REQ_HISTOGRAM.with_label_values(&["all"]).start_timer();

    let metric_families = prometheus::gather();
    let mut buffer = vec![];
    encoder
        .encode(&metric_families, &mut buffer)
        .context("Couldn't encode metrics")?;
    HTTP_BODY_GAUGE.set(buffer.len() as f64);

    let response = Response::builder()
        .status(200)
        .header(CONTENT_TYPE, encoder.format_type())
        .body(Body::from(buffer))
        .context("Couldn't build metrics response")?;

    timer.observe_duration();

    Ok(response)
}
/// Answers the health check with status 200 and an empty body.
///
/// # Errors
/// Returns an error if the response cannot be built.
async fn serve_health_check() -> Result<Response<Body>> {
    let empty_body = Body::from(vec![]);
    Response::builder()
        .status(200)
        .body(empty_body)
        .context("Couldn't build health check response")
}
/// Answers requests for unknown paths with status 404 and an empty body.
///
/// # Errors
/// Returns an error if the response cannot be built.
async fn serve_not_found() -> Result<Response<Body>> {
    let empty_body = Body::from(vec![]);
    Response::builder()
        .status(404)
        .body(empty_body)
        .context("Couldn't build not found response")
}
/// Binds an HTTP server to `config.listener` and serves requests through `serve_req`.
///
/// The function awaits the server future, so it only returns once that future
/// completes; a server error is converted into the returned error.
pub(crate) async fn start_serving_metrics(config: Config) -> Result<()> {
// The closure ignores its argument and always hands back a service wrapping `serve_req`.
let serve_future = Server::bind(&config.listener).serve(make_service_fn(|_| async {
Ok::<_, hyper::Error>(service_fn(serve_req))
}));
// Logged after the server future is created but before it is awaited.
info!("Listening on {}", &config.listener);
Ok(serve_future.await?)
}

View file

@ -1,8 +1,8 @@
/********************************************************************************
* Prometheus exporter for monitoring network connectivity using icmp pings *
* *
* Copyright (C) 2019-2022 Jan Christian Grünhage *
* Copyright (C) 2020-2021 Famedly GmbH *
* Copyright (C) 2019-2020 Jan Christian Grünhage *
* Copyright (C) 2020 Famedly GmbH *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Affero General Public License as *
@ -20,31 +20,41 @@
use crate::config::Config;
use anyhow::{Context, Result};
use async_anyhow_logger::catch;
use lazy_static::lazy_static;
use log::{info, trace};
use metrics::histogram;
use prometheus::*;
use std::net::IpAddr;
use std::time::Duration;
use tokio_icmp_echo::{PingFuture, Pinger};
use tokio_ping::{PingFuture, Pinger};
pub(crate) async fn start_pinging_hosts(config: &Config) -> Result<()> {
lazy_static! {
// Histogram of ping round-trip times, with one label (`target`) per pinged host.
// Bucket boundaries ascend from 0.5 to 2000; per the metric name they are milliseconds.
// Created and registered on first access; `.unwrap()` panics there if registration fails.
static ref PING_HISTOGRAM: HistogramVec = register_histogram_vec!(
"ping_rtt_milliseconds",
"The ping round trip time in milliseconds",
&["target"],
vec![
0.5, 1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 50.0, 75.0, 100.0, 150.0, 200.0, 250.0, 300.0,
350.0, 400.0, 450.0, 500.0, 550.0, 600.0, 650.0, 700.0, 750.0, 800.0, 900.0, 1000.0,
1250.0, 1500.0, 1750.0, 2000.0
]
)
.unwrap();
}
pub(crate) async fn start_pinging_hosts(config: Config) -> Result<()> {
let pinger = Pinger::new().await.context("Couldn't create pinger")?;
let mut handles = vec![];
for (host, interval) in config.ping.hosts.clone() {
for (host, interval) in config.hosts.clone() {
info!("Spawn ping task for {}", host);
handles.push(tokio::spawn(ping_host(
pinger.clone(),
host,
interval,
config.ping.timeout,
)));
handles.push(tokio::spawn(catch(ping_host(pinger.clone(), host, interval))));
}
let (result, _, _) = futures::future::select_all(handles).await;
result??;
result?;
Ok(())
}
async fn ping_host(pinger: Pinger, host: IpAddr, interval: u64, timeout: Duration) -> Result<()> {
let mut pingchain = pinger.chain(host).timeout(timeout);
async fn ping_host(pinger: Pinger, host: IpAddr, interval: u64) -> Result<()> {
let mut pingchain = pinger.chain(host).timeout(Duration::from_secs(3));
let mut interval = tokio::time::interval(Duration::from_millis(interval));
let host_string = host.to_string();
loop {
@ -52,22 +62,23 @@ async fn ping_host(pinger: Pinger, host: IpAddr, interval: u64, timeout: Duratio
tokio::spawn(catch(handle_ping_result(
pingchain.send(),
host_string.clone(),
timeout,
)));
}
}
async fn handle_ping_result(result: PingFuture, host: String, timeout: Duration) -> Result<()> {
async fn handle_ping_result(result: PingFuture, host: String) -> Result<()> {
let pong = result.await.context(format!("Couldn't ping {}", &host))?;
match pong {
Some(time) => {
let ms = time.as_millis();
trace!("Received pong from {} after {} ms", &host, &ms);
histogram!("ping_rtt_milliseconds", ms as f64, "target" => host);
PING_HISTOGRAM
.with_label_values(&[&host])
.observe(ms as f64);
}
None => {
trace!("Received no response from {} within timeout", &host);
histogram!("ping_rtt_milliseconds", timeout.as_millis() as f64, "target" => host);
PING_HISTOGRAM.with_label_values(&[&host]).observe(3000.0);
}
};