From dd637f56bef1b65324e35e79432cdc02afec188f Mon Sep 17 00:00:00 2001 From: Samuel Burnham <45365069+samuelburnham@users.noreply.github.com> Date: Tue, 20 Feb 2024 17:44:11 -0500 Subject: [PATCH 1/5] Fix workflow and label coordinates by commit hash --- .github/workflows/bench-deploy.yml | 23 ++++++++---- src/plot.rs | 58 ++++++++++++++++++------------ 2 files changed, 51 insertions(+), 30 deletions(-) diff --git a/.github/workflows/bench-deploy.yml b/.github/workflows/bench-deploy.yml index feb3e57..854cfb1 100644 --- a/.github/workflows/bench-deploy.yml +++ b/.github/workflows/bench-deploy.yml @@ -34,6 +34,7 @@ jobs: - uses: taiki-e/install-action@v2 with: tool: just@1.22.0 + - run: sudo apt-get update && sudo apt-get install pkg-config libfontconfig1-dev # Run benchmarks and deploy - name: Get old benchmarks uses: actions/checkout@v4 @@ -56,26 +57,34 @@ jobs: just gpu-bench-ci fibonacci mv fibonacci-${{ env.COMMIT }}.json .. working-directory: ${{ github.workspace }}/benches + - uses: actions/checkout@v4 + with: + repository: lurk-lab/ci-workflows + path: ci-workflows # If no plot data found, unzip all historical bench results to re-create the plots - name: Check for existing plot data run: | - if [ ! -f "plot-data.json" ]; then - shopt -s nullglob # Make glob patterns that match no files expand to a null string - tarballs=(./*.tar.gz) + shopt -s nullglob # Make glob patterns that match no files expand to a null string + if [ ! -f "../gh-pages/benchmarks/history/plot-data.json" ]; then + echo "No plot data found" + tarballs=(../gh-pages/benchmarks/history/*.tar.gz) if (( ${#tarballs[@]} )); then cat "${tarballs[@]}" | tar -xvzf - -i else echo "No tarballs found for extraction." fi - shopt -u nullglob # Disable nullglob option fi - # TODO: This should probably be in a subdirectory or Cargo workspace + shopt -u nullglob # Disable nullglob option + working-directory: ${{ github.workspace }}/ci-workflows # Saves the plot data to be deployed - name: Generate historical performance plot run: | + cp ../fibonacci-${{ env.COMMIT }}.json . cargo run - mkdir -p history - mv -f plot-data.json history + mv *.png .. + mkdir -p ../history + mv -f plot-data.json ../history + working-directory: ${{ github.workspace }}/ci-workflows # TODO: Prettify labels for easier viewing # Compress the benchmark file and metadata for later analysis - name: Compress artifacts diff --git a/src/plot.rs b/src/plot.rs index f9ab23f..af0266c 100644 --- a/src/plot.rs +++ b/src/plot.rs @@ -1,3 +1,4 @@ +use anyhow::bail; use plotters::prelude::*; use chrono::{serde::ts_seconds, DateTime, Duration, Utc}; @@ -7,7 +8,6 @@ use std::{collections::HashMap, error::Error}; use crate::json::BenchData; -// TODO: Figure out how to include the commit hash as a label on the point or X-axis pub fn generate_plots(data: &Plots) -> Result<(), Box> { for plot in data.0.iter() { let out_file_name = format!("./{}.png", plot.0); @@ -40,6 +40,7 @@ pub fn generate_plots(data: &Plots) -> Result<(), Box> { .configure_mesh() .disable_x_mesh() .disable_y_mesh() + .x_label_formatter(&|x| format!("{}", x.format("%m/%d/%y"))) .x_labels(10) .max_light_lines(4) .x_desc("Commit Date") @@ -50,25 +51,25 @@ pub fn generate_plots(data: &Plots) -> Result<(), Box> { for (i, line) in plot.1.lines.iter().enumerate() { // Draw lines between each point chart - .draw_series(LineSeries::new( - line.1.iter().map(|p| (p.x, p.y)), - Palette99::pick(i), - ))? + .draw_series(LineSeries::new(line.1.iter().map(|p| (p.x, p.y)), style(i)))? .label(line.0) // TODO: Move the legend out of the plot area .legend(move |(x, y)| { - Rectangle::new( - [(x - 5, y - 5), (x + 5, y + 5)], - Palette99::pick(i).filled(), - ) + Rectangle::new([(x - 5, y - 5), (x + 5, y + 5)], style(i).filled()) }); - // Draw dots on each point - chart.draw_series( - line.1 - .iter() - .map(|p| Circle::new((p.x, p.y), 3, Palette99::pick(i).filled())), - )?; + // Draw points and text labels (Git commit) on each point + chart.draw_series(PointSeries::of_element( + line.1.iter(), + 5, + style(i).filled(), + &|coord, size, style| { + EmptyElement::at((coord.x, coord.y)) + + Circle::new((0, 0), size, style) + + Text::new(format!("{:?}", coord.label), (0, 0), ("sans-serif", 15)) + }, + ))?; + chart .configure_series_labels() .background_style(WHITE) @@ -84,15 +85,22 @@ pub fn generate_plots(data: &Plots) -> Result<(), Box> { Ok(()) } -// Convert - to a `DateTime` object, discarding `short-sha` -fn str_to_datetime(input: &str) -> Result, Box> { - // Removes the first 8 chars (assuming UTF8) for the `short-sha` and trailing '-' - let datetime: &str = input.split_at(8).1; +fn style(idx: usize) -> PaletteColor { + Palette99::pick(idx) +} + +// Splits a - input into a (String, `DateTime`) object +fn parse_commit_str(input: &str) -> anyhow::Result<(String, DateTime)> { + // Splits at the first `-` as the size is known (assumes UTF-8) + let (commit, date) = input.split_at(8); + let mut commit = commit.to_owned(); + commit.pop(); - DateTime::parse_from_rfc3339(datetime).map_or_else( - |e| Err(format!("Failed to parse string into `DateTime`: {}", e).into()), + let date = DateTime::parse_from_rfc3339(date).map_or_else( + |e| bail!("Failed to parse string into `DateTime`: {}", e), |dt| Ok(dt.with_timezone(&Utc)), - ) + )?; + Ok((commit, date)) } // Plots of benchmark results over time/Git history. This data structure is persistent between runs, @@ -115,10 +123,12 @@ impl Plots { // and adds the data to the `Plots` struct. pub fn add_data(&mut self, bench_data: &Vec) { for bench in bench_data { - let commit_date = str_to_datetime(&bench.id.bench_name).expect("Timestamp parse error"); + let (commit_hash, commit_date) = + parse_commit_str(&bench.id.bench_name).expect("Timestamp parse error"); let point = Point { x: commit_date, y: bench.result.time, + label: commit_hash, }; if self.0.get(&bench.id.group_name).is_none() { @@ -168,6 +178,8 @@ pub struct Point { x: DateTime, // Benchmark time (avg.) y: f64, + // Commit hash (short) + label: String, } // Min. and max. X axis values for a given plot From 36ed801f335f45d5793f69ea02b31e882ed8b1fc Mon Sep 17 00:00:00 2001 From: Samuel Burnham <45365069+samuelburnham@users.noreply.github.com> Date: Thu, 22 Feb 2024 10:32:19 -0500 Subject: [PATCH 2/5] Refactor bench params --- src/json.rs | 72 +++++++++++++++++++++++++++++++++++++++++++++++------ src/main.rs | 38 ++++++++++++++-------------- src/plot.rs | 36 ++++++++------------------- 3 files changed, 93 insertions(+), 53 deletions(-) diff --git a/src/json.rs b/src/json.rs index 1389560..5880102 100644 --- a/src/json.rs +++ b/src/json.rs @@ -2,7 +2,9 @@ use core::fmt; use std::io::Read; use std::{fs::File, path::Path}; -use serde::Deserialize; +use anyhow::{anyhow, bail}; +use chrono::{DateTime, Utc}; +use serde::{de, Deserialize}; use serde_json::de::{StrRead, StreamDeserializer}; use serde_json::{Deserializer, Error, Value}; @@ -17,7 +19,7 @@ pub struct BenchData { pub struct BenchId { pub group_name: String, pub bench_name: String, - pub params: String, + pub params: BenchParams, } // Assumes three `String` elements in a Criterion bench ID: // @@ -31,20 +33,56 @@ impl<'de> Deserialize<'de> for BenchId { let s = String::deserialize(deserializer)?; let id = s.split('/').collect::>(); if id.len() != 3 { - Err(serde::de::Error::custom("Expected 3 bench ID elements")) + Err(de::Error::custom("Expected 3 bench ID elements")) } else { - let bench_name = id[1].replace('_', ":"); Ok(BenchId { group_name: id[0].to_owned(), - // Criterion converts `:` to `_` in the timestamp as the former is valid JSON syntax, - // so we convert `_` back to `:` when deserializing - bench_name, - params: id[2].to_owned(), + bench_name: id[1].to_owned(), + params: BenchParams::try_from(id[2]) + .map_err(|e| de::Error::custom(format!("{}", e)))?, }) } } } +#[derive(Debug, PartialEq)] +pub struct BenchParams { + pub commit_hash: String, + pub commit_timestamp: DateTime, + pub params: String, +} + +impl TryFrom<&str> for BenchParams { + type Error = anyhow::Error; + // Splits a -- input into a (String, `DateTime`, String) object + // E.g. `dd2a8e6-2024-02-20T22:48:21-05:00-rc-100` becomes ("dd2a8e6", ``, "rc-100") + fn try_from(value: &str) -> anyhow::Result { + let (commit_hash, rest) = value + .split_once('-') + .ok_or_else(|| anyhow!("Invalid format for bench params"))?; + let arr: Vec<&str> = rest.split_inclusive('-').collect(); + // Criterion converts `:` to `_` in the timestamp as the former is valid JSON syntax, + // so we convert `_` back to `:` when deserializing + let mut date: String = arr[..4] + .iter() + .flat_map(|s| s.chars()) + .collect::() + .replace('_', ":"); + date.pop(); + let params = arr[4..].iter().flat_map(|s| s.chars()).collect(); + + let commit_timestamp = DateTime::parse_from_rfc3339(&date).map_or_else( + |e| bail!("Failed to parse string into `DateTime`: {}", e), + |dt| Ok(dt.with_timezone(&Utc)), + )?; + Ok(Self { + commit_hash: commit_hash.to_owned(), + commit_timestamp, + params, + }) + } +} + #[derive(Debug, Deserialize)] pub struct BenchResult { #[serde(rename = "estimate")] @@ -145,3 +183,21 @@ where } } } + +#[cfg(test)] +mod test { + use crate::json::BenchParams; + use chrono::{DateTime, Utc}; + + #[test] + fn parse_bench_params() { + let s = "dd2a8e6-2024-02-20T22:48:21-05:00-rc-100"; + let params = BenchParams::try_from(s).unwrap(); + let params_expected = BenchParams { + commit_hash: "dd2a8e6".into(), + commit_timestamp: DateTime::parse_from_rfc3339("2024-02-20T22:48:21-05:00").map(|dt| dt.with_timezone(&Utc)).unwrap(), + params: "rc-100".into() + }; + assert_eq!(params, params_expected); + } +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 6a8d8df..7b969ea 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,25 +11,6 @@ use json::read_json_from_file; use crate::plot::{generate_plots, Plots}; -// TODO: Switch to camino -// Gets all JSON paths in the current directory, optionally ending in a given suffix -// E.g. if `suffix` is `abc1234.json` it will return "*abc1234.json" -fn get_json_paths(suffix: Option<&str>) -> std::io::Result> { - let suffix = suffix.unwrap_or(".json"); - let entries = std::fs::read_dir(".")? - .flatten() - .filter_map(|e| { - let ext = e.path(); - if ext.to_str()?.ends_with(suffix) { - Some(ext) - } else { - None - } - }) - .collect::>(); - Ok(entries) -} - // Benchmark files to plot, e.g. `LURK_BENCH_FILES=fibonacci-abc1234,fibonacci-def5678` fn bench_files_env() -> anyhow::Result> { std::env::var("LURK_BENCH_FILES") @@ -71,6 +52,25 @@ fn write_plots_to_file(plot_data: &Plots) -> Result<(), io::Error> { file.write_all(json_data.as_bytes()) } +// TODO: Switch to camino +// Gets all JSON paths in the current directory, optionally ending in a given suffix +// E.g. if `suffix` is `abc1234.json` it will return "*abc1234.json" +fn get_json_paths(suffix: Option<&str>) -> std::io::Result> { + let suffix = suffix.unwrap_or(".json"); + let entries = std::fs::read_dir(".")? + .flatten() + .filter_map(|e| { + let ext = e.path(); + if ext.to_str()?.ends_with(suffix) { + Some(ext) + } else { + None + } + }) + .collect::>(); + Ok(entries) +} + fn main() { // If existing plot data is found on disk, only read and add benchmark files specified by `LURK_BENCH_FILES` // Data is stored in a `HashMap` so duplicates are ignored diff --git a/src/plot.rs b/src/plot.rs index af0266c..140d478 100644 --- a/src/plot.rs +++ b/src/plot.rs @@ -1,4 +1,3 @@ -use anyhow::bail; use plotters::prelude::*; use chrono::{serde::ts_seconds, DateTime, Duration, Utc}; @@ -89,20 +88,6 @@ fn style(idx: usize) -> PaletteColor { Palette99::pick(idx) } -// Splits a - input into a (String, `DateTime`) object -fn parse_commit_str(input: &str) -> anyhow::Result<(String, DateTime)> { - // Splits at the first `-` as the size is known (assumes UTF-8) - let (commit, date) = input.split_at(8); - let mut commit = commit.to_owned(); - commit.pop(); - - let date = DateTime::parse_from_rfc3339(date).map_or_else( - |e| bail!("Failed to parse string into `DateTime`: {}", e), - |dt| Ok(dt.with_timezone(&Utc)), - )?; - Ok((commit, date)) -} - // Plots of benchmark results over time/Git history. This data structure is persistent between runs, // saved to disk in `plot-data.json`, and is meant to be append-only to preserve historical results. // @@ -123,26 +108,25 @@ impl Plots { // and adds the data to the `Plots` struct. pub fn add_data(&mut self, bench_data: &Vec) { for bench in bench_data { - let (commit_hash, commit_date) = - parse_commit_str(&bench.id.bench_name).expect("Timestamp parse error"); + let id = &bench.id; let point = Point { - x: commit_date, + x: id.params.commit_timestamp, y: bench.result.time, - label: commit_hash, + label: id.params.commit_hash.clone(), }; - if self.0.get(&bench.id.group_name).is_none() { - self.0.insert(bench.id.group_name.to_owned(), Plot::new()); + if self.0.get(&id.group_name).is_none() { + self.0.insert(id.group_name.to_owned(), Plot::new()); } - let plot = self.0.get_mut(&bench.id.group_name).unwrap(); + let plot = self.0.get_mut(&id.group_name).unwrap(); - plot.x_axis.set_min_max(commit_date); + plot.x_axis.set_min_max(id.params.commit_timestamp); plot.y_axis.set_min_max(point.y); - if plot.lines.get(&bench.id.params).is_none() { - plot.lines.insert(bench.id.params.to_owned(), vec![]); + if plot.lines.get(&id.params.params).is_none() { + plot.lines.insert(id.params.params.to_owned(), vec![]); } - plot.lines.get_mut(&bench.id.params).unwrap().push(point); + plot.lines.get_mut(&id.params.params).unwrap().push(point); } // Sort each data point in each line for each plot for plot in self.0.iter_mut() { From f634195ed0f114f8bce6a0c298dccf2edceba0b0 Mon Sep 17 00:00:00 2001 From: Samuel Burnham <45365069+samuelburnham@users.noreply.github.com> Date: Tue, 27 Feb 2024 12:56:25 -0500 Subject: [PATCH 3/5] Address feedback --- .github/workflows/bench-deploy.yml | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/workflows/bench-deploy.yml b/.github/workflows/bench-deploy.yml index 854cfb1..a3955ae 100644 --- a/.github/workflows/bench-deploy.yml +++ b/.github/workflows/bench-deploy.yml @@ -14,6 +14,10 @@ on: required: false default: 'LURK' type: string + # List of prerequisite Ubuntu packages, separated by whitespace + packages: + required: false + type: string jobs: benchmark: @@ -27,6 +31,9 @@ jobs: with: gpu-framework: 'cuda' - uses: ./.github/actions/ci-env + - uses: ./.github/actions/install-deps + with: + packages: "${{ inputs.packages }} pkg-config libfontconfig1-dev" # Install deps - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable @@ -34,7 +41,6 @@ jobs: - uses: taiki-e/install-action@v2 with: tool: just@1.22.0 - - run: sudo apt-get update && sudo apt-get install pkg-config libfontconfig1-dev # Run benchmarks and deploy - name: Get old benchmarks uses: actions/checkout@v4 @@ -55,7 +61,7 @@ jobs: - name: Run benchmarks run: | just gpu-bench-ci fibonacci - mv fibonacci-${{ env.COMMIT }}.json .. + mv fibonacci-${{ env.COMMIT }}.json ${{ github.workspace }} working-directory: ${{ github.workspace }}/benches - uses: actions/checkout@v4 with: @@ -65,9 +71,9 @@ jobs: - name: Check for existing plot data run: | shopt -s nullglob # Make glob patterns that match no files expand to a null string - if [ ! -f "../gh-pages/benchmarks/history/plot-data.json" ]; then + if [ ! -f "${{ github.workspace }}/gh-pages/benchmarks/history/plot-data.json" ]; then echo "No plot data found" - tarballs=(../gh-pages/benchmarks/history/*.tar.gz) + tarballs=(${{ github.workspace }}/gh-pages/benchmarks/history/*.tar.gz) if (( ${#tarballs[@]} )); then cat "${tarballs[@]}" | tar -xvzf - -i else @@ -79,11 +85,11 @@ jobs: # Saves the plot data to be deployed - name: Generate historical performance plot run: | - cp ../fibonacci-${{ env.COMMIT }}.json . + cp ${{ github.workspace }}/fibonacci-${{ env.COMMIT }}.json . cargo run - mv *.png .. - mkdir -p ../history - mv -f plot-data.json ../history + mv *.png ${{ github.workspace }} + mkdir -p ${{ github.workspace }}/history + mv -f plot-data.json ${{ github.workspace }}/history working-directory: ${{ github.workspace }}/ci-workflows # TODO: Prettify labels for easier viewing # Compress the benchmark file and metadata for later analysis From 398c60f7e96617076a533eeb82a94b8e4f2d378a Mon Sep 17 00:00:00 2001 From: Samuel Burnham <45365069+samuelburnham@users.noreply.github.com> Date: Tue, 27 Feb 2024 22:01:14 -0500 Subject: [PATCH 4/5] Test --- .github/workflows/bench-deploy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/bench-deploy.yml b/.github/workflows/bench-deploy.yml index a3955ae..b3aa4ef 100644 --- a/.github/workflows/bench-deploy.yml +++ b/.github/workflows/bench-deploy.yml @@ -66,6 +66,7 @@ jobs: - uses: actions/checkout@v4 with: repository: lurk-lab/ci-workflows + ref: bench-deploy path: ci-workflows # If no plot data found, unzip all historical bench results to re-create the plots - name: Check for existing plot data From 984155c20c93fab1f382a5cb983ae9c03e04c376 Mon Sep 17 00:00:00 2001 From: Samuel Burnham <45365069+samuelburnham@users.noreply.github.com> Date: Thu, 29 Feb 2024 20:41:10 -0500 Subject: [PATCH 5/5] Tidying --- .github/workflows/bench-deploy.yml | 15 ++- Cargo.toml | 2 + src/json.rs | 18 +-- src/main.rs | 178 ++++++++++++++++++----------- src/plot.rs | 12 +- 5 files changed, 146 insertions(+), 79 deletions(-) diff --git a/.github/workflows/bench-deploy.yml b/.github/workflows/bench-deploy.yml index b3aa4ef..8aa7822 100644 --- a/.github/workflows/bench-deploy.yml +++ b/.github/workflows/bench-deploy.yml @@ -3,7 +3,20 @@ # - `gh-pages` branch with Pages deployment set up # - Ideally some HTML to link to the reports, e.g. https://lurk-lab.github.io/ci-lab/ # - Self-hosted runner attached to the caller repo with `gpu-bench` and `gh-pages` tags -# - `justfile` with a `gpu-bench-ci` recipe that outputs `-.json` +# - `justfile` with a `gpu-bench-ci` recipe that outputs `-.json` +# +# The core file structure is a `benchmarks/history` directory on the `gh-pages` branch that contains: +# - Historical data `.tar.gz` archives, one for each commit or workflow run, which contain the Criterion benchmark results +# and `Cargo.lock` for the given commit. +# - Historical data in `plot-data.json`, which contains only the relevant metadata and average benchmark result for each of +# the saved benchmarks. This file is persistent and append-only, and if it's not found then it is re-created using each of +# the `tar.gz` archives +# - `.png` plot images, created on each run using `plot-data.json` +# - HTML to render the images. +# +# This structure is all created/deployed by the workflow after running the benchmarks, +# with the only prerequisite being an existing `gh-pages` branch deployed via GitHub Pages. +# See https://github.com/lurk-lab/ci-lab/tree/gh-pages as an example of the deployed plots name: Deploy GPU benchmark from default branch on: diff --git a/Cargo.toml b/Cargo.toml index 3931797..075fc8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,8 +7,10 @@ edition = "2021" [dependencies] anyhow = "1.0" +camino = "1.1.6" # chrono version is pinned to be compatible with plotters `build_cartesian_2d` API chrono = { version = "=0.4.20", features = ["clock", "serde"] } +clap = { version = "4.5.1", features = ["derive"] } plotters = "0.3.5" serde = { version = "1.0.195", features = ["derive"] } serde_json = "1.0.111" diff --git a/src/json.rs b/src/json.rs index 5880102..8cff194 100644 --- a/src/json.rs +++ b/src/json.rs @@ -55,7 +55,7 @@ pub struct BenchParams { impl TryFrom<&str> for BenchParams { type Error = anyhow::Error; // Splits a -- input into a (String, `DateTime`, String) object - // E.g. `dd2a8e6-2024-02-20T22:48:21-05:00-rc-100` becomes ("dd2a8e6", ``, "rc-100") + // E.g. `dd2a8e6-2024-02-20T22:48:21-05:00-rc=100` becomes ("dd2a8e6", ``, "rc=100") fn try_from(value: &str) -> anyhow::Result { let (commit_hash, rest) = value .split_once('-') @@ -90,13 +90,15 @@ pub struct BenchResult { } // Deserializes the benchmark JSON file into structured data for plotting -pub fn read_json_from_file>(path: P) -> Result, Error> { +pub fn read_json_from_file, T: for<'de> Deserialize<'de>>( + path: P, +) -> Result, Error> { let mut file = File::open(path).unwrap(); let mut s = String::new(); file.read_to_string(&mut s).unwrap(); let mut data = vec![]; - for result in ResilientStreamDeserializer::::new(&s).flatten() { + for result in ResilientStreamDeserializer::::new(&s).flatten() { data.push(result); } Ok(data) @@ -191,13 +193,15 @@ mod test { #[test] fn parse_bench_params() { - let s = "dd2a8e6-2024-02-20T22:48:21-05:00-rc-100"; + let s = "dd2a8e6-2024-02-20T22:48:21-05:00-rc=100"; let params = BenchParams::try_from(s).unwrap(); let params_expected = BenchParams { commit_hash: "dd2a8e6".into(), - commit_timestamp: DateTime::parse_from_rfc3339("2024-02-20T22:48:21-05:00").map(|dt| dt.with_timezone(&Utc)).unwrap(), - params: "rc-100".into() + commit_timestamp: DateTime::parse_from_rfc3339("2024-02-20T22:48:21-05:00") + .map(|dt| dt.with_timezone(&Utc)) + .unwrap(), + params: "rc=100".into(), }; assert_eq!(params, params_expected); } -} \ No newline at end of file +} diff --git a/src/main.rs b/src/main.rs index 7b969ea..0561552 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,35 +1,113 @@ +// This crate provides a CLI for plotting and managing historical Criterion benchmark files +// It consists of the following commands: +// - `cargo run plot` will plot the benchmark JSON file(s) at a given path. The bench files are required to be in the +// `BenchOutputType::GhPages` format. See `BenchOutputType` for the different bench ID schemas. +// TODO: below +// - `cargo run convert` will convert benchmark JSONs to different formats for use in other tools. +// E.g. `cargo run convert --input gh-pages --output commit-comment` will reformat the bench ID and other attributes, +// which will enable the benchmark to be used with `criterion-table`. + +// NOTE: This tool is only intended for non-regression benchmarks, which compare performance for the *same* functions +// between Git commits over time. In future we may generalize this crate for comparison between different functions. + mod json; mod plot; -use std::{ - io::{self, Read, Write}, - path::PathBuf, -}; +use std::io::{self, Read, Write}; -use anyhow::anyhow; +use camino::{Utf8Path, Utf8PathBuf}; +use clap::{Args, Parser, Subcommand, ValueEnum}; use json::read_json_from_file; -use crate::plot::{generate_plots, Plots}; - -// Benchmark files to plot, e.g. `LURK_BENCH_FILES=fibonacci-abc1234,fibonacci-def5678` -fn bench_files_env() -> anyhow::Result> { - std::env::var("LURK_BENCH_FILES") - .map_err(|e| anyhow!("Benchmark files env var isn't set: {e}")) - .and_then(|commits| { - let vec: anyhow::Result> = commits - .split(',') - .map(|sha| { - sha.parse::() - .map_err(|e| anyhow!("Failed to parse Git commit string: {e}")) - }) - .collect(); - vec - }) +use crate::{ + json::BenchData, + plot::{generate_plots, Plots}, +}; + +/// Criterion benchmark JSON formatter & plotter +#[derive(Parser, Debug)] +#[clap(version, about, long_about = None)] +struct Cli { + #[clap(subcommand)] + command: Command, +} + +#[derive(Subcommand, Debug)] +enum Command { + /// Plot benchmark file(s) + Plot(PlotArgs), + /// Convert a benchmark file from one output format to another + Convert(ConvertArgs), +} + +#[derive(Args, Debug)] +struct PlotArgs { + #[clap(long, value_parser)] + dir: Option, +} + +impl PlotArgs { + fn create_plots(&self) { + // If existing plot data is found on disk, only read and add benchmark files specified by `LURK_BENCH_FILES` + // Data is stored in a `HashMap` so duplicates are ignored + let (mut plots, bench_files) = { + if let Ok(plots) = read_plots_from_file() { + // Adds all JSONs contained in `self.dir` to the plot + // Otherwise defaults to all files in workdir with current Git commit in the filename + let (dir, suffix) = if let Some(dir) = &self.dir { + (dir.as_path(), None) + } else { + let mut commit_hash = env!("VERGEN_GIT_SHA").to_owned(); + commit_hash.truncate(7); + (Utf8Path::new("."), Some(commit_hash)) + }; + let bench_files = + get_json_paths(dir, suffix.as_deref()).expect("Failed to read JSON paths"); + + (plots, bench_files) + } + // If no plot data exists, read all `JSON` files in the current directory and save to disk + else { + let paths = get_json_paths(&Utf8PathBuf::from("."), None) + .expect("Failed to read JSON paths"); + (Plots::new(), paths) + } + }; + println!("Adding bench files to plot: {:?}", bench_files); + let mut bench_data = vec![]; + for file in bench_files { + let mut data = read_json_from_file::<_, BenchData>(file).expect("JSON serde error"); + bench_data.append(&mut data); + } + plots.add_data(&bench_data); + + // Write to disk + write_plots_to_file(&plots).expect("Failed to write `Plots` to `plot-data.json`"); + generate_plots(&plots).unwrap(); + } +} + +#[derive(Args, Debug)] +struct ConvertArgs { + /// Bench format of the input + #[clap(long, value_enum)] + input: BenchOutputType, + + /// Desired bench format of the output + #[clap(long, value_enum)] + output: BenchOutputType, +} + +#[derive(Clone, Debug, ValueEnum)] +enum BenchOutputType { + GhPages, + CommitComment, + PrComment, } // Deserializes JSON file into `Plots` type fn read_plots_from_file() -> Result { - let path = std::path::Path::new("plot-data.json"); + let path = Utf8Path::new("plot-data.json"); let mut file = std::fs::File::open(path)?; @@ -43,7 +121,7 @@ fn read_plots_from_file() -> Result { // Serializes `Plots` type into file fn write_plots_to_file(plot_data: &Plots) -> Result<(), io::Error> { - let path = std::path::Path::new("plot-data.json"); + let path = Utf8Path::new("plot-data.json"); let mut file = std::fs::File::create(path)?; @@ -52,17 +130,17 @@ fn write_plots_to_file(plot_data: &Plots) -> Result<(), io::Error> { file.write_all(json_data.as_bytes()) } -// TODO: Switch to camino -// Gets all JSON paths in the current directory, optionally ending in a given suffix +// Searches for all JSON paths in the specified directory, optionally ending in a given suffix // E.g. if `suffix` is `abc1234.json` it will return "*abc1234.json" -fn get_json_paths(suffix: Option<&str>) -> std::io::Result> { +fn get_json_paths(dir: &Utf8Path, suffix: Option<&str>) -> std::io::Result> { let suffix = suffix.unwrap_or(".json"); - let entries = std::fs::read_dir(".")? + let entries = std::fs::read_dir(dir)? .flatten() .filter_map(|e| { let ext = e.path(); - if ext.to_str()?.ends_with(suffix) { - Some(ext) + let ext = ext.to_str()?; + if ext.ends_with(suffix) && ext != "./plot-data.json" { + Some(Utf8PathBuf::from(ext)) } else { None } @@ -72,43 +150,9 @@ fn get_json_paths(suffix: Option<&str>) -> std::io::Result p.create_plots(), + Command::Convert(_c) => todo!(), }; - println!("Adding bench files to plot: {:?}", bench_files); - let mut bench_data = vec![]; - for file in bench_files { - let mut data = read_json_from_file(file).expect("JSON serde error"); - bench_data.append(&mut data); - } - plots.add_data(&bench_data); - - // Write to disk - write_plots_to_file(&plots).expect("Failed to write `Plots` to `plot-data.json`"); - generate_plots(&plots).unwrap(); } diff --git a/src/plot.rs b/src/plot.rs index 140d478..1db5a7c 100644 --- a/src/plot.rs +++ b/src/plot.rs @@ -7,8 +7,10 @@ use std::{collections::HashMap, error::Error}; use crate::json::BenchData; +// TODO: Plot throughput as well as timings pub fn generate_plots(data: &Plots) -> Result<(), Box> { for plot in data.0.iter() { + println!("Plotting: {} {:?}", plot.0, plot.1); let out_file_name = format!("./{}.png", plot.0); let root = BitMapBackend::new(&out_file_name, (1024, 768)).into_drawing_area(); root.fill(&WHITE)?; @@ -92,7 +94,7 @@ fn style(idx: usize) -> PaletteColor { // saved to disk in `plot-data.json`, and is meant to be append-only to preserve historical results. // // Note: -// Plots are separated by benchmark input e.g. `Fibonacci-num-100`. It doesn't reveal much +// Plots are separated by benchmark group and function e.g. `Fibonacci-num=100-Prove`. It doesn't reveal much // information to view multiple benchmark input results on the same graph (e.g. fib-10 and fib-20), // since they are expected to be different. Instead, we group different benchmark parameters // (e.g. `rc` value) onto the same graph to compare/contrast their impact on performance. @@ -114,11 +116,13 @@ impl Plots { y: bench.result.time, label: id.params.commit_hash.clone(), }; + // plotters doesn't like `/` char in plot title so we use `-` + let plot_name = format!("{}-{}", id.group_name, id.bench_name); - if self.0.get(&id.group_name).is_none() { - self.0.insert(id.group_name.to_owned(), Plot::new()); + if self.0.get(&plot_name).is_none() { + self.0.insert(plot_name.clone(), Plot::new()); } - let plot = self.0.get_mut(&id.group_name).unwrap(); + let plot = self.0.get_mut(&plot_name).unwrap(); plot.x_axis.set_min_max(id.params.commit_timestamp); plot.y_axis.set_min_max(point.y);