Skip to content

Commit 640c438

Browse files
Add a benchmarking script.
* Builds "main" as a baseline and uses Hyperfine to compare the current directory against this baseline.
* Testsuite added with four different error-correction code groups.
* Creates a results directory for each run with whisker plots and stat summaries.
1 parent 1e0cd87 commit 640c438

20 files changed

Lines changed: 24550 additions & 0 deletions
13.6 KB
Binary file not shown.
2.55 KB
Binary file not shown.
4.41 KB
Binary file not shown.

benchmarking/benchmark.py

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
#!/usr/bin/env python3.13
2+
"""
3+
Tesseract Decoder Benchmarker
4+
5+
This script automates the process of benchmarking the Tesseract decoder using hyperfine.
6+
It compares the performance of your current working directory against a baseline revision.
7+
8+
Basic Usage:
9+
Run the benchmarker with default settings (compares current directory against 'main'):
10+
$ ./benchmarking/benchmark.py
11+
12+
Run a quick benchmark (minimal shots and runs, useful for sanity checking before a long run):
13+
$ ./benchmarking/benchmark.py -q
14+
15+
Compare against a specific baseline revision (e.g., a specific commit or branch):
16+
$ ./benchmarking/benchmark.py -b my-feature-branch
17+
18+
Filter circuits by group name (e.g., only run 'surface_code' circuits). See circuits.json for available groups:
19+
$ ./benchmarking/benchmark.py -g surface_code
20+
21+
Benchmarking Multiple Changes:
22+
You can benchmark multiple working directories simultaneously against the baseline.
23+
This is useful if you have several different implementations across different
24+
directories that you want to compare side-by-side in a single run.
25+
26+
To set up additional directories for your changes:
27+
- Using git: Create a new worktree.
28+
$ git worktree add ../path-to-experiment1 <branch-or-commit>
29+
- Using jj (jujutsu): Add a new workspace.
30+
$ jj workspace add ../path-to-experiment1 -r <revision>
31+
32+
Use the -d or --dir flag for each additional directory you want to include:
33+
$ ./benchmarking/benchmark.py -d ../path-to-experiment1 -d ../path-to-experiment2
34+
35+
You can also provide a label for the plot by using the format label=path:
36+
$ ./benchmarking/benchmark.py -d "experiment1=../path-to-experiment1"
37+
38+
This will benchmark the baseline, the current working directory, and the two
39+
extra directories specified, providing a single cohesive report.
40+
41+
Command Line Flags:
42+
-b, --baseline <rev> : Specify baseline revision (default: main). Can be a branch or commit.
43+
-d, --dir <lbl=path> : Add extra working directories to benchmark against. Format: path or label=path. Can be specified multiple times.
44+
-q, --quick : Enable quick mode (fewer shots, warmup rounds, and runs). Useful for testing.
45+
-g, --group <name> : Filter circuits to benchmark by group name (e.g. 'surface_code').
46+
--skip-build : Skip the bazel build step (assuming binaries are already built).
47+
--loop : Continuously loop the benchmarks. Take a step away from your computer, and grab a Nuka Cola.
48+
"""
49+
50+
import argparse
51+
import contextlib
52+
import json
53+
import logging
54+
import shutil
55+
import subprocess
56+
import sys
57+
import time
58+
from datetime import datetime
59+
from pathlib import Path
60+
from zoneinfo import ZoneInfo
61+
import plotting
62+
import workspace
63+
64+
# Configure logging with LA timezone
65+
class Formatter(logging.Formatter):
    """Log formatter whose timestamps are rendered in America/Los_Angeles time."""

    def converter(self, timestamp):
        """Convert epoch seconds to an LA-local struct_time.

        logging.Formatter.formatTime() calls self.converter(record.created),
        so overriding this method localizes every %(asctime)s field.
        """
        la_time = datetime.fromtimestamp(timestamp, tz=ZoneInfo('America/Los_Angeles'))
        return la_time.timetuple()
69+
70+
# Root logger: INFO-level messages to stdout, timestamped via the LA-aware
# Formatter defined above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

handler = logging.StreamHandler(sys.stdout)
formatter = Formatter('[%(asctime)s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
handler.setFormatter(formatter)

logger.addHandler(handler)
76+
77+
def print_batch_summary(json_output_files: list[Path], circuit_names: list[str]) -> None:
    """Log a per-circuit summary (baseline vs. PWD mean and speedup) for a batch.

    Reads each hyperfine JSON export and compares the first two entries,
    which by construction are the baseline and the current working directory.

    Args:
        json_output_files: Hyperfine JSON export files, one per circuit.
        circuit_names: Circuit names parallel to json_output_files.
    """
    logger.info("===================================================")
    logger.info(">>> BATCH RUN SUMMARY")
    logger.info("===================================================")

    for json_file, c_name in zip(json_output_files, circuit_names):
        # EAFP: open directly instead of exists()-then-open (avoids a TOCTOU
        # race) and catch only the failures we can actually explain.
        try:
            with open(json_file, 'r') as f:
                results_data = json.load(f)
        except FileNotFoundError:
            # A benchmark that never ran leaves no JSON; skip it silently,
            # matching the previous exists() check.
            continue
        except (OSError, json.JSONDecodeError) as e:
            logger.error(f"Failed to parse or summarize {json_file}: {e}")
            continue

        results_list = results_data.get('results', [])
        if len(results_list) < 2:
            # Need at least baseline + pwd entries to compare.
            continue

        baseline_mean = results_list[0].get('mean')
        pwd_mean = results_list[1].get('mean')
        if baseline_mean is None or pwd_mean is None or pwd_mean <= 0:
            continue

        speedup = baseline_mean / pwd_mean
        logger.info(f"Circuit: {c_name}")
        logger.info(f" Baseline Mean: {baseline_mean:.4f} s")
        logger.info(f" PWD Mean: {pwd_mean:.4f} s")
        logger.info(f" Speedup: {speedup:.4f}x")
        logger.info("---------------------------------------------------")
102+
103+
def run_benchmark_batch(args: argparse.Namespace, workspaces: list[str | Path], workspace_names: list[str]) -> None:
    """Run one hyperfine benchmark pass over every selected circuit.

    For each circuit, benchmarks the tesseract binary from every workspace,
    exports hyperfine JSON, renders a whisker plot, and finally logs a
    baseline-vs-pwd summary.

    Args:
        args: Parsed CLI namespace; expects quick, group, sample_num_shots,
            warmup_rounds, and num_runs to be populated by main().
        workspaces: Directories each containing a built bazel-bin/src/tesseract.
        workspace_names: Labels parallel to workspaces (e.g. "baseline", "pwd").
    """
    logger.info("===================================================")
    logger.info(">>> STARTING NEW BATCH RUN SEQUENCE")
    logger.info("===================================================")

    if args.quick:
        logger.info(f">>> Quick mode enabled: Reduced shots ({args.sample_num_shots}), warmup ({args.warmup_rounds}), and runs ({args.num_runs}).")

    # Timestamped output directory so repeated runs never clobber each other.
    la_tz = ZoneInfo('America/Los_Angeles')
    timestamp = datetime.now(la_tz).strftime('%Y-%m-%d_%H_%M')
    result_dir = Path(f"benchmarking/results/{timestamp}_{args.num_runs}")

    logger.info(f">>> Output directory: {result_dir}")
    (result_dir / "benchmark_json").mkdir(parents=True, exist_ok=True)
    (result_dir / "benchmark_whiskers").mkdir(parents=True, exist_ok=True)

    try:
        with open("benchmarking/circuits.json", 'r') as f:
            circuits_data = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        logger.error(f"Failed to load circuits JSON: {e}")
        sys.exit(1)

    if args.group:
        logger.info(f">>> Filtering circuits by group: {args.group}")
        circuits = [c for c in circuits_data if c.get('group') == args.group]
    else:
        circuits = circuits_data

    # A typo'd --group would otherwise silently benchmark nothing and print an
    # empty summary; fail loudly instead.
    if not circuits:
        logger.error(f"No circuits matched group filter: {args.group!r}")
        sys.exit(1)

    json_output_files = []
    circuit_names = []

    tesseract_args = [
        "--sample-num-shots", str(args.sample_num_shots),
        "--print-stats", "--threads", "48", "--beam", "5",
        "--no-revisit-dets", "--num-det-orders", "1",
        "--pqlimit", "100000", "--sample-seed", "123456"
    ]

    for circuit in circuits:
        c_name = circuit['name']
        c_path = circuit['path']

        json_file = result_dir / "benchmark_json" / f"results_{c_name}.json"
        whisker_file = result_dir / "benchmark_whiskers" / f"results_{c_name}.png"

        json_output_files.append(json_file)
        circuit_names.append(c_name)

        logger.info("---------------------------------------------------")
        logger.info(f">>> BENCHMARKING CIRCUIT: {c_name}")
        logger.info(f">>> Path: {c_path}")

        hyperfine_cmd = [
            "hyperfine",
            "--warmup", str(args.warmup_rounds),
            "--runs", str(args.num_runs),
            "--export-json", str(json_file)
        ]

        for name, d in zip(workspace_names, workspaces):
            hyperfine_cmd.extend(["-n", name])

            # Path(".") / "bazel-bin" normalizes to "bazel-bin", so the current
            # working directory needs no special-casing.
            binary_path = Path(d) / "bazel-bin" / "src" / "tesseract"

            cmd_for_binary = f"{binary_path} --circuit '{c_path}' " + " ".join(tesseract_args)
            hyperfine_cmd.append(cmd_for_binary)

        workspace.run_cmd(hyperfine_cmd)

        plotting.plot_benchmark_results(json_file=str(json_file), labels=workspace_names, output_file=str(whisker_file))

    print_batch_summary(json_output_files, circuit_names)
    logger.info(f">>> Batch Run Complete! Results saved in: {result_dir}")
180+
181+
def _split_dir_flag(spec: str) -> tuple[str, str]:
    """Parse a -d/--dir value: 'label=path' yields (label, path); a bare
    'path' is labeled with its final path component."""
    if '=' in spec:
        label, path = spec.split('=', 1)
        return label, path
    return Path(spec).name, spec

def main() -> None:
    """Parse CLI flags, materialize the baseline workspace, and run benchmarks."""
    parser = argparse.ArgumentParser(description="Benchmark tesseract decoder using hyperfine.")
    parser.add_argument("-b", "--baseline", default="main", help="Specify baseline revision (default: main)")
    parser.add_argument("-d", "--dir", action="append", default=[], help="Add extra working directories to benchmark against. Format: path or label=path. Can be specified multiple times.")
    parser.add_argument("--skip-build", action="store_true", help="Skip the bazel build step")
    parser.add_argument("--loop", action="store_true", help="Loop runs rather than running once.")

    parser.add_argument("-q", "--quick", action="store_true", help="Enable quick mode (fewer shots/runs)")
    parser.add_argument("-g", "--group", default="", help="Filter circuits by group name")
    parser.add_argument("--shots", type=int, default=5000, help="Override the default sample-num-shots (mutually exclusive with -q)")
    parser.add_argument("--warmup", type=int, default=15, help="Override the default warmup-rounds (mutually exclusive with -q)")
    parser.add_argument("--runs", type=int, default=50, help="Override the default num-runs (mutually exclusive with -q)")

    args = parser.parse_args()

    # -q conflicts with any explicit override of the tuning flags.
    overrides_given = args.shots != 5000 or args.warmup != 15 or args.runs != 50
    if args.quick and overrides_given:
        parser.error("-q/--quick cannot be used with --shots, --warmup, or --runs")

    if args.quick:
        args.sample_num_shots, args.warmup_rounds, args.num_runs = 500, 1, 2
    else:
        args.sample_num_shots, args.warmup_rounds, args.num_runs = args.shots, args.warmup, args.runs

    baseline_dir = "../baseline_bench_tmp"
    vcs = workspace.check_vcs()
    if not vcs:
        logger.error("Error: Neither a jj nor git repository detected.")
        sys.exit(1)

    with workspace.managed_baseline(baseline_dir, args.baseline, vcs):
        labeled_dirs = [_split_dir_flag(d) for d in args.dir]
        extra_names = [label for label, _ in labeled_dirs]
        extra_workspaces = [path for _, path in labeled_dirs]

        workspaces = [baseline_dir, "."] + extra_workspaces
        workspace_names = ["baseline", "pwd"] + extra_names

        workspace.build_all(workspaces, args.skip_build)

        if args.loop:
            while True:
                run_benchmark_batch(args, workspaces, workspace_names)
                logger.info(">>> Restarting in 5 seconds... (Press Ctrl+C to stop)")
                time.sleep(5)
                # Rebuild each pass so edits made while looping are picked up.
                workspace.build_all(workspaces, args.skip_build)
        else:
            run_benchmark_batch(args, workspaces, workspace_names)

if __name__ == "__main__":
    main()

benchmarking/circuits.json

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
[
2+
{
3+
"name": "r12_d12_p0.001_bivariate_bicycle_X",
4+
"group": "bivariate_bicycle_X",
5+
"path": "benchmarking/testsuite/r=12,d=12,p=0.001,noise=si1000,c=bivariate_bicycle_X,nkd=[[144,12,12]],q=288,iscolored=True,A_poly=x^3+y+y^2,B_poly=y^3+x+x^2.stim"
6+
},
7+
{
8+
"name": "r12_d12_p0.002_bivariate_bicycle_X",
9+
"group": "bivariate_bicycle_X",
10+
"path": "benchmarking/testsuite/r=12,d=12,p=0.002,noise=si1000,c=bivariate_bicycle_X,nkd=[[144,12,12]],q=288,iscolored=True,A_poly=x^3+y+y^2,B_poly=y^3+x+x^2.stim"
11+
},
12+
{
13+
"name": "r18_d18_p0.001_bivariate_bicycle_X",
14+
"group": "bivariate_bicycle_X",
15+
"path": "benchmarking/testsuite/r=18,d=18,p=0.001,noise=si1000,c=bivariate_bicycle_X,nkd=[[288,12,18]],q=576,iscolored=True,A_poly=x^3+y^2+y^7,B_poly=y^3+x+x^2.stim"
16+
},
17+
{
18+
"name": "r11_d11_p0.001_superdense_color_code_X",
19+
"group": "superdense_color_code_X",
20+
"path": "benchmarking/testsuite/r=11,d=11,p=0.001,noise=si1000,c=superdense_color_code_X,q=181,gates=cz.stim"
21+
},
22+
{
23+
"name": "r11_d11_p0.002_superdense_color_code_X",
24+
"group": "superdense_color_code_X",
25+
"path": "benchmarking/testsuite/r=11,d=11,p=0.002,noise=si1000,c=superdense_color_code_X,q=181,gates=cz.stim"
26+
},
27+
{
28+
"name": "r9_d9_p0.001_superdense_color_code_X",
29+
"group": "superdense_color_code_X",
30+
"path": "benchmarking/testsuite/r=9,d=9,p=0.001,noise=si1000,c=superdense_color_code_X,q=121,gates=cz.stim"
31+
},
32+
{
33+
"name": "r9_d9_p0.002_superdense_color_code_X",
34+
"group": "superdense_color_code_X",
35+
"path": "benchmarking/testsuite/r=9,d=9,p=0.002,noise=si1000,c=superdense_color_code_X,q=121,gates=cz.stim"
36+
},
37+
{
38+
"name": "r23_d23_p0.001._surface_code",
39+
"group": "surface_code",
40+
"path": "benchmarking/testsuite/r=23,d=23,p=0.001.stim"
41+
},
42+
{
43+
"name": "r23_d23_p0.008._surface_code",
44+
"group": "surface_code",
45+
"path": "benchmarking/testsuite/r=23,d=23,p=0.008.stim"
46+
},
47+
{
48+
"name": "r29_d29_p0.001._surface_code",
49+
"group": "surface_code",
50+
"path": "benchmarking/testsuite/r=29,d=29,p=0.001.stim"
51+
},
52+
{
53+
"name": "r11_d11_p0.001_surface_code_trans_cx_X",
54+
"group": "surface_code_trans_cx_X",
55+
"path": "benchmarking/testsuite/r=11,d=11,p=0.001,noise=si1000,c=surface_code_trans_cx_X,q=482,gates=cz.stim"
56+
},
57+
{
58+
"name": "r9_d9_p0.001_surface_code_trans_cx_X",
59+
"group": "surface_code_trans_cx_X",
60+
"path": "benchmarking/testsuite/r=9,d=9,p=0.001,noise=si1000,c=surface_code_trans_cx_X,q=322,gates=cz.stim"
61+
},
62+
{
63+
"name": "r9_d9_p0.002_surface_code_trans_cx_X",
64+
"group": "surface_code_trans_cx_X",
65+
"path": "benchmarking/testsuite/r=9,d=9,p=0.002,noise=si1000,c=surface_code_trans_cx_X,q=322,gates=cz.stim"
66+
}
67+
]

benchmarking/plotting.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import json
2+
import matplotlib.pyplot as plt
3+
4+
def plot_benchmark_results(
    json_file: str,
    labels: list[str],
    output_file: str | None = None,
    title: str | None = None,
) -> None:
    """Render a whisker (box) plot from a hyperfine JSON export.

    Args:
        json_file: Path to the JSON file with benchmark results.
        labels: Legend/x-axis labels, one per benchmarked command.
        output_file: Optional path to save the generated image. If None, the
            plot is shown interactively instead.
        title: Optional title for the plot.
    """
    with open(json_file, encoding="utf-8") as f:
        results = json.load(f)["results"]

    sample_times = [entry["times"] for entry in results]
    n_boxes = len(sample_times)

    plt.figure(figsize=(10, 6), constrained_layout=True)
    box = plt.boxplot(sample_times, vert=True, patch_artist=True)

    # One rainbow-sampled fill color per box.
    rainbow = plt.get_cmap("rainbow")
    for idx, patch in enumerate(box["boxes"]):
        patch.set_facecolor(rainbow(idx / n_boxes))

    if title:
        plt.title(title)

    plt.legend(handles=box["boxes"], labels=labels, loc="best", fontsize="medium")
    plt.ylabel("Time [s]")
    plt.ylim(0, None)
    plt.xticks(list(range(1, len(labels) + 1)), labels, rotation=45)

    if output_file:
        plt.savefig(output_file)
    else:
        plt.show()
    plt.close()

0 commit comments

Comments
 (0)