stage 6: per-app

2026-05-20 23:47:19 +10:00 · 2026-05-20 23:47:19 +10:00 · fcf421b94c
commit fcf421b94c
parent 9edd809416
31 changed files with 6360 additions and 344 deletions
--- a/crates/headroom-dsp/benches/layer_a.rs
+++ b/crates/headroom-dsp/benches/layer_a.rs
@ -0,0 +1,130 @@
+//! Microbenchmarks for the Layer A (per-app level control) audio-side
+//! work. Validates that the costs land within the budget PLAN §4.7
+//! cites (~10 μs per quantum on the audio thread, a few μs per
+//! measurement on the daemon thread).
+//!
+//! What's measured:
+//! - `analysis_scan_stereo_1024` — the per-block peak + mean_sq pass
+//!   the audio thread runs on each managed stream. This is the only
+//!   work that touches the RT thread per managed app.
+//! - `level_envelopes_process_block` — the post-analysis envelope
+//!   smoothing the *daemon* thread runs.
+//!
+//! For reference (so the Layer A numbers can be compared against
+//! something we know is on the audio thread today):
+//! - `compressor_process_frame` and `limiter_process_frame` —
+//!   per-frame DSP cost in the processed-route filter chain.
+//!
+//! Run with `cargo bench -p headroom-dsp --bench layer_a` inside
+//! `nix develop`.
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
+use headroom_dsp::{
+    Compressor, CompressorConfig, LevelEnvelopes, LevelEnvelopesConfig, Limiter, LimiterConfig,
+};
+
+/// Benchmark block geometry: a 1024-frame quantum of 48 kHz stereo is
+/// 2048 interleaved samples and lasts about 21.3 ms.
+const FRAMES: usize = 1024; // frames per benchmark block
+const CHANNELS: usize = 2; // interleaved channels per frame
+const SR: f32 = 48_000.0; // sample rate in Hz
+const BLOCK_DT_S: f32 = FRAMES as f32 / SR; // duration of one block in seconds
+
+/// Synthesises one interleaved two-channel block for the benchmarks:
+/// `FRAMES` frames of a two-partial sine mix plus a small DC offset, with
+/// the second channel scaled to 0.92 of the first.
+fn make_block() -> Vec<f32> {
+    use std::f32::consts::TAU;
+
+    // Angular step per frame for the 220 Hz and 1730 Hz partials at `SR`.
+    let omega_lo = TAU * (220.0 / SR);
+    let omega_hi = TAU * (1730.0 / SR);
+    let mut samples = Vec::with_capacity(FRAMES * CHANNELS);
+    for frame in 0..FRAMES {
+        let t = frame as f32;
+        // Two partials plus a tiny DC term: enough variation that the peak
+        // is not tied to one sample and the mean-square is not trivially zero.
+        let left = 0.4 * (omega_lo * t).sin() + 0.18 * (omega_hi * t).sin() + 0.005;
+        samples.extend([left, left * 0.92]); // second channel slightly quieter
+    }
+    samples
+}
+
+/// Per-block analysis pass standing in for the audio-thread Layer A
+/// callback: returns `(peak, mean_sq)`, the largest absolute sample and
+/// the mean of the squared samples. Written as an explicit loop rather
+/// than iterator combinators. An empty slice yields `(0.0, 0.0)`.
+#[inline]
+fn analysis_scan(samples: &[f32]) -> (f32, f32) {
+    let mut peak = 0.0_f32;
+    let mut sumsq = 0.0_f32;
+    for &s in samples {
+        let a = s.abs();
+        if a > peak {
+            peak = a;
+        }
+        sumsq += s * s;
+    }
+    // Clamp the divisor to 1 so an empty slice gives 0.0 rather than 0/0 = NaN.
+    (peak, sumsq / samples.len().max(1) as f32)
+}
+
+/// Times `analysis_scan` over one synthetic block in the
+/// `layer_a_audio_thread` group, with throughput counted in samples.
+fn bench_analysis_scan(c: &mut Criterion) {
+    let samples = make_block();
+    let sample_count = (FRAMES * CHANNELS) as u64;
+    let mut audio_group = c.benchmark_group("layer_a_audio_thread");
+    audio_group.throughput(Throughput::Elements(sample_count));
+    audio_group.bench_function("analysis_scan_stereo_1024", |bencher| {
+        bencher.iter(|| black_box(analysis_scan(black_box(&samples))));
+    });
+    audio_group.finish();
+}
+
+/// Times `LevelEnvelopes::process_block` on a fixed `(peak, mean_sq)`
+/// pair obtained by scanning one synthetic block before timing starts.
+fn bench_level_envelopes(c: &mut Criterion) {
+    let mut envelopes = LevelEnvelopes::new(LevelEnvelopesConfig::default(), BLOCK_DT_S);
+    let (peak, mean_sq) = analysis_scan(&make_block());
+    let mut daemon_group = c.benchmark_group("layer_a_daemon_thread");
+    daemon_group.bench_function("level_envelopes_process_block", |bencher| {
+        bencher.iter(|| {
+            let result = envelopes.process_block(black_box(peak), black_box(mean_sq));
+            black_box(result);
+        });
+    });
+    daemon_group.finish();
+}
+
+/// Times `Compressor::process_frame` and `Limiter::process_frame` on a
+/// fixed pair of inputs. These are reference figures for comparison with
+/// the Layer A benchmarks, not a Layer A cost.
+fn bench_filter_kernels(c: &mut Criterion) {
+    let mut compressor = Compressor::new(CompressorConfig::default(), SR);
+    let mut limiter = Limiter::new(LimiterConfig::default(), SR);
+
+    let mut reference_group = c.benchmark_group("filter_reference_per_frame");
+    reference_group.throughput(Throughput::Elements(1));
+    reference_group.bench_function("compressor_process_frame", |bencher| {
+        bencher.iter(|| {
+            let frame = compressor.process_frame(black_box(0.3), black_box(-0.2));
+            black_box(frame);
+        });
+    });
+    reference_group.bench_function("limiter_process_frame", |bencher| {
+        bencher.iter(|| {
+            let frame = limiter.process_frame(black_box(0.3), black_box(-0.2));
+            black_box(frame);
+        });
+    });
+    reference_group.finish();
+}
+
+criterion_group!(
+    benches, // group name handed to `criterion_main!` below
+    bench_analysis_scan, // group "layer_a_audio_thread"
+    bench_level_envelopes, // group "layer_a_daemon_thread"
+    bench_filter_kernels // group "filter_reference_per_frame"
+);
+criterion_main!(benches);