stage 6: per-app

This commit is contained in:
atagen 2026-05-20 23:47:19 +10:00
parent 9edd809416
commit fcf421b94c
31 changed files with 6360 additions and 344 deletions

View file

@ -0,0 +1,130 @@
//! Microbenchmarks for the Layer A (per-app level control) audio-side
//! work. Validates that the costs land within the budget PLAN §4.7
//! cites (~10 μs/quantum audio-thread, ~few μs/measurement
//! daemon-thread).
//!
//! What's measured:
//! - `analysis_scan_stereo_1024` — the per-block peak + mean_sq pass
//! the audio thread runs on each managed stream. This is the only
//! work that touches the RT thread per managed app.
//! - `level_envelopes_process_block` — the post-analysis envelope
//! smoothing the *daemon* thread runs.
//!
//! For reference (so the Layer A numbers can be compared against
//! something we know is on the audio thread today):
//! - `compressor_process_frame` and `limiter_process_frame` —
//! per-sample DSP cost in the processed-route filter chain.
//!
//! Run with `cargo bench -p headroom-dsp --bench layer_a` inside
//! `nix develop`.
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use headroom_dsp::{
Compressor, CompressorConfig, LevelEnvelopes, LevelEnvelopesConfig, Limiter, LimiterConfig,
};
/// Frames per benchmark block.
///
/// 1024-frame quantum at 48 kHz stereo: 2048 interleaved samples,
/// 21.3 ms per block.
const FRAMES: usize = 1024;
/// Channels per frame (stereo). Together with `FRAMES` this fixes the
/// block length at `FRAMES * CHANNELS` samples.
const CHANNELS: usize = 2;
/// Sample rate in Hz (48 kHz).
const SR: f32 = 48_000.0;
/// Duration of one `FRAMES`-frame block in seconds (`FRAMES / SR`,
/// roughly 0.0213 s).
const BLOCK_DT_S: f32 = FRAMES as f32 / SR;
/// Synthesizes one interleaved stereo block of deterministic test audio.
///
/// Every frame is the sum of two sine partials (220 Hz and 1730 Hz at the
/// `SR` sample rate) plus a tiny DC offset, so the peak is not pinned to a
/// single sample and the mean-square is not trivially zero. The aim is a
/// realistic-ish spread of values rather than a constant or silent buffer.
///
/// Returns `FRAMES * CHANNELS` samples laid out L, R, L, R, ... where the
/// right channel is the left channel scaled by 0.92.
fn make_block() -> Vec<f32> {
    let two_pi = 2.0 * std::f32::consts::PI;
    // Phase advance per frame for each partial, in radians.
    let step_low = two_pi * (220.0 / SR);
    let step_high = two_pi * (1730.0 / SR);

    let mut samples = Vec::with_capacity(FRAMES * CHANNELS);
    for frame in 0..FRAMES {
        let t = frame as f32;
        let left = 0.4 * (step_low * t).sin() + 0.18 * (step_high * t).sin() + 0.005;
        // Slight attenuation on the right so the two channels differ.
        samples.extend_from_slice(&[left, left * 0.92]);
    }
    samples
}
/// What the audio-thread Layer A callback computes per block: the absolute
/// peak and the mean of the squared samples.
///
/// `samples` is scanned as one flat run, so for interleaved audio both
/// statistics cover every channel together. Returns `(peak, mean_sq)`.
/// An empty slice yields `(0.0, 0.0)` rather than the NaN mean that a
/// divide-by-zero would produce.
///
/// Hand-rolled tight loop so the bench measures the candidate code, not
/// stdlib iterator combinators (which the compiler will inline to roughly
/// the same thing — but we want to be honest about it).
#[inline]
fn analysis_scan(samples: &[f32]) -> (f32, f32) {
    // Checked once, outside the hot loop: 0.0 / 0.0 would otherwise be NaN.
    if samples.is_empty() {
        return (0.0, 0.0);
    }

    let mut peak = 0.0_f32;
    let mut sumsq = 0.0_f32;
    for &s in samples {
        let a = s.abs();
        // A NaN sample compares false here and so never becomes the peak.
        if a > peak {
            peak = a;
        }
        sumsq += s * s;
    }
    let mean_sq = sumsq / samples.len() as f32;
    (peak, mean_sq)
}
/// Benchmarks the per-block peak / mean-square scan over one synthetic
/// stereo block. Results land in the `layer_a_audio_thread` group, with
/// throughput counted in samples (`FRAMES * CHANNELS` per iteration).
fn bench_analysis_scan(c: &mut Criterion) {
    let samples = make_block();
    let samples_per_iter = (FRAMES * CHANNELS) as u64;

    let mut audio_group = c.benchmark_group("layer_a_audio_thread");
    audio_group.throughput(Throughput::Elements(samples_per_iter));
    audio_group.bench_function("analysis_scan_stereo_1024", |bencher| {
        bencher.iter(|| {
            // black_box on the input and the result keeps the optimizer
            // from hoisting or discarding the scan.
            let stats = analysis_scan(black_box(&samples));
            black_box(stats);
        });
    });
    audio_group.finish();
}
/// Benchmarks a single `LevelEnvelopes::process_block` call in the
/// `layer_a_daemon_thread` group.
///
/// The peak and mean-square inputs are computed once from a synthetic
/// block before timing starts, and the same envelope instance is reused
/// across all iterations.
fn bench_level_envelopes(c: &mut Criterion) {
    let mut envelopes = LevelEnvelopes::new(LevelEnvelopesConfig::default(), BLOCK_DT_S);
    // Scan outside the timed closure so only the envelope update is measured.
    let (peak, mean_sq) = analysis_scan(&make_block());

    let mut daemon_group = c.benchmark_group("layer_a_daemon_thread");
    daemon_group.bench_function("level_envelopes_process_block", |bencher| {
        bencher.iter(|| {
            let update = envelopes.process_block(black_box(peak), black_box(mean_sq));
            black_box(update);
        });
    });
    daemon_group.finish();
}
/// Reference benchmarks for the per-frame compressor and limiter kernels.
///
/// These are not a Layer A cost; they are measured here for context, to
/// show how big Layer A is relative to what the realtime filter is
/// already doing. Each iteration processes one stereo frame with fixed
/// inputs (0.3, -0.2).
fn bench_filter_kernels(c: &mut Criterion) {
    let mut compressor = Compressor::new(CompressorConfig::default(), SR);
    let mut limiter = Limiter::new(LimiterConfig::default(), SR);

    let mut reference_group = c.benchmark_group("filter_reference_per_frame");
    // One frame per iteration.
    reference_group.throughput(Throughput::Elements(1));

    reference_group.bench_function("compressor_process_frame", |bencher| {
        bencher.iter(|| {
            let frame = compressor.process_frame(black_box(0.3), black_box(-0.2));
            black_box(frame);
        });
    });

    reference_group.bench_function("limiter_process_frame", |bencher| {
        bencher.iter(|| {
            let frame = limiter.process_frame(black_box(0.3), black_box(-0.2));
            black_box(frame);
        });
    });

    reference_group.finish();
}
// Collect the three benchmark functions into a group named `benches`.
criterion_group!(
benches,
bench_analysis_scan,
bench_level_envelopes,
bench_filter_kernels
);
// Generate the bench binary's `main`, which runs the `benches` group.
criterion_main!(benches);