riir

2026-05-19 23:32:51 +10:00 · 2026-05-19 23:32:51 +10:00 · 0aa6ae9fbf
commit 0aa6ae9fbf
parent da4bc139eb
49 changed files with 10554 additions and 5482 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@ -0,0 +1,4 @@
+pub mod parsers;
+pub mod pool;
+pub mod store;
+pub mod types;
--- a/src/main.rs
+++ b/src/main.rs
--- a/src/parsers/help.rs
+++ b/src/parsers/help.rs
@ -0,0 +1,187 @@
+mod description;
+mod helpers;
+mod options;
+mod positionals;
+mod subcommands;
+
+pub use options::{param_parser, parse_usage_flags, switch_parser};
+pub use positionals::{
+    extract_cli11_positionals, extract_usage_positionals, parse_usage_args, skip_command_name,
+};
+
+use std::collections::HashMap;
+
+use crate::{
+    parsers::help::{description::description, helpers::get_indent, subcommands::subcommand_entry},
+    types::*,
+};
+use nom::{IResult, Parser, character::complete::space0, combinator::opt};
+
+use crate::make_parser;
+
+// shape of the raw tuple produced by the `entry` sequence before it is
+// folded into an OptionEntry:
+// (leading whitespace, (switch, optional param), (first desc line, continuation lines)).
+type EntryParts<'a> = (
+    &'a str,
+    (Switch<'a>, Option<Param<'a>>),
+    (&'a str, Vec<&'a str>),
+);
+
+// parse a single flag entry: indent + switch + optional param + description.
+// the first description line and its continuation lines are flattened into
+// one list; any line that is empty after trimming is dropped, so `desc`
+// may end up empty.
+make_parser!(entry -> OptionEntry<'a>,
+    (
+        space0,
+        (switch_parser, opt(param_parser)),
+        description,
+    )
+    => |(_, (switch, param), (first, cont))
+        : EntryParts<'a>|
+    {
+        // capacity covers the worst case: first line plus every continuation.
+        let mut desc: Vec<&str> = Vec::with_capacity(1 + cont.len());
+        if !first.trim().is_empty() { desc.push(first); }
+        desc.extend(cont.into_iter().filter(|l| !l.trim().is_empty()));
+        OptionEntry { switch, param, desc }
+    }
+);
+
+/// collapse subcommands whose names differ only by ASCII case. for each
+/// name the entry with the longest description wins (the earlier entry on
+/// a tie); output order follows the first appearance of each name.
+fn dedup_subcommands<'a>(raw: Vec<Subcommand<'a>>) -> Vec<Subcommand<'a>> {
+    use std::collections::hash_map::Entry;
+
+    let mut best: HashMap<String, Subcommand<'a>> = HashMap::new();
+    let mut first_seen: Vec<String> = Vec::new();
+    for candidate in raw {
+        match best.entry(candidate.name.to_ascii_lowercase()) {
+            Entry::Vacant(slot) => {
+                // new name: remember where it first showed up.
+                first_seen.push(slot.key().clone());
+                slot.insert(candidate);
+            }
+            Entry::Occupied(mut slot) => {
+                // replace only on a strictly longer description, so ties
+                // keep the earlier entry.
+                if candidate.desc.len() > slot.get().desc.len() {
+                    slot.insert(candidate);
+                }
+            }
+        }
+    }
+    // every key in `first_seen` was inserted into `best` exactly once.
+    first_seen
+        .into_iter()
+        .filter_map(|key| best.remove(&key))
+        .collect()
+}
+
+/// which part of the help text the line scanner is currently inside.
+/// drives which entry parsers `build_help_result` is willing to try.
+#[derive(Clone, Copy, PartialEq, Eq)]
+enum HelpSection {
+    /// no section header seen yet, or a usage line reset the state;
+    /// option entries are still accepted here.
+    Unknown,
+    /// an options/flags-style section: option entries are accepted.
+    Options,
+    /// a commands/subcommands section: subcommand entries are accepted.
+    Commands,
+    /// any other recognised section (arguments, examples, ...); its lines
+    /// are skipped without trying either entry parser.
+    Other,
+}
+
+/// decide whether `line` is a section header and, if so, which kind of
+/// section it opens. returns None for blank lines, for lines indented by
+/// more than 4 visual columns, and for text that matches no header rule.
+/// matching is done on the lowercased text with trailing colons removed.
+fn classify_section_line(line: &str) -> Option<HelpSection> {
+    let (start, indent) = get_indent(line);
+    let text = line[start..].trim();
+    if indent > 4 || text.is_empty() {
+        return None;
+    }
+
+    let has_colon = text.ends_with(':');
+    let lowered = text.trim_end_matches(':').trim().to_ascii_lowercase();
+    let label = lowered.as_str();
+    let mentions_flags = label.contains("option") || label.contains("flag");
+
+    // rule order matters: an earlier rule shadows every later one.
+    let section = if label.starts_with("usage") {
+        HelpSection::Unknown
+    } else if matches!(label, "examples" | "example")
+        || label.starts_with("valid arguments")
+        || label.contains(" is one of the following")
+        || label.contains(" defaults to")
+    {
+        HelpSection::Other
+    } else if !mentions_flags
+        && (matches!(label, "command" | "commands" | "subcommands")
+            || label.ends_with(" commands")
+            || label.ends_with(" subcommands"))
+    {
+        // headers such as "command options" fall through to the rules below.
+        HelpSection::Commands
+    } else if label.contains("argument")
+        || matches!(label, "args" | "positionals" | "positional arguments")
+    {
+        HelpSection::Other
+    } else if mentions_flags || has_colon {
+        // any remaining shallow line ending in ':' is treated as an
+        // option-style section header.
+        HelpSection::Options
+    } else {
+        return None;
+    };
+    Some(section)
+}
+
+/// drop everything up to and including the first '\n' in `s`; yields ""
+/// when `s` contains no newline.
+fn consume_line(s: &str) -> &str {
+    s.split_once('\n').map_or("", |(_, tail)| tail)
+}
+
+/// true when `rem` is strictly shorter than `original`, i.e. the parser
+/// that produced `rem` consumed at least one byte.
+fn parser_made_progress(original: &str, rem: &str) -> bool {
+    original.len() > rem.len()
+}
+
+/// build the final HelpResult by scanning help text with lightweight section
+/// awareness. options are accepted in option-like sections and before a
+/// section is known; subcommands are accepted only in command-like sections.
+///
+/// the scan never fails: a line that is neither a header nor a parseable
+/// entry is skipped. positionals are extracted separately from the whole
+/// text afterwards. `desc` is always left empty here.
+fn build_help_result<'a>(original: &'a str) -> HelpResult<'a> {
+    let mut entries = Vec::new();
+    let mut raw_subcommands: Vec<Subcommand<'a>> = Vec::new();
+    let mut section = HelpSection::Unknown;
+    let mut rem = original;
+
+    while !rem.is_empty() {
+        // look at the current line without consuming it.
+        let line = rem.split_once('\n').map(|(line, _)| line).unwrap_or(rem);
+        // a header line switches section and is consumed whole; it is
+        // never offered to the entry parsers.
+        if let Some(next_section) = classify_section_line(line) {
+            section = next_section;
+            rem = consume_line(rem);
+            continue;
+        }
+
+        // an entry parser may consume several lines (continuation lines);
+        // the progress check guards against an Ok that consumed nothing,
+        // which would otherwise spin this loop forever.
+        if matches!(section, HelpSection::Unknown | HelpSection::Options)
+            && let Ok((next, parsed)) = entry(rem)
+            && parser_made_progress(rem, next)
+        {
+            entries.push(parsed);
+            rem = next;
+            continue;
+        }
+
+        if section == HelpSection::Commands
+            && let Ok((next, parsed)) = subcommand_entry(rem)
+            && parser_made_progress(rem, next)
+        {
+            raw_subcommands.push(parsed);
+            rem = next;
+            continue;
+        }
+
+        // nothing matched: drop this line and move on.
+        rem = consume_line(rem);
+    }
+
+    let subcommands = dedup_subcommands(raw_subcommands);
+    // cli11 positional section takes priority over the usage-line scan
+    // when both are present — cli11 carries types and optionality.
+    let positionals = match extract_cli11_positionals(original) {
+        Ok((_, p)) if !p.is_empty() => p,
+        // no cli11 section (or an empty one): fall back to the usage line,
+        // and to no positionals at all if that fails too.
+        _ => extract_usage_positionals(original)
+            .map(|(_, p)| p)
+            .unwrap_or_default(),
+    };
+    HelpResult {
+        entries,
+        subcommands,
+        positionals,
+        desc: "",
+    }
+}
+
+/// top-level help parser.
+///
+/// thin nom-shaped wrapper around `build_help_result`: it always returns
+/// `Ok` and always reports the whole input as consumed (remainder is "").
+pub fn help_parser(s: &str) -> IResult<&str, HelpResult<'_>> {
+    Ok(("", build_help_result(s)))
+}
--- a/src/parsers/help/description.rs
+++ b/src/parsers/help/description.rs
@ -0,0 +1,37 @@
+use nom::{
+    IResult, Parser,
+    character::complete::space0,
+    combinator::verify,
+    multi::many0,
+    sequence::{preceded, terminated},
+};
+
+use crate::make_parser;
+use crate::parsers::help::helpers::{at_least_indent, eol, rest_of_line};
+
+// continuation line: an indented (≥8 visual cols), non-flag-shaped line
+// belonging to the previous flag's description. blank-but-indented lines
+// are accepted (content = "") and are expected to be filtered out by the
+// caller. yields the line content with leading spaces/tabs removed and
+// without the line terminator.
+make_parser!(continuation_line -> &'a str,
+    verify(
+        preceded(
+            // assert ≥8 visual cols of leading horizontal whitespace
+            // without consuming — space0 inside `rest_of_line`'s preceded
+            // will eat them next.
+            at_least_indent(8),
+            terminated(preceded(space0, rest_of_line), eol)
+        ),
+        // reject lines whose first non-space char is '-' — that's a new
+        // flag entry, not a continuation of the previous one.
+        |content: &&str| !content.starts_with('-')
+    )
+);
+
+// description: the line of text after the switch+param, plus any
+// continuation lines. always succeeds — first line may be empty (when
+// the switch is followed immediately by a newline, "clap long" style).
+// yields (first line, continuation lines); leading spaces/tabs are
+// stripped from each and the line terminators are consumed.
+make_parser!(pub description -> (&'a str, Vec<&'a str>),
+(
+    terminated(preceded(space0, rest_of_line), eol),
+    many0(continuation_line),
+));
--- a/src/parsers/help/helpers.rs
+++ b/src/parsers/help/helpers.rs
@ -0,0 +1,105 @@
+use nom::{
+    AsChar, IResult, Parser, branch::alt, bytes::complete::take_till,
+    character::complete::line_ending, combinator::eof,
+};
+#[allow(unused_imports)]
+use nom::{bytes::complete::take_while, combinator::peek, combinator::verify};
+
+/// define a named nom parser function
+/// `fn name<'a>(s: &'a str) -> IResult<&'a str, Out>`.
+///
+/// forms:
+///   make_parser!([vis] name -> Out, parser);
+///   make_parser!([vis] name -> Out, parser => wrap);
+///
+/// `vis` is any visibility (`pub`, `pub(crate)`, ...) or nothing for a
+/// private fn. `parser` is any expression with a `.parse(&str)` method;
+/// with `=> wrap`, the parsed value is passed through `wrap` (a function,
+/// enum constructor or closure) before being returned.
+///
+/// `IResult` and the `Parser` trait are resolved at the call site, so the
+/// invoking module must have them in scope. `Out` may mention `'a`.
+#[macro_export]
+macro_rules! make_parser {
+    // the `=> wrap` arm must come first: without it the plain arm would
+    // be tried on `parser => wrap` input and reject the trailing tokens.
+    ($vis:vis $name:ident -> $out:ty, $parser:expr => $wrap:expr) => {
+        #[allow(clippy::needless_lifetimes)]
+        #[allow(mismatched_lifetime_syntaxes)]
+        $vis fn $name<'a>(s: &'a str) -> IResult<&'a str, $out> {
+            let (rem, val) = $parser.parse(s)?;
+            Ok((rem, $wrap(val)))
+        }
+    };
+    ($vis:vis $name:ident -> $out:ty, $parser:expr) => {
+        #[allow(clippy::needless_lifetimes)]
+        #[allow(mismatched_lifetime_syntaxes)]
+        $vis fn $name<'a>(s: &'a str) -> IResult<&'a str, $out> {
+            $parser.parse(s)
+        }
+    };
+}
+
+/// define a named `fn name(c: char) -> bool` from a closure-like body.
+///
+///   make_predicate!([vis] name, |c| <body>);
+///
+/// `vis` is any visibility (`pub`, `pub(crate)`, ...) or nothing for a
+/// private fn. the `|c|` only names the parameter; the body becomes the
+/// function body directly, so no closure is created.
+#[macro_export]
+macro_rules! make_predicate {
+    ($vis:vis $name:ident, |$c:ident| $($body:tt)*) => {
+        $vis fn $name($c: char) -> bool { $($body)* }
+    };
+}
+
+// chars allowed in an option name: any alphanumeric, '-' or '_'.
+make_predicate!(pub is_option_char, |c| c.is_alphanumeric() || matches!(c, '-' | '_'));
+
+// everything up to (not including) the next newline char, as defined by
+// nom's `AsChar::is_newline`. succeeds with "" on an empty line or at
+// end of input.
+make_parser!(pub rest_of_line -> &'a str,
+    take_till(|c: char| c.is_newline())
+);
+
+// end of line — matches either a newline or end of input.
+// permissive version used in most line-consuming parsers.
+make_parser!(pub eol -> &'a str, alt((line_ending, eof)));
+
+/// visual width of a run of indentation: each space counts as 1 column
+/// and each tab as 8 (typical terminal default). any other char adds
+/// nothing, so callers are expected to pass only the leading whitespace.
+/// the total saturates at `u8::MAX`.
+pub fn visual_indent(s: &str) -> u8 {
+    let mut cols: u8 = 0;
+    for c in s.chars() {
+        let width = match c {
+            '\t' => 8,
+            ' ' => 1,
+            _ => continue,
+        };
+        cols = cols.saturating_add(width);
+    }
+    cols
+}
+
+/// nom-shaped check that the input begins with at least `min` visual
+/// columns of horizontal whitespace (spaces or tabs). doesn't consume —
+/// pair with `space0`/`take_while` to actually eat the indent.
+///
+/// on success the output is the peeked whitespace run; the input position
+/// is left unchanged. with `min == 0` the check passes on any input.
+pub fn at_least_indent<'a>(
+    min: u8,
+) -> impl Parser<&'a str, Output = &'a str, Error = nom::error::Error<&'a str>> {
+    verify(
+        // `peek` keeps the run unconsumed; `take_while` (not `take_while1`)
+        // lets an unindented line reach the width check instead of
+        // failing early.
+        peek(take_while(|c: char| c == ' ' || c == '\t')),
+        move |s: &str| visual_indent(s) >= min,
+    )
+}
+
+/// legacy helper: returns (byte index of first non-indent char, visual indent).
+/// used by callers that still need the byte index.
+///
+/// spaces count 1 column and tabs count 8. the column count saturates at
+/// `u8::MAX` instead of overflowing on very deep indentation. when `s` is
+/// empty or consists only of spaces/tabs, the index is `s.len()`, so
+/// `&s[idx..]` is always the text after the indent.
+pub fn get_indent(s: &str) -> (usize, u8) {
+    let mut indent: u8 = 0;
+    for (i, c) in s.char_indices() {
+        let width: u8 = match c {
+            ' ' => 1,
+            '\t' => 8,
+            // first non-indent char: its byte offset ends the run.
+            _ => return (i, indent),
+        };
+        indent = indent.saturating_add(width);
+    }
+    // no non-indent char found: the whole string is indentation.
+    (s.len(), indent)
+}
--- a/src/parsers/help/options.rs
+++ b/src/parsers/help/options.rs
@ -0,0 +1,192 @@
+use crate::make_parser;
+use crate::parsers::help::helpers::is_option_char;
+use crate::types::*;
+
+use nom::bytes::complete::{take_till, take_till1};
+use nom::character::complete::{space0, space1};
+use nom::combinator::{map, opt};
+use nom::multi::many0;
+use nom::sequence::separated_pair;
+use nom::{
+    IResult, Parser,
+    branch::alt,
+    bytes::complete::{tag, take_while1},
+    character::complete::{char, satisfy},
+    combinator::{value, verify},
+    sequence::{delimited, preceded},
+};
+
+// `-x`: a single dash followed by exactly one alphanumeric char; yields
+// that char.
+make_parser!(short_switch -> char,
+    preceded(char('-'), satisfy(|c| c.is_alphanumeric())));
+
+// `--name`: yields the name without the leading dashes.
+make_parser!(long_switch -> &'a str,
+    preceded(tag("--"), take_while1(is_option_char)));
+
+// `--[no-]name`: negatable long switch; yields only `name`, the
+// `[no-]` marker is discarded.
+make_parser!(negatable_long_switch -> &'a str,
+    preceded(tag("--[no-]"), take_while1(is_option_char)));
+
+// `,` plus any following spaces/tabs — separator between two spellings
+// of the same switch.
+make_parser!(comma -> (),
+    value((), preceded(char(','), space0)));
+
+// `[=NAME]`: optional param attached with '='.
+make_parser!(eq_optional_param -> Param<'a>,
+    delimited(tag("[="), take_while1(is_option_char), char(']')) => Param::Optional);
+
+// `[=<name>]`: optional param in angle brackets; the name is everything
+// up to the first '>'.
+make_parser!(eq_optional_angle_param -> Param<'a>,
+    delimited(tag("[=<"), take_till1(|c| c == '>'), tag(">]")) => Param::Optional);
+
+// `=NAME`: mandatory param attached with '='.
+make_parser!(eq_mandatory_param -> Param<'a>,
+    preceded(char('='), take_while1(is_option_char)) => Param::Mandatory);
+
+// take a wide alphanumeric/_/- token then verify the WHOLE thing looks
+// like an ALL_CAPS-style param name. taking only uppercase chars would
+// match just "N" of " Needs: ..." and leave "eeds:..." as desc, so we
+// widen, then reject anything that doesn't pass the all-caps check.
+//
+// note: '-' is accepted by the token scan but not by the final check, so
+// a token containing '-' (e.g. "FOO-BAR") is rejected as a whole rather
+// than being split at the dash.
+make_parser!(spaced_uppercase_param -> Param<'a>,
+    preceded(
+        char(' '),
+        verify(
+            take_while1(|c: char|
+                c.is_ascii_alphabetic() || c.is_ascii_digit() || c == '_' || c == '-'
+            ),
+            |s: &str| {
+                // first char: uppercase letter or '_' (a leading digit is
+                // rejected); remaining chars: uppercase, digit or '_'.
+                let first = match s.chars().next() { Some(c) => c, None => return false };
+                if !(first.is_ascii_uppercase() || first == '_') { return false; }
+                s.chars().all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
+            }
+        )
+    ) => Param::Mandatory);
+
+// ` <name>`: exactly one space, then a mandatory param in angle brackets.
+make_parser!(spaced_angle_param -> Param<'a>,
+    preceded(char(' '), delimited(char('<'), take_till1(|c| c == '>'), char('>'))) => Param::Mandatory);
+
+// ` <[name]>`: exactly one space, then an optional param written as
+// square brackets nested inside angle brackets.
+make_parser!(spaced_opt_angle_param -> Param<'a>,
+    preceded(char(' '), delimited(char('<'),
+        delimited(char('['), take_while1(|c| c != ']'), char(']')),
+        char('>'))) => Param::Optional);
+
+// `   <name>`: like `spaced_angle_param` but tolerates any run of one or
+// more spaces/tabs before the '<'.
+make_parser!(spaced_angle_param_after_space -> Param<'a>,
+    preceded(space1, delimited(char('<'), take_till1(|c| c == '>'), char('>'))) => Param::Mandatory);
+
+// take the full lowercase token then verify it's <=10 chars. a
+// take_while_m_n with a 10-char cap would leave a partial match — e.g.
+// "--foo nanoseconds" would extract param "nanosecond" and leave "s" as
+// the description. a word longer than 10 chars is almost certainly the
+// start of the description, not a type annotation.
+make_parser!(spaced_type_param -> Param<'a>,
+    preceded(
+        char(' '),
+        verify(
+            take_while1(|c: char| !c.is_whitespace()),
+            // byte length equals char count here because every char is
+            // required to be ASCII lowercase.
+            |s: &str| s.len() <= 10 && s.chars().all(|c| c.is_ascii_lowercase())
+        )
+    ) => Param::Mandatory
+);
+
+// parse the parameter that follows a switch, trying each supported
+// spelling in order; the first alternative that matches wins. the
+// `[=<..>]` form is tried before `[=..]`, and the bracketed forms before
+// the bare-word forms, so the more specific spelling is preferred.
+//
+// NOTE(review): `spaced_angle_param` looks unreachable — anything it
+// accepts (one space + `<..>`) is already accepted by
+// `spaced_angle_param_after_space` listed just above it. confirm before
+// removing.
+make_parser!(pub param_parser -> Param<'a>, alt((
+    eq_optional_angle_param,
+    eq_optional_param,
+    eq_mandatory_param,
+    spaced_opt_angle_param,
+    spaced_angle_param_after_space,
+    spaced_angle_param,
+    spaced_uppercase_param,
+    spaced_type_param,
+)));
+
+// define `fn name(&str) -> IResult<&str, Switch>` that parses
+// `left sep right` and builds a Switch from the two parsed halves.
+// `|a, b| body` names the left and right results; the separator's output
+// is discarded.
+macro_rules! switch_pair {
+    ($name:ident, $left:expr, $sep:expr, $right:expr => |$a:ident, $b:ident| $body:expr) => {
+        fn $name<'a>(s: &'a str) -> IResult<&'a str, Switch<'a>> {
+            use nom::sequence::separated_pair;
+            let (rem, ($a, $b)) = separated_pair($left, $sep, $right).parse(s)?;
+            Ok((rem, $body))
+        }
+    };
+}
+
+// `-s, --long`
+switch_pair!(short_comma_long,
+    short_switch, comma, long_switch => |s, l| Switch::Both(s, l));
+
+// `-s, --[no-]long`
+switch_pair!(short_comma_negatable_long,
+    short_switch, comma, negatable_long_switch => |s, l| Switch::Both(s, l));
+
+// `-s --long` (single space separator)
+switch_pair!(short_space_long,
+    short_switch, char(' '), long_switch => |s, l| Switch::Both(s, l));
+
+// `-s --[no-]long` (single space separator)
+switch_pair!(short_space_negatable_long,
+    short_switch, char(' '), negatable_long_switch => |s, l| Switch::Both(s, l));
+
+// `/` with optional spaces/tabs on either side.
+make_parser!(slash_sep -> (),
+    value((), delimited(space0, char('/'), space0)));
+
+// `--long / -s`: long spelling first; the result still stores the short
+// char first.
+switch_pair!(long_slash_short,
+    long_switch, slash_sep, short_switch => |l, s| Switch::Both(s, l));
+
+// single-spelling switches lifted into the Switch enum.
+make_parser!(short_as_switch -> Switch<'a>, short_switch => Switch::Short);
+make_parser!(negatable_long_as_switch -> Switch<'a>, negatable_long_switch => Switch::Long);
+make_parser!(long_as_switch -> Switch<'a>, long_switch => Switch::Long);
+
+// parse one switch in any supported spelling. alternatives are ordered
+// most-specific first: paired forms before single forms (otherwise `-s`
+// alone would match and leave `, --long` behind), and negatable
+// `--[no-]x` before plain `--x`.
+make_parser!(pub switch_parser -> Switch<'a>,
+    alt((
+        short_comma_negatable_long,
+        short_space_negatable_long,
+        short_comma_long,
+        short_space_long,
+        long_slash_short,
+        short_as_switch,
+        negatable_long_as_switch,
+        long_as_switch,
+    ))
+);
+
+// `{--long | -s}` — manpage SYNOPSIS-line switch pair. nix-env's
+// synopsis is the canonical case: `[{--file | -f} path] [{--profile |
+// -p} path]`. emits Switch::Both with the long name.
+// this parser handles the inside of the braces, long spelling first;
+// spaces/tabs around the `|` are optional.
+make_parser!(brace_pipe_long_short -> Switch<'a>,
+    separated_pair(long_switch, (space0, char('|'), space0), short_switch)
+    => |(l, s): (&'a str, char)| Switch::Both(s, l)
+);
+
+// same as above with the short spelling first: `-s | --long`.
+make_parser!(brace_pipe_short_long -> Switch<'a>,
+    separated_pair(short_switch, (space0, char('|'), space0), long_switch)
+    => |(s, l): (char, &'a str)| Switch::Both(s, l)
+);
+
+// the full `{ ... }` group, with optional spaces/tabs just inside the
+// braces.
+make_parser!(brace_pipe_switch -> Switch<'a>,
+    delimited(
+        (char('{'), space0),
+        alt((brace_pipe_long_short, brace_pipe_short_long)),
+        (space0, char('}'))
+    )
+);
+
+// switch as it may appear on a usage/SYNOPSIS line: the brace-pipe form
+// first, then every spelling `switch_parser` accepts.
+make_parser!(usage_switch_parser -> Switch<'a>,
+    alt((brace_pipe_switch, switch_parser))
+);
+
+// consume any chars except `]`. used to swallow trailing tokens inside a
+// flag bracket — e.g. `[--option name value]` keeps switch=Long("option")
+// and param=Mandatory("name"), discarding ` value` before the closing `]`.
+// may match the empty string.
+make_parser!(take_till_bracket -> &'a str, take_till(|c: char| c == ']'));
+
+// `[<switch> [param] <junk>]` inside the SYNOPSIS line.
+// fails when the bracket does not start with a switch, which is how
+// positional brackets such as `[FILE]` are left to other parsers.
+make_parser!(flag_in_bracket -> (Switch<'a>, Option<Param<'a>>),
+    delimited(
+        (char('['), space0),
+        (usage_switch_parser, opt(param_parser)),
+        (take_till_bracket, char(']'))
+    )
+);
+
+// walk the joined SYNOPSIS-line text, collecting every flag-bracketed
+// switch + its first param. non-flag tokens (positional brackets,
+// command name, ellipses) are skipped one char at a time.
+// the walk stops at the first '\n' or '\r' (the skip branch refuses
+// them), so only the current line is scanned.
+make_parser!(pub parse_usage_flags -> Vec<(Switch<'a>, Option<Param<'a>>)>,
+    many0(alt((
+        map(flag_in_bracket, Some),
+        // `value(None, ...)` requires `None: Clone` which forces Clone
+        // on Switch/Param; `map(..., |_| None)` doesn't.
+        map(satisfy(|c| c != '\n' && c != '\r'), |_| None),
+    )))
+    // drop the `None` placeholders produced by skipped chars.
+    => |v: Vec<Option<(Switch<'a>, Option<Param<'a>>)>>|
+        v.into_iter().flatten().collect()
+);
--- a/src/parsers/help/positionals.rs
+++ b/src/parsers/help/positionals.rs
@ -0,0 +1,400 @@
+use crate::parsers::help::helpers::rest_of_line;
+use crate::types::Positional;
+use crate::{make_parser, make_predicate};
+use nom::branch::alt;
+use nom::bytes::complete::{tag, tag_no_case, take_till, take_till1, take_while, take_while1};
+use nom::character::complete::{char, line_ending, satisfy, space0, space1};
+use nom::combinator::{map, not, opt, peek, recognize, value, verify};
+use nom::multi::many0;
+use nom::sequence::{delimited, preceded, terminated};
+use nom::{AsChar, IResult, Parser};
+
+// intermediate token produced while scanning a usage line. the first
+// three variants carry no name and are dropped when the final positional
+// list is built; the rest carry the positional's name.
+// Clone is needed because nom's `value` combinator clones its constant.
+#[derive(Clone)]
+enum PositionalParse<'a> {
+    // a `{...}` group.
+    Curly,
+    // a dash-led flag token.
+    Flag,
+    // anything skipped over (a single char or an unusable bracket group).
+    Skip,
+    // required, single value.
+    Mandatory(&'a str),
+    // optional, single value.
+    Optional(&'a str),
+    // required, repeatable.
+    ManVariadic(&'a str),
+    // optional, repeatable.
+    OptVariadic(&'a str),
+}
+
+// chars allowed in a command-name word or positional name: any
+// alphanumeric plus '-', '_', '/' and '.'.
+make_predicate!(is_word_char, |c| c.is_alphanumeric()
+    || matches!(c, '-' | '_' | '/' | '.'));
+
+// chars allowed in a bare ALL_CAPS positional: ASCII uppercase letters,
+// any numeric char, '_' and '-'.
+make_predicate!(is_pos_char, |c| c.is_ascii_uppercase()
+    || c.is_numeric()
+    || matches!(c, '_' | '-'));
+
+// case-insensitive PREFIX match on "option(s)" / "flag(s)". the plural
+// forms are listed first so the longest spelling is consumed.
+make_parser!(section_label -> (),
+    value((), alt((
+        tag_no_case("options"),
+        tag_no_case("option"),
+        tag_no_case("flags"),
+        tag_no_case("flag")
+    )))
+);
+
+// variadic marker: three ASCII dots or the single U+2026 ellipsis char.
+make_parser!(ellipses -> (),
+    value((),
+        alt((tag("..."), tag("\u{2026}")))
+    )
+);
+
+// a non-empty `{...}` group; its content is discarded.
+make_parser!(braces -> PositionalParse<'a>,
+    value(PositionalParse::Curly, delimited(char('{'), take_till1(|c| c == '}'), char('}')))
+);
+
+// a dash-led token (`-x`, `--long`, `--key=value`): consumed up to the
+// next space/tab or `]` and discarded as PositionalParse::Flag.
+// FIXME: consider `take_while1(is_option_char)` here instead. the `]`
+// stop char presumably keeps a flag written inside a bracket group from
+// swallowing the closing bracket — confirm before changing.
+make_parser!(flag -> PositionalParse<'a>,
+    value(PositionalParse::Flag, preceded(char('-'), take_till1(|c: char| c.is_space() || c == ']')))
+);
+
+/// decide whether `s` (trimmed first) is acceptable as a positional name.
+///
+/// rejected: empty names, names starting with '-', names that begin with
+/// a section label, and names containing any char outside `is_word_char`.
+fn check_positional(s: &str) -> bool {
+    let s = s.trim();
+    // reject names starting with '-' — these are flag tokens accidentally
+    // captured by the bracket parser, e.g. "[--at-operation]" in jj's
+    // synopsis. without this guard every `[--flag]` token would be
+    // recorded as a positional named "--flag".
+    if s.is_empty() || s.starts_with('-') {
+        return false;
+    }
+    // `section_label` is a case-insensitive PREFIX match, so this rejects
+    // "OPTIONS", "[flags]"-style placeholders and also any longer name
+    // that merely starts with "option"/"flag" (e.g. "flagfile"). an
+    // exact-match comparison against those words would be redundant here.
+    if section_label.parse(s).is_ok() {
+        return false;
+    }
+    s.chars().all(is_word_char)
+}
+
+// recognize a balanced `[...]` block, tolerating ONE level of nested
+// brackets inside. expressed entirely via nom combinators:
+//
+//   `[` + many0(alt((nested_bracket_block, non_bracket_char))) + `]`
+//
+// nested_bracket_block is `[ chars_until_] ]`, which means we accept a
+// single inner `[...]` correctly but not arbitrarily-deep nesting —
+// manpages don't go deeper than two levels.
+// returns the inner content (everything between the outer brackets).
+// an empty `[]` is accepted and yields "".
+make_parser!(balanced_bracket_inner -> &'a str,
+    recognize(delimited(
+        char('['),
+        many0(alt((
+            recognize((char('['), take_till(|c: char| c == ']'), char(']'))),
+            recognize(satisfy(|c: char| c != ']' && c != '[')),
+        ))),
+        char(']'),
+    ))
+    // the recognized text always starts with '[' and ends with ']' (one
+    // byte each), so slicing one byte off each end stays on char
+    // boundaries.
+    => |whole: &'a str| &whole[1..whole.len() - 1]
+);
+
+/// split a trailing variadic marker (`...` or U+2026) off `s`. returns
+/// the text before the marker with trailing whitespace removed and `true`,
+/// or `s` unchanged and `false` when there is no marker.
+fn split_trailing_ellipsis(s: &str) -> (&str, bool) {
+    s.strip_suffix("...")
+        .or_else(|| s.strip_suffix('\u{2026}'))
+        .map_or((s, false), |head| (head.trim_end(), true))
+}
+
+/// pull a positional name out of the text found between a pair of square
+/// brackets. the second tuple field is true when a variadic marker was
+/// seen, either at the end of the bracket content or inside an
+/// angle-bracketed / nested name.
+///
+/// three shapes are handled, in this order:
+///   * content starting with `[`: every nested `[...]` group is tried and
+///     the last one whose name passes `check_positional` wins;
+///   * content starting with `<`: the name is the text up to the first
+///     `>` (None when there is no `>`);
+///   * anything else: the leading word, up to whitespace, `[` or `]`.
+fn parse_bracket_inner_name(inner: &str) -> Option<(&str, bool)> {
+    let (rest, has_dots) = split_trailing_ellipsis(inner.trim());
+
+    if rest.starts_with('[') {
+        let mut found = None;
+        let mut cursor = rest;
+        while let Some((_, after_open)) = cursor.split_once('[') {
+            // an unclosed nested group ends the scan.
+            let Some((nested, after_close)) = after_open.split_once(']') else {
+                break;
+            };
+            if let Some((name, nested_dots)) = parse_bracket_inner_name(nested) {
+                if check_positional(name) {
+                    found = Some((name, has_dots || nested_dots));
+                }
+            }
+            cursor = after_close;
+        }
+        return found;
+    }
+
+    if let Some(after_lt) = rest.strip_prefix('<') {
+        // angle-bracket name: everything up to the first '>'.
+        let (body, _) = after_lt.split_once('>')?;
+        let (name, inner_dots) = split_trailing_ellipsis(body.trim());
+        return Some((name, has_dots || inner_dots));
+    }
+
+    // bare name: the leading word only.
+    let end = rest
+        .find(|c: char| c.is_whitespace() || c == '[' || c == ']')
+        .unwrap_or(rest.len());
+    (end > 0).then(|| (&rest[..end], has_dots))
+}
+
+// extract a balanced `[...]` block and decompose its inner content into
+// (name, has-inner-`...` flag). `map_opt` turns a `None` from
+// `parse_bracket_inner_name` into a nom parse error.
+make_parser!(opt_bracket_name -> (&'a str, bool),
+    nom::combinator::map_opt(balanced_bracket_inner, parse_bracket_inner_name)
+);
+
+// optional positional written in square brackets. yields OptVariadic
+// when an ellipsis appears inside the brackets or directly after them,
+// Optional otherwise. fails when the extracted name does not pass
+// `check_positional` (flags, section labels, odd characters).
+make_parser!(
+    opt_positional -> PositionalParse<'a>,
+    verify(
+        // tuple parser: (name + in-bracket variadic, post-bracket ellipsis).
+        // matches "[name]", "[name...]", "[name ...]", "[name] ...",
+        // "[<name>]", and one-level nests like "[<program> [<arg>...]]".
+        (opt_bracket_name, opt(ellipses)),
+        |((name, _), _): &((&'a str, bool), Option<()>)| check_positional(name)
+    ) => |((name, has_inner_dots), post_dots): ((&'a str, bool), Option<()>)| {
+        if has_inner_dots || post_dots.is_some() {
+            PositionalParse::OptVariadic(name)
+        } else {
+            PositionalParse::Optional(name)
+        }
+    }
+);
+
+// mandatory positional written in angle brackets: `<name>`, `<name...>`
+// or `<name>...`. yields ManVariadic when an ellipsis appears inside or
+// directly after the brackets, Mandatory otherwise.
+//
+// the name scan stops at the first '.', so that an in-bracket `...` can
+// be recognised; as a consequence a name that itself contains a '.'
+// (e.g. `<file.txt>`) does not match this parser.
+make_parser!(man_positional -> PositionalParse<'a>,
+    verify(
+        (
+            delimited(
+                char('<'),
+                (
+                    take_till1(|c| c == '.' || c == '\u{2026}' || c == '>'),
+                    opt(ellipses)
+                ),
+                char('>')
+            ),
+            opt(ellipses)
+        ),
+        |((ss, _), _)| check_positional(ss)
+    ) => |((p, v), v1): ((&'a str, Option<()>), Option<()>)|
+        if v.is_some() || v1.is_some() { PositionalParse::ManVariadic(p) }
+        else { PositionalParse::Mandatory(p) }
+);
+
+// bare ALL_CAPS positional such as `FILE` or `PATTERN...`. the token
+// must start with an ASCII uppercase letter (checked with a
+// non-consuming peek) and continue with `is_pos_char` chars. an ellipsis
+// glued to the end makes it ManVariadic, otherwise it is Mandatory.
+make_parser!(allcaps_positional -> PositionalParse<'a>,
+    verify(
+        (
+            preceded(
+                peek(
+                    satisfy(|c: char| c.is_ascii_uppercase())
+                ),
+                take_while1(is_pos_char)
+            ),
+            opt(
+                alt((
+                    tag("..."),
+                    tag("\u{2026}"))
+                )
+            )
+        ),
+        |(ss, _): &(&str, _)| check_positional(ss)
+    ) => |(p, v): (&'a str, Option<&'a str>)|
+        if v.is_some() { PositionalParse::ManVariadic(p) } else { PositionalParse::Mandatory(p) }
+);
+
+// append `(k, v)` to `acc` unless some existing entry already has the
+// same name ignoring ASCII case; the first occurrence wins.
+fn caseless_push<'a>(k: &'a str, v: Positional, acc: &mut Vec<(&'a str, Positional)>) {
+    let unseen = acc.iter().all(|(name, _)| !name.eq_ignore_ascii_case(k));
+    if unseen {
+        acc.push((k, v));
+    }
+}
+
+// parse_usage_args runs on a single logical usage line. SKIP refuses to
+// cross a newline boundary so many0 stops at end-of-line — without this
+// the parser would happily wander into the OPTIONS section and treat
+// every `--flag <name>` angle-bracket parameter as a positional.
+//
+// the inner positional terminator uses peek(line_ending) instead of
+// consuming the newline, so the trailing `opt((space0, line_ending))` in
+// the outer delimited eats it cleanly and we never advance past the usage
+// line.
+//
+// yields (name, Positional) pairs in order of appearance, deduplicated
+// by name ignoring ASCII case (first occurrence wins).
+make_parser!(pub parse_usage_args -> Vec<(&'a str, Positional)>,
+    (delimited(
+        space0,
+        many0(
+            alt((
+                map(
+                    (
+                        // one recognisable token, which must be followed
+                        // by whitespace, a line ending or end of input.
+                        terminated(
+                            alt((
+                                braces,
+                                opt_positional,
+                                // a bracket group with no usable name is
+                                // skipped as a unit.
+                                value(PositionalParse::Skip, balanced_bracket_inner),
+                                man_positional,
+                                flag,
+                                allcaps_positional,
+                            )),
+                            alt((
+                                space1,
+                                value("", peek(line_ending)),
+                                value("", peek(nom::combinator::eof)),
+                            ))
+                        ),
+                        // catch "[section] ..." patterns where the ellipsis is
+                        // on the *next* token, separated by whitespace.
+                        opt(terminated(
+                            alt((tag("..."), tag("\u{2026}"))),
+                            alt((
+                                space1,
+                                value("", peek(line_ending)),
+                                value("", peek(nom::combinator::eof)),
+                            ))
+                        ))
+                    ),
+                    // a detached ellipsis upgrades the preceding positional
+                    // to its variadic form; other tokens pass through.
+                    |(positional, trailing): (PositionalParse<'a>, Option<_>)| {
+                        if trailing.is_none() { positional }
+                        else {
+                            match positional {
+                                PositionalParse::Optional(n) => PositionalParse::OptVariadic(n),
+                                PositionalParse::Mandatory(n) => PositionalParse::ManVariadic(n),
+                                other => other,
+                            }
+                        }
+                    }
+                ),
+                // SKIP must NOT consume a newline. without this, many0 keeps
+                // iterating past the usage line into OPTIONS-section flag
+                // syntax and over-extracts positionals.
+                value(PositionalParse::Skip, satisfy(|c: char| c != '\n' && c != '\r')),
+            ))
+        ),
+        opt((space0, line_ending))
+    )) => |p: Vec<PositionalParse<'a>>|
+            // fold the token stream into the final list: nameless tokens
+            // are dropped, named ones become Positional records.
+            p.into_iter().fold(Vec::new(), |mut acc, parse|
+            {
+                match parse {
+                    PositionalParse::Curly => (),
+                    PositionalParse::Flag => (),
+                    PositionalParse::Skip => (),
+                    PositionalParse::OptVariadic(arg) => caseless_push(arg, Positional {
+                        optional: true,
+                        variadic: true
+                    }, &mut acc),
+                    PositionalParse::ManVariadic(arg) => caseless_push(arg, Positional {
+                        optional: false,
+                        variadic: true
+                    }, &mut acc),
+                    PositionalParse::Optional(arg) => caseless_push(arg, Positional {
+                        optional: true,
+                        variadic: false,
+                    }, &mut acc),
+                    PositionalParse::Mandatory(arg) => caseless_push(arg, Positional {
+                        optional: false,
+                        variadic: false
+                    }, &mut acc),
+                }
+                acc
+            })
+);
+
+// skip the command name (and any subcommand words) at the start of a
+// usage line. a word is skipped when it does not start with '-',
+// consists of `is_word_char` chars, contains at least one ASCII lowercase
+// letter, and is followed by whitespace. ALL_CAPS words are therefore
+// left in place for the positional parsers. always succeeds, possibly
+// consuming only leading whitespace.
+make_parser!(pub skip_command_name -> (),
+    value((), preceded(space0,
+        many0(
+            (
+                verify(
+                    preceded(not(char('-')), take_while1(is_word_char)),
+                    |ss: &str| ss.chars().any(|c: char| c.is_ascii_lowercase())
+                ),
+                space1
+            )
+        )
+    ))
+);
+
+// match the "usage" marker at the start of a line (after optional
+// indentation, any letter case). a ':' directly after the word is
+// consumed; a following space/tab or line ending is only peeked, so the
+// caller still sees it.
+make_parser!(find_usage_line -> (),
+    value((), preceded(
+        space0,
+        terminated(
+            tag_no_case("usage"),
+            // accept any of:
+            //   "Usage:"              — inline form with colon
+            //   "Usage args"          — inline form, space follows the word
+            //   "USAGE\n  cmd args"   — clap-style header on its own line
+            alt(
+                (
+                    value((), char(':')),
+                    value((), peek(line_ending)),
+                    value((), peek(satisfy(|c: char| c == ' ' || c == '\t'))),
+                )
+            )
+        )
+    ))
+);
+
+// find the first usage line in the text and parse its positionals.
+// lines are skipped whole until `find_usage_line` matches; then the
+// marker, an optional line break (header-on-its-own-line style) and the
+// command name are consumed before `parse_usage_args` runs. fails when
+// the text contains no usage line.
+make_parser!(pub extract_usage_positionals -> Vec<(&'a str, Positional)>,
+    preceded(
+        many0(preceded(not(find_usage_line), (rest_of_line, line_ending))),
+        preceded(
+            (find_usage_line, space0, opt(line_ending), space0, skip_command_name),
+            parse_usage_args
+        )
+    )
+);
+
+// chars allowed in a cli11 positional name: any alphanumeric, '_', '-'.
+make_predicate!(is_cli11_name_char, |c| c.is_alphanumeric()
+    || matches!(c, '_' | '-'));
+
+// the "POSITIONALS:" / "Positionals:" header line (only these two
+// spellings), optionally indented; the rest of the line is consumed.
+make_parser!(cli11_section_header -> (),
+    value((),
+        delimited(
+            space0,
+            alt((tag("POSITIONALS:"), tag("Positionals:"))),
+            (rest_of_line, opt(line_ending))
+        )
+    )
+);
+
+// one entry line inside the positionals section: it must be indented,
+// start with a name of at least 2 bytes, and may be followed by a run of
+// ASCII uppercase letters (presumably the type annotation, e.g. "TEXT" —
+// confirm against cli11 output) and a `...` variadic marker. the rest of
+// the line is discarded. yields (name, is_variadic).
+make_parser!(cli11_pos_line -> (&'a str, bool),
+    preceded(
+        verify(space0, |ss: &str| !ss.is_empty()),
+        terminated(
+            (
+                verify(take_while1(is_cli11_name_char), |s: &str| s.len() >= 2),
+                preceded(
+                    (space0, take_while(|c: char| c.is_ascii_uppercase()), space0),
+                    opt(tag("..."))
+                )
+            ),
+            (rest_of_line, opt(line_ending))
+        )
+    ) => |(name, variadic): (&'a str, Option<_>)| (name, variadic.is_some())
+);
+
+// consecutive entry lines of the section; stops at the first line that
+// is not an entry. every positional is recorded with `optional: false`,
+// deduplicated by name ignoring ASCII case.
+make_parser!(parse_cli11_body -> Vec<(&'a str, Positional)>,
+    many0(cli11_pos_line) => |entries: Vec<(&'a str, bool)>|
+        entries.into_iter().fold(Vec::new(), |mut acc, (name, variadic)| {
+            caseless_push(name, Positional { optional: false, variadic }, &mut acc);
+            acc
+        })
+);
+
+// skip whole lines until the positionals header, then parse the section
+// body. fails when the text has no such header.
+make_parser!(pub extract_cli11_positionals -> Vec<(&'a str, Positional)>,
+    preceded(
+        many0(preceded(not(cli11_section_header), (rest_of_line, line_ending))),
+        preceded(cli11_section_header, parse_cli11_body)
+    )
+);
--- a/src/parsers/help/subcommands.rs
+++ b/src/parsers/help/subcommands.rs
@ -0,0 +1,83 @@
+use nom::{
+    AsChar, IResult, Parser,
+    branch::alt,
+    bytes::complete::{tag, take_till, take_while1},
+    character::complete::{char, space0},
+    combinator::{not, value, verify},
+    multi::many0,
+    sequence::{delimited, preceded, terminated},
+};
+
+use crate::make_parser;
+use crate::parsers::help::helpers::{eol, is_option_char};
+use crate::types::Subcommand;
+
+/// chars allowed inside a bracketed placeholder (`<...>` / `[...]`):
+/// any alphanumeric char plus the separators `_ - . | ,`.
+fn is_placeholder(c: char) -> bool {
+    c.is_alphanumeric() || matches!(c, '_' | '-' | '.' | '|' | ',')
+}
+
+/// chars that may appear in an unbracketed placeholder token such as
+/// "FILE", "PATTERN..." or "A|B": ascii uppercase letters, ascii digits
+/// and the separators `_ - . | ,`. lowercase letters are deliberately
+/// left out so mixed-case description words ("NixOS", "Home-manager")
+/// are not taken for placeholders.
+fn is_bare_placeholder_char(c: char) -> bool {
+    c.is_ascii_uppercase() || c.is_ascii_digit() || "_-.|,".contains(c)
+}
+
+// skip zero or more argument placeholders that follow a subcommand name,
+// each introduced by exactly one space. yields ().
+make_parser!(
+    skip_arg_placeholders -> (),
+    value(
+        (),
+        many0(preceded(
+            // a single separating space is consumed here (this is not a
+            // peek). if no placeholder branch matches after it, the
+            // enclosing many0 stops and resumes from before this space, so
+            // the two-space gap before a description is left for the caller.
+            char(' '),
+            alt((
+                // <...> bracketed placeholder
+                delimited(char('<'), take_while1(is_placeholder), char('>')),
+                // [...] optional bracketed placeholder
+                delimited(char('['), take_while1(is_placeholder), char(']')),
+                // bare ALL_CAPS placeholder — takes the longest run of
+                // `is_bare_placeholder_char` and requires its first char to
+                // be an ascii uppercase letter or digit (allows "N", "M2").
+                // note: only the first char is verified; for a mixed-case
+                // word like "NixOS" the run ends at the first lowercase
+                // letter rather than the branch rejecting the whole word.
+                verify(
+                    take_while1(is_bare_placeholder_char),
+                    |s: &str| {
+                        // take_while1 never yields an empty slice, so the
+                        // unwrap cannot fail.
+                        let first = s.chars().next().unwrap();
+                        first.is_ascii_uppercase() || first.is_ascii_digit()
+                    }
+                ),
+            )),
+        )),
+    )
+);
+
+// parse a subcommand entry: leading whitespace, then a name (a run of
+// option chars at least 2 bytes long, not starting with '-'), optional
+// argument placeholders, a gap of at least two spaces (two are required,
+// any further padding is skipped), then the description text up to the
+// end of the line, and eol.
+make_parser!(pub subcommand_entry -> Subcommand<'a>,
+    (
+        preceded(
+            space0,
+            verify(
+                // a leading '-' would make this a flag, not a subcommand
+                preceded(not(char('-')), take_while1(is_option_char)),
+                |n: &str| n.len() >= 2,
+            ),
+        ),
+        skip_arg_placeholders,
+        tag("  "),
+        space0,
+        terminated(take_till(|c: char| c.is_newline()), eol),
+    ) => |(name, _, _, _, desc): (&'a str, _, _, _, &'a str)| {
+        // some help formats prefix desc with "- " (manpage-style); strip it.
+        let d = desc.trim_start();
+        let desc = d.strip_prefix("- ").map(|s| s.trim_start()).unwrap_or(d);
+        Subcommand { name, desc }
+    }
+);
--- a/src/parsers/manpage.rs
+++ b/src/parsers/manpage.rs
@ -0,0 +1,335 @@
+//! parse unix manpages (groff/mdoc format) into a structured result.
+//!
+//! manpages are written in roff/groff markup — a decades-old typesetting language
+//! used by man(1). this module strips the formatting and extracts structured data
+//! (flags, subcommands, positionals) from the raw groff source.
+//!
+//! there are two major manpage macro packages:
+//!   - man (groff) — used by gnu/linux tools. uses macros like .SH, .TP, .IP, .PP
+//!   - mdoc (bsd) — used by bsd tools. uses .Sh, .Fl, .Ar, .Op, .It, .Bl/.El
+//!
+//! this module handles both, auto-detecting the format by checking for .Sh macros.
+//!
+//! for groff manpages, flag extraction uses multiple "strategies" that target
+//! different common formatting patterns:
+//!   - strategy_tp: .TP tagged paragraphs (gnu coreutils, help2man)
+//!   - strategy_ip: .IP indented paragraphs (curl, hand-written)
+//!   - strategy_pp_rs: .PP + .RS/.RE blocks (git, docbook)
+//!   - strategy_nix: nix3-style bullet .IP with .UR/.UE hyperlinks
+//!   - strategy_deroff: fallback — strip all groff, feed to help text parser
+//!
+//! the module tries all applicable strategies and picks the one that extracts
+//! the most flag entries, on the theory that more results = better match.
+
+mod commands;
+mod groff;
+mod mdoc;
+mod sections;
+mod strategies;
+
+use std::io::{self, Read};
+use std::path::Path;
+
+use crate::types::{HelpResult, OptionEntry, Param, Positional, Subcommand, Switch};
+
+pub use self::groff::{GroffLine, classify_line, strip_groff_escapes};
+pub use self::sections::{extract_subcommand_sections, extract_synopsis_command};
+
+/// owned counterpart of the borrowed `Switch` from `crate::types`
+/// (see the `From<&Switch>` impl in this module).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum OwnedSwitch {
+    /// short form only (a single char)
+    Short(char),
+    /// long form only
+    Long(String),
+    /// short and long form of the same flag
+    Both(char, String),
+}
+
+/// owned counterpart of the borrowed `Param` from `crate::types`
+/// (see the `From<&Param>` impl in this module).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum OwnedParam {
+    /// parameter that must be supplied; holds its name
+    Mandatory(String),
+    /// parameter that may be omitted; holds its name
+    Optional(String),
+}
+
+/// one extracted flag: its switch, optional parameter and description.
+#[derive(Debug, Clone)]
+pub struct ManpageEntry {
+    /// the flag's short and/or long form
+    pub switch: OwnedSwitch,
+    /// the flag's parameter, if it takes one
+    pub param: Option<OwnedParam>,
+    /// description text; when converted from an `OptionEntry` this is the
+    /// trimmed, non-empty description lines joined with single spaces
+    pub desc: String,
+}
+
+/// one extracted subcommand.
+#[derive(Debug, Clone)]
+pub struct ManpageSubcommand {
+    /// subcommand name; the conversions in this module store it
+    /// ascii-lowercased
+    pub name: String,
+    /// description text
+    pub desc: String,
+}
+
+/// everything extracted from one manpage (or converted from a `HelpResult`).
+/// `Default` yields empty collections and an empty description.
+#[derive(Debug, Clone, Default)]
+pub struct ManpageResult {
+    /// extracted flag entries
+    pub entries: Vec<ManpageEntry>,
+    /// extracted subcommands
+    pub subcommands: Vec<ManpageSubcommand>,
+    /// positional arguments as (name, properties) pairs
+    pub positionals: Vec<(String, Positional)>,
+    /// command description; `parse_manpage_string` fills it from the NAME
+    /// section when one is found, otherwise it may be empty
+    pub description: String,
+}
+
+impl From<&Switch<'_>> for OwnedSwitch {
+    /// copy a borrowed switch into its owned form, allocating for the
+    /// long name where there is one.
+    fn from(switch: &Switch<'_>) -> Self {
+        match switch {
+            Switch::Short(short) => Self::Short(*short),
+            Switch::Long(long) => Self::Long((*long).to_owned()),
+            Switch::Both(short, long) => Self::Both(*short, (*long).to_owned()),
+        }
+    }
+}
+
+impl From<&Param<'_>> for OwnedParam {
+    /// copy a borrowed parameter into its owned form, keeping the
+    /// mandatory/optional distinction.
+    fn from(param: &Param<'_>) -> Self {
+        match param {
+            Param::Mandatory(name) => Self::Mandatory((*name).to_owned()),
+            Param::Optional(name) => Self::Optional((*name).to_owned()),
+        }
+    }
+}
+
+impl From<&OptionEntry<'_>> for ManpageEntry {
+    /// convert a help-text entry into an owned entry. the description
+    /// lines are trimmed, blank ones dropped, and the rest joined with
+    /// single spaces.
+    fn from(entry: &OptionEntry<'_>) -> Self {
+        let mut desc = String::new();
+        let pieces = entry
+            .desc
+            .iter()
+            .map(|line| line.trim())
+            .filter(|line| !line.is_empty());
+        for piece in pieces {
+            // every kept piece is non-empty, so `desc` is only empty
+            // before the first one.
+            if !desc.is_empty() {
+                desc.push(' ');
+            }
+            desc.push_str(piece);
+        }
+        ManpageEntry {
+            switch: OwnedSwitch::from(&entry.switch),
+            param: entry.param.as_ref().map(OwnedParam::from),
+            desc,
+        }
+    }
+}
+
+impl From<&Subcommand<'_>> for ManpageSubcommand {
+    /// convert a help-text subcommand into an owned one, ascii-lowercasing
+    /// the name.
+    fn from(sub: &Subcommand<'_>) -> Self {
+        // the name is lowercased for two reasons: file naming stays
+        // consistent (meat_yum.json rather than meat_YUM.json), and
+        // recursive --help probes use the lowercase spelling, which is
+        // what most real CLIs accept — tools like meat DISPLAY uppercase
+        // names in help text but dispatch on the lowercased argument.
+        let name = sub.name.to_ascii_lowercase();
+        let desc = sub.desc.to_owned();
+        ManpageSubcommand { name, desc }
+    }
+}
+
+impl From<&HelpResult<'_>> for ManpageResult {
+    /// convert a borrowed help-parser result into an owned manpage result.
+    fn from(help: &HelpResult<'_>) -> Self {
+        let entries = help.entries.iter().map(ManpageEntry::from).collect();
+        let subcommands = help
+            .subcommands
+            .iter()
+            .map(ManpageSubcommand::from)
+            .collect();
+        // positional names are lowercased so the output does not depend
+        // on which source they came from (synopsis, usage line, cli11
+        // section).
+        let positionals = help
+            .positionals
+            .iter()
+            .map(|(name, props)| (name.to_ascii_lowercase(), props.clone()))
+            .collect();
+        ManpageResult {
+            entries,
+            subcommands,
+            positionals,
+            description: help.desc.to_string(),
+        }
+    }
+}
+
+/// parse a manpage that has already been split into classified lines.
+///
+/// mdoc input (as decided by `mdoc::is_mdoc`) is handed to the mdoc
+/// parser. anything else goes through the groff pipeline: flags from the
+/// OPTIONS section, flags that only appear in the SYNOPSIS, synopsis
+/// positionals, and subcommands from the COMMANDS section plus the
+/// description. the returned `description` is left empty on the groff
+/// path.
+pub fn parse_manpage_lines(lines: &[GroffLine]) -> ManpageResult {
+    if mdoc::is_mdoc(lines) {
+        return mdoc::parse_mdoc_lines(lines);
+    }
+
+    let options = sections::extract_options_section(lines);
+    let mut entries = strategies::extract_entries(&options);
+
+    // some flags are declared only in the SYNOPSIS (nix-env's
+    // `[{--profile | -p} path]` pattern) and never listed in the OPTIONS
+    // body. add those, but let body entries win on a name clash since
+    // they are the ones carrying descriptions.
+    let synopsis_flags = sections::extract_synopsis_flags(lines);
+    if !synopsis_flags.is_empty() {
+        let mut seen_long: std::collections::HashSet<String> = std::collections::HashSet::new();
+        let mut seen_short: std::collections::HashSet<char> = std::collections::HashSet::new();
+        for known in &entries {
+            match &known.switch {
+                OwnedSwitch::Short(short) => {
+                    seen_short.insert(*short);
+                }
+                OwnedSwitch::Long(long) => {
+                    seen_long.insert(long.to_ascii_lowercase());
+                }
+                OwnedSwitch::Both(short, long) => {
+                    seen_short.insert(*short);
+                    seen_long.insert(long.to_ascii_lowercase());
+                }
+            }
+        }
+        // long names compare case-insensitively, short names exactly.
+        entries.extend(synopsis_flags.into_iter().filter(|flag| match &flag.switch {
+            OwnedSwitch::Long(long) => !seen_long.contains(&long.to_ascii_lowercase()),
+            OwnedSwitch::Short(short) => !seen_short.contains(short),
+            OwnedSwitch::Both(short, long) => {
+                !(seen_short.contains(short) || seen_long.contains(&long.to_ascii_lowercase()))
+            }
+        }));
+    }
+
+    let positionals = sections::extract_synopsis_positionals(lines);
+    let commands = sections::extract_commands_section(lines);
+    let mut subcommands = commands::extract_subcommands_from_commands(&commands);
+    // subcommands found in the description are appended unless one with
+    // the same name (ignoring ascii case) is already listed.
+    for candidate in sections::extract_description_positionals(lines) {
+        let already_listed = subcommands
+            .iter()
+            .any(|known| known.name.eq_ignore_ascii_case(&candidate.name));
+        if !already_listed {
+            subcommands.push(candidate);
+        }
+    }
+
+    ManpageResult {
+        entries,
+        subcommands,
+        positionals,
+        description: String::new(),
+    }
+}
+
+/// parse a manpage given as raw text.
+/// the text is split on '\n', each line classified, the lines parsed,
+/// and the description taken from the NAME section when one is found.
+pub fn parse_manpage_string(contents: &str) -> ManpageResult {
+    let classified: Vec<GroffLine> = contents.split('\n').map(classify_line).collect();
+    let parsed = parse_manpage_lines(&classified);
+    match sections::extract_name_description(&classified) {
+        Some(description) => ManpageResult {
+            description,
+            ..parsed
+        },
+        None => parsed,
+    }
+}
+
+/// parse a manpage and additionally split out clap-style `.SH SUBCOMMAND`
+/// sections as one result per subcommand. in a clap-generated manpage
+/// every such section describes a separate command with its own flags,
+/// and the parent's subcommand list is rebuilt from the section names.
+///
+/// returns (main_result, sub_results). each sub_result pairs the full
+/// command name ("nh os") with a ManpageResult holding that section's
+/// flags and description.
+pub fn parse_manpage_with_subs(contents: &str) -> (ManpageResult, Vec<(String, ManpageResult)>) {
+    let classified: Vec<GroffLine> = contents.split('\n').map(classify_line).collect();
+    let mut main = parse_manpage_lines(&classified);
+    if let Some(summary) = sections::extract_name_description(&classified) {
+        main.description = summary;
+    }
+
+    let found = sections::extract_subcommand_sections(&classified);
+    if !found.is_empty() {
+        // the SUBCOMMAND-section names replace whatever was collected
+        // before — for clap-generated manpages they are the
+        // authoritative list.
+        let mut listed = Vec::new();
+        for (name, desc, _) in found.iter() {
+            listed.push(ManpageSubcommand {
+                name: name.to_ascii_lowercase(),
+                desc: desc.clone(),
+            });
+        }
+        main.subcommands = listed;
+    }
+
+    // section bodies go straight to the strategy picker used for the
+    // top-level OPTIONS section. clap writes flag definitions directly
+    // under the .SH SUBCOMMAND header without an inner .SH, so
+    // parse_manpage_lines (which looks for a child OPTIONS section)
+    // would find nothing.
+    let mut per_sub = Vec::new();
+    for (name, desc, body) in found {
+        let sub_result = ManpageResult {
+            entries: strategies::extract_entries(&body),
+            subcommands: Vec::new(),
+            positionals: Default::default(),
+            description: desc,
+        };
+        per_sub.push((name, sub_result));
+    }
+    (main, per_sub)
+}
+
+/// read a manpage file into a string. a path whose extension is exactly
+/// "gz" is gunzipped first (the common case for installed manpages);
+/// any other file is taken as plain text.
+///
+/// # Errors
+/// returns the underlying i/o error if the file cannot be read or
+/// decompressed, and `InvalidData` if a plain file is not valid utf-8.
+pub fn read_manpage_file<P: AsRef<Path>>(path: P) -> io::Result<String> {
+    let path = path.as_ref();
+    let raw = std::fs::read(path)?;
+    let is_gzip = matches!(path.extension().and_then(|ext| ext.to_str()), Some("gz"));
+    if !is_gzip {
+        return String::from_utf8(raw)
+            .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err));
+    }
+    let mut text = String::new();
+    flate2::read::GzDecoder::new(raw.as_slice()).read_to_string(&mut text)?;
+    Ok(text)
+}
+
+/// read a manpage file (see `read_manpage_file`) and parse its contents.
+///
+/// # Errors
+/// propagates any error from reading the file.
+pub fn parse_manpage_file<P: AsRef<Path>>(path: P) -> io::Result<ManpageResult> {
+    read_manpage_file(path).map(|contents| parse_manpage_string(&contents))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // synthetic groff manpage: NAME, SYNOPSIS and an OPTIONS section with
+    // three .TP entries (-v/--verbose, -o/--output with a FILE parameter,
+    // -h/--help).
+    const TP_MANPAGE: &str = r#".TH FOO 1 "2024" "1.0" "User Commands"
+.SH NAME
+foo \- a synthetic test command
+.SH SYNOPSIS
+.B foo
+[\fIOPTIONS\fR] <input> [output]
+.SH OPTIONS
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+increase output verbosity
+.TP
+\fB\-o\fR \fIFILE\fR, \fB\-\-output\fR=\fIFILE\fR
+write to FILE
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help and exit
+"#;
+
+    // all three .TP entries are extracted in source order, and the
+    // description comes from the NAME section.
+    #[test]
+    fn tp_strategy_extracts_flags() {
+        let r = parse_manpage_string(TP_MANPAGE);
+        assert_eq!(
+            r.entries.len(),
+            3,
+            "expected 3 entries, got {:?}",
+            r.entries
+        );
+        assert_eq!(r.description, "a synthetic test command");
+        assert!(matches!(
+            r.entries[0].switch,
+            OwnedSwitch::Both('v', ref l) if l == "verbose"
+        ));
+        assert!(matches!(
+            r.entries[2].switch,
+            OwnedSwitch::Both('h', ref l) if l == "help"
+        ));
+        assert!(r.entries[0].desc.contains("verbosity"));
+    }
+
+    // input using .Sh/.Nm/.Nd macros is recognised as mdoc.
+    #[test]
+    fn mdoc_format_detected() {
+        let src = ".Sh NAME\n.Nm test\n.Nd a test\n.Sh DESCRIPTION\nstuff\n";
+        let lines: Vec<GroffLine> = src.split('\n').map(classify_line).collect();
+        assert!(mdoc::is_mdoc(&lines));
+    }
+
+    // font escapes vanish, `\-` becomes a plain hyphen.
+    #[test]
+    fn groff_escapes_stripped() {
+        let stripped = groff::strip_groff_escapes("\\fB\\-v\\fR \\fIfile\\fR");
+        assert_eq!(stripped.trim(), "-v file");
+    }
+}
--- a/src/parsers/manpage/commands.rs
+++ b/src/parsers/manpage/commands.rs
@ -0,0 +1,157 @@
+//! COMMANDS section subcommand extraction.
+//!
+//! some manpages (notably systemctl) have a dedicated COMMANDS section
+//! listing subcommands with descriptions. these use .PP + bold name +
+//! .RS/.RE blocks:
+//!   .PP
+//!   \fBstart\fR \fIUNIT\fR...
+//!   .RS 4
+//!   Start (activate) one or more units.
+//!   .RE
+
+use crate::parsers::manpage::ManpageSubcommand;
+use crate::parsers::manpage::groff::{GroffLine, strip_groff_escapes, strip_inline_macro_args};
+
+/// check that an extracted name looks like a subcommand: at least two
+/// bytes long, no leading dash, and made up only of ascii lowercase
+/// letters, ascii digits, '-' and '_'.
+fn is_valid_subcmd(name: &str) -> bool {
+    if name.len() < 2 || name.starts_with('-') {
+        return false;
+    }
+    // every allowed char is ascii, so a byte-wise check is equivalent:
+    // any non-ascii char contributes bytes >= 0x80 and is rejected.
+    name.bytes()
+        .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_'))
+}
+
+/// pull a subcommand name out of a line such as
+///   "\fBlist\-units\fR [\fIPATTERN\fR...]" -> "list-units"
+///
+/// when the line starts with `\fB` (and has something after it) only the
+/// text up to the next backslash is considered. otherwise the first
+/// whitespace-delimited word of the escape-stripped line is used.
+/// returns None if the result does not pass `is_valid_subcmd`.
+fn extract_bold_command_name(text: &str) -> Option<String> {
+    let trimmed = text.trim();
+    let bold_rest = trimmed
+        .strip_prefix("\\fB")
+        .filter(|rest| !rest.is_empty());
+    let candidate = match bold_rest {
+        Some(rest) => {
+            // keep the segment between \fB and the next backslash, then
+            // re-wrap it in \fB..\fR so it runs through the same
+            // escape-stripping as the rest of the module.
+            let segment = rest.split('\\').next().unwrap_or(rest);
+            let wrapped = format!("\\fB{segment}\\fR");
+            normalize_command_token(strip_groff_escapes(&wrapped).trim())
+        }
+        None => {
+            let plain = strip_groff_escapes(trimmed);
+            normalize_command_token(plain.split_whitespace().next().unwrap_or(""))
+        }
+    };
+    is_valid_subcmd(&candidate).then_some(candidate)
+}
+
+/// tidy a raw command token: trim surrounding whitespace, cut everything
+/// from the first '(' onwards (e.g. a "(8)" section suffix), and drop
+/// trailing commas.
+fn normalize_command_token(token: &str) -> String {
+    let trimmed = token.trim();
+    let before_paren = match trimmed.split_once('(') {
+        Some((head, _)) => head,
+        None => trimmed,
+    };
+    before_paren.trim_end_matches(',').to_owned()
+}
+
+/// try to read a subcommand name from one classified line. text lines
+/// are inspected directly. font macros (.B, .BI, .BR, .I, .IR, .IB,
+/// .RB, .RI) have their arguments rendered first. any other line
+/// yields None.
+fn extract_command_name_from_line(line: &GroffLine) -> Option<String> {
+    const FONT_MACROS: [&str; 8] = ["B", "BI", "BR", "I", "IR", "IB", "RB", "RI"];
+    match line {
+        GroffLine::Text(text) => extract_bold_command_name(text),
+        GroffLine::Macro { name, args } if FONT_MACROS.contains(&name.as_str()) => {
+            let joined = strip_inline_macro_args(args);
+            let rendered = strip_groff_escapes(&joined);
+            extract_bold_command_name(&rendered)
+        }
+        _ => None,
+    }
+}
+
+/// walk through commands section lines, extracting subcommand
+/// name+description pairs from .PP + name line + description blocks.
+///
+/// for every `.PP` macro the following line is tried as a name line
+/// (see `extract_command_name_from_line`). on success the description
+/// is collected by `collect_subcmd_desc`, cut down to its first
+/// sentence, and scanning resumes where the description ended. names
+/// are stored ascii-lowercased.
+pub fn extract_subcommands_from_commands(lines: &[GroffLine]) -> Vec<ManpageSubcommand> {
+    let mut out = Vec::new();
+    let mut i = 0;
+    while i < lines.len() {
+        let is_pp = matches!(&lines[i], GroffLine::Macro { name, .. } if name == "PP");
+        i += 1;
+        if !is_pp {
+            continue;
+        }
+        // `i` now points at the line right after the .PP (if any).
+        let Some(name) = lines.get(i).and_then(extract_command_name_from_line) else {
+            // not a name line. do NOT consume it: it may itself be a
+            // .PP (back-to-back paragraph macros), and skipping it would
+            // lose the entry that follows. the next iteration looks at
+            // it again and steps past it if it is anything else.
+            continue;
+        };
+        let (desc, next) = collect_subcmd_desc(lines, i + 1);
+        out.push(ManpageSubcommand {
+            name: name.to_ascii_lowercase(),
+            desc: first_sentence(&desc),
+        });
+        i = next;
+    }
+    out
+}
+
+/// gather the description text of a subcommand entry starting at
+/// `start`, returning the text lines joined with single spaces and the
+/// index at which scanning should resume.
+///
+/// before an `.RS` is seen, text lines are collected and any other line
+/// ends the description without being consumed. once inside `.RS`, text
+/// lines are collected, `.RE` ends the description and is consumed,
+/// `.PP`/`.SH`/`.SS` end it without being consumed, and every other
+/// line is skipped.
+fn collect_subcmd_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
+    let mut parts: Vec<&str> = Vec::new();
+    let mut inside_rs = false;
+    let mut idx = start;
+    while let Some(line) = lines.get(idx) {
+        match line {
+            GroffLine::Text(text) => parts.push(text.as_str()),
+            GroffLine::Macro { name, .. } if !inside_rs && name == "RS" => inside_rs = true,
+            GroffLine::Macro { name, .. } if inside_rs && name == "RE" => {
+                // step past the closing .RE before stopping
+                idx += 1;
+                break;
+            }
+            GroffLine::Macro { name, .. }
+                if inside_rs && matches!(name.as_str(), "PP" | "SH" | "SS") =>
+            {
+                break;
+            }
+            // inside the block, unrelated lines are ignored
+            _ if inside_rs => {}
+            // outside the block, anything that is not text ends the entry
+            _ => break,
+        }
+        idx += 1;
+    }
+    (parts.join(" "), idx)
+}
+
+/// reduce a description to its first sentence: the trimmed text before
+/// the first '.'. if there is no '.', or the trimmed input starts with
+/// one, the whole trimmed input is returned.
+fn first_sentence(s: &str) -> String {
+    let trimmed = s.trim();
+    let sentence = match trimmed.split_once('.') {
+        Some((head, _)) if !head.is_empty() => head.trim(),
+        _ => trimmed,
+    };
+    sentence.to_owned()
+}
--- a/src/parsers/manpage/groff.rs
+++ b/src/parsers/manpage/groff.rs
@ -0,0 +1,385 @@
+//! groff escape/formatting stripping and line classification.
+//!
+//! groff escapes start with backslash and use various continuation syntaxes.
+//! we strip them, replacing named characters (like \(aq for apostrophe) with
+//! their text equivalents and discarding formatting directives.
+//!
+//! also exports `make_macro_walker!`, the manpage-side analogue of the
+//! help parser's `make_parser!`. all of our strategy_* functions are
+//! "scan lines, on each .MACRO_NAME run a handler, advance, accumulate"
+//! — this macro factors out the loop scaffolding so each strategy reduces
+//! to its specific extraction logic.
+
+/// walk a `&[GroffLine]` slice, and on each macro whose name matches
+/// `$mname`, invoke the body with `(lines, i, args)` where:
+///   - `lines` is the full slice (for slicing further bodies)
+///   - `i` is the current index of the matched macro
+///   - `args` is the macro's argument string (by reference)
+///
+/// the body returns `Option<(T, usize)>`. `Some((value, new_i))` pushes
+/// `value` and advances the cursor to `new_i` (typically computed as
+/// `lines.len() - rest.len()` after `collect_text_lines`). `None`
+/// advances by one line and keeps scanning.
+///
+/// caveat: `new_i` is used as-is. a handler that returns an index not
+/// greater than `i` makes the walker revisit the same macro, so handlers
+/// must always move forward.
+///
+/// only the `pub $name -> Vec<T>` form is accepted; the generated
+/// function is always public.
+///
+/// matches the help-parser pattern `make_parser!(name -> T, parser => wrap)`:
+/// the macro hides the loop scaffolding, the handler expresses the actual
+/// extraction logic.
+#[macro_export]
+macro_rules! make_macro_walker {
+    (pub $name:ident -> Vec<$t:ty>, on macro $mname:expr =>
+     |$lines:ident, $i:ident, $args:ident| $body:expr) => {
+        pub fn $name(lines_input: &[$crate::parsers::manpage::GroffLine]) -> Vec<$t> {
+            let mut out = Vec::new();
+            let mut cursor = 0;
+            // rebind the input under the caller-chosen identifier so the
+            // handler body can refer to it by that name.
+            let $lines: &[$crate::parsers::manpage::GroffLine] = lines_input;
+            while cursor < $lines.len() {
+                if let $crate::parsers::manpage::GroffLine::Macro {
+                    name: macro_name,
+                    args: $args,
+                } = &$lines[cursor]
+                {
+                    if macro_name == $mname {
+                        let $i = cursor;
+                        // wrap the handler body in an IIFE so an early
+                        // `return None` inside the handler returns from the
+                        // closure, not from the surrounding strategy function.
+                        #[allow(clippy::redundant_closure_call)]
+                        let result: Option<($t, usize)> = (|| $body)();
+                        if let Some((value, new_i)) = result {
+                            out.push(value);
+                            cursor = new_i;
+                            continue;
+                        }
+                    }
+                }
+                // non-matching line, or the handler declined: step one line.
+                cursor += 1;
+            }
+            out
+        }
+    };
+}
+
+/// every line in a manpage is classified as one of four types.
+/// this classification drives all subsequent parsing — strategies
+/// pattern-match on sequences of classified lines.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum GroffLine {
+    /// macro name + args, e.g. ("SH", "OPTIONS") or ("TP", "").
+    /// the name is stored without the leading dot (code in this crate
+    /// compares it against "PP", "RS", "SH", ...).
+    Macro { name: String, args: String },
+    /// plain text after groff stripping
+    Text(String),
+    /// empty line
+    Blank,
+    /// groff comment: a line starting with `.\"` or `\"`
+    Comment,
+}
+
+/// translate a groff named character escape to its text equivalent.
+/// groff uses short glyph names such as "aq" for apostrophe, "lq"/"rq"
+/// for left/right double quotes, "oq"/"cq" for left/right single quotes,
+/// "dq" for the ascii double quote, "em"/"en" for dashes and "hy"/"mi"
+/// for hyphen and minus.
+///
+/// every glyph is mapped to a plain ascii character (callers cast the
+/// result to `u8`). returns None for names that are not in the table.
+fn named_char_of(name: &str) -> Option<char> {
+    match name {
+        // single quotes / apostrophe
+        "aq" | "oq" | "cq" => Some('\''),
+        // double quotes
+        "dq" | "lq" | "Lq" | "rq" | "Rq" => Some('"'),
+        // dashes, hyphen, minus
+        "em" | "en" | "hy" | "mi" => Some('-'),
+        _ => None,
+    }
+}
+
+/// true for the bytes of ascii letters and digits.
+fn is_alnum(c: u8) -> bool {
+    matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
+}
+
+/// strip groff escape sequences, replacing named characters with text
+/// equivalents and discarding formatting directives.
+///
+/// the input is scanned byte-wise. escapes are recognised by the byte
+/// after the backslash; a lone backslash at the very end is copied
+/// through. several escapes are skipped by a fixed byte count, which can
+/// end in the middle of a multi-byte utf-8 character when the escape is
+/// followed by non-ascii text. after every escape the cursor is therefore
+/// moved forward to the next character boundary, so non-ascii input can
+/// never cause a slicing panic (the partially skipped character is
+/// dropped).
+pub fn strip_groff_escapes(source: &str) -> String {
+    let bytes = source.as_bytes();
+    let len = bytes.len();
+    let mut buffer = String::with_capacity(len);
+    let mut pos = 0;
+    // last byte written to `buffer` (0 if none, or if it was non-ascii)
+    let mut prev_char: u8 = 0;
+
+    while pos < len {
+        if bytes[pos] == b'\\' && pos + 1 < len {
+            let next = bytes[pos + 1];
+            match next {
+                b'f' => {
+                    // font escape: \fB, \fI, \fP, \fR, \f(XX, \f[...]
+                    if pos + 2 < len {
+                        let font_char = bytes[pos + 2];
+                        // insert space before italic font to preserve word boundaries
+                        // e.g. \fB--max-results\fR\fIcount\fR -> "--max-results count"
+                        if font_char == b'I' && is_alnum(prev_char) {
+                            buffer.push(' ');
+                            prev_char = b' ';
+                        }
+                        if font_char == b'(' {
+                            pos += 5; // \f(XX — two-character font name
+                        } else if font_char == b'[' {
+                            pos += 3;
+                            skip_to_byte(bytes, len, &mut pos, b']');
+                            if pos < len {
+                                pos += 1;
+                            }
+                        } else {
+                            pos += 3; // \fX — single-character font selector
+                        }
+                    } else {
+                        pos += 2;
+                    }
+                }
+                b'-' => {
+                    // escaped hyphen-minus — emit a plain hyphen
+                    buffer.push('-');
+                    prev_char = b'-';
+                    pos += 2;
+                }
+                b'&' | b'/' | b',' => {
+                    // zero-width characters — discard without output
+                    pos += 2;
+                }
+                b'(' => {
+                    // two-char named character: \(aq, \(lq, \(rq, etc.
+                    if pos + 3 < len {
+                        // `get` instead of indexing: the two bytes after
+                        // `\(` need not form whole characters.
+                        if let Some(c) = source.get(pos + 2..pos + 4).and_then(named_char_of) {
+                            buffer.push(c);
+                            prev_char = c as u8;
+                        }
+                        pos += 4;
+                    } else {
+                        pos += 2;
+                    }
+                }
+                b'[' => {
+                    // bracketed named character: \[aq], \[lq], etc.
+                    pos += 2;
+                    let start = pos;
+                    skip_to_byte(bytes, len, &mut pos, b']');
+                    if pos < len {
+                        // `start` follows '[' and `pos` sits on ']', both
+                        // ascii, so this slice is always on boundaries.
+                        let name = &source[start..pos];
+                        if let Some(c) = named_char_of(name) {
+                            buffer.push(c);
+                            prev_char = c as u8;
+                        }
+                        pos += 1;
+                    }
+                }
+                b's' => {
+                    // size escape: \sN, \s+N, \s-N — skip the numeric argument
+                    pos += 2;
+                    if pos < len && (bytes[pos] == b'+' || bytes[pos] == b'-') {
+                        pos += 1;
+                    }
+                    if pos < len && bytes[pos].is_ascii_digit() {
+                        pos += 1;
+                    }
+                    if pos < len && bytes[pos].is_ascii_digit() {
+                        pos += 1;
+                    }
+                }
+                b'm' => {
+                    // color escape: \m[...] — skip the bracketed color name
+                    pos += 2;
+                    if pos < len && bytes[pos] == b'[' {
+                        pos += 1;
+                        skip_to_byte(bytes, len, &mut pos, b']');
+                        if pos < len {
+                            pos += 1;
+                        }
+                    }
+                }
+                b'X' => {
+                    // device control: \X'...' — skip the single-quoted payload
+                    pos += 2;
+                    if pos < len && bytes[pos] == b'\'' {
+                        pos += 1;
+                        skip_to_byte(bytes, len, &mut pos, b'\'');
+                        if pos < len {
+                            pos += 1;
+                        }
+                    }
+                }
+                b'*' => {
+                    // string variable: \*X or \*(XX or \*[...] — skip the reference
+                    pos += 2;
+                    skip_groff_reference(bytes, len, &mut pos);
+                }
+                b'n' => {
+                    // number register: \nX or \n(XX or \n[...] — skip the reference
+                    pos += 2;
+                    skip_groff_reference(bytes, len, &mut pos);
+                }
+                b'e' => {
+                    // escaped backslash literal
+                    buffer.push('\\');
+                    prev_char = b'\\';
+                    pos += 2;
+                }
+                b'\\' => {
+                    // double backslash — emit one
+                    buffer.push('\\');
+                    prev_char = b'\\';
+                    pos += 2;
+                }
+                b' ' | b'~' => {
+                    // escaped/non-breaking space — emit a regular space
+                    buffer.push(' ');
+                    prev_char = b' ';
+                    pos += 2;
+                }
+                _ => {
+                    // unknown escape — skip the two-character sequence
+                    pos += 2;
+                }
+            }
+            // the skips above count bytes; if one ended inside a
+            // multi-byte character, move on to the next boundary so the
+            // slicing below stays valid.
+            while pos < len && !source.is_char_boundary(pos) {
+                pos += 1;
+            }
+        } else {
+            // copy a full utf-8 char from source to buffer. `pos` is
+            // always on a boundary and below `len` here, so a char exists.
+            let Some(c) = source[pos..].chars().next() else {
+                break;
+            };
+            buffer.push(c);
+            prev_char = if c.is_ascii() { c as u8 } else { 0 };
+            pos += c.len_utf8();
+        }
+    }
+    buffer
+}
+
+/// advance `*pos` to the first occurrence of `delim` at or after `*pos`,
+/// or to `len` if there is none. `*pos` is left untouched when it is
+/// already at or past `len`.
+fn skip_to_byte(bytes: &[u8], len: usize, pos: &mut usize, delim: u8) {
+    if *pos >= len {
+        return;
+    }
+    let remaining = &bytes[*pos..len];
+    let offset = remaining
+        .iter()
+        .position(|&b| b == delim)
+        .unwrap_or(remaining.len());
+    *pos += offset;
+}
+
+/// step over the name part of a groff reference, which comes in three
+/// shapes:
+///   single char  — e.g. \*X or \nX        (advance by 1)
+///   ( + 2 chars  — e.g. \*(XX or \n(XX    (advance by 3)
+///   [ to ]       — e.g. \*[name] or \n[name] (advance past the ']',
+///                  or to `len` if it is missing)
+/// does nothing when `*pos` is already at or past `len`.
+fn skip_groff_reference(bytes: &[u8], len: usize, pos: &mut usize) {
+    if *pos >= len {
+        return;
+    }
+    match bytes[*pos] {
+        b'(' => *pos += 3,
+        b'[' => {
+            *pos += 1;
+            while *pos < len && bytes[*pos] != b']' {
+                *pos += 1;
+            }
+            // consume the closing bracket when there is one
+            if *pos < len {
+                *pos += 1;
+            }
+        }
+        _ => *pos += 1,
+    }
+}
+
+/// flatten the arguments of an alternating-font macro (.BI, .BR, .IR, ...)
+/// into the text they render as. these macros glue their arguments together
+/// with no separator, so:
+///   - a double quote toggles "inside a quoted argument"; the quote itself
+///     is dropped and everything inside (spaces included) is kept
+///   - spaces and tabs outside quotes only separate arguments and are dropped
+///   - an unterminated quote simply runs to the end of the input
+///
+/// e.g. the arguments `"--output " "FILE"` become `--output FILE` (the space
+/// survives because it is inside the quotes), while `foo bar` becomes
+/// `foobar`.
+pub fn strip_inline_macro_args(text: &str) -> String {
+    let mut rendered = String::with_capacity(text.len());
+    let mut in_quotes = false;
+    for ch in text.chars() {
+        match ch {
+            '"' => in_quotes = !in_quotes,
+            ' ' | '\t' if !in_quotes => {}
+            other => rendered.push(other),
+        }
+    }
+    rendered
+}
+
+/// render the arguments of a same-font macro (.B/.I). the spaces between
+/// arguments are kept as-is; double quotes only group arguments in the roff
+/// source, so every quote character is removed before groff escapes are
+/// stripped and the result is trimmed.
+pub fn strip_space_macro_args(text: &str) -> String {
+    let unquoted: String = text.chars().filter(|&ch| ch != '"').collect();
+    let rendered = strip_groff_escapes(&unquoted);
+    rendered.trim().to_owned()
+}
+
+/// resolve groff escapes in `line`, then trim surrounding whitespace from
+/// the result.
+pub fn strip_groff(line: &str) -> String {
+    let rendered = strip_groff_escapes(line);
+    rendered.trim().to_owned()
+}
+
+/// is this line a roff comment? true when the line begins with `.\"`
+/// (comment on a control line) or with `\"` (comment at the start of a
+/// text line). nothing past that prefix is inspected.
+fn is_comment_line(line: &str) -> bool {
+    const CONTROL_COMMENT: &str = ".\\\"";
+    const BARE_COMMENT: &str = "\\\"";
+    line.starts_with(CONTROL_COMMENT) || line.starts_with(BARE_COMMENT)
+}
+
+/// classify a single line of manpage source.
+///
+/// - comment forms (see `is_comment_line`) and a bare `.\` yield `Comment`
+/// - an empty line yields `Blank`
+/// - a line starting with '.' or '\'' (groff alternate control char) is a
+///   macro: the name runs up to the first space/tab, the remainder is the
+///   argument string. if the whole argument string is wrapped in a single
+///   pair of double quotes (with no other quote inside) the pair is removed.
+/// - anything else is text; it is passed through `strip_groff` and becomes
+///   `Blank` when nothing is left.
+pub fn classify_line(line: &str) -> GroffLine {
+    if is_comment_line(line) {
+        return GroffLine::Comment;
+    }
+    if line.is_empty() {
+        return GroffLine::Blank;
+    }
+    // `.\"` and `\"` prefixes were already handled by is_comment_line; the
+    // only dot-backslash form left to treat as a comment is the bare `.\`
+    if line == ".\\" {
+        return GroffLine::Comment;
+    }
+    if !(line.starts_with('.') || line.starts_with('\'')) {
+        let text = strip_groff(line);
+        return if text.is_empty() {
+            GroffLine::Blank
+        } else {
+            GroffLine::Text(text)
+        };
+    }
+
+    // macro line — the control character is one ascii byte, so slicing at 1
+    // is always on a char boundary
+    let body = line[1..].trim();
+    let Some(cut) = body.find([' ', '\t']) else {
+        return GroffLine::Macro {
+            name: body.to_string(),
+            args: String::new(),
+        };
+    };
+    let name = body[..cut].to_string();
+    let raw_args = body[cut + 1..].trim();
+    // unwrap `"..."` only when the quotes enclose the entire argument string
+    let enclosed = raw_args
+        .strip_prefix('"')
+        .and_then(|tail| tail.strip_suffix('"'));
+    let args = match enclosed {
+        Some(inner) if !inner.contains('"') => inner,
+        _ => raw_args,
+    };
+    GroffLine::Macro {
+        name,
+        args: args.to_string(),
+    }
+}
--- a/src/parsers/manpage/mdoc.rs
+++ b/src/parsers/manpage/mdoc.rs
@ -0,0 +1,237 @@
+//! BSD mdoc format support.
+//!
+//! mdoc is the bsd manpage macro package. it uses semantic macros rather than
+//! presentation macros:
+//!   .Fl v    -> flag: -v
+//!   .Ar file -> argument: file
+//!   .Op ...  -> optional: [...]
+//!   .Bl/.It/.El -> list begin/item/end
+//!   .Sh      -> section header (note lowercase 'h', vs groff's .SH)
+
+use crate::parsers::manpage::groff::{GroffLine, strip_groff_escapes};
+use crate::parsers::manpage::{ManpageEntry, ManpageResult, OwnedParam, OwnedSwitch};
+use crate::types::Positional;
+
+/// a page is treated as mdoc when at least one of its lines is a `.Sh`
+/// macro (the comparison is case-sensitive, so `.SH` does not count).
+pub fn is_mdoc(lines: &[GroffLine]) -> bool {
+    lines
+        .iter()
+        .filter_map(|line| match line {
+            GroffLine::Macro { name, .. } => Some(name.as_str()),
+            _ => None,
+        })
+        .any(|macro_name| macro_name == "Sh")
+}
+
+/// text contributed by one mdoc line, if any.
+///
+/// text lines are returned with groff escapes stripped. macros listed in
+/// `STRUCTURAL` contribute nothing; every other macro contributes its
+/// argument string (escapes stripped, trimmed) unless that is empty. all
+/// remaining line kinds yield `None`.
+fn mdoc_text_of(line: &GroffLine) -> Option<String> {
+    const STRUCTURAL: [&str; 11] = [
+        "Pp", "Bl", "El", "Sh", "Ss", "Os", "Dd", "Dt", "Oo", "Oc", "Op",
+    ];
+    match line {
+        GroffLine::Text(t) => Some(strip_groff_escapes(t)),
+        GroffLine::Macro { name, .. } if STRUCTURAL.contains(&name.as_str()) => None,
+        GroffLine::Macro { args, .. } => {
+            let rendered = strip_groff_escapes(args);
+            let rendered = rendered.trim();
+            (!rendered.is_empty()).then(|| rendered.to_string())
+        }
+        _ => None,
+    }
+}
+
+/// parse the arguments of an mdoc `.It` line that declares a flag, e.g.
+/// `Fl v Ar file` (Fl = flag, Ar = argument).
+///
+/// the arguments are split on spaces; empty tokens and `Ns` are ignored.
+/// the first token must be `Fl`. the second token decides the switch:
+///   - one ascii alphanumeric byte  -> short switch
+///   - longer and starting with '-' -> long switch (leading '-' removed)
+///   - anything else                -> not a flag, `None`
+/// when the third token is `Ar` and a fourth token exists, the fourth token
+/// becomes a mandatory parameter. the description is left empty.
+fn parse_mdoc_it(args: &str) -> Option<ManpageEntry> {
+    let tokens: Vec<&str> = args
+        .split(' ')
+        .filter(|tok| !tok.is_empty() && *tok != "Ns")
+        .collect();
+    let (&macro_name, &flag) = (tokens.first()?, tokens.get(1)?);
+    if macro_name != "Fl" {
+        return None;
+    }
+    let switch = if flag.len() == 1 {
+        // a one-byte token is a single ascii character
+        let ch = flag.chars().next().filter(char::is_ascii_alphanumeric)?;
+        OwnedSwitch::Short(ch)
+    } else {
+        OwnedSwitch::Long(flag.strip_prefix('-')?.to_string())
+    };
+    let param = match (tokens.get(2), tokens.get(3)) {
+        (Some(&"Ar"), Some(value)) => Some(OwnedParam::Mandatory((*value).to_string())),
+        _ => None,
+    };
+    Some(ManpageEntry {
+        switch,
+        param,
+        desc: String::new(),
+    })
+}
+
+/// read a positional argument out of the argument string of an mdoc
+/// `.Ar` line (or the part of an `.Op Ar` line after `Ar`).
+///
+/// the first space-separated token is the name; it is returned lowercased
+/// and must be at least two bytes long, otherwise `None`. the flag in the
+/// result is true when any token is exactly `...` (variadic).
+fn positional_of_mdoc_line(args: &str) -> Option<(String, bool)> {
+    let head = args.split(' ').find(|tok| !tok.is_empty())?;
+    if head.len() < 2 {
+        return None;
+    }
+    let variadic = args.split(' ').any(|tok| tok == "...");
+    Some((head.to_ascii_lowercase(), variadic))
+}
+
+/// parse an entire mdoc-format manpage.
+/// walks through all classified lines looking for:
+///   1. .Bl/.It/.El list blocks containing flag definitions
+///   2. .Sh SYNOPSIS sections containing positional arguments (.Ar, .Op Ar)
+///
+/// the result carries the flag entries and the de-duplicated positionals;
+/// `subcommands` and `description` are always returned empty here.
+pub fn parse_mdoc_lines(lines: &[GroffLine]) -> ManpageResult {
+    // collect description for an entry — until next structural macro
+    // (.It/.El/.Sh/.Ss). returns the space-joined, trimmed text together with
+    // the index of the line that stopped the scan (`lines.len()` if none did).
+    fn desc_of(lines: &[GroffLine], start: usize) -> (String, usize) {
+        let mut acc: Vec<String> = Vec::new();
+        let mut i = start;
+        while i < lines.len() {
+            if let GroffLine::Macro { name, .. } = &lines[i]
+                && matches!(name.as_str(), "It" | "El" | "Sh" | "Ss")
+            {
+                break;
+            }
+            if let Some(t) = mdoc_text_of(&lines[i]) {
+                acc.push(t);
+            }
+            i += 1;
+        }
+        (acc.join(" ").trim().to_string(), i)
+    }
+
+    // index just past the next .El at or after `start`, or `lines.len()`
+    // when the list is never closed.
+    fn skip_to_el(lines: &[GroffLine], start: usize) -> usize {
+        let mut i = start;
+        while i < lines.len() {
+            if let GroffLine::Macro { name, .. } = &lines[i]
+                && name == "El"
+            {
+                return i + 1;
+            }
+            i += 1;
+        }
+        i
+    }
+
+    /// parse a single .It entry: extract flag, collect description.
+    /// `start` is the index of the line after the .It header. the
+    /// description lines are consumed even when the header does not parse
+    /// as a flag (nothing is pushed in that case). returns the index at
+    /// which the description scan stopped.
+    fn parse_it(
+        args: &str,
+        lines: &[GroffLine],
+        start: usize,
+        entries: &mut Vec<ManpageEntry>,
+    ) -> usize {
+        let (desc, new_start) = desc_of(lines, start);
+        if let Some(mut entry) = parse_mdoc_it(args) {
+            entry.desc = desc;
+            entries.push(entry);
+        }
+        new_start
+    }
+
+    /// parse all .It entries within a .Bl/.El option list.
+    /// returns the index just past the closing .El, or `lines.len()` when
+    /// the input ends first.
+    fn parse_option_list(
+        entries: &mut Vec<ManpageEntry>,
+        lines: &[GroffLine],
+        start: usize,
+    ) -> usize {
+        let mut i = start;
+        while i < lines.len() {
+            match &lines[i] {
+                GroffLine::Macro { name, .. } if name == "El" => return i + 1,
+                GroffLine::Macro { name, args } if name == "It" => {
+                    i = parse_it(args, lines, i + 1, entries);
+                }
+                _ => i += 1,
+            }
+        }
+        i
+    }
+
+    // scan a SYNOPSIS body for positionals. each pushed tuple is
+    // (name, optional, variadic): plain .Ar lines are recorded as required,
+    // `.Op Ar ...` lines as optional. stops at the next .Sh and returns its
+    // index (not past it), so the caller sees that heading again.
+    fn parse_synopsis(
+        positionals: &mut Vec<(String, bool, bool)>,
+        lines: &[GroffLine],
+        start: usize,
+    ) -> usize {
+        let mut i = start;
+        while i < lines.len() {
+            match &lines[i] {
+                GroffLine::Macro { name, .. } if name == "Sh" => return i,
+                GroffLine::Macro { name, args } if name == "Ar" => {
+                    if let Some((n, v)) = positional_of_mdoc_line(args) {
+                        positionals.push((n, false, v));
+                    }
+                    i += 1;
+                }
+                GroffLine::Macro { name, args } if name == "Op" => {
+                    let words: Vec<&str> = args.split(' ').filter(|w| !w.is_empty()).collect();
+                    if matches!(words.first(), Some(&"Ar")) {
+                        // drop the first three bytes, i.e. "Ar" plus one separator.
+                        // NOTE(review): this assumes `args` has no leading
+                        // whitespace — confirm against how args are produced.
+                        let rest = if args.len() > 3 { &args[3..] } else { "" };
+                        if let Some((n, v)) = positional_of_mdoc_line(rest) {
+                            positionals.push((n, true, v));
+                        }
+                    }
+                    i += 1;
+                }
+                _ => i += 1,
+            }
+        }
+        i
+    }
+
+    let mut entries: Vec<ManpageEntry> = Vec::new();
+    let mut positionals: Vec<(String, bool, bool)> = Vec::new();
+    let mut i = 0;
+    while i < lines.len() {
+        // .Bl + .It header sequence — peek at first .It to decide if this is a flag list
+        if let GroffLine::Macro { name: n1, .. } = &lines[i]
+            && n1 == "Bl"
+        {
+            let j = i + 1;
+            if j < lines.len()
+                && let GroffLine::Macro {
+                    name: n2,
+                    args: it_args,
+                } = &lines[j]
+                && n2 == "It"
+            {
+                let words: Vec<&str> = it_args.split(' ').filter(|w| !w.is_empty()).collect();
+                if matches!(words.first(), Some(&"Fl")) {
+                    // flag list: parse the first item here, then the rest of
+                    // the list up to its .El
+                    let k = parse_it(it_args, lines, j + 1, &mut entries);
+                    i = parse_option_list(&mut entries, lines, k);
+                    continue;
+                } else {
+                    // first item is not a flag: skip the whole list
+                    i = skip_to_el(lines, j + 1);
+                    continue;
+                }
+            }
+            // .Bl not immediately followed by .It: skip the whole list
+            i = skip_to_el(lines, j);
+            continue;
+        }
+        if let GroffLine::Macro { name, args } = &lines[i]
+            && name == "Sh"
+            && args.trim().eq_ignore_ascii_case("SYNOPSIS")
+        {
+            i = parse_synopsis(&mut positionals, lines, i + 1);
+            continue;
+        }
+        i += 1;
+    }
+
+    // deduplicate positionals by name, preserving first-seen order
+    // (the first occurrence's optional/variadic flags win)
+    let mut seen: Vec<String> = Vec::new();
+    let mut deduped: Vec<(String, Positional)> = Vec::new();
+    for (name, optional, variadic) in positionals {
+        if !seen.contains(&name) {
+            seen.push(name.clone());
+            deduped.push((name, Positional { optional, variadic }));
+        }
+    }
+
+    ManpageResult {
+        entries,
+        subcommands: Vec::new(),
+        positionals: deduped,
+        description: String::new(),
+    }
+}
--- a/src/parsers/manpage/sections.rs
+++ b/src/parsers/manpage/sections.rs
@ -0,0 +1,851 @@
+//! section extraction from manpages.
+//!
+//! manpages are divided into sections by .SH macros. we extract OPTIONS,
+//! NAME, SYNOPSIS, and COMMANDS sections for their specific content.
+
+use nom::{Parser, sequence::preceded};
+
+use crate::parsers::help::{parse_usage_args, parse_usage_flags, skip_command_name};
+use crate::parsers::manpage::groff::{
+    GroffLine, strip_groff_escapes, strip_inline_macro_args, strip_space_macro_args,
+};
+use crate::parsers::manpage::{ManpageEntry, ManpageSubcommand, OwnedParam, OwnedSwitch};
+use crate::types::{Param, Positional, Switch};
+
+/// does this .SH title name an options-like section? true for any title
+/// containing "OPTION" in any letter case ("OPTIONS", "Common Options", ...).
+fn is_options_section(name: &str) -> bool {
+    name.to_ascii_uppercase().contains("OPTION")
+}
+
+/// gather the body lines of every option-like .SH section, in file order
+/// (this handles the nix pattern of separate "Options" and "Common Options"
+/// sections). between two gathered sections a synthetic empty `.SH` line is
+/// inserted so that description collectors which stop on SH/SS cannot run
+/// from the end of one section into the start of the next.
+///
+/// when no option-like section exists, the body of the first DESCRIPTION
+/// section is returned instead; when that is missing too, the result is
+/// empty.
+pub fn extract_options_section(lines: &[GroffLine]) -> Vec<GroffLine> {
+    /// index of the first .SH at or after `from`, or `lines.len()`.
+    fn section_end(lines: &[GroffLine], from: usize) -> usize {
+        lines[from..]
+            .iter()
+            .position(|line| matches!(line, GroffLine::Macro { name, .. } if name == "SH"))
+            .map_or(lines.len(), |offset| from + offset)
+    }
+
+    let mut collected: Vec<GroffLine> = Vec::new();
+    let mut cursor = 0;
+    while cursor < lines.len() {
+        let is_heading = matches!(
+            &lines[cursor],
+            GroffLine::Macro { name, args } if name == "SH" && is_options_section(args)
+        );
+        cursor += 1;
+        if !is_heading {
+            continue;
+        }
+        if !collected.is_empty() {
+            collected.push(GroffLine::Macro {
+                name: "SH".to_string(),
+                args: String::new(),
+            });
+        }
+        // copy the body; the cursor is left on the terminating .SH so that
+        // heading is itself examined on the next iteration
+        let end = section_end(lines, cursor);
+        collected.extend_from_slice(&lines[cursor..end]);
+        cursor = end;
+    }
+    if !collected.is_empty() {
+        return collected;
+    }
+
+    // fallback: DESCRIPTION section
+    let description = lines.iter().position(|line| {
+        matches!(
+            line,
+            GroffLine::Macro { name, args }
+                if name == "SH" && args.trim().eq_ignore_ascii_case("DESCRIPTION")
+        )
+    });
+    match description {
+        Some(heading) => {
+            let start = heading + 1;
+            lines[start..section_end(lines, start)].to_vec()
+        }
+        None => Vec::new(),
+    }
+}
+
+/// body lines of the first .SH section whose title equals `section_name`
+/// (ascii case-insensitive, title trimmed). the body runs up to, but not
+/// including, the next .SH. returns an empty vector when no such section
+/// exists.
+fn extract_named_section(lines: &[GroffLine], section_name: &str) -> Vec<GroffLine> {
+    let heading = lines.iter().position(|line| {
+        matches!(
+            line,
+            GroffLine::Macro { name, args }
+                if name == "SH" && args.trim().eq_ignore_ascii_case(section_name)
+        )
+    });
+    let Some(heading) = heading else {
+        return Vec::new();
+    };
+    lines[heading + 1..]
+        .iter()
+        .take_while(|line| !matches!(line, GroffLine::Macro { name, .. } if name == "SH"))
+        .cloned()
+        .collect()
+}
+
+/// the NAME section follows the convention "command \- short description".
+/// extract the part after "\-" as the command's description.
+/// handles both "\-" (groff) and " - " (plain text) separators.
+///
+/// only the first NAME section is considered. its lines are rendered to
+/// text and joined with single spaces before the separator is searched:
+///   - text lines are used as they are
+///   - .B/.I (same font, space-separated arguments) keep the spaces between
+///     their arguments, matching how the synopsis code renders them
+///   - .BI/.BR/.IR (alternating fonts) have their arguments concatenated
+///   - mdoc .Nm contributes its arguments; mdoc .Nd contributes its
+///     arguments prefixed with "\- " so the separator search finds it
+///
+/// returns `None` when there is no NAME section, no separator, or nothing
+/// after the separator.
+pub fn extract_name_description(lines: &[GroffLine]) -> Option<String> {
+    let heading = lines.iter().position(|line| {
+        matches!(
+            line,
+            GroffLine::Macro { name, args }
+                if name == "SH" && args.trim().eq_ignore_ascii_case("NAME")
+        )
+    })?;
+
+    let mut acc: Vec<String> = Vec::new();
+    for line in &lines[heading + 1..] {
+        match line {
+            // next section heading ends NAME
+            GroffLine::Macro { name, .. } if name == "SH" => break,
+            GroffLine::Text(t) => acc.push(t.clone()),
+            GroffLine::Macro { name, args } => {
+                let text = match name.as_str() {
+                    // same-font macros: arguments are separated by spaces in
+                    // the rendered output, so the spaces must be preserved
+                    "B" | "I" => strip_space_macro_args(args),
+                    // alternating-font macros: arguments are glued together
+                    "BI" | "BR" | "IR" => strip_groff_escapes(&strip_inline_macro_args(args))
+                        .trim()
+                        .to_string(),
+                    "Nm" => strip_groff_escapes(args).trim().to_string(),
+                    "Nd" => {
+                        let text = strip_groff_escapes(args);
+                        let text = text.trim();
+                        if text.is_empty() {
+                            String::new()
+                        } else {
+                            format!("\\- {text}")
+                        }
+                    }
+                    _ => String::new(),
+                };
+                if !text.is_empty() {
+                    acc.push(text);
+                }
+            }
+            _ => (),
+        }
+    }
+    let full = acc.join(" ").trim().to_string();
+    split_name_separator(&full)
+}
+
+/// split a NAME line at whichever separator comes first: "\-" (groff) or
+/// " - " (plain). returns the text after that separator, trimmed, or `None`
+/// when no separator is present or nothing follows it.
+fn split_name_separator(full: &str) -> Option<String> {
+    let candidates = [find_padded(full, "\\-"), find_padded(full, " - ")];
+    let at = candidates.into_iter().flatten().min()?;
+    let tail = &full[at..];
+    // the groff marker is two bytes long, the plain one three
+    let after = match tail.strip_prefix("\\-") {
+        Some(rest) => rest,
+        None => &tail[3..],
+    };
+    let desc = after.trim();
+    (!desc.is_empty()).then(|| desc.to_string())
+}
+
+/// byte offset of the first occurrence of `needle` in `s`.
+/// this is a plain substring search: any spaces that are part of the
+/// marker must be included in `needle` itself, and no additional padding
+/// around the match is required or checked.
+fn find_padded(s: &str, needle: &str) -> Option<usize> {
+    s.match_indices(needle).next().map(|(offset, _)| offset)
+}
+
+/// extract the command name from the SYNOPSIS section.
+///
+/// the SYNOPSIS section shows how to invoke the command:
+///   .SH SYNOPSIS
+///   .B git add
+///   [\fIOPTIONS\fR] [\fB\-\-\fR] [\fI<pathspec>\fR...]
+///
+/// every raw line first has its italic runs (\fI...\fR) rewritten to
+/// <...> placeholders, because italics mark parameters rather than command
+/// words and the font information is lost once the line is classified.
+/// the first synopsis heading is then located and its body scanned in
+/// order; the first text line, .SY line or .B/.BI/.BR line that yields an
+/// acceptable command candidate wins. the scan gives up (`None`) at the
+/// next .SH — or .SS when the heading itself was an .SS — and at end of
+/// input.
+pub fn extract_synopsis_command(contents: &str) -> Option<String> {
+    let classified: Vec<GroffLine> = contents
+        .split('\n')
+        .map(|raw| {
+            let marked = replace_italic_with_angles(raw);
+            crate::parsers::manpage::groff::classify_line(&marked)
+        })
+        .collect();
+
+    // only the first synopsis heading is ever considered
+    let (stop_on_ss, body_start) =
+        (0..classified.len()).find_map(|at| synopsis_heading_at(&classified, at))?;
+
+    for line in &classified[body_start..] {
+        let candidate = match line {
+            GroffLine::Macro { name, .. } if name == "SH" || (stop_on_ss && name == "SS") => {
+                return None;
+            }
+            // free text is checked more strictly (long unmarked lines are rejected)
+            GroffLine::Text(text) => synopsis_command_candidate(text.trim(), true),
+            GroffLine::Macro { name, args } if name == "SY" => {
+                synopsis_command_candidate(strip_groff_escapes(args).trim(), false)
+            }
+            GroffLine::Macro { name, args } if matches!(name.as_str(), "B" | "BI" | "BR") => {
+                synopsis_command_candidate(render_synopsis_command_macro(name, args).trim(), false)
+            }
+            _ => None,
+        };
+        if candidate.is_some() {
+            return candidate;
+        }
+    }
+    None
+}
+
+/// check whether the line at index `i` opens a synopsis section.
+///
+/// two shapes are accepted, for both .SH and .SS:
+///   - the heading carries the title itself: `.SH SYNOPSIS`
+///   - the heading has no arguments and the title is the next text line,
+///     with only blank or comment lines allowed in between
+///
+/// on success returns `(heading_is_ss, index_of_first_body_line)`.
+fn synopsis_heading_at(lines: &[GroffLine], i: usize) -> Option<(bool, usize)> {
+    let (is_subsection, title) = match &lines[i] {
+        GroffLine::Macro { name, args } if name == "SH" => (false, args.trim()),
+        GroffLine::Macro { name, args } if name == "SS" => (true, args.trim()),
+        _ => return None,
+    };
+    if title.eq_ignore_ascii_case("SYNOPSIS") {
+        return Some((is_subsection, i + 1));
+    }
+    if !title.is_empty() {
+        return None;
+    }
+    // bare heading: the title may follow on its own text line
+    for (offset, line) in lines[i + 1..].iter().enumerate() {
+        match line {
+            GroffLine::Text(text) if text.trim().eq_ignore_ascii_case("SYNOPSIS") => {
+                // the title sits at i + 1 + offset; the body starts one line later
+                return Some((is_subsection, i + offset + 2));
+            }
+            GroffLine::Blank | GroffLine::Comment => {}
+            _ => return None,
+        }
+    }
+    None
+}
+
+/// render the arguments of a font macro found in the synopsis.
+/// .B and .I keep the spaces between their arguments; every other macro
+/// name is treated as an alternating-font macro whose arguments are
+/// concatenated, then escape-stripped and trimmed.
+fn render_synopsis_command_macro(name: &str, args: &str) -> String {
+    if name == "B" || name == "I" {
+        return strip_space_macro_args(args);
+    }
+    let joined = strip_inline_macro_args(args);
+    strip_groff_escapes(&joined).trim().to_string()
+}
+
+/// try to read a command name off one rendered synopsis line.
+/// rejects empty lines, lines ending in ':' (labels), lines with no leading
+/// command words, candidates that start with '.', and lines that
+/// `looks_like_synopsis_prose` classifies as prose.
+fn synopsis_command_candidate(line: &str, reject_long_unmarked: bool) -> Option<String> {
+    let line = line.trim();
+    if line.is_empty() || line.ends_with(':') {
+        return None;
+    }
+    extract_cmd(line)
+        .filter(|cmd| !cmd.starts_with('.'))
+        .filter(|cmd| !looks_like_synopsis_prose(line, cmd, reject_long_unmarked))
+}
+
+/// heuristic: is `line` (from which `cmd` was extracted) ordinary prose
+/// rather than an invocation?
+///
+/// a line is prose when `cmd` is empty or starts with a common article,
+/// conjunction or demonstrative. otherwise a line carrying an invocation
+/// marker (a word starting with '[', '<', '-' or '{', or a '|' anywhere) is
+/// never prose. unmarked lines are prose when they
+///   - end with a full stop, or
+///   - have more than three command words and `reject_long_unmarked` is set, or
+///   - start with a Capitalised word and contain more than one word.
+fn looks_like_synopsis_prose(line: &str, cmd: &str, reject_long_unmarked: bool) -> bool {
+    const PROSE_OPENERS: [&str; 7] = ["a", "an", "and", "or", "the", "this", "these"];
+
+    let Some(first) = cmd.split_whitespace().next() else {
+        return true;
+    };
+    if PROSE_OPENERS.contains(&first.to_ascii_lowercase().as_str()) {
+        return true;
+    }
+
+    let has_marker = line.contains('|')
+        || line
+            .split_whitespace()
+            .any(|word| word.starts_with(['[', '<', '-', '{']));
+    if has_marker {
+        return false;
+    }
+    if line.ends_with('.') {
+        return true;
+    }
+    if reject_long_unmarked && cmd.split_whitespace().count() > 3 {
+        return true;
+    }
+
+    // "Capitalised": one ascii uppercase letter followed only by ascii lowercase
+    let mut letters = first.chars();
+    let capitalised = letters.next().is_some_and(|c| c.is_ascii_uppercase())
+        && letters.all(|c| c.is_ascii_lowercase());
+    capitalised && line.split_whitespace().count() > 1
+}
+
+/// rewrite every `\fI...\fR` / `\fI...\fP` run as `<...>` so that italic
+/// parameters show up as non-word tokens to extract_cmd.
+///
+/// exception: some manpages italicise the command name itself (e.g.
+/// git-am.1's synopsis reads `\fIgit am\fR ...`). the first italic run that
+/// is preceded only by whitespace and whose content looks like a command
+/// is emitted bare, without angle brackets. this happens at most once per
+/// line.
+///
+/// an italic run only counts when the first backslash after `\fI` begins
+/// `\fR` or `\fP`; otherwise the text is copied through unchanged.
+fn replace_italic_with_angles(line: &str) -> String {
+    let mut out = String::with_capacity(line.len());
+    // `rest` is the not-yet-processed tail of `line`
+    let mut rest = line;
+    let mut command_consumed = false;
+    while let Some(next_char) = rest.chars().next() {
+        if let Some(after_open) = rest.strip_prefix("\\fI") {
+            let inner_len = after_open.find('\\').unwrap_or(after_open.len());
+            let (inner, tail) = after_open.split_at(inner_len);
+            if tail.starts_with("\\fR") || tail.starts_with("\\fP") {
+                let before = &line[..line.len() - rest.len()];
+                let at_line_start =
+                    !command_consumed && before.chars().all(char::is_whitespace);
+                if at_line_start && italic_looks_like_command(inner) {
+                    out.push_str(inner);
+                    command_consumed = true;
+                } else {
+                    out.push('<');
+                    out.push_str(inner);
+                    out.push('>');
+                }
+                // continue after the three-byte closing font escape
+                rest = &tail[3..];
+                continue;
+            }
+        }
+        out.push(next_char);
+        rest = &rest[next_char.len_utf8()..];
+    }
+    out
+}
+
+/// could this italic content be a command name rather than a placeholder?
+/// after groff escapes (like `\-`) are resolved and the text is trimmed, it
+/// must be non-empty and consist solely of ascii lowercase letters, ascii
+/// digits, '-', '_', '.' and spaces.
+fn italic_looks_like_command(inner: &str) -> bool {
+    let resolved = strip_groff_escapes(inner);
+    let candidate = resolved.trim();
+    if candidate.is_empty() {
+        return false;
+    }
+    candidate
+        .chars()
+        .all(|c| matches!(c, 'a'..='z' | '0'..='9' | '-' | '_' | '.' | ' '))
+}
+
+/// take the leading command words of a synopsis line.
+/// the line is split on spaces; words are accepted while they do not start
+/// with '[', '-', '<', '(' or '{' and consist only of ascii alphanumerics,
+/// '-', '_' and '.'. the accepted words are joined with single spaces;
+/// `None` is returned when the very first word is already rejected.
+fn extract_cmd(line: &str) -> Option<String> {
+    let command_words: Vec<&str> = line
+        .split(' ')
+        .filter(|tok| !tok.is_empty())
+        .take_while(|tok| {
+            !tok.starts_with(['[', '-', '<', '(', '{'])
+                && tok
+                    .chars()
+                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.'))
+        })
+        .collect();
+    if command_words.is_empty() {
+        return None;
+    }
+    Some(command_words.join(" "))
+}
+
+/// body lines of the SYNOPSIS section (empty when the page has none).
+fn extract_synopsis_section(lines: &[GroffLine]) -> Vec<GroffLine> {
+    const SECTION_TITLE: &str = "SYNOPSIS";
+    extract_named_section(lines, SECTION_TITLE)
+}
+
+/// extract positional arguments from the SYNOPSIS section.
+/// the synopsis is flattened to one line of plain text with
+/// `join_synopsis_text`; the command name prefix is skipped and the rest is
+/// handed to `parse_usage_args`. names are returned lowercased. an empty
+/// synopsis or a parse failure yields an empty vector.
+pub fn extract_synopsis_positionals(lines: &[GroffLine]) -> Vec<(String, Positional)> {
+    let synopsis = join_synopsis_text(lines);
+    if synopsis.is_empty() {
+        return Vec::new();
+    }
+    let parsed: nom::IResult<&str, Vec<(&str, Positional)>> =
+        preceded(skip_command_name, parse_usage_args).parse(&synopsis);
+    let Ok((_, found)) = parsed else {
+        return Vec::new();
+    };
+    found
+        .into_iter()
+        .map(|(name, positional)| (name.to_ascii_lowercase(), positional))
+        .collect()
+}
+
+/// flatten the SYNOPSIS section into a single line of plain text. shared
+/// by the positional and flag extractors so they see identical input.
+///
+/// each line is rendered to a piece of text; empty pieces are dropped and
+/// the rest are joined with single spaces:
+///   - text and .SY: groff escapes stripped, trimmed
+///   - .I: the whole argument string wrapped as `<...>`
+///   - .IR: first argument wrapped as `<...>`, remainder appended
+///   - .B: arguments kept space-separated
+///   - .BI/.BR/.IB/.RB/.RI: arguments concatenated
+///   - anything else contributes nothing
+/// rendering stops at the first .SS or .br line.
+fn join_synopsis_text(lines: &[GroffLine]) -> String {
+    let mut pieces: Vec<String> = Vec::new();
+    for line in extract_synopsis_section(lines) {
+        let piece = match line {
+            GroffLine::Text(t) => strip_groff_escapes(&t).trim().to_string(),
+            GroffLine::Macro { name, args } => match name.as_str() {
+                "SS" | "br" => break,
+                "SY" => strip_groff_escapes(&args).trim().to_string(),
+                "I" => {
+                    let text = strip_groff_escapes(&args).trim().to_string();
+                    if text.is_empty() {
+                        text
+                    } else {
+                        format!("<{text}>")
+                    }
+                }
+                "IR" => render_leading_italic_arg(&args),
+                "B" => strip_space_macro_args(&args),
+                "BI" | "BR" | "IB" | "RB" | "RI" => {
+                    strip_groff_escapes(&strip_inline_macro_args(&args))
+                        .trim()
+                        .to_string()
+                }
+                _ => String::new(),
+            },
+            _ => String::new(),
+        };
+        if !piece.is_empty() {
+            pieces.push(piece);
+        }
+    }
+    pieces.join(" ").trim().to_string()
+}
+
+/// render the arguments of an .IR line for the synopsis: the first
+/// whitespace-delimited argument is the italic one and is emitted as
+/// `<arg>`; the remaining arguments are concatenated (alternating-font
+/// rules) and appended after a single space when non-empty. returns an
+/// empty string when there is no first argument left after escape
+/// stripping.
+fn render_leading_italic_arg(args: &str) -> String {
+    let args = args.trim();
+    if args.is_empty() {
+        return String::new();
+    }
+    let (head, tail) = args.split_once(char::is_whitespace).unwrap_or((args, ""));
+    let head = strip_groff_escapes(head);
+    let head = head.trim();
+    if head.is_empty() {
+        return String::new();
+    }
+    let tail = strip_groff_escapes(&strip_inline_macro_args(tail.trim()));
+    match tail.trim() {
+        "" => format!("<{head}>"),
+        tail => format!("<{head}> {tail}"),
+    }
+}
+
+/// convert a borrowed `Switch` into its owning `OwnedSwitch` counterpart,
+/// copying the long name into a fresh `String`.
+fn to_owned_switch(s: Switch<'_>) -> OwnedSwitch {
+    match s {
+        Switch::Short(short) => OwnedSwitch::Short(short),
+        Switch::Long(long) => OwnedSwitch::Long(long.to_string()),
+        Switch::Both(short, long) => OwnedSwitch::Both(short, long.to_string()),
+    }
+}
+
+/// convert a borrowed `Param` into its owning `OwnedParam` counterpart,
+/// keeping the mandatory/optional distinction.
+fn to_owned_param(p: Param<'_>) -> OwnedParam {
+    match p {
+        Param::Mandatory(name) => OwnedParam::Mandatory(name.to_string()),
+        Param::Optional(name) => OwnedParam::Optional(name.to_string()),
+    }
+}
+
+/// pull flag entries out of the SYNOPSIS line.
+///
+/// some manpages (notably nix-env, sed) only declare their flags in the
+/// synopsis and never list them again in the OPTIONS body, so a body-only
+/// pass would miss them. the synopsis text is joined the same way the
+/// positional extractor does it, the command name is skipped, and
+/// `parse_usage_flags` is run over what remains. every returned entry has
+/// an empty description; callers merge with body entries, and the body
+/// wins on duplicate flag names because its descriptions are richer.
+/// an empty synopsis or a parse failure yields an empty vector.
+pub fn extract_synopsis_flags(lines: &[GroffLine]) -> Vec<ManpageEntry> {
+    let synopsis = join_synopsis_text(lines);
+    if synopsis.is_empty() {
+        return Vec::new();
+    }
+    let parsed: nom::IResult<&str, Vec<(Switch<'_>, Option<Param<'_>>)>> =
+        preceded(skip_command_name, parse_usage_flags).parse(&synopsis);
+    let Ok((_, pairs)) = parsed else {
+        return Vec::new();
+    };
+    let mut entries = Vec::with_capacity(pairs.len());
+    for (switch, param) in pairs {
+        entries.push(ManpageEntry {
+            switch: to_owned_switch(switch),
+            param: param.map(to_owned_param),
+            desc: String::new(),
+        });
+    }
+    entries
+}
+
+/// extract first-positional choices from prose lists in DESCRIPTION.
+///
+/// getent(1) is the motivating shape: the synopsis has a `database`
+/// positional, while the actual database names are documented as a tagged
+/// list under DESCRIPTION rather than as subcommands or options. The
+/// completion model currently has no separate "positional choices" channel,
+/// so these are represented as subcommand-like candidates for completion.
+///
+/// returns an empty vector when there is no DESCRIPTION section or when it
+/// does not mention both "database" and a list-introducing phrase. names
+/// are deduplicated; the first occurrence keeps its description.
+pub fn extract_description_positionals(lines: &[GroffLine]) -> Vec<ManpageSubcommand> {
+    let description = extract_named_section(lines, "DESCRIPTION");
+    if description.is_empty() || !description_mentions_listed_database(&description) {
+        return Vec::new();
+    }
+
+    let mut out = Vec::new();
+    let mut seen = std::collections::HashSet::new();
+    let mut i = 0;
+    // flips to true at the first list-introducing text line and is never
+    // reset, so every later .TP in the section is treated as a list item.
+    let mut in_database_list = false;
+    while i < description.len() {
+        match &description[i] {
+            GroffLine::Text(text)
+                if text.to_ascii_lowercase().contains("listed below")
+                    || text.to_ascii_lowercase().contains("may be any of") =>
+            {
+                in_database_list = true;
+                i += 1;
+            }
+            GroffLine::Macro { name, .. } if name == "TP" && in_database_list => {
+                // a .TP on the very last line has no tag line after it.
+                if i + 1 >= description.len() {
+                    break;
+                }
+                // the line right after .TP carries the tag; when it cannot
+                // be read as a tag, step past the .TP only and rescan.
+                let Some(name) = description_tag_name(&description[i + 1]) else {
+                    i += 1;
+                    continue;
+                };
+                if !is_description_choice_name(&name) {
+                    i += 1;
+                    continue;
+                }
+                // the description is consumed even for a repeated name so
+                // the cursor still moves past it.
+                let (desc, new_i) = collect_description_choice_desc(&description, i + 2);
+                if seen.insert(name.clone()) {
+                    out.push(ManpageSubcommand { name, desc });
+                }
+                i = new_i;
+            }
+            _ => {
+                i += 1;
+            }
+        }
+    }
+    out
+}
+
+/// report whether the section both talks about a "database" and contains a
+/// list-introducing phrase ("listed below" / "may be any of"). matching is
+/// ascii case-insensitive and looks at text lines plus inline font macros;
+/// the two phrases may appear on different lines.
+fn description_mentions_listed_database(lines: &[GroffLine]) -> bool {
+    let mut has_database = false;
+    let mut has_list_phrase = false;
+    for line in lines {
+        let lowered = match line {
+            GroffLine::Text(raw) => raw.to_ascii_lowercase(),
+            GroffLine::Macro { name, args }
+                if matches!(name.as_str(), "B" | "BI" | "BR" | "I" | "IR" | "RI") =>
+            {
+                strip_groff_escapes(&strip_inline_macro_args(args)).to_ascii_lowercase()
+            }
+            // nothing to inspect on other line kinds.
+            _ => continue,
+        };
+        if lowered.contains("database") {
+            has_database = true;
+        }
+        if lowered.contains("listed below") || lowered.contains("may be any of") {
+            has_list_phrase = true;
+        }
+    }
+    has_database && has_list_phrase
+}
+
+/// read the tag carried by the line following a `.TP`: either plain text
+/// or the cleaned-up arguments of an inline font macro, trimmed. any other
+/// line kind is not a tag and gives `None`.
+fn description_tag_name(line: &GroffLine) -> Option<String> {
+    let tag = match line {
+        GroffLine::Text(text) => text.trim().to_string(),
+        GroffLine::Macro { name, args }
+            if matches!(name.as_str(), "B" | "BI" | "BR" | "I" | "IR") =>
+        {
+            let cleaned = strip_groff_escapes(&strip_inline_macro_args(args));
+            cleaned.trim().to_string()
+        }
+        _ => return None,
+    };
+    Some(tag)
+}
+
+/// decide whether a tag looks like a positional choice name: non-empty, at
+/// most 32 bytes, not starting with a dash, and made up only of ascii
+/// lowercase letters, ascii digits, `-` and `_`.
+fn is_description_choice_name(name: &str) -> bool {
+    if name.is_empty() || name.len() > 32 || name.starts_with('-') {
+        return false;
+    }
+    // every byte of a non-ascii character is >= 0x80, so checking bytes
+    // rejects exactly the same inputs as checking chars.
+    name.bytes()
+        .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_'))
+}
+
+/// gather the prose that describes one list item, starting at `start`.
+/// text lines are taken as-is, inline font macros are rendered to plain
+/// text, and every other line is skipped. scanning stops (without
+/// consuming) at the next `.TP`, `.SH` or `.SS`. returns the first sentence
+/// of the joined prose and the index where scanning stopped.
+fn collect_description_choice_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
+    let mut pieces: Vec<String> = Vec::new();
+    let mut cursor = start;
+    while let Some(line) = lines.get(cursor) {
+        match line {
+            GroffLine::Macro { name, .. } if matches!(name.as_str(), "TP" | "SH" | "SS") => {
+                break;
+            }
+            GroffLine::Text(text) => pieces.push(text.clone()),
+            GroffLine::Macro { name, args }
+                if matches!(name.as_str(), "B" | "BI" | "BR" | "I" | "IR" | "RI") =>
+            {
+                let rendered = strip_groff_escapes(&strip_inline_macro_args(args));
+                let rendered = rendered.trim();
+                if !rendered.is_empty() {
+                    pieces.push(rendered.to_string());
+                }
+            }
+            // contributes nothing to the description.
+            GroffLine::Macro { .. } | GroffLine::Blank | GroffLine::Comment => {}
+        }
+        cursor += 1;
+    }
+    (first_sentence(&pieces.join(" ")), cursor)
+}
+
+/// collapse whitespace runs and return the first sentence of `text`.
+///
+/// a sentence ends at the earliest `". "` or `".) "`. the terminating
+/// period is kept, and so is the closing parenthesis when the sentence
+/// ends inside one. text without such a terminator is returned whole,
+/// whitespace-normalized.
+fn first_sentence(text: &str) -> String {
+    let text = text.split_whitespace().collect::<Vec<_>>().join(" ");
+    // for each marker the cut point sits just before the marker's trailing
+    // space. taking the smallest cut point picks whichever terminator comes
+    // first, so a later ". " can no longer override an earlier ".) ", and
+    // the ")" of ".) " stays attached to its sentence.
+    let cut = [". ", ".) "]
+        .iter()
+        .filter_map(|marker| text.find(*marker).map(|idx| idx + marker.len() - 1))
+        .min();
+    match cut {
+        Some(end) => text[..end].to_string(),
+        None => text,
+    }
+}
+
+/// decide whether a section heading introduces a list of commands.
+///
+/// the heading is trimmed and compared ascii case-insensitively. a trailing
+/// parenthetical is dropped first, so git.1's
+/// "HIGH-LEVEL COMMANDS (PORCELAIN)" is judged as "HIGH-LEVEL COMMANDS".
+/// accepted are exactly "COMMAND", exactly "COMMANDS", and anything ending
+/// in " COMMANDS" ("GIT COMMANDS", "MAIN COMMANDS", ...). requiring the
+/// phrase at the very end keeps "COMMAND LINE OPTIONS" out.
+fn is_commands_section(name: &str) -> bool {
+    let heading = name.trim();
+    // only strip when the heading really ends with ")" and has a "(".
+    let core = heading
+        .strip_suffix(')')
+        .and_then(|without_close| without_close.rfind('('))
+        .map_or(heading, |open| heading[..open].trim());
+    let upper = core.to_ascii_uppercase();
+    matches!(upper.as_str(), "COMMAND" | "COMMANDS") || upper.ends_with(" COMMANDS")
+}
+
+/// collect the body lines of every `.SH` section whose heading passes
+/// `is_commands_section`. the `.SH` header lines themselves are never
+/// included, and the bodies of several matching sections are concatenated
+/// in document order.
+pub fn extract_commands_section(lines: &[GroffLine]) -> Vec<GroffLine> {
+    let mut collected: Vec<GroffLine> = Vec::new();
+    let mut inside = false;
+    for line in lines {
+        match line {
+            // each .SH closes the current section and decides whether the
+            // next one is wanted.
+            GroffLine::Macro { name, args } if name == "SH" => {
+                inside = is_commands_section(args);
+            }
+            body if inside => collected.push(body.clone()),
+            _ => {}
+        }
+    }
+    collected
+}
+
+/// extract SUBCOMMAND-style sections (clap-generated manpages give each
+/// subcommand its own `.SH SUBCOMMAND` header followed by a `Usage:` line).
+///
+/// yields one `(name, description, lines)` triple per section in which a
+/// usage name was found, so the caller can re-parse the section lines as a
+/// help result of their own. the description is built from the text lines
+/// that precede the usage line; sections without a usage name are dropped.
+pub fn extract_subcommand_sections(lines: &[GroffLine]) -> Vec<(String, String, Vec<GroffLine>)> {
+    // pass 1: cut the page at .SH boundaries, keeping SUBCOMMAND(S) bodies.
+    let mut sections: Vec<Vec<GroffLine>> = Vec::new();
+    let mut pending: Vec<GroffLine> = Vec::new();
+    let mut collecting = false;
+    for line in lines {
+        match line {
+            GroffLine::Macro { name, args } if name == "SH" => {
+                if collecting {
+                    sections.push(std::mem::take(&mut pending));
+                }
+                let heading = args.trim().to_ascii_uppercase();
+                collecting = matches!(heading.as_str(), "SUBCOMMAND" | "SUBCOMMANDS");
+            }
+            body if collecting => pending.push(body.clone()),
+            _ => {}
+        }
+    }
+    if collecting {
+        sections.push(pending);
+    }
+
+    // pass 2: find each section's usage name and leading prose.
+    let mut out = Vec::new();
+    for section in sections {
+        let mut usage_name: Option<String> = None;
+        let mut prose: Vec<&str> = Vec::new();
+        for line in &section {
+            let hit = match line {
+                GroffLine::Text(text) => {
+                    let hit = find_usage_name(text);
+                    if hit.is_none() {
+                        // text ahead of the usage line is the description.
+                        prose.push(text.as_str());
+                    }
+                    hit
+                }
+                GroffLine::Macro { name, args }
+                    if matches!(name.as_str(), "TP" | "B" | "BI" | "BR") =>
+                {
+                    let rendered = strip_groff_escapes(&strip_inline_macro_args(args));
+                    find_usage_name(rendered.trim())
+                }
+                _ => None,
+            };
+            if hit.is_some() {
+                usage_name = hit;
+                break;
+            }
+        }
+        let Some(name) = usage_name else {
+            continue;
+        };
+        let desc = strip_groff_escapes(&prose.join(" "));
+        let desc = strip_backtick_words(desc.trim());
+        out.push((name, desc, section));
+    }
+    out
+}
+
+/// find the first "Usage: " marker and return the name that directly
+/// follows it. the name is the longest run of ascii alphanumerics,
+/// underscores and dashes; `None` when the marker is missing or nothing
+/// name-like follows it.
+fn find_usage_name(text: &str) -> Option<String> {
+    let (_, tail) = text.split_once("Usage: ")?;
+    let name = tail
+        .split(|c: char| !(c.is_ascii_alphanumeric() || c == '_' || c == '-'))
+        .next()
+        .unwrap_or("");
+    (!name.is_empty()).then(|| name.to_string())
+}
+
+/// remove the backticks around backtick-quoted spans: `word` -> word.
+/// a backtick with no closing partner is kept as it is.
+fn strip_backtick_words(s: &str) -> String {
+    let mut out = String::with_capacity(s.len());
+    let mut rest = s;
+    while let Some(open) = rest.find('`') {
+        out.push_str(&rest[..open]);
+        let after_open = &rest[open + 1..];
+        match after_open.find('`') {
+            Some(close) => {
+                // keep the quoted text, drop both delimiters.
+                out.push_str(&after_open[..close]);
+                rest = &after_open[close + 1..];
+            }
+            None => {
+                // unmatched backtick: emit it and carry on after it.
+                out.push('`');
+                rest = after_open;
+            }
+        }
+    }
+    out.push_str(rest);
+    out
+}
--- a/src/parsers/manpage/strategies.rs
+++ b/src/parsers/manpage/strategies.rs
@ -0,0 +1,456 @@
+//! strategy-based entry extraction.
+//!
+//! rather than a single monolithic parser, we use multiple "strategies" that
+//! each target a specific groff formatting pattern. this is necessary because
+//! manpage authors use very different macro combinations for the same purpose.
+
+use nom::{Parser, combinator::opt};
+
+use crate::make_macro_walker;
+use crate::parsers::help::{help_parser, param_parser, switch_parser};
+use crate::parsers::manpage::groff::{
+    GroffLine, strip_groff_escapes, strip_inline_macro_args, strip_space_macro_args,
+};
+use crate::parsers::manpage::{ManpageEntry, OwnedParam, OwnedSwitch};
+use crate::types::{Param, Switch};
+
+/// take the run of text lines at the front of `lines` and join it with
+/// single spaces. returns the joined text together with the slice that
+/// starts at the first non-text line.
+fn collect_text_lines(lines: &[GroffLine]) -> (String, &[GroffLine]) {
+    let run_len = lines
+        .iter()
+        .take_while(|line| matches!(line, GroffLine::Text(_)))
+        .count();
+    let (run, remaining) = lines.split_at(run_len);
+    let texts: Vec<&str> = run
+        .iter()
+        .filter_map(|line| match line {
+            GroffLine::Text(text) => Some(text.as_str()),
+            _ => None,
+        })
+        .collect();
+    (texts.join(" "), remaining)
+}
+
+/// gather the description that follows a tag, starting at `start`.
+/// text lines are taken verbatim and inline font macros are rendered with
+/// `tag_of_macro`; blank lines, comments and all other macros are skipped.
+/// scanning stops (without consuming) at the next `.TP`, `.TQ`, `.IP`,
+/// `.PP`, `.SH` or `.SS`. returns the space-joined description and the
+/// index where scanning stopped.
+fn collect_description_lines(lines: &[GroffLine], start: usize) -> (String, usize) {
+    let mut pieces: Vec<String> = Vec::new();
+    let mut cursor = start;
+    while let Some(line) = lines.get(cursor) {
+        match line {
+            GroffLine::Macro { name, .. }
+                if matches!(name.as_str(), "TP" | "TQ" | "IP" | "PP" | "SH" | "SS") =>
+            {
+                break;
+            }
+            GroffLine::Text(text) => pieces.push(text.clone()),
+            GroffLine::Macro { name, args }
+                if matches!(
+                    name.as_str(),
+                    "B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI"
+                ) =>
+            {
+                let rendered = tag_of_macro(name, args);
+                if !rendered.is_empty() {
+                    pieces.push(rendered);
+                }
+            }
+            // contributes nothing to the description.
+            GroffLine::Macro { .. } | GroffLine::Blank | GroffLine::Comment => {}
+        }
+        cursor += 1;
+    }
+    (pieces.join(" "), cursor)
+}
+
+/// convert a borrowed `Switch` into its owning `OwnedSwitch` counterpart,
+/// copying the long name into a fresh `String`.
+fn to_owned_switch(s: Switch<'_>) -> OwnedSwitch {
+    match s {
+        Switch::Short(short) => OwnedSwitch::Short(short),
+        Switch::Long(long) => OwnedSwitch::Long(long.to_string()),
+        Switch::Both(short, long) => OwnedSwitch::Both(short, long.to_string()),
+    }
+}
+
+/// convert a borrowed `Param` into its owning `OwnedParam` counterpart,
+/// keeping the mandatory/optional distinction.
+fn to_owned_param(p: Param<'_>) -> OwnedParam {
+    match p {
+        Param::Mandatory(name) => OwnedParam::Mandatory(name.to_string()),
+        Param::Optional(name) => OwnedParam::Optional(name.to_string()),
+    }
+}
+
+/// try to read a tag string (e.g. "-v, --verbose FILE") as a flag entry.
+/// groff escapes are stripped and the tag trimmed, then the help module's
+/// `switch_parser` followed by an optional `param_parser` is applied.
+/// returns `None` when the tag does not start with something that parses
+/// as a switch; otherwise the entry carries `desc` unchanged.
+pub fn parse_tag_to_entry(tag: &str, desc: String) -> Option<ManpageEntry> {
+    let cleaned = strip_groff_escapes(tag);
+    let parsed: nom::IResult<&str, (Switch<'_>, Option<Param<'_>>)> =
+        (switch_parser, opt(param_parser)).parse(cleaned.trim());
+    // any unparsed remainder of the tag is ignored.
+    let (_, (switch, param)) = parsed.ok()?;
+    Some(ManpageEntry {
+        switch: to_owned_switch(switch),
+        param: param.map(to_owned_param),
+        desc,
+    })
+}
+
+/// turn the arguments of a font macro into plain tag text.
+/// `.B` and `.I` take a single argument, so their spaces are preserved via
+/// `strip_space_macro_args`. every other macro name is treated as a
+/// font-alternating macro (`.BI`, `.BR`, `.IR`, ...): its arguments are
+/// run through `strip_inline_macro_args`, escapes are stripped and the
+/// result is trimmed.
+pub fn tag_of_macro(name: &str, args: &str) -> String {
+    if matches!(name, "B" | "I") {
+        return strip_space_macro_args(args);
+    }
+    let joined = strip_inline_macro_args(args);
+    strip_groff_escapes(&joined).trim().to_string()
+}
+
+// strategy a: .TP style (most common — gnu coreutils, help2man).
+// a .TP opens a tagged paragraph: the line after it is the tag (the flag
+// name), either as plain text or inside a font macro (.B, .BI, ...), and
+// the lines after the tag form the description. several tags chained with
+// .TQ share one description. a .TP without any readable tag is skipped.
+pub fn strategy_tp(lines: &[GroffLine]) -> Vec<ManpageEntry> {
+    let mut entries = Vec::new();
+    let mut cursor = 0;
+    while cursor < lines.len() {
+        let at_tp = matches!(&lines[cursor], GroffLine::Macro { name, .. } if name == "TP");
+        if !at_tp {
+            cursor += 1;
+            continue;
+        }
+
+        let (tags, body_start) = collect_tp_tags(lines, cursor + 1);
+        if tags.is_empty() {
+            // step past the .TP only; the next line is examined afresh.
+            cursor += 1;
+            continue;
+        }
+        let (desc, next) = collect_description_lines(lines, body_start);
+        entries.extend(entries_from_tag_alternates(&tags, desc));
+        cursor = next;
+    }
+    entries
+}
+
+/// read the tag line at `start` plus any further tags chained with `.TQ`.
+/// returns the tags in order and the index of the first line after them.
+/// when a `.TQ` is not followed by a readable tag, the returned index
+/// points just past that `.TQ`.
+fn collect_tp_tags(lines: &[GroffLine], start: usize) -> (Vec<String>, usize) {
+    let mut tags = Vec::new();
+    let mut cursor = start;
+    while let Some(tag) = lines.get(cursor).and_then(tag_from_line) {
+        tags.push(tag);
+        cursor += 1;
+        let chained = matches!(
+            lines.get(cursor),
+            Some(GroffLine::Macro { name, .. }) if name == "TQ"
+        );
+        if !chained {
+            break;
+        }
+        // consume the .TQ and look for the tag it announces.
+        cursor += 1;
+    }
+    (tags, cursor)
+}
+
+/// read a tag from one line: plain text is used verbatim, a font macro is
+/// rendered with `tag_of_macro`, anything else is not a tag.
+fn tag_from_line(line: &GroffLine) -> Option<String> {
+    const FONT_MACROS: [&str; 8] = ["B", "I", "BI", "BR", "IR", "IB", "RB", "RI"];
+    match line {
+        GroffLine::Text(tag) => Some(tag.clone()),
+        GroffLine::Macro { name, args } if FONT_MACROS.contains(&name.as_str()) => {
+            Some(tag_of_macro(name, args))
+        }
+        _ => None,
+    }
+}
+
+/// build entries for a group of alternate tags sharing one description.
+/// tags that do not parse as flags are dropped. when exactly two entries
+/// remain and they are a short/long pair, they are merged into one entry.
+fn entries_from_tag_alternates(tags: &[String], desc: String) -> Vec<ManpageEntry> {
+    let mut entries: Vec<ManpageEntry> = Vec::with_capacity(tags.len());
+    for tag in tags {
+        if let Some(entry) = parse_tag_to_entry(tag, desc.clone()) {
+            entries.push(entry);
+        }
+    }
+    if let [first, second] = entries.as_slice() {
+        if let Some(merged) = combine_short_long_alternates(first, second) {
+            return vec![merged];
+        }
+    }
+    entries
+}
+
+/// merge a short-only and a long-only entry (in either order) into a
+/// single `Both` entry. the long entry's parameter is preferred, falling
+/// back to the short entry's; the description comes from `left`. any other
+/// combination of switches gives `None`.
+fn combine_short_long_alternates(
+    left: &ManpageEntry,
+    right: &ManpageEntry,
+) -> Option<ManpageEntry> {
+    let (short, long, long_entry, short_entry) = match (&left.switch, &right.switch) {
+        (OwnedSwitch::Long(long), OwnedSwitch::Short(short)) => (*short, long, left, right),
+        (OwnedSwitch::Short(short), OwnedSwitch::Long(long)) => (*short, long, right, left),
+        _ => return None,
+    };
+    let param = long_entry
+        .param
+        .clone()
+        .or_else(|| short_entry.param.clone());
+    Some(ManpageEntry {
+        switch: OwnedSwitch::Both(short, long.clone()),
+        param,
+        desc: left.desc.clone(),
+    })
+}
+
+// strategy b: .IP style (curl, hand-written manpages).
+// .IP takes an inline tag argument: .IP "-v, --verbose"
+// the description follows as text lines.
+//
+// the closure gets the line slice, the index of the matched .IP line and
+// that macro's argument string. it yields the parsed entry paired with the
+// index of the first line after the description, or None when the tag does
+// not parse as a flag.
+// NOTE(review): how the walker advances on None and on the returned index
+// is defined by `make_macro_walker!`, which is not visible here — confirm.
+make_macro_walker!(pub strategy_ip -> Vec<ManpageEntry>, on macro "IP" =>
+    |lines, i, args| {
+        // NOTE(review): parse_tag_to_entry strips escapes as well, so the
+        // tag is stripped twice — presumably harmless, verify.
+        let tag = strip_groff_escapes(args);
+        // only the unbroken run of text lines right after the .IP counts.
+        let (desc, rest) = collect_text_lines(&lines[i + 1..]);
+        // turn the remaining-slice length back into an absolute index.
+        let new_i = lines.len() - rest.len();
+        parse_tag_to_entry(&tag, desc).map(|e| (e, new_i))
+    }
+);
+
+// strategy c: .PP + .RS/.RE style (git, docbook-generated manpages).
+// flag entries are introduced by .PP (paragraph), with the flag name as
+// plain text, followed by a .RS (indent) block containing the description,
+// closed by .RE (de-indent).
+//
+// the closure yields the parsed entry paired with the index at which
+// collect_pp_rs_desc stopped, or None when the .PP is the last line, is
+// not followed by a text line, or the text does not parse as a flag.
+// NOTE(review): resumption after None / the returned index is up to
+// `make_macro_walker!`, which is not visible here — confirm.
+make_macro_walker!(pub strategy_pp_rs -> Vec<ManpageEntry>, on macro "PP" =>
+    |lines, i, _args| {
+        // a .PP on the final line has no tag line to read.
+        if i + 1 >= lines.len() { return None; }
+        // only a plain text line directly after .PP is accepted as the tag.
+        if let GroffLine::Text(tag) = &lines[i + 1] {
+            let (desc, new_i) = collect_pp_rs_desc(lines, i + 2);
+            parse_tag_to_entry(tag, desc).map(|e| (e, new_i))
+        } else {
+            None
+        }
+    }
+);
+
+/// collect the description of a .PP-style entry, starting at `start`.
+///
+/// text lines before any .RS are gathered; once an .RS is met, the text
+/// lines inside it are gathered too, up to the matching .RE, a .PP, a .SH
+/// or the end of input. returns the space-joined text and the index at
+/// which to resume: just past a closing .RE, otherwise at the line that
+/// stopped the scan.
+fn collect_pp_rs_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
+    let mut acc: Vec<String> = Vec::new();
+    let mut i = start;
+    // outer: look for .RS marker or text
+    while i < lines.len() {
+        match &lines[i] {
+            GroffLine::Macro { name, .. } if name == "RS" => {
+                i += 1;
+                // inside .RS — collect until .RE or boundary macro
+                while i < lines.len() {
+                    match &lines[i] {
+                        // .RE closes the block and is consumed.
+                        GroffLine::Macro { name, .. } if name == "RE" => {
+                            return (acc.join(" "), i + 1);
+                        }
+                        GroffLine::Text(t) => {
+                            acc.push(t.clone());
+                            i += 1;
+                        }
+                        // a new paragraph or section ends the block early
+                        // and is left for the caller.
+                        GroffLine::Macro { name, .. } if name == "PP" || name == "SH" => {
+                            return (acc.join(" "), i);
+                        }
+                        // other macros (a nested .RS included), blanks and
+                        // comments are skipped without tracking depth.
+                        _ => i += 1,
+                    }
+                }
+                // input ended before the block was closed.
+                return (acc.join(" "), i);
+            }
+            GroffLine::Text(t) => {
+                acc.push(t.clone());
+                i += 1;
+            }
+            // before any .RS, the first non-text line ends the description
+            // and is not consumed.
+            _ => return (acc.join(" "), i),
+        }
+    }
+    (acc.join(" "), i)
+}
+
+/// strategy d: deroff fallback — flatten the section to plain text and
+/// run that through the help parser.
+///
+/// text lines and the arguments of `.B`, `.I`, `.BI`, `.BR` and `.IR` each
+/// become one line of output, blank lines are kept as empty lines, and
+/// every other line is dropped. a help-parser failure yields no entries.
+pub fn strategy_deroff(lines: &[GroffLine]) -> Vec<ManpageEntry> {
+    let mut plain = String::with_capacity(256);
+    for line in lines {
+        match line {
+            GroffLine::Text(text) => plain.push_str(text),
+            GroffLine::Macro { name, args }
+                if matches!(name.as_str(), "BI" | "BR" | "IR" | "B" | "I") =>
+            {
+                plain.push_str(&strip_groff_escapes(&strip_inline_macro_args(args)));
+            }
+            GroffLine::Blank => {}
+            // dropped lines leave no trace, not even a newline.
+            _ => continue,
+        }
+        plain.push('\n');
+    }
+    let Ok((_, parsed)) = help_parser(&plain) else {
+        return Vec::new();
+    };
+    let mut entries = Vec::with_capacity(parsed.entries.len());
+    for entry in parsed.entries {
+        entries.push(ManpageEntry {
+            switch: to_owned_switch(entry.switch),
+            param: entry.param.map(to_owned_param),
+            desc: entry.desc.join(" "),
+        });
+    }
+    entries
+}
+
+/// an `.IP` counts as a bullet when its argument string holds anything
+/// other than whitespace.
+fn is_bullet_ip(args: &str) -> bool {
+    args.chars().any(|c| !c.is_whitespace())
+}
+
+// strategy e: nix3-style bullet .IP with .UR/.UE hyperlinks.
+// nix's manpages use .IP with bullet markers for flag entries, interleaved
+// with .UR/.UE hyperlink macros. the flag tag is in text lines after the
+// bullet .IP, and the description follows a non-bullet .IP marker.
+//
+// the closure yields the parsed entry paired with the index at which
+// collect_nix_desc stopped, or None for a non-bullet .IP or a tag that
+// does not parse as a flag.
+// NOTE(review): resumption after None / the returned index is up to
+// `make_macro_walker!`, which is not visible here — confirm.
+make_macro_walker!(pub strategy_nix -> Vec<ManpageEntry>, on macro "IP" =>
+    |lines, i, args| {
+        // .IP lines without an argument are description markers, not entries.
+        if !is_bullet_ip(args) { return None; }
+        // collect tag: skip .UR/.UE macros, gather Text lines
+        let mut tag_idx = i + 1;
+        let mut tag_parts: Vec<String> = Vec::new();
+        while tag_idx < lines.len() {
+            match &lines[tag_idx] {
+                GroffLine::Macro { name, .. } if name == "UR" || name == "UE" => {
+                    tag_idx += 1;
+                }
+                GroffLine::Text(t) => {
+                    tag_parts.push(t.clone());
+                    tag_idx += 1;
+                }
+                // anything else (blank lines and comments included) ends the tag.
+                _ => break,
+            }
+        }
+        let tag = tag_parts.join(" ");
+        let (desc, new_i) = collect_nix_desc(lines, tag_idx);
+        parse_tag_to_entry(&tag, desc).map(|e| (e, new_i))
+    }
+);
+
+/// collect the description of a nix-style entry, starting at `start`.
+///
+/// the description must be opened by an argument-less `.IP`; when it is
+/// not (or `start` is past the end) the result is an empty string and
+/// `start` itself, so nothing is consumed. otherwise text lines are
+/// gathered until the next bullet `.IP`, a `.SS`/`.SH`, or the end of
+/// input, and the index of that stopping line is returned.
+fn collect_nix_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
+    if start >= lines.len() {
+        return (String::new(), start);
+    }
+    let mut i = start;
+    // require non-bullet .IP marker for description
+    if let GroffLine::Macro { name, args } = &lines[i]
+        && name == "IP"
+        && args.trim().is_empty()
+    {
+        i += 1;
+    } else {
+        return (String::new(), start);
+    }
+    let mut parts: Vec<String> = Vec::new();
+    while i < lines.len() {
+        match &lines[i] {
+            GroffLine::Text(t) => {
+                parts.push(t.clone());
+                i += 1;
+            }
+            GroffLine::Macro { name, args } if name == "IP" => {
+                if !args.trim().is_empty() {
+                    // next bullet entry — stop
+                    return (parts.join(" "), i);
+                }
+                // non-bullet .IP = continuation paragraph
+                i += 1;
+            }
+            // a new (sub)section ends the description and is not consumed.
+            GroffLine::Macro { name, .. } if name == "SS" || name == "SH" => {
+                return (parts.join(" "), i);
+            }
+            // an indented block is skipped as a whole, nested blocks
+            // included; none of its text reaches the description.
+            GroffLine::Macro { name, .. } if name == "RS" => {
+                i = skip_rs(lines, i + 1, 1);
+            }
+            GroffLine::Macro { .. } => {
+                i += 1;
+            }
+            GroffLine::Blank | GroffLine::Comment => {
+                i += 1;
+            }
+        }
+    }
+    (parts.join(" "), i)
+}
+
+/// skip an `.RS` block whose body starts at `start`, with `depth` blocks
+/// already open. nested `.RS`/`.RE` pairs are tracked. returns the index
+/// just past the `.RE` that brings the depth back to zero, or the end of
+/// input when the block is never closed.
+fn skip_rs(lines: &[GroffLine], start: usize, mut depth: usize) -> usize {
+    for (idx, line) in lines.iter().enumerate().skip(start) {
+        let GroffLine::Macro { name, .. } = line else {
+            continue;
+        };
+        match name.as_str() {
+            "RE" => {
+                depth -= 1;
+                if depth == 0 {
+                    return idx + 1;
+                }
+            }
+            "RS" => depth += 1,
+            _ => {}
+        }
+    }
+    // never closed: report the end of input, or `start` if already beyond it.
+    start.max(lines.len())
+}
+
+/// count how many lines of the section are the macro called `name`.
+fn count_macro(name: &str, lines: &[GroffLine]) -> usize {
+    let mut total = 0;
+    for line in lines {
+        if let GroffLine::Macro { name: found, .. } = line {
+            if found == name {
+                total += 1;
+            }
+        }
+    }
+    total
+}
+
+/// run every applicable strategy and keep the result with the most
+/// entries.
+///
+/// a strategy is applicable when the macros it keys on occur in the
+/// section: `.TP` for the tp strategy, `.IP` for the ip strategy, `.PP`
+/// together with `.RS` for the pp+rs strategy, `.UR` together with `.IP`
+/// for the nix strategy. strategies that find nothing are discarded; on a
+/// tie the strategy tried later wins. when no specialized strategy yields
+/// anything, the deroff fallback's result is returned instead.
+pub fn extract_entries(lines: &[GroffLine]) -> Vec<ManpageEntry> {
+    let has_tp = count_macro("TP", lines) > 0;
+    let has_ip = count_macro("IP", lines) > 0;
+    let has_pp = count_macro("PP", lines) > 0;
+    let has_rs = count_macro("RS", lines) > 0;
+    let has_ur = count_macro("UR", lines) > 0;
+
+    // order matters: it decides which strategy wins a tie.
+    let mut results: Vec<Vec<ManpageEntry>> = Vec::new();
+    if has_tp {
+        results.push(strategy_tp(lines));
+    }
+    if has_ip {
+        results.push(strategy_ip(lines));
+    }
+    if has_pp && has_rs {
+        results.push(strategy_pp_rs(lines));
+    }
+    if has_ur && has_ip {
+        results.push(strategy_nix(lines));
+    }
+    results.retain(|entries| !entries.is_empty());
+    if results.is_empty() {
+        return strategy_deroff(lines);
+    }
+    // max_by_key keeps the last of several equal maxima, i.e. the
+    // strategy that was tried later.
+    results
+        .into_iter()
+        .max_by_key(Vec::len)
+        .unwrap_or_default()
+}
--- a/src/parsers/mod.rs
+++ b/src/parsers/mod.rs
@ -0,0 +1,3 @@
+pub mod help;
+pub mod manpage;
+pub mod nushell;
--- a/src/parsers/nushell.rs
+++ b/src/parsers/nushell.rs
@ -0,0 +1,475 @@
+//! generate nushell `extern` definitions from parsed help data.
+//!
+//! this module is the code generation backend. it takes a [`ManpageResult`]
+//! (from the help or manpage parsers) and produces nushell source that defines
+//! `extern` declarations — nushell's mechanism for teaching the shell about
+//! external commands' flags and subcommands so it can offer completions.
+//!
+//! key responsibilities:
+//!   - deduplicating flag entries (same flag from multiple help sources)
+//!   - mapping parameter names to nushell types (path, int, string)
+//!   - formatting flags in nushell syntax: --flag(-f): type  # description
+//!   - handling positional arguments with nushell's ordering constraints
+//!   - escaping special characters for nushell string literals
+
+use std::borrow::Cow;
+use std::collections::{HashMap, HashSet};
+use std::sync::OnceLock;
+
+use crate::parsers::manpage::{
+    ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch,
+};
+use crate::types::Positional;
+
+/// nushell built-in commands and keywords — we must never generate `extern`
+/// definitions for these because it would shadow nushell's own implementations.
+/// maintained manually and should be updated with new nushell releases.
+///
+/// entries are listed in ascii order; keep new names in that order so the
+/// list stays easy to scan and diff. lookups go through a hash set built
+/// from this slice, so the order has no effect on behavior.
+pub const NUSHELL_BUILTINS: &[&str] = &[
+    "alias",
+    "all",
+    "ansi",
+    "any",
+    "append",
+    "ast",
+    "attr",
+    "bits",
+    "break",
+    "bytes",
+    "cal",
+    "cd",
+    "char",
+    "chunk-by",
+    "chunks",
+    "clear",
+    "collect",
+    "columns",
+    "commandline",
+    "compact",
+    "complete",
+    "config",
+    "const",
+    "continue",
+    "cp",
+    "date",
+    "debug",
+    "decode",
+    "def",
+    "default",
+    "describe",
+    "detect",
+    "do",
+    "drop",
+    "du",
+    "each",
+    "echo",
+    "encode",
+    "enumerate",
+    "error",
+    "every",
+    "exec",
+    "exit",
+    "explain",
+    "explore",
+    "export",
+    "export-env",
+    "extern",
+    "fill",
+    "filter",
+    "find",
+    "first",
+    "flatten",
+    "for",
+    "format",
+    "from",
+    "generate",
+    "get",
+    "glob",
+    "grid",
+    "group-by",
+    "hash",
+    "headers",
+    "help",
+    "hide",
+    "hide-env",
+    "histogram",
+    "history",
+    "http",
+    "if",
+    "ignore",
+    "input",
+    "insert",
+    "inspect",
+    "interleave",
+    "into",
+    "is-admin",
+    "is-empty",
+    "is-not-empty",
+    "is-terminal",
+    "items",
+    "job",
+    "join",
+    "keybindings",
+    "kill",
+    "last",
+    "length",
+    "let",
+    "let-env",
+    "lines",
+    "load-env",
+    "loop",
+    "ls",
+    "match",
+    "math",
+    "merge",
+    "metadata",
+    "mkdir",
+    "mktemp",
+    "module",
+    "move",
+    "mut",
+    "mv",
+    "nu-check",
+    "nu-highlight",
+    "open",
+    "overlay",
+    "panic",
+    "par-each",
+    "parse",
+    "path",
+    "plugin",
+    "port",
+    "prepend",
+    "print",
+    "ps",
+    "query",
+    "random",
+    "reduce",
+    "reject",
+    "rename",
+    "return",
+    "reverse",
+    "rm",
+    "roll",
+    "rotate",
+    "run-external",
+    "save",
+    "schema",
+    "scope",
+    "select",
+    "seq",
+    "shuffle",
+    "skip",
+    "sleep",
+    "slice",
+    "sort",
+    "sort-by",
+    "source",
+    "source-env",
+    "split",
+    "start",
+    "stor",
+    "str",
+    "sys",
+    "table",
+    "take",
+    "tee",
+    "term",
+    "timeit",
+    "to",
+    "touch",
+    "transpose",
+    "try",
+    "tutor",
+    "ulimit",
+    "umask",
+    "uname",
+    "uniq",
+    "uniq-by",
+    "unlet",
+    "update",
+    "upsert",
+    "url",
+    "use",
+    "values",
+    "version",
+    "view",
+    "watch",
+    "where",
+    "which",
+    "while",
+    "whoami",
+    "window",
+    "with-env",
+    "wrap",
+    "zip",
+];
+
+/// hash-set view of `NUSHELL_BUILTINS`, built on first use and shared for
+/// the rest of the process.
+fn builtin_set() -> &'static HashSet<&'static str> {
+    static BUILTINS: OnceLock<HashSet<&'static str>> = OnceLock::new();
+    BUILTINS.get_or_init(|| {
+        let mut names = HashSet::with_capacity(NUSHELL_BUILTINS.len());
+        names.extend(NUSHELL_BUILTINS.iter().copied());
+        names
+    })
+}
+
+/// check whether `cmd` is the exact name of a nushell built-in or keyword.
+/// the comparison is case-sensitive.
+pub fn is_nushell_builtin(cmd: &str) -> bool {
+    let builtins = builtin_set();
+    builtins.get(cmd).is_some()
+}
+
+/// infer the nushell type to use for a parameter from its name.
+///
+/// nushell `extern` declarations take typed parameters; the only hint we
+/// have is the placeholder name from the help text. the match is exact and
+/// case-sensitive against two fixed lists:
+///   - file/path-like names  -> "path" (enables path completion)
+///   - count/number-like names -> "int"
+///   - anything else         -> "string"
+pub fn nushell_type_of_param(name: &str) -> &'static str {
+    const PATH_NAMES: &[&str] = &[
+        "FILE",
+        "file",
+        "PATH",
+        "path",
+        "DIR",
+        "dir",
+        "DIRECTORY",
+        "FILENAME",
+        "PATTERNFILE",
+    ];
+    const INT_NAMES: &[&str] = &[
+        "NUM", "N", "COUNT", "NUMBER", "int", "INT", "COLS", "WIDTH", "LINES", "DEPTH", "depth",
+    ];
+
+    if PATH_NAMES.iter().any(|&known| known == name) {
+        "path"
+    } else if INT_NAMES.iter().any(|&known| known == name) {
+        "int"
+    } else {
+        "string"
+    }
+}
+
+/// escape `s` for embedding inside a nushell double-quoted string literal.
+///
+/// a backslash is inserted in front of every `"` and `\`; every other
+/// character is copied through untouched. when nothing needs escaping the
+/// input is returned borrowed, so the common case does not allocate.
+pub fn escape_nu(s: &str) -> Cow<'_, str> {
+    let needs_escape = |c: char| c == '"' || c == '\\';
+    if !s.contains(needs_escape) {
+        return Cow::Borrowed(s);
+    }
+
+    let mut escaped = String::with_capacity(s.len() + 4);
+    for ch in s.chars() {
+        if needs_escape(ch) {
+            escaped.push('\\');
+        }
+        escaped.push(ch);
+    }
+    Cow::Owned(escaped)
+}
+
+/// dedup key for a flag entry: `--long` whenever a long name exists
+/// (with or without a short alias), otherwise `-c` for a short-only flag.
+fn entry_key(e: &ManpageEntry) -> String {
+    match &e.switch {
+        OwnedSwitch::Long(long) | OwnedSwitch::Both(_, long) => {
+            let mut key = String::from("--");
+            key.push_str(long);
+            key
+        }
+        OwnedSwitch::Short(short) => format!("-{short}"),
+    }
+}
+
+/// rank how informative an entry is, for choosing between duplicates:
+/// one point per 10 bytes of description (capped at 5), +5 when the flag
+/// takes a parameter, +10 when both short and long forms are present.
+fn entry_score(e: &ManpageEntry) -> i32 {
+    let mut score = (e.desc.len() / 10).min(5) as i32;
+    if e.param.is_some() {
+        score += 5;
+    }
+    if let OwnedSwitch::Both(..) = &e.switch {
+        score += 10;
+    }
+    score
+}
+
+/// collapse flag entries that describe the same flag into one.
+///
+/// the same flag can show up several times (overlapping manpage sections,
+/// repeated help text). entries are grouped by `entry_key` and the one with
+/// the highest `entry_score` wins; on a tie the earliest entry is kept:
+///   - both short+long form present: +10
+///   - has a parameter: +5
+///   - description length bonus: up to +5
+///
+/// afterwards, standalone short flags whose letter is already provided by a
+/// winning Both(short, long) entry are dropped, so we never emit both "-v"
+/// and "--verbose(-v)" (nushell would reject that as a duplicate). output
+/// order follows the first appearance of each key in `entries`.
+pub fn dedup_entries(entries: &[ManpageEntry]) -> Vec<ManpageEntry> {
+    // pass 1: pick the winner per key. strict `<` keeps the earlier entry on ties.
+    let mut best: HashMap<String, &ManpageEntry> = HashMap::new();
+    for candidate in entries {
+        best.entry(entry_key(candidate))
+            .and_modify(|kept| {
+                if entry_score(*kept) < entry_score(candidate) {
+                    *kept = candidate;
+                }
+            })
+            .or_insert(candidate);
+    }
+
+    // short letters that a winning short+long entry already covers.
+    let covered: HashSet<char> = best
+        .values()
+        .filter_map(|winner| match &winner.switch {
+            OwnedSwitch::Both(c, _) => Some(*c),
+            _ => None,
+        })
+        .collect();
+
+    // pass 2: walk the input again so winners come out in first-seen order.
+    let mut emitted: HashSet<String> = HashSet::new();
+    let mut out: Vec<ManpageEntry> = Vec::new();
+    for e in entries {
+        if matches!(&e.switch, OwnedSwitch::Short(c) if covered.contains(c)) {
+            continue;
+        }
+        let key = entry_key(e);
+        if emitted.contains(&key) {
+            continue;
+        }
+        // every key produced in pass 2 was inserted in pass 1.
+        let winner: &ManpageEntry = best[&key];
+        out.push(winner.clone());
+        emitted.insert(key);
+    }
+    out
+}
+
+/// render one flag entry as a parameter line of a nushell `extern` block.
+/// examples:
+///   "    --verbose(-v)                       # increase verbosity"
+///   "    --output(-o): path                  # write output to file"
+///   "    -n: int                             # number of results"
+///
+/// the line is indented four spaces. a parameter adds ": <type>" where the
+/// type comes from `nushell_type_of_param`. a non-empty description is
+/// appended as "# <desc>" after padding the flag text to 40 bytes (always
+/// at least one space).
+pub fn format_flag(entry: &ManpageEntry) -> String {
+    let mut line = String::from("    ");
+    match &entry.switch {
+        OwnedSwitch::Short(c) => {
+            line.push('-');
+            line.push(*c);
+        }
+        OwnedSwitch::Long(l) => {
+            line.push_str("--");
+            line.push_str(l);
+        }
+        OwnedSwitch::Both(c, l) => {
+            line.push_str("--");
+            line.push_str(l);
+            line.push_str("(-");
+            line.push(*c);
+            line.push(')');
+        }
+    }
+    if let Some(OwnedParam::Mandatory(p) | OwnedParam::Optional(p)) = &entry.param {
+        line.push_str(": ");
+        line.push_str(nushell_type_of_param(p));
+    }
+    if !entry.desc.is_empty() {
+        let pad_len = 40usize.saturating_sub(line.len()).max(1);
+        line.push_str(&" ".repeat(pad_len));
+        line.push_str("# ");
+        line.push_str(&entry.desc);
+    }
+    line
+}
+
+/// render a positional argument as a parameter line of a nushell `extern`.
+/// shapes: "...name: type" (variadic), "name?: type" (optional),
+/// "name: type" (required). a variadic positional never gets the "?".
+/// hyphens in the name become underscores since nushell identifiers cannot
+/// contain hyphens; the type is inferred from the upper-cased name.
+pub fn format_positional(name: &str, p: &Positional) -> String {
+    let typ = nushell_type_of_param(&name.to_ascii_uppercase());
+    let ident = name.replace('-', "_");
+    let (prefix, suffix) = match (p.variadic, p.optional) {
+        (true, _) => ("...", ""),
+        (false, true) => ("", "?"),
+        (false, false) => ("", ""),
+    };
+    format!("    {prefix}{ident}{suffix}: {typ}")
+}
+
+/// make a positional list satisfy nushell's ordering rules:
+///   1. no required positional may follow an optional one
+///   2. at most one variadic ("rest") parameter is allowed
+///
+/// a required positional that appears after an optional (or after the
+/// variadic) one is silently promoted to optional. every variadic after the
+/// first is dropped. relative order of the survivors is unchanged.
+pub fn fixup_positionals(positionals: Vec<(String, Positional)>) -> Vec<(String, Positional)> {
+    let mut have_rest = false;
+    let mut optional_from_here = false;
+    positionals
+        .into_iter()
+        .filter_map(|(name, mut p)| {
+            if p.variadic {
+                if have_rest {
+                    return None;
+                }
+                have_rest = true;
+                optional_from_here = true;
+            } else if optional_from_here {
+                p.optional = true;
+            } else {
+                optional_from_here = p.optional;
+            }
+            Some((name, p))
+        })
+        .collect()
+}
+
+/// derive a nushell `module` name from a command name.
+/// ASCII letters, digits, `-` and `_` are kept; every other character is
+/// replaced by a hyphen. "-completions" is then appended.
+pub fn module_name_of(cmd_name: &str) -> String {
+    const SUFFIX: &str = "-completions";
+    let mut module = String::with_capacity(cmd_name.len() + SUFFIX.len());
+    for ch in cmd_name.chars() {
+        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
+        module.push(if keep { ch } else { '-' });
+    }
+    module.push_str(SUFFIX);
+    module
+}
+
+/// build the nushell `extern` declaration(s) for a command.
+///
+/// the main block lists positionals first (after `fixup_positionals`), then
+/// flags (after `dedup_entries`):
+///   export extern "git add" [
+///     ...pathspec?: path
+///     --verbose(-v)              # be verbose
+///     --dry-run(-n)              # dry run
+///   ]
+///
+/// each entry in `result.subcommands` then gets an empty stub block whose
+/// opening line carries its description as a comment:
+///   export extern "git stash" [  # stash changes
+///   ]
+pub fn generate_extern(cmd_name: &str, result: &ManpageResult) -> String {
+    let quoted_cmd = escape_nu(cmd_name);
+    let flags = dedup_entries(&result.entries);
+    let args = fixup_positionals(result.positionals.clone());
+
+    let positional_lines = args.iter().map(|(name, p)| format_positional(name, p));
+    let flag_lines = flags.iter().map(format_flag);
+
+    let mut text = format!("export extern \"{quoted_cmd}\" [\n");
+    for line in positional_lines.chain(flag_lines) {
+        text.push_str(&line);
+        text.push('\n');
+    }
+    text.push_str("]\n");
+
+    for sc in &result.subcommands {
+        let stub = format!(
+            "\nexport extern \"{} {}\" [  # {}\n]\n",
+            quoted_cmd,
+            escape_nu(&sc.name),
+            escape_nu(&sc.desc)
+        );
+        text.push_str(&stub);
+    }
+    text
+}
+
+/// wrap the generated `extern` text in a nushell `module` and import it.
+/// output: "module git-completions { ... }\n\nuse git-completions *\n"
+/// the trailing `use` brings the module's exports into scope right away.
+pub fn generate_module(cmd_name: &str, result: &ManpageResult) -> String {
+    let module = module_name_of(cmd_name);
+    let body = generate_extern(cmd_name, result);
+    format!("module {module} {{\n{body}}}\n\nuse {module} *\n")
+}
+
+/// convenience wrapper around `generate_extern` for callers that only have
+/// flag entries: subcommands, positionals and description are left empty.
+pub fn generate_extern_from_entries(cmd_name: &str, entries: Vec<ManpageEntry>) -> String {
+    let flags_only = ManpageResult {
+        entries,
+        subcommands: Vec::new(),
+        positionals: Vec::new(),
+        description: String::new(),
+    };
+    generate_extern(cmd_name, &flags_only)
+}
+
+/// build an owned `ManpageSubcommand` from a borrowed name and description
+/// (both are copied). used when extracting subcommands from a parsed help
+/// result for nushell output.
+pub fn manpage_subcommand_from(name: &str, desc: &str) -> ManpageSubcommand {
+    let (name, desc) = (String::from(name), String::from(desc));
+    ManpageSubcommand { name, desc }
+}
--- a/src/pool.rs
+++ b/src/pool.rs
@ -0,0 +1,233 @@
+//! BFS-queue worker pool for parallel subprocess scraping.
+//!
+//! workers pull jobs from a shared queue and call a user-supplied
+//! handler; the handler gets a `Submitter` to push newly-discovered
+//! child jobs back onto the same queue. when the in-flight count
+//! reaches zero the pool shuts down and `wait` returns.
+//!
+//! the queue-back design is deliberate: command-help trees are uneven
+//! (one binary has 30 subs, another has 1). queue-back keeps every
+//! worker fed; spawn-in-place would leave cores idle on lopsided trees.
+//!
+//! synchronization: `parking_lot::Condvar` parks workers when the queue is
+//! empty. the queue, in-flight count, and close state live under one mutex so
+//! the condvar predicate cannot miss a wakeup.
+//! parking_lot gives no-poison locks (no `Result` noise on every
+//! `lock()`) and an inline atomic fast path, with no syscall, in the
+//! uncontended case.
+
+use std::collections::VecDeque;
+use std::sync::Arc;
+use std::thread::{self, JoinHandle};
+
+use parking_lot::{Condvar, Mutex};
+
+/// mutable pool state. all three fields are read and written only while
+/// holding the single mutex in `Inner`.
+struct State<J> {
+    /// pending jobs; `submit` pushes at the back, `next` pops from the front.
+    queue: VecDeque<J>,
+    /// jobs created but not yet completed. counts both queued and
+    /// in-progress jobs. workers can exit once wait() has closed the pool
+    /// and this reaches 0.
+    in_flight: usize,
+    /// set by wait(), which is also the point where top-level submission is
+    /// done. workers must not exit on transient empty periods before this.
+    closed: bool,
+}
+
+/// state shared, behind an `Arc`, between the pool, every worker thread and
+/// every `Submitter` handed to the per-job handler.
+struct Inner<J> {
+    state: Mutex<State<J>>,
+    notify: Condvar,
+}
+
+impl<J> Inner<J> {
+    /// enqueue `job`, count it as in flight, and wake one parked worker.
+    fn submit(&self, job: J) {
+        let mut guard = self.state.lock();
+        guard.queue.push_back(job);
+        guard.in_flight += 1;
+        self.notify.notify_one();
+    }
+
+    /// block until a job is available and return it. returns `None` once
+    /// the pool has been closed and no job is queued or in progress.
+    fn next(&self) -> Option<J> {
+        let mut guard = self.state.lock();
+        while guard.queue.is_empty() {
+            if guard.closed && guard.in_flight == 0 {
+                return None;
+            }
+            self.notify.wait(&mut guard);
+        }
+        guard.queue.pop_front()
+    }
+
+    /// record that one job has finished.
+    fn complete(&self) {
+        let mut guard = self.state.lock();
+        guard.in_flight -= 1;
+        let drained = guard.closed && guard.in_flight == 0;
+        if drained {
+            // that was the last in-flight job after wait() closed top-level
+            // submission, so parked workers can wake and exit.
+            self.notify.notify_all();
+        }
+    }
+}
+
+/// handle that lets a job handler enqueue further jobs on the pool it is
+/// running in. cloning only clones the inner `Arc`. the handler closure
+/// receives it by reference.
+pub struct Submitter<J> {
+    inner: Arc<Inner<J>>,
+}
+
+impl<J> Clone for Submitter<J> {
+    fn clone(&self) -> Self {
+        let inner = Arc::clone(&self.inner);
+        Self { inner }
+    }
+}
+
+impl<J> Submitter<J> {
+    /// push `job` onto the shared queue.
+    pub fn submit(&self, job: J) {
+        let shared: &Inner<J> = &self.inner;
+        shared.submit(job);
+    }
+}
+
+/// BFS-queue worker pool. each worker pulls a job, calls the handler
+/// (which may submit further jobs via the passed `Submitter`), then marks
+/// the job complete. once `wait` has closed the pool and the in-flight
+/// count reaches zero, the workers exit and `wait` returns.
+pub struct ScrapePool<J> {
+    inner: Arc<Inner<J>>,
+    workers: Vec<JoinHandle<()>>,
+}
+
+impl<J: Send + 'static> ScrapePool<J> {
+    /// spawn `num_workers` threads (at least one) that run `handler` on
+    /// each job pulled from the queue. the handler receives the job by
+    /// value and a `&Submitter` for enqueuing children.
+    ///
+    /// if the handler panics, the job is still counted as complete, so the
+    /// remaining workers keep draining the queue and `wait` can return.
+    /// the panicking worker thread itself ends; jobs left queued when every
+    /// worker has ended are never run.
+    pub fn new<F>(num_workers: usize, handler: F) -> Self
+    where
+        F: Fn(J, &Submitter<J>) + Send + Sync + 'static,
+    {
+        /// marks one job complete when dropped. running this from `Drop`
+        /// keeps the in-flight count correct when the handler unwinds;
+        /// without it a single panic would leave the count above zero
+        /// forever and park the other workers (and `wait`) indefinitely.
+        struct CompleteOnDrop<'a, J>(&'a Inner<J>);
+
+        impl<J> Drop for CompleteOnDrop<'_, J> {
+            fn drop(&mut self) {
+                self.0.complete();
+            }
+        }
+
+        let inner = Arc::new(Inner {
+            state: Mutex::new(State {
+                queue: VecDeque::new(),
+                in_flight: 0,
+                closed: false,
+            }),
+            notify: Condvar::new(),
+        });
+        let handler = Arc::new(handler);
+        let workers = (0..num_workers.max(1))
+            .map(|_| {
+                let inner = inner.clone();
+                let handler = handler.clone();
+                thread::spawn(move || {
+                    let submitter = Submitter {
+                        inner: inner.clone(),
+                    };
+                    while let Some(job) = inner.next() {
+                        // dropped at the end of the iteration, or during
+                        // unwinding if the handler panics.
+                        let _done = CompleteOnDrop(&*inner);
+                        handler(job, &submitter);
+                    }
+                })
+            })
+            .collect();
+        ScrapePool { inner, workers }
+    }
+
+    /// submit a top-level job. typically called by the orchestrating
+    /// thread before `wait`; handlers should use `Submitter::submit`.
+    pub fn submit(&self, job: J) {
+        self.inner.submit(job);
+    }
+
+    /// block until all jobs (initial + transitively discovered) have
+    /// completed, then join every worker thread. a worker that ended by
+    /// panicking is joined like any other; its panic is not propagated.
+    pub fn wait(self) {
+        {
+            let mut state = self.inner.state.lock();
+            state.closed = true;
+            // Wake workers so they can either drain queued work or exit if
+            // the pool was empty. The close flag is guarded by this same lock,
+            // so this cannot race with a worker entering the condvar wait.
+            self.inner.notify.notify_all();
+        }
+        for w in self.workers {
+            let _ = w.join();
+        }
+    }
+}
+
+// unit tests for the pool: each one builds a pool, submits jobs, calls
+// `wait`, and then inspects what the handler recorded.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    use std::time::Duration;
+
+    // 100 independent jobs on 4 workers: every job must be seen exactly once.
+    #[test]
+    fn flat_jobs_processed_once_each() {
+        let collected: Arc<Mutex<Vec<u32>>> = Arc::new(Mutex::new(Vec::new()));
+        let pool = ScrapePool::new(4, {
+            let collected = collected.clone();
+            move |n: u32, _: &Submitter<u32>| {
+                collected.lock().push(n);
+            }
+        });
+        for i in 0..100u32 {
+            pool.submit(i);
+        }
+        pool.wait();
+        // completion order is not deterministic, so compare sorted.
+        let mut got = collected.lock().clone();
+        got.sort();
+        assert_eq!(got, (0..100).collect::<Vec<_>>());
+    }
+
+    // jobs submitted from inside the handler must also run before `wait`
+    // returns.
+    #[test]
+    fn discovered_children_processed_to_completion() {
+        // BFS expansion: every odd number under 10 spawns its successor.
+        let collected: Arc<Mutex<Vec<u32>>> = Arc::new(Mutex::new(Vec::new()));
+        let pool = ScrapePool::new(2, {
+            let collected = collected.clone();
+            move |n: u32, sub: &Submitter<u32>| {
+                collected.lock().push(n);
+                if n < 10 && n % 2 == 1 {
+                    sub.submit(n + 1);
+                }
+            }
+        });
+        for i in [1u32, 3, 5, 7, 9] {
+            pool.submit(i);
+        }
+        pool.wait();
+        let mut got = collected.lock().clone();
+        got.sort();
+        assert_eq!(got, vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
+    }
+
+    // the queue drains completely between the two submits; the worker must
+    // stay alive because `wait` has not closed the pool yet.
+    #[test]
+    fn transient_empty_queue_before_wait_does_not_stop_workers() {
+        let processed = Arc::new(AtomicUsize::new(0));
+        let pool = ScrapePool::new(1, {
+            let processed = processed.clone();
+            move |_: u32, _: &Submitter<u32>| {
+                processed.fetch_add(1, Ordering::SeqCst);
+            }
+        });
+
+        pool.submit(1);
+        // spin until the first job has been handled.
+        while processed.load(Ordering::SeqCst) == 0 {
+            thread::yield_now();
+        }
+        // give the worker time to finish the job and go idle.
+        thread::sleep(Duration::from_millis(10));
+        pool.submit(2);
+        pool.wait();
+
+        assert_eq!(processed.load(Ordering::SeqCst), 2);
+    }
+
+    // closing an empty pool must wake the parked workers so `wait` returns.
+    #[test]
+    fn wait_with_no_jobs_returns_immediately() {
+        let pool: ScrapePool<()> = ScrapePool::new(2, |_, _| {});
+        pool.wait();
+    }
+}
--- a/src/store.rs
+++ b/src/store.rs
@ -0,0 +1,657 @@
+//! filesystem store for parsed completion data.
+//!
+//! write side: serialize ManpageResult to JSON, derive sanitised
+//! filenames from command names ("git add" → git_add.json).
+//!
+//! read side: look up a command by name across the user cache + system
+//! dirs, deserialize JSON or parse a .nu extern blob back into a result.
+
+use std::collections::HashMap;
+use std::fs;
+use std::io;
+use std::path::{Path, PathBuf};
+
+use serde_json::Value;
+
+use crate::parsers::manpage::{
+    ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch,
+};
+use crate::types::Positional;
+
+/// default cache directory. resolution order:
+///   1. $XDG_CACHE_HOME/inshellah, when the variable is set and non-empty
+///   2. $HOME/.cache/inshellah, when HOME is set
+///   3. the relative path .cache/inshellah
+pub fn default_store_path() -> PathBuf {
+    let xdg_cache = std::env::var("XDG_CACHE_HOME")
+        .ok()
+        .filter(|dir| !dir.is_empty());
+    if let Some(dir) = xdg_cache {
+        return PathBuf::from(dir).join("inshellah");
+    }
+    match std::env::var("HOME") {
+        Ok(home) => PathBuf::from(home).join(".cache/inshellah"),
+        Err(_) => PathBuf::from(".cache/inshellah"),
+    }
+}
+
+/// create `dir` together with any missing parent directories.
+pub fn ensure_dir(dir: &Path) -> io::Result<()> {
+    let mut builder = fs::DirBuilder::new();
+    builder.recursive(true);
+    builder.create(dir)
+}
+
+/// derive a safe filename stem from a command name.
+/// ASCII letters, digits, `-`, `_` and `.` are kept; every other character
+/// becomes `_`, including the space in a subcommand name
+/// ("git add" -> "git_add").
+pub fn filename_of_command(cmd: &str) -> String {
+    let mut file_name = String::with_capacity(cmd.len());
+    for ch in cmd.chars() {
+        let safe = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.');
+        file_name.push(if safe { ch } else { '_' });
+    }
+    file_name
+}
+
+/// inverse of `filename_of_command` for display: "git_add" -> "git add".
+/// every underscore becomes a space, unconditionally, so a name that
+/// genuinely contained an underscore comes back with a space instead. that
+/// is acceptable since the read side is only used for display.
+pub fn command_of_filename(base: &str) -> String {
+    base.chars()
+        .map(|ch| if ch == '_' { ' ' } else { ch })
+        .collect()
+}
+
+/// escape `s` for use between the quotes of a JSON string.
+/// `"` and `\` get a backslash, the control characters with a short JSON
+/// escape (\n \r \t \b \f) use it, any other character below 0x20 becomes
+/// `\u00XX`, and everything else is copied through.
+fn escape_json(s: &str) -> String {
+    let mut escaped = String::with_capacity(s.len() + 2);
+    for ch in s.chars() {
+        let short_form = match ch {
+            '"' => Some("\\\""),
+            '\\' => Some("\\\\"),
+            '\n' => Some("\\n"),
+            '\r' => Some("\\r"),
+            '\t' => Some("\\t"),
+            '\u{8}' => Some("\\b"),
+            '\u{c}' => Some("\\f"),
+            _ => None,
+        };
+        match short_form {
+            Some(seq) => escaped.push_str(seq),
+            None if u32::from(ch) < 0x20 => {
+                escaped.push_str(&format!("\\u{:04x}", u32::from(ch)));
+            }
+            None => escaped.push(ch),
+        }
+    }
+    escaped
+}
+
+/// render `s` as a complete JSON string literal, quotes included.
+fn json_string(s: &str) -> String {
+    let escaped = escape_json(s);
+    let mut quoted = String::with_capacity(escaped.len() + 2);
+    quoted.push('"');
+    quoted.push_str(&escaped);
+    quoted.push('"');
+    quoted
+}
+
+/// serialize a switch as a JSON object tagged by "type":
+/// short -> {"type","char"}, long -> {"type","name"},
+/// both -> {"type","char","name"}. the short letter is written as a
+/// one-character JSON string.
+fn json_switch(s: &OwnedSwitch) -> String {
+    let letter = |c: &char| json_string(c.encode_utf8(&mut [0u8; 4]));
+    match s {
+        OwnedSwitch::Short(c) => format!(r#"{{"type":"short","char":{}}}"#, letter(c)),
+        OwnedSwitch::Long(name) => {
+            format!(r#"{{"type":"long","name":{}}}"#, json_string(name))
+        }
+        OwnedSwitch::Both(c, name) => {
+            let short = letter(c);
+            let long = json_string(name);
+            format!(r#"{{"type":"both","char":{},"name":{}}}"#, short, long)
+        }
+    }
+}
+
+/// serialize an optional parameter: `null` when absent, otherwise an object
+/// with "kind" ("mandatory" or "optional") and "name".
+fn json_param(p: &Option<OwnedParam>) -> String {
+    let Some(param) = p else {
+        return String::from("null");
+    };
+    match param {
+        OwnedParam::Mandatory(n) => {
+            format!(r#"{{"kind":"mandatory","name":{}}}"#, json_string(n))
+        }
+        OwnedParam::Optional(n) => {
+            format!(r#"{{"kind":"optional","name":{}}}"#, json_string(n))
+        }
+    }
+}
+
+/// serialize one flag entry as {"switch":..., "param":..., "desc":...}.
+fn json_entry(e: &ManpageEntry) -> String {
+    let switch = json_switch(&e.switch);
+    let param = json_param(&e.param);
+    let desc = json_string(&e.desc);
+    format!(
+        r#"{{"switch":{},"param":{},"desc":{}}}"#,
+        switch, param, desc
+    )
+}
+
+/// serialize one subcommand as {"name":..., "desc":...}.
+fn json_subcommand(sc: &ManpageSubcommand) -> String {
+    let name = json_string(&sc.name);
+    let desc = json_string(&sc.desc);
+    format!(r#"{{"name":{},"desc":{}}}"#, name, desc)
+}
+
+/// serialize one positional as {"name":..., "optional":bool, "variadic":bool}.
+fn json_positional(name: &str, p: &Positional) -> String {
+    let Positional {
+        optional, variadic, ..
+    } = p;
+    format!(
+        r#"{{"name":{},"optional":{},"variadic":{}}}"#,
+        json_string(name),
+        optional,
+        variadic
+    )
+}
+
+/// render `items` as a JSON array, using `f` to serialize each element.
+/// elements are joined with "," and no whitespace; an empty slice gives "[]".
+fn json_list<T, F: Fn(&T) -> String>(items: &[T], f: F) -> String {
+    let mut out = String::from("[");
+    for (index, item) in items.iter().enumerate() {
+        if index > 0 {
+            out.push(',');
+        }
+        out.push_str(&f(item));
+    }
+    out.push(']');
+    out
+}
+
+/// serialize a ManpageResult, tagged with where it came from, as one
+/// compact JSON object:
+///   {"source":..., "description":..., "entries":[...],
+///    "subcommands":[...], "positionals":[...]}
+pub fn json_of_result(source: &str, result: &ManpageResult) -> String {
+    let source = json_string(source);
+    let description = json_string(&result.description);
+    let entries = json_list(&result.entries, json_entry);
+    let subcommands = json_list(&result.subcommands, json_subcommand);
+    let positionals = json_list(&result.positionals, |(name, p)| json_positional(name, p));
+    format!(
+        r#"{{"source":{},"description":{},"entries":{},"subcommands":{},"positionals":{}}}"#,
+        source, description, entries, subcommands, positionals,
+    )
+}
+
+/// write `contents` to `path`, creating the parent directory chain first
+/// when the path has a parent. an existing file is replaced.
+pub fn write_file(path: &Path, contents: &str) -> io::Result<()> {
+    match path.parent() {
+        Some(parent) => fs::create_dir_all(parent)?,
+        None => {}
+    }
+    fs::write(path, contents)
+}
+
+/// store the parsed result for `command` as JSON in `dir`. the file is
+/// named after the sanitised command name with a ".json" extension.
+pub fn write_result(
+    dir: &Path,
+    command: &str,
+    source: &str,
+    result: &ManpageResult,
+) -> io::Result<()> {
+    let file_name = format!("{}.json", filename_of_command(command));
+    let target = dir.join(file_name);
+    let payload = json_of_result(source, result);
+    write_file(&target, &payload)
+}
+
+/// store a native nushell completion blob (the binary supplied its own)
+/// verbatim in `dir`, named after the sanitised command name with a ".nu"
+/// extension.
+pub fn write_native(dir: &Path, command: &str, data: &str) -> io::Result<()> {
+    let file_name = format!("{}.nu", filename_of_command(command));
+    let target = dir.join(file_name);
+    write_file(&target, data)
+}
+
+// --- read side ---
+
+/// read a whole file as UTF-8 text. any error (missing file, permissions,
+/// invalid UTF-8, ...) is reported as `None`.
+fn read_file(path: &Path) -> Option<String> {
+    match fs::read_to_string(path) {
+        Ok(text) => Some(text),
+        Err(_) => None,
+    }
+}
+
+/// load a JSON cache file and return its "source" tag together with the
+/// decoded result. `None` when the file cannot be read or is not valid
+/// JSON; a missing or non-string "source" falls back to "json".
+fn read_json_result(path: &Path) -> Option<(String, ManpageResult)> {
+    let text = read_file(path)?;
+    let doc: Value = serde_json::from_str(&text).ok()?;
+    let source = match doc.get("source").and_then(Value::as_str) {
+        Some(tag) => tag.to_string(),
+        None => "json".to_string(),
+    };
+    let result = result_from_json(&doc);
+    Some((source, result))
+}
+
+/// decode a switch object written by `json_switch`. `None` when "type" is
+/// missing or unknown, or when a field required by that type is missing,
+/// not a string, or (for "char") empty.
+fn switch_from_json(v: &Value) -> Option<OwnedSwitch> {
+    fn text<'a>(v: &'a Value, key: &str) -> Option<&'a str> {
+        v.get(key)?.as_str()
+    }
+    fn letter(v: &Value) -> Option<char> {
+        text(v, "char")?.chars().next()
+    }
+
+    match text(v, "type")? {
+        "short" => letter(v).map(OwnedSwitch::Short),
+        "long" => text(v, "name").map(|name| OwnedSwitch::Long(name.to_string())),
+        "both" => {
+            let c = letter(v)?;
+            let name = text(v, "name")?;
+            Some(OwnedSwitch::Both(c, name.to_string()))
+        }
+        _ => None,
+    }
+}
+
+/// decode a parameter written by `json_param`. JSON `null`, a missing or
+/// non-string "kind"/"name", and an unknown kind all give `None`.
+fn param_from_json(v: &Value) -> Option<OwnedParam> {
+    if v.is_null() {
+        return None;
+    }
+    let kind = v.get("kind").and_then(Value::as_str)?;
+    let name = v.get("name").and_then(Value::as_str)?;
+    let wrap: fn(String) -> OwnedParam = match kind {
+        "mandatory" => OwnedParam::Mandatory,
+        "optional" => OwnedParam::Optional,
+        _ => return None,
+    };
+    Some(wrap(name.to_string()))
+}
+
+/// decode a flag entry written by `json_entry`. the switch is required;
+/// an absent or undecodable "param" becomes `None` and an absent or
+/// non-string "desc" becomes the empty string.
+fn entry_from_json(v: &Value) -> Option<ManpageEntry> {
+    let switch = v.get("switch").and_then(switch_from_json)?;
+    let param = match v.get("param") {
+        Some(raw) => param_from_json(raw),
+        None => None,
+    };
+    let desc = match v.get("desc").and_then(Value::as_str) {
+        Some(text) => text.to_string(),
+        None => String::new(),
+    };
+    Some(ManpageEntry {
+        switch,
+        param,
+        desc,
+    })
+}
+
+/// decode a subcommand written by `json_subcommand`. "name" is required;
+/// an absent or non-string "desc" becomes the empty string.
+fn subcommand_from_json(v: &Value) -> Option<ManpageSubcommand> {
+    let name = v.get("name").and_then(Value::as_str)?.to_string();
+    let desc = match v.get("desc").and_then(Value::as_str) {
+        Some(text) => text.to_string(),
+        None => String::new(),
+    };
+    Some(ManpageSubcommand { name, desc })
+}
+
+/// decode a positional written by `json_positional`. "name" is required;
+/// "optional" and "variadic" default to false when absent or not booleans.
+fn positional_from_json(v: &Value) -> Option<(String, Positional)> {
+    let flag = |key: &str| v.get(key).and_then(Value::as_bool).unwrap_or(false);
+    let name = v.get("name").and_then(Value::as_str)?.to_string();
+    let positional = Positional {
+        optional: flag("optional"),
+        variadic: flag("variadic"),
+    };
+    Some((name, positional))
+}
+
+/// decode a JSON cache document into a ManpageResult.
+/// decoding is lenient: a missing or wrongly-typed top-level field yields
+/// an empty string / empty list, and array elements that fail to decode
+/// are skipped rather than failing the whole document.
+pub fn result_from_json(v: &Value) -> ManpageResult {
+    /// decode the array stored under `key`, skipping undecodable elements.
+    fn list_of<T>(v: &Value, key: &str, decode: fn(&Value) -> Option<T>) -> Vec<T> {
+        match v.get(key).and_then(Value::as_array) {
+            Some(items) => items.iter().filter_map(decode).collect(),
+            None => Vec::new(),
+        }
+    }
+
+    let description = match v.get("description").and_then(Value::as_str) {
+        Some(text) => text.to_string(),
+        None => String::new(),
+    };
+    ManpageResult {
+        entries: list_of(v, "entries", entry_from_json),
+        subcommands: list_of(v, "subcommands", subcommand_from_json),
+        positionals: list_of(v, "positionals", positional_from_json),
+        description,
+    }
+}
+
+/// parse nushell `export extern` blocks out of a .nu source file.
+///
+/// the file is scanned line by line. outside a block, a `# ` comment line
+/// is remembered as the description for an extern that directly follows
+/// it; any other non-extern line forgets it. inside a block, every line up
+/// to one starting with `]` is handed to `parse_nu_param_line_into`, with
+/// the text after the first `#` as its description.
+///
+/// returns the ManpageResult for the first block named exactly
+/// `target_cmd`: its entries, positionals and description, plus one
+/// subcommand per other block named "<target_cmd> <word>". when no block
+/// matches, the default (empty) result is returned.
+pub fn parse_nu_completions(target_cmd: &str, contents: &str) -> ManpageResult {
+    let mut blocks: Vec<NuBlock> = Vec::new();
+    let mut pending_desc = String::new();
+    let mut open: Option<NuBlock> = None;
+
+    for line in contents.split('\n') {
+        let trimmed = line.trim();
+
+        if open.is_none() {
+            if let Some(comment) = trimmed.strip_prefix("# ") {
+                pending_desc = comment.trim().to_string();
+            } else if let Some(cmd) = extract_extern_name(trimmed) {
+                // extract_extern_name is `None` for lines without
+                // "export extern", so no separate check is needed.
+                open = Some(NuBlock {
+                    cmd,
+                    description: std::mem::take(&mut pending_desc),
+                    ..Default::default()
+                });
+            } else {
+                pending_desc.clear();
+            }
+            continue;
+        }
+
+        if trimmed.starts_with(']') {
+            blocks.extend(open.take());
+            continue;
+        }
+
+        if let Some(block) = open.as_mut() {
+            let (param_part, desc) = match trimmed.split_once('#') {
+                Some((before, after)) => (before.trim(), after.trim()),
+                None => (trimmed, ""),
+            };
+            parse_nu_param_line_into(param_part, desc, block);
+        }
+    }
+    // keep a block that was still open when the file ended
+    blocks.extend(open);
+
+    // find the block matching target_cmd
+    let Some(matched) = blocks.iter().find(|b| b.cmd == target_cmd) else {
+        return ManpageResult::default();
+    };
+
+    // collect immediate subcommands from other blocks ("target sub" pattern)
+    let prefix = format!("{target_cmd} ");
+    let subcommands: Vec<ManpageSubcommand> = blocks
+        .iter()
+        .filter_map(|b| {
+            let suffix = b.cmd.strip_prefix(prefix.as_str())?;
+            if suffix.is_empty() || suffix.contains(' ') {
+                return None;
+            }
+            Some(ManpageSubcommand {
+                name: suffix.to_string(),
+                desc: b.description.clone(),
+            })
+        })
+        .collect();
+
+    ManpageResult {
+        entries: matched.entries.clone(),
+        subcommands,
+        positionals: matched.positionals.clone(),
+        description: matched.description.clone(),
+    }
+}
+
+/// pull the command name out of an `export extern ...` line.
+/// a quoted name is everything up to the next `"` (and may be empty); a
+/// bare name is the leading run of ASCII letters, digits, `_` and `-`.
+/// `None` when the line has no "export extern", the quote is unterminated,
+/// or the bare name is empty.
+fn extract_extern_name(line: &str) -> Option<String> {
+    let (_, tail) = line.split_once("export extern")?;
+    let tail = tail.trim_start();
+
+    if let Some(quoted) = tail.strip_prefix('"') {
+        let (name, _) = quoted.split_once('"')?;
+        return Some(name.to_string());
+    }
+
+    let is_name_char = |c: char| c.is_ascii_alphanumeric() || c == '_' || c == '-';
+    let bare = tail.split(|c: char| !is_name_char(c)).next().unwrap_or("");
+    if bare.is_empty() {
+        None
+    } else {
+        Some(bare.to_string())
+    }
+}
+
+/// parse one line from inside an `export extern [ ... ]` block and record
+/// what it declares on `block`.
+///
+/// `param_part` is the line with any trailing `# comment` removed and
+/// `desc` is that comment's text. three shapes are recognised:
+///   - long flags:  `--name` or `--name(-c)`, optionally followed by `: type`
+///   - short flags: `-c`, optionally followed by `: type`
+///   - positionals: `name: type`, `name?: type`, `...name: type`
+///
+/// the `: type` of a short flag is parsed the same way as for a long flag,
+/// so that a `-n: int` line keeps its parameter when read back.
+///
+/// lines shorter than two bytes and lines matching none of the shapes are
+/// ignored. a positional whose name matches (ASCII case-insensitively) one
+/// already recorded on the block is dropped; a variadic positional is
+/// always recorded as optional.
+fn parse_nu_param_line_into(param_part: &str, desc: &str, block: &mut NuBlock) {
+    if param_part.len() < 2 {
+        return;
+    }
+    if let Some(after) = param_part.strip_prefix("--") {
+        // long flag: --name(-c): type or --name: type or --name
+        let (name, rest) = split_at_non_name_char(after);
+        if name.is_empty() {
+            return;
+        }
+        // optional "(-c)" short alias directly after the name
+        let (short, rest) = match rest.strip_prefix("(-") {
+            Some(after_open) => {
+                let mut chars = after_open.chars();
+                match (chars.next(), chars.next()) {
+                    (Some(c), Some(')')) => (Some(c), chars.as_str()),
+                    _ => (None, rest),
+                }
+            }
+            None => (None, rest),
+        };
+        let param = parse_type_suffix(rest);
+        let switch = match short {
+            Some(c) => OwnedSwitch::Both(c, name.to_string()),
+            None => OwnedSwitch::Long(name.to_string()),
+        };
+        block.entries.push(ManpageEntry {
+            switch,
+            param,
+            desc: desc.to_string(),
+        });
+    } else if let Some(after_dash) = param_part.strip_prefix('-') {
+        // short flag: -c or -c: type
+        let mut chars = after_dash.chars();
+        match chars.next() {
+            Some(c) if c.is_ascii_alphanumeric() => {
+                block.entries.push(ManpageEntry {
+                    switch: OwnedSwitch::Short(c),
+                    // whatever follows the letter; `None` unless it is ": type"
+                    param: parse_type_suffix(chars.as_str()),
+                    desc: desc.to_string(),
+                });
+            }
+            _ => {}
+        }
+    } else {
+        // positional: name: type or name?: type or ...name: type
+        let variadic = param_part.starts_with("...");
+        let after_prefix = if variadic {
+            &param_part[3..]
+        } else {
+            param_part
+        };
+        let optional = after_prefix.contains('?');
+        let name_end = after_prefix.find([':', '?']).unwrap_or(after_prefix.len());
+        let name = after_prefix[..name_end].trim();
+        let name: String = name
+            .chars()
+            .map(|c| if c == '-' { '_' } else { c })
+            .collect();
+        if !name.is_empty() && !name.starts_with('-') {
+            let duplicate = block
+                .positionals
+                .iter()
+                .any(|(existing, _)| existing.eq_ignore_ascii_case(&name));
+            if !duplicate {
+                block.positionals.push((
+                    name,
+                    Positional {
+                        optional: optional || variadic,
+                        variadic,
+                    },
+                ));
+            }
+        }
+    }
+}
+
+/// split `s` into its leading run of name characters (ascii letters,
+/// ascii digits and `-`) and the remainder that follows it. either half
+/// may be empty.
+fn split_at_non_name_char(s: &str) -> (&str, &str) {
+    let is_name_char = |c: char| c == '-' || c.is_ascii_alphanumeric();
+    let cut = s
+        .char_indices()
+        .find(|&(_, c)| !is_name_char(c))
+        .map_or(s.len(), |(idx, _)| idx);
+    s.split_at(cut)
+}
+
+/// read a `: type` suffix and turn the type name into an OwnedParam.
+///
+/// leading whitespace is allowed before and after the colon; the type name
+/// is the run of ascii letters that follows. returns `None` when there is
+/// no colon or no letters after it. the result is always `Mandatory`
+/// because the nushell extern syntax doesn't distinguish
+/// optional-with-default.
+fn parse_type_suffix(s: &str) -> Option<OwnedParam> {
+    let after_colon = s.trim_start().strip_prefix(':')?.trim_start();
+    let type_name = match after_colon.find(|c: char| !c.is_ascii_alphabetic()) {
+        Some(stop) => &after_colon[..stop],
+        None => after_colon,
+    };
+    if type_name.is_empty() {
+        return None;
+    }
+    Some(OwnedParam::Mandatory(type_name.to_string()))
+}
+
+/// accumulator for the pieces collected while parsing one block of a
+/// `.nu` completion file.
+#[derive(Default)]
+struct NuBlock {
+    // presumably the command name the block belongs to — confirm against
+    // the code that fills it in.
+    cmd: String,
+    // flag entries (short, long or both) pushed as parameters are parsed.
+    entries: Vec<ManpageEntry>,
+    // positional parameters as (name, flags); names are deduplicated
+    // case-insensitively before being pushed.
+    positionals: Vec<(String, Positional)>,
+    // presumably the block's free-text description — confirm against the
+    // code that fills it in.
+    description: String,
+}
+
+/// look up a command's parsed result. source priority is native nushell,
+/// then manpage JSON, then help JSON. parent .nu files are searched for
+/// subcommand lookups because clap-generated .nu files contain all extern
+/// blocks in a single file.
+///
+/// `dirs` are searched in order. within a directory the command's own
+/// `.nu` file is returned as soon as it can be read; the parent's `.nu`
+/// file (the part of `command` before the first space) is only accepted
+/// when parsing it for `command` yields at least one entry, subcommand or
+/// positional. returns `None` when no directory holds usable data.
+pub fn lookup(dirs: &[PathBuf], command: &str) -> Option<ManpageResult> {
+    let base_name = filename_of_command(command);
+    let parent_base = command
+        .find(' ')
+        .map(|i| filename_of_command(&command[..i]));
+
+    for directory in dirs {
+        let nu_path = directory.join(format!("{base_name}.nu"));
+        if let Some(data) = read_file(&nu_path) {
+            return Some(parse_nu_completions(command, &data));
+        }
+        if let Some(pb) = &parent_base {
+            let parent_nu = directory.join(format!("{pb}.nu"));
+            if let Some(data) = read_file(&parent_nu) {
+                let r = parse_nu_completions(command, &data);
+                if !r.entries.is_empty() || !r.subcommands.is_empty() || !r.positionals.is_empty() {
+                    return Some(r);
+                }
+            }
+        }
+    }
+
+    // single pass over the JSON files: a non-help result wins immediately,
+    // while the first help result is kept as the fallback. this avoids
+    // reading and parsing every file a second time when only help-sourced
+    // data exists.
+    let mut help_fallback: Option<ManpageResult> = None;
+    for directory in dirs {
+        let json_path = directory.join(format!("{base_name}.json"));
+        let Some((source, result)) = read_json_result(&json_path) else {
+            continue;
+        };
+        if source != "help" {
+            return Some(result);
+        }
+        if help_fallback.is_none() {
+            help_fallback = Some(result);
+        }
+    }
+    help_fallback
+}
+
+/// fetch the raw stored text for a command: the first readable `.nu`
+/// source across all directories, otherwise the first readable `.json`
+/// file. returns `None` when neither exists.
+pub fn lookup_raw(dirs: &[PathBuf], command: &str) -> Option<String> {
+    let base_name = filename_of_command(command);
+    // first directory (in order) holding a readable `<base_name>.<ext>`.
+    let first_with_ext = |ext: &str| {
+        dirs.iter()
+            .find_map(|directory| read_file(&directory.join(format!("{base_name}.{ext}"))))
+    };
+    first_with_ext("nu").or_else(|| first_with_ext("json"))
+}
+
+/// strip a trailing `.json` or `.nu` from `filename`. `.json` is tried
+/// first; any other name yields `None`.
+fn chop_extension(filename: &str) -> Option<&str> {
+    if let Some(stem) = filename.strip_suffix(".json") {
+        return Some(stem);
+    }
+    filename.strip_suffix(".nu")
+}
+
+/// collect every indexed command name found in the store directories.
+/// unreadable directories, unreadable entries, non-UTF-8 file names and
+/// files without a `.json`/`.nu` extension are ignored. the result is
+/// sorted and free of duplicates.
+pub fn all_commands(dirs: &[PathBuf]) -> Vec<String> {
+    let names: std::collections::BTreeSet<String> = dirs
+        .iter()
+        .filter_map(|directory| fs::read_dir(directory).ok())
+        .flat_map(|entries| entries.flatten())
+        .filter_map(|entry| {
+            let file_name = entry.file_name();
+            let base = chop_extension(file_name.to_str()?)?;
+            Some(command_of_filename(base))
+        })
+        .collect();
+    names.into_iter().collect()
+}
+
+/// discover subcommands of a command by scanning filenames in the store
+/// (e.g. for "git", finds "git_add.json", "git_log.json").
+///
+/// a file counts when its name starts with `<command file name>_`, ends in
+/// `.json` or `.nu`, and the part in between is non-empty and contains no
+/// further `_`. the result is sorted by name.
+///
+/// the same subcommand can be stored more than once (a `.nu` and a `.json`
+/// file, or copies in several directories). `.nu` files contribute no
+/// description and `read_dir` yields entries in no guaranteed order, so an
+/// entry recorded without a description is upgraded when a later JSON file
+/// supplies one; the first non-empty description wins.
+pub fn subcommands_of(dirs: &[PathBuf], command: &str) -> Vec<ManpageSubcommand> {
+    let prefix = format!("{}_", filename_of_command(command));
+    let mut seen: HashMap<String, ManpageSubcommand> = HashMap::new();
+    for directory in dirs {
+        let Ok(entries) = fs::read_dir(directory) else {
+            continue;
+        };
+        for entry in entries.flatten() {
+            // keep the OsString alive so the name can be borrowed instead
+            // of copied into a fresh String for every directory entry.
+            let file_name = entry.file_name();
+            let Some(filename) = file_name.to_str() else {
+                continue;
+            };
+            if !filename.starts_with(&prefix) {
+                continue;
+            }
+            let is_json = filename.ends_with(".json");
+            let Some(base) = chop_extension(filename) else {
+                continue;
+            };
+            let rest = &base[prefix.len()..];
+            if rest.is_empty() || rest.contains('_') {
+                continue;
+            }
+            // an earlier hit that already carries a description is final.
+            if seen.get(rest).is_some_and(|sc| !sc.desc.is_empty()) {
+                continue;
+            }
+            let desc = if is_json {
+                read_file(&entry.path())
+                    .and_then(|d| serde_json::from_str::<Value>(&d).ok())
+                    .and_then(|v| {
+                        v.get("description")
+                            .and_then(|x| x.as_str())
+                            .map(|s| s.to_string())
+                    })
+                    .unwrap_or_default()
+            } else {
+                String::new()
+            };
+            match seen.get_mut(rest) {
+                // known but undescribed so far: fill in the description
+                // if this file has one.
+                Some(existing) => {
+                    if !desc.is_empty() {
+                        existing.desc = desc;
+                    }
+                }
+                None => {
+                    seen.insert(
+                        rest.to_string(),
+                        ManpageSubcommand {
+                            name: rest.to_string(),
+                            desc,
+                        },
+                    );
+                }
+            }
+        }
+    }
+    let mut out: Vec<ManpageSubcommand> = seen.into_values().collect();
+    out.sort_by(|a, b| a.name.cmp(&b.name));
+    out
+}
+
+/// report how a command was indexed.
+///
+/// any existing `<command>.nu` in the store directories yields "native".
+/// otherwise the first existing `<command>.json` decides: its "source"
+/// string field is returned, or "json" when the file cannot be read or
+/// parsed, or has no such string field. `None` means no file exists.
+pub fn file_type_of(dirs: &[PathBuf], command: &str) -> Option<String> {
+    let base = filename_of_command(command);
+
+    let has_native = dirs
+        .iter()
+        .any(|directory| directory.join(format!("{base}.nu")).exists());
+    if has_native {
+        return Some("native".to_string());
+    }
+
+    let json_path = dirs
+        .iter()
+        .map(|directory| directory.join(format!("{base}.json")))
+        .find(|candidate| candidate.exists())?;
+    let source = read_file(&json_path)
+        .and_then(|raw| serde_json::from_str::<Value>(&raw).ok())
+        .and_then(|doc| doc.get("source").and_then(|x| x.as_str()).map(String::from));
+    Some(source.unwrap_or_else(|| "json".to_string()))
+}
--- a/src/types.rs
+++ b/src/types.rs
@ -0,0 +1,34 @@
+/// the spelling(s) of a flag, borrowing any long name from the parsed
+/// input.
+pub enum Switch<'a> {
+    /// only a single-character form.
+    Short(char),
+    /// only a long-name form.
+    Long(&'a str),
+    /// both a single-character form and a long name.
+    Both(char, &'a str),
+}
+
+/// the parameter attached to a flag, tagged by whether it must be given.
+/// the borrowed string is presumably the parameter's placeholder text as
+/// written in the help output — confirm against the option parsers.
+pub enum Param<'a> {
+    /// the parameter is required.
+    Mandatory(&'a str),
+    /// the parameter may be omitted.
+    Optional(&'a str),
+}
+
+/// one parsed flag entry: switch + optional param + description.
+pub struct OptionEntry<'a> {
+    /// the flag's short and/or long spelling.
+    pub switch: Switch<'a>,
+    /// the flag's parameter, if it takes one.
+    pub param: Option<Param<'a>>,
+    /// description lines; the entry parser drops blank lines before
+    /// storing them.
+    pub desc: Vec<&'a str>,
+}
+
+/// a subcommand name with its description, both borrowed from the parsed
+/// input.
+pub struct Subcommand<'a> {
+    /// the subcommand's name (deduplicated case-insensitively by the help
+    /// parser).
+    pub name: &'a str,
+    /// the subcommand's description text.
+    pub desc: &'a str,
+}
+
+/// flags describing a positional argument; its name is stored alongside
+/// it by the containers that hold it.
+#[derive(Debug, Clone)]
+pub struct Positional {
+    /// whether the argument may be omitted.
+    pub optional: bool,
+    /// whether the argument may be repeated.
+    pub variadic: bool,
+}
+
+/// everything extracted from one help text, borrowing from that text.
+pub struct HelpResult<'a> {
+    /// the parsed flag entries.
+    pub entries: Vec<OptionEntry<'a>>,
+    /// the parsed subcommands.
+    pub subcommands: Vec<Subcommand<'a>>,
+    /// positional arguments as (name, flags) pairs.
+    pub positionals: Vec<(&'a str, Positional)>,
+    /// presumably the command's own description — confirm against the
+    /// parser that builds this struct.
+    pub desc: &'a str,
+}