From 0aa6ae9fbff56483cb0ce4f141ea343480f7d651 Mon Sep 17 00:00:00 2001
From: atagen <boss@atagen.co>
Date: Tue, 19 May 2026 23:32:51 +1000
Subject: [PATCH] riir

---
 Cargo.lock                        |  285 ++++
 Cargo.toml                        |   12 +
 README.md                         |    6 +-
 bin/.ocamlformat                  |    0
 bin/dune                          |    4 -
 bin/main.ml                       | 1403 ------------------
 doc/building.md                   |  145 +-
 doc/nixos.md                      |  217 ++-
 doc/nushell-integration.md        |  254 ++--
 doc/runtime-completions.md        |   55 +-
 dune-project                      |   28 -
 flake.lock                        |    8 +-
 flake.nix                         |  308 ++--
 inshellah.opam                    |   35 -
 lib/.ocamlformat                  |    0
 lib/dune                          |    3 -
 lib/manpage.ml                    | 1145 ---------------
 lib/nushell.ml                    |  253 ----
 lib/parser.ml                     |  814 -----------
 lib/store.ml                      |  670 ---------
 nix/inshellah-completer.nu        |  813 +++++++++++
 nix/module.nix                    |   91 +-
 src/lib.rs                        |    4 +
 src/main.rs                       | 2241 +++++++++++++++++++++++++++++
 src/parsers/help.rs               |  187 +++
 src/parsers/help/description.rs   |   37 +
 src/parsers/help/helpers.rs       |  105 ++
 src/parsers/help/options.rs       |  192 +++
 src/parsers/help/positionals.rs   |  400 +++++
 src/parsers/help/subcommands.rs   |   83 ++
 src/parsers/manpage.rs            |  335 +++++
 src/parsers/manpage/commands.rs   |  157 ++
 src/parsers/manpage/groff.rs      |  385 +++++
 src/parsers/manpage/mdoc.rs       |  237 +++
 src/parsers/manpage/sections.rs   |  851 +++++++++++
 src/parsers/manpage/strategies.rs |  456 ++++++
 src/parsers/mod.rs                |    3 +
 src/parsers/nushell.rs            |  475 ++++++
 src/pool.rs                       |  233 +++
 src/store.rs                      |  657 +++++++++
 src/types.rs                      |   34 +
 test/dune                         |    3 -
 test/test_inshellah.ml            |  610 --------
 tests/git_clone_fix.rs            |   78 +
 tests/manpage_cli.rs              |  150 ++
 tests/nushell-completer.nu        |  128 ++
 tests/ports.rs                    |  915 ++++++++++++
 tests/runtime_complete.rs         |  500 +++++++
 tests/self_completions.rs         |   31 +
 49 files changed, 10554 insertions(+), 5482 deletions(-)
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 delete mode 100644 bin/.ocamlformat
 delete mode 100644 bin/dune
 delete mode 100644 bin/main.ml
 delete mode 100644 dune-project
 delete mode 100644 inshellah.opam
 delete mode 100644 lib/.ocamlformat
 delete mode 100644 lib/dune
 delete mode 100644 lib/manpage.ml
 delete mode 100644 lib/nushell.ml
 delete mode 100644 lib/parser.ml
 delete mode 100644 lib/store.ml
 create mode 100644 nix/inshellah-completer.nu
 create mode 100644 src/lib.rs
 create mode 100644 src/main.rs
 create mode 100644 src/parsers/help.rs
 create mode 100644 src/parsers/help/description.rs
 create mode 100644 src/parsers/help/helpers.rs
 create mode 100644 src/parsers/help/options.rs
 create mode 100644 src/parsers/help/positionals.rs
 create mode 100644 src/parsers/help/subcommands.rs
 create mode 100644 src/parsers/manpage.rs
 create mode 100644 src/parsers/manpage/commands.rs
 create mode 100644 src/parsers/manpage/groff.rs
 create mode 100644 src/parsers/manpage/mdoc.rs
 create mode 100644 src/parsers/manpage/sections.rs
 create mode 100644 src/parsers/manpage/strategies.rs
 create mode 100644 src/parsers/mod.rs
 create mode 100644 src/parsers/nushell.rs
 create mode 100644 src/pool.rs
 create mode 100644 src/store.rs
 create mode 100644 src/types.rs
 delete mode 100644 test/dune
 delete mode 100644 test/test_inshellah.ml
 create mode 100644 tests/git_clone_fix.rs
 create mode 100644 tests/manpage_cli.rs
 create mode 100644 tests/nushell-completer.nu
 create mode 100644 tests/ports.rs
 create mode 100644 tests/runtime_complete.rs
 create mode 100644 tests/self_completions.rs

diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..8e59a90
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,285 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
+[[package]]
+name = "bitflags"
+version = "2.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "fast-strip-ansi"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3086ffd0a7160f58f988c74173a002e255da505a114e2f5425acb1eaab2b8ac"
+dependencies = [
+ "vt-push-parser",
+]
+
+[[package]]
+name = "flate2"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
+
+[[package]]
+name = "hex"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+
+[[package]]
+name = "indexmap"
+version = "2.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
+[[package]]
+name = "inshellah"
+version = "0.1.1"
+dependencies = [
+ "fast-strip-ansi",
+ "flate2",
+ "libc",
+ "nom",
+ "parking_lot",
+ "serde_json",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
+
+[[package]]
+name = "libc"
+version = "0.2.186"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
+[[package]]
+name = "memchr"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+
+[[package]]
+name = "miniz_oxide"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
+dependencies = [
+ "adler2",
+ "simd-adler32",
+]
+
+[[package]]
+name = "nom"
+version = "8.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "parking_lot"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-link",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.149"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
+dependencies = [
+ "indexmap",
+ "itoa",
+ "memchr",
+ "serde",
+ "serde_core",
+ "zmij",
+]
+
+[[package]]
+name = "simd-adler32"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
+
+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+
+[[package]]
+name = "syn"
+version = "2.0.117"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "vt-push-parser"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdbf39d53c5a50cad8119d9cde929ecd208764e8d8d1626486b8929cbcd5f0e7"
+dependencies = [
+ "hex",
+ "smallvec",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..1319992
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "inshellah"
+version = "0.1.1"
+edition = "2024"
+
+[dependencies]
+fast-strip-ansi = "0.13.1"
+flate2 = "1.1.9"
+libc = "0.2.186"
+nom = "8.0.0"
+parking_lot = "0.12.5"
+serde_json = { version = "1.0.149", features = ["preserve_order"] }
diff --git a/README.md b/README.md
index 3d3e66d..1f3e779 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ completer.
 
 see `doc/` for details:
 
-- [building and installing](doc/building.md) — compilation, arch/debian/fedora, opam, nix
-- [nushell integration](doc/nushell-integration.md) — setup, usage, examples
-- [nixos module](doc/nixos.md) — automatic build-time indexing
+- [building and installing](doc/building.md) — cargo, nix, post-install setup
+- [nushell integration](doc/nushell-integration.md) — setup, the pipeline, the completer
+- [nixos module](doc/nixos.md) — automatic build-time indexing + module options
 - [runtime completions](doc/runtime-completions.md) — on-the-fly caching via the completer
diff --git a/bin/.ocamlformat b/bin/.ocamlformat
deleted file mode 100644
index e69de29..0000000
diff --git a/bin/dune b/bin/dune
deleted file mode 100644
index 4bb8309..0000000
--- a/bin/dune
+++ /dev/null
@@ -1,4 +0,0 @@
-(executable
- (public_name inshellah)
- (name main)
- (libraries inshellah))
diff --git a/bin/main.ml b/bin/main.ml
deleted file mode 100644
index b72a456..0000000
--- a/bin/main.ml
+++ /dev/null
@@ -1,1403 +0,0 @@
-(* main.ml — cli entry point for inshellah, a nushell completions engine.
- *
- * inshellah generates nushell "extern" definitions for external commands by
- * parsing their manpages and --help output. it has two main modes:
- *
- *   1. indexing (batch): scan a prefix directory's bin/ and share/man/,
- *      extract completions for every binary, and write them to a cache dir.
- *      this is typically run once per nix profile or system update.
- *
- *   2. completing (interactive): given a command and its current arguments,
- *      look up the cached data and return JSON completion candidates for
- *      nushell's custom completer protocol.
- *
- * the indexing pipeline for each binary:
- *   a. classify the binary (skip? try --help? try native completions?)
- *   b. if the tool has native nushell completion support, run --help and
- *      discover subcommands containing "complet", then try them with "nushell"
- *   c. otherwise, run the tool with --help/-h and parse the output
- *   d. recursively resolve subcommands (depth-limited to 5)
- *   e. after binaries, parse manpages for any commands not yet covered
- *
- * parallelism: indexing forks per binary, and subcommand resolution forks
- * per subcommand. results are marshaled back via pipes. this gives good
- * throughput on multi-core systems while keeping the code simple (no threads,
- * no async runtime — just unix fork/pipe/waitpid).
- *)
-
-open Inshellah.Parser
-open Inshellah.Manpage
-open Inshellah.Nushell
-open Inshellah.Store
-
-module SSet = Set.Make(String)
-
-(* print usage and exit. called when no valid subcommand is given. *)
-let usage () =
-  Printf.eprintf
-    {|inshellah - nushell completions engine
-
-Usage:
-  inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
-      Index completions into a directory of JSON/nu files.
-      PREFIX is a directory containing bin/ and share/man/.
-      Default dir: $XDG_CACHE_HOME/inshellah
-      --ignore FILE     skip listed commands entirely
-      --help-only FILE  skip manpages for listed commands, use --help instead
-  inshellah complete CMD [ARGS...] [--dir PATH[:PATH...]]
-      Nushell custom completer. Outputs JSON completion candidates.
-      Falls back to --help resolution if command is not indexed.
-      --dir takes colon-separated paths. The first path is the writable
-      user cache; additional paths are read-only system directories.
-      Manpages are found via sibling share/man of system dir paths.
-  inshellah query CMD [--dir PATH[:PATH...]]
-      Print stored completion data for CMD.
-  inshellah dump [--dir PATH[:PATH...]]
-      List indexed commands.
-  inshellah manpage FILE            Parse a manpage and emit nushell extern
-  inshellah manpage-dir DIR         Batch-process manpages under DIR
-  inshellah completions             Generate nushell completions for inshellah
-
-|};
-  exit 1
-
-(* manpage sections that contain command documentation.
- * section 1 = user commands, section 8 = system administration commands. *)
-let command_sections = [1; 8]
-
-(* simple substring search using Str *)
-let contains_str haystack needle =
-  try ignore (Str.search_forward (Str.regexp_string needle) haystack 0); true
-  with Not_found -> false
-
-(* heuristic to detect whether text is valid nushell source code.
- * checks for common nushell declaration keywords. the length > 20
- * check avoids false positives on short error messages. *)
-let is_nushell_source text =
-  String.length text > 20
-  && (contains_str text "export extern"
-      || contains_str text "export def"
-      || (contains_str text "module " && contains_str text "export"))
-
-(* extract command name from a manpage filename.
- * "ls.1.gz" -> strip .gz -> "ls.1" -> chop extension -> "ls" *)
-let cmd_name_of_manpage path =
-  let base = Filename.basename path in
-  let base =
-    if Filename.check_suffix base ".gz" then Filename.chop_suffix base ".gz"
-    else base in
-  try Filename.chop_extension base with Invalid_argument _ -> base
-
-(* sanitized environment for child processes.
- * strips display-related variables (DISPLAY, WAYLAND_DISPLAY, etc.) to prevent
- * gui tools from trying to open windows when we run them with --help.
- * without this, some tools would pop up dialogs or hang waiting for a
- * display connection. *)
-let safe_env = lazy (
-  Array.of_list (
-    List.filter (fun var ->
-      not (String.starts_with ~prefix:"DISPLAY=" var
-           || String.starts_with ~prefix:"WAYLAND_DISPLAY=" var
-           || String.starts_with ~prefix:"DBUS_SESSION_BUS_ADDRESS=" var
-           || String.starts_with ~prefix:"XAUTHORITY=" var))
-      (Array.to_list (Unix.environment ()))))
-
-(* non-blocking drain of a pipe fd into a buffer. safe to call repeatedly;
- * reads whatever is available without blocking. used by all fork-pipe sites
- * to keep pipes drained so children never block on write. *)
-let drain_fd rd buf =
-  let chunk = Bytes.create 8192 in
-  let continue = ref true in
-  while !continue do
-    match Unix.select [rd] [] [] 0.0 with
-    | (_ :: _, _, _) ->
-      (try
-         let bytes_read = Unix.read rd chunk 0 8192 in
-         if bytes_read = 0 then continue := false
-         else Buffer.add_subbytes buf chunk 0 bytes_read
-       with Unix.Unix_error _ -> continue := false)
-    | _ -> continue := false
-  done
-
-(* run a command with a timeout, capturing its stdout+stderr.
- * forks a child process, redirects stdin from /dev/null, and merges
- * stdout+stderr onto a pipe. reads from the pipe with select() polling
- * until either the child exits or the deadline is reached.
- *
- * the child is run in /tmp to prevent tools that create side-effect files
- * from polluting the user's working directory. we chdir to /tmp before
- * fork and restore after.
- *
- * the select timeout is capped at 0.05s per iteration to ensure we check
- * the deadline frequently even when no data is available.
- *
- * returns none if the process couldn't be started, produced no output,
- * or was killed due to timeout. *)
-let run_cmd args timeout_ms =
-  let (rd, wr) = Unix.pipe () in
-  let devnull = Unix.openfile "/dev/null" [Unix.O_RDONLY] 0 in
-  let argv = Array.of_list args in
-  (* run subprocesses in /tmp so commands that write side-effect files
-   * don't pollute the working directory *)
-  let saved_cwd = Sys.getcwd () in
-  Sys.chdir "/tmp";
-  let pid =
-    try Unix.create_process_env (List.hd args) argv
-          (Lazy.force safe_env) devnull wr wr
-    with Unix.Unix_error _ ->
-      Unix.close rd; Unix.close wr; Unix.close devnull; -1 in
-  Sys.chdir saved_cwd;
-  Unix.close wr; Unix.close devnull;
-  if pid < 0 then (Unix.close rd; None)
-  else begin
-    let buf = Buffer.create 4096 in
-    let deadline = Unix.gettimeofday () +. (float_of_int timeout_ms /. 1000.0) in
-    let chunk = Bytes.create 8192 in
-    let alive = ref true in
-    (try while !alive do
-       let remaining = deadline -. Unix.gettimeofday () in
-       if remaining <= 0.0 then alive := false
-       else match Unix.select [rd] [] [] (min remaining 0.05) with
-         | (_ :: _, _, _) ->
-           let bytes_read = Unix.read rd chunk 0 8192 in
-           if bytes_read = 0 then raise Exit
-           else Buffer.add_subbytes buf chunk 0 bytes_read
-         | _ -> ()
-     done with Exit -> ());
-    Unix.close rd;
-    if not !alive then begin
-      (try Unix.kill pid Sys.sigkill with Unix.Unix_error _ -> ());
-      ignore (Unix.waitpid [] pid)
-    end else
-      ignore (Unix.waitpid [] pid);
-    if Buffer.length buf > 0 then Some (Buffer.contents buf) else None
-  end
-
-(* check if a path is a regular file with at least one execute bit set *)
-let is_executable path =
-  try let st = Unix.stat path in
-    st.st_kind = Unix.S_REG && st.st_perm land 0o111 <> 0
-  with Unix.Unix_error _ -> false
-
-(* check if a file is a script by looking for a #! shebang.
- * follows symlinks via realpath before reading. *)
-let is_script path =
-  try
-    let real = Unix.realpath path in
-    let ic = open_in_bin real in
-    let has_shebang =
-      try let b = Bytes.create 2 in
-        really_input ic b 0 2;
-        Bytes.get b 0 = '#' && Bytes.get b 1 = '!'
-      with End_of_file -> false in
-    close_in ic;
-    has_shebang
-  with _ -> false
-
-(* scan an elf binary for string needles without loading the entire file.
- * reads the file in 64kb chunks, searching each chunk for the needle strings.
- * uses a sliding window (carry) of max_needle bytes between chunks to handle
- * needles that span chunk boundaries.
- *
- * on read failure (e.g. if the path resolves to something unreadable), all
- * needles are marked as found. this is a conservative fallback — we'd rather
- * try --help on an unreadable binary than skip it.
- *
- * the inner loop is a manual byte-by-byte comparison rather than using
- * String.contains or Str for performance — this runs on every binary
- * in the prefix, so it needs to be fast. *)
-let elf_scan path needles =
-  let found = Hashtbl.create 4 in
-  let remaining () = List.filter (fun needle -> not (Hashtbl.mem found needle)) needles in
-  (try
-    let real = Unix.realpath path in
-    let ic = open_in_bin real in
-    let magic = Bytes.create 4 in
-    really_input ic magic 0 4;
-    if Bytes.get magic 0 = '\x7f' && Bytes.get magic 1 = 'E'
-       && Bytes.get magic 2 = 'L' && Bytes.get magic 3 = 'F' then begin
-      let max_needle = List.fold_left (fun m needle -> max m (String.length needle)) 0 needles in
-      let chunk_size = 65536 in
-      let buf = Bytes.create (chunk_size + max_needle) in
-      let carry = ref 0 in
-      let eof = ref false in
-      while not !eof && remaining () <> [] do
-        let bytes_read = (try input ic buf !carry chunk_size with End_of_file -> 0) in
-        if bytes_read = 0 then eof := true
-        else begin
-          let total = !carry + bytes_read in
-          List.iter (fun needle ->
-            if not (Hashtbl.mem found needle) then begin
-              let nlen = String.length needle in
-              let pos = ref 0 in
-              while !pos <= total - nlen do
-                if Bytes.get buf !pos = needle.[0] then begin
-                  let matched = ref true in
-                  for j = 1 to nlen - 1 do
-                    if Bytes.get buf (!pos + j) <> needle.[j] then matched := false
-                  done;
-                  if !matched then (Hashtbl.replace found needle true; pos := total)
-                  else incr pos
-                end else incr pos
-              done
-            end
-          ) (remaining ());
-          let new_carry = min max_needle total in
-          Bytes.blit buf (total - new_carry) buf 0 new_carry;
-          carry := new_carry
-        end
-      done
-    end;
-    close_in ic
-  with _ ->
-    List.iter (fun needle -> Hashtbl.replace found needle true) needles);
-  found
-
-(* detect nix-generated c wrapper scripts and extract the real binary path.
- * nix's makeCWrapper creates small c programs that set up the environment
- * and exec the real binary. these wrappers won't contain "-h" or "complet"
- * in their own binary (they're just wrappers), so elf_scan would say "skip".
- * this function reads the wrapper source to find the actual /nix/store/.../bin/...
- * target path, so we can try --help on the real binary instead.
- *
- * caps the read at 64kb to avoid accidentally reading a large non-wrapper
- * binary into memory. *)
-let nix_wrapper_target path =
-  try
-    let real = Unix.realpath path in
-    let ic = open_in_bin real in
-    let size = in_channel_length ic in
-    if size > 65536 then (close_in ic; None)
-    else begin
-      let contents = Bytes.create size in
-      really_input ic contents 0 size; close_in ic;
-      let contents = Bytes.to_string contents in
-      if not (contains_str contents "makeCWrapper") then None
-      else
-        let re = Str.regexp "/nix/store/[a-z0-9]+-[^' \n\r\x00]+/bin/[a-zA-Z0-9._-]+" in
-        try ignore (Str.search_forward re contents 0);
-          let target = Str.matched_string contents in
-          if Sys.file_exists target then Some target else None
-        with Not_found -> None
-    end
-  with _ -> None
-
-(* detect nix bash/sh wrapper scripts that exec a real binary.
- * nix sometimes generates small shell scripts (e.g. to set env vars like
- * XDG_CONFIG_HOME) that exec the real binary. these look like:
- *   #!/nix/store/.../bash -e
- *   export FOO=...
- *   exec -a "$0" "/nix/store/.../bin/.foo-wrapped" "$@"
- * we extract the exec target path and resolve through it. *)
-let nix_script_wrapper_target path =
-  try
-    let real = Unix.realpath path in
-    let ic = open_in real in
-    let size = in_channel_length ic in
-    if size > 4096 then (close_in ic; None)
-    else begin
-      let contents = Bytes.create size in
-      really_input ic contents 0 size; close_in ic;
-      let contents = Bytes.to_string contents in
-      if not (contains_str contents "exec") then None
-      else
-        let re = Str.regexp "exec[ \t]+\\(-a[ \t]+\"\\$0\"[ \t]+\\)?\"?\\(/nix/store/[a-z0-9]+-[^\" \t\n]+/bin/[a-zA-Z0-9._-]+\\)\"?" in
-        try ignore (Str.search_forward re contents 0);
-          let target = Str.matched_group 2 contents in
-          let target = Unix.realpath target in
-          if Sys.file_exists target then Some target else None
-        with Not_found -> None
-    end
-  with _ -> None
-
-(* heuristic filter for binary names that should never be indexed.
- * skips: empty names, "-", dotfiles, libraries (lib-prefix), daemon wrappers
- * (suffixes -daemon, -wrapped), shared objects (.so suffix), and names with no
- * alphanumeric characters (e.g. punctuation-only names). *)
-let skip_name name =
-  String.length name = 0 || name = "-" || name.[0] = '.'
-  || String.starts_with ~prefix:"lib" name
-  || String.ends_with ~suffix:"-daemon" name
-  || String.ends_with ~suffix:"-wrapped" name
-  || String.ends_with ~suffix:".so" name
-  || not (String.exists (fun c -> (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) name)
-
-(* classification result for a binary.
- *   Skip               — don't index this binary at all
- *   Try_help           — only try --help (scripts, binaries without "completion" string)
- *   Try_native_and_help — try native nushell completion first, fall back to --help *)
-type bin_class = Skip | Try_help | Try_native_and_help
-
-(* classify an elf binary path for indexing. *)
-let classify_elf path =
-  let scan = elf_scan path ["-h"; "complet"] in
-  if Hashtbl.mem scan "complet" then Try_native_and_help
-  else if Hashtbl.mem scan "-h" then Try_help
-  else Skip
-
-(* classify a binary to decide the indexing strategy.
- * decision tree:
- *   1. nushell builtin or bad name -> Skip
- *   2. not executable -> Skip
- *   3. script (has shebang) -> resolve through nix script wrapper if possible,
- *      otherwise Try_help
- *   4. elf binary containing "complet" -> Try_native_and_help
- *   5. elf binary containing "-h" -> Try_help
- *   6. nix c wrapper -> Try_help (the wrapper itself is just an exec shim)
- *   7. otherwise -> Skip (binary has no help infrastructure) *)
-let classify_binary bindir name =
-  if is_nushell_builtin name || skip_name name then Skip
-  else
-    let path = Filename.concat bindir name in
-    if not (is_executable path) then Skip
-    else if is_script path then
-      match nix_script_wrapper_target path with
-      | Some target ->
-        let cls = classify_elf target in
-        if cls <> Skip then cls else Try_help
-      | None -> Try_help
-    else
-      let cls = classify_elf path in
-      if cls <> Skip then cls
-      else if nix_wrapper_target path <> None then Try_help
-      else Skip
-
-(* detect available cpu cores by counting "processor" lines in /proc/cpuinfo.
- * falls back to 4 if /proc/cpuinfo can't be read (e.g. on non-linux). *)
-let num_cores () =
-  try
-    let ic = open_in "/proc/cpuinfo" in
-    let count = ref 0 in
-    (try while true do
-       if String.starts_with ~prefix:"processor" (input_line ic) then incr count
-     done with End_of_file -> ());
-    close_in ic; max 1 !count
-  with _ -> 4
-
-(* extract words from text that contain any of the given substrings.
- * words are sequences of [a-zA-Z0-9_-] optionally prefixed with --.
- * returns a deduplicated list. *)
-let extract_matching_words text needles =
-  let len = String.length text in
-  let module SSet = Set.Make(String) in
-  let words = ref SSet.empty in
-  let i = ref 0 in
-  while !i < len do
-    while !i < len && not (text.[!i] >= 'a' && text.[!i] <= 'z'
-                         || text.[!i] >= 'A' && text.[!i] <= 'Z'
-                         || text.[!i] = '-') do
-      incr i
-    done;
-    let start = !i in
-    while !i < len && (text.[!i] >= 'a' && text.[!i] <= 'z'
-                     || text.[!i] >= 'A' && text.[!i] <= 'Z'
-                     || text.[!i] >= '0' && text.[!i] <= '9'
-                     || text.[!i] = '-' || text.[!i] = '_') do
-      incr i
-    done;
-    if !i > start then begin
-      let word = String.sub text start (!i - start) in
-      let lower = String.lowercase_ascii word in
-      if List.exists (fun needle ->
-        try ignore (Str.search_forward (Str.regexp_string needle) lower 0); true
-        with Not_found -> false
-      ) needles then
-        words := SSet.add word !words
-    end
-  done;
-  SSet.elements !words
-
-(* try to get native nushell completions from a binary.
- * runs --help, scans the output for words containing completion-related
- * substrings ("complet"), then tries each match as a subcommand or flag
- * with "nushell" as the argument.
- *
- * this catches arbitrary patterns (completions, generate-completions,
- * shell-completion, gen-completions, etc.) without maintaining a hardcoded
- * list. the worst case is a few failed attempts before falling back to
- * manpage/--help parsing. *)
-let try_native_completion bin_path =
-  let help_text = match run_cmd [bin_path; "--help"] 500 with
-    | Some t -> t | None -> "" in
-  if help_text = "" then None
-  else
-    let candidates = extract_matching_words help_text ["complet"] in
-    List.find_map (fun word ->
-      let attempts =
-        if String.starts_with ~prefix:"--" word then
-          [[bin_path; word; "nushell"]]
-        else
-          [[bin_path; word; "nushell"];
-           [bin_path; "--" ^ word; "nushell"]]
-      in
-      List.find_map (fun args ->
-        match run_cmd args 500 with
-        | Some text when is_nushell_source text -> Some text
-        | _ -> None
-      ) attempts
-    ) candidates
-
-(* parse a manpage file, extracting the command name, its flags/subcommands,
- * and any clap-style per-subcommand sections.
- * returns none for nushell builtins or failed parses. *)
-let parse_manpage_for_command file =
-  let contents = read_manpage_file file in
-  let fallback = cmd_name_of_manpage file in
-  (* the filename encodes the command boundary: "git-stash" = 2 words.
-   * use this to clamp the synopsis-extracted name, which can be too greedy
-   * when the synopsis lists subcommand variants. *)
-  let max_words = List.length (String.split_on_char '-' fallback) in
-  let clamp_cmd name =
-    let words = String.split_on_char ' ' name in
-    if List.length words > max_words then
-      String.concat " " (List.filteri (fun i _ -> i < max_words) words)
-    else name in
-  let cmd = match extract_synopsis_command contents with
-    | Some name -> clamp_cmd name | None -> fallback in
-  if is_nushell_builtin cmd then None
-  else
-    let result = parse_manpage_string contents in
-    let sub_sections = extract_subcommand_sections contents in
-    let result = if sub_sections <> [] then
-      { result with subcommands = List.map (fun (name, desc, _) ->
-        { name; desc }) sub_sections }
-    else result in
-    let subs = List.map (fun (name, _desc, r) ->
-      (cmd ^ " " ^ name, r)) sub_sections in
-    Some (cmd, result, subs)
-
-(* "inshellah manpage FILE" — parse one manpage and print the nushell extern *)
-let cmd_manpage file =
-  match parse_manpage_for_command file with
-  | Some (cmd, result, _) when result.entries <> [] ->
-    print_string (generate_extern cmd result)
-  | _ -> ()
-
-(* "inshellah manpage-dir DIR" — batch-process all manpages under a directory *)
-let cmd_manpage_dir dir =
-  List.iter (fun section ->
-    let subdir = Filename.concat dir (Printf.sprintf "man%d" section) in
-    if is_dir subdir then
-      Array.iter (fun file ->
-        (try cmd_manpage (Filename.concat subdir file) with _ -> ())
-      ) (Sys.readdir subdir)
-  ) command_sections
-
-(* detect rendered manpage output — when --help delegates to man(1), the
- * output starts with a header line like "GIT-STASH(1)  ...  GIT-STASH(1)".
- * we check if the first non-blank line matches that pattern. *)
-let is_rendered_manpage text =
-  let lines = String.split_on_char '\n' text in
-  let first_line = List.find_opt (fun l -> String.trim l <> "") lines in
-  match first_line with
-  | None -> false
-  | Some line ->
-    let trimmed = String.trim line in
-    (* look for WORD(DIGIT) at the start of the line *)
-    try
-      let paren = String.index trimmed '(' in
-      paren > 0
-      && paren + 2 < String.length trimmed
-      && trimmed.[paren + 1] >= '0' && trimmed.[paren + 1] <= '9'
-      && trimmed.[paren + 2] = ')'
-    with Not_found -> false
-
-(* find the raw manpage file for a hyphenated command name like "git-stash".
- * first checks the provided man directories directly, then falls back to
- * man -w for on-the-fly resolution when no man dirs are known. *)
-let find_manpage_path mandirs hyphenated_name =
-  let try_dirs () =
-    List.find_map (fun mandir ->
-      List.find_map (fun section ->
-        let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
-        List.find_map (fun ext ->
-          let path = Filename.concat subdir
-            (Printf.sprintf "%s.%d%s" hyphenated_name section ext) in
-          if Sys.file_exists path then Some path else None
-        ) [""; ".gz"]
-      ) command_sections
-    ) mandirs in
-  match try_dirs () with
-  | Some _ as found -> found
-  | None ->
-    (* fallback to man -w when no man dirs provided or file not found *)
-    match run_cmd ["man"; "-w"; hyphenated_name] 200 with
-    | Some raw ->
-      let path = String.trim raw in
-      if Sys.file_exists path then Some path else None
-    | None -> None
-
-(* when --help output is a rendered manpage, find and parse the raw manpage
- * source instead. returns the main result plus any sub-section results
- * (e.g. "git stash push" flags parsed from the git-stash manpage). *)
-let try_manpage_fallback mandirs cmd_name =
-  match find_manpage_path mandirs cmd_name with
-  | None -> None
-  | Some path ->
-    match parse_manpage_for_command path with
-    | None -> None
-    | Some (_, result, subs) when result.entries = [] && subs = [] -> None
-    | Some (_, result, subs) -> Some (result, subs)
-
-(* safety limit: don't accumulate more than 500 subcommand resolution results
- * per binary. prevents runaway recursion on tools with enormous subcommand trees. *)
-let max_resolve_results = 500
-
-(* safe wrapper around parse_manpage_for_command that catches all exceptions *)
-let process_manpage file =
-  try
-    match parse_manpage_for_command file with
-    | Some (cmd, result, subs) when result.entries <> [] || subs <> [] ->
-      Some (cmd, result, subs)
-    | _ -> None
-  with _ -> None
-
-(* collect the set of command names that have manpages in a given man directory.
- * used during indexing to skip --help for commands that will be handled by
- * the manpage parsing phase instead (manpages are more reliable than --help). *)
-let manpaged_commands mandir =
-  List.fold_left (fun acc section ->
-    let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
-    if is_dir subdir then
-      Array.fold_left (fun acc f -> SSet.add (cmd_name_of_manpage f) acc)
-        acc (Sys.readdir subdir)
-    else acc
-  ) SSet.empty command_sections
-
-(* parallel structured help resolver — recursively resolves a command and
- * all its subcommands by running --help on each, forking a child process
- * per subcommand for parallelism.
- *
- * the resolver works as a breadth-first queue:
- *   1. start with the root command in the queue
- *   2. fork a child for each queued item (up to num_cores concurrent)
- *   3. the child runs --help, parses the output, marshals the result via pipe
- *   4. the parent collects results and enqueues discovered subcommands
- *   5. repeat until queue is empty and all children have finished
- *
- * depth is limited to 5 levels and total results to max_resolve_results
- * to prevent runaway recursion on pathological command trees.
- *
- * the child process detects "self-listing" — when a subcommand's --help
- * lists itself as a subcommand (e.g. "git help" listing "help" as a
- * subcommand of itself). this would cause infinite recursion, so such
- * results are discarded.
- *
- * children close all pipe fds from other pending children immediately
- * after fork to prevent fd leaks. the parent drains pipes regularly to
- * prevent children from blocking on full pipe buffers. *)
-let help_resolve_par ?(timeout=200) ?(mandirs=[]) cmd rest name =
-  let max_jobs = num_cores () in
-  let queue = Queue.create () in
-  Queue.push (rest, name, 0) queue;
-  let results = ref [] in
-  (* pending: (pid, rd, buf, cmd_args, cmd_name, depth) *)
-  let pending = ref [] in
-  let collect rd buf cmd_args cmd_name depth =
-    drain_fd rd buf;
-    (try Unix.close rd with _ -> ());
-    let data = Buffer.contents buf in
-    let result : (help_result * subcommand list * (string * help_result) list) option =
-      if String.length data > 0 then
-        try Marshal.from_string data 0 with _ -> None
-      else None in
-    match result with
-    | None -> ()
-    | Some (r, subs, extras) ->
-      let at_limit = depth >= 5 || List.length !results >= max_resolve_results in
-      results := (cmd_name, r) :: !results;
-      (* extras are fully-parsed sub-results from manpage sub-sections —
-       * add them directly without enqueueing for further resolution *)
-      List.iter (fun (sub_name, sub_r) ->
-        if not (List.exists (fun (existing, _) -> existing = sub_name) !results) then
-          results := (sub_name, sub_r) :: !results
-      ) extras;
-      if not at_limit then
-        (* only enqueue subcommands that weren't already covered by extras *)
-        let extra_names = List.map fst extras in
-        List.iter (fun (sc : subcommand) ->
-          let full = cmd_name ^ " " ^ sc.name in
-          if not (List.exists (fun existing -> existing = full) extra_names) then
-            Queue.push (cmd_args @ [sc.name], full, depth + 1) queue
-        ) subs in
-  let reap () =
-    pending := List.filter (fun (pid, rd, buf, cmd_args, cmd_name, depth) ->
-      drain_fd rd buf;
-      match Unix.waitpid [Unix.WNOHANG] pid with
-      | (0, _) -> true
-      | _ -> collect rd buf cmd_args cmd_name depth; false
-      | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
-        (try Unix.close rd with _ -> ()); false
-    ) !pending in
-  let wait_for_slot () =
-    while List.length !pending >= max_jobs do
-      reap ();
-      if List.length !pending >= max_jobs then begin
-        let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
-        ignore (Unix.select fds [] [] 0.05)
-      end
-    done in
-  while not (Queue.is_empty queue) || !pending <> [] do
-    while not (Queue.is_empty queue) do
-      let (cmd_args, cmd_name, depth) = Queue.pop queue in
-      wait_for_slot ();
-      let (rd, wr) = Unix.pipe () in
-      let pid = Unix.fork () in
-      if pid = 0 then begin
-        Unix.close rd;
-        List.iter (fun (_, prd, _, _, _, _) ->
-          try Unix.close prd with _ -> ()) !pending;
-        let result =
-          let text = match run_cmd (cmd :: cmd_args @ ["--help"]) timeout with
-            | Some _ as r -> r
-            | None -> run_cmd (cmd :: cmd_args @ ["-h"]) timeout in
-          match text with
-          | None -> None
-          | Some text ->
-            (* check for rendered manpage first — when --help delegates to
-             * man(1), the raw groff source has richer structure than the
-             * rendered text. parse_help would partially succeed on rendered
-             * manpage output (extracting flags from OPTIONS) but miss
-             * subcommands from the COMMANDS section. *)
-            if is_rendered_manpage text then
-              let base = Filename.basename cmd in
-              let hyphenated = String.concat "-" (base :: cmd_args) in
-              match try_manpage_fallback mandirs hyphenated with
-              | Some (r, subs) ->
-                let at_limit = depth >= 5 in
-                let extra = List.map (fun (sub_name, sub_r) ->
-                  (cmd_name ^ " " ^ sub_name, sub_r)) subs in
-                let enqueue_subs = if at_limit then [] else r.subcommands in
-                Some (r, enqueue_subs, extra)
-              | None ->
-                (* manpage file not found — fall back to parsing rendered text *)
-                (match parse_help text with
-                 | Error _ -> None
-                 | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] -> None
-                 | Ok r ->
-                   let self_listed = match cmd_args with
-                     | [] -> false
-                     | _ ->
-                       let leaf = List.nth cmd_args (List.length cmd_args - 1) in
-                       List.exists (fun (sc : subcommand) -> sc.name = leaf) r.subcommands in
-                   if self_listed then
-                     Some ({ entries = []; subcommands = []; positionals = [];
-                             description = "" }, [], [])
-                   else
-                     let at_limit = depth >= 5 in
-                     let subs = if at_limit then [] else r.subcommands in
-                     Some (r, subs, []))
-            else
-              match parse_help text with
-              | Error _ -> None
-              | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] -> None
-              | Ok r ->
-                let self_listed = match cmd_args with
-                  | [] -> false
-                  | _ ->
-                    let leaf = List.nth cmd_args (List.length cmd_args - 1) in
-                    List.exists (fun (sc : subcommand) -> sc.name = leaf) r.subcommands in
-                if self_listed then
-                  (* the subcommand's --help returned the parent's help text
-                   * (it lists itself as a subcommand). cache a leaf stub so the
-                   * completer knows this is a leaf node, not a parent with
-                   * further subcommands. *)
-                  Some ({ entries = []; subcommands = []; positionals = [];
-                          description = "" }, [], [])
-                else
-                  let at_limit = depth >= 5 in
-                  let subs = if at_limit then [] else r.subcommands in
-                  Some (r, subs, []) in
-        let oc = Unix.out_channel_of_descr wr in
-        Marshal.to_channel oc (result : (help_result * subcommand list * (string * help_result) list) option) [];
-        close_out oc;
-        exit 0
-      end else begin
-        Unix.close wr;
-        pending := (pid, rd, Buffer.create 4096, cmd_args, cmd_name, depth) :: !pending
-      end
-    done;
-    if !pending <> [] then begin
-      reap ();
-      if !pending <> [] && Queue.is_empty queue then begin
-        let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
-        ignore (Unix.select fds [] [] 0.05)
-      end
-    end
-  done;
-  List.rev !results
-
-(* "inshellah index" — the main indexing command.
- * processes all binaries and manpages in the given prefix directories,
- * writing completion data to the cache dir.
- *
- * the pipeline has two phases:
- *
- * phase 1 (binaries): fork one child per binary. each child:
- *   - tries native nushell completions (if classified as Try_native_and_help)
- *   - falls back to help_resolve_par (which itself forks per subcommand)
- *   - marshals the result back via pipe as a tagged variant:
- *     `Native of string — raw nushell source
- *     `Parsed of (string * help_result) list — parsed flag data
- *     `None — nothing useful extracted
- *
- * phase 2 (manpages): sequentially parse manpages for commands not yet
- *   covered by phase 1. manpages are more reliable than --help for many
- *   gnu tools, but slower to process.
- *
- * commands on the ignorelist are skipped entirely. commands on the
- * help_only list skip manpage parsing and only use --help. commands
- * with manpages skip --help in phase 1 (they'll be handled in phase 2).
- *
- * the done_cmds set tracks which commands have already been indexed to
- * prevent duplicates across phases and across multiple prefix directories. *)
-
-(* known privilege-escalation wrappers — defined here (before cmd_index and
- * cmd_complete) because both need the list: cmd_index writes @complete
- * external stubs, and cmd_complete strips the wrapper to find the real command. *)
-let elevation_commands =
-  ["sudo"; "run0"; "doas"; "pkexec"; "su"; "calife"; "sux"; "sudoedit";
-   "please"; "super"; "priv"]
-
-let cmd_index bindirs mandirs ignorelist help_only dir =
-  ensure_dir dir;
-  let done_cmds = ref SSet.empty in
-  let result_count = ref 0 in
-  let index_bindir bindir mandir =
-    if not (is_dir bindir) then
-      Printf.eprintf "skipping %s (not found)\n" bindir
-    else begin
-      let bins = Sys.readdir bindir in
-      Array.sort String.compare bins;
-      let manpaged = if is_dir mandir
-        then manpaged_commands mandir else SSet.empty in
-      let max_jobs = num_cores () in
-      let classified = Array.map (fun name ->
-        if SSet.mem name ignorelist then (name, Skip)
-        else if SSet.mem name help_only then (name, classify_binary bindir name)
-        else if SSet.mem name manpaged then (name, Skip)
-        else (name, classify_binary bindir name)
-      ) bins in
-      let pending = ref [] in
-      let process_result name rd buf =
-        drain_fd rd buf;
-        (try Unix.close rd with _ -> ());
-        let data = Buffer.contents buf in
-        if String.length data > 0 then begin
-          let result : [`Native of string | `Parsed of (string * help_result) list | `None] =
-            try Marshal.from_string data 0 with _ -> `None in
-          (match result with
-          | `Native src ->
-            write_native ~dir name src;
-            incr result_count
-          | `Parsed pairs ->
-            List.iter (fun (cmd_name, r) ->
-              if not (SSet.mem cmd_name !done_cmds) then begin
-                write_result ~dir ~source:"help" cmd_name r;
-                done_cmds := SSet.add cmd_name !done_cmds;
-                incr result_count
-              end
-            ) pairs
-          | `None -> ())
-        end;
-        done_cmds := SSet.add name !done_cmds in
-      let reap () =
-        pending := List.filter (fun (pid, rd, buf, name) ->
-          drain_fd rd buf;
-          match Unix.waitpid [Unix.WNOHANG] pid with
-          | (0, _) -> true
-          | _ ->
-            process_result name rd buf;
-            false
-          | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
-            (try Unix.close rd with _ -> ()); false
-        ) !pending in
-      let wait_for_slot () =
-        while List.length !pending >= max_jobs do
-          reap ();
-          if List.length !pending >= max_jobs then begin
-            let fds = List.map (fun (_, rd, _, _) -> rd) !pending in
-            ignore (Unix.select fds [] [] 0.05)
-          end
-        done in
-      Array.iter (fun (name, classification) ->
-        match classification with
-        | Skip -> ()
-        | Try_help | Try_native_and_help ->
-          wait_for_slot ();
-          let (rd, wr) = Unix.pipe () in
-          let pid = Unix.fork () in
-          if pid = 0 then begin
-            Unix.close rd;
-            List.iter (fun (_, prd, _, _) ->
-              try Unix.close prd with _ -> ()) !pending;
-            let result =
-              try
-                let path = Filename.concat bindir name in
-                let native = match classification with
-                  | Try_native_and_help ->
-                    (match try_native_completion path with
-                     | Some src -> Some src | None -> None)
-                  | _ -> None in
-                match native with
-                | Some src -> `Native src
-                | None ->
-                  let pairs = help_resolve_par ~timeout:200 ~mandirs path [] name in
-                  if pairs <> [] then `Parsed pairs else `None
-              with _ -> `None in
-            let oc = Unix.out_channel_of_descr wr in
-            Marshal.to_channel oc
-              (result : [`Native of string | `Parsed of (string * help_result) list | `None]) [];
-            close_out oc;
-            exit 0
-          end else begin
-            Unix.close wr;
-            pending := (pid, rd, Buffer.create 4096, name) :: !pending
-          end
-      ) classified;
-      while !pending <> [] do
-        reap ();
-        if !pending <> [] then begin
-          let fds = List.map (fun (_, rd, _, _) -> rd) !pending in
-          ignore (Unix.select fds [] [] 0.05)
-        end
-      done;
-      (* phase 2: manpages *)
-      if is_dir mandir then
-        List.iter (fun section ->
-          let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
-          if is_dir subdir then begin
-            let files = Sys.readdir subdir in
-            (* sort by filename length first, then alphabetically.
-             * this ensures parent manpages (e.g. nix-env.1.gz) are
-             * processed before subpage manpages (nix-env-install.1.gz)
-             * so the parent's data isn't overwritten by a subpage
-             * whose synopsis also extracts the parent command name. *)
-            Array.sort (fun a b ->
-              let la = String.length a and lb = String.length b in
-              if la <> lb then compare la lb
-              else String.compare a b) files;
-            Array.iter (fun file ->
-              let base_cmd = cmd_name_of_manpage file in
-              if SSet.mem base_cmd help_only then ()
-              else match process_manpage (Filename.concat subdir file) with
-              | None -> ()
-              | Some (cmd, result, subs) ->
-                if not (SSet.mem cmd !done_cmds) then begin
-                  write_result ~dir ~source:"manpage" cmd result;
-                  done_cmds := SSet.add cmd !done_cmds;
-                  incr result_count
-                end else if cmd <> base_cmd then
-                  (* a subpage manpage (e.g. nix-env-install.1) extracted
-                   * a command name that was already indexed (e.g. "nix-env").
-                   * warn so the user can investigate. *)
-                  Printf.eprintf "warning: %s extracted cmd \"%s\" (already indexed), skipping\n"
-                    file cmd;
-                List.iter (fun (sub_cmd, sub_result) ->
-                  if not (SSet.mem sub_cmd !done_cmds) then begin
-                    write_result ~dir ~source:"manpage" sub_cmd sub_result;
-                    done_cmds := SSet.add sub_cmd !done_cmds;
-                    incr result_count
-                  end
-                ) subs;
-                (* for COMMANDS section subcommands (e.g. systemctl start/stop),
-                 * write leaf stubs so the completer treats them as leaf nodes
-                 * rather than falling back to the parent's flags/subcommands.
-                 * only when there are no clap-style sub-sections (subs = []),
-                 * meaning the subcommands came from the COMMANDS section.
-                 * deliberately not added to done_cmds — if a per-subcommand
-                 * manpage exists (e.g. docker-start.1), it will overwrite the stub. *)
-                if subs = [] then
-                  List.iter (fun (sc : subcommand) ->
-                    let sub_cmd = cmd ^ " " ^ sc.name in
-                    if not (SSet.mem sub_cmd !done_cmds) then
-                      write_result ~dir ~source:"manpage" sub_cmd
-                        { entries = []; subcommands = []; positionals = [];
-                          description = sc.desc }
-                  ) result.subcommands
-            ) files
-          end
-        ) command_sections
-    end in
-  List.iter2 index_bindir bindirs mandirs;
-  (* write @complete external stubs for elevation commands (sudo, doas, etc.)
-   * so nushell routes their completions through the external completer.
-   * without this, nushell hardcodes sudo/doas to show command-name completion
-   * and never calls the external completer for their own flags. *)
-  List.iter (fun cmd ->
-    let json_path = Filename.concat dir (filename_of_command cmd ^ ".json") in
-    if Sys.file_exists json_path then
-      write_native ~dir cmd
-        (Printf.sprintf "@complete external\nextern \"%s\" []\n" cmd)
-  ) elevation_commands;
-  Printf.printf "indexed %d commands into %s\n" !result_count dir
-
-(* "inshellah dump" — list all indexed commands with their source type *)
-let cmd_dump dirs =
-  let cmds = all_commands dirs in
-  Printf.printf "%d commands\n" (List.length cmds);
-  List.iter (fun cmd ->
-    let src = match file_type_of dirs cmd with
-      | Some label -> label | None -> "?" in
-    Printf.printf "  %-40s [%s]\n" cmd src
-  ) cmds
-
-(* search $PATH for an executable with the given name.
- * used during completion to find binaries for on-the-fly resolution. *)
-let find_in_path name =
-  try
-    Sys.getenv "PATH"
-    |> String.split_on_char ':'
-    |> List.find_map (fun dir ->
-         let p = Filename.concat dir name in
-         if is_executable p then Some p else None)
-  with Not_found -> None
-
-(* resolve a command's completions on-the-fly and cache the results.
- * called during "complete" when a command isn't in the index.
- * runs help_resolve_par and writes results to the user's cache dir. *)
-let resolve_and_cache ~dir ~mandirs name path =
-  let pairs = help_resolve_par ~timeout:200 ~mandirs path [] name in
-  if pairs <> [] then begin
-    ensure_dir dir;
-    List.iter (fun (cmd_name, r) -> write_result ~dir cmd_name r) pairs;
-    Some pairs
-  end else None
-
-(* format a single completion candidate as JSON for nushell's completer protocol *)
-let completion_json value desc =
-  Printf.sprintf "{\"value\":\"%s\",\"description\":\"%s\"}"
-    (escape_json value) (escape_json desc)
-
-(* fuzzy matching: returns a score > 0 if needle is a subsequence of haystack.
- * higher scores = better match. scoring tiers:
- *   - exact match: 1000
- *   - prefix match: 900 + length bonus (how much of the haystack is covered)
- *   - subsequence: base 10 per char + bonuses for:
- *     - word boundary alignment (50): matching at '-', '_', or camelCase transitions
- *     - consecutive matches (20): matching adjacent characters
- *
- * this drives the completion candidate ranking. users typing "ser" should see
- * "--server" ranked above "--preserve" even though both contain "ser" as a
- * subsequence. the word-boundary bonus achieves this. *)
-let fuzzy_score needle haystack =
-  let needle_len = String.length needle and haystack_len = String.length haystack in
-  if needle_len = 0 then 1
-  else if needle_len > haystack_len then 0
-  else if needle = haystack then 1000
-  else
-    let needle_lc = String.lowercase_ascii needle
-    and haystack_lc = String.lowercase_ascii haystack in
-    if String.starts_with ~prefix:needle_lc haystack_lc then
-      900 + (needle_len * 100 / haystack_len)
-    else
-      let is_boundary hay_idx =
-        hay_idx = 0 || haystack.[hay_idx - 1] = '-' || haystack.[hay_idx - 1] = '_'
-        || (haystack.[hay_idx - 1] >= 'a' && haystack.[hay_idx - 1] <= 'z'
-            && haystack.[hay_idx] >= 'A' && haystack.[hay_idx] <= 'Z') in
-      (* walk haystack matching needle chars as a subsequence *)
-      let needle_idx, score, _, _ =
-        String.fold_left (fun (needle_idx, score, hay_idx, prev_match) c ->
-          if needle_idx >= needle_len then (needle_idx, score, hay_idx + 1, prev_match)
-          else if c = needle_lc.[needle_idx] then
-            let bonus = (if is_boundary hay_idx then 50 else 10)
-                      + (if prev_match = hay_idx - 1 then 20 else 0) in
-            (needle_idx + 1, score + bonus, hay_idx + 1, hay_idx)
-          else (needle_idx, score, hay_idx + 1, prev_match)
-        ) (0, 0, 0, -1) haystack_lc in
-      if needle_idx = needle_len then score else 0
-
-(* scan past the elevation command's flags and arguments to find the real
- * command. is_command checks whether a token names a known command.
- * returns Some (real_cmd :: args) or None if no command was found. *)
-let find_real_command is_command args =
-  let rec scan = function
-    | [] -> None
-    | "--" :: rest -> Some rest
-    | arg :: rest when String.length arg > 0 && arg.[0] = '-' ->
-      scan rest
-    | arg :: _ as cmd_and_rest when is_command arg ->
-      Some cmd_and_rest
-    | _ :: rest -> scan rest
-  in
-  scan args
-
-(* "inshellah complete CMD [ARGS...]" — the nushell custom completer.
- * this is the hot path — called every time the user presses tab in nushell.
- *
- * the completion logic:
- *   1. try to find the command (or longest subcommand prefix) in the store
- *   2. if not found, try on-the-fly resolution (find in $PATH, run --help, cache)
- *   3. score all candidate completions against the partial input using fuzzy_score
- *   4. output scored candidates as a JSON array
- *
- * subcommand resolution: the lookup tries longest prefix first.
- * for "git add --", it first looks for "git add", then "git".
- * this ensures subcommand-specific flags are shown.
- *
- * nushell sends a trailing empty token when the cursor is after a space
- * ("git add "). in this case all_tokens includes the empty string.
- * when the last token is non-empty, the user is still typing it, so we use
- * it as the fuzzy filter. when empty, we show all candidates.
- *
- * if only a parent command matched (e.g. "git" matched but not "git add"),
- * we suppress subcommand suggestions and only show flags. this prevents
- * showing sibling subcommands when the user has already committed to a
- * specific subcommand path.
- *
- * file completions: nushell's external completer protocol is either/or —
- * you either return custom candidates or fall back to native file completions
- * (via null), but can't mix both. we return null (triggering nushell's native
- * file completer with colors, sorting, quoting) when:
- *   - the user is at a leaf command (no subcommands) and not mid-flag
- *   - or we have no candidates at all
- * this ensures file completions appear with full nushell UX. when the user
- * IS typing a flag (partial starts with "-"), we return our flag candidates. *)
-let cmd_complete spans user_dir system_dirs mandirs =
-  (* system dirs are searched first — they're built at index time from
-   * manpages and are authoritative. user dir is an on-the-fly cache
-   * that should only be used as fallback for commands not in any system dir. *)
-  let dirs = system_dirs @ [user_dir] in
-  (* if the command line starts with a privilege-escalation wrapper, scan past
-   * it to find the real command. we identify the command by checking the store
-   * and $PATH — this avoids needing per-command option tables which are fragile
-   * across different implementations. if no real command is found, fall back to
-   * completing the elevation command itself. *)
-  let spans = match spans with
-    | cmd :: rest when List.mem cmd elevation_commands ->
-      let is_command name =
-        name <> "" && (lookup dirs name <> None || find_in_path name <> None)
-      in
-      (match find_real_command is_command rest with
-       | Some (_ :: _ as real_spans) -> real_spans
-       | _ -> spans)
-    | _ -> spans in
-  match spans with
-  | [] -> print_string "null\n"
-  | cmd_name :: rest ->
-    (* try longest prefix match: "git add" before "git" *)
-    let find_result tokens =
-      let num_tokens = List.length tokens in
-      List.init num_tokens Fun.id |> List.find_map (fun drop ->
-        let prefix = List.filteri (fun i _ -> i < num_tokens - drop) tokens in
-        match prefix with
-        | [] -> None
-        | _ ->
-          let try_name = String.concat " " prefix in
-          match lookup dirs try_name with
-          | Some r -> Some (try_name, r, List.length prefix)
-          | None -> None) in
-    (* strip flag tokens (--user, -a, etc.) from intermediate positions.
-     * flags are not part of the subcommand path and should not affect
-     * lookup. e.g. "systemctl --user start" should look up "systemctl start".
-     * the last token (partial) is NOT stripped — it may be a flag the
-     * user is typing (e.g. "--u") which needs fuzzy matching. *)
-    let strip_intermediate_flags tokens =
-      match List.rev tokens with
-      | last :: rev_rest ->
-        List.filter (fun t ->
-          String.length t = 0 || t.[0] <> '-') (List.rev rev_rest)
-        @ [last]
-      | [] -> [] in
-    let all_tokens = strip_intermediate_flags (cmd_name :: rest) in
-    let last_token = match rest with
-      | [] -> "" | _ -> List.nth rest (List.length rest - 1) in
-    (* only treat the last token as a completed subcommand when nushell
-     * sends a trailing empty token (cursor is after a space).
-     * otherwise the user is still typing and we treat it as partial. *)
-    let lookup_tokens = if last_token = "" then all_tokens
-      else match all_tokens with
-        | _ :: _ -> List.rev (List.tl (List.rev all_tokens))
-        | _ -> [cmd_name] in
-    let resolve tokens partial =
-      match find_result tokens with
-      | Some _ as found -> (found, partial)
-      | None -> (None, partial) in
-    let found, partial = resolve lookup_tokens last_token in
-    (* try on-the-fly resolution when no match or only a parent matched *)
-    let lookup_depth = List.length lookup_tokens in
-    let result, partial = match found with
-      | Some (_, _, depth) when depth >= lookup_depth - 1 ->
-        (* exact or near-exact match — use it *)
-        (found, partial)
-      | _ ->
-        (* no match, or only a parent matched — try on-the-fly resolution *)
-        (match find_in_path cmd_name with
-         | Some path ->
-           (* derive sibling share/man from the binary's location.
-            * e.g. /nix/store/.../bin/foo → /nix/store/.../share/man
-            * this lets on-the-fly resolution find manpages for commands
-            * not in the indexed prefixes. also resolves through nix
-            * wrappers to find the real binary's manpage location. *)
-           let mandir_of_bin p =
-             let bindir = Filename.dirname p in
-             let prefix = Filename.dirname bindir in
-             Filename.concat (Filename.concat prefix "share") "man" in
-           let bin_mandirs =
-             let direct = mandir_of_bin path in
-             (* also check the canonical path after resolving symlinks.
-              * e.g. /run/current-system/sw/bin/foo is a symlink to
-              * /nix/store/xxx/bin/foo — check /nix/store/xxx/share/man *)
-             let via_realpath =
-               try let real = Unix.realpath path in
-                 if real <> path then [mandir_of_bin real] else []
-               with Unix.Unix_error _ -> [] in
-             let via_wrapper =
-               match nix_script_wrapper_target path with
-               | Some target -> [mandir_of_bin target]
-               | None ->
-                 match nix_wrapper_target path with
-                 | Some target -> [mandir_of_bin target]
-                 | None -> [] in
-             List.filter is_dir (direct :: via_realpath @ via_wrapper) in
-           let all_mandirs = bin_mandirs @ mandirs in
-           (match resolve_and_cache ~dir:user_dir ~mandirs:all_mandirs cmd_name path with
-            | Some _pairs -> resolve lookup_tokens last_token
-            | None -> (found, partial))
-         | None -> (found, partial)) in
-    let candidates = match result with
-      | None -> []
-      | Some (_matched_name, r, depth) ->
-        (* when the match is shallower than requested, the user already
-         * typed a subcommand beyond the matched level — don't show
-         * sibling subcommands, only flags *)
-        let sub_candidates = if depth < lookup_depth - 1 then [] else
-        let subs = match r.subcommands with
-          | _ :: _ -> r.subcommands
-          | [] -> subcommands_of dirs _matched_name in
-        List.filter_map (fun (subcommand : subcommand) ->
-          let score = fuzzy_score partial subcommand.name in
-          if score > 0 then Some (score, completion_json subcommand.name subcommand.desc) else None
-        ) subs in
-        (* build flag completion candidates from the entry list.
-         * for flags with both short and long forms (Both), we pick which form
-         * to display based on what the user is currently typing:
-         *   - if the partial input matches the short flag better, show the short
-         *     flag as the value and note the long form in the description
-         *   - otherwise (including empty partial), prefer the long flag and note
-         *     the short form in the description
-         *
-         * parameter names are appended to descriptions in angle brackets for
-         * mandatory params and square brackets for optional ones, matching the
-         * conventions users expect from cli help text. *)
-        let flag_candidates = List.filter_map (fun (entry : entry) ->
-          let base_desc = match entry.param with
-            | Some (Mandatory p) -> if entry.desc <> "" then entry.desc ^ " <" ^ p ^ ">" else "<" ^ p ^ ">"
-            | Some (Optional p) -> if entry.desc <> "" then entry.desc ^ " [" ^ p ^ "]" else "[" ^ p ^ "]"
-            | None -> entry.desc in
-          let flag, desc = match entry.switch with
-            | Long l -> ("--" ^ l, base_desc)
-            | Short c -> (Printf.sprintf "-%c" c, base_desc)
-            | Both (c, l) ->
-              (* score the partial against both forms to decide which to present.
-               * e.g. typing "-s" scores higher against "-s" than "--squeeze-blank",
-               * so we show "-s (aka --squeeze-blank)". when the partial is empty or
-               * matches the long form better, we default to the long form. *)
-              let long_flag = "--" ^ l in
-              let short_flag = Printf.sprintf "-%c" c in
-              let long_score = fuzzy_score partial long_flag in
-              let short_score = fuzzy_score partial short_flag in
-              if short_score > long_score then
-                (short_flag, Printf.sprintf "(aka %s) %s" long_flag base_desc)
-              else
-                (long_flag, Printf.sprintf "(aka %s) %s" short_flag base_desc) in
-          let score = fuzzy_score partial flag in
-          if score > 0 then Some (score, completion_json flag desc) else None
-        ) r.entries in
-        let scored = sub_candidates @ flag_candidates in
-        List.sort (fun (a, _) (b, _) -> compare b a) scored
-        |> List.map snd in
-    (* determine whether to return our candidates or fall back to nushell's
-     * native file completer (via null). nushell's protocol is either/or:
-     * returning candidates suppresses file completions, returning null
-     * enables them with full nushell UX (colors, sorting, quoting).
-     *
-     * we return null when:
-     *   - we have no candidates at all (unknown command, no match)
-     *   - the user is at a leaf command and not typing a flag — this is
-     *     the position where file arguments are expected, so hand off to
-     *     nushell's native file completer for the best experience *)
-    let typing_flag = String.length partial > 0 && partial.[0] = '-' in
-    let has_subcommands = match result with
-      | Some (matched_name, r, _) ->
-        r.subcommands <> [] || subcommands_of dirs matched_name <> []
-      | None -> false in
-    let want_files = (not typing_flag) && (not has_subcommands) in
-    if want_files then print_string "null\n"
-    else if candidates = [] then print_string "null\n"
-    else Printf.printf "[%s]\n" (String.concat "," candidates)
-
-(* "inshellah query CMD" — print the raw stored data for a command *)
-let cmd_query cmd dirs =
-  match lookup_raw dirs cmd with
-  | None ->
-    Printf.eprintf "not found: %s\n" cmd; exit 1
-  | Some data ->
-    print_string data; print_newline ()
-
-(* load a newline-separated list of command names to ignore.
- * blank lines and lines starting with '#' are skipped. *)
-let load_ignorelist path =
-  try
-    In_channel.with_open_text path In_channel.input_all
-    |> String.split_on_char '\n'
-    |> List.filter_map (fun line ->
-         let line = String.trim line in
-         if String.length line > 0 && line.[0] <> '#' then Some line else None)
-    |> SSet.of_list
-  with _ -> SSet.empty
-
-(* parse "index" subcommand arguments: prefix dirs + optional --dir, --ignore, --help-only.
- * uses a fold over the argument list, accumulating prefixes and option values. *)
-let parse_index_args args =
-  let (prefixes, dir, ignore, help_only, _) =
-    List.fold_left (fun (prefixes, dir, ignore, help_only, pending) arg ->
-      match pending with
-      | Some "--dir" -> (prefixes, arg, ignore, help_only, None)
-      | Some "--ignore" -> (prefixes, dir, SSet.union ignore (load_ignorelist arg), help_only, None)
-      | Some "--help-only" -> (prefixes, dir, ignore, SSet.union help_only (load_ignorelist arg), None)
-      | Some _ -> (prefixes, dir, ignore, help_only, None)
-      | None ->
-        match arg with
-        | "--dir" | "--ignore" | "--help-only" -> (prefixes, dir, ignore, help_only, Some arg)
-        | _ -> (arg :: prefixes, dir, ignore, help_only, None)
-    ) ([], default_store_path (), SSet.empty, SSet.empty, None) args in
-  (List.rev prefixes, dir, ignore, help_only)
-
-(* derive the sibling man directory from a store directory path.
- * e.g. "/run/current-system/sw/share/inshellah" -> "/run/current-system/sw/share/man" *)
-let man_dir_of_system_dir path =
-  Filename.concat (Filename.dirname path) "man"
-
-(* parse common --dir arguments for complete/query/dump commands.
- * --dir takes a colon-separated list of paths. the first path is the writable
- * user cache dir; additional paths are read-only system directories.
- * man directories are derived from system dir paths as siblings
- * (share/inshellah -> share/man). uses a fold over the argument list. *)
-let parse_dir_args args =
-  let (dir_value, rest_args, _) =
-    List.fold_left (fun (dir_value, rest_args, pending) arg ->
-      match pending with
-      | Some "--dir" -> (Some arg, rest_args, None)
-      | Some _ -> (dir_value, rest_args, None)
-      | None ->
-        match arg with
-        | "--dir" -> (dir_value, rest_args, Some arg)
-        | _ -> (dir_value, arg :: rest_args, None)
-    ) (None, [], None) args in
-  let (user_dir, system_dirs) = match dir_value with
-    | None -> (default_store_path (), [])
-    | Some v ->
-      match String.split_on_char ':' v with
-      | [] -> (default_store_path (), [])
-      | first :: rest -> (first, rest) in
-  (user_dir, system_dirs, List.rev rest_args)
-
-(* "inshellah completions nushell" — emit native nushell extern for inshellah itself *)
-let cmd_completions_nushell () =
-  let result = {
-    entries = [];
-    subcommands = [];
-    positionals = [];
-    description = "nushell completions engine";
-  } in
-  let index_result = {
-    entries = [
-      { switch = Long "dir"; param = Some (Mandatory "PATH"); desc = "output directory for cached completions" };
-      { switch = Long "ignore"; param = Some (Mandatory "FILE"); desc = "skip listed commands entirely" };
-      { switch = Long "help-only"; param = Some (Mandatory "FILE"); desc = "skip manpages for listed commands, use --help instead" };
-    ];
-    subcommands = [];
-    positionals = [
-      { pos_name = "prefix"; optional = false; variadic = true };
-    ];
-    description = "index completions from prefix directories";
-  } in
-  let complete_result = {
-    entries = [
-      { switch = Long "dir"; param = Some (Mandatory "PATH"); desc = "colon-separated cache paths" };
-    ];
-    subcommands = [];
-    positionals = [
-      { pos_name = "cmd"; optional = false; variadic = false };
-      { pos_name = "args"; optional = true; variadic = true };
-    ];
-    description = "nushell custom completer, outputs JSON candidates";
-  } in
-  let query_result = {
-    entries = [
-      { switch = Long "dir"; param = Some (Mandatory "PATH"); desc = "colon-separated cache paths" };
-    ];
-    subcommands = [];
-    positionals = [
-      { pos_name = "cmd"; optional = false; variadic = false };
-    ];
-    description = "print stored completion data for a command";
-  } in
-  let dump_result = {
-    entries = [
-      { switch = Long "dir"; param = Some (Mandatory "PATH"); desc = "colon-separated cache paths" };
-    ];
-    subcommands = [];
-    positionals = [];
-    description = "list indexed commands";
-  } in
-  let manpage_result = {
-    entries = [];
-    subcommands = [];
-    positionals = [
-      { pos_name = "file"; optional = false; variadic = false };
-    ];
-    description = "parse a manpage and emit nushell extern";
-  } in
-  let manpage_dir_result = {
-    entries = [];
-    subcommands = [];
-    positionals = [
-      { pos_name = "dir"; optional = false; variadic = false };
-    ];
-    description = "batch-process manpages under a directory";
-  } in
-  let completions_result = {
-    entries = [];
-    subcommands = [];
-    positionals = [];
-    description = "generate nushell completions for inshellah";
-  } in
-  print_string (generate_extern "inshellah" result);
-  print_string (generate_extern "inshellah index" index_result);
-  print_string (generate_extern "inshellah complete" complete_result);
-  print_string (generate_extern "inshellah query" query_result);
-  print_string (generate_extern "inshellah dump" dump_result);
-  print_string (generate_extern "inshellah manpage" manpage_result);
-  print_string (generate_extern "inshellah manpage-dir" manpage_dir_result);
-  print_string (generate_extern "inshellah completions" completions_result)
-
-(* --- entry point ---
- * dispatch on the first argument to the appropriate subcommand handler. *)
-let () =
-  match Array.to_list Sys.argv |> List.tl with
-  | "index" :: rest ->
-    let (prefixes, dir, ignorelist, help_only) = parse_index_args rest in
-    if prefixes = [] then (Printf.eprintf "error: index requires at least one prefix dir\n"; exit 1);
-    let bindirs = List.map (fun p -> Filename.concat p "bin") prefixes in
-    let mandirs = List.map (fun p -> Filename.concat p "share/man") prefixes in
-    cmd_index bindirs mandirs ignorelist help_only dir
-  | "complete" :: rest ->
-    let (user_dir, system_dirs, spans) = parse_dir_args rest in
-    let man_dirs = List.filter_map (fun d ->
-      let m = man_dir_of_system_dir d in
-      if is_dir m then Some m else None) system_dirs in
-    cmd_complete spans user_dir system_dirs man_dirs
-  | "query" :: rest ->
-    let (user_dir, system_dirs, args) = parse_dir_args rest in
-    (match args with
-     | [cmd] -> cmd_query cmd (user_dir :: system_dirs)
-     | _ -> Printf.eprintf "error: query CMD [--dir PATH[:PATH...]]\n"; exit 1)
-  | "dump" :: rest ->
-    let (user_dir, system_dirs, _) = parse_dir_args rest in
-    cmd_dump (user_dir :: system_dirs)
-  | ["manpage"; file] -> cmd_manpage file
-  | ["manpage-dir"; dir] -> cmd_manpage_dir dir
-  | ["completions"] -> cmd_completions_nushell ()
-  | _ -> usage ()
diff --git a/doc/building.md b/doc/building.md
index de685d7..0a2598d 100644
--- a/doc/building.md
+++ b/doc/building.md
@@ -1,141 +1,77 @@
 # building and installing
 
-## dependencies
+inshellah is a rust crate. it builds with stock cargo on any platform
+rust supports.
 
-inshellah is written in OCaml and uses dune as its build system.
-
-build dependencies:
-- **OCaml** >= 5.0
-- **dune** >= 3.20
-- **angstrom** — parser combinator library
-- **angstrom-unix** — unix extensions for angstrom
-- **camlzip** — gzip decompression for reading compressed manpages
-- **str** — regular expressions (ships with OCaml)
-- **unix** — process/file operations (ships with OCaml)
-
-runtime dependencies:
-- **man** (optional) — used as a fallback to locate manpages during
-  on-the-fly completion resolution. not needed if system directories
-  are provided via `--dir` (manpages are found via sibling `share/man`).
-
-## building with nix (recommended)
-
-if you have nix installed:
+## with nix
 
 ```sh
 nix build
 ```
 
-the binary is at `./result/bin/inshellah`.
+binary is at `./result/bin/inshellah`.
 
-for development with a shell containing all dependencies:
+development shell:
 
 ```sh
 nix develop
-dune build
-dune test
+cargo build --release
+cargo test
 ```
 
-## building from source with opam
+## with cargo
 
-install dependencies via opam:
+requires rust >= 1.85 (edition 2024).
 
 ```sh
-opam install dune angstrom angstrom-unix camlzip
-```
-
-build and test:
-
-```sh
-dune build
-dune test
-```
-
-install into the opam switch:
-
-```sh
-dune install
-```
-
-## building from source without opam
-
-if your distribution packages the OCaml libraries directly, install
-them through your package manager, then build with dune:
-
-```sh
-dune build
-```
-
-the binary is at `_build/default/bin/main.exe`. copy it to your
-`$PATH`:
-
-```sh
-install -Dm755 _build/default/bin/main.exe /usr/local/bin/inshellah
+cargo build --release
+cargo test
+sudo install -Dm755 target/release/inshellah /usr/local/bin/inshellah
 ```
 
 ## arch linux
 
-install OCaml and dune from the official repos, and the remaining
-libraries from the AUR or via opam:
-
 ```sh
-# system packages
-sudo pacman -S ocaml dune
-
-# ocaml libraries (via opam)
-opam init    # if not already initialized
-eval $(opam env)
-opam install angstrom angstrom-unix camlzip
-
-# build
-dune build
-dune test
-
-# install
-sudo install -Dm755 _build/default/bin/main.exe /usr/local/bin/inshellah
+sudo pacman -S rust
+cargo build --release
+sudo install -Dm755 target/release/inshellah /usr/local/bin/inshellah
 ```
 
 ## debian / ubuntu
 
 ```sh
-sudo apt install ocaml opam
-opam init
-eval $(opam env)
-opam install dune angstrom angstrom-unix camlzip
-
-dune build
-sudo install -Dm755 _build/default/bin/main.exe /usr/local/bin/inshellah
+sudo apt install cargo rustc
+# if the packaged rust is older than 1.85: rustup install stable
+cargo build --release
+sudo install -Dm755 target/release/inshellah /usr/local/bin/inshellah
 ```
 
 ## fedora
 
 ```sh
-sudo dnf install ocaml opam
-opam init
-eval $(opam env)
-opam install dune angstrom angstrom-unix camlzip
-
-dune build
-sudo install -Dm755 _build/default/bin/main.exe /usr/local/bin/inshellah
+sudo dnf install cargo rust
+cargo build --release
+sudo install -Dm755 target/release/inshellah /usr/local/bin/inshellah
 ```
 
 ## post-install setup
 
-after installing the binary, index completions from your system
-prefix(es):
+index completions from your system prefix(es):
 
 ```sh
 # typical linux system
 inshellah index /usr /usr/local
 
+# more workers / different timeout
+inshellah index /usr /usr/local --workers 16 --timeout-ms 500
+
 # check what was indexed
 inshellah dump
 ```
 
-then wire up the nushell completer:
+wire up the nushell completer in `~/.config/nushell/config.nu`:
 
 ```nu
-# ~/.config/nushell/config.nu
 $env.config.completions.external = {
     enable: true
     completer: {|spans|
@@ -145,19 +81,28 @@ $env.config.completions.external = {
 }
 ```
 
-see [nushell-integration.md](nushell-integration.md) for full details
-on the completer, and [runtime-completions.md](runtime-completions.md)
-for on-the-fly resolution of commands not covered by the index.
+see [nushell-integration.md](nushell-integration.md) for full
+completer details and [runtime-completions.md](runtime-completions.md)
+for on-the-fly resolution of commands not covered by the upfront
+index.
 
 ## re-indexing after package changes
 
-the index is a static cache — it doesn't update automatically when you
-install or remove packages. re-run `inshellah index` after significant
-package changes:
-
 ```sh
 inshellah index /usr /usr/local
 ```
 
-on nixos, the system index regenerates on every `nixos-rebuild`
-automatically. see [nixos.md](nixos.md) for details.
+on nixos, the system index regenerates on every `nixos-rebuild`. see
+[nixos.md](nixos.md).
+
+## development
+
+```sh
+cargo build           # debug build, faster compile
+cargo test            # full test suite
+cargo clippy --release
+```
+
+`man` is an optional runtime dependency: it is used as a fallback for
+locating manpages outside the indexed prefixes, and is not required
+for indexing itself.
diff --git a/doc/nixos.md b/doc/nixos.md
index 5d74690..d50ada6 100644
--- a/doc/nixos.md
+++ b/doc/nixos.md
@@ -1,105 +1,51 @@
 # nixos integration
 
-inshellah provides a nixos module that automatically indexes nushell
-completions for all installed packages at system build time.
+inshellah provides a nixos module that indexes nushell completions for
+every installed package at system build time, and a wrapped binary
+that knows where to find the result.
 
 ## enabling
 
 ```nix
-# in your flake.nix outputs:
+# flake.nix outputs:
 {
   nixosConfigurations.myhost = nixpkgs.lib.nixosSystem {
     modules = [
       inshellah.nixosModules.default
-      {
-        programs.inshellah.enable = true;
-      }
+      { programs.inshellah.enable = true; }
     ];
   };
 }
 ```
 
-or if importing the module directly:
+or importing directly:
 
 ```nix
 # configuration.nix
 { pkgs, ... }: {
-  imports = [ ./path/to/inshellah/nix/module.nix ];
-  programs.inshellah = {
-    enable = true;
-    package = pkgs.inshellah;  # or your local build
-  };
+  imports = [ ./path/to/inshellah-rs/nix/module.nix ];
+  programs.inshellah.enable = true;
 }
 ```
 
-## what happens at build time
+after rebuilding, completions are immediately available through the
+autoloaded nushell shim.
 
-the module hooks into `environment.extraSetup`, which runs during the
-system profile build (the `buildEnv` that creates `/run/current-system/sw`).
-at that point, all system packages are merged, so `$out/bin` contains every
-executable and `$out/share/man` contains every manpage.
+## what the module does
 
-inshellah runs a single command:
-
-```
-inshellah index "$out" --dir $out/share/inshellah
-```
-
-this executes a three-phase pipeline:
-
-### phase 1: native completion detection (parallel)
-
-for each executable, inshellah scans the elf binary for the string
-`completion`. if found, it probes common patterns like
-`CMD completions nushell` to see if the program can generate its own
-nushell completions. native output is used verbatim — these are always
-higher quality than parsed completions.
-
-programs like `niri`, and any clap/cobra tool with nushell support,
-are handled this way.
-
-### phase 2: manpage parsing (sequential)
-
-for commands not covered by phase 1, inshellah parses manpages from
-man1 (user commands) and man8 (sysadmin commands). it handles:
-
-- gnu `.TP` style (coreutils, help2man)
-- `.IP` style (curl, hand-written)
-- `.PP`+`.RS`/`.RE` style (git, docbook)
-- nix3 bullet+hyperlink style (`nix run`, `nix build`, etc.)
-- mdoc (bsd) format
-- deroff fallback for unusual formats
-
-synopsis sections are parsed to detect subcommands: `git-commit.1`
-generates `export extern "git commit"`, not `export extern "git-commit"`.
-
-### phase 3: --help fallback (parallel)
-
-remaining executables without manpages get `--help` (or `-h`) called
-with a 200ms timeout. elf binaries are pre-scanned for the `-h` string
-to skip those that don't support help flags. shell scripts are run
-directly (they're fast). execution is parallelized to available cores.
-
-when `--help` produces rendered manpage output instead of plain help
-text (e.g. `git stash --help` delegates to `man`), the raw manpage
-source is located and parsed with the groff parser for richer results.
-
-### output
-
-each command gets its own file in `/share/inshellah` under the system
-profile. native generators produce `.nu` files; parsed results produce
-`.json` files. the `complete` command reads both formats.
-
-nushell built-in commands (ls, cd, cp, mv, etc.) are excluded since
-nushell provides its own completions.
-
-### performance
-
-on a typical nixos system (~950 executables, ~1600 manpages):
-- total time: ~4-10 seconds
-- native gzip decompression (camlzip, no process spawning)
-- parallel --help with core-scaled forking
-- elf string scanning to skip ~15% of binaries
+- installs the inshellah binary, wrapped so the system completion path
+  is found automatically.
+- runs `inshellah index "$out"` during the system profile build,
+  producing one file per command under `$out/share/inshellah/`.
+- drops the full nushell external-completer shim into
+  `/share/nushell/vendor/autoload/`, including sudo/doas overrides so
+  elevated commands still complete through inshellah.
+- emits lightweight command-name stubs for dynamic-completion backends
+  that are present in the system profile, so tools like `git` and `jj`
+  appear in nushell's command list while inshellah still supplies their
+  argument completions lazily.
+- exposes the same shim as a read-only `snippet` option for users who
+  want to source or inspect it manually.
 
 ## module options
 
@@ -110,12 +56,11 @@ programs.inshellah = {
   # the inshellah package (set automatically by the flake module)
   package = pkgs.inshellah;
 
-  # where to place indexed completion files under the system profile
+  # subdirectory of the system profile holding the index files
   # default: "/share/inshellah"
   completionsPath = "/share/inshellah";
 
   # additional read-only completion directories to search
-  # these are appended to the --dir path alongside the system completions
   extraDirs = [ "/etc/profiles/per-user/alice/share/inshellah" ];
 
   # commands to skip entirely during indexing
@@ -123,41 +68,68 @@ programs.inshellah = {
 
   # commands to skip manpage parsing for (uses --help instead)
   helpOnlyCommands = [ "nix" ];
+
+  # per-subprocess timeout in ms during indexing (null = built-in
+  # default of 200ms)
+  timeoutMs = null;
+
+  # worker-thread count for the parallel scrape (null = built-in default)
+  workers = null;
 };
 ```
 
 ## using the completer
 
-the flake module sets a read-only `snippet` option containing the nushell
-config needed to wire up the completer. you can access it via
-`config.programs.inshellah.snippet` and paste it into your nushell config,
-or source it from a file generated by your nixos config.
+the module installs the completer under nushell's vendor autoload path,
+so no hand-written nushell config is needed for the normal nixos case.
 
-the snippet sets up the external completer. the wrapper installed by
-the module has the system completion paths hardcoded, so no flags are
-needed:
+the read-only `snippet` option still holds the complete
+external-completer config. to manage sourcing yourself instead of using
+autoload, write it to a file:
 
-```nu
-let inshellah_complete = {|spans|
-    inshellah complete ...$spans | from json
-}
-$env.config.completions.external = {
-    enable: true
-    max_results: 100
-    completer: $inshellah_complete
-}
+```nix
+# generate a config file from the snippet
+environment.etc."nushell/inshellah.nu".text = config.programs.inshellah.snippet;
 ```
 
-## home manager and other user-level package managers
+then source that file from your nushell config:
 
-the nixos module only indexes packages installed at the system level
-(those that end up in `/run/current-system/sw`). if you use home-manager,
-nix-env, or another user-level package manager, those binaries and
-manpages live elsewhere — typically under `/etc/profiles/per-user/<name>`
-or `~/.nix-profile`.
+```nu
+source /etc/nushell/inshellah.nu
+```
 
-to get completions for user-installed packages, run `inshellah index`
-against those prefixes separately:
+or copy the snippet directly into `~/.config/nushell/config.nu`:
+
+```nu
+# (the snippet is many lines — copy it from `nix eval` of the option,
+# or use the environment.etc approach above)
+$env.config.completions.external = { ... }
+```
+
+the snippet provides both static lookups against the system index and
+runtime fallbacks for cases the static index can't cover:
+
+| command | dynamic source |
+|---|---|
+| `nix` | flake refs via `NIX_GET_COMPLETIONS`, with optional `meta.description` |
+| `systemctl` / `journalctl` | unit names from `list-units` |
+| `coredumpctl` | units + pids |
+| `loginctl` | users / sessions |
+| `machinectl` / `networkctl` | machines / links |
+| `ssh` / `scp` / `sftp` | hostnames from ssh config + known_hosts |
+| `docker` / `podman` | containers + image refs by subcommand |
+| `kubectl` | resource names from the live cluster |
+| `git` | refs + worktree paths |
+| `npm` / `pnpm` / `yarn` | scripts from package.json |
+| `make` / `just` | targets / recipes |
+| `cargo` | workspace targets behind `--bin` / `--example` / etc. |
+| `kill` / `pkill` | pid+comm pairs |
+
+## home manager and user-level package managers
+
+the system module only indexes packages installed system-wide. for
+home-manager or per-user nix profiles, run `inshellah index` against
+those prefixes separately:
 
 ```sh
 # home-manager / per-user profile
@@ -167,35 +139,34 @@ inshellah index /etc/profiles/per-user/$USER
 inshellah index ~/.nix-profile
 ```
 
-this indexes into the default user cache (`$XDG_CACHE_HOME/inshellah`),
-which the completer searches automatically. you can re-run this after
-installing new packages, or add it to a home-manager activation script.
-
-if you want to automate this in home-manager:
+this indexes into `$XDG_CACHE_HOME/inshellah`, which the completer
+searches automatically. to automate via home-manager:
 
 ```nix
-# home.nix
 home.activation.inshellah-index = lib.hm.dag.entryAfter [ "writeBoundary" ] ''
   ${pkgs.inshellah}/bin/inshellah index /etc/profiles/per-user/$USER 2>/dev/null || true
 '';
 ```
 
-the completer will then search both the system index and the user
-cache, so completions from both sources are available.
-
 ## troubleshooting
 
-**completions not appearing**: ensure the completer is configured in
-your nushell config (see above). check that the system index exists:
-`ls /run/current-system/sw/share/inshellah/`.
+**completions not appearing**: check that the system index exists
+(`ls /run/current-system/sw/share/inshellah/`) and that the completer
+shim is autoloaded (or that you source the snippet yourself).
 
 **missing completions for a specific command**: check if it's a nushell
-built-in (`help commands | where name == "thecommand"`). built-ins are
-excluded because nushell serves its own completions for them.
+built-in (`help commands | where name == "thecommand"`) — built-ins
+are excluded.
 
-**stale completions after update**: completions regenerate on every
-`nixos-rebuild`. if a command changed its flags, rebuild to pick up
-the changes.
+**command name missing but arguments complete after typing it**: the
+command may be installed only in a user profile. the system module can
+only generate command-name stubs for binaries linked into the system
+profile, though the external completer can still complete arguments
+once the command word has been typed.
 
-**build-time errors**: indexing failures are non-fatal (`|| true`).
-check `journalctl` for the build log if completions are missing.
+**stale completions after update**: the index regenerates on every
+`nixos-rebuild`. if a command changed its flags, rebuild.
+
+**build-time errors**: indexing failures are non-fatal. check
+`journalctl` for the build log if completions are missing for a
+specific command.
diff --git a/doc/nushell-integration.md b/doc/nushell-integration.md
index 68ea5f8..773533d 100644
--- a/doc/nushell-integration.md
+++ b/doc/nushell-integration.md
@@ -1,150 +1,28 @@
 # using inshellah completions in nushell
 
-inshellah indexes completions from three sources (in priority order):
-1. **native generators** — programs that can emit nushell completions directly
-2. **manpages** — groff/troff/mdoc manpage parsing
-3. **`--help` output** — parsing help text as a fallback
-
-indexed data is stored as `.json` and `.nu` files in a directory that the
-`complete` command reads from at tab-completion time.
+inshellah indexes completions for the commands in your `$PATH` and
+serves them to nushell's external completer. indexed data is stored as
+`.json` and `.nu` files that the `complete` command reads at
+tab-completion time.
 
 ## quick start
 
 index completions from a system prefix:
 
 ```sh
-# index from a prefix containing bin/ and share/man/
+# from a prefix containing bin/ and share/man/
 inshellah index /usr
 
-# index from multiple prefixes
+# multiple prefixes
 inshellah index /usr /usr/local
 
-# store in a custom directory
+# custom directory
 inshellah index /usr --dir ~/my-completions
 ```
 
-parse a single manpage:
-
-```sh
-inshellah manpage /usr/share/man/man1/git.1.gz
-```
-
-batch-process all manpages under a directory (man1 and man8):
-
-```sh
-inshellah manpage-dir /usr/share/man
-```
-
-## commands
-
-```
-inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
-    index completions into a directory of json/nu files.
-    PREFIX is a directory containing bin/ and share/man/.
-    default dir: $XDG_CACHE_HOME/inshellah
-    --ignore FILE     skip listed commands entirely
-    --help-only FILE  skip manpages for listed commands, use --help instead
-
-inshellah complete CMD [ARGS...] [--dir PATH[:PATH...]]
-    nushell custom completer. outputs json completion candidates.
-    falls back to --help resolution if command is not indexed.
-    --dir takes colon-separated paths. the first path is the writable
-    user cache; additional paths are read-only system directories.
-    manpages are found via sibling share/man of system dir paths.
-
-inshellah query CMD [--dir PATH[:PATH...]]
-    print stored completion data for CMD.
-
-inshellah dump [--dir PATH[:PATH...]]
-    list indexed commands.
-
-inshellah manpage FILE
-    parse a manpage and emit nushell extern block.
-
-inshellah manpage-dir DIR
-    batch-process manpages under DIR (man1 and man8 sections).
-```
-
-## the index pipeline
-
-the `index` command runs a three-phase pipeline over all executables
-in each `PREFIX/bin`:
-
-### phase 1: native completion detection (parallel)
-
-for each executable, inshellah scans the elf binary for the string
-`completion`. if found, it probes common patterns like
-`CMD completions nushell` to see if the program can generate its own
-nushell completions. native output is used verbatim — these are always
-higher quality than parsed completions.
-
-programs like `niri`, and any clap/cobra tool with nushell support,
-are handled this way.
-
-### phase 2: manpage parsing (sequential)
-
-for commands not covered by phase 1, inshellah parses manpages from
-man1 (user commands) and man8 (sysadmin commands). it handles:
-
-- gnu `.TP` style (coreutils, help2man)
-- `.IP` style (curl, hand-written)
-- `.PP`+`.RS`/`.RE` style (git, docbook)
-- nix3 bullet+hyperlink style (`nix run`, `nix build`, etc.)
-- mdoc (bsd) format
-- deroff fallback for unusual formats
-
-synopsis sections are parsed to detect subcommands: `git-commit.1`
-generates `export extern "git commit"`, not `export extern "git-commit"`.
-
-### phase 3: --help fallback (parallel)
-
-remaining executables without manpages get `--help` (or `-h`) called
-with a 200ms timeout. elf binaries are pre-scanned for the `-h` string
-to skip those that don't support help flags. shell scripts are run
-directly (they're fast). execution is parallelized to available cores.
-
-subcommands are recursively resolved — if `--help` output lists
-subcommands, inshellah runs `CMD SUBCMD --help` for each.
-
-when a `--help` invocation produces rendered manpage output (some
-commands like `git stash` delegate `--help` to `man`), inshellah
-detects this and locates the raw manpage source to parse with the
-groff parser instead. this yields richer results (subcommands,
-structured flag sections) than parsing the rendered text.
-
-### output
-
-each command gets its own file in the index directory. native generators
-produce `.nu` files; parsed results produce `.json` files. the `complete`
-command reads both formats.
-
-nushell built-in commands (ls, cd, cp, mv, etc.) are excluded since
-nushell provides its own completions.
-
-### performance
-
-on a typical nixos system (~950 executables, ~1600 manpages):
-- total time: ~4-10 seconds
-- native gzip decompression (camlzip, no process spawning)
-- parallel --help with core-scaled forking
-- elf string scanning to skip ~15% of binaries
-
-## the completer
-
-the `complete` command is designed to be wired into nushell as an
-external completer. it reads from the directories specified via `--dir`
-(colon-separated), performs fuzzy matching, and outputs json completion
-candidates. the first path is the writable user cache; additional paths
-are read-only system directories.
-
-if a command is not indexed, `complete` falls back to on-the-fly
-`--help` resolution — it runs the command's help, caches the result
-in the user directory, and returns completions immediately.
-
-### setting up the completer
+then wire up the completer in `~/.config/nushell/config.nu`:
 
 ```nu
-# ~/.config/nushell/config.nu
 $env.config.completions.external = {
     enable: true
     completer: {|spans|
@@ -154,27 +32,62 @@ $env.config.completions.external = {
 }
 ```
 
-with the nixos module, use the provided `snippet` option value (see
-[nixos.md](nixos.md)) which points at the system index automatically.
+that's it. tab-completion now works for every indexed command.
 
-## nixos module
+## commands
 
-enable automatic completion indexing at system build time:
+```
+inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
+                          [--workers N] [--timeout-ms N]
+    index completions into a directory of json/nu files.
+    PREFIX is a directory containing bin/ and share/man/.
+    default dir: $XDG_CACHE_HOME/inshellah
+    --ignore FILE     skip listed commands entirely
+    --help-only FILE  skip manpages for listed commands, use --help instead
+    --workers N       worker-thread count
+    --timeout-ms N    per-subprocess timeout in ms (default: 200)
 
-```nix
-{
-  imports = [ ./path/to/inshellah/nix/module.nix ];
-  programs.inshellah.enable = true;
-}
+inshellah complete CMD [ARGS...] [--dir PATH[:PATH...]] [--timeout-ms N]
+    nushell custom completer. outputs JSON completion candidates.
+    falls back to on-the-fly --help resolution if a command isn't
+    indexed yet — the result is cached and subsequent presses are
+    instant.
+    --dir takes colon-separated paths. the first path is the writable
+    user cache; additional paths are read-only system directories.
+
+inshellah query CMD [--dir PATH[:PATH...]]
+    print stored completion data for CMD.
+
+inshellah dump [--dir PATH[:PATH...]]
+    list indexed commands.
+
+inshellah manpage FILE
+    parse a manpage and emit a nushell extern block.
+
+inshellah manpage-dir DIR
+    batch-process manpages under DIR (man1 and man8 sections).
 ```
 
-this runs `inshellah index` during the system profile build. see
-[nixos.md](nixos.md) for full details.
+## what gets handled
 
-## what gets generated
+- **sources**: native nushell completion generators (clap/cobra tools
+  that can emit completions themselves), manpages in sections 1 and 8,
+  and `--help` / `-h` output.
+- **groff styles**: gnu `.TP` (coreutils, help2man), `.IP` (curl,
+  hand-written), `.PP`+`.RS`/`.RE` (git, docbook), nix3 bullet
+  (`nix run`, `nix build`), mdoc (BSD), plus a deroff fallback.
+- **subcommand naming**: `git-commit.1` produces `git commit`, not
+  `git-commit`. clap-style per-subcommand manpages get one file each.
+- **synopsis-only flags**: flags declared in a manpage SYNOPSIS but
+  missing from the body (e.g. nix-env's `--profile`, most of sed's
+  interface) are picked up too.
+- **elevation wrappers**: `sudo`, `doas`, `pkexec`, `su`, `run0` are
+  stripped before lookup, including when the real target is given as
+  an absolute path.
+- **exclusions**: nushell built-ins (ls, cd, mv, etc.) are skipped —
+  nushell serves its own completions for those.
 
-the `manpage` and `manpage-dir` commands emit nushell `extern` blocks
-with flags, parameter types, and descriptions:
+## extern blocks (manpage / manpage-dir)
 
 ```nu
 export extern "rg" [
@@ -186,9 +99,52 @@ export extern "rg" [
 ]
 ```
 
-subcommand manpages (e.g. `git-commit.1`) are detected via synopsis
-parsing and generate the correct nushell name (`git commit` not
-`git-commit`).
+these are produced by `inshellah manpage` / `inshellah manpage-dir` and
+can be sourced directly in your nushell config if you prefer that to
+the json completer flow.
 
-nushell built-in commands (ls, cd, mv, etc.) are excluded since nushell
-provides its own completions for these.
+## native completions and file completion
+
+when a tool ships its own nushell completion generator (clap, cobra, etc.),
+inshellah caches its output verbatim as a `.nu` file under the autoload
+dir. nushell loads the `extern` declarations and uses its built-in
+completer for that command — the external completer (inshellah's `complete`
+subcommand) is only consulted as a fallback.
+
+at the `extern` layer, positional/flag types drive what nushell offers:
+
+- `: path` triggers nushell's built-in file/path completion for that slot.
+- `: string@my_completer` runs a user-defined closure.
+- bare `: string` / `: int` provides no candidates of its own.
+
+so when a native `.nu` declares `--file: path`, you'll see file completions
+intermixed with whatever else is in scope. that's intrinsic to the type,
+not something inshellah injects.
+
+a few things worth knowing:
+
+- nushell ≤ 0.69 had a bug
+  ([#6407](https://github.com/nushell/nushell/issues/6407)) where file
+  completion superseded the external completer when the prefix was empty
+  or matched a real path. upgrade if you see this.
+- [PR #14781](https://github.com/nushell/nushell/pull/14781) tightened the
+  contract: an external completer that returns a non-null list now
+  suppresses file fallback; only an explicit `null` opts back in. inshellah
+  already follows this — `null` for "hand off to nu", `[...]` to override.
+- if you want different ranking, the relevant settings are
+  `$env.config.completions.{algorithm, sort, partial, case_sensitive}`.
+  none of them disables file completion for `: path` parameters — that
+  behavior is tied to the type itself.
+
+if a particular native completion bothers you, the workaround is to drop
+that one `.nu` file from the autoload directory. nushell falls back to the
+external completer for unknown commands, and inshellah's `complete`
+subcommand returns candidates directly as JSON — bypassing the `extern`
+type layer entirely, so no `: path` slot triggers nu's built-in file
+completer.
+
+## nixos
+
+`programs.inshellah.enable = true` will index at system build time and
+ship a richer completer with runtime fallbacks (live cluster queries,
+git/ssh/docker/k8s lookups, etc.). see [nixos.md](nixos.md).
diff --git a/doc/runtime-completions.md b/doc/runtime-completions.md
index 7b58e48..3e0ee84 100644
--- a/doc/runtime-completions.md
+++ b/doc/runtime-completions.md
@@ -1,30 +1,31 @@
 # runtime completion resolution
 
-the `complete` command has built-in on-the-fly resolution: when a command
-is not found in the index, it falls back to running `--help`, caches the
-result, and returns completions immediately. this means commands installed
-outside the system profile (via cargo, pip, npm, go, etc.) get completions
-on first tab-press with no manual setup.
+when a command isn't in the static index yet, `inshellah complete`
+runs `--help` (or `-h`) on the binary, caches the result in the user
+directory, and returns completions immediately. tab-completion just
+works for tools installed outside the indexed prefixes — via cargo,
+pip, npm, go, etc.
 
 ## how it works
 
-when you type `docker compose up --<TAB>`:
+typing `docker compose up --<TAB>`:
 
 1. nushell calls `inshellah complete docker compose up --`
-2. inshellah looks up the index for the longest matching prefix
+2. inshellah looks up the longest matching prefix in the index
 3. if found, it fuzzy-matches flags and subcommands against the partial input
 4. if not found, it locates the binary in `$PATH`, runs `--help`,
    recursively resolves subcommands, caches the results in the user
-   directory (`$XDG_CACHE_HOME/inshellah`), and returns completions.
-   if `--help` produces rendered manpage output, the raw manpage source
-   is located and parsed instead for richer results
+   directory (`$XDG_CACHE_HOME/inshellah`), and returns completions
 
-all subsequent completions for that command are instant (served from cache).
+all subsequent completions for that command are served from cache.
+
+elevation wrappers (`sudo`, `doas`, `pkexec`, `su`, `run0`) are
+stripped before lookup: `sudo docker compose up --` resolves against
+`docker`, not `sudo`. absolute paths after the wrapper are recognised
+too.
 
 ## setup
 
-the completer works with no extra configuration beyond the basic setup:
-
 ```nu
 # ~/.config/nushell/config.nu
 $env.config.completions.external = {
@@ -36,18 +37,8 @@ $env.config.completions.external = {
 }
 ```
 
-with the nixos module, the installed wrapper has the system paths
-hardcoded — no extra flags needed. the same snippet works:
-
-```nu
-$env.config.completions.external = {
-    enable: true
-    completer: {|spans|
-        inshellah complete ...$spans
-        | from json
-    }
-}
-```
+with the nixos module, no extra config is needed beyond enabling the
+module — the wrapper has the system paths baked in.
 
 to manually specify system dirs, use colon-separated `--dir`:
 
@@ -61,25 +52,15 @@ $env.config.completions.external = {
 }
 ```
 
-system directories (paths after the first in `--dir`) enable
-manpage-based fallback: when a command's `--help` delegates to `man`,
-the completer looks for the raw manpage in the sibling `share/man`
-directory (e.g. `share/inshellah` → `share/man`). if no system dirs
-are given, it falls back to `man -w` to locate the manpage.
-
-or use the `snippet` option provided by the flake module (see
-[nixos.md](nixos.md)).
+paths after the first in `--dir` are read-only system dirs.
 
 ## cache management
 
-the user cache lives at `$XDG_CACHE_HOME/inshellah` (typically
-`~/.cache/inshellah`).
-
 ```sh
 # list cached commands
 inshellah dump
 
-# view cached data for a command
+# view stored data for a command
 inshellah query docker
 
 # clear cache
diff --git a/dune-project b/dune-project
deleted file mode 100644
index 4d29412..0000000
--- a/dune-project
+++ /dev/null
@@ -1,28 +0,0 @@
-(lang dune 3.20)
-
-(name inshellah)
-
-(generate_opam_files true)
-
-(source
- (github username/reponame))
-
-(authors "atagen <boss@atagen.co>")
-
-(maintainers "atagen <boss@atagen.co>")
-
-(license GPL-3.0-or-later)
-
-(package
- (name inshellah)
- (synopsis "Nushell completions generator")
- (description
-  "Inshellah parses manpages and --help switches to generate completions for nushell.")
- (depends
-  ocaml
-  dune
-  angstrom
-  angstrom-unix
-  camlzip)
- (tags
-  (shell completions nushell parser angstrom)))
diff --git a/flake.lock b/flake.lock
index 3adb309..8c7ac0c 100644
--- a/flake.lock
+++ b/flake.lock
@@ -2,16 +2,16 @@
   "nodes": {
     "nixpkgs": {
       "locked": {
-        "lastModified": 1773385838,
-        "narHash": "sha256-ylF2AGl08seexxlLvMqj3jd+yZq56W9zicwe51mp0Pw=",
+        "lastModified": 1773821835,
+        "narHash": "sha256-TJ3lSQtW0E2JrznGVm8hOQGVpXjJyXY2guAxku2O9A4=",
         "owner": "nixos",
         "repo": "nixpkgs",
-        "rev": "fef542e7a88eec2b698389e6279464fd479926b6",
+        "rev": "b40629efe5d6ec48dd1efba650c797ddbd39ace0",
         "type": "github"
       },
       "original": {
         "owner": "nixos",
-        "ref": "nixpkgs-unstable",
+        "ref": "nixos-unstable",
         "repo": "nixpkgs",
         "type": "github"
       }
diff --git a/flake.nix b/flake.nix
index 6b05775..dd20c6b 100644
--- a/flake.nix
+++ b/flake.nix
@@ -1,111 +1,251 @@
 {
-  inputs.nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
+
+  inputs.nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable";
 
   outputs =
     { self, nixpkgs }:
     let
       forAllSystems =
-        f:
-        nixpkgs.lib.genAttrs [ "x86_64-linux" "aarch64-linux" ] (
-          system: f (import nixpkgs { inherit system; })
-        );
+        f: nixpkgs.lib.genAttrs nixpkgs.lib.systems.flakeExposed (sys: f nixpkgs.legacyPackages.${sys});
     in
     {
       devShells = forAllSystems (pkgs: {
         default = pkgs.mkShell {
-          packages = with pkgs.ocamlPackages; [
-            dune_3
-            ocaml
-            angstrom
-            angstrom-unix
-            camlzip
-            ppx_inline_test
-            ocaml-lsp
-            ocamlformat
-            ocamlformat-rpc-lib
-            utop
+          packages = with pkgs; [
+            rustc
+            cargo
+            rustfmt
+            rust-analyzer
+            clippy
           ];
         };
       });
 
       packages = forAllSystems (pkgs: {
-        default = pkgs.ocamlPackages.buildDunePackage {
+        default = pkgs.rustPlatform.buildRustPackage {
           pname = "inshellah";
-          version = "0.1";
+          version = "0.1.1";
           src = pkgs.lib.cleanSource ./.;
-          nativeBuildInputs = [ pkgs.git ];
-          buildInputs = with pkgs.ocamlPackages; [
-            dune_3
-            ocaml
-            angstrom
-            angstrom-unix
-            camlzip
-          ];
-
-          meta.mainProgram = "inshellah";
+          cargoLock.lockFile = ./Cargo.lock;
+          meta = {
+            description = "nushell completion indexer";
+            mainProgram = "inshellah";
+          };
         };
       });
 
-      nixosModules.default =
+      checks = forAllSystems (
+        pkgs:
+        let
+          checkSrc = pkgs.lib.cleanSourceWith {
+            src = ./.;
+            filter =
+              path: type:
+              let
+                base = baseNameOf path;
+              in
+              !(type == "directory" && (base == ".git" || base == "target"));
+          };
+          cargoDeps = pkgs.rustPlatform.importCargoLock { lockFile = ./Cargo.lock; };
+          rustInputs = with pkgs; [
+            cargo
+            clippy
+            stdenv.cc
+            rustc
+          ];
+          fakeInshellah = pkgs.writeShellScriptBin "inshellah" ''
+            if [ "''${1:-}" = complete ]; then
+              if [ -n "''${INSHELLAH_STATIC_FILE:-}" ] && [ -s "$INSHELLAH_STATIC_FILE" ]; then
+                cat "$INSHELLAH_STATIC_FILE"
+                printf '\n'
+              else
+                printf 'null\n'
+              fi
+            else
+              printf 'null\n'
+            fi
+          '';
+          fakeNix = pkgs.writeShellScriptBin "nix" ''
+            if [ "''${1:-}" = eval ]; then
+              printf 'raw package description\n'
+            else
+              printf 'header\nbuild\nflake#pkg\n'
+            fi
+          '';
+          fakeSystemctl = pkgs.writeShellScriptBin "systemctl" ''
+            case "$*" in
+              *"g*"*)
+                printf 'greetd.service loaded active running Greeter\n'
+                ;;
+              *)
+                printf 'demo.service loaded active running Demo Unit\n'
+                ;;
+            esac
+          '';
+          fakeKubectl = pkgs.writeShellScriptBin "kubectl" ''
+            printf '%s\n' "$*" > "$KUBECTL_ARGS_FILE"
+            if [ "''${1:-}" = get ] && [ "''${2:-}" = deployment ]; then
+              printf 'deploy-a\n'
+            elif [ "''${1:-}" = get ]; then
+              printf 'pod-a\n'
+            fi
+          '';
+          fakeCargo = pkgs.writeShellScriptBin "cargo" ''
+            cat <<'JSON'
+            {"packages":[{"name":"app-lib","version":"0.1.0","targets":[{"name":"app-lib","kind":["lib"]},{"name":"app-cli","kind":["bin"]},{"name":"app-integration","kind":["test"]}]},{"name":"helper-lib","version":"0.2.0","targets":[{"name":"helper-lib","kind":["lib"]}]}]}
+            JSON
+          '';
+          fakeGit = pkgs.writeShellScriptBin "git" ''
+            case "''${1:-}" in
+              remote)
+                printf 'origin\nupstream\n'
+                ;;
+              for-each-ref)
+                case "$*" in
+                  *"refs/heads refs/remotes refs/tags"*)
+                    printf 'main\tcommit\tMain branch\norigin/main\tcommit\tRemote main\nv1.0\tcommit\tRelease 1\n'
+                    ;;
+                  *"refs/heads"*)
+                    printf 'main\tMain branch\nfeature\tFeature branch\n'
+                    ;;
+                  *"refs/tags"*)
+                    printf 'v1.0\tRelease 1\nv2.0\tRelease 2\n'
+                    ;;
+                esac
+                ;;
+              stash)
+                if [ "''${2:-}" = list ]; then
+                  printf 'stash@{0}: WIP on main: demo stash\n'
+                fi
+                ;;
+              status)
+                printf ' M src/main.rs\n?? new-file.txt\nR  old.txt -> renamed.txt\n'
+                ;;
+              ls-files)
+                printf 'src/main.rs\nREADME.md\n'
+                ;;
+              config)
+                printf 'submodule.demo.path deps/demo\n'
+                ;;
+              worktree)
+                if [ "''${2:-}" = list ]; then
+                  printf 'worktree /repo/linked\n'
+                fi
+                ;;
+            esac
+          '';
+          fakeJj = pkgs.writeShellScriptBin "jj" ''
+            case "''${1:-}" in
+              log)
+                printf 'k\tworking change\nm\tmain change\n'
+                ;;
+              bookmark)
+                if [ "''${2:-}" = list ]; then
+                  printf 'main\nfeature\norigin/main\n'
+                fi
+                ;;
+              tag)
+                if [ "''${2:-}" = list ]; then
+                  printf 'v1.0\nv2.0\n'
+                fi
+                ;;
+              git)
+                if [ "''${2:-}" = remote ] && [ "''${3:-}" = list ]; then
+                  printf 'origin https://example.com/repo.git\nupstream https://example.com/upstream.git\n'
+                fi
+                ;;
+              op|operation)
+                if [ "''${2:-}" = log ]; then
+                  printf 'abc123\tcheckout working copy\n'
+                fi
+                ;;
+              file)
+                if [ "''${2:-}" = list ]; then
+                  printf 'src/main.rs\nREADME.md\n'
+                fi
+                ;;
+              workspace)
+                if [ "''${2:-}" = list ]; then
+                  printf 'default\nlinked\n'
+                fi
+                ;;
+            esac
+          '';
+          fakeCompletionBackends = pkgs.symlinkJoin {
+            name = "inshellah-fake-completion-backends";
+            paths = [
+              fakeInshellah
+              fakeNix
+              fakeSystemctl
+              fakeKubectl
+              fakeCargo
+              fakeGit
+              fakeJj
+            ];
+          };
+          rustCheckPhase = ''
+            echo "running rust checks"
+            rm -rf source-rust
+            cp -R ${checkSrc} source-rust
+            chmod -R u+w source-rust
+            pushd source-rust
+            export CARGO_HOME="$TMPDIR/cargo-home"
+            export CARGO_TARGET_DIR="$TMPDIR/cargo-target"
+            mkdir -p .cargo "$CARGO_HOME"
+            cat > .cargo/config.toml <<EOF
+            [source.crates-io]
+            replace-with = "vendored-sources"
+
+            [source.vendored-sources]
+            directory = "${cargoDeps}"
+
+            [net]
+            offline = true
+            EOF
+            cargo clippy --all-targets
+            cargo test --all-targets
+            popd
+          '';
+          nushellCheckPhase = ''
+            echo "running nushell shim checks"
+            export PATH="${fakeCompletionBackends}/bin:$PATH"
+            export KUBECTL_ARGS_FILE="$TMPDIR/kubectl.args"
+            export INSHELLAH_STATIC_FILE="$TMPDIR/inshellah-static.json"
+            : > "$INSHELLAH_STATIC_FILE"
+            nu --no-config-file -c 'source ${./nix/inshellah-completer.nu}; source ${./tests/nushell-completer.nu}'
+            cat > "$TMPDIR/config-load.nu" <<'EOF'
+            source ${./nix/inshellah-completer.nu}
+
+            def activate [p: path] {
+              sudo nix-env --set -p /nix/var/nix/profiles/system $p
+              sudo $"($p)/bin/switch-to-configuration" switch
+              doas nix-env --set -p /nix/var/nix/profiles/system $p
+            }
+            EOF
+            nu --env-config /dev/null --config "$TMPDIR/config-load.nu" -c 'print ok'
+          '';
+          mkShellCheck =
+            name: inputs: phase:
+            pkgs.runCommand name { nativeBuildInputs = inputs; } ''
+              ${phase}
+              touch $out
+            '';
+        in
         {
-          pkgs,
-          lib,
-          config,
-          ...
-        }:
+          rust = mkShellCheck "inshellah-rust-check" rustInputs rustCheckPhase;
+          nushell = mkShellCheck "inshellah-nushell-check" [ pkgs.nushell ] nushellCheckPhase;
+          default = mkShellCheck "inshellah-check" (rustInputs ++ [ pkgs.nushell ]) ''
+            ${rustCheckPhase}
+            ${nushellCheckPhase}
+          '';
+        }
+      );
+
+      nixosModules.default =
+        { pkgs, ... }:
         {
           imports = [ ./nix/module.nix ];
           programs.inshellah.package = self.packages.${pkgs.stdenv.hostPlatform.system}.default;
-          programs.inshellah.snippet = ''
-            let inshellah_complete = { |spans| 
-                let completions = (^inshellah complete ...$spans) | from json
-                # dynamic completions
-                let additional = if ($completions == null and ($spans | length) > 0) {
-                  match $spans.0 {
-                    "nix" => {
-                        $env.NIX_GET_COMPLETIONS = ($spans | length) - 1
-                        let nix_output = $spans | run-external $in | split row -r '\n' | str trim | skip 1
-                        let entries = if (($nix_output | length) < 6 and
-                          ($spans | last) =~ "[a-zA-Z][a-zA-Z0-9_-]*#[a-zA-Z][a-zA-Z0-9_-]*") { 
-                            hide-env NIX_GET_COMPLETIONS
-                            $env.NIX_ALLOW_UNFREE = 1
-                            $env.NIX_ALLOW_BROKEN = 1
-                            $nix_output | par-each { |e|
-                                  try {
-                                    { value: $e, description: (^nix eval --impure $e --apply "f: f.meta.description" err> /dev/null) }
-                                  } catch {
-                                    { value: $e, description: "" }
-                                  }
-                                }
-                         } else {
-                           $nix_output | each { |e|
-                             { value: $e, description: "" }
-                           }
-                         }
-                        $entries
-                    }
-                    "systemctl" => {
-                      if ($spans | length) < 3 { null } else {
-                        let kw = $spans | last
-                        let scope = if ("--user" in $spans) { [--user] } else { [] }
-                        ^systemctl ...$scope list-units --all --no-pager --plain --full --no-legend $"($kw)*"
-                          | lines
-                          | each { |l|
-                            let parsed = $l | parse -r '(?P<unit>\S+)\s+\S+\s+\S+\s+\S+\s+(?P<desc>.*)'
-                            if ($parsed | length) > 0 {
-                              {value: $parsed.0.unit, description: ($parsed.0.desc | str trim)}
-                            }
-                          } | compact
-                      }
-                    }
-                    _ => { null }
-                  }
-                } else { null }
-                let result = ($completions | default []) | append ($additional | default []) | compact
-                if ($result | is-empty) { null } else { $result }
-            }
-            $env.config.completions.external = {enable: true, max_results: 200, completer: $inshellah_complete}
-          '';
         };
     };
 }
diff --git a/inshellah.opam b/inshellah.opam
deleted file mode 100644
index 9888aa7..0000000
--- a/inshellah.opam
+++ /dev/null
@@ -1,35 +0,0 @@
-# This file is generated by dune, edit dune-project instead
-opam-version: "2.0"
-synopsis: "Nushell completions generator"
-description:
-  "Inshellah parses manpages and --help switches to generate completions for nushell."
-maintainer: ["atagen <boss@atagen.co>"]
-authors: ["atagen <boss@atagen.co>"]
-license: "GPL-3.0-or-later"
-tags: ["shell" "completions" "nushell" "parser" "angstrom"]
-homepage: "https://github.com/username/reponame"
-bug-reports: "https://github.com/username/reponame/issues"
-depends: [
-  "ocaml"
-  "dune" {>= "3.20"}
-  "angstrom"
-  "angstrom-unix"
-  "camlzip"
-  "odoc" {with-doc}
-]
-build: [
-  ["dune" "subst"] {dev}
-  [
-    "dune"
-    "build"
-    "-p"
-    name
-    "-j"
-    jobs
-    "@install"
-    "@runtest" {with-test}
-    "@doc" {with-doc}
-  ]
-]
-dev-repo: "git+https://github.com/username/reponame.git"
-x-maintenance-intent: ["(latest)"]
diff --git a/lib/.ocamlformat b/lib/.ocamlformat
deleted file mode 100644
index e69de29..0000000
diff --git a/lib/dune b/lib/dune
deleted file mode 100644
index 38defe1..0000000
--- a/lib/dune
+++ /dev/null
@@ -1,3 +0,0 @@
-(library
- (name inshellah)
- (libraries angstrom angstrom-unix camlzip str unix))
diff --git a/lib/manpage.ml b/lib/manpage.ml
deleted file mode 100644
index 5415fac..0000000
--- a/lib/manpage.ml
+++ /dev/null
@@ -1,1145 +0,0 @@
-(* manpage.ml — parse unix manpages (groff/mdoc format) into help_result.
- *
- * manpages are written in roff/groff markup — a decades-old typesetting language
- * used by man(1). this module strips the formatting and extracts structured data
- * (flags, subcommands, positionals) from the raw groff source.
- *
- * there are two major manpage macro packages:
- *   - man (groff) — used by gnu/linux tools. uses macros like .SH, .TP, .IP, .PP
- *   - mdoc (bsd) — used by bsd tools. uses .Sh, .Fl, .Ar, .Op, .It, .Bl/.El
- *
- * this module handles both, auto-detecting the format by checking for .Sh macros.
- *
- * for groff manpages, flag extraction uses multiple "strategies" that target
- * different common formatting patterns:
- *   - strategy_tp: .TP tagged paragraphs (gnu coreutils, help2man)
- *   - strategy_ip: .IP indented paragraphs (curl, hand-written)
- *   - strategy_pp_rs: .PP + .RS/.RE blocks (git, docbook)
- *   - strategy_nix: nix3-style bullet .IP with .UR/.UE hyperlinks
- *   - strategy_deroff: fallback — strip all groff, feed to help text parser
- *
- * the module tries all applicable strategies and picks the one that extracts
- * the most flag entries, on the theory that more results = better match.
- *
- * key peculiarities:
- *   - groff has an enormous escape syntax (font changes, named characters,
- *     size changes, color, string variables, etc.) — strip_groff_escapes
- *     handles the common cases but is not exhaustive
- *   - font escapes like \fI (italic) need to insert spaces at word boundaries
- *     to prevent flag names from fusing with their parameter names
- *   - the strategies share the angstrom-based switch_parser from parser.ml
- *     for parsing the actual flag syntax out of the stripped text
- *)
-
-open Parser
-
-(* --- shared helpers for imperative string scanning ---
- * many groff parsing routines use an imperative cursor (ref int) walking
- * through a string. these helpers factor out common scanning patterns. *)
-
-(* advance pos past all characters until the delimiter is found.
- * leaves pos pointing at the delimiter character, or at len if not found. *)
-let skip_to_char source len pos delim =
-  while !pos < len && source.[!pos] <> delim do incr pos done
-
-(* translate a groff named character escape to its text equivalent.
- * groff uses two-letter codes like "aq" for apostrophe, "lq"/"rq" for
- * left/right quotes, "em"/"en" for dashes. returns None for unknown names. *)
-let named_char_of = function
-  | "aq"         -> Some '\''
-  | "lq" | "Lq" -> Some '\x22'  (* left double quote *)
-  | "rq" | "Rq" -> Some '\x22'  (* right double quote *)
-  | "em" | "en"  -> Some '-'
-  | _            -> None
-
-(* skip a groff reference that uses one of three sub-forms:
- *   single char  — e.g. \*X or \nX
- *   ( + 2 chars  — e.g. \*(XX or \n(XX
- *   [ to ]       — e.g. \*[name] or \n[name]
- * used for \* (string variable) and \n (number register) escapes.
- * advances pos past the consumed characters. *)
-let skip_groff_reference source len pos =
-  if !pos < len then begin
-    if source.[!pos] = '(' then
-      pos := !pos + 3  (* skip past '(' + two-character name *)
-    else if source.[!pos] = '[' then begin
-      incr pos;
-      skip_to_char source len pos ']';
-      if !pos < len then incr pos
-    end else
-      incr pos
-  end
-
-(* --- groff escape/formatting stripper ---
- * groff escapes start with backslash and use various continuation syntaxes.
- * this function strips them, replacing named characters (like \(aq for
- * apostrophe) with their text equivalents and discarding formatting directives. *)
-
-let strip_groff_escapes source =
-  let buffer = Buffer.create (String.length source) in
-  let len = String.length source in
-  let pos = ref 0 in
-  let prev_char = ref '\000' in
-  (* emit a character into the output buffer and track it as previous *)
-  let put char_val = Buffer.add_char buffer char_val; prev_char := char_val in
-  let is_alnum char_val =
-    (char_val >= 'a' && char_val <= 'z')
-    || (char_val >= 'A' && char_val <= 'Z')
-    || (char_val >= '0' && char_val <= '9')
-  in
-  while !pos < len do
-    if source.[!pos] = '\\' && !pos + 1 < len then begin
-      let next = source.[!pos + 1] in
-      match next with
-      | 'f' ->
-        (* font escape: \fB, \fI, \fP, \fR, \f(XX, \f[...] *)
-        if !pos + 2 < len then begin
-          let font_char = source.[!pos + 2] in
-          (* insert space before italic font to preserve word boundaries
-             e.g. \fB--max-results\fR\fIcount\fR -> "--max-results count" *)
-          if font_char = 'I' && is_alnum !prev_char then put ' ';
-          if font_char = '(' then
-            pos := !pos + 5  (* \f(XX — two-character font name *)
-          else if font_char = '[' then begin
-            pos := !pos + 3;
-            skip_to_char source len pos ']';
-            if !pos < len then incr pos
-          end else
-            pos := !pos + 3  (* \fX — single-character font selector *)
-        end else
-          pos := !pos + 2
-      | '-' ->
-        (* escaped hyphen-minus — emit a plain hyphen *)
-        put '-';
-        pos := !pos + 2
-      | '&' | '/' | ',' ->
-        (* zero-width characters — discard without output *)
-        pos := !pos + 2
-      | '(' ->
-        (* two-char named character: \(aq, \(lq, \(rq, etc. *)
-        if !pos + 3 < len then begin
-          let name = String.sub source (!pos + 2) 2 in
-          (match named_char_of name with
-           | Some char_val -> put char_val
-           | None -> ());
-          pos := !pos + 4
-        end else
-          pos := !pos + 2
-      | '[' ->
-        (* bracketed named character: \[aq], \[lq], etc. *)
-        pos := !pos + 2;
-        let start = !pos in
-        skip_to_char source len pos ']';
-        if !pos < len then begin
-          let name = String.sub source start (!pos - start) in
-          (match named_char_of name with
-           | Some char_val -> put char_val
-           | None -> ());
-          incr pos
-        end
-      | 's' ->
-        (* size escape: \sN, \s+N, \s-N — skip the numeric argument *)
-        pos := !pos + 2;
-        if !pos < len && (source.[!pos] = '+' || source.[!pos] = '-') then incr pos;
-        if !pos < len && source.[!pos] >= '0' && source.[!pos] <= '9' then incr pos;
-        if !pos < len && source.[!pos] >= '0' && source.[!pos] <= '9' then incr pos
-      | 'm' ->
-        (* color escape: \m[...] — skip the bracketed color name *)
-        pos := !pos + 2;
-        if !pos < len && source.[!pos] = '[' then begin
-          incr pos;
-          skip_to_char source len pos ']';
-          if !pos < len then incr pos
-        end
-      | 'X' ->
-        (* device control: \X'...' — skip the single-quoted payload *)
-        pos := !pos + 2;
-        if !pos < len && source.[!pos] = '\'' then begin
-          incr pos;
-          skip_to_char source len pos '\'';
-          if !pos < len then incr pos
-        end
-      | '*' ->
-        (* string variable: \*X or \*(XX or \*[...] — skip the reference *)
-        pos := !pos + 2;
-        skip_groff_reference source len pos
-      | 'n' ->
-        (* number register: \nX or \n(XX or \n[...] — skip the reference *)
-        pos := !pos + 2;
-        skip_groff_reference source len pos
-      | 'e' ->
-        (* escaped backslash literal *)
-        put '\\';
-        pos := !pos + 2
-      | '\\' ->
-        (* double backslash — emit one *)
-        put '\\';
-        pos := !pos + 2
-      | ' ' ->
-        (* escaped space — emit a regular space *)
-        put ' ';
-        pos := !pos + 2
-      | _ ->
-        (* unknown escape — skip the two-character sequence *)
-        pos := !pos + 2
-    end else begin
-      put source.[!pos];
-      incr pos
-    end
-  done;
-  Buffer.contents buffer
-
-(* strip inline macro formatting: .BI, .BR, .IR, etc.
- * these macros alternate between fonts for their arguments, e.g.:
- *   .BI "--output " "FILE"
- * becomes "--outputFILE" (arguments concatenated without spaces).
- *
- * quoted strings are kept together (quotes stripped), but unquoted spaces
- * are consumed. this matches groff's actual rendering of these macros,
- * where alternating-font arguments are concatenated. *)
-let strip_inline_macro_args text =
-  let buffer = Buffer.create (String.length text) in
-  let len = String.length text in
-  let pos = ref 0 in
-  while !pos < len do
-    if text.[!pos] = '"' then begin
-      (* quoted argument — copy characters up to the closing quote *)
-      incr pos;
-      while !pos < len && text.[!pos] <> '"' do
-        Buffer.add_char buffer text.[!pos];
-        incr pos
-      done;
-      if !pos < len then incr pos
-    end else if text.[!pos] = ' ' || text.[!pos] = '\t' then begin
-      (* unquoted whitespace — skip (arguments are concatenated) *)
-      incr pos
-    end else begin
-      (* regular character — copy to output *)
-      Buffer.add_char buffer text.[!pos];
-      incr pos
-    end
-  done;
-  Buffer.contents buffer
-
-(* convenience: strip escapes and trim whitespace *)
-let strip_groff line =
-  let text = strip_groff_escapes line in
-  String.trim text
-
-(* --- line classification ---
- * every line in a manpage is classified as one of four types.
- * this classification drives all subsequent parsing — strategies
- * pattern-match on sequences of classified lines. *)
-
-type groff_line =
-  | Macro of string * string   (* macro name + args, e.g. ("SH", "OPTIONS") or ("TP", "") *)
-  | Text of string             (* plain text after groff stripping *)
-  | Blank                      (* empty line *)
-  | Comment                    (* groff comment: .backslash-quote or backslash-quote *)
-
-(* classify a single line of manpage source.
- * macro lines start with '.' or '\'' (groff alternate control char).
- * the macro name is split from its arguments at the first space/tab.
- * arguments wrapped in double quotes are unquoted. *)
-let classify_line line =
-  let len = String.length line in
-  if len = 0 then Blank
-  else if len >= 2 && line.[0] = '.' && line.[1] = '\\' && (len < 3 || line.[2] = '"') then
-    Comment
-  else if len >= 3 && line.[0] = '\\' && line.[1] = '"' then
-    Comment
-  else if line.[0] = '.' || line.[0] = '\'' then begin
-    (* macro line — extract macro name and arguments *)
-    let rest = String.sub line 1 (len - 1) in
-    let rest = String.trim rest in
-    (* split into macro name and arguments at the first whitespace *)
-    let space_pos =
-      try Some (String.index rest ' ')
-      with Not_found ->
-        try Some (String.index rest '\t')
-        with Not_found -> None
-    in
-    match space_pos with
-    | Some split_at ->
-      let name = String.sub rest 0 split_at in
-      let args = String.trim (String.sub rest (split_at + 1) (String.length rest - split_at - 1)) in
-      (* strip surrounding quotes from arguments *)
-      let args =
-        let alen = String.length args in
-        if alen >= 2 && args.[0] = '"' && args.[alen - 1] = '"' then
-          String.sub args 1 (alen - 2)
-        else args
-      in
-      Macro (name, args)
-    | None ->
-      Macro (rest, "")
-  end else begin
-    let stripped = strip_groff line in
-    if String.length stripped = 0 then Blank
-    else Text stripped
-  end
-
-(* refined comment detection — the base classify_line may miss some comment
- * forms, so this wrapper checks more carefully before falling through to
- * the general classifier. *)
-let is_comment_line line =
-  let len = String.length line in
-  (len >= 3 && line.[0] = '.' && line.[1] = '\\' && line.[2] = '"')
-  || (len >= 2 && line.[0] = '\\' && line.[1] = '"')
-
-let classify_line line =
-  if is_comment_line line then Comment
-  else classify_line line
-
-(* --- section extraction ---
- * manpages are divided into sections by .SH macros. the OPTIONS section
- * contains the flag definitions we want. if there's no OPTIONS section,
- * we fall back to DESCRIPTION (some simple tools put flags there).
- *
- * old-style nix manpages (nix-build, nix-env-install, etc.) split flags
- * across multiple .SH sections with option-like names: e.g. "Options" for
- * command-specific flags and "Common Options" for flags shared by all nix
- * commands. collecting only the first such section misses the majority of
- * flags, so we collect and concatenate all option-like sections. *)
-
-let extract_options_section lines =
-  let classified = List.map classify_line lines in
-  (* collect lines until the next .SH header, returning (content, rest)
-   * where rest starts at the .SH line (or is empty if at end of file). *)
-  let rec collect_section lines acc =
-    match lines with
-    | [] -> (List.rev acc, [])
-    | Macro ("SH", _) :: _ -> (List.rev acc, lines)
-    | line :: rest -> collect_section rest (line :: acc)
-  in
-  (* test whether a section name looks like an options section.
-   * matches "OPTIONS", "COMMON OPTIONS", "GLOBAL OPTIONS", etc. *)
-  let is_options_section name =
-    let upper = String.uppercase_ascii (String.trim name) in
-    upper = "OPTIONS"
-    || (String.length upper > 0 &&
-        try let _ = Str.search_forward (Str.regexp_string "OPTION") upper 0 in true
-        with Not_found -> false)
-  in
-  (* collect from all option-like .SH sections and concatenate them.
-   * handles the common nix pattern where "Options" and "Common Options"
-   * are separate .SH sections but both contain relevant flags.
-   *
-   * a synthetic Macro("SH","") separator is inserted between sections so
-   * that collect_desc_text (which stops on SH/SS) does not let a description
-   * from the last entry in one section bleed into the intro text of the next. *)
-  let rec find_all_options lines acc =
-    match lines with
-    | [] -> acc
-    | Macro ("SH", args) :: rest when is_options_section args ->
-      let (section, remaining) = collect_section rest [] in
-      let sep = if acc = [] then [] else [Macro ("SH", "")] in
-      find_all_options remaining (acc @ sep @ section)
-    | _ :: rest -> find_all_options rest acc
-  in
-  (* fallback: DESCRIPTION section for simple tools that put flags there *)
-  let rec find_description = function
-    | [] -> []
-    | Macro ("SH", args) :: rest
-      when String.uppercase_ascii (String.trim args) = "DESCRIPTION" ->
-      fst (collect_section rest [])
-    | _ :: rest -> find_description rest
-  in
-  match find_all_options classified [] with
-  | [] -> find_description classified
-  | sections -> sections
-
-(* --- strategy-based entry extraction ---
- * rather than a single monolithic parser, we use multiple "strategies" that
- * each target a specific groff formatting pattern. this is necessary because
- * manpage authors use very different macro combinations for the same purpose.
- *
- * the shared building blocks:
- *   - collect_text_lines: gather consecutive Text lines into one description string
- *   - parse_tag_to_entry: run the angstrom switch parser on a tag string to
- *     extract the flag definition. this reuses the same parser that handles
- *     --help output, giving consistent extraction across both sources.
- *   - tag_of_macro: extract the "tag" text from formatting macros like .B, .BI, etc.
- *)
-
-(* collect consecutive text lines, joining them with spaces *)
-let rec collect_text_lines lines acc =
-  match lines with
-  | Text text :: rest -> collect_text_lines rest (text :: acc)
-  | _ -> (String.concat " " (List.rev acc), lines)
-
-(* attempt to parse a tag string (e.g. "-v, --verbose FILE") into an entry.
- * uses the angstrom switch_parser + param_parser from parser.ml.
- * returns None if the tag doesn't look like a flag definition. *)
-let parse_tag_to_entry tag desc =
-  let tag = strip_groff_escapes tag in
-  let tag = String.trim tag in
-  match Angstrom.parse_string ~consume:Angstrom.Consume.Prefix
-          (Angstrom.lift2 (fun sw p -> (sw, p)) switch_parser param_parser) tag with
-  | Ok (switch, param) -> Some { switch; param; desc }
-  | Error _ -> None
-
-(* extract tag text from a macro line.
- * .B and .I preserve spaces (single argument); .BI, .BR, .IR alternate
- * fonts and concatenate arguments. *)
-let tag_of_macro name args =
-  match name with
-  | "B" | "I" -> strip_groff_escapes args |> String.trim
-  | _ -> strip_inline_macro_args args |> strip_groff_escapes |> String.trim
-
-(* strategy a: .TP style (most common — gnu coreutils, help2man).
- * .TP introduces a tagged paragraph: the next line is the "tag" (flag name)
- * and subsequent text lines are the description. the tag can be plain text
- * or wrapped in a formatting macro (.B, .BI, etc.).
- *
- * example groff:
- *   .TP
- *   \fB\-v\fR, \fB\-\-verbose\fR
- *   increase verbosity *)
-let strategy_tp lines =
-  let rec walk lines acc =
-    match lines with
-    | [] -> List.rev acc
-    | Macro ("TP", _) :: rest ->
-      (* next line is the tag — could be Text or a formatting macro *)
-      begin match rest with
-        | Text tag :: rest2 ->
-          let (desc, rest3) = collect_text_lines rest2 [] in
-          let entry = parse_tag_to_entry tag desc in
-          walk rest3 (match entry with Some e -> e :: acc | None -> acc)
-        | Macro (("B" | "I" | "BI" | "BR" | "IR") as macro_name, args) :: rest2 ->
-          let tag = tag_of_macro macro_name args in
-          let (desc, rest3) = collect_text_lines rest2 [] in
-          let entry = parse_tag_to_entry tag desc in
-          walk rest3 (match entry with Some e -> e :: acc | None -> acc)
-        | _ -> walk rest acc
-      end
-    | _ :: rest -> walk rest acc
-  in
-  walk lines []
-
-(* strategy b: .IP style (curl, hand-written manpages).
- * .IP takes an inline tag argument: .IP "-v, --verbose"
- * the description follows as text lines. simpler than .TP because
- * the tag is on the macro line itself. *)
-let strategy_ip lines =
-  let rec walk lines acc =
-    match lines with
-    | [] -> List.rev acc
-    | Macro ("IP", tag) :: rest ->
-      let tag = strip_groff_escapes tag in
-      let (desc, rest2) = collect_text_lines rest [] in
-      let entry = parse_tag_to_entry tag desc in
-      walk rest2 (match entry with Some e -> e :: acc | None -> acc)
-    | _ :: rest -> walk rest acc
-  in
-  walk lines []
-
-(* strategy c: .PP + .RS/.RE style (git, docbook-generated manpages).
- * flag entries are introduced by .PP (paragraph), with the flag name as
- * plain text, followed by a .RS (indent) block containing the description,
- * closed by .RE (de-indent). this is common in docbook-to-manpage toolchains. *)
-let strategy_pp_rs lines =
-  let rec walk lines acc =
-    match lines with
-    | [] -> List.rev acc
-    | Macro ("PP", _) :: rest ->
-      begin match rest with
-        | Text tag :: rest2 ->
-          (* look for .RS ... text ... .RE *)
-          let rec collect_rs lines desc_acc =
-            match lines with
-            | Macro ("RS", _) :: rest3 ->
-              collect_in_rs rest3 desc_acc
-            | Text text :: rest3 ->
-              (* sometimes description follows directly *)
-              collect_rs rest3 (text :: desc_acc)
-            | _ -> (String.concat " " (List.rev desc_acc), lines)
-          and collect_in_rs lines desc_acc =
-            match lines with
-            | Macro ("RE", _) :: rest3 ->
-              (String.concat " " (List.rev desc_acc), rest3)
-            | Text text :: rest3 ->
-              collect_in_rs rest3 (text :: desc_acc)
-            | Macro ("PP", _) :: _ | Macro ("SH", _) :: _ ->
-              (String.concat " " (List.rev desc_acc), lines)
-            | _ :: rest3 -> collect_in_rs rest3 desc_acc
-            | [] -> (String.concat " " (List.rev desc_acc), [])
-          in
-          let (desc, rest3) = collect_rs rest2 [] in
-          let entry = parse_tag_to_entry tag desc in
-          walk rest3 (match entry with Some e -> e :: acc | None -> acc)
-        | _ -> walk rest acc
-      end
-    | _ :: rest -> walk rest acc
-  in
-  walk lines []
-
-(* strategy d: deroff fallback — strip all groff markup, then feed the
- * resulting plain text through the --help parser from parser.ml.
- * this is the last resort when no structured macro pattern is recognized.
- * it works surprisingly well for simple manpages but may miss entries
- * in heavily formatted ones. *)
-let strategy_deroff_lines lines =
-  let buffer = Buffer.create 256 in
-  List.iter (fun line ->
-    match line with
-    | Text text ->
-      Buffer.add_string buffer text;
-      Buffer.add_char buffer '\n'
-    | Macro (("BI" | "BR" | "IR" | "B" | "I"), args) ->
-      let text = strip_inline_macro_args args in
-      let text = strip_groff_escapes text in
-      Buffer.add_string buffer text;
-      Buffer.add_char buffer '\n'
-    | Blank -> Buffer.add_char buffer '\n'
-    | _ -> ()
-  ) lines;
-  let text = Buffer.contents buffer in
-  match parse_help text with
-  | Ok result -> result.entries
-  | Error _ -> []
-
-(* strategy e: nix3-style bullet .IP with .UR/.UE hyperlinks.
- * nix's manpages use .IP with bullet markers for flag entries, interleaved
- * with .UR/.UE hyperlink macros. the flag tag is in text lines after the
- * bullet .IP, and the description follows a non-bullet .IP marker.
- *
- * nix manpages nest .RS/.RE blocks inside descriptions for sub-examples.
- * the skip_rs helper tracks nesting depth to skip these without losing
- * the rest of the description. *)
-let strategy_nix lines =
-  (* a bullet .IP has non-empty args (the bullet marker) *)
-  let is_bullet_ip args =
-    String.length (String.trim args) > 0
-  in
-  let rec walk lines acc =
-    match lines with
-    | [] -> List.rev acc
-    | Macro ("IP", args) :: rest when is_bullet_ip args ->
-      (* collect tag: skip .UR/.UE macros, collect Text lines *)
-      let rec collect_tag lines parts =
-        match lines with
-        | Macro ("UR", _) :: rest2 -> collect_tag rest2 parts
-        | Macro ("UE", _) :: rest2 -> collect_tag rest2 parts
-        | Text text :: rest2 -> collect_tag rest2 (text :: parts)
-        | _ -> (String.concat " " (List.rev parts), lines)
-      in
-      let (tag, rest2) = collect_tag rest [] in
-      (* collect description after the description .IP marker *)
-      let rec collect_desc lines parts =
-        match lines with
-        | Macro ("IP", dargs) :: rest3 when not (is_bullet_ip dargs) ->
-          collect_desc_text rest3 parts
-        | _ -> (String.concat " " (List.rev parts), lines)
-      and collect_desc_text lines parts =
-        match lines with
-        | Text text :: rest3 -> collect_desc_text rest3 (text :: parts)
-        | Macro ("IP", args2) :: _ when is_bullet_ip args2 ->
-          (* next bullet entry — stop collecting *)
-          (String.concat " " (List.rev parts), lines)
-        | Macro (("SS" | "SH"), _) :: _ ->
-          (* section boundary — stop collecting *)
-          (String.concat " " (List.rev parts), lines)
-        | Macro ("RS", _) :: rest3 ->
-          skip_rs rest3 parts 1
-        | Macro ("IP", _) :: rest3 ->
-          (* non-bullet .IP = continuation paragraph *)
-          collect_desc_text rest3 parts
-        | Macro _ :: rest3 -> collect_desc_text rest3 parts
-        | Blank :: rest3 -> collect_desc_text rest3 parts
-        | Comment :: rest3 -> collect_desc_text rest3 parts
-        | [] -> (String.concat " " (List.rev parts), [])
-      and skip_rs lines parts depth =
-        match lines with
-        | Macro ("RE", _) :: rest3 ->
-          if depth <= 1 then collect_desc_text rest3 parts
-          else skip_rs rest3 parts (depth - 1)
-        | Macro ("RS", _) :: rest3 -> skip_rs rest3 parts (depth + 1)
-        | _ :: rest3 -> skip_rs rest3 parts depth
-        | [] -> (String.concat " " (List.rev parts), [])
-      in
-      let (desc, rest3) = collect_desc rest2 [] in
-      let entry = parse_tag_to_entry tag desc in
-      walk rest3 (match entry with Some e -> e :: acc | None -> acc)
-    | _ :: rest -> walk rest acc
-  in
-  walk lines []
-
-(* count occurrences of a specific macro in the section.
- * used by extract_entries to decide which strategies are worth trying. *)
-let count_macro name lines =
-  List.fold_left (fun count line ->
-    match line with Macro (macro_name, _) when macro_name = name -> count + 1 | _ -> count
-  ) 0 lines
-
-(* auto-detect and try strategies, return the one with most entries.
- * first counts macros to determine which strategies are applicable,
- * then runs all applicable ones and picks the winner by entry count.
- * if no specialized strategy produces results, falls back to deroff.
- *
- * this "try everything, pick the best" approach is intentional.
- * manpage formatting is too varied and inconsistent to reliably detect the
- * format from macro counts alone. running multiple strategies and comparing
- * results is more robust. *)
-let extract_entries lines =
-  let tp = count_macro "TP" lines
-  and ip = count_macro "IP" lines
-  and pp = count_macro "PP" lines
-  and rs = count_macro "RS" lines
-  and ur = count_macro "UR" lines in
-  (* build a list of (label, entries) for each applicable strategy *)
-  let specialized = List.filter_map Fun.id [
-    (if tp > 0 then Some ("TP", strategy_tp lines) else None);
-    (if ip > 0 then Some ("IP", strategy_ip lines) else None);
-    (if pp > 0 && rs > 0 then Some ("PP+RS", strategy_pp_rs lines) else None);
-    (if ur > 0 && ip > 0 then Some ("nix", strategy_nix lines) else None);
-  ] in
-  (* filter to strategies that found at least one entry, fall back to deroff *)
-  let candidates = match List.filter (fun (_, entries) -> entries <> []) specialized with
-    | [] -> [("deroff", strategy_deroff_lines lines)]
-    | filtered -> filtered
-  in
-  (* pick the strategy with the most entries *)
-  List.fold_left (fun (_, best) (name, entries) ->
-    if List.length entries >= List.length best then (name, entries)
-    else (name, best)
-  ) ("none", []) candidates |> snd
-
-(* --- NAME section description extraction ---
- * the NAME section in manpages follows the convention:
- *   "command \- short description"
- * we extract the part after "\-" as the command's description.
- * handles both "\-" (groff) and " - " (plain text) separators. *)
-
-let extract_name_description contents =
-  let lines = String.split_on_char '\n' contents in
-  let classified = List.map classify_line lines in
-  let rec find = function
-    | [] -> None
-    | Macro ("SH", args) :: rest
-      when String.uppercase_ascii (String.trim args) = "NAME" ->
-      collect rest []
-    | _ :: rest -> find rest
-  and collect lines acc =
-    match lines with
-    | Macro ("SH", _) :: _ | [] -> finish acc
-    | Text text :: rest -> collect rest (text :: acc)
-    | Macro (("B" | "BI" | "BR" | "I" | "IR"), args) :: rest ->
-      let text = strip_inline_macro_args args |> strip_groff_escapes |> String.trim in
-      collect rest (if String.length text > 0 then text :: acc else acc)
-    | Macro ("Nm", args) :: rest ->
-      let text = strip_groff_escapes args |> String.trim in
-      collect rest (if String.length text > 0 then text :: acc else acc)
-    | Macro ("Nd", args) :: rest ->
-      let text = strip_groff_escapes args |> String.trim in
-      collect rest (if String.length text > 0 then ("\\- " ^ text) :: acc else acc)
-    | _ :: rest -> collect rest acc
-  and finish acc =
-    let full = String.concat " " (List.rev acc) |> String.trim in
-    (* NAME lines look like: "git-add \- Add file contents to the index" *)
-    let sep = Str.regexp {| *\\- *\| +- +|} in
-    match Str.bounded_split sep full 2 with
-    | [_; desc] -> Some (String.trim desc)
-    | _ -> None
-  in
-  find classified
-
-(* --- SYNOPSIS command name extraction ---
- * the SYNOPSIS section shows how to invoke the command:
- *   .SH SYNOPSIS
- *   .B git add
- *   [\fIOPTIONS\fR] [\fB\-\-\fR] [\fI<pathspec>\fR...]
- *
- * we extract the command name by taking consecutive "word" tokens until
- * we hit something that looks like an argument (starts with [, <, -, etc.). *)
-
-let extract_synopsis_command_lines lines =
-  (* replace italic text (\fI...\fR) with angle-bracketed placeholders
-   * before classification strips the font info. italic in groff indicates
-   * a parameter/placeholder (e.g. \fIoperation\fR), not a command word.
-   * the angle brackets cause extract_cmd to stop at these tokens since
-   * '<' is in its stop set. without this, "nix-env \fIoperation\fR"
-   * would be parsed as command "nix-env operation" instead of "nix-env". *)
-  let lines = List.map (fun line ->
-    Str.global_replace (Str.regexp {|\\fI\([^\\]*\)\\f[RP]|}) {|<\1>|} line
-  ) lines in
-  let classified = List.map classify_line lines in
-  let is_synopsis name =
-    String.uppercase_ascii (String.trim name) = "SYNOPSIS"
-  in
-  (* extract the command name from a line by taking leading word tokens *)
-  let extract_cmd line =
-    let words = String.split_on_char ' ' (String.trim line) in
-    let words = List.filter (fun word -> String.length word > 0) words in
-    let is_cmd_char = function
-      | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.' -> true
-      | _ -> false
-    in
-    (* take words that look like command name parts, stop at arguments *)
-    let rec take = function
-      | [] -> []
-      | word :: rest ->
-        if String.length word > 0
-           && (word.[0] = '[' || word.[0] = '-' || word.[0] = '<'
-               || word.[0] = '(' || word.[0] = '{')
-        then []
-        else if String.for_all is_cmd_char word then
-          word :: take rest
-        else []
-    in
-    match take words with
-    | [] -> None
-    | cmd -> Some (String.concat " " cmd)
-  in
-  let rec find = function
-    | [] -> None
-    | Macro ("SH", args) :: rest when is_synopsis args -> collect rest
-    | _ :: rest -> find rest
-  and collect = function
-    | [] -> None
-    | Macro ("SH", _) :: _ -> None
-    | Text text :: _ ->
-      let text = String.trim text in
-      if String.length text > 0 then extract_cmd text else None
-    | Macro (("B" | "BI" | "BR"), args) :: _ ->
-      let text = strip_inline_macro_args args |> strip_groff_escapes |> String.trim in
-      if String.length text > 0 then extract_cmd text else None
-    | _ :: rest -> collect rest
-  in
-  find classified
-
-let extract_synopsis_command contents =
-  let lines = String.split_on_char '\n' contents in
-  extract_synopsis_command_lines lines
-
-(* --- SYNOPSIS positional extraction ---
- * extract positional arguments from the SYNOPSIS section by collecting
- * all text/formatting macro lines, joining them, skipping the command
- * name prefix, then running parse_usage_args from parser.ml on the remainder. *)
-
-let extract_synopsis_positionals_lines lines =
-  let classified = List.map classify_line lines in
-  let is_synopsis name =
-    String.uppercase_ascii (String.trim name) = "SYNOPSIS"
-  in
-  let rec find = function
-    | [] -> []
-    | Macro ("SH", args) :: rest when is_synopsis args -> collect rest []
-    | _ :: rest -> find rest
-  and collect lines acc =
-    match lines with
-    | [] -> finish acc
-    | Macro ("SH", _) :: _ -> finish acc
-    | Macro ("SS", _) :: _ -> finish acc
-    | Macro ("br", _) :: _ -> finish acc
-    | Text text :: rest ->
-      let text = strip_groff_escapes text |> String.trim in
-      collect rest (if String.length text > 0 then text :: acc else acc)
-    | Macro (("B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI"), args) :: rest ->
-      let text = strip_inline_macro_args args |> strip_groff_escapes |> String.trim in
-      collect rest (if String.length text > 0 then text :: acc else acc)
-    | _ :: rest -> collect rest acc
-  and finish acc =
-    let parts = List.rev acc in
-    let full = String.concat " " parts |> String.trim in
-    if String.length full = 0 then []
-    else
-      let cmd_end = skip_command_prefix full in
-      let args = String.sub full cmd_end (String.length full - cmd_end) in
-      parse_usage_args args
-  in
-  find classified
-
-(* --- mdoc (bsd) format support ---
- * mdoc is the bsd manpage macro package. it uses semantic macros rather than
- * presentation macros:
- *   .Fl v    -> flag: -v
- *   .Ar file -> argument: file
- *   .Op ...  -> optional: [...]
- *   .Bl/.It/.El -> list begin/item/end
- *   .Sh      -> section header (note lowercase 'h', vs groff's .SH)
- *
- * the parser walks through classified lines looking for .Bl (list begin)
- * blocks containing .It (items) with .Fl (flag) entries. *)
-
-let is_mdoc lines =
-  List.exists (fun line ->
-    match classify_line line with Macro ("Sh", _) -> true | _ -> false
-  ) lines
-
-(* extract renderable text from an mdoc line, skipping structural macros *)
-let mdoc_text_of line =
-  match line with
-  | Text text -> Some (strip_groff_escapes text)
-  | Macro (macro_name, args) ->
-    (match macro_name with
-     | "Pp" | "Bl" | "El" | "Sh" | "Ss" | "Os" | "Dd" | "Dt"
-     | "Oo" | "Oc" | "Op" -> None
-     | _ ->
-       let text = strip_groff_escapes args |> String.trim in
-       if text = "" then None else Some text)
-  | _ -> None
-
-(* parse an mdoc .It (list item) line that contains flag definitions.
- * mdoc .It lines look like: ".It Fl v Ar file"
- * where Fl = flag, Ar = argument. we extract the flag name and parameter.
- *
- * only handles single-char short flags and long flags starting with '-'.
- * mdoc's .Fl macro automatically prepends '-', so "Fl v" means "-v"
- * and "Fl -verbose" means "--verbose". *)
-let parse_mdoc_it args =
-  let words = String.split_on_char ' ' args
-              |> List.filter (fun word -> word <> "" && word <> "Ns") in
-  let param = match words with
-    | _ :: _ :: "Ar" :: param_name :: _ -> Some (Mandatory param_name)
-    | _ -> None
-  in
-  match words with
-  | "Fl" :: char_str :: _ when String.length char_str = 1 && is_alphanumeric char_str.[0] ->
-    Some { switch = Short char_str.[0]; param; desc = "" }
-  | "Fl" :: name :: _ when String.length name > 1 && name.[0] = '-' ->
-    Some { switch = Long (String.sub name 1 (String.length name - 1)); param; desc = "" }
-  | _ -> None
-
-(* extract a positional argument from an mdoc line (.Ar or .Op Ar) *)
-let positional_of_mdoc_line optional args =
-  let words = String.split_on_char ' ' args
-              |> List.filter (fun word -> word <> "") in
-  match words with
-  | name :: _ when String.length name >= 2 ->
-    Some { pos_name = String.lowercase_ascii name;
-           optional; variadic = List.mem "..." words }
-  | _ -> None
-
-(* parse an entire mdoc-format manpage.
- * walks through all classified lines looking for:
- *   1. .Bl/.It/.El list blocks containing flag definitions
- *   2. .Sh SYNOPSIS sections containing positional arguments (.Ar, .Op Ar)
- *
- * the scan function handles nested .Bl blocks — if the first .It in a .Bl
- * starts with .Fl (a flag), the entire list is parsed as options. otherwise
- * the list is skipped (it might be an example list or a description list). *)
-let parse_mdoc_lines lines =
-  let classified = List.map classify_line lines in
-  (* skip lines until the matching .El closing tag *)
-  let rec skip_to_el = function
-    | [] -> []
-    | Macro ("El", _) :: rest -> rest
-    | _ :: rest -> skip_to_el rest
-  in
-  (* collect description text lines until the next structural macro *)
-  let rec collect_desc acc = function
-    | [] -> (acc, [])
-    | (Macro ("It", _) | Macro ("El", _)
-      | Macro ("Sh", _) | Macro ("Ss", _)) :: _ as rest -> (acc, rest)
-    | line :: rest ->
-      collect_desc (match mdoc_text_of line with Some text -> text :: acc | None -> acc) rest
-  in
-  (* convenience: collect desc and join into a trimmed string *)
-  let desc_of rest =
-    let parts, rest = collect_desc [] rest in
-    (String.concat " " (List.rev parts) |> String.trim, rest)
-  in
-  (* parse a single .It entry: extract flag, collect description *)
-  let parse_it args rest entries =
-    let desc, rest = desc_of rest in
-    let entries = match parse_mdoc_it args with
-      | Some entry -> { entry with desc } :: entries
-      | None -> entries
-    in
-    (entries, rest)
-  in
-  (* parse all .It entries within a .Bl/.El option list *)
-  let rec parse_option_list entries = function
-    | [] -> (entries, [])
-    | Macro ("El", _) :: rest -> (entries, rest)
-    | Macro ("It", args) :: rest ->
-      let entries, rest = parse_it args rest entries in
-      parse_option_list entries rest
-    | _ :: rest -> parse_option_list entries rest
-  in
-  (* main scan: walk through all lines, collecting flags and positionals *)
-  let rec scan entries positionals = function
-    | [] -> (entries, positionals)
-    | Macro ("Bl", _) :: Macro ("It", it_args) :: rest ->
-      (* peek at first .It to decide if this is a flag list *)
-      let words = String.split_on_char ' ' it_args
-                  |> List.filter (fun word -> word <> "") in
-      if (match words with "Fl" :: _ -> true | _ -> false) then
-        let entries, rest = parse_it it_args rest entries in
-        let entries, rest = parse_option_list entries rest in
-        scan entries positionals rest
-      else
-        scan entries positionals (skip_to_el rest)
-    | Macro ("Bl", _) :: rest -> scan entries positionals (skip_to_el rest)
-    | Macro ("Sh", args) :: rest
-      when String.uppercase_ascii (String.trim args) = "SYNOPSIS" ->
-      let positionals, rest = parse_synopsis positionals rest in
-      scan entries positionals rest
-    | _ :: rest -> scan entries positionals rest
-  and parse_synopsis positionals = function
-    | [] -> (positionals, [])
-    | Macro ("Sh", _) :: _ as rest -> (positionals, rest)
-    | Macro ("Ar", args) :: rest ->
-      let positionals = match positional_of_mdoc_line false args with
-        | Some p -> p :: positionals | None -> positionals in
-      parse_synopsis positionals rest
-    | Macro ("Op", args) :: rest ->
-      let words = String.split_on_char ' ' args
-                  |> List.filter (fun word -> word <> "") in
-      let positionals = match words with
-        | "Ar" :: _ ->
-          (match positional_of_mdoc_line true args with
-           | Some p -> p :: positionals | None -> positionals)
-        | _ -> positionals in
-      parse_synopsis positionals rest
-    | _ :: rest -> parse_synopsis positionals rest
-  in
-  let entries, positionals = scan [] [] classified in
-  (* deduplicate positionals by name, preserving order *)
-  let positionals =
-    List.rev positionals
-    |> List.fold_left (fun (seen, acc) p ->
-         if List.mem p.pos_name seen then (seen, acc)
-         else (p.pos_name :: seen, p :: acc)
-       ) ([], [])
-    |> snd |> List.rev
-  in
-  { entries = List.rev entries; subcommands = []; positionals; description = "" }
-
-(* --- COMMANDS section subcommand extraction ---
- * some manpages (notably systemctl) have a dedicated COMMANDS section
- * listing subcommands with descriptions. these use .PP + bold name +
- * .RS/.RE blocks:
- *   .PP
- *   \fBstart\fR \fIUNIT\fR...
- *   .RS 4
- *   Start (activate) one or more units.
- *   .RE
- *
- * we extract the bold command name and first sentence of description. *)
-
-let extract_commands_section lines =
-  let classified = List.map classify_line lines in
-  (* collect all lines from the current position until the next .SH *)
-  let rec collect_until_next_sh lines acc =
-    match lines with
-    | [] -> List.rev acc
-    | Macro ("SH", _) :: _ -> List.rev acc
-    | line :: rest -> collect_until_next_sh rest (line :: acc)
-  in
-  let is_commands_section name =
-    let upper = String.uppercase_ascii (String.trim name) in
-    upper = "COMMANDS" || upper = "COMMAND"
-  in
-  (* find all COMMANDS/.COMMAND sections and collect their lines *)
-  let rec find_commands acc = function
-    | [] -> List.rev acc
-    | Macro ("SH", args) :: rest when is_commands_section args ->
-      find_commands (collect_until_next_sh rest [] :: acc) rest
-    | _ :: rest -> find_commands acc rest
-  in
-  let sections = find_commands [] classified in
-  List.concat sections
-
-(* extract subcommand name from a bold groff text like
- *   "\fBlist\-units\fR [\fIPATTERN\fR...]" -> "list-units"
- *
- * validates that the extracted name looks like a subcommand: lowercase,
- * at least 2 chars, no leading dash. falls back to stripping all groff
- * and taking the first word if no \fB...\fR wrapper is found. *)
-let extract_bold_command_name text =
-  let trimmed = String.trim text in
-  (* check whether a string looks like a valid subcommand name *)
-  let is_valid_subcmd name =
-    String.length name >= 2
-    && name.[0] <> '-'
-    && String.for_all (fun char_val ->
-         (char_val >= 'a' && char_val <= 'z')
-         || (char_val >= '0' && char_val <= '9')
-         || char_val = '-' || char_val = '_'
-       ) name
-  in
-  (* look for \fB...\fR at the start *)
-  if String.length trimmed >= 4
-     && trimmed.[0] = '\\' && trimmed.[1] = 'f' && trimmed.[2] = 'B' then
-    let start = 3 in
-    let end_marker = "\\fR" in
-    match String.split_on_char '\\' (String.sub trimmed start (String.length trimmed - start)) with
-    | name_part :: _ ->
-      let name = strip_groff_escapes ("\\fB" ^ name_part ^ end_marker) |> String.trim in
-      if is_valid_subcmd name then Some name else None
-    | [] -> None
-  else
-    (* try already-stripped text — take the first word *)
-    let stripped = strip_groff_escapes trimmed in
-    let first_word = match String.split_on_char ' ' stripped with
-      | word :: _ -> word | [] -> "" in
-    if is_valid_subcmd first_word then Some first_word else None
-
-(* walk through commands section lines, extracting subcommand name+description
- * pairs from .PP + Text + .RS/.RE blocks *)
-let extract_subcommands_from_commands lines =
-  let rec walk lines acc =
-    match lines with
-    | [] -> List.rev acc
-    | Macro ("PP", _) :: rest ->
-      begin match rest with
-        | Text tag :: rest2 ->
-          (* check if this is a subcommand (bold name, not a flag) *)
-          begin match extract_bold_command_name tag with
-            | Some name ->
-              (* collect description from .RS/.RE block *)
-              let rec collect_desc lines desc_acc =
-                match lines with
-                | Macro ("RS", _) :: rest3 ->
-                  collect_in_rs rest3 desc_acc
-                | Text text :: rest3 ->
-                  collect_desc rest3 (text :: desc_acc)
-                | _ -> (String.concat " " (List.rev desc_acc), lines)
-              and collect_in_rs lines desc_acc =
-                match lines with
-                | Macro ("RE", _) :: rest3 ->
-                  (String.concat " " (List.rev desc_acc), rest3)
-                | Text text :: rest3 ->
-                  collect_in_rs rest3 (text :: desc_acc)
-                | Macro ("PP", _) :: _ | Macro ("SH", _) :: _ | Macro ("SS", _) :: _ ->
-                  (String.concat " " (List.rev desc_acc), lines)
-                | _ :: rest3 -> collect_in_rs rest3 desc_acc
-                | [] -> (String.concat " " (List.rev desc_acc), [])
-              in
-              let (desc, rest3) = collect_desc rest2 [] in
-              let desc = String.trim desc in
-              (* take first sentence as description *)
-              let short_desc = match String.split_on_char '.' desc with
-                | first :: _ when String.length first > 0 -> String.trim first
-                | _ -> desc in
-              let sc : subcommand = { name; desc = short_desc } in
-              walk rest3 (sc :: acc)
-            | None -> walk rest2 acc
-          end
-        | _ -> walk rest acc
-      end
-    | _ :: rest -> walk rest acc
-  in
-  walk lines []
-
-(* --- top-level api --- *)
-
-(* parse a manpage from its classified lines.
- * auto-detects mdoc vs groff format. for groff, runs the multi-strategy
- * extraction pipeline: extract OPTIONS section -> try all strategies ->
- * pick best -> extract SYNOPSIS positionals -> extract COMMANDS subcommands. *)
-let parse_manpage_lines lines =
-  if is_mdoc lines then
-    parse_mdoc_lines lines
-  else begin
-    let options_section = extract_options_section lines in
-    let entries = extract_entries options_section in
-    let positionals = extract_synopsis_positionals_lines lines in
-    let commands_section = extract_commands_section lines in
-    let subcommands = extract_subcommands_from_commands commands_section in
-    { entries; subcommands; positionals; description = "" }
-  end
-
-(* parse a manpage from its raw string contents.
- * splits into lines, parses, then extracts the NAME section description. *)
-let parse_manpage_string contents =
-  let lines = String.split_on_char '\n' contents in
-  let result = parse_manpage_lines lines in
-  let description = match extract_name_description contents with
-    | Some desc -> desc | None -> "" in
-  { result with description }
-
-(* --- clap-style SUBCOMMAND section extraction ---
- * manpages generated by clap (rust's cli arg parser) put each subcommand
- * under its own .SH SUBCOMMAND header with a Usage: line giving the name.
- * this is unusual — most tools list subcommands under a single COMMANDS section.
- *
- * we collect all .SH SUBCOMMAND/SUBCOMMANDS sections, find the Usage: line
- * in each to get the subcommand name, then extract flag entries from the
- * section body. returns triples of (name, description, help_result). *)
-let extract_subcommand_sections contents =
-  let lines = String.split_on_char '\n' contents in
-  let classified = List.map classify_line lines in
-  (* split into sections at .SH boundaries, keeping only SUBCOMMAND(S) sections *)
-  let rec collect_sections acc current_name current_lines = function
-    | [] ->
-      let acc = match current_name with
-        | Some section_name -> (section_name, List.rev current_lines) :: acc
-        | None -> acc in
-      List.rev acc
-    | Macro ("SH", args) :: rest ->
-      let acc = match current_name with
-        | Some section_name -> (section_name, List.rev current_lines) :: acc
-        | None -> acc in
-      let name = String.uppercase_ascii (String.trim args) in
-      if name = "SUBCOMMAND" || name = "SUBCOMMANDS" then
-        collect_sections acc (Some name) [] rest
-      else
-        collect_sections acc None [] rest
-    | line :: rest ->
-      collect_sections acc current_name (line :: current_lines) rest
-  in
-  let sections = collect_sections [] None [] classified in
-  (* for each SUBCOMMAND section, extract name from Usage: line and parse entries *)
-  let usage_re = Str.regexp {|Usage: \([a-zA-Z0-9_-]+\)|} in
-  let matches_usage text =
-    try ignore (Str.search_forward usage_re text 0); Some (Str.matched_group 1 text)
-    with Not_found -> None in
-  List.filter_map (fun (_header, section_lines) ->
-    (* scan section lines for the Usage: line to get the subcommand name *)
-    let name, desc_lines =
-      List.fold_left (fun (name, desc_lines) line ->
-        match name with
-        | Some _ -> (name, desc_lines)
-        | None ->
-          match line with
-          | Text text ->
-            (match matches_usage text with
-             | Some _ as found -> (found, desc_lines)
-             | None -> (None, text :: desc_lines))
-          | Macro (("TP" | "B" | "BI" | "BR"), args) ->
-            let text = strip_inline_macro_args args |> strip_groff_escapes |> String.trim in
-            (matches_usage text, desc_lines)
-          | _ -> (None, desc_lines)
-      ) (None, []) section_lines in
-    match name with
-    | None -> None
-    | Some subcmd_name ->
-      let entries = extract_entries section_lines in
-      let desc = String.concat " " (List.rev desc_lines)
-                 |> strip_groff_escapes |> String.trim in
-      (* strip backtick-quoted words *)
-      let desc = Str.global_replace (Str.regexp "`\\([^`]*\\)`") "\\1" desc in
-      Some (subcmd_name, desc, { entries; subcommands = []; positionals = []; description = desc })
-  ) sections
-
-(* read a manpage file from disk. handles .gz compressed files (the common
- * case — most installed manpages are gzipped) using the Gzip library.
- * plain text files are read directly. *)
-let read_manpage_file path =
-  if Filename.check_suffix path ".gz" then begin
-    let ic = Gzip.open_in path in
-    let buffer = Buffer.create 8192 in
-    let chunk = Bytes.create 8192 in
-    (try while true do
-       let bytes_read = Gzip.input ic chunk 0 8192 in
-       if bytes_read = 0 then raise Exit
-       else Buffer.add_subbytes buffer chunk 0 bytes_read
-     done with Exit | End_of_file -> ());
-    Gzip.close_in ic;
-    Buffer.contents buffer
-  end else begin
-    let ic = open_in path in
-    let size = in_channel_length ic in
-    let bytes = Bytes.create size in
-    really_input ic bytes 0 size;
-    close_in ic;
-    Bytes.to_string bytes
-  end
-
-(* convenience: read + parse a manpage file in one step *)
-let parse_manpage_file path =
-  read_manpage_file path |> parse_manpage_string
diff --git a/lib/nushell.ml b/lib/nushell.ml
deleted file mode 100644
index b5e4d4f..0000000
--- a/lib/nushell.ml
+++ /dev/null
@@ -1,253 +0,0 @@
-(* nushell.ml — generate nushell extern definitions from parsed help data.
- *
- * this module is the code generation backend. it takes a help_result (from
- * the parser or manpage modules) and produces nushell source code that
- * defines `extern` declarations — nushell's mechanism for teaching the shell
- * about external commands' flags and subcommands so it can offer completions.
- *
- * it also maintains a list of nushell's built-in commands to avoid generating
- * extern definitions that would shadow them.
- *
- * key responsibilities:
- *   - deduplicating flag entries (same flag from multiple help sources)
- *   - mapping parameter names to nushell types (path, int, string)
- *   - formatting flags in nushell syntax: --flag(-f): type  # description
- *   - handling positional arguments with nushell's ordering constraints
- *   - escaping special characters for nushell string literals
- *)
-
-open Parser
-
-module SSet = Set.Make(String)
-module SMap = Map.Make(String)
-module CSet = Set.Make(Char)
-
-(* nushell built-in commands and keywords — we must never generate `extern`
- * definitions for these because it would shadow nushell's own implementations.
- * this list is maintained manually and should be updated with new nushell releases. *)
-let nushell_builtins = [
-  "alias"; "all"; "ansi"; "any"; "append"; "ast"; "attr";
-  "bits"; "break"; "bytes";
-  "cal"; "cd"; "char"; "chunk-by"; "chunks"; "clear"; "collect";
-  "columns"; "commandline"; "compact"; "complete"; "config"; "const";
-  "continue"; "cp";
-  "date"; "debug"; "decode"; "def"; "default"; "describe"; "detect";
-  "do"; "drop"; "du";
-  "each"; "echo"; "encode"; "enumerate"; "error"; "every"; "exec";
-  "exit"; "explain"; "explore"; "export"; "export-env"; "extern";
-  "fill"; "filter"; "find"; "first"; "flatten"; "for"; "format"; "from";
-  "generate"; "get"; "glob"; "grid"; "group-by";
-  "hash"; "headers"; "help"; "hide"; "hide-env"; "histogram";
-  "history"; "http";
-  "if"; "ignore"; "input"; "insert"; "inspect"; "interleave"; "into";
-  "is-admin"; "is-empty"; "is-not-empty"; "is-terminal"; "items";
-  "job"; "join";
-  "keybindings"; "kill";
-  "last"; "length"; "let"; "let-env"; "lines"; "load-env"; "loop"; "ls";
-  "match"; "math"; "merge"; "metadata"; "mkdir"; "mktemp"; "module";
-  "move"; "mut"; "mv";
-  "nu-check"; "nu-highlight";
-  "open"; "overlay";
-  "panic"; "par-each"; "parse"; "path"; "plugin"; "port"; "prepend"; "print"; "ps";
-  "query";
-  "random"; "reduce"; "reject"; "rename"; "return"; "reverse"; "rm";
-  "roll"; "rotate"; "run-external";
-  "save"; "schema"; "scope"; "select"; "seq"; "shuffle"; "skip"; "sleep";
-  "slice"; "sort"; "sort-by"; "source"; "source-env"; "split"; "start";
-  "stor"; "str"; "sys";
-  "table"; "take"; "tee"; "term"; "timeit"; "to"; "touch"; "transpose";
-  "try"; "tutor";
-  "ulimit"; "umask"; "uname"; "uniq"; "uniq-by"; "unlet"; "update";
-  "upsert"; "url"; "use";
-  "values"; "version"; "view";
-  "watch"; "where"; "which"; "while"; "whoami"; "window"; "with-env"; "wrap";
-  "zip";
-]
-
-(* lazily constructed set for fast membership checks against builtins *)
-let builtin_set = lazy (SSet.of_list nushell_builtins)
-
-(* returns true if the given command name collides with a nushell built-in *)
-let is_nushell_builtin cmd =
-  SSet.mem cmd (Lazy.force builtin_set)
-
-(* deduplicate flag entries that refer to the same flag.
- * when the same flag appears multiple times (e.g. from overlapping manpage
- * sections or repeated help text), we keep the "best" version using a score:
- *   - both short+long form present: +10 (most informative)
- *   - has a parameter: +5
- *   - description length bonus: up to +5
- *
- * after deduplication by long name, we also remove standalone short flags
- * whose letter is already covered by a Both(short, long) entry. this prevents
- * emitting both "-v" and "--verbose(-v)" which nushell would reject as a
- * duplicate. the filtering preserves original ordering from the help text. *)
-let dedup_entries entries =
-  (* produce a canonical key for each entry based on its switch form *)
-  let key_of entry =
-    match entry.switch with
-    | Short c -> Printf.sprintf "-%c" c
-    | Long l | Both (_, l) -> Printf.sprintf "--%s" l
-  in
-  (* compute a quality score for ranking duplicate entries *)
-  let score entry =
-    let switch_bonus = match entry.switch with Both _ -> 10 | _ -> 0 in
-    let param_bonus = match entry.param with Some _ -> 5 | None -> 0 in
-    let desc_bonus = min 5 (String.length entry.desc / 10) in
-    switch_bonus + param_bonus + desc_bonus
-  in
-  (* fold over entries, keeping only the highest-scored entry per key *)
-  let best = List.fold_left (fun acc entry ->
-    let key = key_of entry in
-    match SMap.find_opt key acc with
-    | Some prev when score prev >= score entry -> acc
-    | _ -> SMap.add key entry acc
-  ) SMap.empty entries in
-  (* collect all short-flag characters that are already part of a Both entry,
-   * so we can suppress standalone Short entries for the same character *)
-  let covered = SMap.fold (fun _ entry acc ->
-    match entry.switch with
-    | Both (c, _) -> CSet.add c acc
-    | _ -> acc
-  ) best CSet.empty in
-  (* emit entries in original order, skipping duplicates and covered shorts *)
-  List.fold_left (fun (seen, acc) entry ->
-    let key = key_of entry in
-    if SSet.mem key seen then (seen, acc)
-    else match entry.switch with
-    | Short c when CSet.mem c covered -> (seen, acc)
-    | _ -> (SSet.add key seen, SMap.find key best :: acc)
-  ) (SSet.empty, []) entries |> snd |> List.rev
-
-(* map parameter names to nushell types.
- * nushell's `extern` declarations use typed parameters, so we infer the type
- * from the parameter name. file/path-related names become "path" (enables
- * path completion), numeric names become "int", everything else is "string". *)
-let nushell_type_of_param = function
-  | "FILE" | "file" | "PATH" | "path" | "DIR" | "dir" | "DIRECTORY"
-  | "FILENAME" | "PATTERNFILE" -> "path"
-  | "NUM" | "N" | "COUNT" | "NUMBER" | "int" | "INT" | "COLS" | "WIDTH"
-  | "LINES" | "DEPTH" | "depth" -> "int"
-  | _ -> "string"
-
-(* escape a string for use inside nushell double-quoted string literals.
- * only double quotes and backslashes need escaping in nushell's syntax. *)
-let escape_nu s =
-  if not (String.contains s '"') && not (String.contains s '\\') then s
-  else begin
-    let buf = Buffer.create (String.length s + 4) in
-    String.iter (fun c -> match c with
-      | '"' -> Buffer.add_string buf "\\\""
-      | '\\' -> Buffer.add_string buf "\\\\"
-      | _ -> Buffer.add_char buf c
-    ) s;
-    Buffer.contents buf
-  end
-
-(* format a single flag entry as a nushell `extern` parameter line.
- * output examples:
- *   "    --verbose(-v)                       # increase verbosity"
- *   "    --output(-o): path                  # write output to file"
- *   "    -n: int                             # number of results"
- *
- * the description is right-padded to column 40 with a "# " comment prefix.
- * nushell's syntax for combined short+long is "--long(-s)". *)
-let format_flag entry =
-  let name = match entry.switch with
-    | Both (short_char, l) -> Printf.sprintf "--%s(-%c)" l short_char
-    | Long l -> Printf.sprintf "--%s" l
-    | Short short_char -> Printf.sprintf "-%c" short_char
-  in
-  let typed = match entry.param with
-    | Some (Mandatory p) | Some (Optional p) -> ": " ^ nushell_type_of_param p
-    | None -> ""
-  in
-  let flag = "    " ^ name ^ typed in
-  if String.length entry.desc = 0 then flag
-  else
-    let pad_len = max 1 (40 - String.length flag) in
-    flag ^ String.make pad_len ' ' ^ "# " ^ entry.desc
-
-(* format a positional argument as a nushell `extern` parameter line.
- * nushell syntax: "...name: type" for variadic, "name?: type" for optional.
- * hyphens in names are converted to underscores since nushell identifiers
- * cannot contain hyphens. *)
-let format_positional positional =
-  let name = String.map (function '-' -> '_' | c -> c) positional.pos_name in
-  let prefix = if positional.variadic then "..." else "" in
-  let suffix = if positional.optional && not positional.variadic then "?" else "" in
-  let typ = nushell_type_of_param (String.uppercase_ascii positional.pos_name) in
-  Printf.sprintf "    %s%s%s: %s" prefix name suffix typ
-
-(* enforce nushell's positional argument ordering rules:
- *   1. no required positional may follow an optional one
- *   2. at most one variadic ("rest") parameter is allowed
- *
- * if a required positional appears after an optional one, it is silently
- * promoted to optional. duplicate variadic params are dropped.
- * uses a fold to track the state across the list in one pass. *)
-let fixup_positionals positionals =
-  List.fold_left (fun (seen_optional, seen_variadic, acc) positional ->
-    if positional.variadic then
-      (* only allow the first variadic parameter *)
-      if seen_variadic then (seen_optional, seen_variadic, acc)
-      else (true, true, positional :: acc)
-    else if seen_optional then
-      (* once we've seen an optional, all subsequent must be optional too *)
-      (true, seen_variadic, { positional with optional = true } :: acc)
-    else
-      (positional.optional, seen_variadic, positional :: acc)
-  ) (false, false, []) positionals
-  |> fun (_, _, acc) -> List.rev acc
-
-(* generate the full nushell `extern` block for a command.
- * produces output like:
- *   export extern "git add" [
- *     ...pathspec?: path
- *     --verbose(-v)              # be verbose
- *     --dry-run(-n)              # dry run
- *   ]
- *
- * subcommands that weren't resolved into their own full definitions get
- * stub `extern` blocks with just a comment containing their description:
- *   export extern "git stash" [  # stash changes
- *   ]
- *)
-let extern_of cmd_name result =
-  let entries = dedup_entries result.entries in
-  let escaped_name = escape_nu cmd_name in
-  let positionals = fixup_positionals result.positionals in
-  (* format all positional and flag lines, each terminated with a newline *)
-  let pos_lines = List.map (fun positional -> format_positional positional ^ "\n") positionals in
-  let flags = List.map (fun entry -> format_flag entry ^ "\n") entries in
-  let main = Printf.sprintf "export extern \"%s\" [\n%s%s]\n" escaped_name (String.concat "" pos_lines) (String.concat "" flags) in
-  (* generate stub extern blocks for unresolved subcommands *)
-  let subs = List.map (fun (subcommand : subcommand) ->
-    Printf.sprintf "\nexport extern \"%s %s\" [  # %s\n]\n"
-      escaped_name (escape_nu subcommand.name) (escape_nu subcommand.desc)
-  ) result.subcommands in
-  String.concat "" (main :: subs)
-
-(* public alias for extern_of — this is the main entry point for callers *)
-let generate_extern = extern_of
-
-(* derive a nushell `module` name from a command name.
- * replaces non-alphanumeric characters with hyphens and appends "-completions".
- * e.g. "git" becomes "git-completions", "docker-compose" stays "docker-compose-completions" *)
-let module_name_of cmd_name =
-  let s = String.map (function
-    | ('a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_') as c -> c | _ -> '-') cmd_name in
-  s ^ "-completions"
-
-(* generate a complete nushell `module` wrapping the `extern`.
- * output: "module git-completions { ... }\n\nuse git-completions *\n"
- * the `use` at the end makes the `extern` immediately available in scope. *)
-let generate_module cmd_name result =
-  let mod_name = module_name_of cmd_name in
-  Printf.sprintf "module %s {\n%s}\n\nuse %s *\n" mod_name (extern_of cmd_name result) mod_name
-
-(* convenience wrapper: generate an `extern` from just a list of entries
- * (no subcommands, positionals, or description). used when we only have
- * flag data and nothing else. *)
-let generate_extern_from_entries cmd_name entries =
-  generate_extern cmd_name { entries; subcommands = []; positionals = []; description = "" }
diff --git a/lib/parser.ml b/lib/parser.ml
deleted file mode 100644
index f20aae5..0000000
--- a/lib/parser.ml
+++ /dev/null
@@ -1,814 +0,0 @@
-(* parser.ml — parse --help output into structured flag/subcommand/positional data.
- *
- * this module is the core of inshellah's help-text understanding. it takes the
- * raw text that a cli tool prints when you run `cmd --help` and extracts:
- *   - flag entries (short/long switches with optional parameters and descriptions)
- *   - subcommand listings (name + description pairs)
- *   - positional arguments (from usage lines)
- *
- * the parser is built on Angstrom (a monadic parser combinator library) for the
- * structured flag/subcommand extraction, with hand-rolled imperative parsers for
- * usage-line positional extraction (where the format is too varied for clean
- * combinator composition).
- *
- * key design decisions:
- *   - the Angstrom parser runs in prefix-consume mode — it doesn't need to parse
- *     the entire input, just extract what it can recognize. unrecognized lines are
- *     skipped via skip_non_option_line.
- *   - multi-line descriptions are handled via indentation-based continuation:
- *     lines indented 8+ spaces that don't start with '-' are folded into the
- *     previous entry's description.
- *   - subcommand detection uses a heuristic: lines with a name followed by 2+
- *     spaces then a description, where the name is at least 2 chars. section
- *     headers (like "arguments:") toggle whether name-description pairs are
- *     treated as subcommands or positionals.
- *   - positional extraction has two paths: usage-line parsing (the common case)
- *     and CLI11's explicit "positionals:" section format.
- *)
-
-open Angstrom
-
-(* strip ansi escape sequences and osc hyperlinks from --help output.
- * many modern cli tools emit colored/styled output even when piped,
- * so we need to clean this before parsing. handles:
- *   - csi sequences (esc [ ... final_byte) — colors, cursor movement, etc.
- *   - osc sequences (esc ] ... bel/st) — hyperlinks, window titles, etc.
- *   - other two-byte esc+char sequences *)
-let strip_ansi s =
-  let buf = Buffer.create (String.length s) in
-  let len = String.length s in
-  let pos = ref 0 in
-  while !pos < len do
-    if !pos + 1 < len && Char.code s.[!pos] = 0x1b then begin
-      let next = s.[!pos + 1] in
-      if next = '[' then begin
-        (* csi sequence: esc [ ... final_byte *)
-        pos := !pos + 2;
-        while !pos < len && not (s.[!pos] >= '@' && s.[!pos] <= '~') do incr pos done;
-        if !pos < len then incr pos
-      end else if next = ']' then begin
-        (* osc sequence: esc ] ... (terminated by bel or esc \) *)
-        pos := !pos + 2;
-        let terminated = ref false in
-        while !pos < len && not !terminated do
-          if s.[!pos] = '\x07' then
-            (incr pos; terminated := true)
-          else if !pos + 1 < len && Char.code s.[!pos] = 0x1b && s.[!pos + 1] = '\\' then
-            (pos := !pos + 2; terminated := true)
-          else
-            incr pos
-        done
-      end else begin
-        (* other esc sequence, skip esc + one char *)
-        pos := !pos + 2
-      end
-    end else begin
-      Buffer.add_char buf s.[!pos];
-      incr pos
-    end
-  done;
-  Buffer.contents buf
-
-(* --- character class predicates ---
- * used throughout the Angstrom parsers to classify characters.
- * separated out for readability and reuse. *)
-
-let is_whitespace = function ' ' | '\t' -> true | _ -> false
-
-let is_alphanumeric = function
-  | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' -> true
-  | _ -> false
-
-(* characters allowed inside parameter names like FILE, output-dir, etc. *)
-let is_param_char = function
-  | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '-' -> true
-  | _ -> false
-
-(* used to detect ALL_CAPS parameter names like FILE, TIME_STYLE *)
-let is_upper_or_underscore = function
-  | 'A' .. 'Z' | '_' -> true
-  | _ -> false
-
-(* characters allowed in long flag names (--foo-bar, --enable-feature2) *)
-let is_long_char = function
-  | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '-' -> true
-  | _ -> false
-
-(* --- core types ---
- * these types represent the structured output of parsing a help text.
- * they are shared across the entire codebase (nushell codegen, store, manpage parser).
- *
- * switch: a flag can be short-only (-v), long-only (--verbose), or both (-v, --verbose).
- *   the both variant keeps the pair together so nushell can emit "--verbose(-v)".
- *
- * param: flags can take mandatory (--output FILE) or optional (--color[=WHEN]) values.
- *
- * entry: one complete flag definition — its switch form, optional parameter, and
- *   the description text (potentially multi-line, already joined).
- *
- * help_result: the complete parsed output for a single command. *)
-type switch = Short of char | Long of string | Both of char * string
-type param = Mandatory of string | Optional of string
-type entry = { switch : switch; param : param option; desc : string }
-type subcommand = { name : string; desc : string }
-type positional = { pos_name : string; optional : bool; variadic : bool }
-type help_result = { entries : entry list; subcommands : subcommand list; positionals : positional list; description : string }
-
-(* --- low-level Angstrom combinators ---
- * building blocks for all the parsers below. *)
-
-(* consume horizontal whitespace (spaces and tabs) without crossing lines *)
-let inline_ws = skip_while (function ' ' | '\t' -> true | _ -> false)
-(* end of line — matches either a newline or end of input.
- * this is the permissive version used in most places. *)
-let eol = end_of_line <|> end_of_input
-(* strict end of line — must consume an actual newline character.
- * used in skip_non_option_line so we don't accidentally match eof
- * and consume it when we shouldn't. *)
-let eol_strict = end_of_line
-
-(* --- switch and parameter parsers ---
- * parse the flag name portion of an option line, e.g. "-v", "--verbose" *)
-
-let short_switch = char '-' *> satisfy is_alphanumeric
-let long_switch = string "--" *> take_while1 is_long_char
-let comma = char ',' *> inline_ws
-
-(* parameter parsers — handle the various syntaxes tools use to indicate
- * that a flag takes a value. the formats are surprisingly diverse:
- *   --output=FILE        (eq_man_param — mandatory, common in gnu tools)
- *   --color[=WHEN]       (eq_opt_param — optional with = syntax)
- *   --depth DEPTH        (space_upper_param — space-separated ALL_CAPS)
- *   --file <path>        (space_angle_param — angle brackets)
- *   --file [<path>]      (space_opt_angle_param — optional angle brackets)
- *   --format string      (space_type_param — go/cobra lowercase type word)
- *)
-let eq_opt_param =
-  string "[=" *> take_while1 is_param_char <* char ']' >>| fun a -> Optional a
-
-let eq_man_param =
-  char '=' *> take_while1 is_param_char >>| fun a -> Mandatory a
-
-(* space-separated ALL_CAPS param: e.g. " FILE", " TIME_STYLE".
- * peek ahead and check the first char is uppercase, then validate
- * the entire word is ALL_CAPS. prevents false positives where a
- * description word like "Do" or "Set" immediately follows the flag name.
- * digits are allowed (e.g. "SHA256") but lowercase chars disqualify. *)
-let space_upper_param =
-  char ' ' *> peek_char_fail >>= fun c ->
-  if is_upper_or_underscore c then
-    take_while1 is_param_char >>= fun name ->
-    if String.length name >= 1 && String.for_all (fun c -> is_upper_or_underscore c || c >= '0' && c <= '9') name then
-      return (Mandatory name)
-    else
-      fail "not an all-caps param"
-  else
-    fail "not an uppercase param"
-
-(* angle-bracket param: e.g. "<file>", "<notation>" *)
-let angle_param =
-  char '<' *> take_while1 (fun c -> c <> '>') <* char '>' >>| fun name ->
-  Mandatory name
-
-(* space + angle bracket param *)
-let space_angle_param =
-  char ' ' *> angle_param
-
-(* optional angle bracket param: [<file>] *)
-let opt_angle_param =
-  char '[' *> char '<' *> take_while1 (fun c -> c <> '>') <* char '>' <* char ']'
-  >>| fun name -> Optional name
-
-let space_opt_angle_param =
-  char ' ' *> opt_angle_param
-
-(* go/cobra style: space + lowercase type word like "string", "list", "int".
- * capped at 10 chars to avoid consuming description words.
- * go's flag libraries commonly emit "--timeout duration" or "--name string"
- * where the type name is a short lowercase word. longer words are almost
- * certainly the start of a description, not a type annotation. *)
-let space_type_param =
-  char ' ' *> peek_char_fail >>= fun c ->
-  if c >= 'a' && c <= 'z' then
-    take_while1 (fun c -> c >= 'a' && c <= 'z') >>= fun name ->
-    if String.length name <= 10 then
-      return (Mandatory name)
-    else
-      fail "too long for type param"
-  else
-    fail "not a lowercase type param"
-
-(* try each parameter format in order of specificity. the ordering matters:
- * eq_opt_param must come before eq_man_param because "[=WHEN]" would otherwise
- * partially match as "=WHEN" then fail on the trailing "]". similarly,
- * space_opt_angle_param before space_angle_param to catch "[<file>]" before "<file>". *)
-let param_parser =
-  option None
-    (choice
-       [ eq_opt_param; eq_man_param;
-         space_opt_angle_param; space_angle_param;
-         space_upper_param; space_type_param ]
-     >>| fun a -> Some a)
-
-(* switch parser — handles the various ways help text presents flag names.
- * formats handled (in order of attempt):
- *   -a, --all       (short + comma + long — gnu style)
- *   -a --all        (short + space + long — some tools omit the comma)
- *   --all / -a      (long + slash + short — rare but seen in some tools)
- *   -a              (short only)
- *   --all           (long only)
- *
- * the ordering is critical because Angstrom's choice commits to
- * the first parser that makes progress. short_switch consumes "-a", so the
- * combined parsers must be tried before the short-only parser. *)
-let switch_parser =
-  choice
-    [
-      (short_switch >>= fun s ->
-       comma *> long_switch >>| fun l -> Both (s, l));
-      (short_switch >>= fun s ->
-       char ' ' *> long_switch >>| fun l -> Both (s, l));
-      (long_switch >>= fun l ->
-       inline_ws *> char '/' *> inline_ws *>
-       short_switch >>| fun s -> Both (s, l));
-      (short_switch >>| fun s -> Short s);
-      (long_switch >>| fun l -> Long l);
-    ]
-
-(* --- description parsing with multi-line continuation ---
- * descriptions in help text often wrap across multiple lines. the convention
- * is that continuation lines are deeply indented (8+ spaces) and don't start
- * with '-' (which would indicate a new flag entry). we peek ahead to check
- * indentation without consuming, then decide whether to fold the line in. *)
-
-(* take the rest of the line as text (does not consume the newline itself) *)
-let rest_of_line = take_till (fun c -> c = '\n' || c = '\r')
-
-(* check if a line is a continuation line: deeply indented, doesn't start with '-'.
- * tabs count as 8 spaces to match typical terminal rendering.
- * the 8-space threshold was chosen empirically — most help formatters indent
- * descriptions at least this much, while flag lines are indented 2-4 spaces. *)
-let continuation_line =
-  peek_string 1 >>= fun _ ->
-  (* must start with significant whitespace (8+ spaces or tab) *)
-  let count_indent s =
-    let indent = ref 0 in
-    let pos = ref 0 in
-    while !pos < String.length s do
-      (match s.[!pos] with
-       | ' ' -> incr indent
-       | '\t' -> indent := !indent + 8
-       | _ -> pos := String.length s);
-      incr pos
-    done;
-    !indent
-  in
-  available >>= fun avail ->
-  if avail = 0 then fail "eof"
-  else
-    (* peek ahead to see indentation level *)
-    peek_string (min avail 80) >>= fun preview ->
-    let indent = count_indent preview in
-    let trimmed = String.trim preview in
-    let starts_with_dash =
-      String.length trimmed > 0 && trimmed.[0] = '-'
-    in
-    if indent >= 8 && not starts_with_dash then
-      (* this is a continuation line — consume whitespace + text *)
-      inline_ws *> rest_of_line <* eol
-    else
-      fail "not a continuation line"
-
-(* parse description text: first line (after switch+param) plus any continuation lines.
- * blank continuation lines are filtered out, and all lines are trimmed and joined
- * with spaces into a single string. *)
-let description =
-  inline_ws *> rest_of_line <* eol >>= fun first_line ->
-  many continuation_line >>| fun cont_lines ->
-  let all = first_line :: cont_lines in
-  let all = List.filter (fun s -> String.length (String.trim s) > 0) all in
-  String.concat " " (List.map String.trim all)
-
-(* description that appears on a separate line below the flag.
- * this handles the clap (rust) "long" help format where flags and descriptions
- * are on separate lines:
- *   --verbose
- *           increase verbosity
- * here there's no inline description — just deeply-indented continuation lines. *)
-let description_below =
-  many1 continuation_line >>| fun lines ->
-  let lines = List.filter (fun s -> String.length (String.trim s) > 0) lines in
-  String.concat " " (List.map String.trim lines)
-
-(* --- line classification for skipping ---
- * the parser needs to skip lines it doesn't understand (section headers,
- * blank lines, description paragraphs not attached to a flag, etc.)
- * without consuming lines that are flag entries. *)
-
-(* peek ahead to check if the current line looks like a flag entry.
- * an option line starts with whitespace then '-'. *)
-let at_option_line =
-  peek_string 1 >>= fun _ ->
-  available >>= fun avail ->
-  if avail = 0 then fail "eof"
-  else
-    peek_string (min avail 40) >>= fun preview ->
-    let s = String.trim preview in
-    if String.length s > 0 && s.[0] = '-' then return ()
-    else fail "not an option line"
-
-(* skip a non-option line (section header, blank, description-only, etc.).
- * uses eol_strict (not eol) so it won't match at eof — this prevents the
- * parser from infinitely skipping at the end of input. if the line looks
- * like an option line (at_option_line succeeds), we deliberately fail so
- * that the entry parser gets a chance at it instead. *)
-let skip_non_option_line =
-  (at_option_line *> fail "this is an option line")
-  <|> (rest_of_line *> eol_strict *> return ())
-
-(* --- entry parsing --- *)
-
-(* parse a single flag entry: leading whitespace, then switch+param, then description.
- * the description can appear on the same line (inline) or on the next line (below).
- * if there's no description at all, we accept an empty string.
- * the (eol *> description_below) branch handles the clap long-help format. *)
-let entry =
-  inline_ws *>
-  lift2 (fun (sw, param) desc -> { switch = sw; param; desc })
-    (lift2 (fun a b -> (a, b)) switch_parser param_parser)
-    (description <|> (eol *> (description_below <|> return "")))
-
-(* --- subcommand parsing ---
- * subcommand lines in help text follow the pattern:
- *   "  name   description"
- * where the name and description are separated by 2+ spaces.
- * some tools also include argument placeholders between name and description:
- *   "  start UNIT...   start one or more units"
- *   "  list [PATTERN]  list matching units"
- *)
-
-let is_subcommand_char = function
-  | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' -> true
-  | _ -> false
-
-(* skip argument placeholders like UNIT..., [PATTERN...|PID...], <file>
- * that appear between the subcommand name and the description.
- * only consumes single-space gaps — the two-space gap before the
- * description is left for the main parser to use as the delimiter.
- *
- * this is a recursive (fix-point) parser that peeks ahead to distinguish
- * single-space argument gaps from the double-space description separator.
- * it accepts tokens that start with [, <, or are ALL_CAPS (with dots/pipes/
- * commas for variadic syntax). *)
-let skip_arg_placeholders =
-  fix (fun self ->
-    (* peek ahead: single space followed by arg-like token *)
-    available >>= fun avail ->
-    if avail < 2 then return ()
-    else
-    peek_string (min avail 2) >>= fun peek_two ->
-    if String.length peek_two >= 2 && peek_two.[0] = ' ' && peek_two.[1] <> ' ' then
-      (* single space — could be an arg placeholder *)
-      let next = peek_two.[1] in
-      if next = '[' || next = '<'
-         || (next >= 'A' && next <= 'Z') then
-        (* peek the full token to check if it's ALL_CAPS/brackets *)
-        peek_string (min avail 80) >>= fun preview ->
-        (* extract the token after the single space *)
-        let tok_start = 1 in
-        let token_end = ref tok_start in
-        while !token_end < String.length preview
-              && preview.[!token_end] <> ' '
-              && preview.[!token_end] <> '\n'
-              && preview.[!token_end] <> '\r' do
-          incr token_end
-        done;
-        let tok = String.sub preview tok_start (!token_end - tok_start) in
-        (* accept as placeholder if it starts with [ or < or is ALL_CAPS
-           (possibly with dots, pipes, dashes) *)
-        let is_placeholder =
-          tok.[0] = '[' || tok.[0] = '<'
-          || String.for_all (fun c ->
-               (c >= 'A' && c <= 'Z') || c = '_' || c = '-'
-               || c = '.' || c = '|' || c = ',' || (c >= '0' && c <= '9')
-             ) tok
-        in
-        if is_placeholder then
-          advance (1 + String.length tok) *> self
-        else return ()
-      else return ()
-    else return ())
-
-(* parse a subcommand entry line.
- * requires: name >= 2 chars, followed by 2+ spaces, then description.
- * the name is lowercased for consistent lookup.
- *
- * if the description starts with "- " (a dash-space prefix), it's stripped.
- * some tools format their subcommand lists as:
- *   "  add   - add a new item"
- * where the "- " is decorative, not part of the description. *)
-let subcommand_entry =
-  inline_ws *>
-  take_while1 is_subcommand_char >>= fun name ->
-  if String.length name < 2 then fail "subcommand name too short"
-  else
-  skip_arg_placeholders *>
-  char ' ' *> char ' ' *> inline_ws *>
-  rest_of_line <* eol >>| fun desc ->
-  { name = String.lowercase_ascii name;
-    desc = let trimmed = String.trim desc in
-      if String.length trimmed >= 2 && trimmed.[0] = '-' && trimmed.[1] = ' ' then
-        String.trim (String.sub trimmed 2 (String.length trimmed - 2))
-      else trimmed }
-
-(* --- section header detection ---
- * section headers are critical for disambiguating subcommands from positional
- * arguments. lines like "commands:" introduce subcommand sections, while
- * "arguments:" or "positionals:" introduce argument sections where the same
- * name+description format should not be treated as subcommands. *)
-
-(* detect section names that introduce positional argument listings.
- * the check is case-insensitive and strips trailing colons. *)
-let is_arg_section s =
-  let lc = String.lowercase_ascii (String.trim s) in
-  let base = if String.ends_with ~suffix:":" lc
-    then String.sub lc 0 (String.length lc - 1) |> String.trim
-    else lc in
-  base = "arguments" || base = "args" || base = "positionals"
-  || base = "positional arguments"
-
-(* a section header: left-aligned (or lightly indented, <= 4 spaces) text
- * ending with ':', not starting with '-'. must be consumed before
- * subcommand_entry in the choice combinator, otherwise "commands:" would
- * be parsed as a subcommand named "commands" with description ":".
- *
- * returns a bool indicating whether this is an argument section (true)
- * or some other section (false). this drives the subcommand filtering logic
- * in help_parser — entries under argument sections are excluded from the
- * subcommand list. *)
-let section_header =
-  available >>= fun avail ->
-  if avail = 0 then fail "eof"
-  else
-    peek_string (min avail 80) >>= fun preview ->
-    (* extract just the first line from the preview *)
-    let first_line = match String.index_opt preview '\n' with
-      | Some pos -> String.sub preview 0 pos
-      | None -> preview in
-    let trimmed = String.trim first_line in
-    let len = String.length trimmed in
-    let indent = let pos = ref 0 in
-      while !pos < String.length first_line && (first_line.[!pos] = ' ' || first_line.[!pos] = '\t') do incr pos done;
-      !pos in
-    if len >= 2 && trimmed.[len - 1] = ':' && trimmed.[0] <> '-' && indent <= 4 then
-      rest_of_line <* eol_strict >>| fun line -> is_arg_section line
-    else fail "not a section header"
-
-(* --- top-level parser ---
- * the main help parser: walks through all lines, trying each line as one of:
- *   1. a flag entry (starts with whitespace + '-')
- *   2. a section header (left-aligned text ending with ':')
- *   3. a subcommand line (name + 2+ spaces + description)
- *   4. anything else — skip
- *
- * the choice ordering matters: entries are tried first (highest priority),
- * then section headers (must beat subcommand_entry to avoid misparse),
- * then subcommands, then skip as fallback.
- *
- * after collecting all items, two post-processing steps happen:
- *   - subcommands under argument sections are excluded (tracked via
- *     a running in_arg_sec boolean toggled by section headers)
- *   - duplicate subcommand names are deduplicated, keeping the entry
- *     with the longer description (heuristic: more info = better)
- *
- * positionals are not extracted here — they come from the usage line
- * parser (extract_usage_positionals) or CLI11's explicit section parser
- * (extract_cli11_positionals), applied later in parse_help. *)
-let help_parser =
-  let open Angstrom in
-  fix (fun _self ->
-    let try_entry =
-      entry >>| fun e -> `Entry e
-    in
-    let try_section =
-      section_header >>| fun is_arg -> `Section is_arg
-    in
-    let try_subcommand =
-      subcommand_entry >>| fun sc -> `Subcommand sc
-    in
-    let try_skip =
-      skip_non_option_line >>| fun () -> `Skip
-    in
-    many (choice [ try_entry; try_section; try_subcommand; try_skip ]) >>| fun items ->
-    let entries = List.filter_map (function `Entry e -> Some e | _ -> None) items in
-    let subcommands =
-      List.fold_left (fun (in_arg_sec, acc) item ->
-        match item with
-        | `Section is_arg -> (is_arg, acc)
-        | `Subcommand sc when not in_arg_sec -> (in_arg_sec, sc :: acc)
-        | _ -> (in_arg_sec, acc)
-      ) (false, []) items
-      |> snd |> List.rev
-      |> List.fold_left (fun acc sc ->
-           match List.assoc_opt sc.name acc with
-           | Some prev when String.length prev.desc >= String.length sc.desc -> acc
-           | _ -> (sc.name, sc) :: List.remove_assoc sc.name acc
-         ) []
-      |> List.rev_map snd
-    in
-    { entries; subcommands; positionals = []; description = "" })
-
-(* --- usage line parsing ---
- * usage lines look like: "usage: git add [OPTIONS] [--] [<pathspec>...]"
- * to extract positional arguments, we first need to skip past the command
- * name prefix ("git add") to reach the argument portion.
- *
- * skip_command_prefix walks word-by-word, treating each space-separated
- * token as part of the command name as long as it:
- *   - is made of "word chars" (alphanumeric, hyphen, underscore, slash, dot)
- *   - contains at least one lowercase letter (to distinguish from ALL_CAPS
- *     positional names like FILE)
- *   - doesn't start with [, <, (, {, or - (which indicate arguments, not
- *     command name components)
- *
- * this is an imperative index-walking parser rather than using Angstrom,
- * because usage lines are a single string (not line-oriented) and the format
- * is too varied for clean combinator composition. *)
-let skip_command_prefix s =
-  let len = String.length s in
-  let pos = ref 0 in
-  let skip_ws () = while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t') do incr pos done in
-  let is_word_char = function
-    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '/' | '.' -> true
-    | _ -> false
-  in
-  let rec loop () =
-    skip_ws ();
-    if !pos >= len then ()
-    else if s.[!pos] = '[' || s.[!pos] = '<' || s.[!pos] = '(' || s.[!pos] = '{' || s.[!pos] = '-' then ()
-    else if is_word_char s.[!pos] then begin
-      let start = !pos in
-      while !pos < len && is_word_char s.[!pos] do incr pos done;
-      let word = String.sub s start (!pos - start) in
-      let has_lower = ref false in
-      String.iter (fun c -> if c >= 'a' && c <= 'z' then has_lower := true) word;
-      if not !has_lower then
-        pos := start
-      else
-        loop ()
-    end
-  in
-  loop ();
-  !pos
-
-(* parse the argument portion of a usage line into positional definitions.
- * handles these syntactic forms:
- *   <file>          - mandatory positional
- *   [file]          - optional positional
- *   FILE            - mandatory positional (ALL_CAPS convention)
- *   <file>...       - variadic (also handles utf-8 ellipsis)
- *   [file...]       - optional variadic
- *   curly-brace alternatives - skipped, not a positional
- *   -flag           - flags (skipped)
- *
- * certain ALL_CAPS names are skipped because they're not real positionals —
- * "OPTIONS", "FLAGS", etc. are section labels that sometimes appear in usage
- * lines for readability.
- *
- * deduplication at the end ensures we don't emit the same positional twice
- * (can happen when usage lines are reformatted or repeated). *)
-let parse_usage_args s =
-  let len = String.length s in
-  let pos = ref 0 in
-  let positionals = ref [] in
-  let skip_ws () =
-    while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t') do incr pos done in
-  let is_pos_char c =
-    (c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9') in
-  (* detect trailing dots or utf-8 ellipsis indicating variadic args *)
-  let read_dots () =
-    skip_ws ();
-    if !pos + 2 < len && s.[!pos] = '.' && s.[!pos+1] = '.' && s.[!pos+2] = '.' then
-      (pos := !pos + 3; true)
-    else if !pos + 2 < len && s.[!pos] = '\xe2' && s.[!pos+1] = '\x80' && s.[!pos+2] = '\xa6' then
-      (pos := !pos + 3; true)  (* utf-8 ellipsis *)
-    else false
-  in
-  (* names that are section labels, not actual positional arguments *)
-  let is_skip name =
-    let u = String.uppercase_ascii name in
-    u = "OPTIONS" || u = "OPTION" || u = "FLAGS" || u = "FLAG"
-  in
-  (* validate that a name contains only alphanumeric, underscore, hyphen chars *)
-  let is_clean_name name =
-    String.length name >= 2
-    && String.for_all (fun c ->
-         (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
-         || (c >= '0' && c <= '9') || c = '_' || c = '-') name
-  in
-  let is_letter c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in
-  (* skip {A|c|d|...} alternative blocks — not positional arguments *)
-  let skip_braces () =
-    if !pos < len && s.[!pos] = '{' then begin
-      let depth = ref 1 in
-      incr pos;
-      while !pos < len && !depth > 0 do
-        if s.[!pos] = '{' then incr depth
-        else if s.[!pos] = '}' then decr depth;
-        incr pos
-      done;
-      ignore (read_dots ());
-      true
-    end else false
-  in
-  while !pos < len do
-    skip_ws ();
-    if !pos >= len then ()
-    else if skip_braces () then ()
-    else match s.[!pos] with
-    | '[' ->
-      (* optional positional: [name] or [<name>] or [name...] *)
-      incr pos;
-      let start = !pos in
-      let depth = ref 1 in
-      while !pos < len && !depth > 0 do
-        if s.[!pos] = '[' then incr depth
-        else if s.[!pos] = ']' then decr depth;
-        incr pos
-      done;
-      let bracket_end = !pos - 1 in
-      let inner = String.sub s start (max 0 (bracket_end - start)) |> String.trim in
-      let inner, has_inner_dots =
-        if String.ends_with ~suffix:"..." inner then
-          (String.sub inner 0 (String.length inner - 3) |> String.trim, true)
-        else (inner, false)
-      in
-      let variadic = has_inner_dots || read_dots () in
-      if String.length inner > 0
-         && inner.[0] <> '-'
-         && (is_letter inner.[0] || inner.[0] = '<') then begin
-        let name =
-          if inner.[0] = '<' then
-            let e = try String.index inner '>' with Not_found -> String.length inner in
-            String.sub inner 1 (e - 1)
-          else inner
-        in
-        if is_clean_name name && not (is_skip name) then
-          positionals := { pos_name = String.lowercase_ascii name;
-                       optional = true; variadic } :: !positionals
-      end
-    | '<' ->
-      (* mandatory positional in angle brackets: <name> *)
-      incr pos;
-      let start = !pos in
-      while !pos < len && s.[!pos] <> '>' do incr pos done;
-      let name = String.sub s start (!pos - start) in
-      if !pos < len then incr pos;
-      let variadic = read_dots () in
-      if is_clean_name name && not (is_skip name) then
-        positionals := { pos_name = String.lowercase_ascii name;
-                     optional = false; variadic } :: !positionals
-    | '-' ->
-      (* flag — skip entirely, not a positional *)
-      while !pos < len && s.[!pos] <> ' ' && s.[!pos] <> '\t' && s.[!pos] <> ']' do incr pos done
-    | c when c >= 'A' && c <= 'Z' ->
-      (* ALL_CAPS positional name *)
-      let start = !pos in
-      while !pos < len && is_pos_char s.[!pos] do incr pos done;
-      let name = String.sub s start (!pos - start) in
-      let variadic = read_dots () in
-      if String.length name >= 2
-         && String.for_all (fun c ->
-              (c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9')
-            ) name
-         && not (is_skip name) then
-        positionals := { pos_name = String.lowercase_ascii name;
-                     optional = false; variadic } :: !positionals
-    | _ ->
-      incr pos
-  done;
-  (* deduplicate positionals by name, keeping the first occurrence *)
-  List.rev !positionals
-  |> List.fold_left (fun (seen, acc) p ->
-       if List.mem p.pos_name seen then (seen, acc)
-       else (p.pos_name :: seen, p :: acc)
-     ) ([], [])
-  |> snd |> List.rev
-
-(* find the "usage:" line in the help text and extract positionals from it.
- * searches line-by-line for a line starting with "usage:" (case-insensitive).
- * handles both inline usage ("usage: cmd [OPTIONS] FILE") and the clap style
- * where the actual usage is on the next line:
- *   USAGE:
- *     cmd [OPTIONS] FILE
- *
- * also handles the bare "usage" header (no colon) followed by a next line. *)
-let extract_usage_positionals text =
-  let lines = String.split_on_char '\n' text in
-  let lines_arr = Array.of_list lines in
-  let len = Array.length lines_arr in
-  (* search through lines for the first usage header and return the usage content *)
-  let find_usage_line () =
-    let check_line idx =
-      let trimmed = String.trim lines_arr.(idx) in
-      let trimmed_len = String.length trimmed in
-      let lc = String.lowercase_ascii trimmed in
-      if trimmed_len >= 6 && String.sub lc 0 6 = "usage:" then begin
-        let after = String.sub trimmed 6 (trimmed_len - 6) |> String.trim in
-        if String.length after > 0 then Some after
-        else if idx + 1 < len then
-          (* clap style: USAGE:\n  cmd [OPTIONS] PATTERN *)
-          let next = String.trim lines_arr.(idx + 1) in
-          if String.length next > 0 then Some next else None
-        else None
-      end else if lc = "usage" then begin
-        if idx + 1 < len then
-          let next = String.trim lines_arr.(idx + 1) in
-          if String.length next > 0 then Some next else None
-        else None
-      end else None
-    in
-    (* use List.find_map over the index range to find the first matching line *)
-    List.find_map check_line (List.init len Fun.id)
-  in
-  match find_usage_line () with
-  | None -> []
-  | Some usage ->
-    let cmd_end = skip_command_prefix usage in
-    let args = String.sub usage cmd_end (String.length usage - cmd_end) in
-    parse_usage_args args
-
-(* extract positionals from CLI11's explicit "POSITIONALS:" section.
- * CLI11 (a c++ arg parsing library) emits a dedicated section:
- *   Positionals:
- *     name TEXT           description here
- *     count INT           another description
- *
- * this is preferred over usage-line extraction when present because it
- * provides more accurate type information. the parser looks for the
- * section header, then reads indented lines until a blank or unindented
- * line signals the end. type words (TEXT, INT, FLOAT, etc.) between the
- * name and description are skipped. *)
-let extract_cli11_positionals text =
-  let lines = String.split_on_char '\n' text in
-  (* parse a single indented positional line into a positional record *)
-  let parse_one s =
-    let len = String.length s in
-    let pos = ref 0 in
-    let is_name_char c =
-      (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-      || (c >= '0' && c <= '9') || c = '_' || c = '-' in
-    while !pos < len && is_name_char s.[!pos] do incr pos done;
-    if !pos < 2 then None
-    else
-      let name = String.sub s 0 !pos in
-      while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t') do incr pos done;
-      (* skip type word: TEXT, INT, FLOAT, ENUM, BOOLEAN, etc. *)
-      while !pos < len && s.[!pos] >= 'A' && s.[!pos] <= 'Z' do incr pos done;
-      while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t') do incr pos done;
-      let variadic = !pos + 2 < len && s.[!pos] = '.' && s.[!pos+1] = '.' && s.[!pos+2] = '.' in
-      Some { pos_name = String.lowercase_ascii name; optional = false; variadic }
-  in
-  (* parse consecutive indented lines under the section header *)
-  let rec parse_lines lines acc =
-    match lines with
-    | [] -> List.rev acc
-    | line :: rest ->
-      let len = String.length line in
-      if len = 0 || (line.[0] <> ' ' && line.[0] <> '\t') then
-        List.rev acc
-      else
-        let trimmed = String.trim line in
-        if String.length trimmed = 0 then List.rev acc
-        else match parse_one trimmed with
-          | Some p -> parse_lines rest (p :: acc)
-          | None -> parse_lines rest acc
-  in
-  (* scan lines for the positionals section header, then parse the body *)
-  let rec find_section = function
-    | [] -> []
-    | line :: rest ->
-      let trimmed = String.trim line in
-      if trimmed = "POSITIONALS:" || trimmed = "Positionals:" then
-        parse_lines rest []
-      else
-        find_section rest
-  in
-  find_section lines
-
-(* top-level entry point: parse a --help text string into a help_result.
- * steps:
- *   1. strip ansi escapes (colors, hyperlinks, etc.)
- *   2. run the Angstrom help_parser for flags and subcommands
- *   3. extract positionals via CLI11 format (preferred) or usage line (fallback)
- *   4. merge positionals into the result
- * uses Angstrom's prefix-consume mode — we don't need to parse every byte. *)
-let parse_help txt =
-  let clean = strip_ansi txt in
-  match Angstrom.parse_string ~consume:Consume.Prefix help_parser clean with
-  | Ok result ->
-    let cli11 = extract_cli11_positionals clean in
-    let usage = extract_usage_positionals clean in
-    let positionals = if cli11 <> [] then cli11 else usage in
-    Ok { result with positionals }
-  | Error msg -> Error msg
diff --git a/lib/store.ml b/lib/store.ml
deleted file mode 100644
index 2466c81..0000000
--- a/lib/store.ml
+++ /dev/null
@@ -1,670 +0,0 @@
-(* store.ml — filesystem-backed cache of parsed completion data.
- *
- * this module handles persistence of completion data to disk. each command's
- * help_result is serialized to JSON and stored as a file in a cache directory
- * (default: $XDG_CACHE_HOME/inshellah). commands with native nushell completions
- * are stored as .nu files instead.
- *
- * the store also provides lookup, listing, and subcommand discovery by
- * scanning filenames in the cache directory.
- *
- * file naming convention:
- *   - spaces in command names become underscores (e.g. "git add" -> "git_add.json")
- *   - subcommands of a parent share the prefix (e.g. "git_add.json", "git_commit.json")
- *   - .json files contain serialized help_result
- *   - .nu files contain native nushell extern source code
- *
- * the module includes a minimal hand-rolled JSON parser/serializer because
- * we only need to handle our own output format (no need for a full JSON library).
- *)
-
-open Parser
-
-(* get the default store path: $XDG_CACHE_HOME/inshellah, falling back to
- * ~/.cache/inshellah if XDG_CACHE_HOME is not set. *)
-let default_store_path () =
-  let cache = try Sys.getenv "XDG_CACHE_HOME"
-    with Not_found -> Filename.concat (Sys.getenv "HOME") ".cache" in
-  Filename.concat cache "inshellah"
-
-(* recursively create directories along a path (equivalent to mkdir -p).
- * splits the path into components and folds over them, accumulating
- * the current directory prefix and creating each level if missing. *)
-let ensure_dir dir =
-  let sep = Filename.dir_sep in
-  let parts = String.split_on_char sep.[0] dir in
-  (* determine the starting prefix: absolute paths begin with "/" *)
-  let start = if String.length dir > 0 && dir.[0] = sep.[0] then sep else "" in
-  let _final =
-    List.fold_left (fun current part ->
-      if part = "" then current
-      else begin
-        let next = if current = sep then sep ^ part
-                   else if current = "" then part
-                   else current ^ sep ^ part in
-        (if not (Sys.file_exists next) then Unix.mkdir next 0o755);
-        next
-      end
-    ) start parts
-  in
-  ()
-
-(* convert command name to safe filename: spaces become underscores,
- * non-alphanumeric chars become hyphens.
- * e.g. "git add" -> "git_add", "docker-compose" -> "docker-compose" *)
-let filename_of_command cmd =
-  String.map (function
-    | ' ' -> '_'
-    | ('a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.') as char_val -> char_val
-    | _ -> '-') cmd
-
-(* inverse of filename_of_command: underscores back to spaces.
- * note: this is lossy — original underscores in command names
- * (e.g. "my_tool") would be converted to spaces. in practice this
- * doesn't matter because tools with underscores in names are rare,
- * and subcommands use space-separated naming. *)
-let command_of_filename base_name =
-  String.map (function '_' -> ' ' | char_val -> char_val) base_name
-
-(* --- JSON serialization of help_result ---
- * hand-rolled JSON emitters. we don't use a JSON library because:
- *   1. the schema is fixed and simple — we only serialize our own types
- *   2. avoiding dependencies keeps the binary small
- *   3. printf-style emission is fast and straightforward for our types *)
-
-(* escape a string for JSON: quotes, backslashes, and control characters.
- * control chars below 0x20 are emitted as \uXXXX unicode escapes. *)
-let escape_json contents =
-  let buf = Buffer.create (String.length contents + 4) in
-  String.iter (fun char_val -> match char_val with
-    | '"' -> Buffer.add_string buf "\\\""
-    | '\\' -> Buffer.add_string buf "\\\\"
-    | '\n' -> Buffer.add_string buf "\\n"
-    | '\t' -> Buffer.add_string buf "\\t"
-    | '\r' -> Buffer.add_string buf "\\r"
-    | c when Char.code c < 0x20 ->
-      Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c))
-    | c -> Buffer.add_char buf c
-  ) contents;
-  Buffer.contents buf
-
-(* wrap a string in quotes after escaping for JSON *)
-let json_string text = Printf.sprintf "\"%s\"" (escape_json text)
-
-(* the literal null value for JSON output *)
-let json_null = "null"
-
-(* serialize a switch (short flag, long flag, or both) to JSON *)
-let json_switch_of = function
-  | Short char_val ->
-    Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (json_string (String.make 1 char_val))
-  | Long name ->
-    Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string name)
-  | Both (char_val, name) ->
-    Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}"
-      (json_string (String.make 1 char_val)) (json_string name)
-
-(* serialize a parameter spec (mandatory, optional, or absent) to JSON *)
-let json_param_of = function
-  | None -> json_null
-  | Some (Mandatory name) ->
-    Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}" (json_string name)
-  | Some (Optional name) ->
-    Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}" (json_string name)
-
-(* serialize a single flag entry (switch + param + description) to JSON *)
-let json_entry_of entry =
-  Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}"
-    (json_switch_of entry.switch) (json_param_of entry.param) (json_string entry.desc)
-
-(* serialize a subcommand (name + description) to JSON *)
-let json_subcommand_of sc =
-  Printf.sprintf "{\"name\":%s,\"desc\":%s}" (json_string sc.name) (json_string sc.desc)
-
-(* serialize a positional argument to JSON *)
-let json_positional_of p =
-  Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}"
-    (json_string p.pos_name) p.optional p.variadic
-
-(* serialize a list of items to a JSON array using the given formatter *)
-let json_list formatter items =
-  "[" ^ String.concat "," (List.map formatter items) ^ "]"
-
-(* serialize an entire help_result to a JSON object string *)
-let json_of_help_result ?(source="help") result =
-  Printf.sprintf "{\"source\":%s,\"description\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}"
-    (json_string source)
-    (json_string result.description)
-    (json_list json_entry_of result.entries)
-    (json_list json_subcommand_of result.subcommands)
-    (json_list json_positional_of result.positionals)
-
-(* --- JSON deserialization ---
- * minimal hand-rolled recursive-descent JSON parser. only handles the subset
- * we emit: strings, booleans, nulls, arrays, and objects. no number parsing
- * (we don't emit numbers). this is intentionally minimal — we only read back
- * our own serialized format, so robustness against arbitrary JSON is not needed.
- *
- * note: the \u escape handler does basic UTF-8 encoding for code points
- * up to 0xFFFF but doesn't handle surrogate pairs. this is fine for our use
- * case since we only escape control characters below 0x20. *)
-
-type json =
-  | Jnull
-  | Jbool of bool
-  | Jstring of string
-  | Jarray of json list
-  | Jobject of (string * json) list
-
-(* JSON accessor helpers — return sensible defaults for missing/wrong types *)
-let json_get key = function
-  | Jobject pairs -> (try List.assoc key pairs with Not_found -> Jnull)
-  | _ -> Jnull
-
-(* extract a string from a JSON value, defaulting to empty string *)
-let json_to_string = function Jstring text -> text | _ -> ""
-
-(* extract a boolean from a JSON value, defaulting to false *)
-let json_to_bool = function Jbool value -> value | _ -> false
-
-(* extract a list from a JSON array value, defaulting to empty list *)
-let json_to_list = function Jarray items -> items | _ -> []
-
-exception Json_error of string
-
-(* imperative recursive-descent JSON parser.
- * uses a mutable position ref to walk through the string.
- * note: boolean/null parsing just advances a fixed number of chars
- * without validating the actual characters — safe because we only read
- * our own output, but would be incorrect for arbitrary JSON. *)
-let parse_json contents =
-  let len = String.length contents in
-  let pos = ref 0 in
-  (* peek at the current character without consuming it *)
-  let peek () = if !pos < len then contents.[!pos] else '\x00' in
-  (* advance the position by one character *)
-  let advance () = incr pos in
-  (* skip over any whitespace characters at current position *)
-  let skip_ws () =
-    while !pos < len && (contents.[!pos] = ' ' || contents.[!pos] = '\t'
-                         || contents.[!pos] = '\n' || contents.[!pos] = '\r') do
-      advance ()
-    done in
-  (* skip whitespace then consume the expected character, or raise *)
-  let expect char_val =
-    skip_ws ();
-    if peek () <> char_val then
-      raise (Json_error (Printf.sprintf "expected '%c' at %d" char_val !pos));
-    advance () in
-  (* mutually recursive parsers for each JSON value type *)
-  let rec parse_value () =
-    skip_ws ();
-    match peek () with
-    | '"' -> Jstring (parse_string ())
-    | '{' -> parse_object ()
-    | '[' -> parse_array ()
-    | 'n' -> advance (); advance (); advance (); advance (); Jnull
-    | 't' -> advance (); advance (); advance (); advance (); Jbool true
-    | 'f' ->
-      advance (); advance (); advance (); advance (); advance (); Jbool false
-    | char_val ->
-      raise (Json_error (Printf.sprintf "unexpected '%c' at %d" char_val !pos))
-  (* parse a quoted string value, handling escape sequences *)
-  and parse_string () =
-    expect '"';
-    let buf = Buffer.create 32 in
-    while peek () <> '"' do
-      if peek () = '\\' then begin
-        advance ();
-        (match peek () with
-         | '"' -> Buffer.add_char buf '"'
-         | '\\' -> Buffer.add_char buf '\\'
-         | 'n' -> Buffer.add_char buf '\n'
-         | 't' -> Buffer.add_char buf '\t'
-         | 'r' -> Buffer.add_char buf '\r'
-         | 'u' ->
-           (* handle \uXXXX unicode escapes with basic UTF-8 encoding *)
-           advance ();
-           let hex = String.sub contents !pos 4 in
-           pos := !pos + 3;
-           let code = int_of_string ("0x" ^ hex) in
-           if code < 128 then Buffer.add_char buf (Char.chr code)
-           else begin
-             if code < 0x800 then begin
-               Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6)));
-               Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
-             end else begin
-               Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12)));
-               Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f)));
-               Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
-             end
-           end
-         | char_val -> Buffer.add_char buf char_val);
-        advance ()
-      end else begin
-        Buffer.add_char buf (peek ());
-        advance ()
-      end
-    done;
-    advance (); (* consume closing quote *)
-    Buffer.contents buf
-  (* parse a JSON object: { "key": value, ... } *)
-  and parse_object () =
-    expect '{';
-    skip_ws ();
-    if peek () = '}' then (advance (); Jobject [])
-    else begin
-      let pairs = ref [] in
-      let more = ref true in
-      while !more do
-        skip_ws ();
-        let key = parse_string () in
-        expect ':';
-        let value = parse_value () in
-        pairs := (key, value) :: !pairs;
-        skip_ws ();
-        if peek () = ',' then advance ()
-        else more := false
-      done;
-      expect '}';
-      Jobject (List.rev !pairs)
-    end
-  (* parse a JSON array: [ value, value, ... ] *)
-  and parse_array () =
-    expect '[';
-    skip_ws ();
-    if peek () = ']' then (advance (); Jarray [])
-    else begin
-      let items = ref [] in
-      let more = ref true in
-      while !more do
-        let value = parse_value () in
-        items := value :: !items;
-        skip_ws ();
-        if peek () = ',' then advance ()
-        else more := false
-      done;
-      expect ']';
-      Jarray (List.rev !items)
-    end
-  in
-  parse_value ()
-
-(* --- JSON to OCaml type converters ---
- * these reconstruct our parser types from their JSON representations.
- * they mirror the json_*_of serializers above. *)
-
-(* reconstruct a switch value from its JSON representation *)
-let switch_of_json json_node =
-  match json_to_string (json_get "type" json_node) with
-  | "short" ->
-    let char_str = json_to_string (json_get "char" json_node) in
-    Short (if String.length char_str > 0 then char_str.[0] else '?')
-  | "long" -> Long (json_to_string (json_get "name" json_node))
-  | "both" ->
-    let char_str = json_to_string (json_get "char" json_node) in
-    Both ((if String.length char_str > 0 then char_str.[0] else '?'),
-          json_to_string (json_get "name" json_node))
-  | _ -> Long "?"
-
-(* reconstruct a parameter spec from its JSON representation *)
-let param_of_json = function
-  | Jnull -> None
-  | json_node ->
-    let name = json_to_string (json_get "name" json_node) in
-    (match json_to_string (json_get "kind" json_node) with
-     | "mandatory" -> Some (Mandatory name)
-     | "optional" -> Some (Optional name)
-     | _ -> None)
-
-(* reconstruct a flag entry from its JSON representation *)
-let entry_of_json json_node =
-  { switch = switch_of_json (json_get "switch" json_node);
-    param = param_of_json (json_get "param" json_node);
-    desc = json_to_string (json_get "desc" json_node) }
-
-(* reconstruct a subcommand from its JSON representation *)
-let subcommand_of_json json_node =
-  { name = json_to_string (json_get "name" json_node);
-    desc = json_to_string (json_get "desc" json_node) }
-
-(* reconstruct a positional argument from its JSON representation *)
-let positional_of_json json_node =
-  { pos_name = json_to_string (json_get "name" json_node);
-    optional = json_to_bool (json_get "optional" json_node);
-    variadic = json_to_bool (json_get "variadic" json_node) }
-
-(* reconstruct a full help_result from its JSON representation *)
-let help_result_of_json json_node =
-  { entries = List.map entry_of_json (json_to_list (json_get "entries" json_node));
-    subcommands = List.map subcommand_of_json (json_to_list (json_get "subcommands" json_node));
-    positionals = List.map positional_of_json (json_to_list (json_get "positionals" json_node));
-    description = json_to_string (json_get "description" json_node) }
-
-(* --- filesystem operations --- *)
-
-(* write a string to a file, overwriting any existing content *)
-let write_file path contents =
-  let oc = open_out path in
-  output_string oc contents;
-  close_out oc
-
-(* read an entire file into a string, returning None on any error *)
-let read_file path =
-  try
-    let ic = open_in path in
-    let size = in_channel_length ic in
-    let contents = Bytes.create size in
-    really_input ic contents 0 size;
-    close_in ic;
-    Some (Bytes.to_string contents)
-  with _ -> None
-
-(* write a parsed help_result to the store as JSON *)
-let write_result ~dir ?(source="help") command result =
-  let path = Filename.concat dir (filename_of_command command ^ ".json") in
-  write_file path (json_of_help_result ~source result)
-
-(* write native nushell completion source to the store as a .nu file *)
-let write_native ~dir command data =
-  let path = Filename.concat dir (filename_of_command command ^ ".nu") in
-  write_file path data
-
-(* check whether a path exists and is a directory *)
-let is_dir path = Sys.file_exists path && Sys.is_directory path
-
-(* look for a command's data file across multiple store directories.
- * checks JSON first, then .nu. returns the first match found.
- * directories are searched in order (user dir before system dirs). *)
-let find_file dirs command =
-  let base_name = filename_of_command command in
-  List.find_map (fun directory ->
-    let json_path = Filename.concat directory (base_name ^ ".json") in
-    if Sys.file_exists json_path then Some json_path
-    else
-      let nu_path = Filename.concat directory (base_name ^ ".nu") in
-      if Sys.file_exists nu_path then Some nu_path
-      else None
-  ) dirs
-
-(* parse a nushell .nu file to extract a help_result for a specific command.
- * .nu files contain `export extern "cmd" [ ... ]` blocks with flag definitions.
- * this parser extracts flags, positionals, subcommands, and descriptions
- * from the nushell extern syntax so the completer can use native completions.
- *
- * nushell extern parameter syntax:
- *   --flag(-s): type  # description     → Both(s, "flag") with param
- *   --flag: type  # description         → Long "flag" with param
- *   --flag  # description               → Long "flag" no param
- *   -s  # description                   → Short 's'
- *   name: type  # description           → positional
- *   name?: type                         → optional positional
- *   ...name: type                       → variadic positional
- *)
-let parse_nu_completions target_cmd contents =
-  let lines = String.split_on_char '\n' contents in
-  (* extract the description comment preceding an export extern block *)
-  let current_desc = ref "" in
-  (* collect all extern blocks: (cmd_name, entries, positionals, description) *)
-  let blocks = ref [] in
-  let in_block = ref false in
-  let block_cmd = ref "" in
-  let block_entries = ref [] in
-  let block_positionals = ref [] in
-  let block_desc = ref "" in
-  let finish_block () =
-    if !in_block then begin
-      blocks := (!block_cmd, List.rev !block_entries,
-                 List.rev !block_positionals, !block_desc) :: !blocks;
-      in_block := false
-    end in
-  List.iter (fun line ->
-    let trimmed = String.trim line in
-    if not !in_block then begin
-      (* look for description comments and export extern lines *)
-      if String.length trimmed > 2 && trimmed.[0] = '#' && trimmed.[1] = ' ' then
-        current_desc := String.trim (String.sub trimmed 2 (String.length trimmed - 2))
-      else if String.length trimmed > 15
-              && (try ignore (Str.search_forward
-                    (Str.regexp_string "export extern") trimmed 0); true
-                  with Not_found -> false) then begin
-        (* extract command name from: export extern "cmd name" [ or export extern cmd [ *)
-        let re_quoted = Str.regexp {|export extern "\([^"]*\)"|} in
-        let re_bare = Str.regexp {|export extern \([a-zA-Z0-9_-]+\)|} in
-        let cmd_opt =
-          if try ignore (Str.search_forward re_quoted trimmed 0); true
-             with Not_found -> false
-          then Some (Str.matched_group 1 trimmed)
-          else if try ignore (Str.search_forward re_bare trimmed 0); true
-             with Not_found -> false
-          then Some (Str.matched_group 1 trimmed)
-          else None in
-        if cmd_opt <> None then begin
-          let cmd = match cmd_opt with Some c -> c | None -> "" in
-          in_block := true;
-          block_cmd := cmd;
-          block_entries := [];
-          block_positionals := [];
-          block_desc := !current_desc;
-          current_desc := ""
-        end
-      end else
-        current_desc := ""
-    end else begin
-      (* inside an extern block — parse flag/positional lines *)
-      if String.length trimmed > 0 && trimmed.[0] = ']' then
-        finish_block ()
-      else begin
-        (* extract description from # comment *)
-        let param_part, desc =
-          match String.split_on_char '#' trimmed with
-          | before :: rest ->
-            (String.trim before,
-             String.trim (String.concat "#" rest))
-          | _ -> (trimmed, "")
-        in
-        if String.length param_part > 1 then begin
-          if param_part.[0] = '-' && param_part.[1] = '-' then begin
-            (* long flag: --flag(-s): type or --flag: type or --flag *)
-            let re_both = Str.regexp {|--\([a-zA-Z0-9-]+\)(-\([a-zA-Z0-9]\))\(: *\([a-zA-Z]+\)\)?|} in
-            let re_long = Str.regexp {|--\([a-zA-Z0-9-]+\)\(: *\([a-zA-Z]+\)\)?|} in
-            if try ignore (Str.search_forward re_both param_part 0); true
-               with Not_found -> false then begin
-              let long = Str.matched_group 1 param_part in
-              let short = (Str.matched_group 2 param_part).[0] in
-              let param = try Some (Mandatory (Str.matched_group 4 param_part))
-                          with Not_found | Invalid_argument _ -> None in
-              block_entries := { switch = Both (short, long); param; desc } :: !block_entries
-            end else if try ignore (Str.search_forward re_long param_part 0); true
-               with Not_found -> false then begin
-              let long = Str.matched_group 1 param_part in
-              let param = try Some (Mandatory (Str.matched_group 3 param_part))
-                          with Not_found | Invalid_argument _ -> None in
-              block_entries := { switch = Long long; param; desc } :: !block_entries
-            end
-          end else if param_part.[0] = '-' then begin
-            (* short flag: -s *)
-            if String.length param_part >= 2 then
-              let c = param_part.[1] in
-              if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') then
-                block_entries := { switch = Short c; param = None; desc } :: !block_entries
-          end else begin
-            (* positional: name: type or name?: type or ...name: type *)
-            let variadic = String.starts_with ~prefix:"..." param_part in
-            let part = if variadic then String.sub param_part 3 (String.length param_part - 3)
-                       else param_part in
-            let optional = try let q = String.index part '?' in q > 0
-                           with Not_found -> false in
-            let name = match String.index_opt part ':' with
-              | Some i -> String.trim (String.sub part 0 i)
-              | None -> match String.index_opt part '?' with
-                | Some i -> String.trim (String.sub part 0 i)
-                | None -> String.trim part in
-            let name = String.map (function '-' -> '_' | c -> c) name in
-            if String.length name > 0 && name.[0] <> '-' then
-              block_positionals := { pos_name = name; optional = optional || variadic;
-                                     variadic } :: !block_positionals
-          end
-        end
-      end
-    end
-  ) lines;
-  finish_block ();
-  let blocks = List.rev !blocks in
-  (* find the block matching the target command *)
-  let target = target_cmd in
-  match List.find_opt (fun (cmd, _, _, _) -> cmd = target) blocks with
-  | Some (_, entries, positionals, description) ->
-    (* collect subcommands from other blocks that are children of this command *)
-    let prefix = target ^ " " in
-    let subcommands = List.filter_map (fun (cmd, _, _, desc) ->
-      if String.starts_with ~prefix cmd then
-        let sub_name = String.sub cmd (String.length prefix)
-                         (String.length cmd - String.length prefix) in
-        (* only immediate subcommands (no further spaces) *)
-        if not (String.contains sub_name ' ') && String.length sub_name > 0
-        then Some { name = sub_name; desc }
-        else None
-      else None
-    ) blocks in
-    { entries; subcommands; positionals; description }
-  | None ->
-    (* target not found — return empty result *)
-    { entries = []; subcommands = []; positionals = []; description = "" }
-
-(* look up a command and deserialize its help_result.
- * searches for .json files first, then falls back to .nu files
- * (parsing the nushell extern syntax to extract completion data).
- * for subcommands like "rbw get", also checks the parent's .nu file
- * (e.g. rbw.nu) since clap-generated .nu files contain all extern
- * blocks in a single file. *)
-let lookup dirs command =
-  let base_name = filename_of_command command in
-  (* also try the root command's .nu file for subcommand lookups.
-   * "rbw get" -> try rbw.nu and look for the "rbw get" extern block. *)
-  let parent_base = match String.index_opt command ' ' with
-    | Some i -> Some (filename_of_command (String.sub command 0 i))
-    | None -> None in
-  List.find_map (fun directory ->
-    let json_path = Filename.concat directory (base_name ^ ".json") in
-    match read_file json_path with
-    | Some data ->
-      (try Some (help_result_of_json (parse_json data))
-       with _ -> None)
-    | None ->
-      let nu_path = Filename.concat directory (base_name ^ ".nu") in
-      (match read_file nu_path with
-      | Some data ->
-        (try Some (parse_nu_completions command data)
-         with _ -> None)
-      | None ->
-        (* try parent's .nu file for subcommand blocks *)
-        match parent_base with
-        | Some pb ->
-          let parent_nu = Filename.concat directory (pb ^ ".nu") in
-          (match read_file parent_nu with
-           | Some data ->
-             (try
-                let r = parse_nu_completions command data in
-                if r.entries <> [] || r.subcommands <> [] || r.positionals <> []
-                then Some r else None
-              with _ -> None)
-           | None -> None)
-        | None -> None)
-  ) dirs
-
-(* look up a command's raw data (JSON or .nu source) without parsing.
- * used by the "query" command to dump stored data as-is. *)
-let lookup_raw dirs command =
-  let base_name = filename_of_command command in
-  List.find_map (fun directory ->
-    let json_path = Filename.concat directory (base_name ^ ".json") in
-    match read_file json_path with
-    | Some _ as result -> result
-    | None ->
-      let nu_path = Filename.concat directory (base_name ^ ".nu") in
-      read_file nu_path
-  ) dirs
-
-(* strip known extensions (.json or .nu) from a filename, returning None
- * if the filename has neither extension *)
-let chop_extension filename =
-  if Filename.check_suffix filename ".json" then Some (Filename.chop_suffix filename ".json")
-  else if Filename.check_suffix filename ".nu" then Some (Filename.chop_suffix filename ".nu")
-  else None
-
-(* discover subcommands of a command by scanning filenames in the store.
- * looks for files whose names start with the command's filename + "_"
- * (e.g. for "git", finds "git_add.json", "git_commit.json", etc.)
- *
- * only returns immediate subcommands (no nested underscores beyond the prefix).
- * tries to extract description from the JSON "description" field if available.
- *
- * note: this filesystem-based discovery is used as a fallback when the
- * command's own help_result doesn't list subcommands. it enables completion
- * for subcommands that were indexed from separate manpages or help runs. *)
-let subcommands_of dirs command =
-  let prefix = filename_of_command command ^ "_" in
-  let prefix_len = String.length prefix in
-  let module SMap = Map.Make(String) in
-  let subs = List.fold_left (fun subs directory ->
-    if is_dir directory then
-      Array.fold_left (fun subs filename ->
-        if not (String.starts_with ~prefix filename) then subs
-        else
-          let is_json = Filename.check_suffix filename ".json" in
-          match chop_extension filename with
-          | None -> subs
-          | Some base_name ->
-            let rest = String.sub base_name prefix_len (String.length base_name - prefix_len) in
-            (* skip nested subcommands and empty names *)
-            if String.contains rest '_' || String.length rest = 0 then subs
-            else if SMap.mem rest subs then subs
-            else
-              (* try to read the description from the JSON file *)
-              let desc = if is_json then
-                match read_file (Filename.concat directory filename) with
-                | Some data ->
-                  (try json_to_string (json_get "description" (parse_json data))
-                   with _ -> "")
-                | None -> ""
-              else "" in
-              SMap.add rest { name = rest; desc } subs
-      ) subs (Sys.readdir directory)
-    else subs
-  ) SMap.empty dirs in
-  SMap.fold (fun _ sc acc -> sc :: acc) subs [] |> List.rev
-
-(* list all indexed commands across all store directories.
- * returns a sorted, deduplicated list of command names. *)
-let all_commands dirs =
-  let module SSet = Set.Make(String) in
-  List.fold_left (fun cmds directory ->
-    if is_dir directory then
-      Array.fold_left (fun cmds filename ->
-        match chop_extension filename with
-        | Some base_name -> SSet.add (command_of_filename base_name) cmds
-        | None -> cmds
-      ) cmds (Sys.readdir directory)
-    else cmds
-  ) SSet.empty dirs
-  |> SSet.elements
-
-(* determine how a command was indexed: "help", "manpage", "native", etc.
- * for JSON files, reads the "source" field. for .nu files, returns "native".
- * used by the "dump" command to show provenance. *)
-let file_type_of dirs command =
-  let base_name = filename_of_command command in
-  List.find_map (fun directory ->
-    let json_path = Filename.concat directory (base_name ^ ".json") in
-    if Sys.file_exists json_path then
-      (match read_file json_path with
-       | Some data ->
-         (try Some (json_to_string (json_get "source" (parse_json data)))
-          with _ -> Some "json")
-       | None -> Some "json")
-    else
-      let nu_path = Filename.concat directory (base_name ^ ".nu") in
-      if Sys.file_exists nu_path then Some "native"
-      else None
-  ) dirs
diff --git a/nix/inshellah-completer.nu b/nix/inshellah-completer.nu
new file mode 100644
index 0000000..bee5363
--- /dev/null
+++ b/nix/inshellah-completer.nu
@@ -0,0 +1,813 @@
+@complete external
+def --wrapped sudo [...args] {  # pass-through wrapper: every argument is forwarded unchanged to the external sudo binary; NOTE(review): the @complete external attribute presumably hands completion of this wrapper to the external completer - confirm
+    ^sudo ...$args
+}
+
+@complete external
+def --wrapped doas [...args] {  # pass-through wrapper: every argument is forwarded unchanged to the external doas binary; NOTE(review): the @complete external attribute presumably hands completion of this wrapper to the external completer - confirm
+    ^doas ...$args
+}
+
+let inshellah_nonempty = { |items|    # normalise a candidate list: null entries are dropped, and a missing or empty list collapses to null
+    let kept = ($items | default [] | compact)
+    if not ($kept | is-empty) { $kept } else { null }
+}
+
+let inshellah_fuzzy_score = { |needle, haystack|    # fuzzy-match score of needle against haystack: 0 = no match, 1 = empty needle, 1000 = exact, 900+ = case-insensitive prefix, otherwise a subsequence score
+    let needle = $needle | default "" | into string
+    let haystack = $haystack | default "" | into string
+    let needle_len = ($needle | str length)    # UTF-8 byte lengths; only used for the cheap early exits and the prefix ratio
+    let haystack_len = ($haystack | str length)
+
+    if $needle_len == 0 {
+        1
+    } else if $needle_len > $haystack_len {
+        0
+    } else if $needle == $haystack {
+        1000
+    } else {
+        let needle_lc = $needle | str downcase
+        let haystack_lc = $haystack | str downcase
+        if ($haystack_lc | str starts-with $needle_lc) {
+            900 + (($needle_len * 100) // $haystack_len)
+        } else {
+            let needle_chars = $needle_lc | split chars
+            let haystack_chars = $haystack | split chars
+            let haystack_lc_chars = $haystack_lc | split chars
+            # Subsequence progress (needle_idx) advances one character at a time, so it
+            # must be compared with the character count of the needle. The byte length
+            # above is larger for non-ASCII needles, which made the guard miss and the
+            # lookup below index past the end of needle_chars.
+            let needle_count = ($needle_chars | length)
+            let scored = (
+                $haystack_lc_chars
+                | enumerate
+                | reduce --fold {needle_idx: 0, score: 0, prev_match: -2} { |it, acc|
+                    if $acc.needle_idx >= $needle_count {
+                        $acc
+                    } else if $it.item == ($needle_chars | get $acc.needle_idx) {
+                        let idx = $it.index
+                        let prev = if $idx == 0 { "" } else { $haystack_chars | get ($idx - 1) }
+                        let current = $haystack_chars | get $idx
+                        let boundary = (
+                            ($idx == 0)
+                            or ($prev == "-")
+                            or ($prev == "_")
+                            or (($prev =~ '^[a-z]$') and ($current =~ '^[A-Z]$'))
+                        )
+                        let base = if $boundary { 50 } else { 10 }
+                        let consecutive = if $acc.prev_match == ($idx - 1) { 20 } else { 0 }
+                        {needle_idx: ($acc.needle_idx + 1), score: ($acc.score + $base + $consecutive), prev_match: $idx}
+                    } else {
+                        $acc
+                    }
+                }
+            )
+            if $scored.needle_idx == $needle_count { $scored.score } else { 0 }
+        }
+    }
+}
+
+let inshellah_filter_candidates = { |items, prefix|    # fuzzy-filter and rank candidate records against prefix; null when nothing survives, and the cleaned list unchanged when prefix is empty
+    let candidates = do $inshellah_nonempty $items
+    if $candidates == null {
+        null
+    } else if ($prefix | is-empty) {
+        $candidates
+    } else {
+        let query = $prefix | into string    # best score first, ties keep input order; a candidate equal to the query is hidden when it is a subcommand or external command entry
+        let ranked = (
+            $candidates
+            | enumerate
+            | each { |entry| $entry.item | insert __idx $entry.index }
+            | insert __score { |cand| do $inshellah_fuzzy_score $query $cand.value }
+            | where { |cand|
+                let text = ($cand.value | into string)
+                let blurb = ($cand.description? | default "" | into string | str downcase)
+                let exact_command = ($text == $query) and (($blurb | str contains "subcommand") or $blurb == "external command")
+                ($cand.__score > 0) and not $exact_command
+            }
+            | insert __rank { |cand| 0 - $cand.__score }
+            | sort-by __rank __idx
+            | reject __idx __score __rank
+        )
+        do $inshellah_nonempty $ranked
+    }
+}
+
+let inshellah_static_complete = { |spans|    # run the inshellah binary for the given spans and return its JSON candidates; null on non-zero exit, bad JSON, a non-list payload or any error
+    try {
+        let run = (^inshellah complete ...$spans | complete)
+        if $run.exit_code == 0 {
+            let payload = (try { $run.stdout | from json } catch { null })
+            let shape = ($payload | describe)
+            # only list or table payloads are usable as a completion set
+            let is_rows = (($shape | str starts-with "list") or ($shape | str starts-with "table"))
+            if ($payload != null) and $is_rows {
+                do $inshellah_nonempty $payload
+            } else {
+                null
+            }
+        } else {
+            null
+        }
+    } catch {
+        null
+    }
+}
+
+let inshellah_unit_candidates = { |scope, prefix|    # systemd units whose name matches the prefix glob, as value/description records; scope is a list of extra systemctl flags; null on failure
+    try {
+        ^systemctl ...$scope list-units --all --no-pager --plain --full --no-legend $"($prefix)*"
+            | lines
+            | each { |line|
+                let hit = $line | parse -r '(?P<unit>\S+)\s+\S+\s+\S+\s+\S+\s+(?P<desc>.*)'
+                if not ($hit | is-empty) {
+                    {value: $hit.0.unit, description: ($hit.0.desc | str trim)}
+                }
+            } | compact
+    } catch { null }
+}
+
+let inshellah_kubectl_scope = { |spans|    # derive kubectl namespace flags from the typed spans: all-namespaces wins, then the first --namespace=X form, then the value after the last -n or --namespace
+    let wants_all = ("-A" in $spans) or ("--all-namespaces" in $spans)
+    let inline_ns = ($spans | where { |span| $span =~ '^--namespace=' } | get 0? | default "")
+    let flag_hit = (
+        $spans
+        | enumerate
+        | where { |entry| $entry.item == "-n" or $entry.item == "--namespace" }
+        | reverse
+        | get 0?
+        | default null
+    )
+    let namespace = if not ($inline_ns | is-empty) {
+        $inline_ns | str replace --regex '^--namespace=' ''
+    } else if $flag_hit != null and (($flag_hit.index + 1) < ($spans | length)) {
+        $spans | get ($flag_hit.index + 1)
+    } else {
+        ""
+    }
+
+    if $wants_all {
+        {args: [--all-namespaces], all: true}
+    } else if ($namespace | is-empty) {
+        {args: [], all: false}
+    } else {
+        {args: [-n $namespace], all: false}
+    }
+}
+
+let inshellah_kubectl_names = { |kind, spans|    # names of existing kubectl resources of the given kind, scoped by the namespace flags found in spans; null for an empty or flag-like kind, or on failure
+    if ($kind | is-empty) or ($kind | str starts-with "-") {
+        null
+    } else {
+        let ns_scope = do $inshellah_kubectl_scope $spans
+        let column_spec = if not $ns_scope.all {
+            "custom-columns=NAME:.metadata.name"
+        } else {
+            "custom-columns=NAMESPACE:.metadata.namespace,NAME:.metadata.name"
+        }
+        try {
+            let names = (
+                ^kubectl get $kind ...$ns_scope.args --no-headers -o $column_spec
+                    | lines
+                    | str trim
+                    | where { |name| not ($name | is-empty) }
+            )
+            if $ns_scope.all {
+                $names | each { |row|
+                    let cols = $row | split row -r '\s+'
+                    if ($cols | length) >= 2 {
+                        {value: ($cols | get 1), description: $"($kind) in ($cols | get 0)"}
+                    }
+                } | compact
+            } else {
+                $names | each { |name| {value: $name, description: $kind} }
+            }
+        } catch { null }
+    }
+}
+
+let inshellah_git_refs = { ||    # refs under refs/heads, refs/remotes and refs/tags; value is the short ref name, description its subject line; the object type column is read but unused; null if git fails
+    try {
+        ^git for-each-ref --format='%(refname:short)%09%(objecttype)%09%(contents:subject)' refs/heads refs/remotes refs/tags
+            | lines
+            | each { |line|
+                let cols = ($line | split row "\t")
+                if ($cols | length) >= 3 { {value: ($cols | get 0), description: ($cols | get 2)} }
+            } | compact
+    } catch { null }
+}
+
+let inshellah_git_branches = { ||    # refs under refs/heads only; value is the short branch name, description its subject line; null if git fails
+    try {
+        ^git for-each-ref --format='%(refname:short)%09%(contents:subject)' refs/heads
+            | lines
+            | each { |line|
+                let cols = ($line | split row "\t")
+                if ($cols | length) >= 2 { {value: ($cols | get 0), description: ($cols | get 1)} }
+            } | compact
+    } catch { null }
+}
+
+let inshellah_git_tags = { ||    # refs under refs/tags only; value is the short tag name, description its subject line; null if git fails
+    try {
+        ^git for-each-ref --format='%(refname:short)%09%(contents:subject)' refs/tags
+            | lines
+            | each { |line|
+                let cols = ($line | split row "\t")
+                if ($cols | length) >= 2 { {value: ($cols | get 0), description: ($cols | get 1)} }
+            } | compact
+    } catch { null }
+}
+
+let inshellah_git_remotes = { ||    # one record per non-blank line printed by git remote, all described as remote; null if git fails
+    try {
+        ^git remote
+            | lines
+            | str trim
+            | where { |remote_name| not ($remote_name | is-empty) }
+            | each { |remote_name| {value: $remote_name, description: "remote"} }
+    } catch { null }
+}
+
+let inshellah_git_stashes = { ||    # stash entries from git stash list; value is the stash@N reference, description the text after the colon; null if git fails
+    try {
+        ^git stash list
+            | lines
+            | each { |line|
+                let hit = $line | parse -r '^(?P<stash>stash@\{[0-9]+\}):\s*(?P<desc>.*)$'
+                if not ($hit | is-empty) { {value: $hit.0.stash, description: $hit.0.desc} }
+            } | compact
+    } catch { null }
+}
+
+let inshellah_git_status_paths = { ||    # paths reported by git status --porcelain -uall; for an entry containing an arrow only the part after the last arrow is kept; null if git fails
+    try {
+        ^git status --porcelain -uall
+            | lines
+            | each { |line|
+                let hit = $line | parse -r '^.. (?P<path>.+)$'
+                if not ($hit | is-empty) {
+                    let entry = $hit.0.path
+                    let target = if ($entry | str contains " -> ") { $entry | split row " -> " | last } else { $entry }
+                    {value: $target, description: "changed path"}
+                }
+            } | compact
+    } catch { null }
+}
+
+let inshellah_git_tracked_paths = { ||    # one record per non-empty line printed by git ls-files, described as tracked file; null if git fails
+    try {
+        ^git ls-files
+            | lines
+            | where { |tracked| not ($tracked | is-empty) }
+            | each { |tracked| {value: $tracked, description: "tracked file"} }
+    } catch { null }
+}
+
+let inshellah_git_submodules = { ||    # submodule paths read from the .gitmodules file in the current directory; the second whitespace-separated field of each config line is the value; null if git fails
+    try {
+        ^git config --file .gitmodules --get-regexp '^submodule\..*\.path$'
+            | lines
+            | each { |line|
+                let cols = ($line | split row -r '\s+')
+                if ($cols | length) >= 2 { {value: ($cols | get 1), description: "submodule"} }
+            } | compact
+    } catch { null }
+}
+
+let inshellah_git_worktrees = { ||    # worktree directories taken from the worktree lines of git worktree list --porcelain, with an empty description; null if git fails
+    try {
+        ^git worktree list --porcelain
+            | lines
+            | each { |line|
+                let hit = $line | parse -r '^worktree\s+(?P<p>.+)$'
+                if not ($hit | is-empty) { {value: $hit.0.p, description: ""} }
+            } | compact
+    } catch { null }
+}
+
+let inshellah_jj_revs = { ||    # every revision in the all() revset; value is the shortest change id, description the first description line; stderr is discarded; null if jj fails
+    try {
+        ^jj log --ignore-working-copy --no-graph -r 'all()' -T 'change_id.shortest() ++ "\t" ++ description.first_line() ++ "\n"' err> /dev/null
+            | lines
+            | each { |line|
+                let cols = ($line | split row "\t")
+                if ($cols | length) >= 2 { {value: ($cols | get 0), description: ($cols | get 1)} }
+            } | compact
+    } catch { null }
+}
+
+let inshellah_jj_bookmarks = { ||    # bookmark names from jj bookmark list --all-remotes, one record per non-blank line; stderr is discarded; null if jj fails
+    try {
+        ^jj bookmark list --all-remotes -T 'name ++ "\n"' err> /dev/null
+            | lines
+            | str trim
+            | where { |bookmark| not ($bookmark | is-empty) }
+            | each { |bookmark| {value: $bookmark, description: "bookmark"} }
+    } catch { null }
+}
+
+let inshellah_jj_tags = { ||    # tag names from jj tag list --all-remotes, one record per non-blank line; stderr is discarded; null if jj fails
+    try {
+        ^jj tag list --all-remotes -T 'name ++ "\n"' err> /dev/null
+            | lines
+            | str trim
+            | where { |tag_name| not ($tag_name | is-empty) }
+            | each { |tag_name| {value: $tag_name, description: "tag"} }
+    } catch { null }
+}
+
+let inshellah_jj_remotes = { ||    # remotes from jj git remote list; value is the first whitespace-separated field, description the second field or the word remote when absent; null if jj fails
+    try {
+        ^jj git remote list err> /dev/null
+            | lines
+            | each { |line|
+                let cols = ($line | str trim | split row -r '\s+')
+                if ($cols | length) >= 1 { {value: ($cols | get 0), description: ($cols | get 1? | default "remote")} }
+            } | compact
+    } catch { null }
+}
+
+let inshellah_jj_ops = { ||    # operations from jj op log; value is the short operation id, description the first description line; stderr is discarded; null if jj fails
+    try {
+        ^jj op log --ignore-working-copy --no-graph -T 'id.short() ++ "\t" ++ description.first_line() ++ "\n"' err> /dev/null
+            | lines
+            | each { |line|
+                let cols = ($line | split row "\t")
+                if ($cols | length) >= 2 { {value: ($cols | get 0), description: ($cols | get 1)} }
+            } | compact
+    } catch { null }
+}
+
+let inshellah_jj_files = { ||    # paths from jj file list, one record per non-blank line, described as repo file; stderr is discarded; null if jj fails
+    try {
+        ^jj file list --ignore-working-copy err> /dev/null
+            | lines
+            | str trim
+            | where { |repo_path| not ($repo_path | is-empty) }
+            | each { |repo_path| {value: $repo_path, description: "repo file"} }
+    } catch { null }
+}
+
+let inshellah_jj_workspaces = { ||    # workspace names from jj workspace list, one record per non-blank line; stderr is discarded; null if jj fails
+    try {
+        ^jj workspace list -T 'name ++ "\n"' err> /dev/null
+            | lines
+            | str trim
+            | where { |workspace| not ($workspace | is-empty) }
+            | each { |workspace| {value: $workspace, description: "workspace"} }
+    } catch { null }
+}
+
+let inshellah_complete = { |spans|
+    let completions = do $inshellah_static_complete $spans
+    let span_len = ($spans | length)
+    let last_span = if $span_len > 0 { $spans | last } else { "" }
+    let prev_span = if $span_len >= 2 { $spans | get ($span_len - 2) } else { "" }
+    let sub = if $span_len >= 2 { $spans | get 1 } else { "" }
+
+    let additional = if ($completions == null and $span_len > 0) {
+        match $spans.0 {
+            "nix" => {
+                if $span_len < 2 {
+                    null
+                } else {
+                    try {
+                        let nix_output = (
+                            with-env { NIX_GET_COMPLETIONS: ($span_len - 1) } {
+                                $spans | run-external $in
+                            }
+                            | split row -r '\n'
+                            | str trim
+                            | skip 1
+                            | where { |e| not ($e | is-empty) }
+                        )
+                        if (($nix_output | length) < 6 and
+                            $last_span =~ "[a-zA-Z][a-zA-Z0-9_-]*#[a-zA-Z][a-zA-Z0-9_-]*") {
+                            with-env { NIX_ALLOW_UNFREE: "1" NIX_ALLOW_BROKEN: "1" } {
+                                $nix_output | par-each { |e|
+                                    try {
+                                        {value: $e, description: (^nix eval --raw --impure $e --apply "f: f.meta.description" err> /dev/null)}
+                                    } catch {
+                                        {value: $e, description: ""}
+                                    }
+                                }
+                            }
+                        } else {
+                            $nix_output | each { |e| {value: $e, description: ""} }
+                        }
+                    } catch { null }
+                }
+            }
+            "systemctl" => {
+                let unit_verbs = [
+                    "status" "show" "cat" "help" "start" "stop" "restart" "reload" "try-restart"
+                    "reload-or-restart" "reload-or-try-restart" "isolate" "kill" "reset-failed"
+                    "enable" "disable" "reenable" "preset" "mask" "unmask" "is-active" "is-failed"
+                    "is-enabled" "edit"
+                ]
+                let args = $spans | skip 1 | where { |s| not ($s | str starts-with "-") }
+                let verb = $args | get 0? | default ""
+                if (($verb in $unit_verbs) and $span_len >= 3) {
+                    let scope = if ("--user" in $spans) { [--user] } else { [] }
+                    do $inshellah_unit_candidates $scope $last_span
+                } else { null }
+            }
+            "journalctl" => {
+                if ($prev_span == "--unit" or $prev_span == "-u") {
+                    let scope = if ("--user-unit" in $spans or "--user" in $spans) { [--user] } else { [] }
+                    do $inshellah_unit_candidates $scope $last_span
+                } else { null }
+            }
+            "coredumpctl" => {
+                let unit_verbs = ["dump" "info" "debug" "list"]
+                if (($sub in $unit_verbs) and $span_len >= 3) {
+                    let units = (do $inshellah_unit_candidates [] $last_span | default [])
+                    let pids = (try {
+                        ^coredumpctl list --no-pager --no-legend
+                            | lines
+                            | each { |l|
+                                let p = $l | split row -r '\s+'
+                                if ($p | length) >= 5 { {value: $p.4, description: $"PID ($p.4) ($p | get 9? | default "")"} }
+                            } | compact
+                    } catch { [] })
+                    $units | append $pids
+                } else { null }
+            }
+            "loginctl" => {
+                let user_verbs = ["user-status" "show-user" "enable-linger" "disable-linger" "kill-user" "terminate-user"]
+                let session_verbs = ["session-status" "show-session" "activate" "lock-session" "unlock-session" "terminate-session" "kill-session"]
+                if (($sub in $user_verbs) and $span_len >= 3) {
+                    try {
+                        ^loginctl list-users --no-pager --no-legend
+                            | lines | each { |l|
+                                let p = $l | str trim | split row -r '\s+'
+                                if ($p | length) >= 2 { {value: $p.1, description: $"UID ($p.0)"} }
+                            } | compact
+                    } catch { null }
+                } else if (($sub in $session_verbs) and $span_len >= 3) {
+                    try {
+                        ^loginctl list-sessions --no-pager --no-legend
+                            | lines | each { |l|
+                                let p = $l | str trim | split row -r '\s+'
+                                if ($p | length) >= 3 { {value: $p.0, description: $"user ($p.2)"} }
+                            } | compact
+                    } catch { null }
+                } else { null }
+            }
+            "machinectl" => {
+                let machine_verbs = ["status" "show" "start" "login" "shell" "enable" "disable" "poweroff" "reboot" "terminate" "kill" "bind" "copy-to" "copy-from"]
+                if (($sub in $machine_verbs) and $span_len >= 3) {
+                    try {
+                        ^machinectl list --no-pager --no-legend
+                            | lines | each { |l|
+                                let p = $l | str trim | split row -r '\s+'
+                                if ($p | length) >= 1 { {value: $p.0, description: ($p | get 1? | default "")} }
+                            } | compact
+                    } catch { null }
+                } else { null }
+            }
+            "networkctl" => {
+                let link_verbs = ["status" "show" "up" "down" "renew" "forcerenew" "reconfigure" "delete"]
+                if (($sub in $link_verbs) and $span_len >= 3) {
+                    try {
+                        ^networkctl list --no-pager --no-legend
+                            | lines | each { |l|
+                                let p = $l | str trim | split row -r '\s+'
+                                if ($p | length) >= 4 { {value: $p.1, description: $"($p.2) ($p.3)"} }
+                            } | compact
+                    } catch { null }
+                } else { null }
+            }
+            "hostnamectl" | "timedatectl" | "localectl" => {
+                null
+            }
+            "ssh" | "scp" | "sftp" => {
+                let cfg_hosts = (try {
+                    open ~/.ssh/config | lines | each { |l|
+                        let m = $l | parse -r '(?i)^\s*Host\s+(?P<h>.+)$'
+                        if ($m | length) > 0 { $m.0.h | split row -r '\s+' } else { [] }
+                    } | flatten | where { |h| not ($h | str contains '*') and not ($h | is-empty) }
+                } catch { [] })
+                let known = (try {
+                    open ~/.ssh/known_hosts | lines | each { |l|
+                        ($l | split row -r '\s+' | get 0? | default "") | split row ','
+                    } | flatten | where { |h| (not ($h | is-empty)) and (not ($h | str starts-with '|')) and (not ($h | str starts-with '[')) }
+                } catch { [] })
+                $cfg_hosts | append $known | uniq | each { |h| {value: $h, description: ""} }
+            }
+            "docker" | "podman" => {
+                let need_container = ["exec" "logs" "inspect" "start" "stop" "restart" "rm" "kill" "attach" "cp" "top" "wait" "pause" "unpause" "port" "commit" "diff" "export"]
+                let need_image = ["run" "rmi" "tag" "push" "pull" "history" "save" "create"]
+                if ($sub in $need_container) {
+                    try {
+                        ^($spans.0) ps -a --format '{{.Names}}\t{{.Image}}'
+                            | lines | each { |l|
+                                let p = $l | split row "\t"
+                                if ($p | length) >= 2 { {value: $p.0, description: $p.1} }
+                            } | compact
+                    } catch { null }
+                } else if ($sub in $need_image) {
+                    try {
+                        ^($spans.0) images --format '{{.Repository}}:{{.Tag}}\t{{.Size}}'
+                            | lines | each { |l|
+                                let p = $l | split row "\t"
+                                if (($p | length) >= 2) and (not ($p.0 | str ends-with ':<none>')) {
+                                    {value: $p.0, description: $p.1}
+                                }
+                            } | compact
+                    } catch { null }
+                } else { null }
+            }
+            "kubectl" => {
+                let resource_verbs = ["get" "describe" "delete" "edit" "scale" "annotate" "label"]
+                if (($sub in $resource_verbs) and $span_len >= 4) {
+                    let kind = $spans | get 2? | default ""
+                    do $inshellah_kubectl_names $kind $spans
+                } else if (($sub == "logs" or $sub == "exec" or $sub == "port-forward") and $span_len >= 3) {
+                    do $inshellah_kubectl_names "pods" $spans
+                } else if ($sub == "rollout" and $span_len >= 5) {
+                    let action = $spans | get 2? | default ""
+                    let kind = $spans | get 3? | default ""
+                    if ($action in ["history" "pause" "restart" "resume" "status" "undo"]) {
+                        do $inshellah_kubectl_names $kind $spans
+                    } else { null }
+                } else { null }
+            }
+            "git" => {
+                let git_verbs = [
+                    "add" "bisect" "branch" "checkout" "cherry-pick" "clone" "commit" "diff"
+                    "fetch" "grep" "init" "log" "merge" "mv" "pull" "push" "rebase" "reflog"
+                    "remote" "reset" "restore" "revert" "rm" "show" "stash" "status"
+                    "submodule" "switch" "tag" "worktree"
+                ]
+                let ref_verbs = ["checkout" "merge" "rebase" "log" "diff" "show" "reset" "cherry-pick" "revert" "tag" "blame" "bisect"]
+                let branch_verbs = ["switch" "branch"]
+                let remote_verbs = ["add" "rename" "remove" "rm" "set-head" "set-branches" "get-url" "set-url" "show" "prune" "update"]
+                let stash_verbs = ["push" "save" "list" "show" "drop" "pop" "apply" "branch" "clear" "create" "store"]
+                let submodule_verbs = ["add" "status" "init" "deinit" "update" "set-branch" "set-url" "summary" "foreach" "sync" "absorbgitdirs"]
+                let bisect_verbs = ["start" "bad" "good" "new" "old" "terms" "skip" "next" "reset" "visualize" "view" "replay" "log" "run"]
+                let git_args = $spans | skip 2 | where { |s| not ($s | is-empty) and not ($s | str starts-with "-") }
+                if $span_len <= 2 {
+                    $git_verbs | each { |v| {value: $v, description: "git subcommand"} }
+                } else if ($sub == "worktree") {
+                    let worktree_verb = $spans | get 2? | default ""
+                    if $span_len <= 3 {
+                        ["add" "list" "lock" "move" "prune" "remove" "repair" "unlock"] | each { |v| {value: $v, description: "worktree subcommand"} }
+                    } else if ($worktree_verb in ["remove" "move" "lock" "unlock" "repair"]) {
+                        do $inshellah_git_worktrees
+                    } else if ($worktree_verb == "add" and $span_len >= 5) {
+                        do $inshellah_git_refs
+                    } else { null }
+                } else if ($sub == "remote" and $span_len >= 3) {
+                    let remote_verb = $spans | get 2? | default ""
+                    if $span_len <= 3 {
+                        $remote_verbs | each { |v| {value: $v, description: "remote subcommand"} }
+                    } else if ($remote_verb in ["rename" "remove" "rm" "set-head" "set-branches" "get-url" "set-url" "show" "prune" "update"]) {
+                        do $inshellah_git_remotes
+                    } else { null }
+                } else if (($sub in ["fetch" "push" "pull"]) and $span_len >= 3) {
+                    if ($git_args | is-empty) {
+                        do $inshellah_git_remotes
+                    } else {
+                        do $inshellah_git_refs
+                    }
+                } else if ($sub == "stash" and $span_len >= 3) {
+                    let stash_verb = $spans | get 2? | default ""
+                    if $span_len <= 3 {
+                        $stash_verbs | each { |v| {value: $v, description: "stash subcommand"} }
+                    } else if ($stash_verb in ["show" "drop" "pop" "apply" "store"]) {
+                        do $inshellah_git_stashes
+                    } else if ($stash_verb == "branch" and ($git_args | length) >= 2) {
+                        do $inshellah_git_stashes
+                    } else { null }
+                } else if ($sub == "submodule" and $span_len >= 3) {
+                    let submodule_verb = $spans | get 2? | default ""
+                    if $span_len <= 3 {
+                        $submodule_verbs | each { |v| {value: $v, description: "submodule subcommand"} }
+                    } else if ($submodule_verb in ["status" "init" "deinit" "update" "set-branch" "set-url" "summary" "foreach" "sync"]) {
+                        do $inshellah_git_submodules
+                    } else { null }
+                } else if ($sub == "bisect" and $span_len >= 3) {
+                    let bisect_verb = $spans | get 2? | default ""
+                    if $span_len <= 3 {
+                        $bisect_verbs | each { |v| {value: $v, description: "bisect subcommand"} }
+                    } else if ($bisect_verb in ["bad" "good" "new" "old" "skip" "reset" "start"]) {
+                        do $inshellah_git_refs
+                    } else { null }
+                } else if ($sub == "tag" and $span_len >= 3) {
+                    if (["-d" "--delete" "-v" "--verify"] | any { |f| $f in $spans }) {
+                        do $inshellah_git_tags
+                    } else if ($span_len >= 4) {
+                        do $inshellah_git_refs
+                    } else {
+                        do $inshellah_git_tags
+                    }
+                } else if ($sub == "add" and $span_len >= 3) {
+                    do $inshellah_git_status_paths
+                } else if ($sub == "restore" and $span_len >= 3) {
+                    if ($prev_span == "--source" or $prev_span == "-s") {
+                        do $inshellah_git_refs
+                    } else {
+                        do $inshellah_git_status_paths
+                    }
+                } else if ($sub == "rm" and $span_len >= 3) {
+                    do $inshellah_git_tracked_paths
+                } else if ($sub == "mv" and $span_len >= 3) {
+                    if ($git_args | is-empty) { do $inshellah_git_tracked_paths } else { null }
+                } else if ($sub == "checkout" and $span_len >= 3) {
+                    if ($prev_span in ["-b" "-B" "--orphan"]) { null } else { do $inshellah_git_refs }
+                } else if ($sub == "switch" and $span_len >= 3) {
+                    if ($prev_span in ["-c" "-C" "--create" "--force-create" "--orphan"]) { null } else { do $inshellah_git_branches }
+                } else if (($sub in $branch_verbs) and $span_len >= 3) {
+                    do $inshellah_git_branches
+                } else if (($sub in $ref_verbs) and $span_len >= 3) {
+                    do $inshellah_git_refs
+                } else { null }
+            }
+            "jj" => {
+                let jj_verbs = [
+                    "abandon" "absorb" "bookmark" "commit" "describe" "diff" "diffedit"
+                    "duplicate" "edit" "evolog" "file" "git" "interdiff" "log" "new"
+                    "operation" "op" "rebase" "resolve" "restore" "revert" "show" "sparse"
+                    "split" "squash" "status" "tag" "undo" "workspace" "b" "ci" "desc" "st"
+                ]
+                let rev_flags = [
+                    "-r" "--revision" "--revisions" "--from" "--to" "-s" "--source"
+                    "-d" "--destination" "--insert-after" "--insert-before" "--before"
+                    "--after" "--onto" "--change"
+                ]
+                let rev_verbs = [
+                    "abandon" "absorb" "describe" "diff" "diffedit" "duplicate" "edit"
+                    "evolog" "interdiff" "log" "metaedit" "new" "parallelize" "rebase"
+                    "restore" "revert" "show" "sign" "simplify-parents" "split" "squash"
+                    "unsign"
+                ]
+                let bookmark_verbs = ["advance" "create" "delete" "forget" "list" "move" "rename" "set" "track" "untrack"]
+                let jj_git_verbs = ["clone" "colocation" "export" "fetch" "import" "init" "push" "remote" "root"]
+                let jj_remote_verbs = ["add" "list" "remove" "rename" "set-url"]
+                let op_verbs = ["abandon" "diff" "integrate" "log" "restore" "revert" "show"]
+                let file_verbs = ["annotate" "chmod" "list" "search" "show" "track" "untrack"]
+                let workspace_verbs = ["add" "forget" "list" "rename" "root" "update-stale"]
+                let sparse_verbs = ["edit" "list" "reset" "set"]
+                let jj_args = $spans | skip 2 | where { |s| not ($s | is-empty) and not ($s | str starts-with "-") }
+                if ($prev_span in $rev_flags) {
+                    do $inshellah_jj_revs
+                } else if ($prev_span == "--remote") {
+                    do $inshellah_jj_remotes
+                } else if ($prev_span == "--at-operation" or $prev_span == "--at-op") {
+                    do $inshellah_jj_ops
+                } else if $span_len <= 2 {
+                    $jj_verbs | each { |v| {value: $v, description: "jj subcommand"} }
+                } else if ($sub == "bookmark" or $sub == "b") {
+                    let verb = $spans | get 2? | default ""
+                    if $span_len <= 3 {
+                        $bookmark_verbs | each { |v| {value: $v, description: "bookmark subcommand"} }
+                    } else if ($verb in ["delete" "forget" "move" "rename" "set" "track" "untrack" "advance"]) {
+                        do $inshellah_jj_bookmarks
+                    } else { null }
+                } else if ($sub == "tag") {
+                    let verb = $spans | get 2? | default ""
+                    if $span_len <= 3 {
+                        ["delete" "list" "set"] | each { |v| {value: $v, description: "tag subcommand"} }
+                    } else if ($verb in ["delete" "set"]) {
+                        do $inshellah_jj_tags
+                    } else { null }
+                } else if ($sub == "git") {
+                    let git_verb = $spans | get 2? | default ""
+                    let remote_verb = $spans | get 3? | default ""
+                    if $span_len <= 3 {
+                        $jj_git_verbs | each { |v| {value: $v, description: "jj git subcommand"} }
+                    } else if ($git_verb == "remote") {
+                        if $span_len <= 4 {
+                            $jj_remote_verbs | each { |v| {value: $v, description: "remote subcommand"} }
+                        } else if ($remote_verb in ["remove" "rename" "set-url"]) {
+                            do $inshellah_jj_remotes
+                        } else { null }
+                    } else if ($git_verb in ["fetch" "push"]) {
+                        do $inshellah_jj_remotes
+                    } else { null }
+                } else if ($sub == "operation" or $sub == "op") {
+                    let verb = $spans | get 2? | default ""
+                    if $span_len <= 3 {
+                        $op_verbs | each { |v| {value: $v, description: "operation subcommand"} }
+                    } else if ($verb in ["abandon" "diff" "integrate" "restore" "revert" "show"]) {
+                        do $inshellah_jj_ops
+                    } else { null }
+                } else if ($sub == "file") {
+                    let verb = $spans | get 2? | default ""
+                    if $span_len <= 3 {
+                        $file_verbs | each { |v| {value: $v, description: "file subcommand"} }
+                    } else if ($verb in ["annotate" "chmod" "list" "search" "show" "untrack"]) {
+                        do $inshellah_jj_files
+                    } else { null }
+                } else if ($sub == "workspace") {
+                    let verb = $spans | get 2? | default ""
+                    if $span_len <= 3 {
+                        $workspace_verbs | each { |v| {value: $v, description: "workspace subcommand"} }
+                    } else if ($verb in ["forget" "update-stale"]) {
+                        do $inshellah_jj_workspaces
+                    } else { null }
+                } else if ($sub == "sparse") {
+                    if $span_len <= 3 {
+                        $sparse_verbs | each { |v| {value: $v, description: "sparse subcommand"} }
+                    } else { null }
+                } else if ($sub in ["diff" "log"] and ($jj_args | is-empty)) {
+                    do $inshellah_jj_files
+                } else if ($sub in $rev_verbs and $span_len >= 3) {
+                    do $inshellah_jj_revs
+                } else { null }
+            }
+            "npm" | "pnpm" | "yarn" => {
+                let wants = (
+                    (($spans.0 == "yarn") and $span_len == 2)
+                    or (($sub == "run" or $sub == "run-script") and $span_len == 3)
+                )
+                if $wants {
+                    try {
+                        open package.json | get scripts? | default {} | transpose name cmd
+                            | each { |row| {value: $row.name, description: $row.cmd} }
+                    } catch { null }
+                } else { null }
+            }
+            "make" => {
+                if $span_len <= 2 {
+                    try {
+                        open Makefile | lines
+                            | each { |l|
+                                let m = $l | parse -r '^(?P<t>[A-Za-z0-9_./-]+)\s*:'
+                                if (($m | length) > 0) and (not ($m.0.t | str starts-with '.')) {
+                                    {value: $m.0.t, description: ""}
+                                }
+                            } | compact | uniq-by value
+                    } catch { null }
+                } else { null }
+            }
+            "just" => {
+                if $span_len <= 2 {
+                    try {
+                        ^just --list --unsorted
+                            | lines | skip 1
+                            | each { |l|
+                                let m = $l | parse -r '^\s+(?P<t>[A-Za-z0-9_-]+)(?:\s+\S.*)?(?:\s*#\s*(?P<d>.*))?$'
+                                if ($m | length) > 0 {
+                                    {value: $m.0.t, description: ($m.0.d? | default "")}
+                                }
+                            } | compact
+                    } catch { null }
+                } else { null }
+            }
+            "cargo" => {
+                let target_flags = ["--bin" "--example" "--test" "--bench"]
+                if ($prev_span == "-p" or $prev_span == "--package") {
+                    try {
+                        ^cargo metadata --no-deps --format-version 1
+                            | from json
+                            | get packages
+                            | each { |pkg| {value: $pkg.name, description: ($pkg.version? | default "")} }
+                            | uniq-by value
+                    } catch { null }
+                } else if ($prev_span in $target_flags) {
+                    let kind = $prev_span | str replace "--" ""
+                    try {
+                        ^cargo metadata --no-deps --format-version 1
+                            | from json
+                            | get packages
+                            | each { |pkg|
+                                $pkg.targets
+                                    | where { |t| $kind in $t.kind }
+                                    | each { |t| {value: $t.name, description: ($t.kind | str join ",")} }
+                            }
+                            | flatten
+                            | uniq-by value
+                    } catch { null }
+                } else { null }
+            }
+            "kill" | "pkill" => {
+                try {
+                    ^ps -eo pid,comm --no-headers
+                        | lines
+                        | each { |l|
+                            let parts = $l | str trim | split row -r '\s+'
+                            if ($parts | length) >= 2 {
+                                let pid = $parts | get 0
+                                let comm = $parts | skip 1 | str join " "
+                                if ($spans.0 == "kill") { {value: $pid, description: $comm} }
+                                else { {value: $comm, description: $pid} }
+                            }
+                        } | compact
+                } catch { null }
+            }
+            _ => { null }
+        }
+    } else { null }
+
+    if $completions == null {
+        do $inshellah_filter_candidates $additional $last_span
+    } else {
+        $completions
+    }
+}
+
+$env.config.completions.external = {enable: true, max_results: 200, completer: $inshellah_complete}  # register $inshellah_complete as the external completer
diff --git a/nix/module.nix b/nix/module.nix
index 04ae7f2..95289f8 100644
--- a/nix/module.nix
+++ b/nix/module.nix
@@ -10,7 +10,7 @@
 #
 # Usage:
 #   { pkgs, ... }: {
-#     imports = [ ./path/to/inshellah/nix/module.nix ];
+#     imports = [ ./path/to/inshellah-rs/nix/module.nix ];
 #     programs.inshellah.enable = true;
 #   }
 
@@ -23,6 +23,34 @@
 
 let
   cfg = config.programs.inshellah;
+  completerSnippet = ./inshellah-completer.nu;
+  dynamicStubCommands = [
+    "systemctl"
+    "journalctl"
+    "coredumpctl"
+    "loginctl"
+    "machinectl"
+    "networkctl"
+    "hostnamectl"
+    "timedatectl"
+    "localectl"
+    "ssh"
+    "scp"
+    "sftp"
+    "docker"
+    "podman"
+    "kubectl"
+    "git"
+    "jj"
+    "npm"
+    "pnpm"
+    "yarn"
+    "make"
+    "just"
+    "cargo"
+    "pkill"
+  ];
+  dynamicStubCommandArgs = lib.escapeShellArgs dynamicStubCommands;
 in
 {
   options.programs.inshellah = {
@@ -72,9 +100,33 @@ in
       '';
     };
 
+    timeoutMs = lib.mkOption {
+      type = lib.types.nullOr lib.types.ints.positive; # bad values would otherwise only fail, silently, at index time
+      default = null;
+      example = 200;
+      description = ''
+        per-subprocess timeout in milliseconds (must be positive). when null
+        the binary's compiled-in default is used (currently 200ms).
+      '';
+    };
+
+    workers = lib.mkOption {
+      type = lib.types.nullOr lib.types.ints.positive; # bad values would otherwise only fail, silently, at index time
+      default = null;
+      example = 8;
+      description = ''
+        worker thread count for the parallel scrape pool (must be positive).
+        when null, `std::thread::available_parallelism` is used.
+      '';
+    };
+
     snippet = lib.mkOption {
       type = lib.types.str;
       readOnly = true;
+      default = builtins.readFile completerSnippet;
+      description = ''
+        nushell external completer snippet installed by the module.
+      '';
     };
   };
 
@@ -98,7 +150,10 @@ in
         (lib.hiPrio wrapped)
         cfg.package
       ];
-    environment.pathsToLink = [ "/share/nushell/autoload" ];
+    environment.pathsToLink = [
+      "/share/nushell/autoload"
+      "/share/nushell/vendor/autoload"
+    ];
     environment.extraSetup =
       let
         inshellah = "${cfg.package}/bin/inshellah";
@@ -109,30 +164,38 @@ in
           lib.concatStringsSep "\n" cfg.helpOnlyCommands
         );
         helpOnlyFlag = lib.optionalString (cfg.helpOnlyCommands != [ ]) " --help-only ${helpOnlyFile}";
+        timeoutFlag = lib.optionalString (cfg.timeoutMs != null) " --timeout-ms ${toString cfg.timeoutMs}";
+        workersFlag = lib.optionalString (cfg.workers != null) " --workers ${toString cfg.workers}";
+        snippetFile = pkgs.writeText "inshellah-completer.nu" cfg.snippet;
       in
       ''
         mkdir -p ${destDir}
 
         if [ -d "$out/bin" ] && [ -d "$out/share/man" ]; then
-          ${inshellah} index "$out" --dir ${destDir}${ignoreFlag}${helpOnlyFlag} \
+          ${inshellah} index "$out" --dir ${destDir}${ignoreFlag}${helpOnlyFlag}${timeoutFlag}${workersFlag} \
             2>/dev/null || true
         fi
 
         find ${destDir} -maxdepth 1 -empty -delete
 
-        # nushell hardcodes sudo and doas to bypass the external completer,
-        # returning command-name completion instead of calling inshellah.
-        # these @complete external stubs override that so inshellah handles
-        # their flags and elevation stripping. placed in the nushell autoload
-        # dir so they are sourced automatically at shell startup.
+        # Install the full nushell completer plus sudo/doas wrapped commands.
+        # Nushell otherwise hardcodes sudo/doas to bypass external completers.
         mkdir -p $out/share/nushell/vendor/autoload
-        cat > $out/share/nushell/vendor/autoload/inshellah-elevation.nu << 'NUSHELL'
-        @complete external
-        extern "sudo" []
+        cp ${snippetFile} $out/share/nushell/vendor/autoload/inshellah.nu
 
-        @complete external
-        extern "doas" []
-        NUSHELL
+        # Register command names for dynamic backends that are actually present
+        # in the linked profile. The externs keep Nu's command list aware of
+        # these commands while the external completer still supplies arguments.
+        stubFile=$out/share/nushell/vendor/autoload/inshellah-command-stubs.nu
+        : > "$stubFile"
+        for cmd in ${dynamicStubCommandArgs}; do
+          if [ -x "$out/bin/$cmd" ]; then
+            printf '@complete external\nextern "%s" [...args]\n\n' "$cmd" >> "$stubFile"
+          fi
+        done
+        if [ ! -s "$stubFile" ]; then
+          rm -f "$stubFile"
+        fi
       '';
   };
 }
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..2256bee
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,4 @@
+pub mod parsers;
+pub mod pool;
+pub mod store;
+pub mod types;
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..df1f49b
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,2241 @@
+//! inshellah CLI.
+//!
+//! subcommands:
+//!   index PREFIX...     scan PREFIX/bin and PREFIX/share/man, write JSON cache
+//!   manpage FILE        parse a single manpage, emit nushell extern
+//!   manpage-dir DIR     batch-process manpages under DIR
+//!   complete CMD ARG... nushell external completer; reads the cache,
+//!                       falls back to on-the-fly --help if uncached
+//!   query CMD           print stored data for CMD
+//!   dump                list indexed commands
+//!   completions         emit nushell completion definitions for inshellah itself
+
+use std::collections::HashSet;
+use std::fs;
+use std::io::Read;
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+use parking_lot::Mutex;
+
+use inshellah::parsers::help::help_parser;
+use inshellah::parsers::manpage::{
+    ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch,
+    extract_synopsis_command, parse_manpage_string, parse_manpage_with_subs, read_manpage_file,
+};
+use inshellah::parsers::nushell::{generate_extern, generate_module, is_nushell_builtin};
+use inshellah::pool::{ScrapePool, Submitter};
+use inshellah::store::{
+    all_commands, default_store_path, ensure_dir, file_type_of, filename_of_command, lookup,
+    lookup_raw, parse_nu_completions, subcommands_of, write_native, write_result,
+};
+
+const COMMAND_SECTIONS: &[u8] = &[1, 8];
+
+/// per-subprocess timeout default when --timeout-ms isn't passed.
+/// empirically tuned so that a slow-to-print binary doesn't block the
+/// pool, while fast-responding ones (the vast majority) print their
+/// --help well inside the window. with `n` parallel workers a 200ms
+/// ceiling means the worst-case waste from an unresponsive binary is
+/// `200ms / n_workers` of wall time.
+const DEFAULT_TIMEOUT_MS: u64 = 200;
+
+/// print the command-line usage summary to stderr.
+fn usage() {
+    const USAGE: &str = "inshellah - nushell completions engine
+
+Usage:
+  inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
+                            [--timeout-ms N] [--workers N]
+      Index completions into a directory of JSON/nu files.
+      PREFIX is a directory containing bin/ and share/man/.
+      Default dir: $XDG_CACHE_HOME/inshellah
+      --ignore FILE     skip listed commands entirely
+      --help-only FILE  skip manpages for listed commands, use --help instead
+      --timeout-ms N    per-subprocess timeout in milliseconds (default 200)
+      --workers N       parallel scrape workers (default: cpu count)
+  inshellah complete CMD [ARGS...] [--dir PATH[:PATH...]] [--timeout-ms N]
+      Nushell custom completer. Outputs JSON completion candidates.
+      Falls back to --help resolution if command is not indexed.
+      --dir takes colon-separated paths. The first path is the writable
+      user cache; additional paths are read-only system directories.
+  inshellah query CMD [--dir PATH[:PATH...]]
+      Print stored completion data for CMD.
+  inshellah dump [--dir PATH[:PATH...]]
+      List indexed commands.
+  inshellah manpage FILE            Parse a manpage and emit nushell extern
+  inshellah manpage-dir DIR         Batch-process manpages under DIR
+  inshellah completions             Generate nushell completions for inshellah
+";
+    eprintln!("{USAGE}");
+}
+
+// --- subprocess management ---
+
+/// environment for spawned children: the parent's variables minus the
+/// display/session-bus ones, so gui tools can't pop up windows when run
+/// with --help. built once per process — `vars_os` walks the whole env on
+/// every call, which adds up across thousands of spawns.
+fn safe_env_vars() -> &'static [(std::ffi::OsString, std::ffi::OsString)] {
+    use std::ffi::OsString;
+    const STRIPPED: [&str; 4] = [
+        "DISPLAY",
+        "WAYLAND_DISPLAY",
+        "DBUS_SESSION_BUS_ADDRESS",
+        "XAUTHORITY",
+    ];
+    static FILTERED: std::sync::OnceLock<Vec<(OsString, OsString)>> = std::sync::OnceLock::new();
+    FILTERED.get_or_init(|| {
+        std::env::vars_os()
+            .filter(|(key, _)| !STRIPPED.iter().any(|name| key == name))
+            .collect()
+    })
+}
+
+/// run a command with a timeout, capturing stdout+stderr merged. returns
+/// None if the process couldn't be started or produced no output; on timeout
+/// the process group is killed and any output read so far is still returned.
+///
+/// uses `poll(2)` on the pipe fds directly from the calling thread — no
+/// reader threads, no try_wait polling loop. we block in the kernel for
+/// either data (POLLIN), peer-close (POLLHUP), or the timeout deadline,
+/// so the cost per subprocess is roughly one syscall per data chunk
+/// plus the spawn itself.
+///
+/// unix process groups still apply: the child is its own pgid leader, so
+/// on timeout or poll failure we killpg(pgid, SIGKILL) and the whole tree
+/// (wrapper scripts, forked grandchildren) dies, closing the pipe writers
+/// and letting the final wait() return promptly.
+fn run_cmd(args: &[String], timeout_ms: u64) -> Option<String> {
+    use std::io::Read;
+    use std::os::fd::AsRawFd;
+    use std::os::unix::process::CommandExt;
+
+    if args.is_empty() {
+        return None;
+    }
+    let mut cmd = Command::new(&args[0]);
+    cmd.args(&args[1..]);
+    cmd.stdin(Stdio::null());
+    cmd.stdout(Stdio::piped());
+    cmd.stderr(Stdio::piped());
+    cmd.env_clear();
+    for (k, v) in safe_env_vars() {
+        cmd.env(k, v);
+    }
+    cmd.current_dir("/tmp");
+    cmd.process_group(0);
+
+    let mut child = cmd.spawn().ok()?;
+    let pgid = child.id() as i32;
+    let mut stdout = child.stdout.take()?;
+    let mut stderr = child.stderr.take()?;
+    let stdout_fd = stdout.as_raw_fd();
+    let stderr_fd = stderr.as_raw_fd();
+
+    // SAFETY: both fds are open pipe ends owned by `stdout`/`stderr` above.
+    // non-blocking mode lets poll-then-read drain without stalling on a read.
+    unsafe {
+        for fd in [stdout_fd, stderr_fd] {
+            let flags = libc::fcntl(fd, libc::F_GETFL);
+            libc::fcntl(fd, libc::F_SETFL, flags | libc::O_NONBLOCK);
+        }
+    }
+
+    let deadline = Instant::now() + Duration::from_millis(timeout_ms);
+    let mut buf: Vec<u8> = Vec::with_capacity(4096);
+    let mut chunk = [0u8; 4096];
+    let mut stdout_open = true;
+    let mut stderr_open = true;
+    let mut must_kill = false;
+
+    while stdout_open || stderr_open {
+        let now = Instant::now();
+        if now >= deadline {
+            must_kill = true;
+            break;
+        }
+        let remaining_ms = (deadline - now).as_millis().min(i32::MAX as u128) as i32;
+
+        let mut fds = [
+            libc::pollfd {
+                fd: if stdout_open { stdout_fd } else { -1 },
+                events: libc::POLLIN,
+                revents: 0,
+            },
+            libc::pollfd {
+                fd: if stderr_open { stderr_fd } else { -1 },
+                events: libc::POLLIN,
+                revents: 0,
+            },
+        ];
+        let n = unsafe { libc::poll(fds.as_mut_ptr(), fds.len() as libc::nfds_t, remaining_ms) };
+        if n < 0 {
+            // EINTR — retry. anything else: bail, killing the group so wait() can't hang.
+            if std::io::Error::last_os_error().kind() == std::io::ErrorKind::Interrupted {
+                continue;
+            }
+            must_kill = true;
+            break;
+        }
+        if n == 0 {
+            // poll timed out; the deadline check at the top of the loop handles it.
+            continue;
+        }
+
+        // drain whichever fds are ready until EAGAIN or EOF.
+        for (i, pfd) in fds.iter().enumerate() {
+            if pfd.revents == 0 {
+                continue;
+            }
+            let (reader, open): (&mut dyn Read, &mut bool) = if i == 0 {
+                (&mut stdout as &mut dyn Read, &mut stdout_open)
+            } else {
+                (&mut stderr as &mut dyn Read, &mut stderr_open)
+            };
+            loop {
+                match reader.read(&mut chunk) {
+                    Ok(0) => {
+                        *open = false;
+                        break;
+                    }
+                    Ok(read) => buf.extend_from_slice(&chunk[..read]),
+                    Err(e) if e.kind() == std::io::ErrorKind::WouldBlock => break,
+                    Err(_) => {
+                        *open = false;
+                        break;
+                    }
+                }
+            }
+            if pfd.revents & (libc::POLLHUP | libc::POLLERR) != 0 {
+                *open = false;
+            }
+        }
+    }
+
+    if must_kill {
+        unsafe {
+            libc::killpg(pgid, libc::SIGKILL);
+        }
+    }
+    let _ = child.wait();
+
+    if buf.is_empty() {
+        None
+    } else {
+        Some(String::from_utf8_lossy(&buf).into_owned())
+    }
+}
+
+// --- file classification ---
+
+/// whether `path` (symlinks followed) is a regular file with any execute bit.
+fn is_executable(path: &Path) -> bool {
+    use std::os::unix::fs::PermissionsExt;
+    let has_exec_bit = |m: fs::Metadata| m.is_file() && (m.permissions().mode() & 0o111) != 0;
+    fs::metadata(path).is_ok_and(has_exec_bit)
+}
+
+/// true when the file behind `path` starts with a `#!` shebang.
+/// symlinks are resolved before the file is opened. a path that can't be
+/// resolved or opened, or a file shorter than two bytes, is reported as
+/// not-a-script.
+fn is_script(path: &Path) -> bool {
+    let mut magic = [0u8; 2];
+    // any failure along the chain collapses to `false`.
+    let read_ok = fs::canonicalize(path)
+        .and_then(fs::File::open)
+        .and_then(|mut file| file.read_exact(&mut magic))
+        .is_ok();
+    read_ok && &magic == b"#!"
+}
+
+/// filenames that can't be real commands: dotfiles, library artifacts
+/// (`.so`, `.a`, `.la`), and anything containing a path separator.
+fn skip_name(name: &str) -> bool {
+    if name.starts_with('.') || name.contains('/') {
+        return true;
+    }
+    [".so", ".a", ".la"].iter().any(|suffix| name.ends_with(*suffix))
+}
+
+// --- ELF scanning ---
+
+/// scan an ELF binary for string needles; returns the needles that appeared.
+/// if the path can't be resolved or opened, all needles are reported found
+/// (conservative — we'd rather try --help than skip); non-ELF files yield none.
+fn elf_scan(path: &Path, needles: &[&str]) -> HashSet<String> {
+    let mut found: HashSet<String> = HashSet::new();
+    let real = match fs::canonicalize(path) {
+        Ok(p) => p,
+        Err(_) => {
+            for n in needles {
+                found.insert((*n).to_string());
+            }
+            return found;
+        }
+    };
+    let Ok(mut f) = fs::File::open(&real) else {
+        for n in needles {
+            found.insert((*n).to_string());
+        }
+        return found;
+    };
+    let mut magic = [0u8; 4];
+    if f.read_exact(&mut magic).is_err() {
+        return found; // couldn't read four magic bytes: handled like a non-ELF file
+    }
+    if magic != [0x7f, b'E', b'L', b'F'] {
+        // not ELF — return empty so caller decides
+        return found;
+    }
+    let max_needle = needles.iter().map(|s| s.len()).max().unwrap_or(0);
+    let chunk_size = 65536usize;
+    let mut buf = vec![0u8; chunk_size + max_needle]; // one chunk plus the carried-over tail
+    let mut carry = 0usize; // bytes at the front of `buf` kept from the previous read
+    let needles_b: Vec<&[u8]> = needles.iter().map(|s| s.as_bytes()).collect();
+    loop {
+        let n: usize = f
+            .read(&mut buf[carry..carry + chunk_size])
+            .unwrap_or_default(); // a read error ends the scan the same way EOF does
+        if n == 0 {
+            break;
+        }
+        let total = carry + n;
+        for (i, needle) in needles_b.iter().enumerate() {
+            let key = needles[i];
+            if found.contains(key) {
+                continue;
+            }
+            if needle.len() > total {
+                continue;
+            }
+            let win = &buf[..total];
+            if win.windows(needle.len()).any(|w| w == *needle) {
+                found.insert(key.to_string());
+            }
+        }
+        if found.len() == needles.len() {
+            break; // every needle seen; no need to read further
+        }
+        let new_carry = max_needle.min(total); // keep the tail so a needle spanning two reads is found
+        buf.copy_within(total - new_carry..total, 0);
+        carry = new_carry;
+    }
+    found
+}
+
+// --- nix wrapper detection ---
+
+/// read the file behind `path` if it holds at most `cap` bytes. contents are
+/// decoded lossily: compiled nix wrappers are not valid utf-8, and rejecting
+/// them would hide the store paths embedded in the binary.
+fn read_to_string_capped(path: &Path, cap: usize) -> Option<String> {
+    let real = fs::canonicalize(path).ok()?;
+    fs::metadata(&real).ok().filter(|md| md.len() <= cap as u64)?;
+    fs::read(&real).ok().map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
+}
+
+/// detect nix-generated c wrappers via the `makeCWrapper` marker embedded
+/// in them and return the real binary path: the first /nix/store/ path in
+/// the file that contains /bin/ and exists on disk. files larger than
+/// 64 KiB are never treated as wrappers.
+fn nix_wrapper_target(path: &Path) -> Option<PathBuf> {
+    let contents = read_to_string_capped(path, 65536)?;
+    let is_wrapper = contents.contains("makeCWrapper");
+    is_wrapper.then(|| extract_nix_bin_path(&contents)).flatten()
+}
+
+/// detect nix-generated bash/sh wrappers (at most 4 KiB, shebang first, an
+/// `exec` and a /nix/store/ path inside) and return the wrapped binary.
+/// the shebang line is left out of the path search: nix rewrites it to a
+/// /nix/store/.../bin/<shell> path, which would otherwise be returned as
+/// the wrapper's "target".
+fn nix_script_wrapper_target(path: &Path) -> Option<PathBuf> {
+    let contents = read_to_string_capped(path, 4096)?;
+    let has_exec = contents.contains("exec ") || contents.contains("exec\t");
+    if !contents.starts_with("#!") || !contents.contains("/nix/store/") || !has_exec {
+        return None;
+    }
+    let (_shebang, body) = contents.split_once('\n')?;
+    extract_nix_bin_path(body)
+}
+
+/// return the first `/nix/store/...` path in `contents` that contains a
+/// `/bin/` component and exists on disk, or None when there is no such
+/// path. a candidate starts at the store prefix and runs up to the first
+/// whitespace, quote, or nul byte (or the end of the text); the search
+/// then resumes right after it.
+fn extract_nix_bin_path(contents: &str) -> Option<PathBuf> {
+    const STORE_PREFIX: &str = "/nix/store/";
+    let ends_path = |c: char| {
+        // whitespace, quotes, or nul terminate a path.
+        matches!(c, ' ' | '\t' | '\n' | '\r' | '"' | '\'' | '\0')
+    };
+
+    let mut remaining = contents;
+    while let Some(offset) = remaining.find(STORE_PREFIX) {
+        let from_store = &remaining[offset..];
+        let path_len = from_store
+            .find(ends_path)
+            .unwrap_or(from_store.len());
+        let (candidate, after) = from_store.split_at(path_len);
+
+        // only paths containing /bin/ are of interest, and only when they
+        // actually exist on disk.
+        if candidate.contains("/bin/") {
+            let path = PathBuf::from(candidate);
+            if path.exists() {
+                return Some(path);
+            }
+        }
+        // resume right after this candidate.
+        remaining = after;
+    }
+    None
+}
+
+// --- binary classification ---
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum Classify {
+    /// worth probing with --help / -h
+    TryHelp,
+    /// the binary mentions "complet", so it may have a completion-generating subcommand
+    HasNativeCompletions,
+    /// skip — doesn't look like a CLI we can extract from
+    Skip,
+}
+
+/// classify an ELF binary by the needles found in it: "complet" wins
+/// over the help flags, and a binary with neither is skipped.
+fn classify_elf(path: &Path) -> Classify {
+    let found = elf_scan(path, &["-h", "--help", "complet"]);
+    let has = |needle: &str| found.contains(needle);
+    match (has("complet"), has("-h") || has("--help")) {
+        (true, _) => Classify::HasNativeCompletions,
+        (false, true) => Classify::TryHelp,
+        (false, false) => Classify::Skip,
+    }
+}
+
+/// classify a binary by its actual nature: script, ELF, or nix wrapper.
+/// scripts always get a --help probe; a nix wrapper is judged by the
+/// binary it wraps; everything else is scanned directly.
+fn classify_binary(_bindir: &Path, full: &Path) -> Classify {
+    if is_script(full) {
+        return Classify::TryHelp;
+    }
+    // NOTE(review): the script-wrapper lookup only matches files starting
+    // with "#!", which `is_script` has already returned for — it looks
+    // unreachable from here; confirm the intended order of these checks.
+    let wrapped = nix_wrapper_target(full).or_else(|| nix_script_wrapper_target(full));
+    classify_elf(wrapped.as_deref().unwrap_or(full))
+}
+
+// --- help text extraction ---
+
+/// try `--help`, then `-h`, returning the first output that is non-empty
+/// once ANSI escapes are stripped and whitespace is trimmed. each attempt
+/// gets the same per-call timeout. we deliberately skip the third
+/// historical `help`-subcommand variant: if neither flag yielded usable
+/// text, a positional `help` is unlikely to do anything different and the
+/// extra spawn dominates indexing cost.
+fn try_help(bin: &Path, timeout_ms: u64) -> Option<String> {
+    let program = bin.to_string_lossy().into_owned();
+    ["--help", "-h"].iter().find_map(|flag| {
+        let argv = [program.clone(), flag.to_string()];
+        let raw = run_cmd(&argv, timeout_ms)?;
+        let cleaned = fast_strip_ansi::strip_ansi_string(&raw);
+        // whitespace-only output counts as "no help"; move on to the next flag.
+        if cleaned.trim().is_empty() {
+            return None;
+        }
+        Some(cleaned.to_string())
+    })
+}
+
+/// heuristic: over 20 bytes with `export extern`, `export def`, or `module ` plus `export`.
+fn is_nushell_source(text: &str) -> bool {
+    let has = |needle: &str| text.contains(needle);
+    let exports = has("export extern") || has("export def") || (has("module ") && has("export"));
+    text.len() > 20 && exports
+}
+
+/// collect the distinct words of `text` that contain one of `needles`,
+/// in first-seen order. tokens are split on whitespace, ',' and '|' and
+/// stripped of surrounding punctuation; words shorter than two bytes or
+/// starting with '-' are dropped (used to find subcommand names that
+/// might be a native-completion command).
+fn extract_matching_words(text: &str, needles: &[&str]) -> Vec<String> {
+    let is_word_char = |c: char| c.is_alphanumeric() || c == '-' || c == '_';
+    let mut seen: HashSet<&str> = HashSet::new();
+    text.split(|c: char| c.is_whitespace() || c == ',' || c == '|')
+        // strip punctuation hugging the word, e.g. "(completion)".
+        .map(|token| token.trim_matches(|c: char| !is_word_char(c)))
+        // too short, or a flag rather than a word.
+        .filter(|word| word.len() >= 2 && !word.starts_with('-'))
+        // keep only words mentioning at least one needle.
+        .filter(|word| needles.iter().any(|needle| word.contains(*needle)))
+        // `insert` is false for repeats, which keeps only first occurrences.
+        .filter(|word| seen.insert(*word))
+        .map(str::to_string)
+        .collect()
+}
+
+/// try to get native nushell completions from a binary that supports them.
+/// every word of the --help output containing "complet" is tried as a
+/// subcommand; the first output that looks like nushell source wins.
+fn try_native_completion(bin: &Path, timeout_ms: u64) -> Option<String> {
+    let help_text = try_help(bin, timeout_ms)?;
+    let program = bin.to_string_lossy().into_owned();
+    // argument spellings tried after each candidate subcommand, in order.
+    let shell_args: [&[&str]; 3] = [
+        &["nushell"],
+        &["--shell", "nushell"],
+        &["--shell=nushell"],
+    ];
+    for sub in extract_matching_words(&help_text, &["complet"]) {
+        for tail in shell_args {
+            let mut argv = vec![program.clone(), sub.clone()];
+            argv.extend(tail.iter().map(|s| s.to_string()));
+            let Some(out) = run_cmd(&argv, timeout_ms) else {
+                continue;
+            };
+            let cleaned = fast_strip_ansi::strip_ansi_string(&out);
+            if is_nushell_source(&cleaned) {
+                return Some(cleaned.to_string());
+            }
+        }
+    }
+    None
+}
+
+// --- subcommand recursion ---
+
+const MAX_RESOLVE_RESULTS: usize = 500; // resolution stops once this many results are accumulated
+const MAX_RECURSE_DEPTH: u32 = 5; // subcommand recursion stops once depth exceeds this
+
+/// strip ANSI escapes and parse --help output; parse failures yield an empty result.
+fn parse_help_text(text: &str) -> ManpageResult {
+    let cleaned = fast_strip_ansi::strip_ansi_string(text).into_owned();
+    help_parser(&cleaned)
+        .map(|(_, parsed)| (&parsed).into())
+        .unwrap_or_default()
+}
+
+/// resolve `bin`'s help recursively, appending (cmd_path, result) pairs to
+/// `acc`, where cmd_path is the full "git stash apply" form. the top-level
+/// entry is recorded under `cmd`; each listed subcommand (two or more bytes,
+/// not starting with '-') is then explored via `recurse_subcommand`. used by
+/// the dynamic-resolve path in `cmd_complete`; the batch indexer uses the
+/// pool instead.
+fn help_resolve(
+    bin: &Path,
+    cmd: &str,
+    depth: u32,
+    timeout_ms: u64,
+    acc: &mut Vec<(String, ManpageResult)>,
+) {
+    if acc.len() >= MAX_RESOLVE_RESULTS {
+        return;
+    }
+    let Some(help_text) = try_help(bin, timeout_ms) else {
+        return;
+    };
+    let result = parse_help_text(&help_text);
+    // gather the names before `result` is moved into `acc`.
+    let subs: Vec<String> = result
+        .subcommands
+        .iter()
+        .map(|sc| sc.name.clone())
+        .filter(|name| name.len() >= 2 && !name.starts_with('-'))
+        .collect();
+    acc.push((cmd.to_string(), result));
+
+    let bin_s = bin.to_string_lossy().to_string();
+    for sub in &subs {
+        recurse_subcommand(
+            &bin_s,
+            cmd,
+            std::slice::from_ref(sub),
+            depth + 1,
+            timeout_ms,
+            acc,
+        );
+    }
+}
+
+/// probe one subcommand path (`bin sub_args... --help`), record its parsed
+/// help under "base_cmd sub_args..." and descend into the subcommands it
+/// lists. stops past MAX_RECURSE_DEPTH or at MAX_RESOLVE_RESULTS entries.
+fn recurse_subcommand(
+    bin_s: &str,
+    base_cmd: &str,
+    sub_args: &[String],
+    depth: u32,
+    timeout_ms: u64,
+    acc: &mut Vec<(String, ManpageResult)>,
+) {
+    if depth > MAX_RECURSE_DEPTH || acc.len() >= MAX_RESOLVE_RESULTS {
+        return;
+    }
+    let Some(text) = try_help_args(bin_s, sub_args, timeout_ms) else {
+        return;
+    };
+    let result = parse_help_text(&text);
+    let nothing_parsed = result.entries.is_empty()
+        && result.subcommands.is_empty()
+        && result.positionals.is_empty();
+    // a page listing its own leaf as a subcommand is dropped — presumably it
+    // is the parent's help printed again for an unrecognised subcommand.
+    let lists_itself = sub_args.last().is_some_and(|leaf| {
+        result.subcommands.iter().any(|sc| sc.name.eq_ignore_ascii_case(leaf))
+    });
+    if nothing_parsed || lists_itself {
+        return;
+    }
+    let children: Vec<String> = result
+        .subcommands
+        .iter()
+        .map(|sc| sc.name.clone())
+        .filter(|name| name.len() >= 2 && !name.starts_with('-') && name != "help")
+        .collect();
+    acc.push((format!("{base_cmd} {}", sub_args.join(" ")), result));
+    for child in children {
+        if acc.len() >= MAX_RESOLVE_RESULTS {
+            break;
+        }
+        let deeper: Vec<String> = sub_args.iter().cloned().chain([child]).collect();
+        recurse_subcommand(bin_s, base_cmd, &deeper, depth + 1, timeout_ms, acc);
+    }
+}
+
+/// run `bin sub_path... --help`; when that output is missing, blank, or
+/// starts with "No manual entry" / "man:", retry with `-h`. a non-blank
+/// `-h` output wins; otherwise whatever `--help` produced (possibly
+/// nothing) is returned. ANSI escapes are stripped from both outputs.
+/// used by deep subcommand recursion.
+fn try_help_args(bin_s: &str, sub_args: &[String], timeout_ms: u64) -> Option<String> {
+    // argv = binary, subcommand path, then the requested help flag.
+    let argv_with = |flag: &str| -> Vec<String> {
+        std::iter::once(bin_s.to_string())
+            .chain(sub_args.iter().cloned())
+            .chain(std::iter::once(flag.to_string()))
+            .collect()
+    };
+
+    let long_out = run_cmd(&argv_with("--help"), timeout_ms)
+        .map(|raw| fast_strip_ansi::strip_ansi_string(&raw).into_owned());
+    let long_is_useful = match long_out.as_deref().map(str::trim) {
+        Some(body) => {
+            !(body.is_empty() || body.starts_with("No manual entry") || body.starts_with("man:"))
+        }
+        None => false,
+    };
+    if long_is_useful {
+        return long_out;
+    }
+
+    let short_out = run_cmd(&argv_with("-h"), timeout_ms)
+        .map(|raw| fast_strip_ansi::strip_ansi_string(&raw).into_owned())
+        .filter(|text| !text.trim().is_empty());
+    short_out.or(long_out)
+}
+
+// --- manpage handling ---
+
+/// command name implied by a manpage filename: the basename with an
+/// optional ".gz" and then the last dot-suffix removed. returns an empty
+/// string when the path has no UTF-8 file name.
+fn cmd_name_of_manpage(path: &Path) -> String {
+    let file_name = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
+    let uncompressed = file_name.strip_suffix(".gz").unwrap_or(file_name);
+    // strip section suffix: "ls.1" -> "ls"
+    match uncompressed.rsplit_once('.') {
+        Some((stem, _section)) => stem.to_string(),
+        None => uncompressed.to_string(),
+    }
+}
+
+/// first existing `<mandir>/man<section>/<hyphenated>.<section>[.gz]`,
+/// searching mandirs in order, then COMMAND_SECTIONS in order, with the
+/// uncompressed file preferred over the ".gz" one.
+fn find_manpage_path(mandirs: &[PathBuf], hyphenated: &str) -> Option<PathBuf> {
+    mandirs.iter().find_map(|mandir| {
+        COMMAND_SECTIONS.into_iter().find_map(|section| {
+            let secdir = mandir.join(format!("man{section}"));
+            ["", ".gz"]
+                .into_iter()
+                .map(|ext| secdir.join(format!("{hyphenated}.{section}{ext}")))
+                .find(|candidate| candidate.is_file())
+        })
+    })
+}
+
+/// work out which command a manpage documents. the SYNOPSIS section
+/// wins over the filename because filenames are ambiguous —
+/// "btrfs-check.8" may be a standalone `btrfs-check` binary or the
+/// subcommand `btrfs check`. the synopsis name is cut down to at most as
+/// many words as the filename has hyphen-separated parts, so a line such
+/// as "btrfs check [options] <device>" cannot pull the device
+/// placeholder into the command name. without a synopsis name the
+/// filename-derived name is returned.
+fn resolve_manpage_cmd_name(file: &Path, contents: &str) -> String {
+    let from_filename = cmd_name_of_manpage(file);
+    let Some(synopsis_name) = extract_synopsis_command(contents) else {
+        return from_filename;
+    };
+    let word_limit = from_filename.split('-').count();
+    let words: Vec<&str> = synopsis_name
+        .split(' ')
+        .filter(|w| !w.is_empty())
+        .collect();
+    if words.len() <= word_limit {
+        synopsis_name
+    } else {
+        words[..word_limit].join(" ")
+    }
+}
+
+/// a command name paired with the parse result for that command.
+type NamedManpageResult = (String, ManpageResult);
+/// what `process_manpage` returns: the resolved command name, the result
+/// for the page itself, and one named result per subcommand section.
+type ProcessedManpage = (String, ManpageResult, Vec<NamedManpageResult>);
+
+/// read and parse one manpage file, yielding
+/// (cmd_name, main_result, per-subcommand results).
+/// the per-subcommand results come from clap-style `.SH SUBCOMMAND`
+/// sections — each is a self-contained command with its own flags.
+/// returns None when the file cannot be read, when parsing found no
+/// entries, subcommands, or subcommand sections, or when no command name
+/// could be resolved.
+fn process_manpage(file: &Path) -> Option<ProcessedManpage> {
+    let contents = read_manpage_file(file).ok()?;
+    let (mut main_result, sub_sections) = parse_manpage_with_subs(&contents);
+    let has_content = !main_result.entries.is_empty()
+        || !main_result.subcommands.is_empty()
+        || !sub_sections.is_empty();
+    if !has_content {
+        return None;
+    }
+    let cmd_name = resolve_manpage_cmd_name(file, &contents);
+    if cmd_name.is_empty() {
+        return None;
+    }
+    strip_manpage_subcmd_prefixes(&mut main_result, file, &cmd_name);
+    // namespace each sub-section under the resolved command name:
+    // e.g. nh's SUBCOMMAND "os" becomes the stored command "nh os".
+    let mut namespaced: Vec<(String, ManpageResult)> = Vec::new();
+    for (sub_name, sub_result) in sub_sections {
+        namespaced.push((format!("{cmd_name} {sub_name}"), sub_result));
+    }
+    Some((cmd_name, main_result, namespaced))
+}
+
+/// every directory entry under `<mandir>/man<section>` for each mandir
+/// and each COMMAND_SECTIONS section, in that nesting order. section
+/// directories that cannot be read are skipped.
+fn list_manpages(mandirs: &[PathBuf]) -> Vec<PathBuf> {
+    mandirs
+        .iter()
+        .flat_map(|mandir| {
+            COMMAND_SECTIONS
+                .into_iter()
+                .map(move |section| mandir.join(format!("man{section}")))
+        })
+        .filter_map(|secdir| fs::read_dir(&secdir).ok())
+        .flat_map(|entries| entries.flatten().map(|entry| entry.path()))
+        .collect()
+}
+
+// --- index command ---
+
+/// read a newline-separated list of names from `path`. each line is
+/// trimmed; blank lines and lines starting with '#' are skipped. an
+/// unreadable file yields an empty set.
+fn load_ignorelist(path: &Path) -> HashSet<String> {
+    let Ok(contents) = fs::read_to_string(path) else {
+        return HashSet::new();
+    };
+    contents
+        .lines()
+        .map(str::trim)
+        .filter(|line| !line.is_empty() && !line.starts_with('#'))
+        .map(str::to_string)
+        .collect()
+}
+
+/// collect (name, path) for every executable in `bindirs`, sorted by
+/// name. names rejected by `skip_name` or `is_nushell_builtin` are left
+/// out, and when a name appears more than once the first one
+/// encountered wins. unreadable directories are skipped.
+fn list_binaries(bindirs: &[PathBuf]) -> Vec<(String, PathBuf)> {
+    let mut claimed: HashSet<String> = HashSet::new();
+    let mut found: Vec<(String, PathBuf)> = Vec::new();
+    let dir_entries = bindirs
+        .iter()
+        .filter_map(|bd| fs::read_dir(bd).ok())
+        .flat_map(|listing| listing.flatten());
+    for entry in dir_entries {
+        let path = entry.path();
+        let name = match path.file_name().and_then(|s| s.to_str()) {
+            Some(n) if !skip_name(n) && !is_nushell_builtin(n) => n.to_string(),
+            _ => continue,
+        };
+        if is_executable(&path) && claimed.insert(name.clone()) {
+            found.push((name, path));
+        }
+    }
+    found.sort_by(|a, b| a.0.cmp(&b.0));
+    found
+}
+
+/// true when `name` is an installed binary, or when the part of `name`
+/// before its first space is one (i.e. `name` is "<binary> <rest>").
+fn manpage_name_has_installed_command(name: &str, binary_names: &HashSet<String>) -> bool {
+    let parent = name.split_once(' ').map(|(head, _)| head);
+    binary_names.contains(name) || parent.is_some_and(|p| binary_names.contains(p))
+}
+
+// unit tests for the manpage-name filter that `cmd_index` applies before
+// writing a manpage-derived result.
+#[cfg(test)]
+mod main_tests {
+    use super::*;
+
+    /// a name passes when it is an installed binary or when the word
+    /// before its first space is one; unrelated names ("ld.so") and
+    /// hyphenated names ("git-add") do not.
+    #[test]
+    fn manpage_names_must_match_installed_binary_or_subcommand_parent() {
+        let binary_names = HashSet::from(["git".to_string(), "getent".to_string()]);
+
+        assert!(manpage_name_has_installed_command("git", &binary_names));
+        assert!(manpage_name_has_installed_command("git add", &binary_names));
+        assert!(manpage_name_has_installed_command(
+            "getent passwd",
+            &binary_names
+        ));
+        assert!(!manpage_name_has_installed_command("ld.so", &binary_names));
+        assert!(!manpage_name_has_installed_command(
+            "git-add",
+            &binary_names
+        ));
+    }
+}
+
+/// shared state passed to every pool worker. nothing inside mutates
+/// except `indexed`, which is wrapped in a parking_lot::Mutex.
+struct ScrapeCtx {
+    /// directory that results are written into.
+    cache_dir: PathBuf,
+    /// man directories searched when looking up a command's manpage.
+    mandirs: Vec<PathBuf>,
+    /// commands (base name or full command) for which the manpage step
+    /// is skipped, leaving only the help-text scrape.
+    help_only: HashSet<String>,
+    /// full command names that already had a result written.
+    indexed: Mutex<HashSet<String>>,
+    /// timeout, in milliseconds, handed to every command invocation.
+    timeout_ms: u64,
+}
+
+/// one unit of work for the scrape pool: a binary plus the subcommand
+/// path to probe on it.
+#[derive(Debug)]
+struct PoolJob {
+    /// path of the binary to inspect and invoke; copied unchanged into
+    /// every child job.
+    bin_path: PathBuf,
+    /// the binary's basename — e.g. "git". stays constant across the
+    /// whole recursion tree for this binary.
+    base_cmd: String,
+    /// chain of subcommand tokens past the base. empty for the
+    /// top-level scrape, ["clone"] for `git clone`, ["stash","apply"]
+    /// for `git stash apply`.
+    sub_args: Vec<String>,
+    /// recursion depth: 0 for jobs submitted by `cmd_index`, parent + 1
+    /// for jobs created by `enqueue_subcommands`.
+    depth: u32,
+}
+
+impl PoolJob {
+    /// the space-separated command this job describes: `base_cmd`
+    /// followed by each token of `sub_args`.
+    fn full_cmd(&self) -> String {
+        std::iter::once(self.base_cmd.as_str())
+            .chain(self.sub_args.iter().map(String::as_str))
+            .collect::<Vec<_>>()
+            .join(" ")
+    }
+}
+
+/// manpage lookup key for a (possibly nested) command: the base name
+/// and every subcommand token joined with '-'. "git" for the top level,
+/// "git-remote" for `git remote`, "git-stash-apply" for
+/// `git stash apply`.
+fn hyphenated_cmd(job: &PoolJob) -> String {
+    let mut key = job.base_cmd.clone();
+    for token in &job.sub_args {
+        key.push('-');
+        key.push_str(token);
+    }
+    key
+}
+
+/// some manpages name their subcommands with the parent as a prefix —
+/// git.1 has \fBgit-add\fR(1), \fBgit-remote-ext\fR(1), etc. downstream
+/// wants the bare names ("add", "remote-ext") so they dispatch as
+/// `git add` / `git remote-ext`. removes a leading "{base}-" from every
+/// subcommand name that has one; names without it are left alone.
+fn strip_subcmd_prefix(result: &mut ManpageResult, base: &str) {
+    for sc in result.subcommands.iter_mut() {
+        let bare = sc
+            .name
+            .strip_prefix(base)
+            .and_then(|rest| rest.strip_prefix('-'))
+            .map(str::to_string);
+        if let Some(bare) = bare {
+            sc.name = bare;
+        }
+    }
+}
+
+/// strip parent-name prefixes from `result`'s subcommands using two
+/// candidate parents: first the name derived from the manpage filename,
+/// then `cmd_name` with spaces turned into hyphens (when it differs).
+/// empty candidates are ignored.
+fn strip_manpage_subcmd_prefixes(result: &mut ManpageResult, file: &Path, cmd_name: &str) {
+    let from_file = cmd_name_of_manpage(file);
+    let from_cmd = cmd_name.replace(' ', "-");
+    let mut bases: Vec<&str> = Vec::with_capacity(2);
+    if !from_file.is_empty() {
+        bases.push(&from_file);
+    }
+    if !from_cmd.is_empty() && from_cmd != from_file {
+        bases.push(&from_cmd);
+    }
+    for base in bases {
+        strip_subcmd_prefix(result, base);
+    }
+}
+
+/// submit one child job per usable subcommand of `job`. used by both the
+/// manpage and the help branches of process_pool_job. names shorter than
+/// two characters, names starting with '-', and "help" are not enqueued.
+fn enqueue_subcommands(
+    job: &PoolJob,
+    subcommands: &[ManpageSubcommand],
+    submit: &Submitter<PoolJob>,
+) {
+    // same comparison as the sequential recurse_subcommand (`depth > MAX`,
+    // not `>=`), which gives 6 levels (0..=5) of recursion. the stricter
+    // form cut off the last layer of deep clap trees like jay.
+    if job.depth > MAX_RECURSE_DEPTH {
+        return;
+    }
+    let usable = subcommands.iter().map(|sc| &sc.name).filter(|name| {
+        name.len() >= 2 && !name.starts_with('-') && name.as_str() != "help"
+    });
+    for name in usable {
+        let child_args: Vec<String> = job
+            .sub_args
+            .iter()
+            .chain(std::iter::once(name))
+            .cloned()
+            .collect();
+        submit.submit(PoolJob {
+            bin_path: job.bin_path.clone(),
+            base_cmd: job.base_cmd.clone(),
+            sub_args: child_args,
+            depth: job.depth + 1,
+        });
+    }
+}
+
+/// per-job handler called by every worker. populates the cache + enqueues
+/// child jobs (one per discovered subcommand) onto the same pool.
+///
+/// source priority is: (1) native completions, (2) manpage, (3) --help.
+/// --help text is fetched at step 1 only as a probe for the completions
+/// subcommand; it is not mined for content unless steps 1 and 2 both miss.
+///
+/// errors from `write_native` / `write_result` are discarded here, and the
+/// command is still added to `indexed` afterwards.
+fn process_pool_job(ctx: &ScrapeCtx, job: PoolJob, submit: &Submitter<PoolJob>) {
+    let full_cmd = job.full_cmd();
+    // the lock is released at the end of this statement, so this check and
+    // the inserts further down are not one atomic step.
+    // NOTE(review): two jobs with the same full_cmd could presumably both
+    // get past this point and do the work twice — confirm that is benign.
+    if ctx.indexed.lock().contains(&full_cmd) {
+        return;
+    }
+    let bin_s = job.bin_path.to_string_lossy().to_string();
+
+    // 1. native completions (top-level only — sub-commands don't ship
+    //    their own completion payloads). classify_binary scans the ELF for
+    //    "complet" needles, and try_native_completion confirms by invoking
+    //    the completions subcommand.
+    if job.sub_args.is_empty() {
+        // NOTE(review): the same path is passed for both arguments —
+        // confirm against classify_binary's signature that this is intended.
+        let class = classify_binary(&job.bin_path, &job.bin_path);
+        if matches!(class, Classify::Skip) {
+            return;
+        }
+        if matches!(class, Classify::HasNativeCompletions)
+            && let Some(nu) = try_native_completion(&job.bin_path, ctx.timeout_ms)
+        {
+            let _ = write_native(&ctx.cache_dir, &full_cmd, &nu);
+            ctx.indexed.lock().insert(full_cmd);
+            return;
+        }
+    }
+
+    // 2. manpage as primary content source — structured documentation
+    //    over the curated --help summary. skipped when either the base
+    //    command or the full command is listed in help_only.
+    if !ctx.help_only.contains(&job.base_cmd) && !ctx.help_only.contains(&full_cmd) {
+        let hyphenated = hyphenated_cmd(&job);
+        if let Some(mp_path) = find_manpage_path(&ctx.mandirs, &hyphenated)
+            && let Ok(contents) = read_manpage_file(&mp_path)
+        {
+            let mut mp_result = parse_manpage_string(&contents);
+            // a manpage that parsed to neither entries nor subcommands is
+            // ignored, and the job falls through to the --help scrape.
+            if !mp_result.entries.is_empty() || !mp_result.subcommands.is_empty() {
+                strip_subcmd_prefix(&mut mp_result, &hyphenated);
+                let _ = write_result(&ctx.cache_dir, &full_cmd, "manpage", &mp_result);
+                ctx.indexed.lock().insert(full_cmd);
+                enqueue_subcommands(&job, &mp_result.subcommands, submit);
+                return;
+            }
+        }
+    }
+
+    // 3. fallback: scrape --help text for content.
+    let text = if job.sub_args.is_empty() {
+        try_help(&job.bin_path, ctx.timeout_ms)
+    } else {
+        try_help_args(&bin_s, &job.sub_args, ctx.timeout_ms)
+    };
+    let Some(text) = text else { return };
+
+    let result = parse_help_text(&text);
+    if result.entries.is_empty() && result.subcommands.is_empty() && result.positionals.is_empty() {
+        return;
+    }
+
+    // self-listing detection for sub-probes: if the leaf token shows up in
+    // the result's subcommand list, the binary probably echoed the parent
+    // help (didn't recognize the token). discard.
+    if let Some(leaf) = job.sub_args.last()
+        && result
+            .subcommands
+            .iter()
+            .any(|sc| sc.name.eq_ignore_ascii_case(leaf))
+    {
+        return;
+    }
+
+    let _ = write_result(&ctx.cache_dir, &full_cmd, "help", &result);
+    ctx.indexed.lock().insert(full_cmd);
+    enqueue_subcommands(&job, &result.subcommands, submit);
+}
+
+/// build the completion cache in `dir`.
+///
+/// phase 1 submits every binary found in `bindirs` (minus `ignorelist`)
+/// to the scrape pool. phase 2 walks the manpages under `mandirs` and
+/// writes results for commands phase 1 did not index, skipping names in
+/// `help_only`, nushell builtins, and names with no installed binary.
+/// prints a one-line summary when done.
+///
+/// # Errors
+/// returns the error from `ensure_dir`, or from the first `write_result`
+/// that fails during phase 2. write failures inside pool workers are
+/// discarded by the workers and never surface here.
+fn cmd_index(
+    bindirs: &[PathBuf],
+    mandirs: &[PathBuf],
+    ignorelist: &HashSet<String>,
+    help_only: &HashSet<String>,
+    dir: &Path,
+    timeout_ms: u64,
+    num_workers: usize,
+) -> std::io::Result<()> {
+    ensure_dir(dir)?;
+    let binaries = list_binaries(bindirs);
+    let binary_names: HashSet<String> = binaries
+        .iter()
+        .filter(|(name, _)| !ignorelist.contains(name))
+        .map(|(name, _)| name.clone())
+        .collect();
+
+    // phase 1: parallel scrape of every eligible binary via the BFS pool.
+    // shared state lives in an Arc<ScrapeCtx>; the `indexed` set is the
+    // one mutable bit and uses parking_lot::Mutex.
+    let ctx = Arc::new(ScrapeCtx {
+        cache_dir: dir.to_path_buf(),
+        mandirs: mandirs.to_vec(),
+        help_only: help_only.clone(),
+        indexed: Mutex::new(HashSet::new()),
+        timeout_ms,
+    });
+    let pool = ScrapePool::new(num_workers, {
+        let ctx = ctx.clone();
+        move |job: PoolJob, submit: &Submitter<PoolJob>| {
+            process_pool_job(&ctx, job, submit);
+        }
+    });
+    for (name, path) in &binaries {
+        if ignorelist.contains(name) {
+            continue;
+        }
+        pool.submit(PoolJob {
+            bin_path: path.clone(),
+            base_cmd: name.clone(),
+            sub_args: Vec::new(),
+            depth: 0,
+        });
+    }
+    pool.wait();
+    // move the indexed set out from behind the mutex for phase 2. going
+    // through the lock (instead of Arc::try_unwrap) does not rely on every
+    // other Arc clone — such as the one captured by the pool's job
+    // closure — having been dropped already, so a lingering reference can
+    // no longer degrade silently into an empty set and make phase 2
+    // overwrite what phase 1 wrote.
+    let mut indexed: HashSet<String> = std::mem::take(&mut *ctx.indexed.lock());
+
+    // phase 2: process manpages for commands not yet indexed (unless
+    // they're in help-only). shorter filenames sort first so parent
+    // manpages (e.g. nix-env.1) are processed before subpage manpages
+    // (nix-env-install.1).
+    let mut manpages = list_manpages(mandirs);
+    manpages.sort_by(|a, b| {
+        let alen = a.file_name().map(|s| s.len()).unwrap_or(0);
+        let blen = b.file_name().map(|s| s.len()).unwrap_or(0);
+        alen.cmp(&blen).then_with(|| a.cmp(b))
+    });
+    for manpage_path in manpages {
+        let Some((name, result, sub_sections)) = process_manpage(&manpage_path) else {
+            continue;
+        };
+        if !manpage_name_has_installed_command(&name, &binary_names) {
+            continue;
+        }
+        let base_cmd = cmd_name_of_manpage(&manpage_path);
+        if indexed.contains(&name) {
+            // only worth a warning when the synopsis-derived name differs
+            // from the filename-derived one.
+            if name != base_cmd {
+                eprintln!(
+                    "warning: {} extracted cmd \"{}\" (already indexed), skipping",
+                    manpage_path
+                        .file_name()
+                        .and_then(|s| s.to_str())
+                        .unwrap_or(""),
+                    name
+                );
+            }
+            continue;
+        }
+        if help_only.contains(&name) {
+            continue;
+        }
+        if is_nushell_builtin(&name) {
+            continue;
+        }
+        // clap-style SUBCOMMAND sections produce real, fully-populated
+        // sub-files (each with its own flags + positionals); they take
+        // priority over COMMANDS-section leaf stubs.
+        write_result(dir, &name, "manpage", &result)?;
+        indexed.insert(name.clone());
+        for (sub_cmd, sub_result) in &sub_sections {
+            if indexed.contains(sub_cmd) {
+                continue;
+            }
+            write_result(dir, sub_cmd, "manpage", sub_result)?;
+            indexed.insert(sub_cmd.clone());
+        }
+        // for COMMANDS-section subcommands that aren't already covered by
+        // a SUBCOMMAND section (or a per-subcommand manpage), write a
+        // description-only stub so the completer treats them as leaves.
+        // a real per-subcommand manpage processed later will overwrite the
+        // stub since we deliberately don't add it to `indexed`.
+        if sub_sections.is_empty() {
+            for sc in &result.subcommands {
+                let sub_cmd = format!("{name} {}", sc.name);
+                if indexed.contains(&sub_cmd) {
+                    continue;
+                }
+                let stub = ManpageResult {
+                    entries: Vec::new(),
+                    subcommands: Vec::new(),
+                    positionals: Default::default(),
+                    description: sc.desc.clone(),
+                };
+                write_result(dir, &sub_cmd, "manpage", &stub)?;
+            }
+        }
+    }
+
+    // stubs are not counted: they were never added to `indexed`.
+    println!("indexed {} commands into {}", indexed.len(), dir.display());
+    Ok(())
+}
+
+// --- manpage subcommand ---
+
+/// print the generated extern for one manpage file, followed by one for
+/// each of its subcommand sections. a file that yields nothing prints
+/// nothing; the call always returns Ok.
+fn cmd_manpage(file: &Path) -> std::io::Result<()> {
+    let Some((name, result, sub_sections)) = process_manpage(file) else {
+        return Ok(());
+    };
+    print!("{}", generate_extern(&name, &result));
+    sub_sections
+        .iter()
+        .for_each(|(sub_cmd, sub_result)| print!("{}", generate_extern(sub_cmd, sub_result)));
+    Ok(())
+}
+
+/// print generated externs for every manpage under `dir/man<section>`,
+/// for each section in COMMAND_SECTIONS. unreadable section directories
+/// and pages that yield nothing are skipped; the call always returns Ok.
+fn cmd_manpage_dir(dir: &Path) -> std::io::Result<()> {
+    let section_dirs = COMMAND_SECTIONS
+        .into_iter()
+        .map(|section| dir.join(format!("man{section}")));
+    for entries in section_dirs.filter_map(|secdir| fs::read_dir(&secdir).ok()) {
+        for path in entries.flatten().map(|entry| entry.path()) {
+            let Some((name, result, sub_sections)) = process_manpage(&path) else {
+                continue;
+            };
+            print!("{}", generate_extern(&name, &result));
+            for (sub_cmd, sub_result) in &sub_sections {
+                print!("{}", generate_extern(sub_cmd, sub_result));
+            }
+        }
+    }
+    Ok(())
+}
+
+// --- query / dump / complete ---
+
+/// print the raw stored data for `cmd`. when nothing is stored, reports
+/// "not found" on stderr and exits the process with status 1.
+fn cmd_query(cmd: &str, dirs: &[PathBuf]) -> std::io::Result<()> {
+    let Some(data) = lookup_raw(dirs, cmd) else {
+        eprintln!("not found: {cmd}");
+        std::process::exit(1);
+    };
+    print!("{data}");
+    Ok(())
+}
+
+/// print the number of stored commands, then one line per command with
+/// its source type right-aligned in 8 columns ("?" when unknown).
+fn cmd_dump(dirs: &[PathBuf]) {
+    let cmds = all_commands(dirs);
+    println!("{} commands", cmds.len());
+    cmds.iter().for_each(|cmd| {
+        let src = match file_type_of(dirs, cmd) {
+            Some(kind) => kind,
+            None => "?".to_string(),
+        };
+        println!("{src:>8}  {cmd}");
+    });
+}
+
+/// look up a command's path in $PATH.
+///
+/// returns the first `<dir>/<name>` that `is_executable` accepts, with
+/// directories tried in $PATH order, or None when $PATH is unset or no
+/// directory has a match. $PATH is read as an OsString and split with
+/// the platform's own rules, so a non-UTF-8 entry does not disable the
+/// whole lookup.
+fn find_in_path(name: &str) -> Option<PathBuf> {
+    let path_var = std::env::var_os("PATH")?;
+    std::env::split_paths(&path_var)
+        .map(|dir| dir.join(name))
+        .find(|candidate| is_executable(candidate))
+}
+
+/// treat `span` as a path to an executable: Some(path) when it contains
+/// a '/' and `is_executable` accepts it, None otherwise.
+fn executable_span_path(span: &str) -> Option<PathBuf> {
+    if span.contains('/') {
+        let candidate = PathBuf::from(span);
+        if is_executable(&candidate) {
+            return Some(candidate);
+        }
+    }
+    None
+}
+
+/// the final path component as an owned string; None when the path has
+/// no file name, or the name is not UTF-8 or is empty.
+fn command_name_for_path(path: &Path) -> Option<String> {
+    match path.file_name()?.to_str()? {
+        "" => None,
+        name => Some(name.to_owned()),
+    }
+}
+
+/// score how well `needle` matches `haystack`. zero means no match.
+///
+/// tiers, highest first:
+/// - exact (case-sensitive) match: 1000
+/// - ASCII-case-insensitive prefix match: 900 plus up to 100 for the
+///   fraction of the haystack the needle covers
+/// - ASCII-case-insensitive subsequence match: 50 per character matched
+///   at a word boundary (start, after '-' or '_', or a lower→upper
+///   transition), 10 per other character, plus 20 when a match directly
+///   follows the previous one
+///
+/// an empty needle scores 1; a needle longer than the haystack scores 0.
+fn fuzzy_score(needle: &str, haystack: &str) -> i32 {
+    let needle_len = needle.chars().count();
+    let haystack_len = haystack.chars().count();
+    match needle_len {
+        0 => return 1,
+        n if n > haystack_len => return 0,
+        _ => {}
+    }
+    if needle == haystack {
+        return 1000;
+    }
+
+    let folded_needle = needle.to_ascii_lowercase();
+    let folded_haystack = haystack.to_ascii_lowercase();
+    if folded_haystack.starts_with(&folded_needle) {
+        return 900 + (needle_len as i32 * 100 / haystack_len as i32);
+    }
+
+    // original-case characters are needed for the camelCase boundary test.
+    let original: Vec<char> = haystack.chars().collect();
+    let mut wanted = folded_needle.chars().peekable();
+    let mut total = 0i32;
+    let mut last_hit: Option<usize> = None;
+
+    for (pos, ch) in folded_haystack.chars().enumerate() {
+        let Some(&target) = wanted.peek() else {
+            break;
+        };
+        if ch != target {
+            continue;
+        }
+
+        let at_boundary = match pos.checked_sub(1).map(|before| original[before]) {
+            None => true,
+            Some(prev) => {
+                prev == '-'
+                    || prev == '_'
+                    || (prev.is_ascii_lowercase() && original[pos].is_ascii_uppercase())
+            }
+        };
+        let follows_hit = pos > 0 && last_hit == Some(pos - 1);
+        total += if at_boundary { 50 } else { 10 };
+        if follows_hit {
+            total += 20;
+        }
+        wanted.next();
+        last_hit = Some(pos);
+    }
+
+    // only a fully consumed needle counts as a match.
+    if wanted.peek().is_none() { total } else { 0 }
+}
+
+/// escape `s` for use inside a JSON string literal (without the
+/// surrounding quotes): `"` and `\` get a backslash, newline / carriage
+/// return / tab use their short forms, and any other character below
+/// U+0020 becomes a `\u00XX` escape. everything else passes through.
+fn json_escape(s: &str) -> String {
+    let mut escaped = String::with_capacity(s.len() + 2);
+    for ch in s.chars() {
+        let short_form = match ch {
+            '"' => Some("\\\""),
+            '\\' => Some("\\\\"),
+            '\n' => Some("\\n"),
+            '\r' => Some("\\r"),
+            '\t' => Some("\\t"),
+            _ => None,
+        };
+        if let Some(seq) = short_form {
+            escaped.push_str(seq);
+        } else if ch < '\u{20}' {
+            escaped.push_str(&format!("\\u{:04x}", u32::from(ch)));
+        } else {
+            escaped.push(ch);
+        }
+    }
+    escaped
+}
+
+/// render one completion candidate as a JSON object with "value" and
+/// "description" string fields, both JSON-escaped.
+fn completion_json(value: &str, desc: &str) -> String {
+    let escaped_value = json_escape(value);
+    let escaped_desc = json_escape(desc);
+    format!(
+        r#"{{"value":"{}","description":"{}"}}"#,
+        escaped_value, escaped_desc
+    )
+}
+
+/// print already-serialized candidates as one JSON array line, or the
+/// literal `null` when there are none.
+fn print_completion_candidates(candidates: &[String]) {
+    let rendered = match candidates {
+        [] => "null".to_string(),
+        items => format!("[{}]", items.join(",")),
+    };
+    println!("{rendered}");
+}
+
+/// one device row parsed from `adb devices -l` output.
+#[derive(Clone, Debug)]
+struct AdbDevice {
+    /// first whitespace-separated field of the row.
+    serial: String,
+    /// the device state, followed by any `model:` / `product:` values
+    /// (underscores shown as spaces).
+    desc: String,
+    /// value of the row's `transport_id:` field, when it has one.
+    transport_id: Option<String>,
+}
+
+/// which kind of device identifier is being completed on an adb command
+/// line. in both variants `prefix` is the text typed so far for the
+/// value, and `replacement_prefix` is put in front of every candidate —
+/// the "--flag=" part when flag and value share one token, otherwise
+/// empty.
+enum AdbDeviceCompletion {
+    /// complete a device serial.
+    Serial {
+        prefix: String,
+        replacement_prefix: String,
+    },
+    /// complete a transport id.
+    TransportId {
+        prefix: String,
+        replacement_prefix: String,
+    },
+}
+
+/// decide whether the last token of `rest` is a device selector value
+/// that should be completed. returns None as soon as `rest` contains any
+/// non-selector token. handles both the one-token `--flag=value` form
+/// and the two-token `flag value` form.
+fn adb_device_completion(rest: &[String]) -> Option<AdbDeviceCompletion> {
+    if !adb_command_tokens(rest).is_empty() {
+        return None;
+    }
+    let current = rest.last().map(String::as_str).unwrap_or("");
+
+    // one-token form: the flag and the partial value share a token.
+    for flag in ["--serial=", "--one-device="] {
+        if let Some(typed) = current.strip_prefix(flag) {
+            return Some(AdbDeviceCompletion::Serial {
+                prefix: typed.to_string(),
+                replacement_prefix: flag.to_string(),
+            });
+        }
+    }
+    if let Some(typed) = current.strip_prefix("--transport-id=") {
+        return Some(AdbDeviceCompletion::TransportId {
+            prefix: typed.to_string(),
+            replacement_prefix: "--transport-id=".to_string(),
+        });
+    }
+
+    // two-token form: the previous token is the flag.
+    let prev = rest.len().checked_sub(2).map(|i| rest[i].as_str())?;
+    match prev {
+        "-s" | "--serial" | "--one-device" => Some(AdbDeviceCompletion::Serial {
+            prefix: current.to_string(),
+            replacement_prefix: String::new(),
+        }),
+        "-t" | "--transport-id" => Some(AdbDeviceCompletion::TransportId {
+            prefix: current.to_string(),
+            replacement_prefix: String::new(),
+        }),
+        _ => None,
+    }
+}
+
+/// parse `adb devices -l` style output into device rows. blank lines,
+/// lines starting with '*', the "List of devices attached" header, and
+/// rows whose state `is_adb_device_state` rejects are skipped. the
+/// two-word state "no permissions" is recognised as a single state.
+fn parse_adb_devices(output: &str) -> Vec<AdbDevice> {
+    let mut devices = Vec::new();
+    for raw_line in output.lines() {
+        let line = raw_line.trim();
+        let is_noise = line.is_empty()
+            || line.starts_with('*')
+            || line.eq_ignore_ascii_case("List of devices attached");
+        if is_noise {
+            continue;
+        }
+
+        let fields: Vec<&str> = line.split_whitespace().collect();
+        let (serial, state, detail_fields) = match fields.as_slice() {
+            [serial, "no", "permissions", tail @ ..] => (*serial, "no permissions", tail),
+            [serial, state, tail @ ..] => (*serial, *state, tail),
+            _ => continue,
+        };
+        if serial.eq_ignore_ascii_case("list") || !is_adb_device_state(state) {
+            continue;
+        }
+
+        let mut transport_id = None;
+        let mut details: Vec<String> = Vec::new();
+        for field in detail_fields {
+            let named = field
+                .strip_prefix("model:")
+                .or_else(|| field.strip_prefix("product:"));
+            if let Some(value) = named {
+                details.push(value.replace('_', " "));
+            } else if let Some(id) = field.strip_prefix("transport_id:") {
+                transport_id = Some(id.to_string());
+            }
+        }
+        // state first, then each detail, separated by single spaces.
+        let desc = std::iter::once(state.to_string())
+            .chain(details)
+            .collect::<Vec<_>>()
+            .join(" ");
+        devices.push(AdbDevice {
+            serial: serial.to_string(),
+            desc,
+            transport_id,
+        });
+    }
+    devices
+}
+
+/// whether `state` is one of the device states this parser accepts.
+fn is_adb_device_state(state: &str) -> bool {
+    const KNOWN_STATES: [&str; 7] = [
+        "device",
+        "offline",
+        "unauthorized",
+        "recovery",
+        "sideload",
+        "rescue",
+        "no permissions",
+    ];
+    KNOWN_STATES.contains(&state)
+}
+
+/// run `<path> devices -l` and turn the listed devices into serialized
+/// completion candidates for `completion`, best score first. returns an
+/// empty list when the command produces no output. devices whose serial
+/// (or transport id) does not match the typed prefix are dropped, as are
+/// devices without a transport id when one is being completed.
+fn adb_device_candidates(
+    path: &Path,
+    completion: AdbDeviceCompletion,
+    timeout_ms: u64,
+) -> Vec<String> {
+    let argv = vec![
+        path.to_string_lossy().to_string(),
+        "devices".to_string(),
+        "-l".to_string(),
+    ];
+    let Some(listing) = run_cmd(&argv, timeout_ms) else {
+        return Vec::new();
+    };
+    let mut ranked: Vec<(i32, String)> = parse_adb_devices(&listing)
+        .into_iter()
+        .filter_map(|device| {
+            // pick the identifier to match and the description to show.
+            let (typed, lead, ident, desc) = match &completion {
+                AdbDeviceCompletion::Serial {
+                    prefix,
+                    replacement_prefix,
+                } => (
+                    prefix,
+                    replacement_prefix,
+                    device.serial.clone(),
+                    device.desc.clone(),
+                ),
+                AdbDeviceCompletion::TransportId {
+                    prefix,
+                    replacement_prefix,
+                } => {
+                    let id = device.transport_id.clone()?;
+                    let desc = format!("{} {}", device.serial, device.desc);
+                    (prefix, replacement_prefix, id, desc)
+                }
+            };
+            let score = prefix_score(typed, &ident);
+            (score > 0).then(|| (score, completion_json(&format!("{lead}{ident}"), &desc)))
+        })
+        .collect();
+    ranked.sort_by(|a, b| b.0.cmp(&a.0));
+    ranked.into_iter().map(|(_, json)| json).collect()
+}
+
+/// ASCII-case-insensitive prefix match score: 1 for an empty prefix,
+/// 1000 when `prefix` equals `value`, 900 when `value` starts with
+/// `prefix`, 0 otherwise.
+fn prefix_score(prefix: &str, value: &str) -> i32 {
+    if prefix.is_empty() {
+        return 1;
+    }
+    let wanted = prefix.to_ascii_lowercase();
+    let candidate = value.to_ascii_lowercase();
+    match candidate.strip_prefix(wanted.as_str()) {
+        Some("") => 1000,
+        Some(_) => 900,
+        None => 0,
+    }
+}
+
+/// pull the complete device-selector arguments out of `rest`, in order:
+/// `-s` / `--serial` / `-t` / `--transport-id` together with a non-empty
+/// following value, and `--serial=…` / `--transport-id=…` tokens that
+/// have something after the '='. every other token is ignored.
+fn adb_selector_args(rest: &[String]) -> Vec<String> {
+    let mut selectors = Vec::new();
+    let mut tokens = rest.iter().peekable();
+    while let Some(token) = tokens.next() {
+        match token.as_str() {
+            "-s" | "--serial" | "-t" | "--transport-id" => {
+                // the value is consumed only when it exists and is non-empty;
+                // otherwise the flag is dropped and scanning continues.
+                if let Some(value) = tokens.next_if(|v| !v.is_empty()) {
+                    selectors.push(token.clone());
+                    selectors.push(value.clone());
+                }
+            }
+            joined
+                if (joined.starts_with("--serial=") || joined.starts_with("--transport-id="))
+                    && !joined.ends_with('=') =>
+            {
+                selectors.push(token.clone());
+            }
+            _ => {}
+        }
+    }
+    selectors
+}
+
+/// the tokens of `rest` that remain after removing device-selector
+/// flags: `-s` / `--serial` / `-t` / `--transport-id` / `--one-device`
+/// are dropped together with the token that follows them, and the
+/// `--serial=` / `--transport-id=` / `--one-device=` forms are dropped
+/// on their own.
+fn adb_command_tokens(rest: &[String]) -> Vec<&str> {
+    let mut kept = Vec::new();
+    let mut words = rest.iter().map(String::as_str);
+    while let Some(word) = words.next() {
+        match word {
+            "-s" | "--serial" | "-t" | "--transport-id" | "--one-device" => {
+                // skip the flag's value too, when there is one.
+                let _ = words.next();
+            }
+            w if w.starts_with("--serial=")
+                || w.starts_with("--transport-id=")
+                || w.starts_with("--one-device=") => {}
+            w => kept.push(w),
+        }
+    }
+    kept
+}
+
+/// when the command line in `rest` is at a position that takes a package
+/// name, return the partial name typed so far. recognised commands are
+/// `uninstall`, `shell pm clear|disable-user|enable`, and
+/// `shell am force-stop`; device-selector flags are ignored first.
+fn adb_package_completion_prefix(rest: &[String]) -> Option<&str> {
+    let tokens = adb_command_tokens(rest);
+    let tail: &[&str] = match tokens.as_slice() {
+        ["uninstall", tail @ ..] => tail,
+        ["shell", "pm", "clear" | "disable-user" | "enable", tail @ ..] if !tail.is_empty() => {
+            tail
+        }
+        ["shell", "am", "force-stop", tail @ ..] if !tail.is_empty() => tail,
+        _ => return None,
+    };
+    package_prefix_for_arg_tail(tail, &["--user"])
+}
+
+/// given the arguments after a package-taking command, return the last
+/// argument when it sits in the first positional slot.
+///
+/// yields `None` when there are no arguments, when the last argument looks
+/// like a flag, when it is the value of one of `value_flags`, or when an
+/// earlier positional argument already occupies the slot.
+fn package_prefix_for_arg_tail<'a>(args: &[&'a str], value_flags: &[&str]) -> Option<&'a str> {
+    let (&current, earlier) = args.split_last()?;
+    if current.starts_with('-') {
+        return None;
+    }
+    if earlier.last().is_some_and(|prev| value_flags.contains(prev)) {
+        return None;
+    }
+    // walk the earlier arguments: flags are skipped (with their value when
+    // they take one); any remaining bare word is a positional.
+    let mut skip_value = false;
+    for &token in earlier {
+        if skip_value {
+            skip_value = false;
+            continue;
+        }
+        if !token.starts_with('-') {
+            return None;
+        }
+        skip_value = value_flags.contains(&token);
+    }
+    Some(current)
+}
+
+/// extract package names from `package:`-prefixed lines of `output`.
+///
+/// lines without the `package:` prefix are ignored. when the remainder
+/// contains `=`, only the text after the last `=` is kept. surrounding
+/// whitespace is trimmed and empty names are dropped.
+fn parse_adb_packages(output: &str) -> Vec<String> {
+    output
+        .lines()
+        .filter_map(|line| line.trim().strip_prefix("package:"))
+        .map(|entry| entry.rsplit_once('=').map_or(entry, |(_, name)| name).trim())
+        .filter(|name| !name.is_empty())
+        .map(str::to_string)
+        .collect()
+}
+
+/// run `<adb> [selectors…] shell pm list packages` and turn the packages
+/// that match `prefix` into completion entries, best score first.
+///
+/// returns an empty list when the command yields no output via `run_cmd`.
+/// packages with equal scores keep the order in which they were listed.
+fn adb_package_candidates(
+    path: &Path,
+    selector_args: &[String],
+    prefix: &str,
+    timeout_ms: u64,
+) -> Vec<String> {
+    let argv: Vec<String> = std::iter::once(path.to_string_lossy().to_string())
+        .chain(selector_args.iter().cloned())
+        .chain(
+            ["shell", "pm", "list", "packages"]
+                .into_iter()
+                .map(str::to_string),
+        )
+        .collect();
+    let Some(listing) = run_cmd(&argv, timeout_ms) else {
+        return Vec::new();
+    };
+    let mut ranked: Vec<(i32, String)> = parse_adb_packages(&listing)
+        .into_iter()
+        .filter_map(|package| {
+            let score = prefix_score(prefix, &package);
+            (score > 0).then(|| (score, completion_json(&package, "package")))
+        })
+        .collect();
+    // stable sort, highest score first
+    ranked.sort_by(|a, b| b.0.cmp(&a.0));
+    ranked.into_iter().map(|(_, json)| json).collect()
+}
+
+/// produce completions that require querying a live tool rather than the
+/// cached help data. only `adb` is handled; any other command gets `None`.
+///
+/// the binary is taken from `explicit_cmd_path` when given, otherwise looked
+/// up with `find_in_path`. device completions are tried first, then package
+/// names. `None` means "no dynamic completion applies here".
+fn dynamic_value_completions(
+    cmd_name: &str,
+    rest: &[String],
+    explicit_cmd_path: Option<&Path>,
+    timeout_ms: u64,
+) -> Option<Vec<String>> {
+    if cmd_name != "adb" {
+        return None;
+    }
+    let path = match explicit_cmd_path {
+        Some(given) => given.to_path_buf(),
+        None => find_in_path(cmd_name)?,
+    };
+    if let Some(completion) = adb_device_completion(rest) {
+        return Some(adb_device_candidates(&path, completion, timeout_ms));
+    }
+    let prefix = adb_package_completion_prefix(rest)?;
+    let selectors = adb_selector_args(rest);
+    Some(adb_package_candidates(
+        &path, &selectors, prefix, timeout_ms,
+    ))
+}
+
+/// dynamically resolve completions for a top-level command that is not in
+/// the cache, write the result into the user store, and return its parsed
+/// form. discovered subcommands are also written.
+///
+/// this is `resolve_command_path_and_cache` with an empty subcommand path;
+/// see that function for the sources tried (native completions, manpage,
+/// then `--help` text).
+fn resolve_and_cache(
+    user_dir: &Path,
+    mandirs: &[PathBuf],
+    cmd_name: &str,
+    path: &Path,
+    timeout_ms: u64,
+) -> Option<ManpageResult> {
+    resolve_command_path_and_cache(user_dir, mandirs, cmd_name, &[], path, timeout_ms)
+}
+
+/// resolve completions for `base_cmd` followed by the subcommand path
+/// `sub_args`, cache what was found under `user_dir`, and return it.
+///
+/// sources are tried in order and the first one that yields data wins:
+/// 1. native completions emitted by the binary at `path`,
+/// 2. a manpage named `base-sub1-sub2…` found in `mandirs`,
+/// 3. the `--help` text of the (sub)command.
+///
+/// for the help fallback, nested subcommands are resolved as well and
+/// written to the store. the outcome of every store write is discarded
+/// (`let _ = …`). returns `None` when no source produced
+/// anything usable.
+fn resolve_command_path_and_cache(
+    user_dir: &Path,
+    mandirs: &[PathBuf],
+    base_cmd: &str,
+    sub_args: &[String],
+    path: &Path,
+    timeout_ms: u64,
+) -> Option<ManpageResult> {
+    // store key: "cmd sub1 sub2"
+    let full_cmd = if sub_args.is_empty() {
+        base_cmd.to_string()
+    } else {
+        format!("{base_cmd} {}", sub_args.join(" "))
+    };
+    // manpage lookup name: "cmd-sub1-sub2"
+    let hyphenated = if sub_args.is_empty() {
+        base_cmd.to_string()
+    } else {
+        format!("{base_cmd}-{}", sub_args.join("-"))
+    };
+
+    // 1. native completions
+    if matches!(classify_binary(path, path), Classify::HasNativeCompletions)
+        && let Some(nu) = try_native_completion(path, timeout_ms)
+    {
+        // the native script is stored under the base command, but parsed
+        // for the full (sub)command that was asked for.
+        let _ = write_native(user_dir, base_cmd, &nu);
+        return Some(parse_nu_completions(&full_cmd, &nu));
+    }
+    // 2. manpage as primary content source.
+    if let Some(mp_path) = find_manpage_path(mandirs, &hyphenated)
+        && let Ok(contents) = read_manpage_file(&mp_path)
+    {
+        let mut result = parse_manpage_string(&contents);
+        // an empty parse falls through to the --help fallback below
+        if !result.entries.is_empty() || !result.subcommands.is_empty() {
+            strip_subcmd_prefix(&mut result, &hyphenated);
+            let _ = write_result(user_dir, &full_cmd, "manpage", &result);
+            return Some(result);
+        }
+    }
+    // 3. fallback: scrape --help text.
+    let text = if sub_args.is_empty() {
+        try_help(path, timeout_ms)
+    } else {
+        let bin_s = path.to_string_lossy().to_string();
+        try_help_args(&bin_s, sub_args, timeout_ms)
+    }?;
+    let parsed = parse_help_text(&text);
+    if parsed.entries.is_empty() && parsed.subcommands.is_empty() && parsed.positionals.is_empty() {
+        return None;
+    }
+    // reject output that lists the requested leaf as one of its own
+    // subcommands — presumably the binary ignored the sub-arguments and
+    // printed the parent's help instead (TODO confirm).
+    if let Some(leaf) = sub_args.last()
+        && parsed
+            .subcommands
+            .iter()
+            .any(|sc| sc.name.eq_ignore_ascii_case(leaf))
+    {
+        return None;
+    }
+
+    let _ = write_result(user_dir, &full_cmd, "help", &parsed);
+    if sub_args.is_empty() {
+        // top-level command: let help_resolve walk the subcommand tree.
+        // the first accumulated entry is skipped — presumably it is the
+        // top-level command itself, already written above (TODO confirm).
+        let mut sub_acc: Vec<(String, ManpageResult)> = Vec::new();
+        help_resolve(path, base_cmd, 1, timeout_ms, &mut sub_acc);
+        for (cmd, r) in sub_acc.into_iter().skip(1) {
+            let _ = write_result(user_dir, &cmd, "help", &r);
+        }
+    } else {
+        // nested command: recurse into each plausible child subcommand
+        // (at least two chars, not flag-shaped, not "help").
+        let bin_s = path.to_string_lossy().to_string();
+        let inner_subs: Vec<String> = parsed
+            .subcommands
+            .iter()
+            .map(|sc| sc.name.clone())
+            .filter(|n| n.len() >= 2 && !n.starts_with('-') && n != "help")
+            .collect();
+        let mut sub_acc: Vec<(String, ManpageResult)> = Vec::new();
+        for sub in inner_subs {
+            let mut next = sub_args.to_vec();
+            next.push(sub);
+            recurse_subcommand(
+                &bin_s,
+                base_cmd,
+                &next,
+                sub_args.len() as u32 + 2,
+                timeout_ms,
+                &mut sub_acc,
+            );
+        }
+        for (cmd, r) in sub_acc {
+            let _ = write_result(user_dir, &cmd, "help", &r);
+        }
+    }
+    Some(parsed)
+}
+
+/// command names treated as privilege-elevation wrappers. when one of these
+/// is the first span, `cmd_complete` skips past it to find the real command.
+const ELEVATION_COMMANDS: &[&str] = &["sudo", "doas", "pkexec", "su", "run0"];
+
+/// answer a completion request for the command line split into `spans`.
+///
+/// prints exactly one value on stdout: `null` to hand off to the shell's
+/// own (file) completer, or the candidate list via
+/// `print_completion_candidates`.
+///
+/// steps:
+/// 1. skip a leading elevation wrapper and normalise a path-like command
+///    span to its command name, remembering the explicit path.
+/// 2. try `dynamic_value_completions` (live queries) first.
+/// 3. look up the longest cached command prefix in `system_dirs` plus
+///    `user_dir`; when the cache is missing or too shallow, resolve the
+///    command on the fly and cache it under `user_dir`.
+/// 4. score subcommands against the last token, and flags too when the
+///    last token starts with `-`.
+fn cmd_complete(
+    spans: &[String],
+    user_dir: &Path,
+    system_dirs: &[PathBuf],
+    mandirs: &[PathBuf],
+    timeout_ms: u64,
+) {
+    // lookup directories: system dirs first, then the user store
+    let mut dirs: Vec<PathBuf> = system_dirs.to_vec();
+    dirs.push(user_dir.to_path_buf());
+
+    // skip past elevation wrappers (see ELEVATION_COMMANDS) to find the real command
+    let mut explicit_cmd_path: Option<PathBuf> = None;
+    let mut spans: Vec<String> = match spans.first() {
+        Some(first) if ELEVATION_COMMANDS.contains(&first.as_str()) => {
+            let rest = &spans[1..];
+            let mut real_spans = None;
+            for (idx, s) in rest.iter().enumerate() {
+                // a span that names an executable by path wins outright
+                if let Some(path) = executable_span_path(s)
+                    && let Some(name) = command_name_for_path(&path)
+                {
+                    let mut target = rest[idx..].to_vec();
+                    target[0] = name;
+                    explicit_cmd_path = Some(path);
+                    real_spans = Some(target);
+                    break;
+                }
+                // otherwise take the first non-flag span that is a known
+                // command (cached or on PATH)
+                if !s.is_empty()
+                    && !s.starts_with('-')
+                    && (lookup(&dirs, s).is_some() || find_in_path(s).is_some())
+                {
+                    real_spans = Some(rest[idx..].to_vec());
+                    break;
+                }
+            }
+            // no real command found: complete the wrapper itself
+            real_spans.unwrap_or_else(|| spans.to_vec())
+        }
+        _ => spans.to_vec(),
+    };
+    // same path normalisation for a command that was not behind a wrapper
+    if explicit_cmd_path.is_none()
+        && let Some(first) = spans.first()
+        && let Some(path) = executable_span_path(first)
+        && let Some(name) = command_name_for_path(&path)
+    {
+        spans[0] = name;
+        explicit_cmd_path = Some(path);
+    }
+
+    if spans.is_empty() {
+        println!("null");
+        return;
+    }
+
+    let cmd_name = spans[0].clone();
+    let rest: Vec<String> = spans[1..].to_vec();
+
+    if let Some(candidates) =
+        dynamic_value_completions(&cmd_name, &rest, explicit_cmd_path.as_deref(), timeout_ms)
+    {
+        print_completion_candidates(&candidates);
+        return;
+    }
+
+    // strip intermediate flag tokens — they aren't part of subcommand path
+    let mut tokens: Vec<String> = vec![cmd_name.clone()];
+    if !rest.is_empty() {
+        let (last, leading) = rest.split_last().unwrap();
+        for t in leading {
+            // NOTE(review): `|| t.is_empty()` is redundant — an empty
+            // string never starts with '-', so the first test already holds.
+            if !t.starts_with('-') || t.is_empty() {
+                tokens.push(t.clone());
+            }
+        }
+        // the token being completed is always kept, flag or not
+        tokens.push(last.clone());
+    }
+
+    let last_token = rest.last().cloned().unwrap_or_default();
+    // lookup tokens exclude the partial unless the user has typed a trailing space
+    let lookup_tokens: Vec<String> = if last_token.is_empty() {
+        tokens.clone()
+    } else if tokens.len() > 1 {
+        tokens[..tokens.len() - 1].to_vec()
+    } else {
+        vec![cmd_name.clone()]
+    };
+
+    // try longest-prefix match: "git stash apply" → "git stash" → "git"
+    // yields (matched name, parsed data, number of tokens matched)
+    let find_result = |toks: &[String]| -> Option<(String, ManpageResult, usize)> {
+        let n = toks.len();
+        for drop in 0..n {
+            let prefix = &toks[..n - drop];
+            if prefix.is_empty() {
+                continue;
+            }
+            let name = prefix.join(" ");
+            if let Some(r) = lookup(&dirs, &name) {
+                return Some((name, r, prefix.len()));
+            }
+        }
+        None
+    };
+
+    let mut found = find_result(&lookup_tokens);
+
+    // dynamic resolve: if nothing matches or only a parent matched, try --help
+    let resolve_tokens: Vec<String> = lookup_tokens
+        .iter()
+        .filter(|t| !t.is_empty())
+        .cloned()
+        .collect();
+    let lookup_depth = lookup_tokens.len();
+    let resolve_depth = resolve_tokens.len();
+    let need_resolve = match &found {
+        Some((_, _, depth)) => *depth < resolve_depth,
+        None => resolve_depth > 0,
+    };
+    if need_resolve
+        && let Some(path) = explicit_cmd_path
+            .as_ref()
+            .cloned()
+            .or_else(|| find_in_path(&cmd_name))
+    {
+        // build extended mandirs from the binary's own prefix as well
+        let mut all_mandirs = mandirs.to_vec();
+        if let Some(parent) = path.parent()
+            && let Some(prefix) = parent.parent()
+        {
+            let share_man = prefix.join("share/man");
+            if share_man.is_dir() {
+                all_mandirs.push(share_man);
+            }
+        }
+        let sub_args = if resolve_tokens.len() > 1 {
+            resolve_tokens[1..].to_vec()
+        } else {
+            Vec::new()
+        };
+        let resolved = if sub_args.is_empty() {
+            resolve_and_cache(user_dir, &all_mandirs, &cmd_name, &path, timeout_ms)
+        } else {
+            resolve_command_path_and_cache(
+                user_dir,
+                &all_mandirs,
+                &cmd_name,
+                &sub_args,
+                &path,
+                timeout_ms,
+            )
+        };
+        // the resolver wrote into the user store; redo the lookup against it
+        if resolved.is_some() {
+            found = find_result(&lookup_tokens);
+        }
+    }
+
+    // NOTE(review): `&& !last_token.is_empty()` is implied by starts_with('-').
+    let typing_flag = last_token.starts_with('-') && !last_token.is_empty();
+    let candidates: Vec<String> = match &found {
+        None => Vec::new(),
+        Some((matched_name, r, depth)) => {
+            let mut scored: Vec<(i32, String)> = Vec::new();
+            // subcommand candidates (skip if match is too shallow)
+            if *depth >= lookup_depth.saturating_sub(1) {
+                // prefer subcommands recorded in the entry itself, else
+                // ask the store for children of the matched name
+                let subs: Vec<ManpageSubcommand> = if !r.subcommands.is_empty() {
+                    r.subcommands.clone()
+                } else {
+                    subcommands_of(&dirs, matched_name)
+                };
+                for sc in &subs {
+                    let s = fuzzy_score(&last_token, &sc.name);
+                    if s > 0 {
+                        scored.push((s, completion_json(&sc.name, &sc.desc)));
+                    }
+                }
+            }
+            // flag candidates
+            if typing_flag {
+                for e in &r.entries {
+                    // description with the parameter appended:
+                    // `<p>` for mandatory, `[p]` for optional
+                    let base_desc = match &e.param {
+                        Some(OwnedParam::Mandatory(p)) => {
+                            if e.desc.is_empty() {
+                                format!("<{p}>")
+                            } else {
+                                format!("{} <{p}>", e.desc)
+                            }
+                        }
+                        Some(OwnedParam::Optional(p)) => {
+                            if e.desc.is_empty() {
+                                format!("[{p}]")
+                            } else {
+                                format!("{} [{p}]", e.desc)
+                            }
+                        }
+                        None => e.desc.clone(),
+                    };
+                    let (flag, desc) = match &e.switch {
+                        OwnedSwitch::Long(l) => (format!("--{l}"), base_desc),
+                        OwnedSwitch::Short(c) => (format!("-{c}"), base_desc),
+                        OwnedSwitch::Both(c, l) => {
+                            // offer whichever spelling scores better against
+                            // what was typed (long form on ties) and mention
+                            // the other one in the description
+                            let long_flag = format!("--{l}");
+                            let short_flag = format!("-{c}");
+                            let ls = fuzzy_score(&last_token, &long_flag);
+                            let ss = fuzzy_score(&last_token, &short_flag);
+                            if ss > ls {
+                                (short_flag, format!("(aka {long_flag}) {base_desc}"))
+                            } else {
+                                (long_flag.clone(), format!("(aka {short_flag}) {base_desc}"))
+                            }
+                        }
+                    };
+                    let s = fuzzy_score(&last_token, &flag);
+                    if s > 0 {
+                        scored.push((s, completion_json(&flag, &desc)));
+                    }
+                }
+            }
+            // stable sort, best score first
+            scored.sort_by(|a, b| b.0.cmp(&a.0));
+            scored.into_iter().map(|(_, json)| json).collect()
+        }
+    };
+
+    // protocol: null = hand off to nushell's file completer; [...] = our candidates
+    let has_subs = match &found {
+        Some((matched_name, r, _)) => {
+            !r.subcommands.is_empty() || !subcommands_of(&dirs, matched_name).is_empty()
+        }
+        None => false,
+    };
+    // hand off at non-flag leaf positions so file and dynamic completers can
+    // answer argument prefixes. when the token starts with "-", keep flags.
+    let want_files = !typing_flag && !has_subs && (last_token.is_empty() || candidates.is_empty());
+    if want_files || candidates.is_empty() {
+        println!("null");
+    } else {
+        print_completion_candidates(&candidates);
+    }
+}
+
+// --- completions self-emission ---
+
+/// print a generated completion module for inshellah itself: a single
+/// `-h`/`--help` flag plus the list of subcommand names (without
+/// descriptions).
+fn cmd_completions() {
+    const SUBCOMMAND_NAMES: [&str; 7] = [
+        "index",
+        "manpage",
+        "manpage-dir",
+        "complete",
+        "query",
+        "dump",
+        "completions",
+    ];
+
+    let help_flag = ManpageEntry {
+        switch: OwnedSwitch::Both('h', "help".to_string()),
+        param: None,
+        desc: "show help".to_string(),
+    };
+    let subcommands = SUBCOMMAND_NAMES
+        .into_iter()
+        .map(|name| ManpageSubcommand {
+            name: name.to_string(),
+            desc: String::new(),
+        })
+        .collect();
+    let result = ManpageResult {
+        entries: vec![help_flag],
+        subcommands,
+        positionals: Default::default(),
+        description: "nushell completions engine".to_string(),
+    };
+    print!("{}", generate_module("inshellah", &result));
+}
+
+// --- argument parsing ---
+
+/// parsed command-line arguments of the `index` subcommand.
+struct IndexArgs {
+    /// positional PREFIX arguments; `main` derives `<prefix>/bin` and
+    /// `<prefix>/share/man` from each.
+    prefixes: Vec<PathBuf>,
+    /// value of `--dir`; `main` falls back to the default store path.
+    dir: Option<PathBuf>,
+    /// value of `--ignore`, a file loaded with `load_ignorelist`.
+    ignore: Option<PathBuf>,
+    /// value of `--help-only`, a file loaded with `load_ignorelist`.
+    help_only: Option<PathBuf>,
+    /// value of `--timeout-ms`, or `DEFAULT_TIMEOUT_MS`.
+    timeout_ms: u64,
+    /// value of `--workers` (at least 1), or `default_workers()`.
+    workers: usize,
+}
+
+/// parse the arguments following `index`.
+///
+/// recognises `--dir`, `--ignore`, `--help-only`, `--timeout-ms` and
+/// `--workers`, each taking the next argument as its value. a flag with no
+/// following argument, or a numeric flag whose value does not parse, leaves
+/// the current setting untouched (the value is still consumed). every other
+/// argument is collected as a prefix path.
+fn parse_index_args(args: &[String]) -> IndexArgs {
+    let mut parsed = IndexArgs {
+        prefixes: Vec::new(),
+        dir: None,
+        ignore: None,
+        help_only: None,
+        timeout_ms: DEFAULT_TIMEOUT_MS,
+        workers: default_workers(),
+    };
+    let mut remaining = args.iter();
+    while let Some(arg) = remaining.next() {
+        match arg.as_str() {
+            "--dir" => {
+                if let Some(value) = remaining.next() {
+                    parsed.dir = Some(PathBuf::from(value));
+                }
+            }
+            "--ignore" => {
+                if let Some(value) = remaining.next() {
+                    parsed.ignore = Some(PathBuf::from(value));
+                }
+            }
+            "--help-only" => {
+                if let Some(value) = remaining.next() {
+                    parsed.help_only = Some(PathBuf::from(value));
+                }
+            }
+            "--timeout-ms" => {
+                if let Some(ms) = remaining.next().and_then(|v| v.parse::<u64>().ok()) {
+                    parsed.timeout_ms = ms;
+                }
+            }
+            "--workers" => {
+                if let Some(count) = remaining.next().and_then(|v| v.parse::<usize>().ok()) {
+                    // never run with zero workers
+                    parsed.workers = count.max(1);
+                }
+            }
+            prefix => parsed.prefixes.push(PathBuf::from(prefix)),
+        }
+    }
+    parsed
+}
+
+/// default worker-thread count: what `available_parallelism` (1.59+)
+/// reports, or 4 when it cannot be determined.
+fn default_workers() -> usize {
+    match std::thread::available_parallelism() {
+        Ok(count) => count.get(),
+        Err(_) => 4,
+    }
+}
+
+/// manpage directory of an installation prefix: `<prefix>/share/man`.
+fn man_dir_of_prefix(prefix: &Path) -> PathBuf {
+    prefix.join("share/man")
+}
+
+/// split `args` into positional arguments, store directories and a timeout.
+///
+/// `--dir PATH[:PATH...]` sets the directory list (a later `--dir` replaces
+/// an earlier one) and `--timeout-ms N` sets the timeout; a flag without a
+/// following value, or an unparsable N, changes nothing. without `--dir`
+/// the list is the single default store path.
+fn parse_dir_args(args: &[String]) -> (Vec<String>, Vec<PathBuf>, u64) {
+    let mut positional = Vec::new();
+    let mut explicit_dirs: Option<Vec<PathBuf>> = None;
+    let mut timeout_ms = DEFAULT_TIMEOUT_MS;
+
+    let mut remaining = args.iter();
+    while let Some(arg) = remaining.next() {
+        match arg.as_str() {
+            "--dir" => {
+                if let Some(list) = remaining.next() {
+                    explicit_dirs = Some(list.split(':').map(PathBuf::from).collect());
+                }
+            }
+            "--timeout-ms" => {
+                if let Some(ms) = remaining.next().and_then(|v| v.parse::<u64>().ok()) {
+                    timeout_ms = ms;
+                }
+            }
+            _ => positional.push(arg.clone()),
+        }
+    }
+
+    let dirs = match explicit_dirs {
+        Some(given) => given,
+        None => vec![default_store_path()],
+    };
+    (positional, dirs, timeout_ms)
+}
+
+/// entry point: dispatch on the first argument to one of the subcommands
+/// (`index`, `manpage`, `manpage-dir`, `complete`, `query`, `dump`,
+/// `completions`, or help). missing arguments, unknown subcommands and
+/// failed commands print a message to stderr and exit with status 1.
+fn main() {
+    let args: Vec<String> = std::env::args().collect();
+    if args.len() < 2 {
+        usage();
+        std::process::exit(1);
+    }
+    match args[1].as_str() {
+        "index" => {
+            let parsed = parse_index_args(&args[2..]);
+            if parsed.prefixes.is_empty() {
+                eprintln!("error: index requires at least one PREFIX");
+                std::process::exit(1);
+            }
+            let dir = parsed.dir.unwrap_or_else(default_store_path);
+            // both lists are optional files; absent means empty
+            let ignorelist = parsed
+                .ignore
+                .as_deref()
+                .map(load_ignorelist)
+                .unwrap_or_default();
+            let help_only = parsed
+                .help_only
+                .as_deref()
+                .map(load_ignorelist)
+                .unwrap_or_default();
+            // each prefix contributes <prefix>/bin and <prefix>/share/man
+            let bindirs: Vec<PathBuf> = parsed.prefixes.iter().map(|p| p.join("bin")).collect();
+            let mandirs: Vec<PathBuf> = parsed
+                .prefixes
+                .iter()
+                .map(|p| man_dir_of_prefix(p))
+                .collect();
+            if let Err(e) = cmd_index(
+                &bindirs,
+                &mandirs,
+                &ignorelist,
+                &help_only,
+                &dir,
+                parsed.timeout_ms,
+                parsed.workers,
+            ) {
+                eprintln!("index failed: {e}");
+                std::process::exit(1);
+            }
+        }
+        "manpage" => {
+            if args.len() < 3 {
+                eprintln!("error: manpage requires a FILE argument");
+                std::process::exit(1);
+            }
+            if let Err(e) = cmd_manpage(Path::new(&args[2])) {
+                eprintln!("manpage failed: {e}");
+                std::process::exit(1);
+            }
+        }
+        "manpage-dir" => {
+            if args.len() < 3 {
+                eprintln!("error: manpage-dir requires a DIR argument");
+                std::process::exit(1);
+            }
+            if let Err(e) = cmd_manpage_dir(Path::new(&args[2])) {
+                eprintln!("manpage-dir failed: {e}");
+                std::process::exit(1);
+            }
+        }
+        "complete" => {
+            let (positional, dirs, timeout_ms) = parse_dir_args(&args[2..]);
+            // first dir is the writable user cache; rest are read-only system dirs
+            let (user_dir, system_dirs): (PathBuf, Vec<PathBuf>) = match dirs.split_first() {
+                Some((first, rest)) => (first.clone(), rest.to_vec()),
+                None => (default_store_path(), Vec::new()),
+            };
+            // mandirs default to share/man siblings of each system dir
+            let mandirs: Vec<PathBuf> = system_dirs
+                .iter()
+                .filter_map(|d| d.parent().map(|p| p.join("share/man")))
+                .filter(|p| p.is_dir())
+                .collect();
+            cmd_complete(&positional, &user_dir, &system_dirs, &mandirs, timeout_ms);
+        }
+        "query" => {
+            let (positional, dirs, _timeout_ms) = parse_dir_args(&args[2..]);
+            if positional.is_empty() {
+                eprintln!("error: query requires a CMD argument");
+                std::process::exit(1);
+            }
+            // multiple positionals form one space-separated command name
+            let cmd = positional.join(" ");
+            if let Err(e) = cmd_query(&cmd, &dirs) {
+                eprintln!("query failed: {e}");
+                std::process::exit(1);
+            }
+        }
+        "dump" => {
+            let (_, dirs, _timeout_ms) = parse_dir_args(&args[2..]);
+            cmd_dump(&dirs);
+        }
+        "completions" => cmd_completions(),
+        "--help" | "-h" | "help" => usage(),
+        other => {
+            eprintln!("unknown subcommand: {other}");
+            usage();
+            std::process::exit(1);
+        }
+    }
+    // reference the otherwise-unused function so the compiler's dead-code
+    // warning goes away
+    let _ = filename_of_command;
+}
diff --git a/src/parsers/help.rs b/src/parsers/help.rs
new file mode 100644
index 0000000..ab88656
--- /dev/null
+++ b/src/parsers/help.rs
@@ -0,0 +1,187 @@
+mod description;
+mod helpers;
+mod options;
+mod positionals;
+mod subcommands;
+
+pub use options::{param_parser, parse_usage_flags, switch_parser};
+pub use positionals::{
+    extract_cli11_positionals, extract_usage_positionals, parse_usage_args, skip_command_name,
+};
+
+use std::collections::HashMap;
+
+use crate::{
+    parsers::help::{description::description, helpers::get_indent, subcommands::subcommand_entry},
+    types::*,
+};
+use nom::{IResult, Parser, character::complete::space0, combinator::opt};
+
+use crate::make_parser;
+
+/// raw output of the `entry` parser tuple: the leading whitespace, the
+/// switch with its optional parameter, and the description as
+/// (first line, continuation lines).
+type EntryParts<'a> = (
+    &'a str,
+    (Switch<'a>, Option<Param<'a>>),
+    (&'a str, Vec<&'a str>),
+);
+
+// parse a single flag entry: indent + switch + optional param + description.
+// description lines that are blank after trimming are dropped, so `desc`
+// holds only lines with content.
+make_parser!(entry -> OptionEntry<'a>,
+    (
+        space0,
+        (switch_parser, opt(param_parser)),
+        description,
+    )
+    => |(_, (switch, param), (first, cont))
+        : EntryParts<'a>|
+    {
+        let mut desc: Vec<&str> = Vec::with_capacity(1 + cont.len());
+        if !first.trim().is_empty() { desc.push(first); }
+        desc.extend(cont.into_iter().filter(|l| !l.trim().is_empty()));
+        OptionEntry { switch, param, desc }
+    }
+);
+
+/// dedup raw subcommands by case-insensitive name, keeping the entry with
+/// the longest description. preserves first-seen ordering.
+///
+/// when two entries tie on description length the earlier one is kept.
+fn dedup_subcommands<'a>(raw: Vec<Subcommand<'a>>) -> Vec<Subcommand<'a>> {
+    use std::collections::hash_map::Entry;
+
+    let mut by_name: HashMap<String, Subcommand<'a>> = HashMap::new();
+    let mut order: Vec<String> = Vec::new();
+    for sc in raw {
+        // one hash lookup per item: the entry tells us whether the name
+        // is new and gives direct access to the slot
+        match by_name.entry(sc.name.to_ascii_lowercase()) {
+            Entry::Occupied(mut slot) => {
+                if slot.get().desc.len() < sc.desc.len() {
+                    slot.insert(sc);
+                }
+            }
+            Entry::Vacant(slot) => {
+                order.push(slot.key().clone());
+                slot.insert(sc);
+            }
+        }
+    }
+    order
+        .into_iter()
+        .map(|key| {
+            by_name
+                .remove(&key)
+                .expect("every key in `order` was inserted into `by_name`")
+        })
+        .collect()
+}
+
+/// which kind of help-text section the scanner in `build_help_result` is
+/// currently inside.
+#[derive(Clone, Copy, PartialEq, Eq)]
+enum HelpSection {
+    /// no section header seen yet, or a usage line; option entries are
+    /// still accepted here.
+    Unknown,
+    /// an options/flags section; option entries are accepted.
+    Options,
+    /// a commands/subcommands section; subcommand entries are accepted.
+    Commands,
+    /// any other recognised section (arguments, examples, …); neither
+    /// options nor subcommands are parsed.
+    Other,
+}
+
+/// decide whether `line` is a section header and, if so, which kind.
+///
+/// only lines indented by at most 4 (as measured by `get_indent`) with
+/// non-blank content are considered. the header text is compared
+/// case-insensitively with trailing colons removed. checks run in a fixed
+/// order — usage, prose-like sections, commands, arguments, options — and
+/// the first match wins; `None` means the line is not a header.
+fn classify_section_line(line: &str) -> Option<HelpSection> {
+    let (start, visual_indent) = get_indent(line);
+    if visual_indent > 4 {
+        return None;
+    }
+    let body = line[start..].trim();
+    if body.is_empty() {
+        return None;
+    }
+    let heading = body.trim_end_matches(':').trim().to_ascii_lowercase();
+    let mentions_flags = heading.contains("option") || heading.contains("flag");
+
+    if heading.starts_with("usage") {
+        return Some(HelpSection::Unknown);
+    }
+
+    let is_prose_section = heading.starts_with("valid arguments")
+        || heading.contains(" is one of the following")
+        || heading.contains(" defaults to")
+        || heading == "examples"
+        || heading == "example";
+    if is_prose_section {
+        return Some(HelpSection::Other);
+    }
+
+    let names_commands = matches!(heading.as_str(), "command" | "commands" | "subcommands")
+        || heading.ends_with(" commands")
+        || heading.ends_with(" subcommands");
+    if names_commands && !mentions_flags {
+        return Some(HelpSection::Commands);
+    }
+
+    let names_arguments = heading.contains("argument")
+        || heading == "args"
+        || heading == "positionals"
+        || heading == "positional arguments";
+    if names_arguments {
+        return Some(HelpSection::Other);
+    }
+
+    // any remaining line that ends with a colon is treated as an options
+    // header too
+    if mentions_flags || body.ends_with(':') {
+        return Some(HelpSection::Options);
+    }
+    None
+}
+
+/// drop the first line of `s` (through its `\n`) and return what follows;
+/// returns the empty string when `s` contains no newline.
+fn consume_line(s: &str) -> &str {
+    s.split_once('\n').map_or("", |(_, rest)| rest)
+}
+
+/// true when a parser left a shorter remainder than the input it was
+/// given, i.e. it consumed at least one byte.
+fn parser_made_progress(original: &str, rem: &str) -> bool {
+    original.len() > rem.len()
+}
+
+/// build the final HelpResult by scanning help text with lightweight section
+/// awareness. options are accepted in option-like sections and before a
+/// section is known; subcommands are accepted only in command-like sections.
+///
+/// lines that match neither a header nor an accepted entry are skipped one
+/// at a time, so the scan always advances. positionals are extracted
+/// separately from the whole text, and `desc` is left empty.
+fn build_help_result<'a>(original: &'a str) -> HelpResult<'a> {
+    let mut entries = Vec::new();
+    let mut raw_subcommands: Vec<Subcommand<'a>> = Vec::new();
+    let mut section = HelpSection::Unknown;
+    let mut rem = original;
+
+    while !rem.is_empty() {
+        // peek at the current line only; `rem` is advanced below
+        let line = rem.split_once('\n').map(|(line, _)| line).unwrap_or(rem);
+        if let Some(next_section) = classify_section_line(line) {
+            section = next_section;
+            rem = consume_line(rem);
+            continue;
+        }
+
+        // the entry parsers may consume several lines (continuations);
+        // the progress check guards against a zero-length success
+        if matches!(section, HelpSection::Unknown | HelpSection::Options)
+            && let Ok((next, parsed)) = entry(rem)
+            && parser_made_progress(rem, next)
+        {
+            entries.push(parsed);
+            rem = next;
+            continue;
+        }
+
+        if section == HelpSection::Commands
+            && let Ok((next, parsed)) = subcommand_entry(rem)
+            && parser_made_progress(rem, next)
+        {
+            raw_subcommands.push(parsed);
+            rem = next;
+            continue;
+        }
+
+        // nothing recognised on this line: skip it
+        rem = consume_line(rem);
+    }
+
+    let subcommands = dedup_subcommands(raw_subcommands);
+    // cli11 positional section takes priority over the usage-line scan
+    // when both are present — cli11 carries types and optionality.
+    let positionals = match extract_cli11_positionals(original) {
+        Ok((_, p)) if !p.is_empty() => p,
+        _ => extract_usage_positionals(original)
+            .map(|(_, p)| p)
+            .unwrap_or_default(),
+    };
+    HelpResult {
+        entries,
+        subcommands,
+        positionals,
+        desc: "",
+    }
+}
+
+/// top-level help parser. wraps `build_help_result` in nom's `IResult`
+/// shape: it always succeeds, consumes the whole input, and returns an
+/// empty remainder.
+pub fn help_parser(s: &str) -> IResult<&str, HelpResult<'_>> {
+    Ok(("", build_help_result(s)))
+}
diff --git a/src/parsers/help/description.rs b/src/parsers/help/description.rs
new file mode 100644
index 0000000..b10ab72
--- /dev/null
+++ b/src/parsers/help/description.rs
@@ -0,0 +1,37 @@
+use nom::{
+    IResult, Parser,
+    character::complete::space0,
+    combinator::verify,
+    multi::many0,
+    sequence::{preceded, terminated},
+};
+
+use crate::make_parser;
+use crate::parsers::help::helpers::{at_least_indent, eol, rest_of_line};
+
+// continuation line: an indented (≥8 visual cols), non-flag-shaped line
+// belonging to the previous flag's description. blank-but-indented lines
+// are accepted (content = ""); callers are expected to drop them (the
+// `entry` parser filters out lines that are blank after trimming).
+// yields the line's content with leading whitespace removed.
+make_parser!(continuation_line -> &'a str,
+    verify(
+        preceded(
+            // assert ≥8 visual cols of leading horizontal whitespace
+            // without consuming — the `space0` in front of `rest_of_line`
+            // will eat them next.
+            at_least_indent(8),
+            terminated(preceded(space0, rest_of_line), eol)
+        ),
+        // reject lines whose first non-space char is '-' — that's a new
+        // flag entry, not a continuation of the previous one.
+        |content: &&str| !content.starts_with('-')
+    )
+);
+
+// description: the line of text after the switch+param, plus any
+// continuation lines. always succeeds — first line may be empty (when
+// the switch is followed immediately by a newline, "clap long" style).
+// output is (first line, continuation lines), each without its leading
+// whitespace and line ending.
+make_parser!(pub description -> (&'a str, Vec<&'a str>),
+(
+    terminated(preceded(space0, rest_of_line), eol),
+    many0(continuation_line),
+));
diff --git a/src/parsers/help/helpers.rs b/src/parsers/help/helpers.rs
new file mode 100644
index 0000000..c6892a3
--- /dev/null
+++ b/src/parsers/help/helpers.rs
@@ -0,0 +1,105 @@
+use nom::{
+    AsChar, IResult, Parser, branch::alt, bytes::complete::take_till,
+    character::complete::line_ending, combinator::eof,
+};
+#[allow(unused_imports)]
+use nom::{bytes::complete::take_while, combinator::peek, combinator::verify};
+
+#[macro_export] // make_parser!([pub] NAME -> OUT, PARSER [=> WRAP]) defines fn NAME(&'a str) -> IResult<&'a str, OUT>
+macro_rules! make_parser { // the expansion names `IResult` and `.parse`, so callers need nom's IResult + Parser in scope
+    (pub $name:ident -> $out:ty, $parser:expr => $wrap:expr) => { // arm: public, output mapped through $wrap
+        #[allow(clippy::needless_lifetimes)] // 'a is spelled out so $out can name it (e.g. `&'a str`)
+        #[allow(mismatched_lifetime_syntaxes)] // NOTE(review): presumably for $out types that elide their lifetime — confirm
+        pub fn $name<'a>(s: &'a str) -> IResult<&'a str, $out> {
+            let (rem, val) = $parser.parse(s)?; // run the parser; its error propagates unchanged
+            Ok((rem, $wrap(val))) // keep the remainder, wrap the output
+        }
+    };
+    (pub $name:ident -> $out:ty, $parser:expr) => { // arm: public, parser output returned as-is
+        #[allow(clippy::needless_lifetimes)]
+        #[allow(mismatched_lifetime_syntaxes)]
+        pub fn $name<'a>(s: &'a str) -> IResult<&'a str, $out> {
+            $parser.parse(s)
+        }
+    };
+    ($name:ident -> $out:ty, $parser:expr => $wrap:expr) => { // arm: private, output mapped through $wrap
+        #[allow(clippy::needless_lifetimes)]
+        #[allow(mismatched_lifetime_syntaxes)]
+        fn $name<'a>(s: &'a str) -> IResult<&'a str, $out> {
+            let (rem, val) = $parser.parse(s)?;
+            Ok((rem, $wrap(val)))
+        }
+    };
+    ($name:ident -> $out:ty, $parser:expr) => { // arm: private, parser output returned as-is
+        #[allow(clippy::needless_lifetimes)]
+        #[allow(mismatched_lifetime_syntaxes)]
+        fn $name<'a>(s: &'a str) -> IResult<&'a str, $out> {
+            $parser.parse(s)
+        }
+    };
+}
+
+#[macro_export] // make_predicate!([pub] NAME, |c| BODY) defines fn NAME(c: char) -> bool { BODY }
+macro_rules! make_predicate {
+    (pub $name:ident, |$c:ident| $($body:tt)*) => { // arm: public predicate
+        pub fn $name($c: char) -> bool { $($body)* }
+    };
+    ($name:ident, |$c:ident| $($body:tt)*) => { // arm: private predicate
+        fn $name($c: char) -> bool { $($body)* }
+    };
+}
+
+make_predicate!(pub is_option_char, |c| c.is_alphanumeric() || matches!(c, '-' | '_')); // chars of a switch/param name
+
+make_parser!(pub rest_of_line -> &'a str, // everything up to (not including) the next newline; may be empty
+    take_till(|c: char| c.is_newline()) // NOTE(review): is_newline looks like '\n' only, so a CRLF's '\r' stays in the slice — confirm
+);
+
+// end of line — matches either a line ending ("\n" / "\r\n") or end of
+// input. permissive version used in most line-consuming parsers.
+make_parser!(pub eol -> &'a str, alt((line_ending, eof)));
+
+/// visual indent of the leading whitespace run of `s`: spaces count 1, tabs
+/// count 8 (typical terminal default). stops at the first other char; saturates at 255.
+pub fn visual_indent(s: &str) -> u8 {
+    s.chars()
+        .map_while(|c| match c {
+            ' ' => Some(1u8),
+            '\t' => Some(8u8),
+            _ => None,
+        })
+        .fold(0u8, u8::saturating_add)
+}
+
+/// peek-only guard: succeeds when the input opens with a run of spaces/tabs
+/// whose visual width is at least `min` columns, yielding that run. nothing
+/// is consumed — follow it with `space0`/`take_while` to eat the indent.
+pub fn at_least_indent<'a>(
+    min: u8,
+) -> impl Parser<&'a str, Output = &'a str, Error = nom::error::Error<&'a str>> {
+    verify(
+        peek(take_while(|c: char| matches!(c, ' ' | '\t'))),
+        move |run: &str| min <= visual_indent(run),
+    )
+}
+
+/// legacy helper: returns (byte index of first non-space, visual indent).
+/// used by callers that still need the byte index. spaces count 1 and tabs 8,
+/// matching `visual_indent`; the width saturates at `u8::MAX` instead of
+/// overflowing. when `s` is empty or entirely spaces/tabs the index is
+/// `s.len()` (one past the end), so `&s[idx..]` is the empty remainder.
+pub fn get_indent(s: &str) -> (usize, u8) {
+    let mut indent = 0u8;
+    for (i, c) in s.char_indices() {
+        let incr = match c {
+            ' ' => 1,
+            '\t' => 8,
+            // first non-indent char: its byte offset ends the run.
+            _ => return (i, indent),
+        };
+        // saturate: more than 255 columns would otherwise overflow the u8.
+        indent = indent.saturating_add(incr);
+    }
+    // ran off the end without meeting a non-indent char.
+    (s.len(), indent)
+}
diff --git a/src/parsers/help/options.rs b/src/parsers/help/options.rs
new file mode 100644
index 0000000..73b2170
--- /dev/null
+++ b/src/parsers/help/options.rs
@@ -0,0 +1,192 @@
+use crate::make_parser;
+use crate::parsers::help::helpers::is_option_char;
+use crate::types::*;
+
+use nom::bytes::complete::{take_till, take_till1};
+use nom::character::complete::{space0, space1};
+use nom::combinator::{map, opt};
+use nom::multi::many0;
+use nom::sequence::separated_pair;
+use nom::{
+    IResult, Parser,
+    branch::alt,
+    bytes::complete::{tag, take_while1},
+    character::complete::{char, satisfy},
+    combinator::{value, verify},
+    sequence::{delimited, preceded},
+};
+
+make_parser!(short_switch -> char, // "-x" -> 'x' (one alphanumeric char after a single dash)
+    preceded(char('-'), satisfy(|c| c.is_alphanumeric())));
+
+make_parser!(long_switch -> &'a str, // "--name" -> "name"
+    preceded(tag("--"), take_while1(is_option_char)));
+
+make_parser!(negatable_long_switch -> &'a str, // "--[no-]name" -> "name" (the "[no-]" marker is dropped)
+    preceded(tag("--[no-]"), take_while1(is_option_char)));
+
+make_parser!(comma -> (), // "," plus any spaces/tabs after it
+    value((), preceded(char(','), space0)));
+
+make_parser!(eq_optional_param -> Param<'a>, // "[=NAME]" -> Optional("NAME")
+    delimited(tag("[="), take_while1(is_option_char), char(']')) => Param::Optional);
+
+make_parser!(eq_optional_angle_param -> Param<'a>, // "[=<name>]" -> Optional("name")
+    delimited(tag("[=<"), take_till1(|c| c == '>'), tag(">]")) => Param::Optional);
+
+make_parser!(eq_mandatory_param -> Param<'a>, // "=NAME" -> Mandatory("NAME")
+    preceded(char('='), take_while1(is_option_char)) => Param::Mandatory);
+
+// grab a wide [A-Za-z0-9_-] token, then accept it only if the WHOLE token
+// reads as an ALL_CAPS param name. matching uppercase chars alone would
+// take just the "N" of " Needs: ..." and strand "eeds:..." in the
+// description, hence widen first and reject afterwards.
+make_parser!(spaced_uppercase_param -> Param<'a>,
+    preceded(
+        char(' '),
+        verify(
+            take_while1(|c: char|
+                c.is_ascii_alphanumeric() || matches!(c, '_' | '-')
+            ),
+            |s: &str| {
+                // opens with an uppercase letter or '_', then only uppercase, digits and '_'.
+                let head_ok = s.chars().next().is_some_and(|c| c.is_ascii_uppercase() || c == '_');
+                head_ok && s.chars().all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
+            }
+        )
+    ) => Param::Mandatory);
+
+make_parser!(spaced_angle_param -> Param<'a>, // " <name>" (exactly one space) -> Mandatory("name")
+    preceded(char(' '), delimited(char('<'), take_till1(|c| c == '>'), char('>'))) => Param::Mandatory);
+
+make_parser!(spaced_opt_angle_param -> Param<'a>, // " <[name]>" -> Optional("name")
+    preceded(char(' '), delimited(char('<'),
+        delimited(char('['), take_while1(|c| c != ']'), char(']')),
+        char('>'))) => Param::Optional);
+
+make_parser!(spaced_angle_param_after_space -> Param<'a>, // one or more spaces/tabs, then "<name>" -> Mandatory("name")
+    preceded(space1, delimited(char('<'), take_till1(|c| c == '>'), char('>'))) => Param::Mandatory);
+
+// take the full non-whitespace token, then verify it is all ASCII lowercase
+// and <=10 chars (byte length equals char count here). a take_while_m_n
+// with a 10-char cap would leave a partial match — e.g. "--foo nanoseconds"
+// would extract param "nanosecond" and leave "s" as the description. a word
+// longer than 10 chars is almost certainly description, not a type annotation.
+make_parser!(spaced_type_param -> Param<'a>,
+    preceded(
+        char(' '),
+        verify(
+            take_while1(|c: char| !c.is_whitespace()),
+            |s: &str| s.len() <= 10 && s.chars().all(|c| c.is_ascii_lowercase())
+        )
+    ) => Param::Mandatory
+);
+
+make_parser!(pub param_parser -> Param<'a>, alt(( // first alternative that matches wins, so order matters
+    eq_optional_angle_param,        // "[=<name>]"
+    eq_optional_param,              // "[=NAME]"
+    eq_mandatory_param,             // "=NAME"
+    spaced_opt_angle_param,         // " <[name]>" — must precede the plain angle forms, which would read "[name]" as the name
+    spaced_angle_param_after_space, // "  <name>" after any run of spaces/tabs
+    spaced_angle_param,             // NOTE(review): looks subsumed by the space1 variant above — confirm before removing
+    spaced_uppercase_param,         // " NAME"
+    spaced_type_param,              // " type" (short lowercase word) — loosest form, tried last
+)));
+
+macro_rules! switch_pair { // switch_pair!(NAME, LEFT, SEP, RIGHT => |a, b| BODY): fn NAME parsing "LEFT SEP RIGHT" into a Switch
+    ($name:ident, $left:expr, $sep:expr, $right:expr => |$a:ident, $b:ident| $body:expr) => {
+        fn $name<'a>(s: &'a str) -> IResult<&'a str, Switch<'a>> {
+            use nom::sequence::separated_pair;
+            let (rem, ($a, $b)) = separated_pair($left, $sep, $right).parse(s)?; // separator output is discarded
+            Ok((rem, $body)) // $body builds the Switch from the two bound halves
+        }
+    };
+}
+
+switch_pair!(short_comma_long, // "-s, --long"
+    short_switch, comma, long_switch => |s, l| Switch::Both(s, l));
+
+switch_pair!(short_comma_negatable_long, // "-s, --[no-]long"
+    short_switch, comma, negatable_long_switch => |s, l| Switch::Both(s, l));
+
+switch_pair!(short_space_long, // "-s --long" (exactly one space)
+    short_switch, char(' '), long_switch => |s, l| Switch::Both(s, l));
+
+switch_pair!(short_space_negatable_long, // "-s --[no-]long"
+    short_switch, char(' '), negatable_long_switch => |s, l| Switch::Both(s, l));
+
+make_parser!(slash_sep -> (), // "/" with optional spaces/tabs on either side
+    value((), delimited(space0, char('/'), space0)));
+
+switch_pair!(long_slash_short, // "--long / -s"; note the swapped binding order (l, s)
+    long_switch, slash_sep, short_switch => |l, s| Switch::Both(s, l));
+
+make_parser!(short_as_switch -> Switch<'a>, short_switch => Switch::Short); // lone "-s"
+make_parser!(negatable_long_as_switch -> Switch<'a>, negatable_long_switch => Switch::Long); // lone "--[no-]long"
+make_parser!(long_as_switch -> Switch<'a>, long_switch => Switch::Long); // lone "--long"
+
+make_parser!(pub switch_parser -> Switch<'a>, // any supported switch spelling; first matching alternative wins
+    alt((
+        short_comma_negatable_long,
+        short_space_negatable_long,
+        short_comma_long,
+        short_space_long,
+        long_slash_short, // must precede long_as_switch, which would stop after the "--long" half
+        short_as_switch, // must follow the short+long pairs, or "-s, --long" would yield only Short
+        negatable_long_as_switch,
+        long_as_switch,
+    ))
+);
+
+// `{--long | -s}` — manpage SYNOPSIS-line switch pair. nix-env's
+// synopsis is the canonical case: `[{--file | -f} path] [{--profile |
+// -p} path]`. emits Switch::Both carrying both the short and long name.
+make_parser!(brace_pipe_long_short -> Switch<'a>, // inner "--long | -s"
+    separated_pair(long_switch, (space0, char('|'), space0), short_switch)
+    => |(l, s): (&'a str, char)| Switch::Both(s, l)
+);
+
+make_parser!(brace_pipe_short_long -> Switch<'a>, // inner "-s | --long"
+    separated_pair(short_switch, (space0, char('|'), space0), long_switch)
+    => |(s, l): (char, &'a str)| Switch::Both(s, l)
+);
+
+make_parser!(brace_pipe_switch -> Switch<'a>, // the braces themselves, with optional inner padding
+    delimited(
+        (char('{'), space0),
+        alt((brace_pipe_long_short, brace_pipe_short_long)),
+        (space0, char('}'))
+    )
+);
+
+make_parser!(usage_switch_parser -> Switch<'a>, // brace-pipe form first, then the regular switch spellings
+    alt((brace_pipe_switch, switch_parser))
+);
+
+// consume any chars except `]` (possibly none). used to swallow trailing
+// tokens inside a flag bracket — e.g. `[--option name value]` keeps
+// switch=Long("option") and param=Mandatory("name"), discarding ` value`.
+make_parser!(take_till_bracket -> &'a str, take_till(|c: char| c == ']'));
+
+// `[<switch> [param] <junk>]` inside the SYNOPSIS line.
+make_parser!(flag_in_bracket -> (Switch<'a>, Option<Param<'a>>),
+    delimited(
+        (char('['), space0),
+        (usage_switch_parser, opt(param_parser)), // the switch is required, its param optional
+        (take_till_bracket, char(']')) // skip leftovers up to the closing bracket
+    )
+);
+
+// walk the joined SYNOPSIS-line text, collecting every flag-bracketed
+// switch + its first param. anything else (positional brackets, command
+// name, ellipses) is skipped one char at a time; the walk stops at a newline.
+make_parser!(pub parse_usage_flags -> Vec<(Switch<'a>, Option<Param<'a>>)>,
+    many0(alt((
+        map(flag_in_bracket, Some),
+        // `value(None, ...)` requires `None: Clone` which forces Clone
+        // on Switch/Param; `map(..., |_| None)` doesn't.
+        map(satisfy(|c| c != '\n' && c != '\r'), |_| None),
+    )))
+    => |v: Vec<Option<(Switch<'a>, Option<Param<'a>>)>>|
+        v.into_iter().flatten().collect() // drop the None placeholders left by skipped chars
+);
diff --git a/src/parsers/help/positionals.rs b/src/parsers/help/positionals.rs
new file mode 100644
index 0000000..5c137c2
--- /dev/null
+++ b/src/parsers/help/positionals.rs
@@ -0,0 +1,400 @@
+use crate::parsers::help::helpers::rest_of_line;
+use crate::types::Positional;
+use crate::{make_parser, make_predicate};
+use nom::branch::alt;
+use nom::bytes::complete::{tag, tag_no_case, take_till, take_till1, take_while, take_while1};
+use nom::character::complete::{char, line_ending, satisfy, space0, space1};
+use nom::combinator::{map, not, opt, peek, recognize, value, verify};
+use nom::multi::many0;
+use nom::sequence::{delimited, preceded, terminated};
+use nom::{AsChar, IResult, Parser};
+
+#[derive(Clone)] // Clone is needed because variants are handed to nom's `value(...)`
+enum PositionalParse<'a> { // one token recognised while scanning a usage line
+    Curly, // `{...}` group — ignored when results are folded
+    Flag, // `-x` / `--xyz` token — ignored
+    Skip, // a skipped char or unusable `[...]` block — ignored
+    Mandatory(&'a str), // required positional, e.g. `<name>` or `NAME`
+    Optional(&'a str), // optional positional, e.g. `[name]`
+    ManVariadic(&'a str), // required and repeatable (`<name>...`)
+    OptVariadic(&'a str), // optional and repeatable (`[name]...`)
+}
+
+make_predicate!(is_word_char, |c| c.is_alphanumeric() // command-name / positional-name chars
+    || matches!(c, '-' | '_' | '/' | '.'));
+
+make_predicate!(is_pos_char, |c| c.is_ascii_uppercase() // chars of an ALL_CAPS positional
+    || c.is_numeric()
+    || matches!(c, '_' | '-'));
+
+make_parser!(section_label -> (), // case-insensitive PREFIX match on a section word; the rest of the input is not checked
+    value((), alt((
+        tag_no_case("options"),
+        tag_no_case("option"),
+        tag_no_case("flags"),
+        tag_no_case("flag")
+    )))
+);
+
+make_parser!(ellipses -> (), // "..." or the single-char U+2026 ellipsis
+    value((),
+        alt((tag("..."), tag("\u{2026}")))
+    )
+);
+
+make_parser!(braces -> PositionalParse<'a>, // "{...}" with non-empty content -> Curly (content discarded)
+    value(PositionalParse::Curly, delimited(char('{'), take_till1(|c| c == '}'), char('}')))
+);
+
+// flag token: '-' plus at least one char, up to the next space/tab or ']'.
+// FIXME: should this be take_while1(is_option_char)? why the ']' stop?
+make_parser!(flag -> PositionalParse<'a>,
+    value(PositionalParse::Flag, preceded(char('-'), take_till1(|c: char| c.is_space() || c == ']')))
+);
+
+/// true when `s` (trimmed) is a plausible positional name rather than a flag or section label.
+fn check_positional(s: &str) -> bool {
+    let s = s.trim();
+    if s.is_empty() {
+        return false;
+    }
+    // reject names starting with '-' — these are flag tokens accidentally
+    // captured by the bracket parser, e.g. "[--at-operation]" in jj's
+    // synopsis. without this guard every `[--flag]` token would be
+    // recorded as a positional named "--flag".
+    if s.starts_with('-') {
+        return false;
+    }
+    // `section_label` is a case-insensitive PREFIX match: it rejects
+    // "OPTIONS"/"flags" and any name merely starting with a label, so a
+    // further exact-match check (and its String allocation) can never fire.
+    // NOTE(review): confirm rejecting e.g. "option-file" is intended.
+    if section_label.parse(s).is_ok() {
+        return false;
+    }
+    s.chars().all(is_word_char)
+}
+
+// recognize a balanced `[...]` block, tolerating ONE level of nested
+// brackets inside. expressed entirely via nom combinators:
+//
+//   `[` + many0(alt((nested_bracket_block, non_bracket_char))) + `]`
+//
+// nested_bracket_block is `[ chars_until_] ]`, which means we accept a
+// single inner `[...]` correctly but not arbitrarily-deep nesting —
+// manpages don't go deeper than two levels.
+// returns the inner content (everything between the outer brackets).
+make_parser!(balanced_bracket_inner -> &'a str,
+    recognize(delimited(
+        char('['),
+        many0(alt((
+            recognize((char('['), take_till(|c: char| c == ']'), char(']'))), // one nested "[...]"
+            recognize(satisfy(|c: char| c != ']' && c != '[')), // any single non-bracket char
+        ))),
+        char(']'),
+    ))
+    => |whole: &'a str| &whole[1..whole.len() - 1] // both brackets are 1-byte ASCII, so byte slicing is safe
+);
+
+/// pull a positional name out of the text found between a bracket pair.
+/// yields the name slice plus a flag telling whether an ellipsis (`...` or
+/// U+2026) marked it variadic inside the bracket; `None` when no name is found.
+fn parse_bracket_inner_name(inner: &str) -> Option<(&str, bool)> {
+    // peel one trailing ellipsis, and the whitespace before it, off `text`.
+    fn split_ellipsis(text: &str) -> (&str, bool) {
+        let stripped = text
+            .strip_suffix("...")
+            .or_else(|| text.strip_suffix('\u{2026}'));
+        match stripped {
+            Some(head) => (head.trim_end(), true),
+            None => (text, false),
+        }
+    }
+
+    // in-bracket variadic marker, e.g. the dots of "name...".
+    let (rest, has_dots) = split_ellipsis(inner.trim());
+
+    if rest.starts_with('[') {
+        // content opens with a nested bracket: visit every `[...]` pair in
+        // turn and keep the LAST one whose name passes check_positional.
+        let mut found = None;
+        let mut cursor = rest;
+        while let Some((_, after_open)) = cursor.split_once('[') {
+            // an unclosed '[' ends the scan.
+            let Some((nested, tail)) = after_open.split_once(']') else {
+                break;
+            };
+            if let Some((nested_name, nested_dots)) = parse_bracket_inner_name(nested)
+                && check_positional(nested_name)
+            {
+                found = Some((nested_name, has_dots || nested_dots));
+            }
+            cursor = tail;
+        }
+        return found;
+    }
+
+    if let Some(after_lt) = rest.strip_prefix('<') {
+        // angle-bracket name: everything up to the first '>', trimmed, with
+        // an ellipsis inside the angles also counting as variadic.
+        let (body, _) = after_lt.split_once('>')?;
+        let (name, inner_dots) = split_ellipsis(body.trim());
+        return Some((name, has_dots || inner_dots));
+    }
+
+    // bare name: the leading word, cut at whitespace or a bracket.
+    let end = rest
+        .find(|c: char| c.is_whitespace() || c == '[' || c == ']')
+        .unwrap_or(rest.len());
+    if end == 0 {
+        return None;
+    }
+    Some((&rest[..end], has_dots))
+}
+
+// extract a balanced `[...]` block and decompose its inner content into
+// (name, has-inner-`...` flag). `map_opt` turns a `None` from
+// `parse_bracket_inner_name` into a nom parse error (nothing is consumed).
+make_parser!(opt_bracket_name -> (&'a str, bool),
+    nom::combinator::map_opt(balanced_bracket_inner, parse_bracket_inner_name)
+);
+
+make_parser!(
+    opt_positional -> PositionalParse<'a>,
+    verify(
+        // tuple parser: (name + in-bracket variadic, ellipsis glued to `]`).
+        // matches "[name]", "[name...]", "[name ...]", "[name]...", "[<name>]"
+        // and one-level nests like "[<program> [<arg>...]]" (not "[name] ...").
+        (opt_bracket_name, opt(ellipses)),
+        |((name, _), _): &((&'a str, bool), Option<()>)| check_positional(name)
+    ) => |((name, has_inner_dots), post_dots): ((&'a str, bool), Option<()>)| {
+        if has_inner_dots || post_dots.is_some() {
+            PositionalParse::OptVariadic(name)
+        } else {
+            PositionalParse::Optional(name)
+        }
+    }
+);
+
+make_parser!(man_positional -> PositionalParse<'a>, // "<name>", "<name...>" or "<name>..." -> Mandatory / ManVariadic
+    verify(
+        (
+            delimited(
+                char('<'),
+                (
+                    take_till1(|c| c == '.' || c == '\u{2026}' || c == '>'), // name ends at the first '.', U+2026 or '>'
+                    opt(ellipses) // ellipsis inside the angle brackets
+                ),
+                char('>')
+            ),
+            opt(ellipses) // ellipsis directly after the closing '>'
+        ),
+        |((ss, _), _)| check_positional(ss)
+    ) => |((p, v), v1): ((&'a str, Option<()>), Option<()>)|
+        if v.is_some() || v1.is_some() { PositionalParse::ManVariadic(p) }
+        else { PositionalParse::Mandatory(p) }
+);
+
+make_parser!(allcaps_positional -> PositionalParse<'a>, // bare "NAME" or "NAME..." -> Mandatory / ManVariadic
+    verify(
+        (
+            preceded(
+                peek(
+                    satisfy(|c: char| c.is_ascii_uppercase()) // must start with an ASCII uppercase letter (not consumed)
+                ),
+                take_while1(is_pos_char)
+            ),
+            opt(
+                alt((
+                    tag("..."),
+                    tag("\u{2026}"))
+                )
+            )
+        ),
+        |(ss, _): &(&str, _)| check_positional(ss)
+    ) => |(p, v): (&'a str, Option<&'a str>)|
+        if v.is_some() { PositionalParse::ManVariadic(p) } else { PositionalParse::Mandatory(p) }
+);
+
+fn caseless_push<'a>(k: &'a str, v: Positional, acc: &mut Vec<(&'a str, Positional)>) {
+    // first spelling wins: skip `k` when an ASCII-case-insensitive match is already stored.
+    if acc.iter().all(|(seen, _)| !seen.eq_ignore_ascii_case(k)) {
+        acc.push((k, v));
+    }
+}
+
+// parse_usage_args runs on a single logical usage line. SKIP refuses to
+// cross a newline boundary so many0 stops at end-of-line — without this
+// the parser would happily wander into the OPTIONS section and treat
+// every `--flag <name>` angle-bracket parameter as a positional.
+//
+// the inner positional terminator uses peek(line_ending) instead of
+// consuming the newline, so the trailing `opt((space0, line_ending))` in
+// the outer delimited eats it cleanly and we never advance past the
+// usage line.
+make_parser!(pub parse_usage_args -> Vec<(&'a str, Positional)>,
+    (delimited(
+        space0,
+        many0(
+            alt((
+                map(
+                    (
+                        terminated(
+                            alt((
+                                braces,
+                                opt_positional,
+                                value(PositionalParse::Skip, balanced_bracket_inner), // `[...]` without a usable name
+                                man_positional,
+                                flag,
+                                allcaps_positional,
+                            )),
+                            alt(( // a token must end at whitespace, a newline or end of input
+                                space1,
+                                value("", peek(line_ending)),
+                                value("", peek(nom::combinator::eof)),
+                            ))
+                        ),
+                        // catch "[section] ..." patterns where the ellipsis is
+                        // on the *next* token, separated by whitespace.
+                        opt(terminated(
+                            alt((tag("..."), tag("\u{2026}"))),
+                            alt((
+                                space1,
+                                value("", peek(line_ending)),
+                                value("", peek(nom::combinator::eof)),
+                            ))
+                        ))
+                    ),
+                    |(positional, trailing): (PositionalParse<'a>, Option<_>)| {
+                        if trailing.is_none() { positional }
+                        else {
+                            match positional { // a detached ellipsis upgrades the token to variadic
+                                PositionalParse::Optional(n) => PositionalParse::OptVariadic(n),
+                                PositionalParse::Mandatory(n) => PositionalParse::ManVariadic(n),
+                                other => other,
+                            }
+                        }
+                    }
+                ),
+                // SKIP must NOT consume a newline. without this, many0 keeps
+                // iterating past the usage line into OPTIONS-section flag
+                // syntax and over-extracts positionals.
+                value(PositionalParse::Skip, satisfy(|c: char| c != '\n' && c != '\r')),
+            ))
+        ),
+        opt((space0, line_ending))
+    )) => |p: Vec<PositionalParse<'a>>|
+            p.into_iter().fold(Vec::new(), |mut acc, parse| // caseless_push keeps the first spelling of each name
+            {
+                match parse {
+                    PositionalParse::Curly => (),
+                    PositionalParse::Flag => (),
+                    PositionalParse::Skip => (),
+                    PositionalParse::OptVariadic(arg) => caseless_push(arg, Positional {
+                        optional: true,
+                        variadic: true
+                    }, &mut acc),
+                    PositionalParse::ManVariadic(arg) => caseless_push(arg, Positional {
+                        optional: false,
+                        variadic: true
+                    }, &mut acc),
+                    PositionalParse::Optional(arg) => caseless_push(arg, Positional {
+                        optional: true,
+                        variadic: false,
+                    }, &mut acc),
+                    PositionalParse::Mandatory(arg) => caseless_push(arg, Positional {
+                        optional: false,
+                        variadic: false
+                    }, &mut acc),
+                }
+                acc
+            })
+);
+
+make_parser!(pub skip_command_name -> (), // skip the leading command words ("git remote ") ahead of the arguments
+    value((), preceded(space0,
+        many0(
+            (
+                verify(
+                    preceded(not(char('-')), take_while1(is_word_char)), // a word that does not start with '-'
+                    |ss: &str| ss.chars().any(|c: char| c.is_ascii_lowercase()) // needs a lowercase letter, so ALL_CAPS tokens are left alone
+                ),
+                space1 // a word only counts when whitespace follows it
+            )
+        )
+    ))
+);
+
+make_parser!(find_usage_line -> (), // matches the "usage" marker (any case) at the start of a line, after optional indent
+    value((), preceded(
+        space0,
+        terminated(
+            tag_no_case("usage"),
+            // accept any of:
+            //   "Usage:"              — inline form with colon (colon consumed)
+            //   "Usage args"          — inline form, space follows the word
+            //   "USAGE\n  cmd args"   — clap-style header on its own line
+            alt(
+                (
+                    value((), char(':')),
+                    value((), peek(line_ending)), // peeked only: the newline is left in place
+                    value((), peek(satisfy(|c: char| c == ' ' || c == '\t'))), // peeked only
+                )
+            )
+        )
+    ))
+);
+
+make_parser!(pub extract_usage_positionals -> Vec<(&'a str, Positional)>, // find the usage line, then parse its positionals
+    preceded(
+        many0(preceded(not(find_usage_line), (rest_of_line, line_ending))), // drop whole lines until one starts with "usage"
+        preceded(
+            (find_usage_line, space0, opt(line_ending), space0, skip_command_name), // marker, optional line break, command words
+            parse_usage_args
+        )
+    )
+);
+
+make_predicate!(is_cli11_name_char, |c| c.is_alphanumeric() // chars of a positional name in a "Positionals:" section
+    || matches!(c, '_' | '-'));
+
+make_parser!(cli11_section_header -> (), // a "POSITIONALS:" / "Positionals:" header line, consumed in full
+    value((),
+        delimited(
+            space0,
+            alt((tag("POSITIONALS:"), tag("Positionals:"))),
+            (rest_of_line, opt(line_ending))
+        )
+    )
+);
+
+make_parser!(cli11_pos_line -> (&'a str, bool), // one indented entry -> (name, variadic)
+    preceded(
+        verify(space0, |ss: &str| !ss.is_empty()), // entries must be indented; an unindented line ends the section
+        terminated(
+            (
+                verify(take_while1(is_cli11_name_char), |s: &str| s.len() >= 2), // name of at least 2 bytes
+                preceded(
+                    (space0, take_while(|c: char| c.is_ascii_uppercase()), space0), // optional ALL_CAPS word after the name, skipped
+                    opt(tag("..."))
+                )
+            ),
+            (rest_of_line, opt(line_ending)) // remainder of the line is discarded
+        )
+    ) => |(name, variadic): (&'a str, Option<_>)| (name, variadic.is_some())
+);
+
+make_parser!(parse_cli11_body -> Vec<(&'a str, Positional)>, // every entry is recorded as non-optional
+    many0(cli11_pos_line) => |entries: Vec<(&'a str, bool)>|
+        entries.into_iter().fold(Vec::new(), |mut acc, (name, variadic)| {
+            caseless_push(name, Positional { optional: false, variadic }, &mut acc);
+            acc
+        })
+);
+
+make_parser!(pub extract_cli11_positionals -> Vec<(&'a str, Positional)>, // skip to the header, then parse its entries
+    preceded(
+        many0(preceded(not(cli11_section_header), (rest_of_line, line_ending))),
+        preceded(cli11_section_header, parse_cli11_body)
+    )
+);
diff --git a/src/parsers/help/subcommands.rs b/src/parsers/help/subcommands.rs
new file mode 100644
index 0000000..7b3a0c6
--- /dev/null
+++ b/src/parsers/help/subcommands.rs
@@ -0,0 +1,83 @@
+use nom::{
+    AsChar, IResult, Parser,
+    branch::alt,
+    bytes::complete::{tag, take_till, take_while1},
+    character::complete::{char, space0},
+    combinator::{not, value, verify},
+    multi::many0,
+    sequence::{delimited, preceded, terminated},
+};
+
+use crate::make_parser;
+use crate::parsers::help::helpers::{eol, is_option_char};
+use crate::types::Subcommand;
+
+/// chars allowed inside a bracketed (`<...>` / `[...]`) placeholder.
+fn is_placeholder(c: char) -> bool {
+    if c.is_alphanumeric() {
+        return true;
+    }
+    matches!(c, '_' | '-' | '.' | '|' | ',')
+}
+
+/// chars permitted in a bare (unbracketed) placeholder token such as
+/// "FILE", "PATTERN..." or "A|B": ASCII uppercase letters, ASCII digits and
+/// `_ - . | ,`. lowercase letters are left out so mixed-case description
+/// words like "NixOS" or "Home-manager" are not taken whole as placeholders.
+fn is_bare_placeholder_char(c: char) -> bool {
+    c.is_ascii_uppercase() || c.is_ascii_digit() || matches!(c, '_' | '-' | '.' | '|' | ',')
+}
+
+make_parser!(
+    skip_arg_placeholders -> (),
+    value(
+        (),
+        many0(preceded(
+            // each placeholder is introduced by exactly one space, which IS
+            // consumed here; if no branch below matches, many0 backtracks to
+            // before that space and stops, leaving it for the caller.
+            char(' '),
+            alt((
+                // <...> bracketed placeholder
+                delimited(char('<'), take_while1(is_placeholder), char('>')),
+                // [...] optional bracketed placeholder
+                delimited(char('['), take_while1(is_placeholder), char(']')),
+                // bare ALL_CAPS placeholder — first char must be uppercase or
+                // a digit (allows e.g. "N", "M2"). NOTE(review): take_while1
+                // stops at the first disallowed char, so on "NixOS" only "N"
+                // is taken here; the token as a whole is not re-checked.
+                verify(
+                    take_while1(is_bare_placeholder_char),
+                    |s: &str| {
+                        let first = s.chars().next().unwrap(); // take_while1 never yields an empty slice
+                        first.is_ascii_uppercase() || first.is_ascii_digit()
+                    }
+                ),
+            )),
+        )),
+    )
+);
+
+// parse a subcommand entry: leading whitespace, then a name (2+ bytes of
+// option chars, not starting with '-'), optional argument placeholders, at
+// least two spaces (tag + padding), then the description text and eol.
+make_parser!(pub subcommand_entry -> Subcommand<'a>,
+    (
+        preceded(
+            space0,
+            verify(
+                preceded(not(char('-')), take_while1(is_option_char)),
+                |n: &str| n.len() >= 2, // byte length, not char count
+            ),
+        ),
+        skip_arg_placeholders,
+        tag("  "), // the two-space gap that separates name from description
+        space0,
+        terminated(take_till(|c: char| c.is_newline()), eol),
+    ) => |(name, _, _, _, desc): (&'a str, _, _, _, &'a str)| {
+        // some help formats prefix desc with "- " (manpage-style); strip it.
+        let d = desc.trim_start();
+        let desc = d.strip_prefix("- ").map(|s| s.trim_start()).unwrap_or(d);
+        Subcommand { name, desc }
+    }
+);
diff --git a/src/parsers/manpage.rs b/src/parsers/manpage.rs
new file mode 100644
index 0000000..651598b
--- /dev/null
+++ b/src/parsers/manpage.rs
@@ -0,0 +1,335 @@
+//! parse unix manpages (groff/mdoc format) into a structured result.
+//!
+//! manpages are written in roff/groff markup — a decades-old typesetting language
+//! used by man(1). this module strips the formatting and extracts structured data
+//! (flags, subcommands, positionals) from the raw groff source.
+//!
+//! there are two major manpage macro packages:
+//!   - man (groff) — used by gnu/linux tools. uses macros like .SH, .TP, .IP, .PP
+//!   - mdoc (bsd) — used by bsd tools. uses .Sh, .Fl, .Ar, .Op, .It, .Bl/.El
+//!
+//! this module handles both, auto-detecting the format by checking for .Sh macros.
+//!
+//! for groff manpages, flag extraction uses multiple "strategies" that target
+//! different common formatting patterns:
+//!   - strategy_tp: .TP tagged paragraphs (gnu coreutils, help2man)
+//!   - strategy_ip: .IP indented paragraphs (curl, hand-written)
+//!   - strategy_pp_rs: .PP + .RS/.RE blocks (git, docbook)
+//!   - strategy_nix: nix3-style bullet .IP with .UR/.UE hyperlinks
+//!   - strategy_deroff: fallback — strip all groff, feed to help text parser
+//!
+//! the module tries all applicable strategies and picks the one that extracts
+//! the most flag entries, on the theory that more results = better match.
+
+mod commands;
+mod groff;
+mod mdoc;
+mod sections;
+mod strategies;
+
+use std::io::{self, Read};
+use std::path::Path;
+
+use crate::types::{HelpResult, OptionEntry, Param, Positional, Subcommand, Switch};
+
+pub use self::groff::{GroffLine, classify_line, strip_groff_escapes};
+pub use self::sections::{extract_subcommand_sections, extract_synopsis_command};
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum OwnedSwitch { // owned counterpart of the borrowed `Switch` (see the From impl)
+    Short(char), // short spelling only
+    Long(String), // long spelling only
+    Both(char, String), // short and long spelling of the same option
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum OwnedParam { // owned counterpart of the borrowed `Param` (see the From impl)
+    Mandatory(String), // value placeholder name (e.g. the word after mdoc `Ar`)
+    Optional(String), // placeholder name; presumably the value may be omitted — confirm against `Param`
+}
+
+#[derive(Debug, Clone)]
+pub struct ManpageEntry { // one extracted option, with owned data
+    pub switch: OwnedSwitch, // short and/or long spelling
+    pub param: Option<OwnedParam>, // value placeholder, when one was found
+    pub desc: String, // description text
+}
+
+#[derive(Debug, Clone)]
+pub struct ManpageSubcommand { // one extracted subcommand, with owned data
+    pub name: String, // lowercased by every constructor in this module
+    pub desc: String, // description text
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct ManpageResult { // everything extracted from one manpage (or one help text)
+    pub entries: Vec<ManpageEntry>, // option entries
+    pub subcommands: Vec<ManpageSubcommand>, // subcommands with descriptions
+    pub positionals: Vec<(String, Positional)>, // (name, positional) pairs
+    pub description: String, // filled from the NAME section by parse_manpage_string; may be empty
+}
+
+impl From<&Switch<'_>> for OwnedSwitch {
+    fn from(s: &Switch<'_>) -> Self {
+        match s { // variant-for-variant copy; borrowed long names become owned
+            Switch::Both(short, long) => Self::Both(*short, (*long).to_owned()),
+            Switch::Long(long) => Self::Long((*long).to_owned()),
+            Switch::Short(short) => Self::Short(*short),
+        }
+    }
+}
+
+impl From<&Param<'_>> for OwnedParam {
+    fn from(p: &Param<'_>) -> Self {
+        match p { // same variant, with the borrowed name copied into a String
+            Param::Optional(name) => Self::Optional((*name).to_owned()),
+            Param::Mandatory(name) => Self::Mandatory((*name).to_owned()),
+        }
+    }
+}
+
+impl From<&OptionEntry<'_>> for ManpageEntry {
+    fn from(e: &OptionEntry<'_>) -> Self {
+        // the description arrives in pieces: trim each one, drop the empty
+        // ones and glue the rest together with single spaces.
+        let mut desc = String::new();
+        for piece in e.desc.iter().map(|s| s.trim()).filter(|s| !s.is_empty()) {
+            if !desc.is_empty() {
+                desc.push(' ');
+            }
+            desc.push_str(piece);
+        }
+        let switch = OwnedSwitch::from(&e.switch);
+        let param = e.param.as_ref().map(OwnedParam::from);
+        Self { switch, param, desc }
+    }
+}
+
+impl From<&Subcommand<'_>> for ManpageSubcommand {
+    fn from(sc: &Subcommand<'_>) -> Self {
+        // the name is stored lowercased for two reasons: (a) output file
+        // names stay consistent (meat_yum.json rather than meat_YUM.json),
+        // and (b) recursive --help probes pass the lowercase form, which
+        // is what most real CLIs accept — even tools like meat that
+        // DISPLAY uppercase names in their help text dispatch on the
+        // lowercased argument.
+        let name = sc.name.to_ascii_lowercase();
+        let desc = sc.desc.to_string();
+        Self { name, desc }
+    }
+}
+
+impl From<&HelpResult<'_>> for ManpageResult {
+    fn from(r: &HelpResult<'_>) -> Self {
+        // positional names are stored lowercased so output is stable
+        // across the various places we extract them from (synopsis,
+        // usage, cli11 sections).
+        let mut positionals = Vec::new();
+        for (key, value) in r.positionals.iter() {
+            positionals.push((key.to_ascii_lowercase(), value.clone()));
+        }
+        Self {
+            entries: r.entries.iter().map(Into::into).collect(),
+            subcommands: r.subcommands.iter().map(Into::into).collect(),
+            positionals,
+            description: r.desc.to_string(),
+        }
+    }
+}
+
+/// parse a manpage from its classified lines.
+/// mdoc input (any `.Sh` macro) is delegated to the mdoc parser. groff
+/// input goes through the multi-strategy extraction pipeline.
+pub fn parse_manpage_lines(lines: &[GroffLine]) -> ManpageResult {
+    if mdoc::is_mdoc(lines) {
+        return mdoc::parse_mdoc_lines(lines);
+    }
+    let options_section = sections::extract_options_section(lines);
+    let mut entries = strategies::extract_entries(&options_section);
+    // merge SYNOPSIS-only flags (nix-env's `[{--profile | -p} path]`
+    // pattern, where the flag is declared in the synopsis but never
+    // listed as an entry in the OPTIONS body). body entries take
+    // precedence on duplicate names — they carry the descriptions.
+    let synopsis_flags = sections::extract_synopsis_flags(lines);
+    if !synopsis_flags.is_empty() {
+        // spellings already claimed by body entries; long names are
+        // compared case-insensitively, short names exactly.
+        let mut have_long: std::collections::HashSet<String> = Default::default();
+        let mut have_short: std::collections::HashSet<char> = Default::default();
+        for entry in &entries {
+            match &entry.switch {
+                OwnedSwitch::Short(c) => {
+                    have_short.insert(*c);
+                }
+                OwnedSwitch::Long(l) => {
+                    have_long.insert(l.to_ascii_lowercase());
+                }
+                OwnedSwitch::Both(c, l) => {
+                    have_short.insert(*c);
+                    have_long.insert(l.to_ascii_lowercase());
+                }
+            }
+        }
+        let long_taken = |l: &str| have_long.contains(&l.to_ascii_lowercase());
+        let is_new = |e: &ManpageEntry| match &e.switch {
+            OwnedSwitch::Long(l) => !long_taken(l),
+            OwnedSwitch::Short(c) => !have_short.contains(c),
+            OwnedSwitch::Both(c, l) => !have_short.contains(c) && !long_taken(l),
+        };
+        entries.extend(synopsis_flags.into_iter().filter(is_new));
+    }
+    let positionals = sections::extract_synopsis_positionals(lines);
+    let commands_section = sections::extract_commands_section(lines);
+    let mut subcommands = commands::extract_subcommands_from_commands(&commands_section);
+    // results of extract_description_positionals are appended as subcommands
+    // unless an entry with the same name (ignoring ascii case) exists.
+    for positional in sections::extract_description_positionals(lines) {
+        let known = subcommands
+            .iter()
+            .any(|sc| sc.name.eq_ignore_ascii_case(&positional.name));
+        if !known {
+            subcommands.push(positional);
+        }
+    }
+    ManpageResult {
+        entries,
+        subcommands,
+        positionals,
+        description: String::new(),
+    }
+}
+
+/// parse a manpage from its raw string contents: split on '\n', classify
+/// and parse the lines, then fill in the NAME section description if any.
+pub fn parse_manpage_string(contents: &str) -> ManpageResult {
+    let lines: Vec<GroffLine> = contents.split('\n').map(classify_line).collect();
+    let parsed = parse_manpage_lines(&lines);
+    match sections::extract_name_description(&lines) {
+        Some(description) => ManpageResult { description, ..parsed },
+        None => parsed,
+    }
+}
+
+/// parse a manpage and also pull out clap-style `.SH SUBCOMMAND` sections
+/// as separate per-subcommand results. each subcommand section in a
+/// clap-generated manpage is its own command with its own flags; the
+/// parent's subcommand list is populated from their names.
+///
+/// returns (main_result, sub_results) where each sub_result has
+/// name=full_command ("nh os"), desc, and its own ManpageResult.
+/// sub_result names are kept exactly as extracted; only the names in the
+/// parent's subcommand list are lowercased.
+pub fn parse_manpage_with_subs(contents: &str) -> (ManpageResult, Vec<(String, ManpageResult)>) {
+    let lines: Vec<GroffLine> = contents.split('\n').map(classify_line).collect();
+    let mut main = parse_manpage_lines(&lines);
+    if let Some(desc) = sections::extract_name_description(&lines) {
+        main.description = desc;
+    }
+    let sub_sections = sections::extract_subcommand_sections(&lines);
+    // when SUBCOMMAND sections exist their (lowercased) names replace the
+    // subcommand list — they are authoritative for clap-generated manpages.
+    if !sub_sections.is_empty() {
+        let mut listed = Vec::with_capacity(sub_sections.len());
+        for (name, desc, _) in &sub_sections {
+            listed.push(ManpageSubcommand {
+                name: name.to_ascii_lowercase(),
+                desc: desc.clone(),
+            });
+        }
+        main.subcommands = listed;
+    }
+    // section bodies go straight to the strategy picker used for the
+    // top-level OPTIONS section: clap puts flag definitions directly
+    // under the .SH SUBCOMMAND header with no inner .SH wrapping, so
+    // parse_manpage_lines (which looks for a child OPTIONS section)
+    // would come back empty.
+    let mut subs = Vec::with_capacity(sub_sections.len());
+    for (name, description, body) in sub_sections {
+        let sub_result = ManpageResult {
+            entries: strategies::extract_entries(&body),
+            subcommands: Vec::new(),
+            positionals: Default::default(),
+            description,
+        };
+        subs.push((name, sub_result));
+    }
+    (main, subs)
+}
+
+/// read a manpage file from disk. a path whose extension is "gz" is gunzipped
+/// (the common case — most installed manpages are gzipped); anything else is
+/// taken as plain text. content that is not valid utf-8 yields an error.
+pub fn read_manpage_file<P: AsRef<Path>>(path: P) -> io::Result<String> {
+    let path = path.as_ref();
+    let raw = std::fs::read(path)?;
+    let gzipped = path.extension().is_some_and(|ext| ext == "gz");
+    if !gzipped {
+        return String::from_utf8(raw).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e));
+    }
+    let mut text = String::new();
+    flate2::read::GzDecoder::new(raw.as_slice()).read_to_string(&mut text)?;
+    Ok(text)
+}
+
+/// read a manpage file and parse it in one step; only the read can fail,
+/// and its error is passed through unchanged.
+pub fn parse_manpage_file<P: AsRef<Path>>(path: P) -> io::Result<ManpageResult> {
+    read_manpage_file(path).map(|contents| parse_manpage_string(&contents))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const TP_MANPAGE: &str = r#".TH FOO 1 "2024" "1.0" "User Commands"
+.SH NAME
+foo \- a synthetic test command
+.SH SYNOPSIS
+.B foo
+[\fIOPTIONS\fR] <input> [output]
+.SH OPTIONS
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+increase output verbosity
+.TP
+\fB\-o\fR \fIFILE\fR, \fB\-\-output\fR=\fIFILE\fR
+write to FILE
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help and exit
+"#;
+
+    #[test]
+    fn tp_strategy_extracts_flags() { // three .TP entries plus the NAME description
+        let r = parse_manpage_string(TP_MANPAGE);
+        assert_eq!(
+            r.entries.len(),
+            3,
+            "expected 3 entries, got {:?}",
+            r.entries
+        );
+        assert_eq!(r.description, "a synthetic test command");
+        assert!(matches!(
+            r.entries[0].switch,
+            OwnedSwitch::Both('v', ref l) if l == "verbose"
+        ));
+        assert!(matches!(
+            r.entries[2].switch,
+            OwnedSwitch::Both('h', ref l) if l == "help"
+        ));
+        assert!(r.entries[0].desc.contains("verbosity"));
+    }
+
+    #[test]
+    fn mdoc_format_detected() { // a single .Sh macro is enough to select the mdoc path
+        let src = ".Sh NAME\n.Nm test\n.Nd a test\n.Sh DESCRIPTION\nstuff\n";
+        let lines: Vec<GroffLine> = src.split('\n').map(classify_line).collect();
+        assert!(mdoc::is_mdoc(&lines));
+    }
+
+    #[test]
+    fn groff_escapes_stripped() { // font escapes vanish, \- becomes a plain hyphen
+        let stripped = groff::strip_groff_escapes("\\fB\\-v\\fR \\fIfile\\fR");
+        assert_eq!(stripped.trim(), "-v file");
+    }
+}
diff --git a/src/parsers/manpage/commands.rs b/src/parsers/manpage/commands.rs
new file mode 100644
index 0000000..942de34
--- /dev/null
+++ b/src/parsers/manpage/commands.rs
@@ -0,0 +1,157 @@
+//! COMMANDS section subcommand extraction.
+//!
+//! some manpages (notably systemctl) have a dedicated COMMANDS section
+//! listing subcommands with descriptions. these use .PP + bold name +
+//! .RS/.RE blocks:
+//!   .PP
+//!   \fBstart\fR \fIUNIT\fR...
+//!   .RS 4
+//!   Start (activate) one or more units.
+//!   .RE
+
+use crate::parsers::manpage::ManpageSubcommand;
+use crate::parsers::manpage::groff::{GroffLine, strip_groff_escapes, strip_inline_macro_args};
+
+/// accept only names that look like a subcommand: two or more bytes, no
+/// leading dash, built solely from ascii lowercase, digits, '-' and '_'.
+fn is_valid_subcmd(name: &str) -> bool {
+    if name.len() < 2 || name.starts_with('-') {
+        return false;
+    }
+    let allowed = |b: u8| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_');
+    name.bytes().all(allowed)
+}
+
+/// extract subcommand name from a bold groff text like
+///   "\fBlist\-units\fR [\fIPATTERN\fR...]" -> "list-units"
+fn extract_bold_command_name(text: &str) -> Option<String> {
+    let trimmed = text.trim();
+    if trimmed.len() >= 4 && trimmed.starts_with("\\fB") {
+        // the bold run ends at the next font escape (\fR, \fP, ...), not at
+        // the next backslash: the name itself may contain escapes like \-.
+        let after = &trimmed[3..];
+        let segment_end = after.find("\\f").unwrap_or(after.len());
+        let name_part = &after[..segment_end];
+        let reconstructed = format!("\\fB{name_part}\\fR");
+        let name = normalize_command_token(strip_groff_escapes(&reconstructed).trim());
+        if is_valid_subcmd(&name) {
+            return Some(name);
+        }
+        return None;
+    }
+    // fallback: take the first whitespace-delimited word of the stripped text
+    let stripped = strip_groff_escapes(trimmed);
+    let first_word = stripped.split_whitespace().next().unwrap_or("");
+    let name = normalize_command_token(first_word);
+    if is_valid_subcmd(&name) {
+        Some(name)
+    } else {
+        None
+    }
+}
+
+/// reduce a raw token to a bare command name. surrounding whitespace is
+/// trimmed, everything from the first '(' on is dropped (e.g. a "(1)"
+/// section suffix), then trailing commas are removed. whitespace left in
+/// front of a cut '(' is not re-trimmed.
+fn normalize_command_token(token: &str) -> String {
+    let trimmed = token.trim();
+    let head = trimmed.split('(').next().unwrap_or(trimmed);
+    head.trim_end_matches(',').to_string()
+}
+
+/// pull a subcommand name out of the header line of a .PP entry: either a
+/// text line, or a font macro (.B, .BI, .BR, .I, .IR, .IB, .RB, .RI) whose
+/// arguments are rendered first. any other line yields None.
+fn extract_command_name_from_line(line: &GroffLine) -> Option<String> {
+    let (name, args) = match line {
+        GroffLine::Text(tag) => return extract_bold_command_name(tag),
+        GroffLine::Macro { name, args } => (name.as_str(), args),
+        _ => return None,
+    };
+    if !matches!(name, "B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI") {
+        return None;
+    }
+    let rendered = strip_groff_escapes(&strip_inline_macro_args(args));
+    extract_bold_command_name(&rendered)
+}
+
+/// walk through commands section lines, extracting subcommand name+description
+/// pairs from .PP + Text + .RS/.RE blocks.
+///
+/// a .PP whose next line yields no valid name is skipped, but that next line
+/// is still examined on its own, so back-to-back .PP macros do not hide the
+/// entry introduced by the second one.
+pub fn extract_subcommands_from_commands(lines: &[GroffLine]) -> Vec<ManpageSubcommand> {
+    let mut out = Vec::new();
+    let mut i = 0;
+    while i < lines.len() {
+        let is_pp = matches!(&lines[i], GroffLine::Macro { name, .. } if name == "PP");
+        // step past the current line; after a .PP this lands on the line
+        // that should carry the command name.
+        i += 1;
+        if !is_pp || i >= lines.len() {
+            continue;
+        }
+        if let Some(name) = extract_command_name_from_line(&lines[i]) {
+            let (desc, new_i) = collect_subcmd_desc(lines, i + 1);
+            let short_desc = first_sentence(&desc);
+            out.push(ManpageSubcommand {
+                name: name.to_ascii_lowercase(),
+                desc: short_desc,
+            });
+            i = new_i;
+        }
+        // otherwise leave `i` where it is: the outer loop re-inspects this
+        // line, which matters when it is itself a .PP starting a new entry.
+    }
+    out
+}
+
+/// gather the description that follows a subcommand header line.
+/// plain text before any .RS is accumulated; an .RS block is read up to its
+/// .RE (consumed) or a .PP/.SH/.SS boundary (left for the caller). any other
+/// line outside an .RS block ends the description. returns the text joined
+/// with spaces plus the index where scanning should resume.
+fn collect_subcmd_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
+    let mut parts: Vec<String> = Vec::new();
+    let mut idx = start;
+    // phase 1: free-standing text lines directly after the header
+    while let Some(GroffLine::Text(t)) = lines.get(idx) {
+        parts.push(t.clone());
+        idx += 1;
+    }
+    // phase 2: an optional .RS block; anything else ends the description
+    let in_rs = matches!(lines.get(idx), Some(GroffLine::Macro { name, .. }) if name == "RS");
+    if !in_rs {
+        return (parts.join(" "), idx);
+    }
+    idx += 1;
+    while let Some(line) = lines.get(idx) {
+        match line {
+            GroffLine::Text(t) => parts.push(t.clone()),
+            // .RE closes the block and is consumed
+            GroffLine::Macro { name, .. } if name == "RE" => {
+                return (parts.join(" "), idx + 1);
+            }
+            // a new paragraph/section starts: stop without consuming it
+            GroffLine::Macro { name, .. } if matches!(name.as_str(), "PP" | "SH" | "SS") => {
+                return (parts.join(" "), idx);
+            }
+            // other macros, blanks and comments inside the block are skipped
+            _ => {}
+        }
+        idx += 1;
+    }
+    // ran off the end of the section while still inside the .RS block
+    (parts.join(" "), idx)
+}
+
+/// take the first sentence as the description: cut at the first '.' that is
+/// followed by whitespace or ends the text, so "1.2" or "foo.conf" stay whole.
+fn first_sentence(s: &str) -> String {
+    let s = s.trim();
+    let ends_sentence = |i: &usize| !s[i + 1..].starts_with(|c: char| !c.is_whitespace());
+    let cut = s.match_indices('.').map(|(i, _)| i).find(ends_sentence);
+    cut.filter(|&i| i > 0).map_or(s, |i| s[..i].trim()).to_string()
+}
diff --git a/src/parsers/manpage/groff.rs b/src/parsers/manpage/groff.rs
new file mode 100644
index 0000000..4196fac
--- /dev/null
+++ b/src/parsers/manpage/groff.rs
@@ -0,0 +1,385 @@
+//! groff escape/formatting stripping and line classification.
+//!
+//! groff escapes start with backslash and use various continuation syntaxes.
+//! we strip them, replacing named characters (like \(aq for apostrophe) with
+//! their text equivalents and discarding formatting directives.
+//!
+//! also exports `make_macro_walker!`, the manpage-side analogue of the
+//! help parser's `make_parser!`. all of our strategy_* functions are
+//! "scan lines, on each .MACRO_NAME run a handler, advance, accumulate"
+//! — this macro factors out the loop scaffolding so each strategy reduces
+//! to its specific extraction logic.
+
+/// walk a `&[GroffLine]` slice, and on each macro whose name matches
+/// `$mname`, invoke the body with `(lines, i, args)` where:
+///   - `lines` is the full slice (for slicing further bodies)
+///   - `i` is the current index of the matched macro
+///   - `args` is the macro's argument string (by reference)
+///
+/// the body returns `Option<(T, usize)>`. `Some((value, new_i))` pushes
+/// `value` and moves the cursor to `new_i` (typically computed as
+/// `lines.len() - rest.len()` after `collect_text_lines`); `new_i` must be
+/// greater than `i`, otherwise the walk never terminates. `None` advances
+/// by one line and keeps scanning.
+///
+/// like `make_parser!(name -> T, parser => wrap)` on the help-parser side:
+/// the macro hides the loop scaffolding, the handler holds the extraction logic.
+#[macro_export]
+macro_rules! make_macro_walker {
+    (pub $name:ident -> Vec<$t:ty>, on macro $mname:expr =>
+     |$lines:ident, $i:ident, $args:ident| $body:expr) => {
+        pub fn $name(lines_input: &[$crate::parsers::manpage::GroffLine]) -> Vec<$t> {
+            let mut out = Vec::new();
+            let mut cursor = 0;
+            let $lines: &[$crate::parsers::manpage::GroffLine] = lines_input;
+            while cursor < $lines.len() {
+                if let $crate::parsers::manpage::GroffLine::Macro {
+                    name: macro_name,
+                    args: $args,
+                } = &$lines[cursor]
+                {
+                    if macro_name == $mname {
+                        let $i = cursor;
+                        // wrap the handler body in an IIFE so an early
+                        // `return None` inside the handler returns from the
+                        // closure, not from the surrounding strategy function.
+                        #[allow(clippy::redundant_closure_call)]
+                        let result: Option<($t, usize)> = (|| $body)();
+                        if let Some((value, new_i)) = result {
+                            out.push(value);
+                            cursor = new_i;
+                            continue;
+                        }
+                    }
+                }
+                cursor += 1;
+            }
+            out
+        }
+    };
+}
+
+/// every line in a manpage is classified as one of four types.
+/// this classification drives all subsequent parsing — strategies
+/// pattern-match on sequences of classified lines.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum GroffLine {
+    /// control line ('.' or '\'' prefix): macro name + args, e.g. ("SH", "OPTIONS") or ("TP", "")
+    Macro { name: String, args: String },
+    /// text line with groff escapes stripped and surrounding whitespace trimmed
+    Text(String),
+    /// empty line, or a text line with nothing left after stripping
+    Blank,
+    /// groff comment: a line starting with .\" or \" (also a line that is just .\)
+    Comment,
+}
+
+/// map a groff named-character code to plain text. only the codes below
+/// are known: "aq" (apostrophe), the lq/Lq/rq/Rq quote spellings (all
+/// rendered as '"') and the "em"/"en" dashes (rendered as '-').
+fn named_char_of(name: &str) -> Option<char> {
+    const TABLE: [(&str, char); 7] = [
+        ("aq", '\''),
+        ("lq", '"'), ("Lq", '"'), ("rq", '"'), ("Rq", '"'),
+        ("em", '-'), ("en", '-'),
+    ];
+    TABLE.iter().find(|(code, _)| *code == name).map(|&(_, c)| c)
+}
+
+fn is_alnum(c: u8) -> bool {
+    matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') // ascii letters and digits only
+}
+
+/// strip groff escape sequences, replacing named characters with text
+/// equivalents and discarding formatting directives.
+///
+/// escapes are skipped by byte count, which can land inside a multi-byte
+/// utf-8 character; after each escape the cursor is advanced to the next
+/// char boundary so non-ascii input cannot trigger a slicing panic.
+pub fn strip_groff_escapes(source: &str) -> String {
+    let bytes = source.as_bytes();
+    let len = bytes.len();
+    let mut buffer = String::with_capacity(len);
+    let mut pos = 0;
+    let mut prev_char: u8 = 0;
+
+    while pos < len {
+        if bytes[pos] == b'\\' && pos + 1 < len {
+            let next = bytes[pos + 1];
+            match next {
+                b'f' => {
+                    // font escape: \fB, \fI, \fP, \fR, \f(XX, \f[...]
+                    if pos + 2 < len {
+                        let font_char = bytes[pos + 2];
+                        // insert space before italic font to preserve word boundaries
+                        // e.g. \fB--max-results\fR\fIcount\fR -> "--max-results count"
+                        if font_char == b'I' && is_alnum(prev_char) {
+                            buffer.push(' ');
+                            prev_char = b' ';
+                        }
+                        if font_char == b'(' {
+                            pos += 5; // \f(XX — two-character font name
+                        } else if font_char == b'[' {
+                            pos += 3;
+                            skip_to_byte(bytes, len, &mut pos, b']');
+                            if pos < len {
+                                pos += 1;
+                            }
+                        } else {
+                            pos += 3; // \fX — single-character font selector
+                        }
+                    } else {
+                        pos += 2;
+                    }
+                }
+                b'-' => {
+                    // escaped hyphen-minus — emit a plain hyphen
+                    buffer.push('-');
+                    prev_char = b'-';
+                    pos += 2;
+                }
+                b'&' | b'/' | b',' => {
+                    // zero-width characters — discard without output
+                    pos += 2;
+                }
+                b'(' => {
+                    // two-char named character: \(aq, \(lq, \(rq, etc.
+                    if pos + 3 < len {
+                        // checked slice: the two name bytes may straddle a
+                        // multi-byte char, where plain indexing would panic.
+                        let name = source.get(pos + 2..pos + 4);
+                        if let Some(c) = name.and_then(named_char_of) {
+                            buffer.push(c);
+                            prev_char = c as u8;
+                        }
+                        pos += 4;
+                    } else {
+                        pos += 2;
+                    }
+                }
+                b'[' => {
+                    // bracketed named character: \[aq], \[lq], etc.
+                    pos += 2;
+                    let start = pos;
+                    skip_to_byte(bytes, len, &mut pos, b']');
+                    if pos < len {
+                        let name = &source[start..pos];
+                        if let Some(c) = named_char_of(name) {
+                            buffer.push(c);
+                            prev_char = c as u8;
+                        }
+                        pos += 1;
+                    }
+                }
+                b's' => {
+                    // size escape: \sN, \s+N, \s-N — skip the numeric argument
+                    pos += 2;
+                    if pos < len && (bytes[pos] == b'+' || bytes[pos] == b'-') {
+                        pos += 1;
+                    }
+                    if pos < len && bytes[pos].is_ascii_digit() {
+                        pos += 1;
+                    }
+                    if pos < len && bytes[pos].is_ascii_digit() {
+                        pos += 1;
+                    }
+                }
+                b'm' => {
+                    // color escape: \m[...] — skip the bracketed color name
+                    pos += 2;
+                    if pos < len && bytes[pos] == b'[' {
+                        pos += 1;
+                        skip_to_byte(bytes, len, &mut pos, b']');
+                        if pos < len {
+                            pos += 1;
+                        }
+                    }
+                }
+                b'X' => {
+                    // device control: \X'...' — skip the single-quoted payload
+                    pos += 2;
+                    if pos < len && bytes[pos] == b'\'' {
+                        pos += 1;
+                        skip_to_byte(bytes, len, &mut pos, b'\'');
+                        if pos < len {
+                            pos += 1;
+                        }
+                    }
+                }
+                b'*' | b'n' => {
+                    // string variable \*… or number register \n… (X, (XX or [...]) — skip it
+                    pos += 2;
+                    skip_groff_reference(bytes, len, &mut pos);
+                }
+                b'e' | b'\\' => {
+                    // \e or a doubled backslash — emit one literal backslash
+                    buffer.push('\\');
+                    prev_char = b'\\';
+                    pos += 2;
+                }
+                b' ' | b'~' => {
+                    // escaped/non-breaking space — emit a regular space
+                    buffer.push(' ');
+                    prev_char = b' ';
+                    pos += 2;
+                }
+                _ => {
+                    // unknown escape — skip the two-character sequence
+                    pos += 2;
+                }
+            }
+            // the byte-count skips above may stop inside a multi-byte char;
+            // move forward to the next boundary before slicing `source` again.
+            while pos < len && !source.is_char_boundary(pos) {
+                pos += 1;
+            }
+        } else {
+            // copy a full utf-8 char from source to buffer
+            let c = source[pos..].chars().next().unwrap();
+            buffer.push(c);
+            prev_char = if c.is_ascii() { c as u8 } else { 0 };
+            pos += c.len_utf8();
+        }
+    }
+    buffer
+}
+
+fn skip_to_byte(bytes: &[u8], len: usize, pos: &mut usize, delim: u8) {
+    // advance `*pos` to the next `delim` byte; without one it stops at `len`
+    let ahead = bytes.iter().take(len).skip(*pos);
+    *pos += ahead.take_while(|&&b| b != delim).count();
+}
+
+/// advance `pos` past the name part of a groff reference (the text after
+/// `\*` or `\n`): a single char, '(' plus a two-char name, or a bracketed
+/// `[name]`. '(' always skips 3 bytes, so `pos` may end up beyond `len`.
+fn skip_groff_reference(bytes: &[u8], len: usize, pos: &mut usize) {
+    if *pos >= len {
+        return;
+    }
+    match bytes[*pos] {
+        b'(' => *pos += 3,
+        b'[' => {
+            *pos += 1;
+            skip_to_byte(bytes, len, pos, b']');
+            if *pos < len {
+                *pos += 1;
+            }
+        }
+        _ => *pos += 1,
+    }
+}
+
+/// strip inline macro formatting: .BI, .BR, .IR, etc.
+/// these macros alternate between fonts for their arguments, which are
+/// concatenated without separating spaces, e.g.:
+///   .BR foo bar             becomes "foobar"
+///   .BI "--output " "FILE"  becomes "--output FILE" (quoted space kept)
+/// quoted strings are kept together (quotes stripped, inner whitespace
+/// preserved); unquoted spaces and tabs are consumed.
+pub fn strip_inline_macro_args(text: &str) -> String {
+    let bytes = text.as_bytes();
+    let len = bytes.len();
+    let mut buffer = String::with_capacity(len);
+    let mut pos = 0;
+    while pos < len {
+        if bytes[pos] == b'"' {
+            // quoted argument — copy characters up to the closing quote
+            pos += 1;
+            while pos < len && bytes[pos] != b'"' {
+                let c = text[pos..].chars().next().unwrap();
+                buffer.push(c);
+                pos += c.len_utf8();
+            }
+            if pos < len {
+                pos += 1;
+            }
+        } else if bytes[pos] == b' ' || bytes[pos] == b'\t' {
+            // unquoted whitespace — skip (arguments are concatenated)
+            pos += 1;
+        } else {
+            let c = text[pos..].chars().next().unwrap();
+            buffer.push(c);
+            pos += c.len_utf8();
+        }
+    }
+    buffer
+}
+
+/// render the arguments of a same-font macro (.B/.I), where the spaces
+/// between arguments stay visible. double quotes only group arguments in
+/// roff source, so they are removed before escapes are stripped.
+pub fn strip_space_macro_args(text: &str) -> String {
+    let unquoted: String = text.chars().filter(|&c| c != '"').collect();
+    let rendered = strip_groff_escapes(&unquoted);
+    rendered.trim().to_string()
+}
+
+/// remove groff escapes from `line`, then trim surrounding whitespace.
+pub fn strip_groff(line: &str) -> String {
+    String::from(strip_groff_escapes(line).trim())
+}
+
+/// report whether a raw manpage line is a groff comment: it either starts
+/// with `.\"` (the comment request) or with a bare `\"` escape. this is
+/// the first check classify_line performs, so comment lines are never
+/// treated as macros or text.
+fn is_comment_line(line: &str) -> bool {
+    const COMMENT_PREFIXES: [&str; 2] = [".\\\"", "\\\""];
+    COMMENT_PREFIXES.iter().any(|prefix| line.starts_with(*prefix))
+}
+
+/// classify a single line of manpage source.
+/// macro lines start with '.' or '\'' (groff alternate control char).
+/// the macro name is split from its arguments at the first space/tab.
+/// arguments wrapped in double quotes are unquoted.
+pub fn classify_line(line: &str) -> GroffLine {
+    if is_comment_line(line) {
+        return GroffLine::Comment;
+    }
+    let len = line.len();
+    if len == 0 {
+        return GroffLine::Blank;
+    }
+    let bytes = line.as_bytes();
+    // a line consisting of just `.\` is also treated as a comment. every
+    // longer comment form (`.\"…`, `\"…`) has already been caught by
+    // is_comment_line above, so this is the only comment check still
+    // needed at this point.
+    if line == ".\\" {
+        return GroffLine::Comment;
+    }
+    if bytes[0] == b'.' || bytes[0] == b'\'' {
+        // macro line — extract macro name and arguments
+        let rest = line[1..].trim();
+        let split_at = rest.find([' ', '\t']);
+        match split_at {
+            Some(idx) => {
+                let name = rest[..idx].to_string();
+                let args = rest[idx + 1..].trim();
+                // strip surrounding quotes from arguments
+                let args = if args.len() >= 2
+                    && args.starts_with('"')
+                    && args.ends_with('"')
+                    && !args[1..args.len() - 1].contains('"')
+                {
+                    args[1..args.len() - 1].to_string()
+                } else {
+                    args.to_string()
+                };
+                GroffLine::Macro { name, args }
+            }
+            None => GroffLine::Macro {
+                name: rest.to_string(),
+                args: String::new(),
+            },
+        }
+    } else {
+        let stripped = strip_groff(line);
+        if stripped.is_empty() {
+            GroffLine::Blank
+        } else {
+            GroffLine::Text(stripped)
+        }
+    }
+}
diff --git a/src/parsers/manpage/mdoc.rs b/src/parsers/manpage/mdoc.rs
new file mode 100644
index 0000000..a4af154
--- /dev/null
+++ b/src/parsers/manpage/mdoc.rs
@@ -0,0 +1,237 @@
+//! BSD mdoc format support.
+//!
+//! mdoc is the bsd manpage macro package. it uses semantic macros rather than
+//! presentation macros:
+//!   .Fl v    -> flag: -v
+//!   .Ar file -> argument: file
+//!   .Op ...  -> optional: [...]
+//!   .Bl/.It/.El -> list begin/item/end
+//!   .Sh      -> section header (note lowercase 'h', vs groff's .SH)
+
+use crate::parsers::manpage::groff::{GroffLine, strip_groff_escapes};
+use crate::parsers::manpage::{ManpageEntry, ManpageResult, OwnedParam, OwnedSwitch};
+use crate::types::Positional;
+
+/// report whether the page is mdoc, i.e. holds at least one `.Sh` macro.
+pub fn is_mdoc(lines: &[GroffLine]) -> bool {
+    // groff man pages spell the section macro `.SH`, so `.Sh` marks mdoc
+    let is_sh = |l: &GroffLine| matches!(l, GroffLine::Macro { name, .. } if name == "Sh");
+    lines.iter().any(is_sh)
+}
+
+/// render one mdoc line as plain text.
+///
+/// text lines are returned with groff escapes stripped. macro lines
+/// contribute their stripped, trimmed arguments, except for structural
+/// macros, which never carry renderable text. anything else, and any macro
+/// whose arguments come out empty, yields None.
+fn mdoc_text_of(line: &GroffLine) -> Option<String> {
+    const STRUCTURAL: [&str; 11] = [
+        "Pp", "Bl", "El", "Sh", "Ss", "Os", "Dd", "Dt", "Oo", "Oc", "Op",
+    ];
+    let rendered = match line {
+        GroffLine::Text(t) => return Some(strip_groff_escapes(t)),
+        GroffLine::Macro { name, .. } if STRUCTURAL.contains(&name.as_str()) => return None,
+        GroffLine::Macro { args, .. } => strip_groff_escapes(args),
+        _ => return None,
+    };
+    let rendered = rendered.trim();
+    (!rendered.is_empty()).then(|| rendered.to_string())
+}
+
+/// parse the argument text of an mdoc `.It` list item that defines a flag.
+/// ".It Fl v Ar file" arrives here as "Fl v Ar file": `Fl` introduces the
+/// flag and an `Ar` right after it names the flag's parameter.
+fn parse_mdoc_it(args: &str) -> Option<ManpageEntry> {
+    let tokens: Vec<&str> = args
+        .split(' ')
+        .filter(|tok| !tok.is_empty() && *tok != "Ns")
+        .collect();
+    let flag = match tokens.as_slice() {
+        ["Fl", flag, ..] => *flag,
+        _ => return None,
+    };
+    // one-byte flags must be alphanumeric; longer ones need a leading '-'
+    let switch = if flag.len() == 1 {
+        OwnedSwitch::Short(flag.chars().next().filter(char::is_ascii_alphanumeric)?)
+    } else {
+        OwnedSwitch::Long(flag.strip_prefix('-')?.to_string())
+    };
+    let param = match tokens.get(2..4) {
+        Some(["Ar", name]) => Some(OwnedParam::Mandatory(name.to_string())),
+        _ => None,
+    };
+    Some(ManpageEntry {
+        switch,
+        param,
+        desc: String::new(),
+    })
+}
+
+/// read a positional from mdoc arguments (the text after .Ar or .Op Ar):
+/// the first word, lowercased, plus whether any word is a literal "...".
+/// first words shorter than two bytes are rejected.
+fn positional_of_mdoc_line(args: &str) -> Option<(String, bool)> {
+    let mut words = args.split(' ').filter(|w| !w.is_empty()).peekable();
+    let name = words.peek().copied().filter(|w| w.len() >= 2)?;
+    let variadic = words.any(|w| w == "...");
+    Some((name.to_ascii_lowercase(), variadic))
+}
+
+/// parse an entire mdoc-format manpage into flags and positionals.
+/// flags come from .Bl/.It/.El lists whose first .It starts with Fl;
+/// positionals come from .Ar / .Op Ar lines of .Sh SYNOPSIS sections.
+/// `subcommands` and `description` of the result are always left empty.
+pub fn parse_mdoc_lines(lines: &[GroffLine]) -> ManpageResult {
+    // collect description text from `start` until the next It/El/Sh/Ss macro
+    fn desc_of(lines: &[GroffLine], start: usize) -> (String, usize) {
+        let mut acc: Vec<String> = Vec::new();
+        let mut i = start;
+        while i < lines.len() {
+            if let GroffLine::Macro { name, .. } = &lines[i]
+                && matches!(name.as_str(), "It" | "El" | "Sh" | "Ss")
+            {
+                break;
+            }
+            if let Some(t) = mdoc_text_of(&lines[i]) {
+                acc.push(t);
+            }
+            i += 1;
+        }
+        (acc.join(" ").trim().to_string(), i) // i: index of the stopping macro
+    }
+
+    fn skip_to_el(lines: &[GroffLine], start: usize) -> usize {
+        let mut i = start;
+        while i < lines.len() {
+            if let GroffLine::Macro { name, .. } = &lines[i]
+                && name == "El"
+            {
+                return i + 1; // resume just past the list terminator
+            }
+            i += 1;
+        }
+        i
+    }
+
+    /// parse a single .It entry: extract flag, collect description.
+    fn parse_it(
+        args: &str,
+        lines: &[GroffLine],
+        start: usize,
+        entries: &mut Vec<ManpageEntry>,
+    ) -> usize {
+        // the description is consumed even when the item is not a flag
+        let (desc, new_start) = desc_of(lines, start);
+        if let Some(mut entry) = parse_mdoc_it(args) {
+            entry.desc = desc;
+            entries.push(entry);
+        }
+        new_start
+    }
+
+    /// parse all .It entries within a .Bl/.El option list.
+    fn parse_option_list(
+        entries: &mut Vec<ManpageEntry>,
+        lines: &[GroffLine],
+        start: usize,
+    ) -> usize {
+        let mut i = start;
+        while i < lines.len() {
+            match &lines[i] {
+                GroffLine::Macro { name, .. } if name == "El" => return i + 1,
+                GroffLine::Macro { name, args } if name == "It" => {
+                    i = parse_it(args, lines, i + 1, entries);
+                }
+                _ => i += 1,
+            }
+        }
+        i
+    }
+
+    fn parse_synopsis(
+        positionals: &mut Vec<(String, bool, bool)>,
+        lines: &[GroffLine],
+        start: usize,
+    ) -> usize {
+        let mut i = start;
+        while i < lines.len() {
+            match &lines[i] {
+                GroffLine::Macro { name, .. } if name == "Sh" => return i, // next section
+                GroffLine::Macro { name, args } if name == "Ar" => {
+                    if let Some((n, v)) = positional_of_mdoc_line(args) {
+                        positionals.push((n, false, v));
+                    }
+                    i += 1;
+                }
+                GroffLine::Macro { name, args } if name == "Op" => {
+                    let words: Vec<&str> = args.split(' ').filter(|w| !w.is_empty()).collect();
+                    if matches!(words.first(), Some(&"Ar")) { // optional positional
+                        let rest = args.trim_start().strip_prefix("Ar").unwrap_or("");
+                        if let Some((n, v)) = positional_of_mdoc_line(rest) {
+                            positionals.push((n, true, v));
+                        }
+                    }
+                    i += 1;
+                }
+                _ => i += 1,
+            }
+        }
+        i
+    }
+
+    let mut entries: Vec<ManpageEntry> = Vec::new();
+    let mut positionals: Vec<(String, bool, bool)> = Vec::new(); // (name, optional, variadic)
+    let mut i = 0;
+    while i < lines.len() {
+        // .Bl + .It header sequence — peek at first .It to decide if this is a flag list
+        if let GroffLine::Macro { name: n1, .. } = &lines[i]
+            && n1 == "Bl"
+        {
+            let j = i + 1;
+            if j < lines.len()
+                && let GroffLine::Macro {
+                    name: n2,
+                    args: it_args,
+                } = &lines[j]
+                && n2 == "It"
+            {
+                let words: Vec<&str> = it_args.split(' ').filter(|w| !w.is_empty()).collect();
+                if matches!(words.first(), Some(&"Fl")) {
+                    let k = parse_it(it_args, lines, j + 1, &mut entries);
+                    i = parse_option_list(&mut entries, lines, k);
+                    continue;
+                } else {
+                    i = skip_to_el(lines, j + 1); // not a flag list: skip it whole
+                    continue;
+                }
+            }
+            i = skip_to_el(lines, j); // .Bl not directly followed by .It
+            continue;
+        }
+        if let GroffLine::Macro { name, args } = &lines[i]
+            && name == "Sh"
+            && args.trim().eq_ignore_ascii_case("SYNOPSIS")
+        {
+            i = parse_synopsis(&mut positionals, lines, i + 1);
+            continue;
+        }
+        i += 1;
+    }
+
+    // deduplicate positionals by name, preserving first-seen order
+    let mut seen: Vec<String> = Vec::new();
+    let mut deduped: Vec<(String, Positional)> = Vec::new();
+    for (name, optional, variadic) in positionals {
+        if !seen.contains(&name) {
+            seen.push(name.clone());
+            deduped.push((name, Positional { optional, variadic }));
+        }
+    }
+
+    ManpageResult {
+        entries,
+        subcommands: Vec::new(),
+        positionals: deduped,
+        description: String::new(),
+    }
+}
diff --git a/src/parsers/manpage/sections.rs b/src/parsers/manpage/sections.rs
new file mode 100644
index 0000000..423fa81
--- /dev/null
+++ b/src/parsers/manpage/sections.rs
@@ -0,0 +1,851 @@
+//! section extraction from manpages.
+//!
+//! manpages are divided into sections by .SH macros. we extract OPTIONS,
+//! NAME, SYNOPSIS, and COMMANDS sections for their specific content.
+
+use nom::{Parser, sequence::preceded};
+
+use crate::parsers::help::{parse_usage_args, parse_usage_flags, skip_command_name};
+use crate::parsers::manpage::groff::{
+    GroffLine, strip_groff_escapes, strip_inline_macro_args, strip_space_macro_args,
+};
+use crate::parsers::manpage::{ManpageEntry, ManpageSubcommand, OwnedParam, OwnedSwitch};
+use crate::types::{Param, Positional, Switch};
+
+fn is_options_section(name: &str) -> bool {
+    // a heading qualifies when it mentions OPTION anywhere, in any letter case
+    name.to_ascii_uppercase().contains("OPTION")
+}
+
+/// gather the body of every option-like .SH section into one list.
+///
+/// all .SH sections whose title passes `is_options_section` are
+/// concatenated in document order (this covers the nix pattern of separate
+/// "Options" and "Common Options" sections), with a synthetic empty .SH
+/// macro between consecutive sections.
+/// when no such section contributes any line, the body of the first
+/// DESCRIPTION section is returned instead, or an empty list if there is
+/// none.
+pub fn extract_options_section(lines: &[GroffLine]) -> Vec<GroffLine> {
+    // title of the .SH heading at `idx`, if that line is one
+    let heading_title = |idx: usize| match &lines[idx] {
+        GroffLine::Macro { name, args } if name == "SH" => Some(args.as_str()),
+        _ => None,
+    };
+    // a section body starting at `from` ends just before the next .SH
+    // (or at the end of input when no further heading follows)
+    let body_end = |from: usize| {
+        lines[from..]
+            .iter()
+            .position(|l| matches!(l, GroffLine::Macro { name, .. } if name == "SH"))
+            .map_or(lines.len(), |offset| from + offset)
+    };
+
+    // first pass: concatenate every option-like section
+    let mut collected: Vec<GroffLine> = Vec::new();
+    let mut idx = 0;
+    while idx < lines.len() {
+        match heading_title(idx) {
+            Some(title) if is_options_section(title) => {
+                // synthetic separator between concatenated sections so that
+                // collect_desc_text (which stops on SH/SS) does not let
+                // descriptions bleed between sections.
+                if !collected.is_empty() {
+                    collected.push(GroffLine::Macro {
+                        name: "SH".to_string(),
+                        args: String::new(),
+                    });
+                }
+                let end = body_end(idx + 1);
+                collected.extend_from_slice(&lines[idx + 1..end]);
+                // resume at the terminating heading: it may itself be an
+                // option-like section
+                idx = end;
+            }
+            _ => idx += 1,
+        }
+    }
+    if !collected.is_empty() {
+        return collected;
+    }
+
+    // fallback: body of the first DESCRIPTION section
+    for idx in 0..lines.len() {
+        let is_description =
+            heading_title(idx).is_some_and(|t| t.trim().eq_ignore_ascii_case("DESCRIPTION"));
+        if is_description {
+            return lines[idx + 1..body_end(idx + 1)].to_vec();
+        }
+    }
+    Vec::new()
+}
+
+/// return the body of the first .SH section titled `section_name`.
+///
+/// the title comparison ignores ASCII case and surrounding whitespace. the
+/// body is every line after the heading up to, but not including, the next
+/// .SH macro. an empty list is returned when no heading matches.
+fn extract_named_section(lines: &[GroffLine], section_name: &str) -> Vec<GroffLine> {
+    let is_heading = |l: &GroffLine| matches!(l, GroffLine::Macro { name, .. } if name == "SH");
+    let wanted = |l: &GroffLine| match l {
+        GroffLine::Macro { name, args } => {
+            name == "SH" && args.trim().eq_ignore_ascii_case(section_name)
+        }
+        _ => false,
+    };
+
+    // only the first matching heading is considered
+    let Some(heading) = lines.iter().position(wanted) else {
+        return Vec::new();
+    };
+    lines[heading + 1..]
+        .iter()
+        .take_while(|&l| !is_heading(l))
+        .cloned()
+        .collect()
+}
+
+/// pull the one-line description out of the NAME section.
+///
+/// the NAME section follows the convention "command \- short description";
+/// the part after the separator is the command's description. both the
+/// groff "\-" and the plain-text " - " separators are understood.
+///
+/// only the first .SH NAME section is read. text lines and the arguments of
+/// font macros (.B, .BI, .BR, .I, .IR) and of mdoc's .Nm are joined with
+/// single spaces; mdoc's .Nd contributes "\- " followed by its text so the
+/// same separator split applies.
+///
+/// returns None without a NAME section, a separator, or text after it.
+pub fn extract_name_description(lines: &[GroffLine]) -> Option<String> {
+    // locate the first NAME heading; without one there is nothing to read
+    let heading = lines.iter().position(|l| match l {
+        GroffLine::Macro { name, args } => {
+            name == "SH" && args.trim().eq_ignore_ascii_case("NAME")
+        }
+        _ => false,
+    })?;
+
+    let pieces: Vec<String> = lines[heading + 1..]
+        .iter()
+        // the section ends at the next .SH heading
+        .take_while(|l| !matches!(l, GroffLine::Macro { name, .. } if name == "SH"))
+        .filter_map(|l| {
+            let (macro_name, rendered) = match l {
+                // plain text is kept verbatim, even when empty
+                GroffLine::Text(t) => return Some(t.clone()),
+                GroffLine::Macro { name, args } => match name.as_str() {
+                    "B" | "BI" | "BR" | "I" | "IR" => {
+                        (name, strip_groff_escapes(&strip_inline_macro_args(args)))
+                    }
+                    // mdoc name/description macros carry plain arguments
+                    "Nm" | "Nd" => (name, strip_groff_escapes(args)),
+                    _ => return None,
+                },
+                _ => return None,
+            };
+            // macro-derived pieces are dropped when they trim to nothing
+            let rendered = rendered.trim();
+            if rendered.is_empty() {
+                None
+            } else if macro_name == "Nd" {
+                Some(format!("\\- {rendered}"))
+            } else {
+                Some(rendered.to_string())
+            }
+        })
+        .collect();
+
+    // join with single spaces, then cut at the first separator
+    split_name_separator(pieces.join(" ").trim())
+}
+
+/// cut a NAME line at its first separator and return what follows, trimmed.
+///
+/// the separator is either "\-" (groff) or " - " (plain), whichever occurs
+/// earlier in `full`. yields None when neither occurs or nothing but
+/// whitespace follows it.
+fn split_name_separator(full: &str) -> Option<String> {
+    const GROFF_SEP: &str = "\\-";
+    const PLAIN_SEP: &str = " - ";
+    // earliest hit wins; a tie is impossible (different first characters)
+    let hits = [
+        find_padded(full, GROFF_SEP).map(|at| (at, GROFF_SEP.len())),
+        find_padded(full, PLAIN_SEP).map(|at| (at, PLAIN_SEP.len())),
+    ];
+    let (at, sep_len) = hits.into_iter().flatten().min_by_key(|&(at, _)| at)?;
+    let remainder = full[at + sep_len..].trim();
+    if remainder.is_empty() {
+        None
+    } else {
+        Some(remainder.to_string())
+    }
+}
+
+/// byte offset of the first occurrence of `needle` in `s`, if any.
+/// this is a plain substring search: whitespace around the marker is
+/// neither required nor checked beyond what `needle` itself contains.
+fn find_padded(s: &str, needle: &str) -> Option<usize> {
+    s.find(needle)
+}
+
+/// extract the command name from the SYNOPSIS section of raw manpage text.
+///
+/// the SYNOPSIS section shows how to invoke the command:
+///   .SH SYNOPSIS
+///   .B git add
+///   [\fIOPTIONS\fR] [\fB\-\-\fR] [\fI<pathspec>\fR...]
+///
+/// the first text, .SY or .B/.BI/.BR line that yields a command candidate
+/// wins; None if the section ends (next .SH, or .SS under an .SS) first.
+pub fn extract_synopsis_command(contents: &str) -> Option<String> {
+    // pre-replace italic text (\fI...\fR) with angle-bracketed placeholders
+    // before classification strips the font info. italic in groff indicates
+    // a parameter/placeholder (e.g. \fIoperation\fR), not a command word.
+    // the angle brackets cause extract_cmd to stop at these tokens since
+    // '<' is in its stop set.
+    let preprocessed: Vec<String> = contents
+        .split('\n')
+        .map(replace_italic_with_angles)
+        .collect();
+    let classified: Vec<GroffLine> = preprocessed
+        .iter()
+        .map(|line| crate::parsers::manpage::groff::classify_line(line))
+        .collect();
+    let mut i = 0;
+    while i < classified.len() {
+        if let Some((stop_on_ss, content_start)) = synopsis_heading_at(&classified, i) {
+            i = content_start;
+            while i < classified.len() {
+                match &classified[i] {
+                    GroffLine::Macro { name, .. }
+                        if name == "SH" || (stop_on_ss && name == "SS") =>
+                    {
+                        return None; // section ended without a candidate
+                    }
+                    GroffLine::Text(text) => {
+                        let trimmed = text.trim();
+                        if let Some(cmd) = synopsis_command_candidate(trimmed, true) {
+                            return Some(cmd);
+                        }
+                        i += 1;
+                    }
+                    GroffLine::Macro { name, args } if name == "SY" => {
+                        let text = strip_groff_escapes(args);
+                        if let Some(cmd) = synopsis_command_candidate(text.trim(), false) {
+                            return Some(cmd);
+                        }
+                        i += 1;
+                    }
+                    GroffLine::Macro { name, args }
+                        if matches!(name.as_str(), "B" | "BI" | "BR") =>
+                    {
+                        let text = render_synopsis_command_macro(name, args);
+                        if let Some(cmd) = synopsis_command_candidate(text.trim(), false) {
+                            return Some(cmd);
+                        }
+                        i += 1;
+                    }
+                    _ => i += 1,
+                }
+            }
+            return None; // only the first synopsis heading is examined
+        }
+        i += 1;
+    }
+    None
+}
+
+/// decide whether `lines[i]` opens a SYNOPSIS section: an .SH/.SS titled
+/// SYNOPSIS, or an untitled one whose next line (blanks and comments
+/// skipped) is the text "SYNOPSIS". returns (heading was .SS, index of the
+/// first body line).
+fn synopsis_heading_at(lines: &[GroffLine], i: usize) -> Option<(bool, usize)> {
+    let (is_subsection, title) = match &lines[i] {
+        GroffLine::Macro { name, args } if name == "SH" => (false, args.trim()),
+        GroffLine::Macro { name, args } if name == "SS" => (true, args.trim()),
+        _ => return None,
+    };
+    if title.eq_ignore_ascii_case("SYNOPSIS") {
+        return Some((is_subsection, i + 1));
+    }
+    if !title.is_empty() {
+        return None;
+    }
+    let next = (i + 1..lines.len())
+        .find(|&j| !matches!(lines[j], GroffLine::Blank | GroffLine::Comment))?;
+    match &lines[next] {
+        GroffLine::Text(text) if text.trim().eq_ignore_ascii_case("SYNOPSIS") => {
+            Some((is_subsection, next + 1))
+        }
+        _ => None,
+    }
+}
+
+/// render a synopsis font macro's arguments as plain text.
+fn render_synopsis_command_macro(name: &str, args: &str) -> String {
+    if matches!(name, "B" | "I") {
+        return strip_space_macro_args(args);
+    }
+    let inline = strip_inline_macro_args(args);
+    strip_groff_escapes(&inline).trim().to_string()
+}
+
+/// extract a command name from one synopsis line.
+///
+/// returns None when the line is empty, ends in ':', has no leading
+/// command words, yields a candidate starting with '.', or reads like prose.
+fn synopsis_command_candidate(line: &str, reject_long_unmarked: bool) -> Option<String> {
+    let text = line.trim();
+    // blank lines and "label:" lines never name a command
+    if text.is_empty() || text.ends_with(':') {
+        return None;
+    }
+    // dot-led candidates and prose-like lines are discarded
+    extract_cmd(text)
+        .filter(|cmd| !cmd.starts_with('.'))
+        .filter(|cmd| !looks_like_synopsis_prose(text, cmd, reject_long_unmarked))
+}
+
+/// heuristic: does `line` read like an English sentence rather than an
+/// invocation? `cmd` is the command candidate already extracted from it.
+fn looks_like_synopsis_prose(line: &str, cmd: &str, reject_long_unmarked: bool) -> bool {
+    const LEADING_STOPWORDS: [&str; 7] = ["a", "an", "and", "or", "the", "this", "these"];
+
+    let Some(first) = cmd.split_whitespace().next() else {
+        return true;
+    };
+    if LEADING_STOPWORDS.contains(&first.to_ascii_lowercase().as_str()) {
+        return true;
+    }
+    // brackets, angle brackets, dashes, braces or a pipe mark an invocation,
+    // and an invocation is never prose past this point
+    let has_marker = line.contains('|')
+        || line
+            .split_whitespace()
+            .any(|word| word.starts_with(['[', '<', '-', '{']));
+    if has_marker {
+        return false;
+    }
+    if line.ends_with('.') || (reject_long_unmarked && cmd.split_whitespace().count() > 3) {
+        return true;
+    }
+    // a capitalised word followed by more words looks like a sentence start
+    let mut letters = first.chars();
+    let capitalised = letters.next().is_some_and(|c| c.is_ascii_uppercase())
+        && letters.all(|c| c.is_ascii_lowercase());
+    capitalised && line.split_whitespace().nth(1).is_some()
+}
+
+/// replace \fI...\f[RP] sequences with <...> so italic params are seen as
+/// non-word tokens by extract_cmd.
+///
+/// non-font escapes inside the italic run (`\-`, `\,`, `\/`) are skipped
+/// while looking for the closing \fR or \fP; any other font escape leaves
+/// the text untouched. exception: a first italic block at the very start
+/// of the line whose content looks like a command word (git-am.1 reads
+/// `\fIgit am\fR ...`) only loses its font markers, so extract_cmd still
+/// sees the command name.
+fn replace_italic_with_angles(line: &str) -> String {
+    let bytes = line.as_bytes();
+    let len = bytes.len();
+    let mut out = String::with_capacity(len);
+    let mut i = 0;
+    let mut command_consumed = false;
+    while i < len {
+        // byte-compare to avoid panicking on non-ASCII char boundaries
+        if bytes[i..].starts_with(b"\\fI") {
+            let inner_start = i + 3;
+            // the run ends at a font escape (`\f`) or a trailing backslash;
+            // any other escape is two bytes of content and is stepped over
+            let mut j = inner_start;
+            let ends_run =
+                |k: usize| bytes[k] == b'\\' && bytes.get(k + 1).is_none_or(|&b| b == b'f');
+            while j < len && !ends_run(j) {
+                j += if bytes[j] == b'\\' { 2 } else { 1 };
+            }
+            let closed = j + 3 <= len && matches!(bytes[j + 2], b'R' | b'P');
+            if closed {
+                let inner = &line[inner_start..j];
+                let at_line_start = !command_consumed && line[..i].chars().all(char::is_whitespace);
+                if at_line_start && italic_looks_like_command(inner) {
+                    out.push_str(inner);
+                    command_consumed = true;
+                } else {
+                    out.push('<');
+                    out.push_str(inner);
+                    out.push('>');
+                }
+                i = j + 3;
+                continue;
+            }
+        }
+        let c = line[i..].chars().next().unwrap();
+        out.push(c);
+        i += c.len_utf8();
+    }
+    out
+}
+
+/// does the italic content read as a command name instead of a
+/// placeholder? after groff escapes are stripped and the result trimmed,
+/// it must be non-empty and consist only of lowercase ASCII letters, ASCII
+/// digits, '-', '_', '.' and spaces.
+fn italic_looks_like_command(inner: &str) -> bool {
+    // one allow-list covers letters, digits, punctuation and the space
+    let is_command_char = |c: char| matches!(c, 'a'..='z' | '0'..='9' | '-' | '_' | '.' | ' ');
+    let resolved = strip_groff_escapes(inner);
+    let candidate = resolved.trim();
+    !candidate.is_empty() && candidate.chars().all(is_command_char)
+}
+
+/// extract the command name from a synopsis line: the leading run of
+/// space-separated words built only from ASCII alphanumerics, '-', '_', '.'
+/// and not starting with '-', joined by single spaces. None when empty.
+fn extract_cmd(line: &str) -> Option<String> {
+    let is_cmd_word = |word: &&str| {
+        // bracket openers fail the character test; a flag's '-' would not
+        !word.starts_with('-')
+            && word
+                .chars()
+                .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.'))
+    };
+    let command: Vec<&str> = line
+        .split(' ')
+        .filter(|word| !word.is_empty())
+        .take_while(is_cmd_word)
+        .collect();
+    if command.is_empty() {
+        None
+    } else {
+        Some(command.join(" "))
+    }
+}
+
+/// body lines of the first `.SH SYNOPSIS` section (empty when absent).
+fn extract_synopsis_section(lines: &[GroffLine]) -> Vec<GroffLine> {
+    extract_named_section(lines, "SYNOPSIS")
+}
+
+/// extract positional arguments from the SYNOPSIS section: flatten it with
+/// `join_synopsis_text`, skip the command name, run `parse_usage_args`, and
+/// lowercase the names. an empty synopsis or a failed parse gives no entries.
+pub fn extract_synopsis_positionals(lines: &[GroffLine]) -> Vec<(String, Positional)> {
+    let synopsis = join_synopsis_text(lines);
+    if synopsis.is_empty() {
+        return Vec::new();
+    }
+    let parsed: nom::IResult<&str, Vec<(&str, Positional)>> =
+        preceded(skip_command_name, parse_usage_args).parse(&synopsis);
+    let Ok((_, found)) = parsed else {
+        return Vec::new();
+    };
+    found
+        .into_iter()
+        .map(|(name, positional)| (name.to_ascii_lowercase(), positional))
+        .collect()
+}
+
+/// join the first form of the SYNOPSIS section into one line of plain text,
+/// stripping groff escapes and font macros; collection stops at the first
+/// .SS or .br. shared by the positional and flag extractors.
+fn join_synopsis_text(lines: &[GroffLine]) -> String {
+    let section = extract_synopsis_section(lines);
+    let mut acc: Vec<String> = Vec::new();
+    for line in section {
+        match line {
+            GroffLine::Macro { name, .. } if name == "SS" || name == "br" => break,
+            GroffLine::Macro { name, args } if name == "SY" => {
+                let text = strip_groff_escapes(&args).trim().to_string();
+                if !text.is_empty() {
+                    acc.push(text);
+                }
+            }
+            GroffLine::Macro { name, args } if name == "I" => {
+                let text = strip_groff_escapes(&args).trim().to_string();
+                if !text.is_empty() {
+                    acc.push(format!("<{text}>")); // italic marks a placeholder
+                }
+            }
+            GroffLine::Macro { name, args } if name == "IR" => {
+                let text = render_leading_italic_arg(&args);
+                if !text.is_empty() {
+                    acc.push(text);
+                }
+            }
+            GroffLine::Text(t) => {
+                let text = strip_groff_escapes(&t).trim().to_string();
+                if !text.is_empty() {
+                    acc.push(text);
+                }
+            }
+            GroffLine::Macro { name, args } if name == "B" => { // .B ends here
+                let text = strip_space_macro_args(&args);
+                if !text.is_empty() {
+                    acc.push(text);
+                }
+            }
+            GroffLine::Macro { name, args }
+                if matches!(name.as_str(), "BI" | "BR" | "IB" | "RB" | "RI") =>
+            {
+                let text = strip_groff_escapes(&strip_inline_macro_args(&args));
+                let text = text.trim();
+                if !text.is_empty() {
+                    acc.push(text.to_string());
+                }
+            }
+            _ => (), // every other macro, blank and comment is ignored
+        }
+    }
+    acc.join(" ").trim().to_string()
+}
+
+/// render `.IR` arguments with the leading word in angle brackets.
+/// the first whitespace-delimited word becomes `<word>`; whatever follows
+/// is appended after a single space. an empty string is returned when the
+/// arguments are blank or the first word strips to nothing.
+fn render_leading_italic_arg(args: &str) -> String {
+    let trimmed = args.trim();
+    if trimmed.is_empty() {
+        return String::new();
+    }
+    let (head, tail) = trimmed.split_once(char::is_whitespace).unwrap_or((trimmed, ""));
+    let head = strip_groff_escapes(head);
+    let head = head.trim();
+    if head.is_empty() {
+        return String::new();
+    }
+    let tail = strip_groff_escapes(&strip_inline_macro_args(tail.trim()));
+    match tail.trim() {
+        "" => format!("<{head}>"),
+        tail => format!("<{head}> {tail}"),
+    }
+}
+
+fn to_owned_switch(s: Switch<'_>) -> OwnedSwitch {
+    match s { // variant-for-variant copy into the owned type
+        Switch::Short(c) => OwnedSwitch::Short(c),
+        Switch::Long(l) => OwnedSwitch::Long(l.to_string()),
+        Switch::Both(c, l) => OwnedSwitch::Both(c, l.to_string()),
+    }
+}
+
+fn to_owned_param(p: Param<'_>) -> OwnedParam {
+    match p { // variant-for-variant copy into the owned type
+        Param::Mandatory(s) => OwnedParam::Mandatory(s.to_string()),
+        Param::Optional(s) => OwnedParam::Optional(s.to_string()),
+    }
+}
+
+/// extract flag entries that are declared in the SYNOPSIS line.
+///
+/// some manpages (notably nix-env, sed) list flags only in the synopsis and
+/// never repeat them in the OPTIONS body, so a body-only pass misses them.
+/// every entry has an empty description, leaving richer body entries free
+/// to take precedence when callers merge. no synopsis or parse: no entries.
+pub fn extract_synopsis_flags(lines: &[GroffLine]) -> Vec<ManpageEntry> {
+    let synopsis = join_synopsis_text(lines);
+    if synopsis.is_empty() {
+        return Vec::new();
+    }
+    let parsed: nom::IResult<&str, Vec<(Switch<'_>, Option<Param<'_>>)>> =
+        preceded(skip_command_name, parse_usage_flags).parse(&synopsis);
+    let Ok((_, pairs)) = parsed else {
+        return Vec::new();
+    };
+    let mut entries = Vec::with_capacity(pairs.len());
+    for (switch, param) in pairs {
+        entries.push(ManpageEntry {
+            switch: to_owned_switch(switch),
+            param: param.map(to_owned_param),
+            desc: String::new(),
+        });
+    }
+    entries
+}
+
+/// extract first-positional choices from prose lists in DESCRIPTION.
+///
+/// getent(1) is the motivating shape: the synopsis has a `database`
+/// positional, while the database names are a tagged (.TP) list under
+/// DESCRIPTION. the completion model has no separate "positional choices"
+/// channel, so they are returned as subcommand-like candidates. only tags
+/// after a "listed below" / "may be any of" text line are collected.
+pub fn extract_description_positionals(lines: &[GroffLine]) -> Vec<ManpageSubcommand> {
+    let description = extract_named_section(lines, "DESCRIPTION");
+    if description.is_empty() || !description_mentions_listed_database(&description) {
+        return Vec::new();
+    }
+
+    let mut out = Vec::new();
+    let mut seen = std::collections::HashSet::new(); // names already emitted
+    let mut i = 0;
+    let mut in_database_list = false; // set once, never cleared
+    while i < description.len() {
+        match &description[i] {
+            GroffLine::Text(text)
+                if text.to_ascii_lowercase().contains("listed below")
+                    || text.to_ascii_lowercase().contains("may be any of") =>
+            {
+                in_database_list = true;
+                i += 1;
+            }
+            GroffLine::Macro { name, .. } if name == "TP" && in_database_list => {
+                if i + 1 >= description.len() {
+                    break; // .TP is the last line: no tag follows
+                }
+                let Some(name) = description_tag_name(&description[i + 1]) else {
+                    i += 1;
+                    continue;
+                };
+                if !is_description_choice_name(&name) {
+                    i += 1;
+                    continue;
+                }
+                let (desc, new_i) = collect_description_choice_desc(&description, i + 2);
+                if seen.insert(name.clone()) { // first occurrence wins
+                    out.push(ManpageSubcommand { name, desc });
+                }
+                i = new_i;
+            }
+            _ => {
+                i += 1;
+            }
+        }
+    }
+    out
+}
+
+/// true when the section both mentions "database" and announces a list
+/// ("listed below" / "may be any of"), not necessarily on the same line.
+/// only text lines and B/BI/BR/I/IR/RI font macros are inspected.
+fn description_mentions_listed_database(lines: &[GroffLine]) -> bool {
+    let lowered: Vec<String> = lines
+        .iter()
+        .filter_map(|line| match line {
+            GroffLine::Text(text) => Some(text.to_ascii_lowercase()),
+            GroffLine::Macro { name, args }
+                if matches!(name.as_str(), "B" | "BI" | "BR" | "I" | "IR" | "RI") =>
+            {
+                Some(strip_groff_escapes(&strip_inline_macro_args(args)).to_ascii_lowercase())
+            }
+            _ => None,
+        })
+        .collect();
+    let mentions = |needle: &str| lowered.iter().any(|text| text.contains(needle));
+    mentions("database") && (mentions("listed below") || mentions("may be any of"))
+}
+
+fn description_tag_name(line: &GroffLine) -> Option<String> {
+    match line {
+        GroffLine::Text(text) => Some(text.trim().to_string()), // plain-text tag
+        GroffLine::Macro { name, args }
+            if matches!(name.as_str(), "B" | "BI" | "BR" | "I" | "IR") =>
+        {
+            Some(
+                strip_groff_escapes(&strip_inline_macro_args(args))
+                    .trim()
+                    .to_string(),
+            )
+        }
+        _ => None, // any other line cannot serve as a tag
+    }
+}
+
+fn is_description_choice_name(name: &str) -> bool {
+    // a choice is 1..=32 bytes of ascii lowercase, digits, '-' or '_', not starting with '-'.
+    if name.is_empty() || name.len() > 32 || name.starts_with('-') {
+        return false;
+    }
+    name.bytes()
+        .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_'))
+}
+
+fn collect_description_choice_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
+    let mut parts = Vec::new();
+    let mut i = start;
+    while i < lines.len() {
+        match &lines[i] {
+            GroffLine::Macro { name, .. } if matches!(name.as_str(), "TP" | "SH" | "SS") => {
+                break; // next entry or section: leave `i` on this macro
+            }
+            GroffLine::Text(text) => {
+                parts.push(text.clone());
+                i += 1;
+            }
+            GroffLine::Macro { name, args }
+                if matches!(name.as_str(), "B" | "BI" | "BR" | "I" | "IR" | "RI") =>
+            {
+                let text = strip_groff_escapes(&strip_inline_macro_args(args));
+                let text = text.trim();
+                if !text.is_empty() {
+                    parts.push(text.to_string());
+                }
+                i += 1;
+            }
+            GroffLine::Blank | GroffLine::Comment => {
+                i += 1;
+            }
+            GroffLine::Macro { .. } => {
+                i += 1; // any other macro is skipped without contributing text
+            }
+        }
+    }
+    (first_sentence(&parts.join(" ")), i) // (summary, index where scanning stopped)
+}
+
+/// collapse whitespace runs in `text` and return its first sentence: everything
+/// up to the earliest ". " or ".) " terminator, or all of it when there is none.
+fn first_sentence(text: &str) -> String {
+    let text = text.split_whitespace().collect::<Vec<_>>().join(" ");
+    // exclusive end of each terminator minus its space, so a closing ")" is kept.
+    let ends = [". ", ".) "].map(|m| text.find(m).map(|idx| idx + m.len() - 1));
+    let end = ends.into_iter().flatten().min().unwrap_or(text.len());
+    text[..end].to_string()
+}
+
+fn is_commands_section(name: &str) -> bool {
+    let heading = name.trim();
+    // drop a trailing parenthetical group, so git.1's
+    // "HIGH-LEVEL COMMANDS (PORCELAIN)" reads as "HIGH-LEVEL COMMANDS".
+    let heading = if heading.ends_with(')') {
+        heading.rfind('(').map_or(heading, |open| heading[..open].trim())
+    } else {
+        heading
+    };
+    let upper = heading.to_ascii_uppercase();
+    // accepted (ascii case-insensitively): a bare "COMMAND"/"COMMANDS", or
+    // any heading ending in " COMMANDS" such as "GIT COMMANDS" or
+    // "LOW-LEVEL COMMANDS". headings that merely start with the word, like
+    // "COMMAND LINE OPTIONS", are rejected.
+    let exact = matches!(upper.as_str(), "COMMAND" | "COMMANDS");
+    exact || upper.ends_with(" COMMANDS")
+}
+
+/// concatenate the body lines of every .SH section whose heading passes `is_commands_section`.
+pub fn extract_commands_section(lines: &[GroffLine]) -> Vec<GroffLine> {
+    let mut acc: Vec<GroffLine> = Vec::new();
+    let mut i = 0;
+    while i < lines.len() {
+        if let GroffLine::Macro { name, args } = &lines[i]
+            && name == "SH"
+            && is_commands_section(args)
+        {
+            i += 1; // step past the heading itself
+            while i < lines.len() {
+                if let GroffLine::Macro { name, .. } = &lines[i]
+                    && name == "SH"
+                {
+                    break; // next section: the outer loop re-examines this .SH
+                }
+                acc.push(lines[i].clone());
+                i += 1;
+            }
+        } else {
+            i += 1;
+        }
+    }
+    acc
+}
+
+/// extract SUBCOMMAND-style sections (clap-generated manpages put each
+/// subcommand under its own .SH SUBCOMMAND header with a Usage: line).
+/// returns triples of (name, description, lines) so the caller can re-parse
+/// each section as its own help_result.
+pub fn extract_subcommand_sections(lines: &[GroffLine]) -> Vec<(String, String, Vec<GroffLine>)> {
+    // split into sections at .SH boundaries, keeping only SUBCOMMAND(S) ones
+    let mut sections: Vec<Vec<GroffLine>> = Vec::new();
+    let mut current_name: Option<String> = None; // Some(..) while inside a kept section
+    let mut current: Vec<GroffLine> = Vec::new();
+    for line in lines {
+        if let GroffLine::Macro { name, args } = line
+            && name == "SH"
+        {
+            if current_name.is_some() {
+                sections.push(std::mem::take(&mut current));
+            }
+            let n = args.trim().to_ascii_uppercase();
+            if n == "SUBCOMMAND" || n == "SUBCOMMANDS" {
+                current_name = Some(n);
+            } else {
+                current_name = None;
+            }
+            continue; // the .SH line itself is not part of any body
+        }
+        if current_name.is_some() {
+            current.push(line.clone());
+        }
+    }
+    if current_name.is_some() {
+        sections.push(current);
+    }
+
+    let mut out = Vec::new();
+    for section in sections {
+        // scan section lines for the Usage: line to get the subcommand name
+        let mut subcmd_name: Option<String> = None;
+        let mut desc_lines: Vec<String> = Vec::new();
+        for line in &section {
+            if subcmd_name.is_some() {
+                break; // only lines up to the first usage name are scanned
+            }
+            match line {
+                GroffLine::Text(t) => match find_usage_name(t) {
+                    Some(name) => subcmd_name = Some(name),
+                    None => desc_lines.push(t.clone()), // text ahead of the usage line
+                },
+                GroffLine::Macro { name, args }
+                    if matches!(name.as_str(), "TP" | "B" | "BI" | "BR") =>
+                {
+                    let text = strip_groff_escapes(&strip_inline_macro_args(args));
+                    let text = text.trim();
+                    subcmd_name = find_usage_name(text);
+                }
+                _ => (),
+            }
+        }
+        if let Some(name) = subcmd_name {
+            let desc_raw = desc_lines.join(" ");
+            let desc = strip_groff_escapes(&desc_raw).trim().to_string();
+            let desc = strip_backtick_words(&desc);
+            out.push((name, desc, section)); // sections with no usage name are dropped
+        }
+    }
+    out
+}
+
+/// locate the first "Usage: " marker in `text` and return the identifier
+/// that immediately follows it: the longest run of ascii alphanumerics,
+/// '_' and '-'. returns None when the marker is absent or the run is empty.
+fn find_usage_name(text: &str) -> Option<String> {
+    let (_, tail) = text.split_once("Usage: ")?;
+    let name: &str = {
+        let is_name_char = |c: char| c.is_ascii_alphanumeric() || c == '_' || c == '-';
+        match tail.find(|c: char| !is_name_char(c)) {
+            Some(stop) => &tail[..stop],
+            None => tail,
+        }
+    };
+    // an empty run means the marker was not followed by a usable name.
+    (!name.is_empty()).then(|| name.to_string())
+}
+
+/// strip backtick-quoted words: `word` -> word (an unpaired backtick is kept).
+fn strip_backtick_words(s: &str) -> String {
+    let mut out = String::with_capacity(s.len());
+    let mut rest = s;
+    while let Some(open) = rest.find('`') {
+        // text ahead of the opening backtick is copied through untouched.
+        out.push_str(&rest[..open]);
+        let after = &rest[open + 1..];
+        if let Some(close) = after.find('`') {
+            out.push_str(&after[..close]);
+            rest = &after[close + 1..];
+        } else {
+            out.push('`');
+            rest = after;
+        }
+    }
+    out.push_str(rest);
+    out
+}
diff --git a/src/parsers/manpage/strategies.rs b/src/parsers/manpage/strategies.rs
new file mode 100644
index 0000000..855c468
--- /dev/null
+++ b/src/parsers/manpage/strategies.rs
@@ -0,0 +1,456 @@
+//! strategy-based entry extraction.
+//!
+//! rather than a single monolithic parser, we use multiple "strategies" that
+//! each target a specific groff formatting pattern. this is necessary because
+//! manpage authors use very different macro combinations for the same purpose.
+
+use nom::{Parser, combinator::opt};
+
+use crate::make_macro_walker;
+use crate::parsers::help::{help_parser, param_parser, switch_parser};
+use crate::parsers::manpage::groff::{
+    GroffLine, strip_groff_escapes, strip_inline_macro_args, strip_space_macro_args,
+};
+use crate::parsers::manpage::{ManpageEntry, OwnedParam, OwnedSwitch};
+use crate::types::{Param, Switch};
+
+/// collect the leading run of consecutive text lines, joining them with spaces.
+/// returns (collected, remaining), where remaining starts at the first non-text line.
+fn collect_text_lines(lines: &[GroffLine]) -> (String, &[GroffLine]) {
+    let mut acc: Vec<&str> = Vec::new();
+    let mut i = 0;
+    while i < lines.len() {
+        match &lines[i] {
+            GroffLine::Text(t) => acc.push(t),
+            _ => break, // anything that is not text ends the run
+        }
+        i += 1;
+    }
+    (acc.join(" "), &lines[i..])
+}
+
+fn collect_description_lines(lines: &[GroffLine], start: usize) -> (String, usize) {
+    let mut acc: Vec<String> = Vec::new();
+    let mut i = start;
+    while i < lines.len() {
+        match &lines[i] {
+            GroffLine::Macro { name, .. }
+                if matches!(name.as_str(), "TP" | "TQ" | "IP" | "PP" | "SH" | "SS") =>
+            {
+                break; // paragraph/section boundary: leave `i` on this macro
+            }
+            GroffLine::Text(t) => {
+                acc.push(t.clone());
+                i += 1;
+            }
+            GroffLine::Macro { name, args }
+                if matches!(
+                    name.as_str(),
+                    "B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI"
+                ) =>
+            {
+                let text = tag_of_macro(name, args); // font macros contribute their text
+                if !text.is_empty() {
+                    acc.push(text);
+                }
+                i += 1;
+            }
+            GroffLine::Blank | GroffLine::Comment => {
+                i += 1;
+            }
+            GroffLine::Macro { .. } => {
+                i += 1; // any other macro is skipped
+            }
+        }
+    }
+    (acc.join(" "), i) // (space-joined description, index where scanning stopped)
+}
+
+fn to_owned_switch(s: Switch<'_>) -> OwnedSwitch {
+    match s {
+        Switch::Short(c) => OwnedSwitch::Short(c),
+        Switch::Long(l) => OwnedSwitch::Long(l.to_string()), // copy the borrowed long name
+        Switch::Both(c, l) => OwnedSwitch::Both(c, l.to_string()),
+    }
+}
+
+fn to_owned_param(p: Param<'_>) -> OwnedParam {
+    match p {
+        Param::Mandatory(s) => OwnedParam::Mandatory(s.to_string()), // copy the borrowed name
+        Param::Optional(s) => OwnedParam::Optional(s.to_string()),
+    }
+}
+
+/// attempt to parse a tag string (e.g. "-v, --verbose FILE") into an entry.
+/// groff escapes are stripped and the tag trimmed, then switch_parser followed
+/// by an optional param_parser is applied. returns None if the switch parse fails.
+pub fn parse_tag_to_entry(tag: &str, desc: String) -> Option<ManpageEntry> {
+    let tag = strip_groff_escapes(tag);
+    let tag = tag.trim();
+    let result: nom::IResult<&str, (Switch<'_>, Option<Param<'_>>)> =
+        (switch_parser, opt(param_parser)).parse(tag);
+    match result {
+        Ok((_, (switch, param))) => Some(ManpageEntry {
+            switch: to_owned_switch(switch),
+            param: param.map(to_owned_param),
+            desc,
+        }),
+        Err(_) => None, // the parse error itself is discarded
+    }
+}
+
+/// extract tag text from a macro line.
+/// .B and .I preserve spaces (single argument); .BI, .BR, .IR alternate
+/// fonts and concatenate arguments.
+pub fn tag_of_macro(name: &str, args: &str) -> String {
+    match name {
+        "B" | "I" => strip_space_macro_args(args),
+        _ => strip_groff_escapes(&strip_inline_macro_args(args)) // every other macro name
+            .trim()
+            .to_string(),
+    }
+}
+
+// strategy a: .TP style (most common — gnu coreutils, help2man).
+// .TP introduces a tagged paragraph: the next line is the "tag" (flag name)
+// and subsequent text lines are the description. the tag can be plain text
+// or wrapped in a formatting macro (.B, .BI, etc.).
+pub fn strategy_tp(lines: &[GroffLine]) -> Vec<ManpageEntry> {
+    let mut out = Vec::new();
+    let mut i = 0;
+    while i < lines.len() {
+        let GroffLine::Macro { name, .. } = &lines[i] else {
+            i += 1;
+            continue;
+        };
+        if name != "TP" {
+            i += 1;
+            continue;
+        }
+
+        let (tags, body_start) = collect_tp_tags(lines, i + 1);
+        if tags.is_empty() {
+            i += 1; // .TP with no usable tag line: move on
+            continue;
+        }
+        let (desc, new_i) = collect_description_lines(lines, body_start);
+        out.extend(entries_from_tag_alternates(&tags, desc));
+        i = new_i; // resume at the macro that ended the description
+    }
+    out
+}
+
+fn collect_tp_tags(lines: &[GroffLine], start: usize) -> (Vec<String>, usize) {
+    let mut tags = Vec::new();
+    let mut i = start;
+    loop {
+        if i >= lines.len() {
+            break;
+        }
+        let Some(tag) = tag_from_line(&lines[i]) else {
+            break; // not a tag line: stop without consuming it
+        };
+        tags.push(tag);
+        i += 1;
+        if i < lines.len() && matches!(&lines[i], GroffLine::Macro { name, .. } if name == "TQ") {
+            i += 1; // .TQ announces another tag for the same entry
+            continue;
+        }
+        break;
+    }
+    (tags, i) // (tags found, index of the first line after them)
+}
+
+fn tag_from_line(line: &GroffLine) -> Option<String> {
+    match line {
+        GroffLine::Text(tag) => Some(tag.clone()), // plain-text tag, taken verbatim
+        GroffLine::Macro { name, args }
+            if matches!(
+                name.as_str(),
+                "B" | "I" | "BI" | "BR" | "IR" | "IB" | "RB" | "RI"
+            ) =>
+        {
+            Some(tag_of_macro(name, args))
+        }
+        _ => None, // blank, comment or any other macro is not a tag
+    }
+}
+
+fn entries_from_tag_alternates(tags: &[String], desc: String) -> Vec<ManpageEntry> {
+    let entries: Vec<ManpageEntry> = tags
+        .iter()
+        .filter_map(|tag| parse_tag_to_entry(tag, desc.clone())) // unparseable tags drop out
+        .collect();
+    if entries.len() == 2
+        && let Some(combined) = combine_short_long_alternates(&entries[0], &entries[1])
+    {
+        return vec![combined]; // exactly one short + one long: merge into a single entry
+    }
+    entries
+}
+
+fn combine_short_long_alternates(
+    left: &ManpageEntry,
+    right: &ManpageEntry,
+) -> Option<ManpageEntry> {
+    match (&left.switch, &right.switch) {
+        (OwnedSwitch::Long(l), OwnedSwitch::Short(c)) => Some(ManpageEntry {
+            switch: OwnedSwitch::Both(*c, l.clone()),
+            param: left.param.clone().or_else(|| right.param.clone()), // long's param first
+            desc: left.desc.clone(),
+        }),
+        (OwnedSwitch::Short(c), OwnedSwitch::Long(l)) => Some(ManpageEntry {
+            switch: OwnedSwitch::Both(*c, l.clone()),
+            param: right.param.clone().or_else(|| left.param.clone()), // long's param first
+            desc: left.desc.clone(),
+        }),
+        _ => None, // any other pairing is left uncombined
+    }
+}
+
+// strategy b: .IP style (curl, hand-written manpages).
+// .IP takes an inline tag argument: .IP "-v, --verbose"
+// the description is the run of text lines that directly follows.
+make_macro_walker!(pub strategy_ip -> Vec<ManpageEntry>, on macro "IP" =>
+    |lines, i, args| {
+        let tag = strip_groff_escapes(args);
+        let (desc, rest) = collect_text_lines(&lines[i + 1..]);
+        let new_i = lines.len() - rest.len(); // index of the first line after the description
+        parse_tag_to_entry(&tag, desc).map(|e| (e, new_i))
+    }
+);
+
+// strategy c: .PP + .RS/.RE style (git, docbook-generated manpages).
+// flag entries are introduced by .PP (paragraph), with the flag name as
+// plain text, followed by a .RS (indent) block containing the description,
+// closed by .RE (de-indent).
+make_macro_walker!(pub strategy_pp_rs -> Vec<ManpageEntry>, on macro "PP" =>
+    |lines, i, _args| {
+        if i + 1 >= lines.len() { return None; }
+        if let GroffLine::Text(tag) = &lines[i + 1] {
+            let (desc, new_i) = collect_pp_rs_desc(lines, i + 2);
+            parse_tag_to_entry(tag, desc).map(|e| (e, new_i))
+        } else {
+            None // the line after .PP is not plain text, so there is no tag
+        }
+    }
+);
+
+fn collect_pp_rs_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
+    let mut acc: Vec<String> = Vec::new();
+    let mut i = start;
+    // outer: look for .RS marker or text
+    while i < lines.len() {
+        match &lines[i] {
+            GroffLine::Macro { name, .. } if name == "RS" => {
+                i += 1;
+                // inside .RS — collect until .RE or boundary macro
+                while i < lines.len() {
+                    match &lines[i] {
+                        GroffLine::Macro { name, .. } if name == "RE" => {
+                            return (acc.join(" "), i + 1); // .RE is consumed
+                        }
+                        GroffLine::Text(t) => {
+                            acc.push(t.clone());
+                            i += 1;
+                        }
+                        GroffLine::Macro { name, .. } if name == "PP" || name == "SH" => {
+                            return (acc.join(" "), i); // boundary macro is left in place
+                        }
+                        _ => i += 1,
+                    }
+                }
+                return (acc.join(" "), i);
+            }
+            GroffLine::Text(t) => {
+                acc.push(t.clone());
+                i += 1;
+            }
+            _ => return (acc.join(" "), i), // anything else before .RS ends the description
+        }
+    }
+    (acc.join(" "), i)
+}
+
+/// strategy d: deroff fallback — flatten text and font-macro lines to plain
+/// text (one output line each), then feed that text through the help parser.
+pub fn strategy_deroff(lines: &[GroffLine]) -> Vec<ManpageEntry> {
+    let mut buffer = String::with_capacity(256);
+    for line in lines {
+        match line {
+            GroffLine::Text(text) => {
+                buffer.push_str(text);
+                buffer.push('\n');
+            }
+            GroffLine::Macro { name, args }
+                if matches!(name.as_str(), "BI" | "BR" | "IR" | "B" | "I") =>
+            {
+                let text = strip_groff_escapes(&strip_inline_macro_args(args));
+                buffer.push_str(&text);
+                buffer.push('\n');
+            }
+            GroffLine::Blank => buffer.push('\n'), // blank lines are preserved
+            _ => (), // comments and all other macros are dropped
+        }
+    }
+    match help_parser(&buffer) {
+        Ok((_, result)) => result
+            .entries
+            .into_iter()
+            .map(|e| ManpageEntry {
+                switch: to_owned_switch(e.switch),
+                param: e.param.map(to_owned_param),
+                desc: e.desc.join(" "),
+            })
+            .collect(),
+        Err(_) => Vec::new(), // a parse failure yields no entries
+    }
+}
+
+fn is_bullet_ip(args: &str) -> bool {
+    !args.trim().is_empty() // any non-blank .IP argument counts as a bullet marker
+}
+
+// strategy e: nix3-style bullet .IP with .UR/.UE hyperlinks.
+// nix's manpages use .IP with bullet markers for flag entries, interleaved
+// with .UR/.UE hyperlink macros. the flag tag is in text lines after the
+// bullet .IP, and the description follows a non-bullet .IP marker.
+make_macro_walker!(pub strategy_nix -> Vec<ManpageEntry>, on macro "IP" =>
+    |lines, i, args| {
+        if !is_bullet_ip(args) { return None; }
+        // collect tag: skip .UR/.UE macros, gather Text lines
+        let mut tag_idx = i + 1;
+        let mut tag_parts: Vec<String> = Vec::new();
+        while tag_idx < lines.len() {
+            match &lines[tag_idx] {
+                GroffLine::Macro { name, .. } if name == "UR" || name == "UE" => {
+                    tag_idx += 1;
+                }
+                GroffLine::Text(t) => {
+                    tag_parts.push(t.clone());
+                    tag_idx += 1;
+                }
+                _ => break, // first line that is neither text nor .UR/.UE ends the tag
+            }
+        }
+        let tag = tag_parts.join(" ");
+        let (desc, new_i) = collect_nix_desc(lines, tag_idx);
+        parse_tag_to_entry(&tag, desc).map(|e| (e, new_i))
+    }
+);
+
+fn collect_nix_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
+    if start >= lines.len() {
+        return (String::new(), start);
+    }
+    let mut i = start;
+    // require non-bullet .IP marker for description
+    if let GroffLine::Macro { name, args } = &lines[i]
+        && name == "IP"
+        && args.trim().is_empty()
+    {
+        i += 1;
+    } else {
+        return (String::new(), start); // no marker: empty description, nothing consumed
+    }
+    let mut parts: Vec<String> = Vec::new();
+    while i < lines.len() {
+        match &lines[i] {
+            GroffLine::Text(t) => {
+                parts.push(t.clone());
+                i += 1;
+            }
+            GroffLine::Macro { name, args } if name == "IP" => {
+                if !args.trim().is_empty() {
+                    // next bullet entry — stop
+                    return (parts.join(" "), i);
+                }
+                // non-bullet .IP = continuation paragraph
+                i += 1;
+            }
+            GroffLine::Macro { name, .. } if name == "SS" || name == "SH" => {
+                return (parts.join(" "), i);
+            }
+            GroffLine::Macro { name, .. } if name == "RS" => {
+                i = skip_rs(lines, i + 1, 1); // skip the whole indented block, text included
+            }
+            GroffLine::Macro { .. } => {
+                i += 1;
+            }
+            GroffLine::Blank | GroffLine::Comment => {
+                i += 1;
+            }
+        }
+    }
+    (parts.join(" "), i)
+}
+
+fn skip_rs(lines: &[GroffLine], start: usize, mut depth: usize) -> usize {
+    // walk forward from `start`, tracking .RS/.RE nesting; `depth` is the
+    // number of .RS blocks already open on entry.
+    for (idx, line) in lines.iter().enumerate().skip(start) {
+        let GroffLine::Macro { name, .. } = line else {
+            continue;
+        };
+        match name.as_str() {
+            "RS" => depth += 1,
+            "RE" if depth == 1 => return idx + 1, // outermost block closed
+            "RE" => depth -= 1,
+            _ => {}
+        }
+    }
+    // ran out of input before the block closed.
+    lines.len().max(start)
+}
+
+/// number of lines in `lines` that are the macro called `name`.
+fn count_macro(name: &str, lines: &[GroffLine]) -> usize {
+    let is_target = |line: &&GroffLine| {
+        matches!(line, GroffLine::Macro { name: found, .. } if found == name)
+    };
+    lines.iter().filter(is_target).count()
+}
+
+/// auto-detect and try strategies, return the result with the most entries.
+/// first counts macros to determine which strategies are applicable,
+/// then runs all applicable ones and picks the winner by entry count
+/// (ties go to the later strategy). if none yields entries, falls back to deroff.
+pub fn extract_entries(lines: &[GroffLine]) -> Vec<ManpageEntry> {
+    let tp = count_macro("TP", lines);
+    let ip = count_macro("IP", lines);
+    let pp = count_macro("PP", lines);
+    let rs = count_macro("RS", lines);
+    let ur = count_macro("UR", lines);
+
+    let mut specialized: Vec<(&str, Vec<ManpageEntry>)> = Vec::new(); // (label, entries)
+    if tp > 0 {
+        specialized.push(("TP", strategy_tp(lines)));
+    }
+    if ip > 0 {
+        specialized.push(("IP", strategy_ip(lines)));
+    }
+    if pp > 0 && rs > 0 {
+        specialized.push(("PP+RS", strategy_pp_rs(lines)));
+    }
+    if ur > 0 && ip > 0 {
+        specialized.push(("nix", strategy_nix(lines)));
+    }
+    let candidates: Vec<(&str, Vec<ManpageEntry>)> = {
+        let filtered: Vec<_> = specialized
+            .into_iter()
+            .filter(|(_, e)| !e.is_empty())
+            .collect();
+        if filtered.is_empty() {
+            vec![("deroff", strategy_deroff(lines))]
+        } else {
+            filtered
+        }
+    };
+    let mut best: Vec<ManpageEntry> = Vec::new();
+    for (_, entries) in candidates {
+        if entries.len() >= best.len() {
+            best = entries; // `>=`: on a tie the later candidate replaces the earlier
+        }
+    }
+    best
+}
diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs
new file mode 100644
index 0000000..1f8090a
--- /dev/null
+++ b/src/parsers/mod.rs
@@ -0,0 +1,3 @@
+pub mod help;
+pub mod manpage;
+pub mod nushell;
diff --git a/src/parsers/nushell.rs b/src/parsers/nushell.rs
new file mode 100644
index 0000000..eaf4bcc
--- /dev/null
+++ b/src/parsers/nushell.rs
@@ -0,0 +1,475 @@
+//! generate nushell `extern` definitions from parsed help data.
+//!
+//! this module is the code generation backend. it takes a [`ManpageResult`]
+//! (from the help or manpage parsers) and produces nushell source that defines
+//! `extern` declarations — nushell's mechanism for teaching the shell about
+//! external commands' flags and subcommands so it can offer completions.
+//!
+//! key responsibilities:
+//!   - deduplicating flag entries (same flag from multiple help sources)
+//!   - mapping parameter names to nushell types (path, int, string)
+//!   - formatting flags in nushell syntax: --flag(-f): type  # description
+//!   - handling positional arguments with nushell's ordering constraints
+//!   - escaping special characters for nushell string literals
+
+use std::borrow::Cow;
+use std::collections::{HashMap, HashSet};
+use std::sync::OnceLock;
+
+use crate::parsers::manpage::{
+    ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch,
+};
+use crate::types::Positional;
+
+/// nushell built-in commands and keywords. we must never generate `extern`
+/// definitions for these: doing so would shadow nushell's own implementations.
+/// kept in alphabetical order by hand; update it when nushell adds commands.
+pub const NUSHELL_BUILTINS: &[&str] = &[
+    "alias",
+    "all",
+    "ansi",
+    "any",
+    "append",
+    "ast",
+    "attr",
+    "bits",
+    "break",
+    "bytes",
+    "cal",
+    "cd",
+    "char",
+    "chunk-by",
+    "chunks",
+    "clear",
+    "collect",
+    "columns",
+    "commandline",
+    "compact",
+    "complete",
+    "config",
+    "const",
+    "continue",
+    "cp",
+    "date",
+    "debug",
+    "decode",
+    "def",
+    "default",
+    "describe",
+    "detect",
+    "do",
+    "drop",
+    "du",
+    "each",
+    "echo",
+    "encode",
+    "enumerate",
+    "error",
+    "every",
+    "exec",
+    "exit",
+    "explain",
+    "explore",
+    "export",
+    "export-env",
+    "extern",
+    "fill",
+    "filter",
+    "find",
+    "first",
+    "flatten",
+    "for",
+    "format",
+    "from",
+    "generate",
+    "get",
+    "glob",
+    "grid",
+    "group-by",
+    "hash",
+    "headers",
+    "help",
+    "hide",
+    "hide-env",
+    "histogram",
+    "history",
+    "http",
+    "if",
+    "ignore",
+    "input",
+    "insert",
+    "inspect",
+    "interleave",
+    "into",
+    "is-admin",
+    "is-empty",
+    "is-not-empty",
+    "is-terminal",
+    "items",
+    "job",
+    "join",
+    "keybindings",
+    "kill",
+    "last",
+    "length",
+    "let",
+    "let-env",
+    "lines",
+    "load-env",
+    "loop",
+    "ls",
+    "match",
+    "math",
+    "merge",
+    "metadata",
+    "mkdir",
+    "mktemp",
+    "module",
+    "move",
+    "mut",
+    "mv",
+    "nu-check",
+    "nu-highlight",
+    "open",
+    "overlay",
+    "panic",
+    "par-each",
+    "parse",
+    "path",
+    "plugin",
+    "port",
+    "prepend",
+    "print",
+    "ps",
+    "query",
+    "random",
+    "reduce",
+    "reject",
+    "rename",
+    "return",
+    "reverse",
+    "rm",
+    "roll",
+    "rotate",
+    "run-external",
+    "save",
+    "schema",
+    "scope",
+    "select",
+    "seq",
+    "shuffle",
+    "skip",
+    "sleep",
+    "slice",
+    "sort",
+    "sort-by",
+    "source",
+    "source-env",
+    "split",
+    "start",
+    "stor",
+    "str",
+    "sys",
+    "table",
+    "take",
+    "tee",
+    "term",
+    "timeit",
+    "to",
+    "touch",
+    "transpose",
+    "try",
+    "tutor",
+    "ulimit",
+    "umask",
+    "uname",
+    "uniq",
+    "uniq-by",
+    "unlet",
+    "update",
+    "upsert",
+    "url",
+    "use",
+    "values",
+    "version",
+    "view",
+    "watch",
+    "where",
+    "which",
+    "while",
+    "whoami",
+    "window",
+    "with-env",
+    "wrap",
+    "zip",
+];
+
+fn builtin_set() -> &'static HashSet<&'static str> {
+    static SET: OnceLock<HashSet<&'static str>> = OnceLock::new(); // built on first use
+    SET.get_or_init(|| NUSHELL_BUILTINS.iter().copied().collect())
+}
+
+/// returns true if `cmd` exactly matches (case-sensitively) a name in `NUSHELL_BUILTINS`.
+pub fn is_nushell_builtin(cmd: &str) -> bool {
+    builtin_set().contains(cmd)
+}
+
+/// map parameter names to nushell types.
+/// nushell's `extern` declarations use typed parameters, so we infer the type
+/// from the parameter name. matching is exact and case-sensitive: the listed
+/// file/path names become "path", the listed count names "int", all else "string".
+pub fn nushell_type_of_param(name: &str) -> &'static str {
+    match name {
+        "FILE" | "file" | "PATH" | "path" | "DIR" | "dir" | "DIRECTORY" | "FILENAME"
+        | "PATTERNFILE" => "path",
+        "NUM" | "N" | "COUNT" | "NUMBER" | "int" | "INT" | "COLS" | "WIDTH" | "LINES" | "DEPTH"
+        | "depth" => "int",
+        _ => "string", // unlisted names, including other casings of the ones above
+    }
+}
+
+/// escape a string for embedding in a nushell double-quoted string literal.
+/// `"` and `\` are each prefixed with a backslash; input containing neither
+/// is returned borrowed, without allocating.
+pub fn escape_nu(s: &str) -> Cow<'_, str> {
+    let needs_escape = |c: char| c == '"' || c == '\\';
+    if !s.contains(needs_escape) {
+        return Cow::Borrowed(s);
+    }
+    let mut escaped = String::with_capacity(s.len() + 4);
+    for ch in s.chars() {
+        if needs_escape(ch) {
+            escaped.push('\\');
+        }
+        escaped.push(ch);
+    }
+    Cow::Owned(escaped)
+}
+
+fn entry_key(e: &ManpageEntry) -> String {
+    match &e.switch {
+        OwnedSwitch::Short(c) => format!("-{c}"),
+        OwnedSwitch::Long(l) | OwnedSwitch::Both(_, l) => format!("--{l}"), // keyed by long name
+    }
+}
+
+fn entry_score(e: &ManpageEntry) -> i32 {
+    // +10 short+long switch, +5 parameter, +1 per 10 description bytes (max 5).
+    let mut score = (e.desc.len() / 10).min(5) as i32;
+    if let OwnedSwitch::Both(..) = e.switch {
+        score += 10;
+    }
+    if e.param.is_some() {
+        score += 5;
+    }
+    score
+}
+
+/// deduplicate flag entries that refer to the same flag.
+///
+/// when the same flag appears multiple times (e.g. from overlapping manpage
+/// sections or repeated help text), we keep the "best" version using a score:
+///   - both short+long form present: +10 (most informative)
+///   - has a parameter: +5
+///   - description length bonus: up to +5
+///
+/// after deduplication by long name, we also remove standalone short flags
+/// whose letter is already covered by a Both(short, long) entry. this prevents
+/// emitting both "-v" and "--verbose(-v)" which nushell would reject as a
+/// duplicate. the filtering preserves original ordering from the help text.
+pub fn dedup_entries(entries: &[ManpageEntry]) -> Vec<ManpageEntry> {
+    let mut best: HashMap<String, &ManpageEntry> = HashMap::new(); // key -> top-scoring entry
+    for e in entries {
+        let key = entry_key(e);
+        match best.get(&key) {
+            Some(prev) if entry_score(prev) >= entry_score(e) => {} // ties keep the earlier one
+            _ => {
+                best.insert(key, e);
+            }
+        }
+    }
+    let mut covered: HashSet<char> = HashSet::new(); // short letters owned by a Both winner
+    for e in best.values() {
+        if let OwnedSwitch::Both(c, _) = &e.switch {
+            covered.insert(*c);
+        }
+    }
+    let mut seen: HashSet<String> = HashSet::new();
+    let mut out: Vec<ManpageEntry> = Vec::new();
+    for e in entries {
+        let key = entry_key(e);
+        if seen.contains(&key) {
+            continue;
+        }
+        if let OwnedSwitch::Short(c) = &e.switch
+            && covered.contains(c)
+        {
+            continue;
+        }
+        seen.insert(key.clone());
+        out.push((*best.get(&key).unwrap()).clone()); // every key was inserted in the first pass
+    }
+    out
+}
+
+/// format a single flag entry as a nushell `extern` parameter line.
+/// output examples:
+///   "    --verbose(-v)                       # increase verbosity"
+///   "    --output(-o): path                  # write output to file"
+///   "    -n: int                             # number of results"
+///
+/// the flag text is space-padded to 40 bytes (at least one space) before the "# " comment.
+pub fn format_flag(entry: &ManpageEntry) -> String {
+    let name = match &entry.switch {
+        OwnedSwitch::Both(c, l) => format!("--{l}(-{c})"),
+        OwnedSwitch::Long(l) => format!("--{l}"),
+        OwnedSwitch::Short(c) => format!("-{c}"),
+    };
+    let typed = match &entry.param {
+        Some(OwnedParam::Mandatory(p)) | Some(OwnedParam::Optional(p)) => {
+            format!(": {}", nushell_type_of_param(p))
+        }
+        None => String::new(), // a flag without a parameter gets no type suffix
+    };
+    let flag = format!("    {name}{typed}");
+    if entry.desc.is_empty() {
+        flag
+    } else {
+        let pad_len = 40usize.saturating_sub(flag.len()).max(1);
+        format!("{flag}{}# {}", " ".repeat(pad_len), entry.desc)
+    }
+}
+
+/// render one positional argument as a line inside a nushell `extern` block.
+/// variadic arguments become "...name: type", optional ones "name?: type";
+/// the type comes from `nushell_type_of_param` applied to the upper-cased
+/// name. hyphens become underscores: nushell identifiers cannot contain them.
+pub fn format_positional(name: &str, p: &Positional) -> String {
+    let ident = name.replace('-', "_");
+    let typ = nushell_type_of_param(&name.to_ascii_uppercase());
+    // a rest parameter never takes the "?" marker, even when flagged optional.
+    match (p.variadic, p.optional) {
+        (true, _) => format!("    ...{ident}: {typ}"),
+        (false, true) => format!("    {ident}?: {typ}"),
+        (false, false) => format!("    {ident}: {typ}"),
+    }
+}
+
+/// enforce nushell's positional argument ordering rules:
+///   1. no required positional may follow an optional one
+///   2. at most one variadic ("rest") parameter is allowed
+///
+/// a required positional that appears after an optional or variadic one is
+/// silently promoted to optional; every variadic after the first is dropped.
+pub fn fixup_positionals(positionals: Vec<(String, Positional)>) -> Vec<(String, Positional)> {
+    let mut optional_from_here = false;
+    let mut have_rest = false;
+    let mut fixed = Vec::with_capacity(positionals.len());
+    for (name, mut p) in positionals {
+        if p.variadic && have_rest {
+            // rule 2: only the first rest parameter survives.
+            continue;
+        }
+        if p.variadic {
+            have_rest = true;
+            optional_from_here = true;
+        } else if optional_from_here {
+            // rule 1: promote instead of emitting an invalid signature.
+            p.optional = true;
+        } else {
+            optional_from_here = p.optional;
+        }
+        fixed.push((name, p));
+    }
+    fixed
+}
+
+/// derive a nushell `module` name from a command name.
+/// ASCII letters, digits, '-' and '_' are kept; every other character
+/// (spaces, dots, slashes, non-ASCII, ...) becomes a hyphen, and
+/// "-completions" is appended.
+pub fn module_name_of(cmd_name: &str) -> String {
+    let mut module = String::with_capacity(cmd_name.len() + "-completions".len());
+    for c in cmd_name.chars() {
+        if c.is_ascii_alphanumeric() || matches!(c, '-' | '_') {
+            module.push(c);
+        } else {
+            module.push('-');
+        }
+    }
+    module.push_str("-completions");
+    module
+}
+
+/// generate the full nushell `extern` block for a command.
+///
+/// positionals (after `fixup_positionals`) are listed first, then the
+/// deduplicated flags, one parameter per line:
+///   export extern "git add" [
+///     ...pathspec: path
+///     --verbose(-v)              # be verbose
+///     --dry-run(-n)              # dry run
+///   ]
+///
+/// every entry in `result.subcommands` additionally gets a stub `extern`
+/// block of its own, carrying nothing but its description as a comment:
+///   export extern "git stash" [  # stash changes
+///   ]
+/// the command and subcommand names and descriptions pass through `escape_nu`.
+pub fn generate_extern(cmd_name: &str, result: &ManpageResult) -> String {
+    let quoted = escape_nu(cmd_name);
+    let positionals = fixup_positionals(result.positionals.clone());
+    let flags = dedup_entries(&result.entries);
+
+    // main block: positionals first, then flags, one parameter per line.
+    let mut text = format!("export extern \"{quoted}\" [\n");
+    let params = positionals
+        .iter()
+        .map(|(name, p)| format_positional(name, p))
+        .chain(flags.iter().map(format_flag));
+    for line in params {
+        text.push_str(&line);
+        text.push('\n');
+    }
+    text.push_str("]\n");
+
+    // one stub block per listed subcommand, description as a trailing comment.
+    for sc in &result.subcommands {
+        let sub = escape_nu(&sc.name);
+        let desc = escape_nu(&sc.desc);
+        text.push_str(&format!("\nexport extern \"{quoted} {sub}\" [  # {desc}\n]\n"));
+    }
+    text
+}
+
+/// generate a complete nushell `module` wrapping the `extern`.
+/// output: "module git-completions { ... }\n\nuse git-completions *\n"
+/// the `extern` text already ends in a newline, so the closing brace of the
+/// module lands on its own line. the trailing `use` makes the `extern`
+/// immediately available in scope.
+pub fn generate_module(cmd_name: &str, result: &ManpageResult) -> String {
+    let module = module_name_of(cmd_name);
+    let body = generate_extern(cmd_name, result);
+    format!("module {module} {{\n{body}}}\n\nuse {module} *\n")
+}
+
+/// convenience wrapper: generate an `extern` from just a list of entries,
+/// i.e. a result with no subcommands, positionals or description.
+pub fn generate_extern_from_entries(cmd_name: &str, entries: Vec<ManpageEntry>) -> String {
+    // wrap the flags in an otherwise empty result and reuse `generate_extern`.
+    let flags_only = ManpageResult {
+        entries,
+        subcommands: Vec::new(),
+        positionals: Vec::new(),
+        description: String::new(),
+    };
+    generate_extern(cmd_name, &flags_only)
+}
+
+/// build an owned `ManpageSubcommand` stub from a borrowed name and
+/// description, for subcommands taken from a parsed help result.
+pub fn manpage_subcommand_from(name: &str, desc: &str) -> ManpageSubcommand {
+    // copy both borrowed strings into owned fields.
+    let name = name.to_owned();
+    let desc = desc.to_owned();
+    ManpageSubcommand { name, desc }
+}
diff --git a/src/pool.rs b/src/pool.rs
new file mode 100644
index 0000000..76fee66
--- /dev/null
+++ b/src/pool.rs
@@ -0,0 +1,233 @@
+//! BFS-queue worker pool for parallel subprocess scraping.
+//!
+//! workers pull jobs from a shared queue and call a user-supplied
+//! handler; the handler gets a `Submitter` to push newly-discovered
+//! child jobs back onto the same queue. once `wait` has been called and
+//! the in-flight count reaches zero, the workers exit and `wait` returns.
+//!
+//! the queue-back design is deliberate: command-help trees are uneven
+//! (one binary has 30 subs, another has 1). queue-back keeps every
+//! worker fed; spawn-in-place would leave cores idle on lopsided trees.
+//!
+//! synchronization: `parking_lot::Condvar` parks workers when the queue is
+//! empty. the queue, in-flight count, and close state live under one mutex so
+//! the condvar predicate cannot miss a wakeup.
+//! parking_lot gives no-poison locks (no `Result` noise on every `lock()`)
+//! and an uncontended fast path that is a single atomic operation, no syscall.
+
+use std::collections::VecDeque;
+use std::sync::Arc;
+use std::thread::{self, JoinHandle};
+
+use parking_lot::{Condvar, Mutex};
+
+/// mutable pool state; every field is read and written under `Inner::state`.
+struct State<J> {
+    queue: VecDeque<J>,
+    /// jobs submitted but not yet completed, i.e. queued plus in-progress;
+    /// bumped by `submit`, decremented by `complete`.
+    in_flight: usize,
+    /// set by `wait()`, which is also where top-level submission ends. workers
+    /// only exit once this is true and `in_flight` is 0.
+    closed: bool,
+}
+
+/// state shared (behind an `Arc`) by the pool, its workers and every
+/// `Submitter`: the locked queue state plus the condvar workers park on.
+struct Inner<J> {
+    state: Mutex<State<J>>,
+    notify: Condvar,
+}
+
+impl<J> Inner<J> {
+    /// queue `job`, count it as in flight, and wake one parked worker.
+    fn submit(&self, job: J) {
+        let mut guard = self.state.lock();
+        guard.queue.push_back(job);
+        guard.in_flight += 1;
+        self.notify.notify_one();
+    }
+
+    /// block until a job can be popped; `None` once the pool is closed and
+    /// no job is queued or running, which tells the calling worker to exit.
+    fn next(&self) -> Option<J> {
+        let mut guard = self.state.lock();
+        while guard.queue.is_empty() {
+            if guard.closed && guard.in_flight == 0 {
+                return None;
+            }
+            self.notify.wait(&mut guard);
+        }
+        guard.queue.pop_front()
+    }
+
+    /// mark one job finished; the last one after `wait()` closed the pool wakes all workers.
+    fn complete(&self) {
+        let mut guard = self.state.lock();
+        guard.in_flight -= 1;
+        if guard.closed && guard.in_flight == 0 {
+            self.notify.notify_all();
+        }
+    }
+}
+
+/// handle that lets a job handler enqueue further jobs on the pool it is
+/// running in. handlers get it by reference; a clone only bumps an `Arc`.
+pub struct Submitter<J> {
+    inner: Arc<Inner<J>>,
+}
+
+impl<J> Clone for Submitter<J> {
+    fn clone(&self) -> Self {
+        let inner = Arc::clone(&self.inner);
+        Submitter { inner }
+    }
+}
+
+impl<J> Submitter<J> {
+    /// push `job` onto the shared queue and wake one parked worker.
+    pub fn submit(&self, job: J) {
+        Inner::submit(&self.inner, job);
+    }
+}
+
+/// BFS-queue worker pool. each worker pulls a job, calls the handler
+/// (which may submit further jobs via the passed `Submitter`), then marks
+/// the job complete. once `wait` has been called and in-flight reaches
+/// zero, the workers exit and `wait` returns.
+pub struct ScrapePool<J> {
+    inner: Arc<Inner<J>>,
+    workers: Vec<JoinHandle<()>>,
+}
+
+impl<J: Send + 'static> ScrapePool<J> {
+    /// spawn `num_workers` threads (at least one) that run `handler` on each
+    /// job; it receives the job by value and a `&Submitter` for children.
+    pub fn new<F>(num_workers: usize, handler: F) -> Self
+    where
+        F: Fn(J, &Submitter<J>) + Send + Sync + 'static,
+    {
+        let inner = Arc::new(Inner {
+            state: Mutex::new(State {
+                queue: VecDeque::new(),
+                in_flight: 0,
+                closed: false,
+            }),
+            notify: Condvar::new(),
+        });
+        let handler = Arc::new(handler);
+        let workers = (0..num_workers.max(1))
+            .map(|_| {
+                let inner = inner.clone();
+                let handler = handler.clone();
+                thread::spawn(move || {
+                    let submitter = Submitter {
+                        inner: inner.clone(),
+                    };
+                    while let Some(job) = inner.next() {
+                        // a panicking handler must still count as complete, or
+                        // `in_flight` never hits 0 and `wait` blocks forever.
+                        let run = std::panic::AssertUnwindSafe(|| handler(job, &submitter));
+                        let outcome = std::panic::catch_unwind(run);
+                        inner.complete();
+                        if let Err(payload) = outcome {
+                            std::panic::resume_unwind(payload);
+                        }
+                    }
+                })
+            })
+            .collect();
+        ScrapePool { inner, workers }
+    }
+
+    /// submit a top-level job; handlers should use `Submitter::submit` instead.
+    pub fn submit(&self, job: J) {
+        self.inner.submit(job);
+    }
+
+    /// block until every job (initial + transitively discovered) is done, then join all workers.
+    pub fn wait(self) {
+        let mut state = self.inner.state.lock();
+        state.closed = true;
+        // wake parked workers so they drain the queue or exit if it is empty.
+        self.inner.notify.notify_all();
+        drop(state);
+        for w in self.workers {
+            let _ = w.join();
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    use std::time::Duration;
+
+    #[test]
+    fn flat_jobs_processed_once_each() {
+        let collected: Arc<Mutex<Vec<u32>>> = Arc::new(Mutex::new(Vec::new()));
+        let pool = ScrapePool::new(4, {
+            let collected = collected.clone();
+            move |n: u32, _: &Submitter<u32>| {
+                collected.lock().push(n);
+            }
+        });
+        for i in 0..100u32 {
+            pool.submit(i);
+        }
+        pool.wait();
+        let mut got = collected.lock().clone();
+        got.sort();
+        assert_eq!(got, (0..100).collect::<Vec<_>>());
+    }
+
+    #[test]
+    fn discovered_children_processed_to_completion() {
+        // BFS expansion: each odd seed submits its successor from inside the handler.
+        let collected: Arc<Mutex<Vec<u32>>> = Arc::new(Mutex::new(Vec::new()));
+        let pool = ScrapePool::new(2, {
+            let collected = collected.clone();
+            move |n: u32, sub: &Submitter<u32>| {
+                collected.lock().push(n);
+                if n < 10 && n % 2 == 1 {
+                    sub.submit(n + 1);
+                }
+            }
+        });
+        for i in [1u32, 3, 5, 7, 9] {
+            pool.submit(i);
+        }
+        pool.wait();
+        let mut got = collected.lock().clone();
+        got.sort();
+        assert_eq!(got, vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
+    }
+
+    #[test]
+    fn transient_empty_queue_before_wait_does_not_stop_workers() {
+        let processed = Arc::new(AtomicUsize::new(0));
+        let pool = ScrapePool::new(1, {
+            let processed = processed.clone();
+            move |_: u32, _: &Submitter<u32>| {
+                processed.fetch_add(1, Ordering::SeqCst);
+            }
+        });
+        // run one job, leave the queue empty for a moment, then submit another.
+        pool.submit(1);
+        while processed.load(Ordering::SeqCst) == 0 {
+            thread::yield_now();
+        }
+        thread::sleep(Duration::from_millis(10));
+        pool.submit(2);
+        pool.wait();
+        // the single worker must have survived the empty period and run job 2.
+        assert_eq!(processed.load(Ordering::SeqCst), 2);
+    }
+
+    #[test]
+    fn wait_with_no_jobs_returns_immediately() {
+        let pool: ScrapePool<()> = ScrapePool::new(2, |_, _| {});
+        pool.wait();
+    }
+}
diff --git a/src/store.rs b/src/store.rs
new file mode 100644
index 0000000..fd1a09c
--- /dev/null
+++ b/src/store.rs
@@ -0,0 +1,657 @@
+//! filesystem store for parsed completion data.
+//!
+//! write side: serialize ManpageResult to JSON, derive sanitised
+//! filenames from command names ("git add" → git_add.json).
+//!
+//! read side: look up a command by name across the user cache + system
+//! dirs, deserialize JSON or parse a .nu extern blob back into a result.
+
+use std::collections::HashMap;
+use std::fs;
+use std::io;
+use std::path::{Path, PathBuf};
+
+use serde_json::Value;
+
+use crate::parsers::manpage::{
+    ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch,
+};
+use crate::types::Positional;
+
+/// default cache directory: $XDG_CACHE_HOME/inshellah when that variable is
+/// set and non-empty, else $HOME/.cache/inshellah, else (no usable $HOME)
+/// the relative path .cache/inshellah.
+pub fn default_store_path() -> PathBuf {
+    match std::env::var("XDG_CACHE_HOME") {
+        Ok(xdg) if !xdg.is_empty() => return PathBuf::from(xdg).join("inshellah"),
+        _ => {}
+    }
+    match std::env::var("HOME") {
+        Ok(home) => PathBuf::from(home).join(".cache/inshellah"),
+        Err(_) => PathBuf::from(".cache/inshellah"),
+    }
+}
+
+/// create `dir` together with any missing parent directories.
+pub fn ensure_dir(dir: &Path) -> io::Result<()> {
+    fs::create_dir_all(dir)
+}
+
+/// derive a safe filename from a command name.
+/// ASCII letters, digits, '-', '_' and '.' are kept; spaces (as in the
+/// subcommand name "git add" → "git_add") and every other character become
+/// '_'. the mapping is lossy: "git add" and "git_add" give the same filename.
+pub fn filename_of_command(cmd: &str) -> String {
+    let mut name = String::with_capacity(cmd.len());
+    for c in cmd.chars() {
+        let safe = c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.');
+        name.push(if safe { c } else { '_' });
+    }
+    name
+}
+
+/// reverse: a filename "git_add" produces command name "git add".
+/// every underscore becomes a space unconditionally, so a name that really
+/// contained an underscore comes back with a space instead; the original
+/// author accepted this because the read side only uses it for display.
+pub fn command_of_filename(base: &str) -> String {
+    base.split('_').collect::<Vec<_>>().join(" ")
+}
+
+/// escape `s` for use inside a JSON string literal (surrounding quotes are
+/// added by the caller). control characters without a short form use \u00XX.
+fn escape_json(s: &str) -> String {
+    let mut escaped = String::with_capacity(s.len() + 2);
+    for ch in s.chars() {
+        match ch {
+            '\\' => escaped.push_str("\\\\"),
+            '"' => escaped.push_str("\\\""),
+            '\x08' => escaped.push_str("\\b"),
+            '\x0c' => escaped.push_str("\\f"),
+            '\n' => escaped.push_str("\\n"),
+            '\r' => escaped.push_str("\\r"),
+            '\t' => escaped.push_str("\\t"),
+            '\0'..='\x1f' => escaped.push_str(&format!("\\u{:04x}", ch as u32)),
+            _ => escaped.push(ch),
+        }
+    }
+    escaped
+}
+
+fn json_string(s: &str) -> String {
+    ["\"", escape_json(s).as_str(), "\""].concat()
+}
+
+/// serialize a switch as a JSON object tagged by "type":
+///   Short → {"type":"short","char":"v"}
+///   Long  → {"type":"long","name":"verbose"}
+///   Both  → {"type":"both","char":"v","name":"verbose"}
+/// this is the shape `switch_from_json` reads back.
+fn json_switch(s: &OwnedSwitch) -> String {
+    // the short letter is stored as a one-character JSON string.
+    let letter = |c: &char| json_string(&c.to_string());
+    match s {
+        OwnedSwitch::Short(c) => format!(r#"{{"type":"short","char":{}}}"#, letter(c)),
+        OwnedSwitch::Long(l) => format!(r#"{{"type":"long","name":{}}}"#, json_string(l)),
+        OwnedSwitch::Both(c, l) => format!(
+            r#"{{"type":"both","char":{},"name":{}}}"#,
+            letter(c),
+            json_string(l)
+        ),
+    }
+}
+
+/// serialize an optional parameter: `null` when absent, otherwise
+/// {"kind":"mandatory"|"optional","name":...}, the shape `param_from_json`
+/// reads back.
+fn json_param(p: &Option<OwnedParam>) -> String {
+    let (kind, name) = match p {
+        Some(OwnedParam::Mandatory(n)) => ("mandatory", n),
+        Some(OwnedParam::Optional(n)) => ("optional", n),
+        None => return "null".to_string(),
+    };
+    format!(r#"{{"kind":"{kind}","name":{}}}"#, json_string(name))
+}
+
+/// serialize one flag entry as {"switch":...,"param":...,"desc":...};
+/// "param" is `null` when the flag takes no value.
+fn json_entry(e: &ManpageEntry) -> String {
+    let switch = json_switch(&e.switch);
+    let param = json_param(&e.param);
+    let desc = json_string(&e.desc);
+    format!(r#"{{"switch":{switch},"param":{param},"desc":{desc}}}"#)
+}
+
+/// serialize a subcommand as {"name":...,"desc":...}, the shape
+/// `subcommand_from_json` reads back.
+fn json_subcommand(sc: &ManpageSubcommand) -> String {
+    let name = json_string(&sc.name);
+    let desc = json_string(&sc.desc);
+    format!(r#"{{"name":{name},"desc":{desc}}}"#)
+}
+
+/// serialize a positional as {"name":...,"optional":bool,"variadic":bool};
+/// the two flags are written as bare JSON booleans and the name is stored
+/// exactly as given.
+fn json_positional(name: &str, p: &Positional) -> String {
+    let name = json_string(name);
+    let Positional { optional, variadic } = p;
+    format!(r#"{{"name":{name},"optional":{optional},"variadic":{variadic}}}"#)
+}
+
+/// render each item through `f` and join the pieces into a JSON array literal.
+fn json_list<T, F: Fn(&T) -> String>(items: &[T], f: F) -> String {
+    format!("[{}]", items.iter().map(f).collect::<Vec<_>>().join(","))
+}
+
+/// serialize a ManpageResult to a compact, single-line JSON object:
+///   {"source":..., "description":..., "entries":[...],
+///    "subcommands":[...], "positionals":[...]}
+/// `source` is stored verbatim; `lookup` treats the value "help" as the
+/// lowest-priority source.
+pub fn json_of_result(source: &str, result: &ManpageResult) -> String {
+    // fields in output order; every value is already rendered as JSON.
+    let fields = [
+        ("source", json_string(source)),
+        ("description", json_string(&result.description)),
+        ("entries", json_list(&result.entries, json_entry)),
+        ("subcommands", json_list(&result.subcommands, json_subcommand)),
+        ("positionals", json_list(&result.positionals, |(n, p)| json_positional(n, p))),
+    ];
+    // the keys are fixed ASCII literals, so they need no escaping.
+    let members: Vec<String> = fields
+        .iter()
+        .map(|(key, value)| format!("\"{key}\":{value}"))
+        .collect();
+    format!("{{{}}}", members.join(","))
+}
+
+/// write `contents` to `path`, creating any missing parent directories
+/// first. an existing file is overwritten.
+pub fn write_file(path: &Path, contents: &str) -> io::Result<()> {
+    path.parent().map_or(Ok(()), fs::create_dir_all)?;
+    fs::write(path, contents)
+}
+
+/// write the parsed result for `command` into `dir` as "<filename>.json".
+pub fn write_result(
+    dir: &Path,
+    command: &str,
+    source: &str,
+    result: &ManpageResult,
+) -> io::Result<()> {
+    let json = json_of_result(source, result);
+    write_file(&dir.join(filename_of_command(command) + ".json"), &json)
+}
+
+/// write a native-nushell completion blob (the binary supplied its own).
+pub fn write_native(dir: &Path, command: &str, data: &str) -> io::Result<()> {
+    let file = filename_of_command(command) + ".nu";
+    write_file(&dir.join(file), data)
+}
+
+// --- read side ---
+
+fn read_file(path: &Path) -> Option<String> {
+    fs::read_to_string(path).ok() // any I/O or UTF-8 error is flattened to None
+}
+
+/// read one JSON cache file: its "source" tag ("json" if absent) plus the decoded result.
+fn read_json_result(path: &Path) -> Option<(String, ManpageResult)> {
+    let text = read_file(path)?;
+    let root: Value = serde_json::from_str(&text).ok()?;
+    let source = match root.get("source").and_then(Value::as_str) {
+        Some(tag) => tag.to_string(),
+        None => "json".to_string(),
+    };
+    Some((source, result_from_json(&root)))
+}
+
+/// decode a switch object written by `json_switch`. yields `None` on a
+/// missing or non-string field, an empty "char" string, or an unknown
+/// "type". only the first character of "char" is used.
+fn switch_from_json(v: &Value) -> Option<OwnedSwitch> {
+    let text = |key: &str| v.get(key).and_then(Value::as_str);
+    let letter = || text("char")?.chars().next();
+    match text("type")? {
+        "short" => Some(OwnedSwitch::Short(letter()?)),
+        "long" => Some(OwnedSwitch::Long(text("name")?.to_string())),
+        "both" => {
+            let c = letter()?;
+            Some(OwnedSwitch::Both(c, text("name")?.to_string()))
+        }
+        _ => None,
+    }
+}
+
+/// decode a parameter object written by `json_param`. JSON `null` (no
+/// parameter), a malformed object, or an unknown "kind" all yield `None`.
+fn param_from_json(v: &Value) -> Option<OwnedParam> {
+    // indexing JSON null (or any non-object) yields None, covering "no parameter".
+    let kind = v.get("kind")?.as_str()?;
+    let name = v.get("name")?.as_str()?.to_string();
+    match kind {
+        "mandatory" => Some(OwnedParam::Mandatory(name)),
+        "optional" => Some(OwnedParam::Optional(name)),
+        _ => None,
+    }
+}
+
+/// decode one entry; an invalid "switch" rejects it, "param"/"desc" are optional.
+fn entry_from_json(v: &Value) -> Option<ManpageEntry> {
+    let switch = switch_from_json(v.get("switch")?)?;
+    let param = v.get("param").and_then(param_from_json);
+    let desc = match v.get("desc").and_then(Value::as_str) {
+        Some(text) => text.to_string(),
+        None => String::new(),
+    };
+    Some(ManpageEntry {
+        switch,
+        param,
+        desc,
+    })
+}
+
+/// decode a {"name","desc"} object; "name" is required, "desc" defaults to "".
+fn subcommand_from_json(v: &Value) -> Option<ManpageSubcommand> {
+    let name = v.get("name")?.as_str()?.to_string();
+    let desc = match v.get("desc").and_then(Value::as_str) {
+        Some(text) => text.to_string(),
+        None => String::new(),
+    };
+    Some(ManpageSubcommand { name, desc })
+}
+
+/// decode a positional; "name" is required, both flags default to false.
+fn positional_from_json(v: &Value) -> Option<(String, Positional)> {
+    let flag = |key: &str| v.get(key).and_then(Value::as_bool).unwrap_or(false);
+    let (optional, variadic) = (flag("optional"), flag("variadic"));
+    Some((v.get("name")?.as_str()?.to_string(), Positional { optional, variadic }))
+}
+
+/// deserialize a JSON cache entry into ManpageResult.
+///
+/// reads "description", "entries", "subcommands" and "positionals" (the
+/// shape written by `json_of_result`). a missing or mistyped field falls back
+/// to an empty value and malformed list elements are skipped, so this never
+/// fails. "source" is not read here.
+pub fn result_from_json(v: &Value) -> ManpageResult {
+    /// decode the array stored under `key`, dropping elements `decode` rejects.
+    fn list<T>(v: &Value, key: &str, decode: fn(&Value) -> Option<T>) -> Vec<T> {
+        match v.get(key).and_then(Value::as_array) {
+            Some(items) => items.iter().filter_map(decode).collect(),
+            None => Vec::new(),
+        }
+    }
+
+    let description = match v.get("description").and_then(Value::as_str) {
+        Some(text) => text.to_string(),
+        None => String::new(),
+    };
+    let entries = list(v, "entries", entry_from_json);
+    let subcommands = list(v, "subcommands", subcommand_from_json);
+    let positionals = list(v, "positionals", positional_from_json);
+    ManpageResult {
+        entries,
+        subcommands,
+        positionals,
+        description,
+    }
+}
+
+/// parse nushell `export extern` blocks out of a .nu source file.
+/// returns the block named exactly `target_cmd` (entries, positionals,
+/// description), with each `target_cmd sub` block folded into its subcommands
+/// list, or an empty result when no block has that name.
+pub fn parse_nu_completions(target_cmd: &str, contents: &str) -> ManpageResult {
+    let mut blocks: Vec<NuBlock> = Vec::new();
+    let mut current_desc = String::new();
+    let mut in_block = false;
+    let mut block = NuBlock::default();
+
+    for line in contents.split('\n') {
+        let trimmed = line.trim();
+        if !in_block {
+            if let Some(stripped) = trimmed.strip_prefix("# ") {
+                current_desc = stripped.trim().to_string();
+            } else if trimmed.contains("export extern")
+                && let Some(cmd) = extract_extern_name(trimmed)
+            {
+                block = NuBlock {
+                    cmd,
+                    description: std::mem::take(&mut current_desc),
+                    ..Default::default()
+                };
+                // `export extern foo []` opens and closes on one line: finish it
+                // now instead of swallowing later lines (and headers) as params.
+                let tail = trimmed.split_once('[').map_or("", |(_, rest)| rest);
+                let params = tail.split_once('#').map_or(tail, |(code, _)| code);
+                in_block = !params.contains(']');
+                if !in_block {
+                    blocks.push(std::mem::take(&mut block));
+                }
+            } else {
+                current_desc.clear();
+            }
+        } else if trimmed.starts_with(']') {
+            blocks.push(std::mem::take(&mut block));
+            in_block = false;
+        } else {
+            let (param_part, desc) = match trimmed.find('#') {
+                Some(idx) => (trimmed[..idx].trim(), trimmed[idx + 1..].trim()),
+                None => (trimmed, ""),
+            };
+            parse_nu_param_line_into(param_part, desc, &mut block);
+        }
+    }
+    if in_block {
+        blocks.push(block);
+    }
+    let Some(matched) = blocks.iter().find(|b| b.cmd == target_cmd) else {
+        return ManpageResult::default();
+    };
+    // immediate children only: "target sub", but not "target sub subsub".
+    let prefix = format!("{target_cmd} ");
+    let subcommands = blocks
+        .iter()
+        .filter_map(|b| {
+            let name = b.cmd.strip_prefix(&prefix)?;
+            (!name.is_empty() && !name.contains(' ')).then(|| ManpageSubcommand {
+                name: name.to_string(),
+                desc: b.description.clone(),
+            })
+        })
+        .collect();
+    ManpageResult {
+        entries: matched.entries.clone(),
+        subcommands,
+        positionals: matched.positionals.clone(),
+        description: matched.description.clone(),
+    }
+}
+
+/// pull the command name out of an `export extern` header line.
+/// accepts a double-quoted name (`export extern "git add" [`) or a bare word
+/// made of ASCII alphanumerics, '_' and '-' (`export extern git [`).
+/// returns `None` when the line has no `export extern` or no usable name.
+fn extract_extern_name(line: &str) -> Option<String> {
+    let (_, after) = line.split_once("export extern")?;
+    let after = after.trim_start();
+    if let Some(quoted) = after.strip_prefix('"') {
+        // no escape handling: the name simply runs up to the next quote, and
+        // an unterminated quote yields `None`.
+        return quoted.split_once('"').map(|(name, _)| name.to_string());
+    }
+    let is_word = |c: char| c.is_ascii_alphanumeric() || c == '_' || c == '-';
+    let end = after.find(|c: char| !is_word(c)).unwrap_or(after.len());
+    // an empty bare word (e.g. the line continues with `[`) is not a name.
+    (end > 0).then(|| after[..end].to_string())
+}
+
+/// parse one parameter line from inside an extern block and append the flag
+/// or positional it describes to `block`.
+/// `param_part` is the line with its trailing comment removed and `desc` is
+/// that comment's text. lines shorter than two bytes and lines that do not
+/// parse are ignored.
+fn parse_nu_param_line_into(param_part: &str, desc: &str, block: &mut NuBlock) {
+    if param_part.len() < 2 {
+        return;
+    }
+    if let Some(after) = param_part.strip_prefix("--") {
+        // long flag: --name(-c): type or --name: type or --name
+        let (name, mut rest) = split_at_non_name_char(after);
+        if name.is_empty() {
+            return;
+        }
+        let mut short: Option<char> = None;
+        if let Some(after_open) = rest.strip_prefix("(-")
+            && let Some(c) = after_open.chars().next()
+            && after_open[c.len_utf8()..].starts_with(')')
+        {
+            short = Some(c);
+            rest = &after_open[c.len_utf8() + 1..];
+        }
+        let switch = match short {
+            Some(c) => OwnedSwitch::Both(c, name.to_string()),
+            None => OwnedSwitch::Long(name.to_string()),
+        };
+        block.entries.push(ManpageEntry {
+            switch,
+            param: parse_type_suffix(rest),
+            desc: desc.to_string(),
+        });
+    } else if param_part.starts_with('-') {
+        // short flag: -c or -c: type. `c` is checked to be ASCII, so byte
+        // offset 2 is where the optional ": type" suffix starts; parsing it
+        // lets lines such as "-n: int" keep their parameter.
+        if let Some(c) = param_part.chars().nth(1)
+            && c.is_ascii_alphanumeric()
+        {
+            block.entries.push(ManpageEntry {
+                switch: OwnedSwitch::Short(c),
+                param: parse_type_suffix(&param_part[2..]),
+                desc: desc.to_string(),
+            });
+        }
+    } else {
+        // positional: name: type or name?: type or ...name: type
+        let variadic = param_part.starts_with("...");
+        let after_prefix = param_part.strip_prefix("...").unwrap_or(param_part);
+        let optional = after_prefix.contains('?');
+        let name_end = after_prefix.find([':', '?']).unwrap_or(after_prefix.len());
+        // hyphens are normalised to underscores in the stored name.
+        let name = after_prefix[..name_end].trim().replace('-', "_");
+        if name.is_empty() {
+            return;
+        }
+        // names are unique per block, compared case-insensitively.
+        let duplicate = block
+            .positionals
+            .iter()
+            .any(|(existing, _)| existing.eq_ignore_ascii_case(&name));
+        if !duplicate {
+            block.positionals.push((
+                name,
+                Positional {
+                    optional: optional || variadic,
+                    variadic,
+                },
+            ));
+        }
+    }
+}
+
+/// split `s` at the first character that cannot be part of a flag name
+/// (anything but ASCII alphanumerics and '-'): returns (name, remainder).
+fn split_at_non_name_char(s: &str) -> (&str, &str) {
+    let is_name = |c: char| c.is_ascii_alphanumeric() || c == '-';
+    s.split_at(s.find(|c: char| !is_name(c)).unwrap_or(s.len()))
+}
+
+/// parse a `: type` suffix into an OwnedParam (always Mandatory since the
+/// nushell extern syntax doesn't distinguish optional-with-default).
+/// only the leading run of ASCII letters is kept as the type name, so
+/// `: list<string>` yields "list". a missing colon or a type that does not
+/// start with a letter gives `None`.
+fn parse_type_suffix(s: &str) -> Option<OwnedParam> {
+    let ty = s.trim_start().strip_prefix(':')?.trim_start();
+    let end = ty
+        .find(|c: char| !c.is_ascii_alphabetic())
+        .unwrap_or(ty.len());
+    match &ty[..end] {
+        "" => None,
+        name => Some(OwnedParam::Mandatory(name.to_string())),
+    }
+}
+
+#[derive(Default)]
+struct NuBlock {
+    cmd: String, // name taken from the `export extern` header
+    entries: Vec<ManpageEntry>, // flags parsed from the block body
+    positionals: Vec<(String, Positional)>, // positional parameters, in file order
+    description: String, // text of the `# ` comment line directly above the header
+}
+
+/// look up a command's parsed result. source priority is native nushell,
+/// then JSON whose source is not "help", then help JSON. parent .nu files are
+/// searched for subcommand lookups because clap-generated .nu files contain
+/// all extern blocks in a single file.
+pub fn lookup(dirs: &[PathBuf], command: &str) -> Option<ManpageResult> {
+    let base_name = filename_of_command(command);
+    // for "git add" this is "git": the file that may hold every extern block.
+    let parent_base = command
+        .find(' ')
+        .map(|i| filename_of_command(&command[..i]));
+
+    for directory in dirs {
+        let nu_path = directory.join(format!("{base_name}.nu"));
+        if let Some(data) = read_file(&nu_path) {
+            return Some(parse_nu_completions(command, &data));
+        }
+        if let Some(pb) = &parent_base {
+            let parent_nu = directory.join(format!("{pb}.nu"));
+            if let Some(data) = read_file(&parent_nu) {
+                let r = parse_nu_completions(command, &data);
+                // an empty result means the parent file has no block for us.
+                if !r.entries.is_empty() || !r.subcommands.is_empty() || !r.positionals.is_empty() {
+                    return Some(r);
+                }
+            }
+        }
+    }
+
+    // single pass over the JSON files, so each is read and parsed only once:
+    // the first result whose source is not "help" wins outright; otherwise
+    // the first "help" result found is returned. unreadable or invalid files
+    // are skipped.
+    let mut help_fallback: Option<ManpageResult> = None;
+    for directory in dirs {
+        let json_path = directory.join(format!("{base_name}.json"));
+        match read_json_result(&json_path) {
+            Some((source, result)) if source != "help" => return Some(result),
+            Some((_, result)) if help_fallback.is_none() => help_fallback = Some(result),
+            _ => {}
+        }
+    }
+    help_fallback
+}
+
+/// look up a command's raw stored data (JSON or .nu source). every
+/// directory is searched for "<name>.nu" before any "<name>.json" is
+/// considered; unlike `lookup`, parent .nu files are not consulted and the
+/// text is returned unparsed.
+pub fn lookup_raw(dirs: &[PathBuf], command: &str) -> Option<String> {
+    let base_name = filename_of_command(command);
+    // extension-major order keeps native .nu data ahead of cached JSON.
+    for ext in ["nu", "json"] {
+        for directory in dirs {
+            let path = directory.join(format!("{base_name}.{ext}"));
+            if let Some(data) = read_file(&path) {
+                return Some(data);
+            }
+        }
+    }
+    None
+}
+
+/// strip a trailing ".json" or ".nu" (tried in that order) from `filename`;
+/// `None` when it ends in neither.
+fn chop_extension(filename: &str) -> Option<&str> {
+    [".json", ".nu"].iter().find_map(|ext| filename.strip_suffix(*ext))
+}
+
+/// list all indexed commands across all store directories.
+/// returns a sorted, deduplicated list of command names, derived from every
+/// ".json" / ".nu" filename via `command_of_filename`. unreadable
+/// directories, unreadable entries and non-UTF-8 filenames are skipped.
+pub fn all_commands(dirs: &[PathBuf]) -> Vec<String> {
+    let names: std::collections::BTreeSet<String> = dirs
+        .iter()
+        .filter_map(|dir| fs::read_dir(dir).ok())
+        .flatten()
+        .flatten()
+        .filter_map(|entry| {
+            let file_name = entry.file_name();
+            let base = chop_extension(file_name.to_str()?)?;
+            Some(command_of_filename(base))
+        })
+        .collect();
+    names.into_iter().collect()
+}
+
+/// discover subcommands of a command by scanning filenames in the store
+/// (e.g. for "git", finds "git_add.json", "git_log.json"). only immediate
+/// children are returned, sorted by name; a .json file supplies its
+/// "description" field, a .nu file gets an empty description.
+pub fn subcommands_of(dirs: &[PathBuf], command: &str) -> Vec<ManpageSubcommand> {
+    /// the "description" string of a JSON cache file, or "" if unavailable.
+    fn description_in(path: &Path) -> String {
+        let Some(text) = read_file(path) else {
+            return String::new();
+        };
+        let Ok(json) = serde_json::from_str::<Value>(&text) else {
+            return String::new();
+        };
+        match json.get("description").and_then(Value::as_str) {
+            Some(d) => d.to_string(),
+            None => String::new(),
+        }
+    }
+    let prefix = format!("{}_", filename_of_command(command));
+    // keyed by subcommand name: the first file seen for a name wins, and the
+    // BTreeMap hands the values back already sorted by name.
+    let mut found = std::collections::BTreeMap::<String, ManpageSubcommand>::new();
+    for directory in dirs {
+        let Ok(listing) = fs::read_dir(directory) else {
+            continue;
+        };
+        for entry in listing.flatten() {
+            let os_name = entry.file_name();
+            let Some(filename) = os_name.to_str() else {
+                continue;
+            };
+            let Some(sub) = filename
+                .strip_prefix(prefix.as_str())
+                .and_then(chop_extension)
+            else {
+                continue;
+            };
+            // an underscore in `sub` means a deeper level ("git_remote_add").
+            if sub.is_empty() || sub.contains('_') || found.contains_key(sub) {
+                continue;
+            }
+            let desc = if filename.ends_with(".json") {
+                description_in(&entry.path())
+            } else {
+                String::new()
+            };
+            let name = sub.to_string();
+            found.insert(name.clone(), ManpageSubcommand { name, desc });
+        }
+    }
+    found.into_values().collect()
+}
+
+/// determine how a command was indexed: "help", "manpage", "native", etc.
+/// a .nu file in any directory means "native" and takes precedence; otherwise
+/// the first existing .json file decides: its "source" field, or "json" when
+/// the file is unreadable, not JSON, or has no string "source".
+pub fn file_type_of(dirs: &[PathBuf], command: &str) -> Option<String> {
+    let base = filename_of_command(command);
+    // first existing "<base>.<ext>" across the directories, in order.
+    let first_existing = |ext: &str| {
+        dirs.iter()
+            .map(|dir| dir.join(format!("{base}.{ext}")))
+            .find(|path| path.exists())
+    };
+    // native completions win over any cached JSON.
+    if first_existing("nu").is_some() {
+        return Some("native".to_string());
+    }
+    let json_path = first_existing("json")?;
+    // only the first existing JSON file is inspected, even if it is broken.
+    let source = read_file(&json_path)
+        .and_then(|text| serde_json::from_str::<Value>(&text).ok())
+        .and_then(|json| Some(json.get("source")?.as_str()?.to_string()));
+    Some(source.unwrap_or_else(|| "json".to_string()))
+}
diff --git a/src/types.rs b/src/types.rs
new file mode 100644
index 0000000..ac6b01d
--- /dev/null
+++ b/src/types.rs
@@ -0,0 +1,34 @@
+pub enum Switch<'a> { // how an option is spelled; names are stored without their leading dashes
+    Short(char), // short form only (a single character)
+    Long(&'a str), // long form only, e.g. `--checkout` would be `Long("checkout")`
+    Both(char, &'a str), // both forms, e.g. `-v, --[no-]verbose` is `Both('v', "verbose")`
+}
+
+pub enum Param<'a> { // the value an option takes, identified by its placeholder name
+    Mandatory(&'a str), // value is required, e.g. `--[no-]jobs <n>` is `Mandatory("n")`
+    Optional(&'a str), // value may be omitted, e.g. `[=<pathspec>]` is `Optional("pathspec")`
+}
+
+pub struct OptionEntry<'a> { // one parsed option; all text is borrowed for `'a`
+    pub switch: Switch<'a>, // how the option is spelled
+    pub param: Option<Param<'a>>, // `None` when the option takes no value
+    pub desc: Vec<&'a str>, // description text, presumably one slice per line (confirm)
+}
+
+pub struct Subcommand<'a> { // one subcommand listed by a command
+    pub name: &'a str, // the subcommand's name
+    pub desc: &'a str, // its description text
+}
+
+#[derive(Debug, Clone)]
+pub struct Positional { // properties of one positional argument
+    pub optional: bool, // presumably true when the argument may be omitted (confirm)
+    pub variadic: bool, // presumably true when the argument may repeat (confirm)
+}
+
+pub struct HelpResult<'a> { // everything collected for one command; all text is borrowed for `'a`
+    pub entries: Vec<OptionEntry<'a>>, // the command's options
+    pub subcommands: Vec<Subcommand<'a>>, // the command's subcommands
+    pub positionals: Vec<(&'a str, Positional)>, // presumably (name, properties) pairs (confirm)
+    pub desc: &'a str, // presumably the command's own description (confirm)
+}
diff --git a/test/dune b/test/dune
deleted file mode 100644
index d54a2fb..0000000
--- a/test/dune
+++ /dev/null
@@ -1,3 +0,0 @@
-(test
- (name test_inshellah)
- (libraries inshellah str))
diff --git a/test/test_inshellah.ml b/test/test_inshellah.ml
deleted file mode 100644
index 8f7b25e..0000000
--- a/test/test_inshellah.ml
+++ /dev/null
@@ -1,610 +0,0 @@
-open Inshellah.Parser
-open Inshellah.Manpage
-open Inshellah.Nushell
-
-let failures = ref 0
-let passes = ref 0
-
-let check name condition =
-  if condition then begin
-    incr passes;
-    Printf.printf "  PASS: %s\n" name
-  end else begin
-    incr failures;
-    Printf.printf "  FAIL: %s\n" name
-  end
-
-let parse txt =
-  match parse_help txt with
-  | Ok r -> r
-  | Error msg -> failwith (Printf.sprintf "parse_help failed: %s" msg)
-
-(* --- Help parser tests --- *)
-
-let test_gnu_basic () =
-  Printf.printf "\n== GNU basic flags ==\n";
-  let r = parse "  -a, --all                  do not ignore entries starting with .\n" in
-  check "one entry" (List.length r.entries = 1);
-  let e = List.hd r.entries in
-  check "both switch" (e.switch = Both ('a', "all"));
-  check "no param" (e.param = None);
-  check "desc" (String.length e.desc > 0)
-
-let test_gnu_eq_param () =
-  Printf.printf "\n== GNU = param ==\n";
-  let r = parse "      --block-size=SIZE      scale sizes by SIZE\n" in
-  check "one entry" (List.length r.entries = 1);
-  let e = List.hd r.entries in
-  check "long switch" (e.switch = Long "block-size");
-  check "mandatory param" (e.param = Some (Mandatory "SIZE"))
-
-let test_gnu_opt_param () =
-  Printf.printf "\n== GNU optional param ==\n";
-  let r = parse "      --color[=WHEN]         color the output WHEN\n" in
-  check "one entry" (List.length r.entries = 1);
-  let e = List.hd r.entries in
-  check "long switch" (e.switch = Long "color");
-  check "optional param" (e.param = Some (Optional "WHEN"))
-
-let test_underscore_param () =
-  Printf.printf "\n== Underscore in param (TIME_STYLE) ==\n";
-  let r = parse "      --time-style=TIME_STYLE  time/date format\n" in
-  check "one entry" (List.length r.entries = 1);
-  let e = List.hd r.entries in
-  check "param with underscore" (e.param = Some (Mandatory "TIME_STYLE"))
-
-let test_short_only () =
-  Printf.printf "\n== Short-only flag ==\n";
-  let r = parse "  -v                       verbose output\n" in
-  check "one entry" (List.length r.entries = 1);
-  check "short switch" ((List.hd r.entries).switch = Short 'v')
-
-let test_long_only () =
-  Printf.printf "\n== Long-only flag ==\n";
-  let r = parse "      --help                 display help\n" in
-  check "one entry" (List.length r.entries = 1);
-  check "long switch" ((List.hd r.entries).switch = Long "help")
-
-let test_multiline_desc () =
-  Printf.printf "\n== Multi-line description ==\n";
-  let r = parse {|      --block-size=SIZE      with -l, scale sizes by SIZE when printing them;
-                               e.g., '--block-size=M'; see SIZE format below
-|} in
-  check "one entry" (List.length r.entries = 1);
-  let e = List.hd r.entries in
-  check "desc includes continuation" (String.length e.desc > 50)
-
-let test_multiple_entries () =
-  Printf.printf "\n== Multiple entries ==\n";
-  let r = parse {|  -a, --all                  do not ignore entries starting with .
-  -A, --almost-all           do not list implied . and ..
-      --author               with -l, print the author of each file
-|} in
-  check "three entries" (List.length r.entries = 3)
-
-let test_clap_short_sections () =
-  Printf.printf "\n== Clap short with section headers ==\n";
-  let r = parse {|INPUT OPTIONS:
-  -e, --regexp=PATTERN       A pattern to search for.
-  -f, --file=PATTERNFILE     Search for patterns from the given file.
-SEARCH OPTIONS:
-  -s, --case-sensitive       Search case sensitively.
-|} in
-  check "three entries" (List.length r.entries = 3);
-  let e = List.hd r.entries in
-  check "first is regexp" (e.switch = Both ('e', "regexp"));
-  check "first has param" (e.param = Some (Mandatory "PATTERN"))
-
-let test_clap_long_style () =
-  Printf.printf "\n== Clap long style (desc below flag) ==\n";
-  let r = parse {|  -H, --hidden
-          Include hidden directories and files.
-
-      --no-ignore
-          Do not respect ignore files.
-|} in
-  check "two entries" (List.length r.entries = 2);
-  let e = List.hd r.entries in
-  check "hidden switch" (e.switch = Both ('H', "hidden"));
-  check "desc below" (String.length e.desc > 0)
-
-let test_clap_long_angle_param () =
-  Printf.printf "\n== Clap long angle bracket param ==\n";
-  let r = parse {|      --nonprintable-notation <notation>
-          Set notation for non-printable characters.
-|} in
-  check "one entry" (List.length r.entries = 1);
-  let e = List.hd r.entries in
-  check "long switch" (e.switch = Long "nonprintable-notation");
-  check "angle param" (e.param = Some (Mandatory "notation"))
-
-let test_space_upper_param () =
-  Printf.printf "\n== Space-separated ALL_CAPS param ==\n";
-  let r = parse "  -f, --foo FOO  foo help\n" in
-  check "one entry" (List.length r.entries = 1);
-  let e = List.hd r.entries in
-  check "switch" (e.switch = Both ('f', "foo"));
-  check "space param" (e.param = Some (Mandatory "FOO"))
-
-let test_go_cobra_flags () =
-  Printf.printf "\n== Go/Cobra flags ==\n";
-  let r = parse {|Flags:
-  -D, --debug              Enable debug mode
-  -H, --host string        Daemon socket to connect to
-  -v, --version            Print version information
-|} in
-  check "three flag entries" (List.length r.entries = 3);
-  (* Check the host flag has a type param *)
-  let host = List.nth r.entries 1 in
-  check "host switch" (host.switch = Both ('H', "host"));
-  check "host type param" (host.param = Some (Mandatory "string"))
-
-let test_go_cobra_subcommands () =
-  Printf.printf "\n== Go/Cobra subcommands ==\n";
-  let r = parse {|Common Commands:
-  run         Create and run a new container from an image
-  exec        Execute a command in a running container
-  build       Build an image from a Dockerfile
-|} in
-  check "has subcommands" (List.length r.subcommands > 0)
-
-let test_busybox_tab () =
-  Printf.printf "\n== Busybox tab-indented ==\n";
-  let r = parse "\t-1\tOne column output\n\t-a\tInclude names starting with .\n" in
-  check "two entries" (List.length r.entries = 2);
-  check "first is -1" ((List.hd r.entries).switch = Short '1')
-
-let test_no_debug_prints () =
-  Printf.printf "\n== No debug side effects ==\n";
-  (* The old parser had print_endline at module load time.
-     If we got here without "opt param is running" on stdout, we're good. *)
-  check "no debug prints" true
-
-(* --- Manpage parser tests --- *)
-
-let test_manpage_tp_style () =
-  Printf.printf "\n== Manpage .TP style ==\n";
-  let groff = {|.SH OPTIONS
-.TP
-\fB\-a\fR, \fB\-\-all\fR
-do not ignore entries starting with .
-.TP
-\fB\-A\fR, \fB\-\-almost\-all\fR
-do not list implied . and ..
-.TP
-\fB\-\-block\-size\fR=\fISIZE\fR
-with \fB\-l\fR, scale sizes by SIZE
-.SH AUTHOR
-Written by someone.
-|} in
-  let result = parse_manpage_string groff in
-  check "three entries" (List.length result.entries = 3);
-  if List.length result.entries >= 1 then begin
-    let e = List.hd result.entries in
-    check "first is -a/--all" (e.switch = Both ('a', "all"));
-    check "first desc" (String.length e.desc > 0)
-  end;
-  if List.length result.entries >= 3 then begin
-    let e = List.nth result.entries 2 in
-    check "block-size switch" (e.switch = Long "block-size");
-    check "block-size param" (e.param = Some (Mandatory "SIZE"))
-  end
-
-let test_manpage_ip_style () =
-  Printf.printf "\n== Manpage .IP style ==\n";
-  let groff = {|.SH OPTIONS
-.IP "\fB\-k\fR, \fB\-\-insecure\fR"
-Allow insecure connections.
-.IP "\fB\-o\fR, \fB\-\-output\fR \fIfile\fR"
-Write output to file.
-.SH SEE ALSO
-|} in
-  let result = parse_manpage_string groff in
-  check "two entries" (List.length result.entries = 2);
-  if List.length result.entries >= 1 then begin
-    let e = List.hd result.entries in
-    check "first is -k/--insecure" (e.switch = Both ('k', "insecure"))
-  end
-
-let test_manpage_groff_stripping () =
-  Printf.printf "\n== Groff escape stripping ==\n";
-  let s = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in
-  check "font escapes removed" (not (String.contains s 'f' && String.contains s 'B'));
-  check "dashes converted" (String.contains s '-');
-  let s2 = strip_groff_escapes {|\(aqhello\(aq|} in
-  check "aq -> quote" (String.contains s2 '\'')
-
-let test_manpage_empty_options () =
-  Printf.printf "\n== Manpage with no OPTIONS section ==\n";
-  let groff = {|.SH NAME
-foo \- does stuff
-.SH DESCRIPTION
-Does stuff.
-|} in
-  let result = parse_manpage_string groff in
-  check "no entries" (List.length result.entries = 0)
-
-let test_slash_switch_separator () =
-  Printf.printf "\n== Slash switch separator (--long / -s) ==\n";
-  let r = parse "  --verbose / -v             Increase verbosity\n" in
-  check "one entry" (List.length r.entries = 1);
-  let e = List.hd r.entries in
-  check "both switch" (e.switch = Both ('v', "verbose"));
-  check "no param" (e.param = None);
-  check "desc" (e.desc = "Increase verbosity")
-
-let test_manpage_nix3_style () =
-  Printf.printf "\n== Manpage nix3 style ==\n";
-  let groff = {|.SH Options
-.SS Logging-related options
-.IP "\(bu" 3
-.UR #opt-verbose
-\f(CR--verbose\fR
-.UE
-/ \f(CR-v\fR
-.IP
-Increase the logging verbosity level.
-.IP "\(bu" 3
-.UR #opt-quiet
-\f(CR--quiet\fR
-.UE
-.IP
-Decrease the logging verbosity level.
-.SH SEE ALSO
-|} in
-  let result = parse_manpage_string groff in
-  check "two entries" (List.length result.entries = 2);
-  if List.length result.entries >= 1 then begin
-    let e = List.hd result.entries in
-    check "verbose is Both" (e.switch = Both ('v', "verbose"));
-    check "verbose desc" (String.length e.desc > 0)
-  end;
-  if List.length result.entries >= 2 then begin
-    let e = List.nth result.entries 1 in
-    check "quiet is Long" (e.switch = Long "quiet");
-    check "quiet desc" (String.length e.desc > 0)
-  end
-
-let test_manpage_nix3_with_params () =
-  Printf.printf "\n== Manpage nix3 with params ==\n";
-  let groff = {|.SH Options
-.IP "\(bu" 3
-.UR #opt-arg
-\f(CR--arg\fR
-.UE
-\fIname\fR \fIexpr\fR
-.IP
-Pass the value as the argument name to Nix functions.
-.IP "\(bu" 3
-.UR #opt-include
-\f(CR--include\fR
-.UE
-/ \f(CR-I\fR \fIpath\fR
-.IP
-Add path to search path entries.
-.IP
-This option may be given multiple times.
-.SH SEE ALSO
-|} in
-  let result = parse_manpage_string groff in
-  check "two entries" (List.length result.entries = 2);
-  if List.length result.entries >= 1 then begin
-    let e = List.hd result.entries in
-    check "arg is Long" (e.switch = Long "arg");
-    check "arg has param" (e.param <> None)
-  end;
-  if List.length result.entries >= 2 then begin
-    let e = List.nth result.entries 1 in
-    check "include is Both" (e.switch = Both ('I', "include"));
-    check "include has path param" (e.param = Some (Mandatory "path"))
-  end
-
-let test_synopsis_subcommand () =
-  Printf.printf "\n== SYNOPSIS subcommand detection ==\n";
-  let groff = {|.SH "SYNOPSIS"
-.sp
-.nf
-\fBgit\fR \fBcommit\fR [\fB\-a\fR | \fB\-\-interactive\fR]
-.fi
-.SH "DESCRIPTION"
-|} in
-  let cmd = extract_synopsis_command groff in
-  check "detected git commit" (cmd = Some "git commit")
-
-let test_synopsis_standalone () =
-  Printf.printf "\n== SYNOPSIS standalone command ==\n";
-  let groff = {|.SH Synopsis
-.LP
-\f(CRnix-build\fR [\fIpaths\fR]
-.SH Description
-|} in
-  let cmd = extract_synopsis_command groff in
-  check "detected nix-build" (cmd = Some "nix-build")
-
-let test_synopsis_nix3 () =
-  Printf.printf "\n== SYNOPSIS nix3 subcommand ==\n";
-  let groff = {|.SH Synopsis
-.LP
-\f(CRnix run\fR [\fIoption\fR] \fIinstallable\fR
-.SH Description
-|} in
-  let cmd = extract_synopsis_command groff in
-  check "detected nix run" (cmd = Some "nix run")
-
-(* --- Nushell generation tests --- *)
-
-let contains s sub =
-  try
-    let _ = Str.search_forward (Str.regexp_string sub) s 0 in true
-  with Not_found -> false
-
-let test_nushell_basic () =
-  Printf.printf "\n== Nushell basic extern ==\n";
-  let r = parse "  -a, --all                  do not ignore entries starting with .\n" in
-  let nu = generate_extern "ls" r in
-  check "has extern" (contains nu "export extern \"ls\"");
-  check "has --all(-a)" (contains nu "--all(-a)");
-  check "has comment" (contains nu "# do not ignore")
-
-let test_nushell_param_types () =
-  Printf.printf "\n== Nushell param type mapping ==\n";
-  let r = parse {|  -w, --width=COLS           set output width
-      --block-size=SIZE      scale sizes
-  -o, --output FILE          output file
-|} in
-  let nu = generate_extern "ls" r in
-  check "COLS -> int" (contains nu "--width(-w): int");
-  check "SIZE -> string" (contains nu "--block-size: string");
-  check "FILE -> path" (contains nu "--output(-o): path")
-
-let test_nushell_subcommands () =
-  Printf.printf "\n== Nushell subcommands ==\n";
-  let r = parse {|Common Commands:
-  run         Create and run a new container
-  exec        Execute a command
-
-Flags:
-  -D, --debug              Enable debug mode
-|} in
-  let nu = generate_extern "docker" r in
-  check "has main extern" (contains nu "export extern \"docker\"");
-  check "has --debug" (contains nu "--debug(-D)");
-  check "has run subcommand" (contains nu "export extern \"docker run\"");
-  check "has exec subcommand" (contains nu "export extern \"docker exec\"")
-
-let test_nushell_from_manpage () =
-  Printf.printf "\n== Nushell from manpage ==\n";
-  let groff = {|.SH OPTIONS
-.TP
-\fB\-a\fR, \fB\-\-all\fR
-do not ignore entries starting with .
-.TP
-\fB\-\-block\-size\fR=\fISIZE\fR
-scale sizes by SIZE
-.SH AUTHOR
-|} in
-  let result = parse_manpage_string groff in
-  let nu = generate_extern "ls" result in
-  check "has extern" (contains nu "export extern \"ls\"");
-  check "has --all(-a)" (contains nu "--all(-a)");
-  check "has --block-size" (contains nu "--block-size: string")
-
-let test_nushell_module () =
-  Printf.printf "\n== Nushell module wrapper ==\n";
-  let r = parse "  -v, --verbose              verbose output\n" in
-  let nu = generate_module "myapp" r in
-  check "has module" (contains nu "module myapp-completions");
-  check "has extern inside" (contains nu "export extern \"myapp\"");
-  check "has flag" (contains nu "--verbose(-v)")
-
-let test_dedup_entries () =
-  Printf.printf "\n== Deduplication ==\n";
-  let r = parse {|  -v, --verbose              verbose output
-  --verbose                  verbose mode
-  -v                         be verbose
-|} in
-  let nu = generate_extern "test" r in
-  (* Count occurrences of --verbose *)
-  let count =
-    let re = Str.regexp_string "--verbose" in
-    let n = ref 0 in
-    let i = ref 0 in
-    (try while true do
-       let _ = Str.search_forward re nu !i in
-       incr n; i := Str.match_end ()
-     done with Not_found -> ());
-    !n
-  in
-  check "verbose appears once" (count = 1);
-  check "best version kept (Both)" (contains nu "--verbose(-v)")
-
-let test_dedup_manpage () =
-  Printf.printf "\n== Dedup from manpage ==\n";
-  let groff = {|.SH OPTIONS
-.TP
-\fB\-v\fR, \fB\-\-verbose\fR
-Be verbose.
-.SH DESCRIPTION
-Use \fB\-v\fR for verbose output.
-Use \fB\-\-verbose\fR to see more.
-|} in
-  let result = parse_manpage_string groff in
-  let nu = generate_extern "test" result in
-  check "has --verbose(-v)" (contains nu "--verbose(-v)");
-  (* Should not have standalone -v or duplicate --verbose *)
-  let lines = String.split_on_char '\n' nu in
-  let verbose_lines = List.filter (fun l -> contains l "verbose") lines in
-  check "only one verbose line" (List.length verbose_lines = 1)
-
-let test_commands_section_subcommands () =
-  Printf.printf "\n== COMMANDS section subcommand extraction ==\n";
-  (* manpages like systemctl have a COMMANDS section with bold command names
-   * inside .PP + .RS/.RE blocks. these should be extracted as subcommands
-   * and treated as leaf nodes (no entries of their own). *)
-  let groff = {|.SH OPTIONS
-.TP
-\fB\-\-user\fR
-Talk to the service manager of the calling user.
-.TP
-\fB\-\-system\fR
-Talk to the service manager of the system.
-.SH COMMANDS
-.PP
-\fBstart\fR \fIUNIT\fR\&...
-.RS 4
-Start (activate) one or more units.
-.RE
-.PP
-\fBstop\fR \fIUNIT\fR\&...
-.RS 4
-Stop (deactivate) one or more units.
-.RE
-.PP
-\fBreload\fR \fIUNIT\fR\&...
-.RS 4
-Asks all units to reload their configuration.
-.RE
-.SH SEE ALSO
-|} in
-  let result = parse_manpage_string groff in
-  check "has options entries" (List.length result.entries = 2);
-  check "has subcommands" (List.length result.subcommands = 3);
-  let sc_names = List.map (fun (sc : subcommand) -> sc.name) result.subcommands in
-  check "has start" (List.mem "start" sc_names);
-  check "has stop" (List.mem "stop" sc_names);
-  check "has reload" (List.mem "reload" sc_names);
-  (* verify subcommand descriptions are extracted *)
-  let start_sc = List.find (fun (sc : subcommand) -> sc.name = "start") result.subcommands in
-  check "start has desc" (String.length start_sc.desc > 0)
-
-let test_self_listing_detection () =
-  Printf.printf "\n== Self-listing subcommand detection ==\n";
-  (* when a subcommand's --help shows the parent's help text,
-   * the subcommand name appears in its own subcommand list.
-   * the parser should detect this — tested via parse_help. *)
-  let help_text = {|systemctl [OPTIONS...] COMMAND ...
-
-Unit Commands:
-  start UNIT...                       Start (activate) one or more units
-  stop UNIT...                        Stop (deactivate) one or more units
-  status [PATTERN...]                 Show runtime status
-
-Options:
-  --user                              Talk to the user service manager
-  --system                            Talk to the system service manager
-|} in
-  let r = parse help_text in
-  let has_start = List.exists (fun (sc : subcommand) -> sc.name = "start") r.subcommands in
-  check "detected start as subcommand" has_start;
-  (* the self-listing logic (in main.ml) would check: is "start" in r.subcommands?
-   * here we just verify the parser extracts it correctly. *)
-  check "has entries too" (List.length r.entries >= 2)
-
-let test_nu_file_parsing () =
-  Printf.printf "\n== .nu file parsing ==\n";
-  let nu_source = {|module completions {
-
-  # Unofficial CLI tool
-  export extern mytool [
-    --help(-h)                # Print help
-    --version(-V)             # Print version
-  ]
-
-  # List all items
-  export extern "mytool list" [
-    --raw                     # Output as JSON
-    --format(-f): string      # Output format
-    --help(-h)                # Print help
-    name?: string             # Filter by name
-  ]
-
-}
-
-use completions *
-|} in
-  let r = Inshellah.Store.parse_nu_completions "mytool" nu_source in
-  check "has entries" (List.length r.entries = 2);
-  check "has subcommands" (List.length r.subcommands >= 1);
-  let list_sc = List.find_opt (fun (sc : subcommand) -> sc.name = "list") r.subcommands in
-  check "has list subcommand" (list_sc <> None);
-  check "description" (r.description = "Unofficial CLI tool");
-  (* test subcommand lookup *)
-  let r2 = Inshellah.Store.parse_nu_completions "mytool list" nu_source in
-  check "list has entries" (List.length r2.entries = 3);
-  let has_format = List.exists (fun (e : entry) ->
-    e.switch = Both ('f', "format")) r2.entries in
-  check "list has --format(-f)" has_format;
-  check "list has positional" (List.length r2.positionals >= 1)
-
-let test_italic_synopsis () =
-  Printf.printf "\n== Italic in SYNOPSIS ==\n";
-  let groff = {|.SH Synopsis
-.LP
-\f(CRnix-env\fR \fIoperation\fR [\fIoptions\fR] [\fIarguments…\fR]
-.SH Description
-|} in
-  let cmd = extract_synopsis_command groff in
-  check "no phantom operation" (cmd = Some "nix-env")
-
-let test_font_boundary_spacing () =
-  Printf.printf "\n== Font boundary spacing ==\n";
-  (* \fB--max-results\fR\fIcount\fR should become "--max-results count" *)
-  let s = strip_groff_escapes {|\fB\-\-max\-results\fR\fIcount\fR|} in
-  check "has space before param" (contains s "--max-results count");
-  (* \fB--color\fR[=\fIWHEN\fR] should NOT insert space before = *)
-  let s2 = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in
-  check "no space before =" (contains s2 "--color[=WHEN]")
-
-let () =
-  Printf.printf "Running help parser tests...\n";
-  test_gnu_basic ();
-  test_gnu_eq_param ();
-  test_gnu_opt_param ();
-  test_underscore_param ();
-  test_short_only ();
-  test_long_only ();
-  test_multiline_desc ();
-  test_multiple_entries ();
-  test_clap_short_sections ();
-  test_clap_long_style ();
-  test_clap_long_angle_param ();
-  test_space_upper_param ();
-  test_go_cobra_flags ();
-  test_go_cobra_subcommands ();
-  test_busybox_tab ();
-  test_no_debug_prints ();
-
-  Printf.printf "\nRunning manpage parser tests...\n";
-  test_manpage_tp_style ();
-  test_manpage_ip_style ();
-  test_manpage_groff_stripping ();
-  test_manpage_empty_options ();
-  test_slash_switch_separator ();
-  test_manpage_nix3_style ();
-  test_manpage_nix3_with_params ();
-  test_synopsis_subcommand ();
-  test_synopsis_standalone ();
-  test_synopsis_nix3 ();
-
-  Printf.printf "\nRunning nushell generation tests...\n";
-  test_nushell_basic ();
-  test_nushell_param_types ();
-  test_nushell_subcommands ();
-  test_nushell_from_manpage ();
-  test_nushell_module ();
-
-  Printf.printf "\nRunning dedup and font tests...\n";
-  test_dedup_entries ();
-  test_dedup_manpage ();
-  test_font_boundary_spacing ();
-
-  Printf.printf "\nRunning COMMANDS section tests...\n";
-  test_commands_section_subcommands ();
-  test_self_listing_detection ();
-
-  Printf.printf "\nRunning .nu and synopsis tests...\n";
-  test_nu_file_parsing ();
-  test_italic_synopsis ();
-
-  Printf.printf "\n=== Results: %d passed, %d failed ===\n" !passes !failures;
-  if !failures > 0 then exit 1
diff --git a/tests/git_clone_fix.rs b/tests/git_clone_fix.rs
new file mode 100644
index 0000000..c12f0e1
--- /dev/null
+++ b/tests/git_clone_fix.rs
@@ -0,0 +1,78 @@
+use inshellah::parsers::help::help_parser;
+
+#[test]
+fn parser_recovers_past_no_bracket_long_form() {
+    // `git clone -h` prints lines such as `--[no-]progress` that switch_parser cannot
+    // parse. the help parser used to get stuck there, because skip_non_option_line
+    // would not skip a line that looks like an option. it now falls through to the
+    // skip, so parsing resumes at the next real entry.
+    let help = r#"usage: git clone [<options>] [--] <repo> [<dir>]
+
+    -v, --[no-]verbose    be more verbose
+    -q, --[no-]quiet      be more quiet
+    --[no-]progress       force progress reporting
+    --[no-]reject-shallow don't clone shallow repository
+    -n, --no-checkout     don't create a checkout
+    --checkout            opposite of --no-checkout
+    -s, --[no-]shared     setup as shared repository
+"#;
+    let (_, parsed) = help_parser(help).expect("parse");
+
+    // the stuck parser yielded only 2 entries (-v, -q). a recovering one also reaches
+    // -n/--no-checkout, --checkout and -s, plus any others.
+    let count = parsed.entries.len();
+    assert!(count >= 4, "expected ≥4 entries, got {}", count);
+
+    let has_verbose = parsed.entries.iter().any(|entry| {
+        matches!(
+            &entry.switch,
+            inshellah::types::Switch::Both('v', long) if *long == "verbose"
+        )
+    });
+    assert!(
+        has_verbose,
+        "expected -v/--verbose from --[no-]verbose, got {:?}",
+        count
+    );
+}
+
+/// negatable flags keep their value: `<n>` stays mandatory, `[=<pathspec>]` stays optional,
+/// and description prose after a flag is not taken for a parameter.
+#[test]
+fn parser_keeps_negatable_params() {
+    use inshellah::types::{Param, Switch};
+
+    let help = r#"usage: git clone [<options>] [--] <repo> [<dir>]
+
+    -j, --[no-]jobs <n>   number of submodules cloned in parallel
+    --[no-]recurse-submodules[=<pathspec>]
+                          initialize submodules in the clone
+    --[no-]reject-shallow don't clone shallow repository
+"#;
+    let (_, parsed) = help_parser(help).expect("parse");
+    let entries = &parsed.entries;
+
+    // `-j, --[no-]jobs <n>`: the angle-bracket value is mandatory
+    let jobs = entries
+        .iter()
+        .find(|opt| matches!(&opt.switch, Switch::Both('j', name) if *name == "jobs"))
+        .expect("jobs entry");
+    assert!(matches!(&jobs.param, Some(Param::Mandatory("n"))));
+
+    // `--[no-]recurse-submodules[=<pathspec>]`: the bracketed value is optional
+    let recurse = entries
+        .iter()
+        .find(|opt| matches!(&opt.switch, Switch::Long(name) if *name == "recurse-submodules"))
+        .expect("recurse-submodules entry");
+    assert!(matches!(&recurse.param, Some(Param::Optional("pathspec"))));
+
+    // `--[no-]reject-shallow don't clone ...`: the trailing words are description only
+    let reject = entries
+        .iter()
+        .find(|opt| matches!(&opt.switch, Switch::Long(name) if *name == "reject-shallow"))
+        .expect("reject-shallow entry");
+    assert!(
+        reject.param.is_none(),
+        "reject-shallow should not parse prose as a param"
+    );
+}
diff --git a/tests/manpage_cli.rs b/tests/manpage_cli.rs
new file mode 100644
index 0000000..8fc2e0d
--- /dev/null
+++ b/tests/manpage_cli.rs
@@ -0,0 +1,150 @@
+use std::fs;
+use std::process::Command;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+/// build (without creating) a path under the system temp dir: `<name>-<pid>-<nanos since epoch>`.
+fn unique_temp_dir(name: &str) -> std::path::PathBuf {
+    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
+    let stamp = since_epoch.expect("system time").as_nanos();
+    let pid = std::process::id();
+    std::env::temp_dir().join(format!("{name}-{pid}-{stamp}"))
+}
+
+/// the extern is named after the SYNOPSIS command (`btrfs check`), not after `btrfs-check`.
+#[test]
+fn manpage_command_uses_synopsis_name() {
+    let root = unique_temp_dir("inshellah-manpage-cli");
+    fs::create_dir_all(&root).expect("temp dir");
+    let manpage = root.join("btrfs-check.8");
+    fs::write(
+        &manpage,
+        r#".SH SYNOPSIS
+btrfs check [options] <device>
+.SH OPTIONS
+.TP
+\fB\-\-repair\fR
+try to repair the filesystem
+"#,
+    )
+    .expect("write manpage");
+    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
+        .arg("manpage")
+        .arg(&manpage)
+        .output();
+    // remove the fixture before anything below can panic, so a failed run leaves no temp dir
+    let _ = fs::remove_dir_all(root);
+    let output = output.expect("run inshellah manpage");
+
+    assert!(
+        output.status.success(),
+        "stderr = {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    let stdout = String::from_utf8(output.stdout).expect("stdout");
+    assert!(
+        stdout.contains("export extern \"btrfs check\""),
+        "stdout = {stdout}"
+    );
+    assert!(
+        !stdout.contains("export extern \"btrfs-check\""),
+        "stdout = {stdout}"
+    );
+}
+
+/// a `git-add (1)` entry under GIT COMMANDS becomes `git add`, not `git git-add`.
+#[test]
+fn manpage_command_strips_git_style_subcommand_prefixes() {
+    let root = unique_temp_dir("inshellah-manpage-cli");
+    fs::create_dir_all(&root).expect("temp dir");
+    let manpage = root.join("git.1");
+    fs::write(
+        &manpage,
+        r#".SH SYNOPSIS
+git [--version] [--help] <command> [<args>]
+.SH OPTIONS
+.TP
+\fB\-\-version\fR
+show version
+.SH "GIT COMMANDS"
+.SS "Main porcelain commands"
+.PP
+.BR git-add (1)
+.RS 4
+Add file contents to the index.
+.RE
+"#,
+    )
+    .expect("write manpage");
+    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
+        .arg("manpage")
+        .arg(&manpage)
+        .output();
+    // remove the fixture before anything below can panic, so a failed run leaves no temp dir
+    let _ = fs::remove_dir_all(root);
+    let output = output.expect("run inshellah manpage");
+
+    assert!(
+        output.status.success(),
+        "stderr = {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    let stdout = String::from_utf8(output.stdout).expect("stdout");
+    assert!(
+        stdout.contains("export extern \"git add\""),
+        "stdout = {stdout}"
+    );
+    assert!(
+        !stdout.contains("export extern \"git git-add\""),
+        "stdout = {stdout}"
+    );
+}
+
+/// a SYNOPSIS that opens with prose must yield `ld.so`, not the first prose word (`The`).
+#[test]
+fn manpage_command_falls_back_when_synopsis_starts_with_prose() {
+    let root = unique_temp_dir("inshellah-manpage-cli");
+    fs::create_dir_all(&root).expect("temp dir");
+    let manpage = root.join("ld.so.8");
+    fs::write(
+        &manpage,
+        r#".SH SYNOPSIS
+The dynamic linker can be run either indirectly by running some
+dynamically linked program or shared object
+(in which case no command-line options
+to the dynamic linker can be passed and, in the ELF case, the dynamic linker
+which is stored in the
+.B .interp
+section of the program is executed) or directly by running:
+.P
+.I /lib/ld\-linux.so.*
+[OPTIONS] [PROGRAM [ARGUMENTS]]
+.SH OPTIONS
+.TP
+.BI \-\-argv0\~ string
+Set argv[0] to the value string.
+"#,
+    )
+    .expect("write manpage");
+    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
+        .arg("manpage")
+        .arg(&manpage)
+        .output();
+    // remove the fixture before anything below can panic, so a failed run leaves no temp dir
+    let _ = fs::remove_dir_all(root);
+    let output = output.expect("run inshellah manpage");
+
+    assert!(
+        output.status.success(),
+        "stderr = {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    let stdout = String::from_utf8(output.stdout).expect("stdout");
+    assert!(
+        stdout.contains("export extern \"ld.so\""),
+        "stdout = {stdout}"
+    );
+    assert!(
+        !stdout.contains("export extern \"The\""),
+        "stdout = {stdout}"
+    );
+}
diff --git a/tests/nushell-completer.nu b/tests/nushell-completer.nu
new file mode 100644
index 0000000..0913c36
--- /dev/null
+++ b/tests/nushell-completer.nu
@@ -0,0 +1,128 @@
+def fail [msg: string] {
+    error make {msg: $msg}
+}
+
+def assert-eq [actual expected msg: string] {
+    if $actual != $expected {
+        fail $"($msg): expected ($expected | to nuon), got ($actual | to nuon)"
+    }
+}
+
+def assert-contains [items needle msg: string] {
+    if not ($needle in $items) {
+        fail $"($msg): expected ($items | to nuon) to contain ($needle | to nuon)"
+    }
+}
+
+def values [items] {
+    $items | default [] | get value
+}
+
+let completer = $env.config.completions.external.completer
+
+def _assert_elevation_wrappers_accept_command_tails [p: path] {
+    sudo nix-env --set -p /nix/var/nix/profiles/system $p
+    doas nix-env --set -p /nix/var/nix/profiles/system $p
+}
+
+'[{"value":"--static","description":"from static cache"}]' | save --force $env.INSHELLAH_STATIC_FILE
+let static_result = do $completer [demo ""]
+assert-eq ($static_result | get 0.value) "--static" "static completion pass-through"
+'[{"value":"--server","description":"from static cache"},{"value":"--preserve","description":"from static cache"}]' | save --force $env.INSHELLAH_STATIC_FILE
+let static_fuzzy_result = do $completer [demo ser]
+assert-eq (values $static_fuzzy_result) ['--server' '--preserve'] "static fuzzy completions are not refiltered by shim"
+
+"{" | save --force $env.INSHELLAH_STATIC_FILE
+let bad_static_result = do $completer [demo ""]
+assert-eq $bad_static_result null "bad static JSON falls back cleanly"
+"" | save --force $env.INSHELLAH_STATIC_FILE
+
+assert-eq (do $completer [nix]) null "nix completion ignores too-short spans"
+let nix_commands = do $completer [nix ""]
+assert-eq ($nix_commands | get 0.value) "build" "nix command completion uses NIX_GET_COMPLETIONS"
+let nix_pkg = do $completer [nix "flake#pkg"]
+assert-eq ($nix_pkg | get 0.description) "raw package description" "nix descriptions are raw strings"
+
+let systemctl_empty = do $completer [systemctl daemon-reload ""]
+assert-eq $systemctl_empty null "systemctl does not offer units for non-unit verbs"
+let systemctl_units = do $completer [systemctl status ""]
+assert-eq ($systemctl_units | get 0.value) "demo.service" "systemctl offers units for unit verbs"
+let systemctl_prefixed_units = do $completer [systemctl start g]
+assert-eq ($systemctl_prefixed_units | get 0.value) "greetd.service" "systemctl unit completions accept typed prefixes"
+
+let kubectl_pods = do $completer [kubectl get pods -n prod ""]
+assert-eq ($kubectl_pods | get 0.value) "pod-a" "kubectl resource names complete"
+assert-eq (open $env.KUBECTL_ARGS_FILE | str contains "-n prod") true "kubectl preserves namespace flags"
+let kubectl_rollout = do $completer [kubectl rollout status deployment ""]
+assert-eq ($kubectl_rollout | get 0.description) "deployment" "kubectl rollout uses resource kind, not action"
+
+let cargo_packages = do $completer [cargo test -p ""]
+assert-eq (values $cargo_packages) [app-lib helper-lib] "cargo -p completes packages"
+let cargo_bins = do $completer [cargo run --bin ""]
+assert-eq (values $cargo_bins) [app-cli] "cargo --bin completes only bin targets"
+
+"[]" | save --force $env.INSHELLAH_STATIC_FILE
+let git_top = do $completer [git ""]
+assert-contains (values $git_top) "remote" "git top-level completes common commands"
+assert-contains (values $git_top) "stash" "git top-level includes stash"
+let git_push = do $completer [git push ""]
+assert-eq (values $git_push) [origin upstream] "empty static completions fall through to git remotes"
+let git_remote_verbs = do $completer [git remote ""]
+assert-eq (values $git_remote_verbs) [add rename remove rm set-head set-branches get-url set-url show prune update] "git remote completes subcommands"
+let git_remote_filtered = do $completer [git remote sho]
+assert-eq (values $git_remote_filtered) [show] "git remote subcommands filter by typed prefix"
+let git_remote_fuzzy = do $completer [git remote shw]
+assert-eq (values $git_remote_fuzzy) [show] "git remote subcommands use fuzzy filtering"
+let git_remote_exact = do $completer [git remote show]
+assert-eq $git_remote_exact null "exact dynamic completion disappears"
+let git_remote_show = do $completer [git remote show ""]
+assert-eq (values $git_remote_show) [origin upstream] "git remote show completes remote names"
+let git_fetch = do $completer [git fetch ""]
+assert-eq (values $git_fetch) [origin upstream] "git fetch completes remotes"
+let git_fetch_ref = do $completer [git fetch origin ""]
+assert-contains (values $git_fetch_ref) "main" "git fetch after remote completes refs"
+let git_branch_delete = do $completer [git branch -d ""]
+assert-eq (values $git_branch_delete) [main feature] "git branch delete completes local branches"
+let git_tag_delete = do $completer [git tag -d ""]
+assert-eq (values $git_tag_delete) [v1.0 v2.0] "git tag delete completes tags"
+let git_stash_apply = do $completer [git stash apply ""]
+assert-eq (values $git_stash_apply) ['stash@{0}'] "git stash apply completes stashes"
+let git_submodule_update = do $completer [git submodule update ""]
+assert-eq (values $git_submodule_update) [deps/demo] "git submodule update completes submodule paths"
+let git_bisect = do $completer [git bisect ""]
+assert-contains (values $git_bisect) "good" "git bisect completes subcommands"
+let git_bisect_good = do $completer [git bisect good ""]
+assert-contains (values $git_bisect_good) "main" "git bisect good completes refs"
+let git_add_paths = do $completer [git add ""]
+assert-eq (values $git_add_paths) [src/main.rs new-file.txt renamed.txt] "git add completes changed paths"
+let git_rm_paths = do $completer [git rm ""]
+assert-eq (values $git_rm_paths) [src/main.rs README.md] "git rm completes tracked paths"
+"" | save --force $env.INSHELLAH_STATIC_FILE
+let git_worktree_add = do $completer [git worktree add ""]
+assert-eq $git_worktree_add null "git worktree add first argument falls back to files"
+let git_worktree_remove = do $completer [git worktree remove ""]
+assert-eq ($git_worktree_remove | get 0.value) "/repo/linked" "git worktree remove completes existing worktrees"
+
+"[]" | save --force $env.INSHELLAH_STATIC_FILE
+let jj_top = do $completer [jj ""]
+assert-contains (values $jj_top) "bookmark" "jj top-level completes common commands"
+assert-contains (values $jj_top) "git" "jj top-level includes git command"
+let jj_bookmarks = do $completer [jj bookmark delete ""]
+assert-eq (values $jj_bookmarks) [main feature origin/main] "jj bookmark delete completes bookmarks"
+let jj_tags = do $completer [jj tag delete ""]
+assert-eq (values $jj_tags) [v1.0 v2.0] "jj tag delete completes tags"
+let jj_git_fetch = do $completer [jj git fetch ""]
+assert-eq (values $jj_git_fetch) [origin upstream] "jj git fetch completes remotes"
+let jj_git_remote_verbs = do $completer [jj git remote ""]
+assert-eq (values $jj_git_remote_verbs) [add list remove rename set-url] "jj git remote completes subcommands"
+let jj_git_remote_remove = do $completer [jj git remote remove ""]
+assert-eq (values $jj_git_remote_remove) [origin upstream] "jj git remote remove completes remotes"
+let jj_revs = do $completer [jj rebase -d ""]
+assert-eq (values $jj_revs) [k m] "jj revision flags complete revisions"
+let jj_ops = do $completer [jj op restore ""]
+assert-eq (values $jj_ops) [abc123] "jj op restore completes operations"
+let jj_files = do $completer [jj file show ""]
+assert-eq (values $jj_files) [src/main.rs README.md] "jj file show completes repo files"
+let jj_workspaces = do $completer [jj workspace forget ""]
+assert-eq (values $jj_workspaces) [default linked] "jj workspace forget completes workspaces"
+"" | save --force $env.INSHELLAH_STATIC_FILE
diff --git a/tests/ports.rs b/tests/ports.rs
new file mode 100644
index 0000000..3a3fe4e
--- /dev/null
+++ b/tests/ports.rs
@@ -0,0 +1,915 @@
+//! Tests ported from ../inshellah/test/test_inshellah.ml.
+//!
+//! Covers the help parser, manpage parser, groff stripping, nushell
+//! generation, and the store helpers (the JSON cache round-trip and the
+//! `.nu` file parsing exercised by `nu_file_parsing`).
+
+use inshellah::parsers::help::help_parser;
+use inshellah::parsers::manpage::{
+    ManpageResult, OwnedParam, OwnedSwitch, extract_synopsis_command, parse_manpage_string,
+    strip_groff_escapes,
+};
+use inshellah::parsers::nushell::{generate_extern, generate_module};
+use inshellah::store::{json_of_result, parse_nu_completions, result_from_json};
+use inshellah::types::{HelpResult, Param, Switch};
+
+/// Runs `help_parser` over `txt` and returns the parsed result, ignoring the
+/// first element of the parser's `Ok` tuple. Panics with the parser error
+/// when parsing fails, so a test can use the result directly.
+fn parse(txt: &str) -> HelpResult<'_> {
+    help_parser(txt).unwrap_or_else(|e| panic!("parse_help failed: {e:?}")).1
+}
+
+// --- Help parser tests ---
+
+#[test]
+fn gnu_basic() {
+    let r = parse("  -a, --all                  do not ignore entries starting with .\n");
+    assert_eq!(r.entries.len(), 1);
+    let e = &r.entries[0];
+    assert!(matches!(&e.switch, Switch::Both('a', l) if *l == "all"));
+    assert!(e.param.is_none());
+    assert!(!e.desc.is_empty());
+}
+
+#[test]
+fn gnu_eq_param() {
+    let r = parse("      --block-size=SIZE      scale sizes by SIZE\n");
+    assert_eq!(r.entries.len(), 1);
+    let e = &r.entries[0];
+    assert!(matches!(&e.switch, Switch::Long(l) if *l == "block-size"));
+    assert!(matches!(&e.param, Some(Param::Mandatory(p)) if *p == "SIZE"));
+}
+
+#[test]
+fn gnu_opt_param() {
+    let r = parse("      --color[=WHEN]         color the output WHEN\n");
+    assert_eq!(r.entries.len(), 1);
+    let e = &r.entries[0];
+    assert!(matches!(&e.switch, Switch::Long(l) if *l == "color"));
+    assert!(matches!(&e.param, Some(Param::Optional(p)) if *p == "WHEN"));
+}
+
+#[test]
+fn underscore_param() {
+    let r = parse("      --time-style=TIME_STYLE  time/date format\n");
+    assert_eq!(r.entries.len(), 1);
+    let e = &r.entries[0];
+    assert!(matches!(&e.param, Some(Param::Mandatory(p)) if *p == "TIME_STYLE"));
+}
+
+#[test]
+fn short_only() {
+    let r = parse("  -v                       verbose output\n");
+    assert_eq!(r.entries.len(), 1);
+    assert!(matches!(r.entries[0].switch, Switch::Short('v')));
+}
+
+#[test]
+fn long_only() {
+    let r = parse("      --help                 display help\n");
+    assert_eq!(r.entries.len(), 1);
+    assert!(matches!(&r.entries[0].switch, Switch::Long(l) if *l == "help"));
+}
+
+#[test]
+fn multiline_desc() {
+    let txt = "      --block-size=SIZE      with -l, scale sizes by SIZE when printing them;\n                               e.g., '--block-size=M'; see SIZE format below\n";
+    let r = parse(txt);
+    assert_eq!(r.entries.len(), 1);
+    let combined: String = r.entries[0].desc.join(" ");
+    assert!(combined.len() > 50, "desc was: {combined}");
+}
+
+#[test]
+fn multiple_entries() {
+    let txt = "  -a, --all                  do not ignore entries starting with .\n  -A, --almost-all           do not list implied . and ..\n      --author               with -l, print the author of each file\n";
+    let r = parse(txt);
+    assert_eq!(r.entries.len(), 3);
+}
+
+#[test]
+fn clap_short_sections() {
+    let txt = "INPUT OPTIONS:\n  -e, --regexp=PATTERN       A pattern to search for.\n  -f, --file=PATTERNFILE     Search for patterns from the given file.\nSEARCH OPTIONS:\n  -s, --case-sensitive       Search case sensitively.\n";
+    let r = parse(txt);
+    assert_eq!(r.entries.len(), 3);
+    let e = &r.entries[0];
+    assert!(matches!(&e.switch, Switch::Both('e', l) if *l == "regexp"));
+    assert!(matches!(&e.param, Some(Param::Mandatory(p)) if *p == "PATTERN"));
+}
+
+#[test]
+fn clap_long_style() {
+    let txt = "  -H, --hidden\n          Include hidden directories and files.\n\n      --no-ignore\n          Do not respect ignore files.\n";
+    let r = parse(txt);
+    assert_eq!(r.entries.len(), 2);
+    let e = &r.entries[0];
+    assert!(matches!(&e.switch, Switch::Both('H', l) if *l == "hidden"));
+    assert!(!e.desc.is_empty());
+}
+
+#[test]
+fn clap_long_angle_param() {
+    let txt = "      --nonprintable-notation <notation>\n          Set notation for non-printable characters.\n";
+    let r = parse(txt);
+    assert_eq!(r.entries.len(), 1);
+    let e = &r.entries[0];
+    assert!(matches!(&e.switch, Switch::Long(l) if *l == "nonprintable-notation"));
+    assert!(matches!(&e.param, Some(Param::Mandatory(p)) if *p == "notation"));
+}
+
+#[test]
+fn space_upper_param() {
+    let r = parse("  -f, --foo FOO  foo help\n");
+    assert_eq!(r.entries.len(), 1);
+    let e = &r.entries[0];
+    assert!(matches!(&e.switch, Switch::Both('f', l) if *l == "foo"));
+    assert!(matches!(&e.param, Some(Param::Mandatory(p)) if *p == "FOO"));
+}
+
+#[test]
+fn go_cobra_flags() {
+    let txt = "Flags:\n  -D, --debug              Enable debug mode\n  -H, --host string        Daemon socket to connect to\n  -v, --version            Print version information\n";
+    let r = parse(txt);
+    assert_eq!(r.entries.len(), 3);
+    let host = &r.entries[1];
+    assert!(matches!(&host.switch, Switch::Both('H', l) if *l == "host"));
+    assert!(matches!(&host.param, Some(Param::Mandatory(p)) if *p == "string"));
+}
+
+#[test]
+fn go_cobra_subcommands() {
+    let txt = "Common Commands:\n  run         Create and run a new container from an image\n  exec        Execute a command in a running container\n  build       Build an image from a Dockerfile\n";
+    let r = parse(txt);
+    assert!(
+        !r.subcommands.is_empty(),
+        "expected subcommands, got: {:?}",
+        r.subcommands.len()
+    );
+}
+
+#[test]
+fn help_parser_ignores_value_enums_and_defaults() {
+    let txt = r#"Usage: tar [OPTION...] [FILE]...
+
+ Main operation mode:
+  -c, --create               create a new archive
+
+ Archive format selection:
+
+  -H, --format=FORMAT        create archive of the given format
+
+ FORMAT is one of the following:
+    gnu                      GNU tar 1.13.x format
+    oldgnu                   GNU format as per tar <= 1.12
+    pax                      POSIX 1003.1-2001 (pax) format
+    posix                    same as pax
+    ustar                    POSIX 1003.1-1988 (ustar) format
+    v7                       old V7 tar format
+
+*This* tar defaults to:
+--format=gnu -f- -b20 --quoting-style=escape
+--rmt-command=/nix/store/example/libexec/rmt
+"#;
+    let r = parse(txt);
+    assert!(
+        r.subcommands.is_empty(),
+        "enum values became subcommands: {:?}",
+        r.subcommands.len()
+    );
+    assert!(
+        !r.entries
+            .iter()
+            .any(|e| matches!(&e.switch, Switch::Long(l) if *l == "rmt-command")),
+        "default lines should not become flags"
+    );
+    assert!(
+        r.entries
+            .iter()
+            .any(|e| matches!(&e.switch, Switch::Both('H', l) if *l == "format")),
+        "real option should still be parsed"
+    );
+}
+
+#[test]
+fn busybox_tab() {
+    let r = parse("\t-1\tOne column output\n\t-a\tInclude names starting with .\n");
+    assert_eq!(r.entries.len(), 2);
+    assert!(matches!(r.entries[0].switch, Switch::Short('1')));
+}
+
+#[test]
+fn no_debug_prints() {
+    // the old ocaml parser had print_endline at module load time; this test
+    // documents that no such side effects exist in the rust port.
+    let _ = parse("  -v  verbose\n");
+}
+
+#[test]
+fn slash_switch_separator() {
+    let r = parse("  --verbose / -v             Increase verbosity\n");
+    assert_eq!(r.entries.len(), 1);
+    let e = &r.entries[0];
+    assert!(matches!(&e.switch, Switch::Both('v', l) if *l == "verbose"));
+    assert!(e.param.is_none());
+    let combined: String = e.desc.join(" ");
+    assert_eq!(combined.trim(), "Increase verbosity");
+}
+
+// --- Manpage parser tests ---
+
+#[test]
+fn manpage_tp_style() {
+    let groff = r#".SH OPTIONS
+.TP
+\fB\-a\fR, \fB\-\-all\fR
+do not ignore entries starting with .
+.TP
+\fB\-A\fR, \fB\-\-almost\-all\fR
+do not list implied . and ..
+.TP
+\fB\-\-block\-size\fR=\fISIZE\fR
+with \fB\-l\fR, scale sizes by SIZE
+.SH AUTHOR
+Written by someone.
+"#;
+    let r = parse_manpage_string(groff);
+    assert_eq!(r.entries.len(), 3, "entries: {:?}", r.entries);
+    assert!(matches!(&r.entries[0].switch, OwnedSwitch::Both('a', l) if l == "all"));
+    assert!(!r.entries[0].desc.is_empty());
+    assert!(matches!(&r.entries[2].switch, OwnedSwitch::Long(l) if l == "block-size"));
+    assert!(matches!(&r.entries[2].param, Some(OwnedParam::Mandatory(p)) if p == "SIZE"));
+}
+
+#[test]
+fn manpage_ip_style() {
+    let groff = r#".SH OPTIONS
+.IP "\fB\-k\fR, \fB\-\-insecure\fR"
+Allow insecure connections.
+.IP "\fB\-o\fR, \fB\-\-output\fR \fIfile\fR"
+Write output to file.
+.SH SEE ALSO
+"#;
+    let r = parse_manpage_string(groff);
+    assert_eq!(r.entries.len(), 2, "entries: {:?}", r.entries);
+    assert!(matches!(&r.entries[0].switch, OwnedSwitch::Both('k', l) if l == "insecure"));
+}
+
+#[test]
+fn manpage_groff_stripping() {
+    let s = strip_groff_escapes(r#"\fB\-\-color\fR[=\fIWHEN\fR]"#);
+    // font escapes removed: no escape introducer (backslash) may survive
+    assert!(!s.contains('\\'), "escapes left in: {s}");
+    // dashes converted: `\-\-color` must come out as a plain long flag
+    assert!(s.contains("--color"), "got: {s}");
+    let s2 = strip_groff_escapes(r#"\(aqhello\(aq"#);
+    assert!(s2.contains('\''), "expected apostrophe in: {s2}");
+}
+
+#[test]
+fn manpage_getent_databases_from_description() {
+    let groff = r#".SH SYNOPSIS
+.SY getent
+.RI [ option \~.\|.\|.\&]
+.I database
+.IR key \~.\|.\|.
+.YS
+.SH DESCRIPTION
+The
+.I database
+may be any of those supported by the GNU C Library, listed below:
+.TP
+.B passwd
+When no
+.I key
+is provided, enumerate the passwd database.
+.TP
+.B services
+When no
+.I key
+is provided, enumerate the services database.
+.SH OPTIONS
+.TP
+.BI \-\-service\~ service
+.TQ
+.BI \-s\~ service
+Override all databases with the specified service.
+.TP
+.BI \-\-service\~ database : service
+.TQ
+.BI \-s\~ database : service
+Override only specified databases with the specified service.
+.TP
+.B \-\-usage
+Print a short usage summary and exit.
+"#;
+    let r = parse_manpage_string(groff);
+    let positional_names: Vec<&str> = r
+        .positionals
+        .iter()
+        .map(|(name, _)| name.as_str())
+        .collect();
+    assert_eq!(positional_names, vec!["database", "key"]);
+
+    let service = r
+        .entries
+        .iter()
+        .find(|e| matches!(&e.switch, OwnedSwitch::Both('s', name) if name == "service"))
+        .expect("expected --service(-s)");
+    assert!(matches!(
+        &service.param,
+        Some(OwnedParam::Mandatory(param)) if param == "service"
+    ));
+    assert!(
+        !r.entries
+            .iter()
+            .any(|e| matches!(&e.switch, OwnedSwitch::Long(name) if name == "serviceservice" || name == "servicedatabase")),
+        "entries: {:?}",
+        r.entries
+    );
+
+    let subcommands: Vec<&str> = r.subcommands.iter().map(|sc| sc.name.as_str()).collect();
+    assert!(
+        subcommands.contains(&"passwd"),
+        "subcommands: {subcommands:?}"
+    );
+    assert!(
+        subcommands.contains(&"services"),
+        "subcommands: {subcommands:?}"
+    );
+
+    let nu = generate_extern("getent", &r);
+    assert!(nu.contains("database: string"), "nu = {nu}");
+    assert!(nu.contains("...key: string"), "nu = {nu}");
+    assert!(nu.contains("--service(-s): string"), "nu = {nu}");
+    assert!(!nu.contains("--servicedatabase"), "nu = {nu}");
+    assert!(nu.contains("export extern \"getent passwd\""), "nu = {nu}");
+}
+
+#[test]
+fn manpage_b_macro_option_tag_with_embedded_quotes() {
+    let groff = r#".SH OPTIONS
+.TP
+.B "\-s ""\fIprogram\fR [\fIargument \fR...]\fB""\fR, \fB\-\-speller=""\fIprogram\fR [\fIargument \fR...]\fB"""
+Use this command to perform spell checking and correcting.
+"#;
+    let r = parse_manpage_string(groff);
+    assert!(
+        r.entries
+            .iter()
+            .any(|e| matches!(e.switch, OwnedSwitch::Short('s'))),
+        "entries: {:?}",
+        r.entries
+    );
+}
+
+#[test]
+fn manpage_synopsis_b_macro_bracket_args_keep_spaces() {
+    let groff = r#".SH "SYNOPSIS"
+.B "rtmon"
+.RI "[ " OPTIONS " ] "
+.BI "file " FILE
+.BR "[ " all
+.RI "| " OBJECTS
+.RB "]"
+.ti -8
+.I OBJECTS
+.B ":= [" link "]" "[" address "]" "[" route "]"
+.SH OPTIONS
+"#;
+    let r = parse_manpage_string(groff);
+    let positional_names: Vec<&str> = r
+        .positionals
+        .iter()
+        .map(|(name, _)| name.as_str())
+        .collect();
+    assert!(
+        !positional_names.contains(&"ptions")
+            && positional_names.contains(&"link")
+            && positional_names.contains(&"address"),
+        "positionals: {positional_names:?}"
+    );
+}
+
+#[test]
+fn bracketed_angle_positionals_keep_inner_ellipsis() {
+    let groff = r#".SH SYNOPSIS
+.B bzip2
+.RB [ " \-cdfkqstvzVL123456789 " ]
+[
+.I "filenames \&..."
+]
+.SH OPTIONS
+"#;
+    let r = parse_manpage_string(groff);
+    assert!(
+        r.positionals
+            .iter()
+            .any(|(name, positional)| name == "filenames" && positional.variadic),
+        "positionals: {:?}",
+        r.positionals
+    );
+}
+
+#[test]
+fn nested_optional_positionals_keep_last_valid_inner_name() {
+    let groff = r#".SH SYNOPSIS
+\fBfc-cat\fR [ \fB-rvVh\fR ]
+ [ \fB [ \fIfonts-cache-%version%-files\fB ]  [ \fIdirs\fB ] \fR\fI...\fR ]
+.SH OPTIONS
+"#;
+    let r = parse_manpage_string(groff);
+    assert!(
+        r.positionals
+            .iter()
+            .any(|(name, positional)| name == "dirs" && positional.optional && positional.variadic),
+        "positionals: {:?}",
+        r.positionals
+    );
+}
+
+#[test]
+fn manpage_empty_options() {
+    let groff = ".SH NAME\nfoo \\- does stuff\n.SH DESCRIPTION\nDoes stuff.\n";
+    let r = parse_manpage_string(groff);
+    assert_eq!(r.entries.len(), 0);
+}
+
+#[test]
+fn manpage_nix3_style() {
+    let groff = r#".SH Options
+.SS Logging-related options
+.IP "\(bu" 3
+.UR #opt-verbose
+\f(CR--verbose\fR
+.UE
+/ \f(CR-v\fR
+.IP
+Increase the logging verbosity level.
+.IP "\(bu" 3
+.UR #opt-quiet
+\f(CR--quiet\fR
+.UE
+.IP
+Decrease the logging verbosity level.
+.SH SEE ALSO
+"#;
+    let r = parse_manpage_string(groff);
+    assert_eq!(r.entries.len(), 2, "entries: {:?}", r.entries);
+    assert!(matches!(&r.entries[0].switch, OwnedSwitch::Both('v', l) if l == "verbose"));
+    assert!(!r.entries[0].desc.is_empty());
+    assert!(matches!(&r.entries[1].switch, OwnedSwitch::Long(l) if l == "quiet"));
+    assert!(!r.entries[1].desc.is_empty());
+}
+
+#[test]
+fn manpage_nix3_with_params() {
+    let groff = r#".SH Options
+.IP "\(bu" 3
+.UR #opt-arg
+\f(CR--arg\fR
+.UE
+\fIname\fR \fIexpr\fR
+.IP
+Pass the value as the argument name to Nix functions.
+.IP "\(bu" 3
+.UR #opt-include
+\f(CR--include\fR
+.UE
+/ \f(CR-I\fR \fIpath\fR
+.IP
+Add path to search path entries.
+.IP
+This option may be given multiple times.
+.SH SEE ALSO
+"#;
+    let r = parse_manpage_string(groff);
+    assert_eq!(r.entries.len(), 2, "entries: {:?}", r.entries);
+    assert!(matches!(&r.entries[0].switch, OwnedSwitch::Long(l) if l == "arg"));
+    assert!(r.entries[0].param.is_some());
+    assert!(matches!(&r.entries[1].switch, OwnedSwitch::Both('I', l) if l == "include"));
+    assert!(matches!(&r.entries[1].param, Some(OwnedParam::Mandatory(p)) if p == "path"));
+}
+
+#[test]
+fn synopsis_subcommand() {
+    let groff = r#".SH "SYNOPSIS"
+.sp
+.nf
+\fBgit\fR \fBcommit\fR [\fB\-a\fR | \fB\-\-interactive\fR]
+.fi
+.SH "DESCRIPTION"
+"#;
+    let cmd = extract_synopsis_command(groff);
+    assert_eq!(cmd.as_deref(), Some("git commit"));
+}
+
+#[test]
+fn synopsis_standalone() {
+    let groff = ".SH Synopsis\n.LP\n\\f(CRnix-build\\fR [\\fIpaths\\fR]\n.SH Description\n";
+    let cmd = extract_synopsis_command(groff);
+    assert_eq!(cmd.as_deref(), Some("nix-build"));
+}
+
+#[test]
+fn synopsis_nix3() {
+    let groff = ".SH Synopsis\n.LP\n\\f(CRnix run\\fR [\\fIoption\\fR] \\fIinstallable\\fR\n.SH Description\n";
+    let cmd = extract_synopsis_command(groff);
+    assert_eq!(cmd.as_deref(), Some("nix run"));
+}
+
+#[test]
+fn italic_synopsis() {
+    let groff = ".SH Synopsis\n.LP\n\\f(CRnix-env\\fR \\fIoperation\\fR [\\fIoptions\\fR] [\\fIarguments…\\fR]\n.SH Description\n";
+    let cmd = extract_synopsis_command(groff);
+    assert_eq!(cmd.as_deref(), Some("nix-env"));
+}
+
+#[test]
+fn synopsis_italic_command_name() {
+    // git-am.1 (and many other git manpages) put the entire command
+    // invocation in italics: `\fIgit am\fR [...]`. should still resolve
+    // to "git am" rather than treating it as a placeholder.
+    let groff = ".SH \"SYNOPSIS\"\n.sp\n.nf\n\\fIgit am\\fR [\\-\\-signoff] [\\-\\-keep]\n.fi\n.SH \"DESCRIPTION\"\n";
+    let cmd = extract_synopsis_command(groff);
+    assert_eq!(cmd.as_deref(), Some("git am"));
+}
+
+#[test]
+fn synopsis_skips_prose_before_invocation() {
+    let groff = r#".SH SYNOPSIS
+The dynamic linker can be run either indirectly by running some
+dynamically linked program or shared object
+(in which case no command-line options
+to the dynamic linker can be passed and, in the ELF case, the dynamic linker
+which is stored in the
+.B .interp
+section of the program is executed) or directly by running:
+.P
+.I /lib/ld\-linux.so.*
+[OPTIONS] [PROGRAM [ARGUMENTS]]
+.SH DESCRIPTION
+"#;
+    let cmd = extract_synopsis_command(groff);
+    assert_eq!(cmd.as_deref(), None);
+}
+
+#[test]
+fn synopsis_skips_labels_before_invocation() {
+    let groff = r#".SH "SYNOPSIS"
+.sp
+Set up a loop device:
+.sp
+\fBlosetup\fP [options] \fB\-f\fP|\fIloopdev file\fP
+.sp
+Get info:
+.RS 4
+\fBlosetup\fP \fIloopdev\fP
+.RE
+.SH "DESCRIPTION"
+"#;
+    let cmd = extract_synopsis_command(groff);
+    assert_eq!(cmd.as_deref(), Some("losetup"));
+}
+
+#[test]
+fn synopsis_b_macro_preserves_command_spaces() {
+    let groff = r#".SH "SYNOPSIS"
+.sp
+.B ip link
+.RI " { " COMMAND " | "
+.BR help " }"
+.SH "DESCRIPTION"
+"#;
+    let cmd = extract_synopsis_command(groff);
+    assert_eq!(cmd.as_deref(), Some("ip link"));
+}
+
+#[test]
+fn synopsis_br_macro_preserves_quoted_command_spaces() {
+    let groff = r#".SH "SYNOPSIS"
+.sp
+.BR "ip monitor" " [ " all " |"
+.IR OBJECT-LIST " ]"
+.SH "DESCRIPTION"
+"#;
+    let cmd = extract_synopsis_command(groff);
+    assert_eq!(cmd.as_deref(), Some("ip monitor"));
+}
+
+#[test]
+fn synopsis_long_b_macro_is_not_prose() {
+    let groff = r#".SH SYNOPSIS
+.ad l
+.in +8
+.ti -8
+.B tipc peer remove address
+.IR ADDRESS
+.SH OPTIONS
+"#;
+    let cmd = extract_synopsis_command(groff);
+    assert_eq!(cmd.as_deref(), Some("tipc peer remove address"));
+}
+
+#[test]
+fn synopsis_ss_heading_is_accepted() {
+    let groff = r#".SH Name
+.LP
+\f(CRnix-env --set\fR - set profile to contain a specified derivation
+.SS
+Synopsis
+.LP
+\f(CRnix-env\fR \f(CR--set\fR \fIdrvname\fR
+.SS
+Description
+"#;
+    let cmd = extract_synopsis_command(groff);
+    assert_eq!(cmd.as_deref(), Some("nix-env"));
+}
+
+// --- Font/dedup tests (only the font-spacing one is ported here; dedup is covered with the nushell tests) ---
+
+#[test]
+fn font_boundary_spacing() {
+    // \fB--max-results\fR\fIcount\fR should become "--max-results count"
+    let s = strip_groff_escapes(r#"\fB\-\-max\-results\fR\fIcount\fR"#);
+    assert!(s.contains("--max-results count"), "got: {s}");
+    // \fB--color\fR[=\fIWHEN\fR] should NOT insert space before =
+    let s2 = strip_groff_escapes(r#"\fB\-\-color\fR[=\fIWHEN\fR]"#);
+    assert!(s2.contains("--color[=WHEN]"), "got: {s2}");
+}
+
+// --- COMMANDS section tests ---
+
+#[test]
+fn commands_section_subcommands() {
+    let groff = r#".SH OPTIONS
+.TP
+\fB\-\-user\fR
+Talk to the service manager of the calling user.
+.TP
+\fB\-\-system\fR
+Talk to the service manager of the system.
+.SH COMMANDS
+.PP
+\fBstart\fR \fIUNIT\fR\&...
+.RS 4
+Start (activate) one or more units.
+.RE
+.PP
+\fBstop\fR \fIUNIT\fR\&...
+.RS 4
+Stop (deactivate) one or more units.
+.RE
+.PP
+\fBreload\fR \fIUNIT\fR\&...
+.RS 4
+Asks all units to reload their configuration.
+.RE
+.SH SEE ALSO
+"#;
+    let r = parse_manpage_string(groff);
+    assert_eq!(r.entries.len(), 2, "options entries: {:?}", r.entries);
+    assert_eq!(r.subcommands.len(), 3, "subcommands: {:?}", r.subcommands);
+    let names: Vec<&str> = r.subcommands.iter().map(|sc| sc.name.as_str()).collect();
+    assert!(names.contains(&"start"));
+    assert!(names.contains(&"stop"));
+    assert!(names.contains(&"reload"));
+    let start_sc = r.subcommands.iter().find(|sc| sc.name == "start").unwrap();
+    assert!(!start_sc.desc.is_empty());
+}
+
+#[test]
+fn commands_section_git_style_refs() {
+    let groff = r#".SH OPTIONS
+.TP
+\fB\-\-version\fR
+Show version.
+.SH "GIT COMMANDS"
+.SS "Main porcelain commands"
+.PP
+.BR git-add (1)
+.RS 4
+Add file contents to the index.
+.RE
+.PP
+\fBgit-commit\fR(1)
+.RS 4
+Record changes to the repository.
+.RE
+"#;
+    let r = parse_manpage_string(groff);
+    let names: Vec<&str> = r.subcommands.iter().map(|sc| sc.name.as_str()).collect();
+    assert!(
+        names.contains(&"git-add"),
+        "subcommands: {:?}",
+        r.subcommands
+    );
+    assert!(
+        names.contains(&"git-commit"),
+        "subcommands: {:?}",
+        r.subcommands
+    );
+    let add = r
+        .subcommands
+        .iter()
+        .find(|sc| sc.name == "git-add")
+        .unwrap();
+    assert!(add.desc.contains("Add file contents"));
+}
+
+// --- Nushell generation tests ---
+
+fn to_owned_result(r: &HelpResult<'_>) -> ManpageResult {
+    r.into()
+}
+
+#[test]
+fn nushell_basic() {
+    let r = parse("  -a, --all                  do not ignore entries starting with .\n");
+    let nu = generate_extern("ls", &to_owned_result(&r));
+    assert!(nu.contains("export extern \"ls\""), "nu = {nu}");
+    assert!(nu.contains("--all(-a)"), "nu = {nu}");
+    assert!(nu.contains("# do not ignore"), "nu = {nu}");
+}
+
+#[test]
+fn nushell_param_types() {
+    let txt = "  -w, --width=COLS           set output width\n      --block-size=SIZE      scale sizes\n  -o, --output FILE          output file\n";
+    let r = parse(txt);
+    let nu = generate_extern("ls", &to_owned_result(&r));
+    assert!(nu.contains("--width(-w): int"), "nu = {nu}");
+    assert!(nu.contains("--block-size: string"), "nu = {nu}");
+    assert!(nu.contains("--output(-o): path"), "nu = {nu}");
+}
+
+#[test]
+fn nushell_subcommands() {
+    let txt = "Common Commands:\n  run         Create and run a new container\n  exec        Execute a command\n\nFlags:\n  -D, --debug              Enable debug mode\n";
+    let r = parse(txt);
+    let nu = generate_extern("docker", &to_owned_result(&r));
+    assert!(nu.contains("export extern \"docker\""), "nu = {nu}");
+    assert!(nu.contains("--debug(-D)"), "nu = {nu}");
+    assert!(nu.contains("export extern \"docker run\""), "nu = {nu}");
+    assert!(nu.contains("export extern \"docker exec\""), "nu = {nu}");
+}
+
+/// Positional order from a usage line must survive the JSON cache round trip and extern output.
+#[test]
+fn positional_order_survives_cache_and_generation() {
+    let usage = "usage: git clone [<options>] [--] <repository> [directory]\n";
+    let expected = vec!["repository", "directory"];
+    let parsed = to_owned_result(&parse(usage));
+    let parsed_names: Vec<_> = parsed
+        .positionals
+        .iter()
+        .map(|(name, _)| name.as_str())
+        .collect();
+    assert_eq!(parsed_names, expected);
+
+    // Serialise to the cache representation and read it straight back.
+    let json = json_of_result("help", &parsed);
+    let value = serde_json::from_str(&json).expect("cache json");
+    let cached = result_from_json(&value);
+    let cached_names: Vec<_> = cached
+        .positionals
+        .iter()
+        .map(|(name, _)| name.as_str())
+        .collect();
+    assert_eq!(cached_names, expected);
+
+    // `find` yields byte offsets, so comparing them checks the order of emission.
+    let nu = generate_extern("git clone", &cached);
+    let repository_at = nu
+        .find("repository: string")
+        .expect("repository positional");
+    let directory_at = nu.find("directory?: path").expect("directory positional");
+    assert!(repository_at < directory_at, "nu = {nu}");
+}
+
+/// Options declared with `.TP` under `.SH OPTIONS` in a groff page must reach the extern.
+#[test]
+fn nushell_from_manpage() {
+    let page = r#".SH OPTIONS
+.TP
+\fB\-a\fR, \fB\-\-all\fR
+do not ignore entries starting with .
+.TP
+\fB\-\-block\-size\fR=\fISIZE\fR
+scale sizes by SIZE
+.SH AUTHOR
+"#;
+    let nu = generate_extern("ls", &parse_manpage_string(page));
+    assert!(nu.contains("export extern \"ls\""), "nu = {nu}");
+    assert!(nu.contains("--block-size: string"), "nu = {nu}");
+    assert!(nu.contains("--all(-a)"), "nu = {nu}");
+}
+
+#[test]
+fn nushell_module() {
+    let owned = to_owned_result(&parse("  -v, --verbose              verbose output\n"));
+    let nu = generate_module("myapp", &owned); // extern wrapped in a named completions module
+    assert!(nu.contains("--verbose(-v)"), "nu = {nu}");
+    assert!(nu.contains("export extern \"myapp\""), "nu = {nu}");
+    assert!(nu.contains("module myapp-completions"), "nu = {nu}");
+}
+
+/// The same flag spelled three ways in help text must be emitted once, in combined form.
+#[test]
+fn dedup_entries_help() {
+    let help = "  -v, --verbose              verbose output\n  --verbose                  verbose mode\n  -v                         be verbose\n";
+    let nu = generate_extern("test", &to_owned_result(&parse(help)));
+    let seen = nu.matches("--verbose").count();
+    assert_eq!(seen, 1, "expected --verbose to appear once, nu = {nu}");
+    assert!(nu.contains("--verbose(-v)"), "nu = {nu}");
+}
+
+/// Flags mentioned again in DESCRIPTION prose must not add lines beyond the OPTIONS entry.
+#[test]
+fn dedup_manpage_entries() {
+    let page = r#".SH OPTIONS
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+Be verbose.
+.SH DESCRIPTION
+Use \fB\-v\fR for verbose output.
+Use \fB\-\-verbose\fR to see more.
+"#;
+    let nu = generate_extern("test", &parse_manpage_string(page));
+    let mentions: Vec<&str> = nu.lines().filter(|l| l.contains("verbose")).collect();
+    assert!(nu.contains("--verbose(-v)"), "nu = {nu}");
+    assert_eq!(
+        mentions.len(),
+        1,
+        "expected 1 verbose line, got: {mentions:?}"
+    );
+}
+
+/// Reads a nushell completions module back, once for `mytool` and once for `mytool list`.
+#[test]
+fn nu_file_parsing() {
+    let source = r#"module completions {
+
+  # Unofficial CLI tool
+  export extern mytool [
+    --help(-h)                # Print help
+    --version(-V)             # Print version
+  ]
+
+  # List all items
+  export extern "mytool list" [
+    --raw                     # Output as JSON
+    --format(-f): string      # Output format
+    --help(-h)                # Print help
+    name?: string             # Filter by name
+  ]
+
+}
+
+use completions *
+"#;
+    let root = parse_nu_completions("mytool", source);
+    assert_eq!(root.description, "Unofficial CLI tool");
+    assert_eq!(root.entries.len(), 2, "entries: {:?}", root.entries);
+    assert!(
+        !root.subcommands.is_empty(),
+        "subcommands: {:?}",
+        root.subcommands
+    );
+    assert!(root.subcommands.iter().any(|sc| sc.name == "list"));
+    let list = parse_nu_completions("mytool list", source);
+    assert_eq!(list.entries.len(), 3, "list entries: {:?}", list.entries);
+    let has_format = list
+        .entries
+        .iter()
+        .any(|e| matches!(&e.switch, OwnedSwitch::Both('f', l) if l == "format"));
+    assert!(
+        has_format,
+        "list should have --format(-f): {:?}",
+        list.entries
+    );
+    assert!(!list.positionals.is_empty(), "list should have a positional");
+}
+
+/// Commands listed with argument placeholders (`start UNIT...`) are still read as subcommands.
+#[test]
+fn self_listing_detection() {
+    let help = r#"systemctl [OPTIONS...] COMMAND ...
+
+Unit Commands:
+  start UNIT...                       Start (activate) one or more units
+  stop UNIT...                        Stop (deactivate) one or more units
+  status [PATTERN...]                 Show runtime status
+
+Options:
+  --user                              Talk to the user service manager
+  --system                            Talk to the system service manager
+"#;
+    let parsed = parse(help);
+    let subs = &parsed.subcommands;
+    let has_start = subs.iter().any(|sc| sc.name == "start");
+    // The name list is only built for the failure message.
+    let names = || subs.iter().map(|sc| sc.name).collect::<Vec<_>>();
+    assert!(has_start, "expected start in subcommands: {:?}", names());
+    assert!(parsed.entries.len() >= 2);
+}
diff --git a/tests/runtime_complete.rs b/tests/runtime_complete.rs
new file mode 100644
index 0000000..9d209b7
--- /dev/null
+++ b/tests/runtime_complete.rs
@@ -0,0 +1,500 @@
+use std::fs;
+use std::os::unix::fs::PermissionsExt;
+use std::process::Command;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use inshellah::parsers::manpage::{ManpageEntry, ManpageResult, ManpageSubcommand, OwnedSwitch};
+use inshellah::store::write_result;
+
+/// Returns a path under the system temp dir named from `name`, the pid and the current time.
+fn unique_temp_dir(name: &str) -> std::path::PathBuf {
+    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
+    let nanos = since_epoch.expect("system time").as_nanos();
+    let pid = std::process::id();
+    std::env::temp_dir().join(format!("{name}-{pid}-{nanos}"))
+}
+
+/// With only the parent command cached, completing after `fakecmd clone` must offer the
+/// stub's `--depth` flag and leave a `fakecmd_clone.json` file in the cache directory.
+#[test]
+fn complete_scrapes_missing_subcommand_when_parent_is_cached() {
+    let root = unique_temp_dir("inshellah-runtime-complete");
+    let bin_dir = root.join("bin");
+    let cache_dir = root.join("cache");
+    fs::create_dir_all(&bin_dir).expect("bin dir");
+    fs::create_dir_all(&cache_dir).expect("cache dir");
+
+    let fakecmd = bin_dir.join("fakecmd");
+    fs::write(
+        &fakecmd,
+        r#"#!/bin/sh
+if [ "$1" = "clone" ]; then
+  if [ "$2" = "--help" ] || [ "$2" = "-h" ]; then
+    cat <<'EOF'
+Usage: fakecmd clone [OPTIONS] <repository> [directory]
+
+Options:
+  --depth <n>          clone depth
+  -v, --verbose        verbose
+EOF
+    exit 0
+  fi
+fi
+
+if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
+  cat <<'EOF'
+Usage: fakecmd [OPTIONS] COMMAND
+
+Commands:
+  clone    Clone a repository
+
+Options:
+  -h, --help           show help
+EOF
+  exit 0
+fi
+
+exit 2
+"#,
+    )
+    .expect("write fakecmd");
+    let mut perms = fs::metadata(&fakecmd).expect("metadata").permissions();
+    perms.set_mode(0o755);
+    fs::set_permissions(&fakecmd, perms).expect("chmod");
+
+    let parent = ManpageResult {
+        entries: Vec::new(),
+        subcommands: vec![ManpageSubcommand {
+            name: "clone".to_string(),
+            desc: "Clone a repository".to_string(),
+        }],
+        positionals: Vec::new(),
+        description: String::new(),
+    };
+    write_result(&cache_dir, "fakecmd", "help", &parent).expect("parent cache");
+
+    // Prepend the stub directory with `join_paths` so the inherited PATH is passed through
+    // as OS strings (no lossy UTF-8 conversion) and the separator is not hard-coded.
+    let inherited = std::env::var_os("PATH").unwrap_or_default();
+    let mut entries = vec![bin_dir.clone()];
+    entries.extend(std::env::split_paths(&inherited));
+    let search_path = std::env::join_paths(entries).expect("join PATH");
+    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
+        .arg("complete")
+        .arg("--dir")
+        .arg(&cache_dir)
+        .args(["--timeout-ms", "1000", "fakecmd", "clone", "--"])
+        .env("PATH", &search_path)
+        .output()
+        .expect("run inshellah complete");
+
+    assert!(
+        output.status.success(),
+        "stderr = {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    let stdout = String::from_utf8(output.stdout).expect("stdout");
+    assert!(stdout.contains("--depth"), "stdout = {stdout}");
+    assert!(
+        cache_dir.join("fakecmd_clone.json").is_file(),
+        "subcommand cache was not written"
+    );
+
+    let _ = fs::remove_dir_all(root);
+}
+
+/// Completing the partial command word `gi` must print `null`, even though an executable
+/// `git` sits in the only PATH directory.
+#[test]
+fn complete_does_not_scan_path_at_command_position() {
+    let root = unique_temp_dir("inshellah-command-position-complete");
+    let bin_dir = root.join("bin");
+    let cache_dir = root.join("cache");
+    fs::create_dir_all(&bin_dir).expect("bin dir");
+    fs::create_dir_all(&cache_dir).expect("cache dir");
+
+    // The stub only has to exist and be executable; the test expects it never to be offered.
+    let fake_git = bin_dir.join("git");
+    fs::write(&fake_git, "#!/bin/sh\nexit 0\n").expect("write fake git");
+    let mut mode = fs::metadata(&fake_git).expect("metadata").permissions();
+    mode.set_mode(0o755);
+    fs::set_permissions(&fake_git, mode).expect("chmod");
+
+    let mut cmd = Command::new(env!("CARGO_BIN_EXE_inshellah"));
+    cmd.args(["complete", "--dir"]).arg(&cache_dir).arg("gi");
+    cmd.env("PATH", &bin_dir);
+    let output = cmd.output().expect("run inshellah complete");
+
+    assert!(
+        output.status.success(),
+        "stderr = {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    let stdout = String::from_utf8(output.stdout).expect("stdout");
+    assert_eq!(stdout.trim(), "null", "stdout = {stdout}");
+
+    // Best-effort cleanup: a failed delete must not fail the test.
+    let _ = fs::remove_dir_all(root);
+}
+
+/// With `load` and `clone` cached as subcommands of `demo`, completing `lo` must list `load`
+/// ahead of `clone` in the raw output.
+#[test]
+fn complete_uses_boundary_aware_fuzzy_ranking() {
+    let root = unique_temp_dir("inshellah-fuzzy-complete");
+    let cache_dir = root.join("cache");
+    fs::create_dir_all(&cache_dir).expect("cache dir");
+
+    // Small constructor so the two cached subcommands read as a table.
+    let sub = |name: &str, desc: &str| ManpageSubcommand {
+        name: name.to_string(),
+        desc: desc.to_string(),
+    };
+    let cached = ManpageResult {
+        entries: Vec::new(),
+        subcommands: vec![
+            sub("load", "load something"),
+            sub("clone", "clone something"),
+        ],
+        positionals: Vec::new(),
+        description: String::new(),
+    };
+    write_result(&cache_dir, "demo", "help", &cached).expect("cache");
+
+    let mut cmd = Command::new(env!("CARGO_BIN_EXE_inshellah"));
+    cmd.args(["complete", "--dir"]).arg(&cache_dir);
+    cmd.args(["demo", "lo"]);
+    let output = cmd.output().expect("run inshellah complete");
+
+    assert!(
+        output.status.success(),
+        "stderr = {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    let stdout = String::from_utf8(output.stdout).expect("stdout");
+
+    // Byte offsets into stdout; a value that is absent compares as `usize::MAX`.
+    let offset_of = |needle: &str| stdout.find(needle).unwrap_or(usize::MAX);
+    let load_at = offset_of(r#""value":"load""#);
+    let clone_at = offset_of(r#""value":"clone""#);
+    assert!(
+        load_at < clone_at,
+        "expected boundary match to outrank substring match, stdout = {stdout}"
+    );
+
+    let _ = fs::remove_dir_all(root);
+}
+
+/// An empty argument word must yield `null`, while a word starting with `--` must offer the
+/// cached `--verbose` flag.
+#[test]
+fn complete_returns_flags_only_after_hyphen() {
+    let root = unique_temp_dir("inshellah-flag-prefix-complete");
+    let cache_dir = root.join("cache");
+    fs::create_dir_all(&cache_dir).expect("cache dir");
+
+    // The cache for `demo` holds a single long flag and nothing else.
+    let verbose = ManpageEntry {
+        switch: OwnedSwitch::Long("verbose".to_string()),
+        param: None,
+        desc: "verbose output".to_string(),
+    };
+    let cached = ManpageResult {
+        entries: vec![verbose],
+        subcommands: Vec::new(),
+        positionals: Vec::new(),
+        description: String::new(),
+    };
+    write_result(&cache_dir, "demo", "help", &cached).expect("cache");
+
+    // Runs `inshellah complete --dir <cache> demo <word>` and returns its stdout.
+    let complete_word = |word: &str| -> String {
+        let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
+            .arg("complete")
+            .arg("--dir")
+            .arg(&cache_dir)
+            .arg("demo")
+            .arg(word)
+            .output()
+            .expect("run inshellah complete");
+        // A non-zero exit fails the test and shows the child's stderr.
+        assert!(
+            output.status.success(),
+            "stderr = {}",
+            String::from_utf8_lossy(&output.stderr)
+        );
+        String::from_utf8(output.stdout).expect("stdout")
+    };
+
+    // Empty word in argument position.
+    let argument_stdout = complete_word("");
+    assert_eq!(argument_stdout.trim(), "null", "stdout = {argument_stdout}");
+
+    // Word that already starts a flag.
+    let flag_stdout = complete_word("--");
+    assert!(
+        flag_stdout.contains(r#""value":"--verbose""#),
+        "stdout = {flag_stdout}"
+    );
+
+    // Best-effort cleanup: a failed delete must not fail the test.
+    let _ = fs::remove_dir_all(root);
+}
+
+/// A command given by absolute path after `sudo` must still yield the stub's `--verbose`
+/// flag, even though PATH is empty and nothing is cached beforehand.
+#[test]
+fn complete_resolves_absolute_path_after_elevation_wrapper() {
+    let root = unique_temp_dir("inshellah-absolute-elevation-complete");
+    let bin_dir = root.join("bin");
+    let cache_dir = root.join("cache");
+    fs::create_dir_all(&bin_dir).expect("bin dir");
+    fs::create_dir_all(&cache_dir).expect("cache dir");
+
+    // Stub command: prints a one-flag help text for `--help` or `-h` and exits 0;
+    // any other invocation exits 2.
+    let script = r#"#!/bin/sh
+if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
+  printf '%s\n' 'Usage: fakecmd [OPTIONS]' '' 'Options:' '  --verbose        verbose output'
+  exit 0
+fi
+exit 2
+"#;
+    let fakecmd = bin_dir.join("fakecmd");
+    fs::write(&fakecmd, script).expect("write fakecmd");
+    let mut mode = fs::metadata(&fakecmd).expect("metadata").permissions();
+    mode.set_mode(0o755);
+    fs::set_permissions(&fakecmd, mode).expect("chmod");
+
+    // PATH is emptied below, so the stub is reachable only through the absolute path
+    // that follows `sudo` on the completed line.
+    let mut cmd = Command::new(env!("CARGO_BIN_EXE_inshellah"));
+    cmd.args(["complete", "--dir"]);
+    cmd.arg(&cache_dir);
+    cmd.args(["--timeout-ms", "1000", "sudo"]);
+    cmd.arg(&fakecmd).arg("--");
+    cmd.env("PATH", "");
+    let output = cmd.output().expect("run inshellah complete");
+
+    assert!(
+        output.status.success(),
+        "stderr = {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    let stdout = String::from_utf8(output.stdout).expect("stdout");
+    assert!(
+        stdout.contains(r#""value":"--verbose""#),
+        "stdout = {stdout}"
+    );
+
+    // Best-effort cleanup: a failed delete must not fail the test.
+    let _ = fs::remove_dir_all(root);
+}
+/// Drives `complete` against a stub `adb` to check device, transport-id and package values.
+#[test]
+fn complete_adb_dynamic_values_use_live_devices_and_packages() {
+    let root = unique_temp_dir("inshellah-adb-dynamic-complete");
+    let bin_dir = root.join("bin");
+    let cache_dir = root.join("cache");
+    fs::create_dir_all(&bin_dir).expect("bin dir");
+    fs::create_dir_all(&cache_dir).expect("cache dir");
+    // Stub `adb`: strips one leading selector flag, then fakes the device and package listings.
+    let adb = bin_dir.join("adb");
+    fs::write(
+        &adb,
+        r#"#!/bin/sh
+selector=""
+case "$1" in
+  -s|--serial|--one-device)
+    selector="$2"
+    shift 2
+    ;;
+  -t|--transport-id)
+    selector="transport:$2"
+    shift 2
+    ;;
+  --serial=*)
+    selector="${1#--serial=}"
+    shift
+    ;;
+  --one-device=*)
+    selector="${1#--one-device=}"
+    shift
+    ;;
+  --transport-id=*)
+    selector="transport:${1#--transport-id=}"
+    shift
+    ;;
+esac
+
+if [ "$1" = "devices" ] && [ "$2" = "-l" ]; then
+  printf '%s\n' 'List of devices attached'
+  printf '%s\n' 'emulator-5554	device product:sdk_gphone_x86 model:Pixel_8 device:emu transport_id:1'
+  printf '%s\n' 'R58M123456	device product:oriole model:Pixel_6 device:oriole transport_id:2'
+  printf '%s\n' 'offline-1	offline transport_id:3'
+  exit 0
+fi
+
+if [ "$1" = "shell" ] && [ "$2" = "pm" ] && [ "$3" = "list" ] && [ "$4" = "packages" ]; then
+  case "$selector" in
+    emulator-5554)
+      printf '%s\n' 'package:com.example.emu'
+      printf '%s\n' 'package:org.example.shared'
+      ;;
+    transport:2)
+      printf '%s\n' 'package:com.example.transport'
+      printf '%s\n' 'package:org.example.transport'
+      ;;
+    *)
+      printf '%s\n' 'package:com.default.app'
+      printf '%s\n' 'package:/data/app/org.default.path/base.apk=org.default.path'
+      ;;
+  esac
+  exit 0
+fi
+
+exit 2
+"#,
+    )
+    .expect("write adb");
+    let mut perms = fs::metadata(&adb).expect("metadata").permissions();
+    perms.set_mode(0o755);
+    fs::set_permissions(&adb, perms).expect("chmod");
+    // Runs `inshellah complete` with only the stub directory on PATH and returns its stdout.
+    let run_complete = |args: &[&str]| -> String {
+        let mut cmd = Command::new(env!("CARGO_BIN_EXE_inshellah"));
+        cmd.arg("complete")
+            .arg("--dir")
+            .arg(&cache_dir)
+            .arg("--timeout-ms")
+            .arg("1000");
+        for arg in args {
+            cmd.arg(arg);
+        }
+        let output = cmd
+            .env("PATH", &bin_dir)
+            .output()
+            .expect("run inshellah complete");
+        assert!(
+            output.status.success(),
+            "stderr = {}",
+            String::from_utf8_lossy(&output.stderr)
+        );
+        String::from_utf8(output.stdout).expect("stdout")
+    };
+    // Value after `-s`: every serial the stub lists is expected, the offline one included.
+    let stdout = run_complete(&["adb", "-s", ""]);
+    assert!(
+        stdout.contains(r#""value":"emulator-5554""#),
+        "stdout = {stdout}"
+    );
+    assert!(
+        stdout.contains(r#""description":"device sdk gphone x86 Pixel 8""#),
+        "stdout = {stdout}"
+    );
+    assert!(
+        stdout.contains(r#""value":"R58M123456""#),
+        "stdout = {stdout}"
+    );
+    assert!(
+        stdout.contains(r#""value":"offline-1""#),
+        "stdout = {stdout}"
+    );
+    // `--serial=` form: candidates keep the flag prefix and are narrowed by the typed `R5`.
+    let prefixed_stdout = run_complete(&["adb", "--serial=R5"]);
+    assert!(
+        prefixed_stdout.contains(r#""value":"--serial=R58M123456""#),
+        "stdout = {prefixed_stdout}"
+    );
+    assert!(
+        !prefixed_stdout.contains(r#""value":"--serial=emulator-5554""#),
+        "stdout = {prefixed_stdout}"
+    );
+    // `--one-device` is expected to offer serials as well.
+    let one_device_stdout = run_complete(&["adb", "--one-device", ""]);
+    assert!(
+        one_device_stdout.contains(r#""value":"emulator-5554""#),
+        "stdout = {one_device_stdout}"
+    );
+    // Value after `-t`: transport ids, with the device serial leading the description.
+    let transport_stdout = run_complete(&["adb", "-t", ""]);
+    assert!(
+        transport_stdout.contains(r#""value":"1""#),
+        "stdout = {transport_stdout}"
+    );
+    assert!(
+        transport_stdout.contains(r#""description":"emulator-5554 device sdk gphone x86 Pixel 8""#),
+        "stdout = {transport_stdout}"
+    );
+    assert!(
+        transport_stdout.contains(r#""value":"2""#),
+        "stdout = {transport_stdout}"
+    );
+    // `--transport-id=` form: prefixed candidates, narrowed to the typed id.
+    let transport_prefixed_stdout = run_complete(&["adb", "--transport-id=2"]);
+    assert!(
+        transport_prefixed_stdout.contains(r#""value":"--transport-id=2""#),
+        "stdout = {transport_prefixed_stdout}"
+    );
+    assert!(
+        !transport_prefixed_stdout.contains(r#""value":"--transport-id=1""#),
+        "stdout = {transport_prefixed_stdout}"
+    );
+    // No selector: the stub's default package list; a `path=name` line must yield the name.
+    let uninstall_stdout = run_complete(&["adb", "uninstall", "org"]);
+    assert!(
+        uninstall_stdout.contains(r#""value":"org.default.path""#),
+        "stdout = {uninstall_stdout}"
+    );
+    assert!(
+        !uninstall_stdout.contains(r#""value":"com.default.app""#),
+        "stdout = {uninstall_stdout}"
+    );
+    // An earlier `-s <serial>` on the line must select that device's package list.
+    let clear_stdout = run_complete(&["adb", "-s", "emulator-5554", "shell", "pm", "clear", ""]);
+    assert!(
+        clear_stdout.contains(r#""value":"com.example.emu""#),
+        "stdout = {clear_stdout}"
+    );
+    assert!(
+        !clear_stdout.contains(r#""value":"com.example.transport""#),
+        "stdout = {clear_stdout}"
+    );
+    // Same with `-t <id>`, here with `--user 0` sitting between `force-stop` and the package.
+    let force_stop_stdout = run_complete(&[
+        "adb",
+        "-t",
+        "2",
+        "shell",
+        "am",
+        "force-stop",
+        "--user",
+        "0",
+        "com.",
+    ]);
+    assert!(
+        force_stop_stdout.contains(r#""value":"com.example.transport""#),
+        "stdout = {force_stop_stdout}"
+    );
+    assert!(
+        !force_stop_stdout.contains(r#""value":"com.example.emu""#),
+        "stdout = {force_stop_stdout}"
+    );
+    // The word right after `--user` is that flag's value; the test expects `null` for it.
+    let flag_value_stdout = run_complete(&["adb", "shell", "pm", "enable", "--user", ""]);
+    assert_eq!(
+        flag_value_stdout.trim(),
+        "null",
+        "stdout = {flag_value_stdout}"
+    );
+    // `-s` placed after `shell` must not offer serials; `null` is expected here too.
+    let shell_flag_stdout = run_complete(&["adb", "shell", "-s", ""]);
+    assert_eq!(
+        shell_flag_stdout.trim(),
+        "null",
+        "stdout = {shell_flag_stdout}"
+    );
+    // Best-effort cleanup: a failed delete must not fail the test.
+    let _ = fs::remove_dir_all(root);
+}
diff --git a/tests/self_completions.rs b/tests/self_completions.rs
new file mode 100644
index 0000000..14b8667
--- /dev/null
+++ b/tests/self_completions.rs
@@ -0,0 +1,31 @@
+use std::process::Command;
+
+/// `inshellah completions` must print an extern header for each subcommand listed below.
+#[test]
+fn inshellah_completions_include_all_subcommands() {
+    // Subcommands the `completions` output is required to cover.
+    const SUBCOMMANDS: [&str; 7] = [
+        "index",
+        "manpage",
+        "manpage-dir",
+        "complete",
+        "query",
+        "dump",
+        "completions",
+    ];
+
+    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
+        .arg("completions")
+        .output()
+        .expect("run inshellah completions");
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    assert!(output.status.success(), "stderr = {stderr}");
+    let stdout = String::from_utf8(output.stdout).expect("stdout");
+
+    // Each subcommand is looked up by its full quoted extern header.
+    for subcommand in SUBCOMMANDS {
+        let extern_name = format!("export extern \"inshellah {subcommand}\"");
+        let present = stdout.contains(&extern_name);
+        assert!(present, "missing {extern_name}; stdout = {stdout}");
+    }
+}