init

2026-03-18 15:40:47 +11:00 · 2026-03-18 15:40:47 +11:00 · 55e74c6ed7
commit 55e74c6ed7
22 changed files with 4821 additions and 0 deletions
--- a/.envrc
+++ b/.envrc
@ -0,0 +1 @@
+use flake
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
+/target
+/_build
+/.direnv
--- a/README.md
+++ b/README.md
@ -0,0 +1,11 @@
+# inshellah
+
+nushell completions engine. indexes completions from manpages, native
+generators, and `--help` output, then serves them to nushell's external
+completer.
+
+see `doc/` for details:
+
+- [nushell integration](doc/nushell-integration.md) — setup, usage, examples
+- [nixos module](doc/nixos.md) — automatic build-time indexing
+- [runtime completions](doc/runtime-completions.md) — on-the-fly caching via the completer
--- a/bin/.ocamlformat
+++ b/bin/.ocamlformat
--- a/bin/dune
+++ b/bin/dune
@ -0,0 +1,4 @@
+(executable
+ (public_name inshellah)
+ (name main)
+ (libraries inshellah))
--- a/bin/main.ml
+++ b/bin/main.ml
@ -0,0 +1,998 @@
+(* main.ml — cli entry point for inshellah, a nushell completions engine.
+ *
+ * inshellah generates nushell "extern" definitions for external commands by
+ * parsing their manpages and --help output. it has two main modes:
+ *
+ *   1. indexing (batch): scan a prefix directory's bin/ and share/man/,
+ *      extract completions for every binary, and write them to a cache dir.
+ *      this is typically run once per nix profile or system update.
+ *
+ *   2. completing (interactive): given a command and its current arguments,
+ *      look up the cached data and return json completion candidates for
+ *      nushell's custom completer protocol.
+ *
+ * the indexing pipeline for each binary:
+ *   a. classify the binary (skip? try --help? try native completions?)
+ *   b. if the tool has native nushell completion support, try various
+ *      subcommand patterns ("completions nushell", "--completion nushell", etc.)
+ *   c. otherwise, run the tool with --help/-h and parse the output
+ *   d. recursively resolve subcommands (depth-limited to 5)
+ *   e. after binaries, parse manpages for any commands not yet covered
+ *
+ * parallelism: indexing forks per binary, and subcommand resolution forks
+ * per subcommand. results are marshaled back via pipes. this gives good
+ * throughput on multi-core systems while keeping the code simple (no threads,
+ * no async runtime — just unix fork/pipe/waitpid).
+ *)
+
+open Inshellah.Parser
+open Inshellah.Manpage
+open Inshellah.Nushell
+open Inshellah.Store
+
+module SSet = Set.Make(String)
+
+(* write the cli usage text to stderr, then terminate with exit status 1.
+ * invoked whenever the command line does not name a valid subcommand. *)
+let usage () =
+  prerr_string
+    {|inshellah - nushell completions engine
+
+Usage:
+  inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
+      Index completions into a directory of JSON/nu files.
+      PREFIX is a directory containing bin/ and share/man/.
+      Default dir: $XDG_CACHE_HOME/inshellah
+      --ignore FILE     skip listed commands entirely
+      --help-only FILE  skip manpages for listed commands, use --help instead
+  inshellah complete CMD [ARGS...] [--dir PATH] [--system-dir PATH]
+      Nushell custom completer. Outputs JSON completion candidates.
+      Falls back to --help resolution if command is not indexed.
+  inshellah query CMD [--dir PATH] [--system-dir PATH]
+      Print stored completion data for CMD.
+  inshellah dump [--dir PATH] [--system-dir PATH]
+      List indexed commands.
+  inshellah manpage FILE            Parse a manpage and emit nushell extern
+  inshellah manpage-dir DIR         Batch-process manpages under DIR
+
+|};
+  exit 1
+
+(* manpage sections that contain command documentation.
+ * section 1 = user commands, section 8 = system administration commands.
+ * the manpage directory walks in this file iterate over this list and look
+ * for one "man<N>" subdirectory per entry. *)
+let command_sections = [1; 8]
+
+(* substring test: true when [sub] occurs somewhere in [s].
+ * [sub] is matched literally (Str.regexp_string quotes any regex
+ * metacharacters); the position of the match is discarded. *)
+let contains_str s sub =
+  let literal = Str.regexp_string sub in
+  match Str.search_forward literal s 0 with
+  | _ -> true
+  | exception Not_found -> false
+
+(* heuristic: does [text] look like nushell source code?
+ * anything of 20 characters or fewer is rejected outright (short error
+ * messages would otherwise produce false positives). longer text qualifies
+ * when it contains "export extern", or "export def", or both "module " and
+ * "export". *)
+let is_nushell_source text =
+  let has = contains_str text in
+  if String.length text <= 20 then false
+  else
+    has "export extern"
+    || has "export def"
+    || (has "module " && has "export")
+
+(* derive the command name from a manpage path.
+ * only the basename is considered; a trailing ".gz" is dropped first, then
+ * the last extension (the section number) is removed:
+ *   "man1/ls.1.gz" → "ls.1" → "ls"
+ * a name without any extension is returned unchanged. *)
+let cmd_name_of_manpage path =
+  let file = Filename.basename path in
+  let uncompressed =
+    match Filename.chop_suffix_opt ~suffix:".gz" file with
+    | Some stem -> stem
+    | None -> file in
+  Filename.remove_extension uncompressed
+
+(* environment handed to every child process we spawn, computed once on
+ * first use. it is the current environment minus the display/session
+ * variables listed below, so that gui tools run with --help do not try to
+ * open windows, pop up dialogs or hang waiting for a display connection
+ * (ckb-next was one such tool). *)
+let safe_env = lazy (
+  let blocked =
+    ["DISPLAY="; "WAYLAND_DISPLAY="; "DBUS_SESSION_BUS_ADDRESS="; "XAUTHORITY="] in
+  let is_blocked binding =
+    List.exists (fun prefix -> String.starts_with ~prefix binding) blocked in
+  Unix.environment ()
+  |> Array.to_list
+  |> List.filter (fun binding -> not (is_blocked binding))
+  |> Array.of_list)
+
+(* append whatever is currently readable on [rd] to [buf] without blocking.
+ * each round polls [rd] with a zero-timeout select and reads up to 8192
+ * bytes; the loop ends as soon as nothing is readable, a read returns 0
+ * (end of file) or a read fails with a unix error. safe to call repeatedly —
+ * the fork/pipe sites call it to keep pipes drained so children never block
+ * on write. *)
+let drain_fd rd buf =
+  let chunk = Bytes.create 8192 in
+  let rec pump () =
+    match Unix.select [rd] [] [] 0.0 with
+    | ([], _, _) -> ()
+    | (_ :: _, _, _) ->
+      (match Unix.read rd chunk 0 8192 with
+       | 0 -> ()
+       | n -> Buffer.add_subbytes buf chunk 0 n; pump ()
+       | exception Unix.Unix_error _ -> ()) in
+  pump ()
+
+(* run a command with a timeout, capturing its stdout+stderr.
+ *
+ * [args] is the full argv (program first); [timeout_ms] is the read
+ * deadline in milliseconds. the child gets stdin from /dev/null, stdout and
+ * stderr merged onto one pipe, and the sanitized environment [safe_env].
+ *
+ * peculiarity: the child is started with /tmp as working directory so tools
+ * that create side-effect files (like ckb-next-dev-detect-report.gz) don't
+ * pollute the user's working directory. the cwd is changed just before the
+ * spawn and restored right after, even when the spawn fails.
+ *
+ * peculiarity: the select timeout is capped at 0.05s per iteration so the
+ * deadline is checked frequently even when no data is available.
+ *
+ * returns None when [args] is empty, when the process could not be started,
+ * or when it produced no output at all. if the deadline passes (or reading
+ * fails) the child is killed with SIGKILL; any output captured up to that
+ * point is still returned.
+ *
+ * every descriptor opened here is closed exactly once on every path; in
+ * particular a failed spawn yields None instead of a second close raising
+ * EBADF.
+ *
+ * note(review): after end-of-file the child is reaped with a blocking
+ * waitpid, so a child that closes its stdout/stderr but keeps running would
+ * hold this call past the deadline. *)
+let run_cmd args timeout_ms =
+  let close_quiet fd = try Unix.close fd with Unix.Unix_error _ -> () in
+  match args with
+  | [] -> None
+  | prog :: _ ->
+    let argv = Array.of_list args in
+    let (rd, wr) = Unix.pipe () in
+    let spawn () =
+      let devnull = Unix.openfile "/dev/null" [Unix.O_RDONLY] 0 in
+      Fun.protect ~finally:(fun () -> close_quiet devnull) (fun () ->
+        let saved_cwd = Sys.getcwd () in
+        Sys.chdir "/tmp";
+        Fun.protect
+          ~finally:(fun () -> try Sys.chdir saved_cwd with Sys_error _ -> ())
+          (fun () ->
+            Unix.create_process_env prog argv (Lazy.force safe_env)
+              devnull wr wr)) in
+    let pid =
+      match spawn () with
+      | pid -> Some pid
+      | exception (Unix.Unix_error _ | Sys_error _) -> None in
+    (* the parent never writes; keeping [wr] open would also hide eof *)
+    close_quiet wr;
+    (match pid with
+     | None -> close_quiet rd; None
+     | Some pid ->
+       let buf = Buffer.create 4096 in
+       let chunk = Bytes.create 8192 in
+       let deadline =
+         Unix.gettimeofday () +. (float_of_int timeout_ms /. 1000.0) in
+       (* returns true once the pipe reaches eof, false when the deadline
+          passed or reading failed (the child must then be killed) *)
+       let rec read_until_eof () =
+         let remaining = deadline -. Unix.gettimeofday () in
+         if remaining <= 0.0 then false
+         else
+           match Unix.select [rd] [] [] (min remaining 0.05) with
+           | (_ :: _, _, _) ->
+             (match Unix.read rd chunk 0 (Bytes.length chunk) with
+              | 0 -> true
+              | n -> Buffer.add_subbytes buf chunk 0 n; read_until_eof ()
+              | exception Unix.Unix_error (Unix.EINTR, _, _) ->
+                read_until_eof ()
+              | exception Unix.Unix_error _ -> false)
+           | _ -> read_until_eof ()
+           | exception Unix.Unix_error (Unix.EINTR, _, _) -> read_until_eof ()
+           | exception Unix.Unix_error _ -> false in
+       let finished = read_until_eof () in
+       close_quiet rd;
+       if not finished then
+         (try Unix.kill pid Sys.sigkill with Unix.Unix_error _ -> ());
+       (* always reap, so no zombie is left behind *)
+       (try ignore (Unix.waitpid [] pid) with Unix.Unix_error _ -> ());
+       if Buffer.length buf > 0 then Some (Buffer.contents buf) else None)
+
+(* true when [path] (after following symlinks, as Unix.stat does) is a
+ * regular file with at least one of the user/group/other execute bits set.
+ * any stat failure counts as "not executable". *)
+let is_executable path =
+  match Unix.stat path with
+  | { Unix.st_kind = Unix.S_REG; st_perm; _ } -> st_perm land 0o111 <> 0
+  | _ -> false
+  | exception Unix.Unix_error _ -> false
+
+(* check if a file is a script by looking for a "#!" shebang in its first
+ * two bytes. symlinks are resolved via realpath before reading.
+ *
+ * returns false when the path cannot be resolved or opened, when the file
+ * is shorter than two bytes, or when reading fails. the channel is closed
+ * on every path, including read errors. *)
+let is_script path =
+  match open_in_bin (Unix.realpath path) with
+  | exception (Unix.Unix_error _ | Sys_error _) -> false
+  | ic ->
+    Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () ->
+      match really_input_string ic 2 with
+      | magic -> magic = "#!"
+      | exception (End_of_file | Sys_error _) -> false)
+
+(* scan an elf binary for string needles without loading the entire file.
+ *
+ * returns a hashtable whose keys are the needles that were found. the file
+ * (symlinks resolved via realpath) is only scanned when it starts with the
+ * elf magic "\x7fELF"; for anything else — including files shorter than the
+ * four magic bytes — the table stays empty.
+ *
+ * the file is read in chunks of up to 64kb. the last max_needle bytes of
+ * each round are carried over to the front of the buffer so needles that
+ * span a chunk boundary are still seen. scanning stops early once every
+ * needle has been found.
+ *
+ * peculiarity: when the file cannot be resolved, opened or read, all
+ * needles are marked as found. this is a conservative fallback — we'd
+ * rather try --help on an unreadable binary than skip it.
+ *
+ * the comparison is a manual byte-by-byte search rather than Str because
+ * this runs on every binary in the prefix. the channel is closed on every
+ * path, including errors. *)
+let elf_scan path needles =
+  let found = Hashtbl.create 4 in
+  let mark needle = Hashtbl.replace found needle true in
+  let missing () = List.filter (fun n -> not (Hashtbl.mem found n)) needles in
+  (* does [needle] occur in [buf] at offset [pos]? stops at first mismatch *)
+  let occurs_at buf pos needle =
+    let nlen = String.length needle in
+    let rec go j =
+      j >= nlen || (Bytes.get buf (pos + j) = needle.[j] && go (j + 1)) in
+    go 0 in
+  (* does [needle] occur anywhere in the first [total] bytes of [buf]? *)
+  let search buf total needle =
+    let last = total - String.length needle in
+    let rec go i = i <= last && (occurs_at buf i needle || go (i + 1)) in
+    go 0 in
+  let scan ic =
+    let magic = Bytes.create 4 in
+    let is_elf =
+      match really_input ic magic 0 4 with
+      | () -> Bytes.to_string magic = "\x7fELF"
+      | exception End_of_file -> false in
+    if is_elf then begin
+      let max_needle =
+        List.fold_left (fun m n -> max m (String.length n)) 0 needles in
+      let chunk_size = 65536 in
+      let buf = Bytes.create (chunk_size + max_needle) in
+      let rec loop carry =
+        match missing () with
+        | [] -> ()
+        | todo ->
+          let n = input ic buf carry chunk_size in
+          if n > 0 then begin
+            let total = carry + n in
+            List.iter (fun needle ->
+              if search buf total needle then mark needle) todo;
+            (* keep the tail so boundary-spanning needles are not missed *)
+            let keep = min max_needle total in
+            Bytes.blit buf (total - keep) buf 0 keep;
+            loop keep
+          end in
+      loop 0
+    end in
+  (try
+     let ic = open_in_bin (Unix.realpath path) in
+     Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () -> scan ic)
+   with Unix.Unix_error _ | Sys_error _ | End_of_file ->
+     List.iter mark needles);
+  found
+
+(* detect nix-generated c wrappers and extract the real binary path.
+ * nix's makeCWrapper creates small c programs that set up the environment
+ * and exec the real binary. these wrappers won't contain "-h" or
+ * "completion" themselves, so elf_scan would say "skip". this function
+ * looks inside the wrapper file for the marker "makeCWrapper" and, if it is
+ * present, for the first /nix/store/.../bin/... path.
+ *
+ * returns Some target only when the marker is present, a store path is
+ * found and that path exists on disk; None in every other case, including
+ * files that cannot be resolved, opened or read.
+ *
+ * peculiarity: files larger than 64kb are not read at all, to avoid pulling
+ * a large non-wrapper binary into memory.
+ *
+ * the channel is closed on every path, including read errors. *)
+let nix_wrapper_target path =
+  let store_path =
+    Str.regexp "/nix/store/[a-z0-9]+-[^' \n\r\x00]+/bin/[a-zA-Z0-9._-]+" in
+  let read_small ic =
+    let n = in_channel_length ic in
+    if n > 65536 then None else Some (really_input_string ic n) in
+  match open_in_bin (Unix.realpath path) with
+  | exception (Unix.Unix_error _ | Sys_error _) -> None
+  | ic ->
+    let contents =
+      Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () ->
+        try read_small ic with End_of_file | Sys_error _ -> None) in
+    match contents with
+    | Some s when contains_str s "makeCWrapper" ->
+      (match Str.search_forward store_path s 0 with
+       | _ ->
+         let target = Str.matched_string s in
+         if Sys.file_exists target then Some target else None
+       | exception Not_found -> None)
+    | _ -> None
+
+(* name-based filter: true for binaries that should never be indexed.
+ * rejected are: the empty name, "-", names starting with '.', names
+ * starting with "lib", names ending in "-daemon", "-wrapped" or ".so", and
+ * names without a single ascii letter or digit. *)
+let skip_name name =
+  let is_alnum c =
+    match c with
+    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
+    | _ -> false in
+  let has_suffix suffix = String.ends_with ~suffix name in
+  match name with
+  | "" | "-" -> true
+  | _ ->
+    name.[0] = '.'
+    || String.starts_with ~prefix:"lib" name
+    || List.exists has_suffix ["-daemon"; "-wrapped"; ".so"]
+    || not (String.exists is_alnum name)
+
+(* classification result for a binary, produced by classify_binary and
+ * consumed by the indexer to pick a strategy.
+ *   Skip               — don't index this binary at all
+ *   Try_help           — only try --help (scripts, binaries without "completion" string)
+ *   Try_native_and_help — try native nushell completion first, fall back to --help *)
+type bin_class = Skip | Try_help | Try_native_and_help
+
+(* decide how the binary [name] inside [bindir] should be indexed.
+ * checks, in order (first hit wins):
+ *   1. nushell builtin, or rejected by skip_name      → Skip
+ *   2. not an executable regular file                 → Skip
+ *   3. starts with a shebang                          → Try_help
+ *   4. elf binary containing the string "completion"  → Try_native_and_help
+ *   5. elf binary containing the string "-h"          → Try_help
+ *   6. nix c wrapper with a resolvable target         → Try_help
+ *   7. anything else                                  → Skip *)
+let classify_binary bindir name =
+  let path = Filename.concat bindir name in
+  if is_nushell_builtin name || skip_name name then Skip
+  else if not (is_executable path) then Skip
+  else if is_script path then Try_help
+  else begin
+    let hits = elf_scan path ["-h"; "completion"] in
+    match Hashtbl.mem hits "completion", Hashtbl.mem hits "-h" with
+    | true, _ -> Try_native_and_help
+    | false, true -> Try_help
+    | false, false ->
+      if Option.is_some (nix_wrapper_target path) then Try_help else Skip
+  end
+
+(* number of cpu cores, taken as the count of lines in /proc/cpuinfo that
+ * start with "processor" (never less than 1). if the file cannot be opened
+ * or read the answer is 4. *)
+let num_cores () =
+  let rec count ic acc =
+    match input_line ic with
+    | line ->
+      let hit = String.starts_with ~prefix:"processor" line in
+      count ic (if hit then acc + 1 else acc)
+    | exception End_of_file -> acc in
+  try
+    let ic = open_in "/proc/cpuinfo" in
+    let total = count ic 0 in
+    close_in ic;
+    max 1 total
+  with _ -> 4
+
+(* ask a binary for its own nushell completions.
+ * each argument pattern below is appended to [bin_path] and run with a
+ * 500ms timeout, in order; the first output that passes is_nushell_source
+ * is returned. None when no pattern yields nushell source.
+ *
+ * the patterns cover: cobra (go), clap (rust), click (python), and various
+ * ad-hoc implementations. *)
+let try_native_completion bin_path =
+  let probes = [
+    ["completions"; "nushell"];
+    ["completion"; "nushell"];
+    ["--completions"; "nushell"];
+    ["--completion"; "nushell"];
+    ["generate-completion"; "nushell"];
+    ["--generate-completion"; "nushell"];
+    ["shell-completions"; "nushell"];
+  ] in
+  let attempt extra =
+    match run_cmd (bin_path :: extra) 500 with
+    | Some text when is_nushell_source text -> Some text
+    | Some _ | None -> None in
+  List.find_map attempt probes
+
+(* parse one manpage file.
+ * the command name comes from the synopsis when one can be extracted,
+ * otherwise from the file name. returns None when that name is a nushell
+ * builtin; otherwise Some (cmd, result, subs) where
+ *   - result is the parse of the whole page; when the page has
+ *     per-subcommand sections, its subcommand list is replaced by the
+ *     (name, description) pairs of those sections
+ *   - subs pairs "cmd sub" with the parse result of each such section. *)
+let parse_manpage_for_command file =
+  let contents = read_manpage_file file in
+  let cmd =
+    Option.value (extract_synopsis_command contents)
+      ~default:(cmd_name_of_manpage file) in
+  if is_nushell_builtin cmd then None
+  else begin
+    let parsed = parse_manpage_string contents in
+    let sections = extract_subcommand_sections contents in
+    let result =
+      match sections with
+      | [] -> parsed
+      | _ :: _ ->
+        { parsed with subcommands = List.map (fun (name, desc, _) ->
+          { name; desc }) sections } in
+    let subs =
+      List.map (fun (name, _, sub_result) -> (cmd ^ " " ^ name, sub_result))
+        sections in
+    Some (cmd, result, subs)
+  end
+
+(* "inshellah manpage FILE" — parse one manpage and print the generated
+ * nushell extern on stdout. prints nothing when the page yields no command
+ * or no entries. *)
+let cmd_manpage file =
+  match parse_manpage_for_command file with
+  | Some (cmd, result, _) ->
+    if result.entries <> [] then print_string (generate_extern cmd result)
+  | None -> ()
+
+(* "inshellah manpage-dir DIR" — run cmd_manpage on every file found in the
+ * man<N> subdirectories of [dir], for each section in command_sections.
+ * a page that raises is skipped; the batch carries on. *)
+let cmd_manpage_dir dir =
+  let emit_section section =
+    let subdir = Filename.concat dir (Printf.sprintf "man%d" section) in
+    if is_dir subdir then
+      Sys.readdir subdir
+      |> Array.iter (fun file ->
+           try cmd_manpage (Filename.concat subdir file) with _ -> ()) in
+  List.iter emit_section command_sections
+
+(* safety limit for subcommand resolution: once this many results have been
+ * collected for one binary, the help resolver stops enqueueing further
+ * subcommands (already queued ones are still processed). prevents runaway
+ * recursion on tools with enormous subcommand trees. *)
+let max_resolve_results = 500
+
+(* exception-safe front end to parse_manpage_for_command.
+ * any exception raised while parsing becomes None; a successful parse is
+ * passed through only when it carries at least one entry or at least one
+ * per-subcommand result. *)
+let process_manpage file =
+  match parse_manpage_for_command file with
+  | exception _ -> None
+  | None -> None
+  | Some (_, result, subs) as found ->
+    if result.entries = [] && subs = [] then None else found
+
+(* set of command names that have a manpage under [mandir].
+ * every file in the man<N> subdirectories (N from command_sections) adds
+ * the name computed by cmd_name_of_manpage. the indexer uses this set to
+ * skip --help for commands whose manpage will be parsed later. *)
+let manpaged_commands mandir =
+  let add_section names section =
+    let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
+    if not (is_dir subdir) then names
+    else
+      let found =
+        Sys.readdir subdir
+        |> Array.to_seq
+        |> Seq.map cmd_name_of_manpage in
+      SSet.add_seq found names in
+  List.fold_left add_section SSet.empty command_sections
+
+(* parallel structured help resolver — recursively resolves a command and
+ * all its subcommands by running --help on each, forking a child process
+ * per subcommand for parallelism.
+ *
+ * arguments: [cmd] is the program to run, [rest] the subcommand words
+ * already on its command line, [name] the display name of that starting
+ * point, [timeout] the per-invocation timeout in ms handed to run_cmd.
+ * returns the list of (display name, help_result) pairs in the order the
+ * results were collected.
+ *
+ * the resolver works as a breadth-first queue:
+ *   1. start with the root command in the queue
+ *   2. fork a child for each queued item (up to num_cores concurrent)
+ *   3. the child runs --help (falling back to -h when that yields no
+ *      output), parses the text and marshals the result via pipe
+ *   4. the parent collects results and enqueues discovered subcommands
+ *   5. repeat until queue is empty and all children have finished
+ *
+ * depth is limited to 5 levels and total results to max_resolve_results
+ * to prevent runaway recursion on pathological command trees.
+ *
+ * peculiarity: the child process detects "self-listing" — when a subcommand's
+ * --help lists itself as a subcommand (e.g. "git help" listing "help" as a
+ * subcommand of itself). this would cause infinite recursion, so such results
+ * are discarded.
+ *
+ * peculiarity: children close all pipe fds from other pending children
+ * immediately after fork to prevent fd leaks. the parent drains pipes
+ * regularly to prevent children from blocking on full pipe buffers.
+ *
+ * peculiarity: a forked child must never return into the caller's code.
+ * everything it does is wrapped so that any exception becomes a None
+ * result, and it leaves through Unix._exit so that channel buffers
+ * inherited from the parent are not flushed a second time. *)
+let help_resolve_par ?(timeout=200) cmd rest name =
+  let max_jobs = num_cores () in
+  let queue = Queue.create () in
+  Queue.push (rest, name, 0) queue;
+  let results = ref [] in
+  (* pending: (pid, rd, buf, rest, name, depth) *)
+  let pending = ref [] in
+  (* a child has finished: read the rest of its pipe, decode its result,
+     record it and enqueue the subcommands it reported *)
+  let collect rd buf q_rest q_name q_depth =
+    drain_fd rd buf;
+    (try Unix.close rd with _ -> ());
+    let data = Buffer.contents buf in
+    let result : (help_result * subcommand list) option =
+      if String.length data > 0 then
+        try Marshal.from_string data 0 with _ -> None
+      else None in
+    match result with
+    | None -> ()
+    | Some (r, subs) ->
+      let at_limit = q_depth >= 5 || List.length !results >= max_resolve_results in
+      results := (q_name, r) :: !results;
+      if not at_limit then
+        List.iter (fun (sc : subcommand) ->
+          Queue.push (q_rest @ [sc.name], q_name ^ " " ^ sc.name, q_depth + 1) queue
+        ) subs in
+  (* drain every pending pipe and collect the children that have exited *)
+  let reap () =
+    pending := List.filter (fun (pid, rd, buf, q_rest, q_name, q_depth) ->
+      drain_fd rd buf;
+      match Unix.waitpid [Unix.WNOHANG] pid with
+      | (0, _) -> true
+      | _ -> collect rd buf q_rest q_name q_depth; false
+      | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
+        (try Unix.close rd with _ -> ()); false
+    ) !pending in
+  (* block until fewer than max_jobs children are running *)
+  let wait_for_slot () =
+    while List.length !pending >= max_jobs do
+      reap ();
+      if List.length !pending >= max_jobs then begin
+        let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
+        ignore (Unix.select fds [] [] 0.05)
+      end
+    done in
+  while not (Queue.is_empty queue) || !pending <> [] do
+    while not (Queue.is_empty queue) do
+      let (q_rest, q_name, q_depth) = Queue.pop queue in
+      wait_for_slot ();
+      let (rd, wr) = Unix.pipe () in
+      let pid = Unix.fork () in
+      if pid = 0 then begin
+        Unix.close rd;
+        List.iter (fun (_, prd, _, _, _, _) ->
+          try Unix.close prd with _ -> ()) !pending;
+        let result =
+          try
+            let text = match run_cmd (cmd :: q_rest @ ["--help"]) timeout with
+              | Some _ as r -> r
+              | None -> run_cmd (cmd :: q_rest @ ["-h"]) timeout in
+            match text with
+            | None -> None
+            | Some text ->
+              (match parse_help text with
+               | Error _ -> None
+               | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] -> None
+               | Ok r ->
+                 let self_listed = match q_rest with
+                   | [] -> false
+                   | _ ->
+                     let leaf = List.nth q_rest (List.length q_rest - 1) in
+                     List.exists (fun (sc : subcommand) -> sc.name = leaf) r.subcommands in
+                 if self_listed then None
+                 else
+                   let at_limit = q_depth >= 5 in
+                   let subs = if at_limit then [] else r.subcommands in
+                   Some (r, subs))
+          with _ -> None in
+        (* best effort: if the write fails the parent simply sees no data *)
+        (try
+           let oc = Unix.out_channel_of_descr wr in
+           Marshal.to_channel oc (result : (help_result * subcommand list) option) [];
+           close_out oc
+         with _ -> ());
+        Unix._exit 0
+      end else begin
+        Unix.close wr;
+        pending := (pid, rd, Buffer.create 4096, q_rest, q_name, q_depth) :: !pending
+      end
+    done;
+    if !pending <> [] then begin
+      reap ();
+      if !pending <> [] && Queue.is_empty queue then begin
+        let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
+        ignore (Unix.select fds [] [] 0.05)
+      end
+    end
+  done;
+  List.rev !results
+
+(* "inshellah index" — the main indexing command.
+ * processes all binaries and manpages in the given prefix directories,
+ * writing completion data to the cache dir.
+ *
+ * the pipeline has two phases:
+ *
+ * phase 1 (binaries): fork one child per binary. each child:
+ *   - tries native nushell completions (if classified as Try_native_and_help)
+ *   - falls back to help_resolve_par (which itself forks per subcommand)
+ *   - marshals the result back via pipe as a tagged variant:
+ *     `Native of string — raw nushell source
+ *     `Parsed of (string * help_result) list — parsed flag data
+ *     `None — nothing useful extracted
+ *
+ * phase 2 (manpages): sequentially parse manpages for commands not yet
+ *   covered by phase 1. manpages are more reliable than --help for many
+ *   gnu tools, but slower to process.
+ *
+ * commands on the ignorelist are skipped entirely. commands on the
+ * help_only list skip manpage parsing and only use --help. commands
+ * with manpages skip --help in phase 1 (they'll be handled in phase 2).
+ *
+ * peculiarity: the done_cmds set tracks which commands have already been
+ * indexed to prevent duplicates across phases and across multiple prefix
+ * directories. *)
+
+(* known privilege-escalation wrappers — defined here (before cmd_index and
+ * cmd_complete) because both need the list: cmd_index writes @complete
+ * external stubs, and cmd_complete strips the wrapper to find the real command.
+ * entries are bare command names, compared as-is. *)
+let elevation_commands =
+  ["sudo"; "run0"; "doas"; "pkexec"; "su"; "calife"; "sux"; "sudoedit";
+   "please"; "super"; "priv"]
+
+(* "inshellah index" — index every (bindir, mandir) pair into [dir].
+ *
+ * [bindirs] and [mandirs] are walked pairwise (they must have the same
+ * length); [ignorelist] and [help_only] are sets of command names.
+ *
+ * per pair, phase 1 forks one child per classified binary and collects a
+ * marshaled `Native / `Parsed / `None result from each; phase 2 then parses
+ * the manpages sequentially. [done_cmds] records every command already
+ * handled so that the first result for a name wins, across both phases and
+ * across prefixes — this applies to native results as well.
+ *
+ * forked children leave through Unix._exit and the one message printed
+ * before forking is flushed immediately, so buffered output of the parent
+ * is never emitted again by a child. *)
+let cmd_index bindirs mandirs ignorelist help_only dir =
+  ensure_dir dir;
+  let done_cmds = ref SSet.empty in
+  let n_results = ref 0 in
+  let index_bindir bindir mandir =
+    if not (is_dir bindir) then
+      (* "%!" flushes now: text left in the channel buffer would be
+         inherited by children forked for later prefixes *)
+      Printf.eprintf "skipping %s (not found)\n%!" bindir
+    else begin
+      let bins = Sys.readdir bindir in
+      Array.sort String.compare bins;
+      let manpaged = if is_dir mandir
+        then manpaged_commands mandir else SSet.empty in
+      let max_jobs = num_cores () in
+      (* ignorelist beats everything; help_only forces the --help path even
+         when a manpage exists; otherwise a manpage defers to phase 2 *)
+      let classified = Array.map (fun name ->
+        if SSet.mem name ignorelist then (name, Skip)
+        else if SSet.mem name help_only then (name, classify_binary bindir name)
+        else if SSet.mem name manpaged then (name, Skip)
+        else (name, classify_binary bindir name)
+      ) bins in
+      let pending = ref [] in
+      (* a child has finished: decode its result and write it out *)
+      let process_result name rd buf =
+        drain_fd rd buf;
+        (try Unix.close rd with _ -> ());
+        let data = Buffer.contents buf in
+        if String.length data > 0 then begin
+          let result : [`Native of string | `Parsed of (string * help_result) list | `None] =
+            try Marshal.from_string data 0 with _ -> `None in
+          (match result with
+          | `Native src ->
+            (* same first-wins rule as for parsed results *)
+            if not (SSet.mem name !done_cmds) then begin
+              write_native ~dir name src;
+              incr n_results
+            end
+          | `Parsed pairs ->
+            List.iter (fun (cmd_name, r) ->
+              if not (SSet.mem cmd_name !done_cmds) then begin
+                write_result ~dir ~source:"help" cmd_name r;
+                done_cmds := SSet.add cmd_name !done_cmds;
+                incr n_results
+              end
+            ) pairs
+          | `None -> ())
+        end;
+        (* mark the binary as handled even when nothing was extracted *)
+        done_cmds := SSet.add name !done_cmds in
+      let reap () =
+        pending := List.filter (fun (pid, rd, buf, name) ->
+          drain_fd rd buf;
+          match Unix.waitpid [Unix.WNOHANG] pid with
+          | (0, _) -> true
+          | _ ->
+            process_result name rd buf;
+            false
+          | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
+            (try Unix.close rd with _ -> ()); false
+        ) !pending in
+      let wait_for_slot () =
+        while List.length !pending >= max_jobs do
+          reap ();
+          if List.length !pending >= max_jobs then begin
+            let fds = List.map (fun (_, rd, _, _) -> rd) !pending in
+            ignore (Unix.select fds [] [] 0.05)
+          end
+        done in
+      (* Phase 1: binaries *)
+      Array.iter (fun (name, cls) ->
+        match cls with
+        | Skip -> ()
+        | Try_help | Try_native_and_help ->
+          wait_for_slot ();
+          let (rd, wr) = Unix.pipe () in
+          let pid = Unix.fork () in
+          if pid = 0 then begin
+            Unix.close rd;
+            List.iter (fun (_, prd, _, _) ->
+              try Unix.close prd with _ -> ()) !pending;
+            let result =
+              try
+                let path = Filename.concat bindir name in
+                let native = match cls with
+                  | Try_native_and_help -> try_native_completion path
+                  | _ -> None in
+                match native with
+                | Some src -> `Native src
+                | None ->
+                  let pairs = help_resolve_par ~timeout:200 path [] name in
+                  if pairs <> [] then `Parsed pairs else `None
+              with _ -> `None in
+            (* best effort: if the write fails the parent sees no data *)
+            (try
+               let oc = Unix.out_channel_of_descr wr in
+               Marshal.to_channel oc
+                 (result : [`Native of string | `Parsed of (string * help_result) list | `None]) [];
+               close_out oc
+             with _ -> ());
+            (* _exit: do not flush buffers inherited from the parent *)
+            Unix._exit 0
+          end else begin
+            Unix.close wr;
+            pending := (pid, rd, Buffer.create 4096, name) :: !pending
+          end
+      ) classified;
+      while !pending <> [] do
+        reap ();
+        if !pending <> [] then begin
+          let fds = List.map (fun (_, rd, _, _) -> rd) !pending in
+          ignore (Unix.select fds [] [] 0.05)
+        end
+      done;
+      (* Phase 2: manpages *)
+      if is_dir mandir then
+        List.iter (fun section ->
+          let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
+          if is_dir subdir then begin
+            let files = Sys.readdir subdir in
+            Array.sort String.compare files;
+            Array.iter (fun file ->
+              let base_cmd = cmd_name_of_manpage file in
+              if SSet.mem base_cmd help_only then ()
+              else match process_manpage (Filename.concat subdir file) with
+              | None -> ()
+              | Some (cmd, result, subs) ->
+                if not (SSet.mem cmd !done_cmds) then begin
+                  write_result ~dir ~source:"manpage" cmd result;
+                  done_cmds := SSet.add cmd !done_cmds;
+                  incr n_results
+                end;
+                List.iter (fun (sub_cmd, sub_result) ->
+                  if not (SSet.mem sub_cmd !done_cmds) then begin
+                    write_result ~dir ~source:"manpage" sub_cmd sub_result;
+                    done_cmds := SSet.add sub_cmd !done_cmds;
+                    incr n_results
+                  end
+                ) subs
+            ) files
+          end
+        ) command_sections
+    end in
+  List.iter2 index_bindir bindirs mandirs;
+  (* write @complete external stubs for elevation commands (sudo, doas, etc.)
+   * so nushell routes their completions through the external completer.
+   * without this, nushell hardcodes sudo/doas to show command-name completion
+   * and never calls the external completer for their own flags. *)
+  List.iter (fun cmd ->
+    let json_path = Filename.concat dir (filename_of_command cmd ^ ".json") in
+    if Sys.file_exists json_path then
+      write_native ~dir cmd
+        (Printf.sprintf "@complete external\nextern \"%s\" []\n" cmd)
+  ) elevation_commands;
+  Printf.printf "indexed %d commands into %s\n" !n_results dir
+
+(* "inshellah dump" — print the number of indexed commands found in [dirs],
+ * then one line per command with its source type in brackets ("?" when the
+ * type is unknown). *)
+let cmd_dump dirs =
+  let cmds = all_commands dirs in
+  Printf.printf "%d commands\n" (List.length cmds);
+  let print_entry cmd =
+    let src = Option.value (file_type_of dirs cmd) ~default:"?" in
+    Printf.printf "  %-40s [%s]\n" cmd src in
+  List.iter print_entry cmds
+
+(* look [name] up in $PATH: the directories are tried left to right and the
+ * first candidate accepted by is_executable is returned. None when PATH is
+ * unset or no directory holds such a file. used during completion to find
+ * binaries for on-the-fly resolution. *)
+let find_in_path name =
+  match Sys.getenv_opt "PATH" with
+  | None -> None
+  | Some path ->
+    let candidate dir =
+      let p = Filename.concat dir name in
+      if is_executable p then Some p else None in
+    List.find_map candidate (String.split_on_char ':' path)
+
+(* on-the-fly resolution for a command that is missing from the index.
+ * runs help_resolve_par on the binary at [path] (200 timeout, no argument
+ * prefix) and, when it yields anything, makes sure [dir] exists and writes
+ * every (command name, result) pair into it.
+ * returns Some pairs when something was resolved, None otherwise. *)
+let resolve_and_cache ~dir name path =
+  match help_resolve_par ~timeout:200 path [] name with
+  | [] -> None
+  | resolved ->
+    ensure_dir dir;
+    let store (cmd_name, r) = write_result ~dir cmd_name r in
+    List.iter store resolved;
+    Some resolved
+
+(* render one completion candidate as a json object with "value" and
+ * "description" fields, as expected by nushell's completer protocol.
+ * both strings go through escape_json before being embedded. *)
+let completion_json value desc =
+  let escaped_value = escape_json value in
+  let escaped_desc = escape_json desc in
+  Printf.sprintf "{\"value\":\"%s\",\"description\":\"%s\"}"
+    escaped_value escaped_desc
+
+(* fuzzy matching: returns a score > 0 if needle is a subsequence of haystack
+ * (compared ascii-case-insensitively), or 0 if it is not.
+ * higher scores = better match. scoring tiers:
+ *   - empty needle: 1 (everything matches, all candidates tie)
+ *   - exact (case-sensitive) match: 1000
+ *   - prefix match: 900 + length bonus (how much of the haystack is covered)
+ *   - subsequence: for every matched char, 50 if it sits on a word boundary
+ *     (start of string, after '-' or '_', or a camelCase transition) and 10
+ *     otherwise, plus 20 if it directly follows the previously matched char
+ *
+ * this drives the completion candidate ranking. users typing "ser" should see
+ * "--server" ranked above "--preserve" even though both contain "ser" as a
+ * subsequence. the word-boundary bonus achieves this.
+ *
+ * the consecutive bonus is only granted once an earlier needle char has
+ * actually matched (ni > 0); otherwise a first match at index 0 would be
+ * scored as "adjacent" to a match that never happened. *)
+let fuzzy_score needle haystack =
+  let nlen = String.length needle and hlen = String.length haystack in
+  if nlen = 0 then 1
+  else if nlen > hlen then 0
+  else if needle = haystack then 1000
+  else
+    let needle = String.lowercase_ascii needle
+    and haystack_lc = String.lowercase_ascii haystack in
+    if String.starts_with ~prefix:needle haystack_lc then
+      900 + (nlen * 100 / hlen)
+    else
+      (* boundaries are detected on the original haystack so camelCase
+       * transitions are still visible after lowercasing *)
+      let is_boundary hi =
+        hi = 0 || haystack.[hi - 1] = '-' || haystack.[hi - 1] = '_'
+        || (haystack.[hi - 1] >= 'a' && haystack.[hi - 1] <= 'z'
+            && haystack.[hi] >= 'A' && haystack.[hi] <= 'Z') in
+      (* greedy left-to-right walk over the haystack. fold state is
+       * (next needle index, score so far, haystack index, index of the
+       * last matched haystack char) *)
+      let ni, score, _, _ =
+        String.fold_left (fun (ni, score, hi, prev_match) c ->
+          if ni < nlen && c = needle.[ni] then
+            let bonus = (if is_boundary hi then 50 else 10)
+                      + (if ni > 0 && prev_match = hi - 1 then 20 else 0) in
+            (ni + 1, score + bonus, hi + 1, hi)
+          else (ni, score, hi + 1, prev_match)
+        ) (0, 0, 0, -1) haystack_lc in
+      if ni = nlen then score else 0
+
+(* walk the arguments that follow an elevation command (sudo, doas, ...) and
+ * locate the command being elevated.
+ *   - a literal "--" ends the search: everything after it is returned as-is
+ *   - tokens starting with '-' are treated as flags and skipped
+ *   - the first remaining token accepted by is_command starts the result
+ *   - any other token is skipped
+ * returns Some (real_cmd :: args), or None if the list runs out first.
+ * after "--" the returned list may be empty. *)
+let find_real_command is_command args =
+  let looks_like_flag tok = tok <> "" && tok.[0] = '-' in
+  let rec skip_to_command tokens =
+    match tokens with
+    | [] -> None
+    | "--" :: after -> Some after
+    | tok :: after ->
+      if looks_like_flag tok then skip_to_command after
+      else if is_command tok then Some tokens
+      else skip_to_command after
+  in
+  skip_to_command args
+
+(* "inshellah complete CMD [ARGS...]" — the nushell custom completer.
+ * this is the hot path — called every time the user presses tab in nushell.
+ *
+ * the completion logic:
+ *   1. try to find the command (or longest subcommand prefix) in the store
+ *   2. if not found, try on-the-fly resolution (find in $PATH, run --help, cache)
+ *   3. score all candidate completions against the partial input using fuzzy_score
+ *   4. output scored candidates as a json array
+ *
+ * subcommand resolution: the lookup tries longest prefix first.
+ * for "git add --", it first looks for "git add", then "git".
+ * this ensures subcommand-specific flags are shown.
+ *
+ * peculiarity: nushell sends a trailing empty token when the cursor is after
+ * a space ("git add "). in this case all_tokens includes the empty string.
+ * when the last token is non-empty, the user is still typing it, so we use
+ * it as the fuzzy filter. when empty, we show all candidates.
+ *
+ * peculiarity: if only a parent command matched (e.g. "git" matched but not
+ * "git add"), we suppress subcommand suggestions and only show flags. this
+ * prevents showing sibling subcommands when the user has already committed
+ * to a specific subcommand path.
+ *
+ * arguments:
+ *   - spans: the command line as a token list, command name first. if the
+ *     first token is in elevation_commands and a real command can be found
+ *     after it, completion proceeds as if the line started at that command.
+ *   - user_dir: placed first in the directory list handed to lookup, and
+ *     also the directory on-the-fly results are written to.
+ *   - system_dirs: further directories, passed to lookup after user_dir.
+ *
+ * output: one line on stdout holding a json array ("[]" when spans is empty
+ * or no candidate scores above 0). subcommand and flag candidates are merged
+ * and ordered by descending fuzzy score.
+ *
+ * NOTE(review): on-the-fly resolution is also attempted when the stored match
+ * is more than one token shallower than the lookup tokens, so a deep path the
+ * resolver cannot cache presumably re-runs it on every tab press — confirm
+ * that this is intended. *)
+let cmd_complete spans user_dir system_dirs =
+  let dirs = user_dir :: system_dirs in
+  (* if the command line starts with a privilege-escalation wrapper, scan past
+   * it to find the real command. we identify the command by checking the store
+   * and $PATH — this avoids needing per-command option tables which are fragile
+   * across different implementations. if no real command is found, fall back to
+   * completing the elevation command itself. *)
+  let spans = match spans with
+    | cmd :: rest when List.mem cmd elevation_commands ->
+      let is_command name =
+        name <> "" && (lookup dirs name <> None || find_in_path name <> None)
+      in
+      (match find_real_command is_command rest with
+       | Some (_ :: _ as real_spans) -> real_spans
+       | _ -> spans)
+    | _ -> spans in
+  match spans with
+  | [] -> print_string "[]\n"
+  | cmd_name :: rest ->
+    (* Try longest prefix match: "git add" before "git" *)
+    let find_result tokens =
+      let n = List.length tokens in
+      List.init n Fun.id |> List.find_map (fun drop ->
+        let prefix = List.filteri (fun i _ -> i < n - drop) tokens in
+        match prefix with
+        | [] -> None
+        | _ ->
+          let try_name = String.concat " " prefix in
+          match lookup dirs try_name with
+          | Some r -> Some (try_name, r, List.length prefix)
+          | None -> None) in
+    let all_tokens = cmd_name :: rest in
+    let last_token = match rest with
+      | [] -> "" | _ -> List.nth rest (List.length rest - 1) in
+    (* Only treat the last token as a completed subcommand when nushell
+       sends a trailing empty token (cursor is after a space).
+       Otherwise the user is still typing and we treat it as partial. *)
+    let lookup_tokens = if last_token = "" then all_tokens
+      else match rest with
+        | _ :: _ -> cmd_name :: List.rev (List.tl (List.rev rest))
+        | _ -> [cmd_name] in
+    let resolve tokens partial =
+      match find_result tokens with
+      | Some _ as found -> (found, partial)
+      | None -> (None, partial) in
+    let found, partial = resolve lookup_tokens last_token in
+    (* Try on-the-fly resolution when no match or only a parent matched *)
+    let n_lookup = List.length lookup_tokens in
+    let result, partial = match found with
+      | Some (_, _, depth) when depth >= n_lookup - 1 ->
+        (* Exact or near-exact match — use it *)
+        (found, partial)
+      | _ ->
+        (* No match, or only a parent matched — try on-the-fly resolution *)
+        (match find_in_path cmd_name with
+         | Some path ->
+           (match resolve_and_cache ~dir:user_dir cmd_name path with
+            | Some _pairs -> resolve lookup_tokens last_token
+            | None -> (found, partial))
+         | None -> (found, partial)) in
+    let candidates = match result with
+      | None -> []
+      | Some (_matched_name, r, depth) ->
+        (* When the match is shallower than requested, the user already
+           typed a subcommand beyond the matched level — don't show
+           sibling subcommands, only flags *)
+        let sub_candidates = if depth < n_lookup - 1 then [] else
+        let subs = match r.subcommands with
+          | _ :: _ -> r.subcommands
+          | [] -> subcommands_of dirs _matched_name in
+        List.filter_map (fun (sc : subcommand) ->
+          let s = fuzzy_score partial sc.name in
+          if s > 0 then Some (s, completion_json sc.name sc.desc) else None
+        ) subs in
+        (* build flag completion candidates from the entry list.
+         * for flags with both short and long forms (Both), we pick which form
+         * to display based on what the user is currently typing:
+         *   - if the partial input matches the short flag better, show the short
+         *     flag as the value and note the long form in the description
+         *   - otherwise (including empty partial), prefer the long flag and note
+         *     the short form in the description
+         * this keeps the candidate list clean (one entry per flag) while still
+         * surfacing the alternate form so the user knows about it.
+         *
+         * parameter names are appended to descriptions in angle brackets for
+         * mandatory params and square brackets for optional ones, matching the
+         * conventions users expect from cli help text. *)
+        let flag_candidates = List.filter_map (fun (e : entry) ->
+          let base_desc = match e.param with
+            | Some (Mandatory p) -> if e.desc <> "" then e.desc ^ " <" ^ p ^ ">" else "<" ^ p ^ ">"
+            | Some (Optional p) -> if e.desc <> "" then e.desc ^ " [" ^ p ^ "]" else "[" ^ p ^ "]"
+            | None -> e.desc in
+          let flag, desc = match e.switch with
+            | Long l -> ("--" ^ l, base_desc)
+            | Short c -> (Printf.sprintf "-%c" c, base_desc)
+            | Both (c, l) ->
+              (* score the partial against both forms to decide which to present.
+               * e.g. typing "-s" scores higher against "-s" than "--squeeze-blank",
+               * so we show "-s (aka --squeeze-blank)". when the partial is empty or
+               * matches the long form better, we default to the long form. *)
+              let long_flag = "--" ^ l in
+              let short_flag = Printf.sprintf "-%c" c in
+              let long_score = fuzzy_score partial long_flag in
+              let short_score = fuzzy_score partial short_flag in
+              if short_score > long_score then
+                (short_flag, Printf.sprintf "(aka %s) %s" long_flag base_desc)
+              else
+                (long_flag, Printf.sprintf "(aka %s) %s" short_flag base_desc) in
+          let s = fuzzy_score partial flag in
+          if s > 0 then Some (s, completion_json flag desc) else None
+        ) r.entries in
+        let scored = sub_candidates @ flag_candidates in
+        List.sort (fun (a, _) (b, _) -> compare b a) scored
+        |> List.map snd in
+    Printf.printf "[%s]\n" (String.concat "," candidates)
+
+(* "inshellah query CMD" — dump the raw stored data for [cmd] to stdout,
+ * followed by a newline. when lookup_raw finds nothing in [dirs], report
+ * "not found" on stderr and exit with status 1. *)
+let cmd_query cmd dirs =
+  match lookup_raw dirs cmd with
+  | Some data -> print_endline data
+  | None ->
+    Printf.eprintf "not found: %s\n" cmd;
+    exit 1
+
+(* load a newline-separated list of command names from [path].
+ * every line is trimmed; blank lines and lines starting with '#' are skipped.
+ * returns the remaining names as a set.
+ *
+ * reading is best-effort: if the file cannot be opened or read (Sys_error),
+ * a warning is printed to stderr and the empty set is returned, so a missing
+ * list never aborts indexing. only Sys_error is handled — any other exception
+ * propagates instead of being silently swallowed. *)
+let load_ignorelist path =
+  let keep line =
+    let line = String.trim line in
+    if line <> "" && line.[0] <> '#' then Some line else None in
+  match In_channel.with_open_text path In_channel.input_all with
+  | contents ->
+    contents
+    |> String.split_on_char '\n'
+    |> List.filter_map keep
+    |> SSet.of_list
+  | exception Sys_error msg ->
+    Printf.eprintf "warning: ignoring unreadable list file: %s\n" msg;
+    SSet.empty
+
+(* parse "index" subcommand arguments: prefix dirs + optional --dir, --ignore, --help-only.
+ * returns (prefixes, dir, ignore, help_only):
+ *   - prefixes: every non-option argument, in command-line order
+ *   - dir: the last --dir PATH given, else default_store_path ()
+ *   - ignore / help_only: union of the lists loaded from every --ignore FILE
+ *     and --help-only FILE respectively
+ * an option given as the very last argument, with no value after it, is
+ * reported on stderr and exits with status 1; previously it was silently
+ * treated as a prefix directory. *)
+let parse_index_args args =
+  let missing_value flag =
+    Printf.eprintf "error: %s requires an argument\n" flag;
+    exit 1 in
+  let rec go prefixes dir ignore help_only = function
+    | [] -> (List.rev prefixes, dir, ignore, help_only)
+    | "--dir" :: path :: rest -> go prefixes path ignore help_only rest
+    | "--ignore" :: path :: rest ->
+      go prefixes dir (SSet.union ignore (load_ignorelist path)) help_only rest
+    | "--help-only" :: path :: rest ->
+      go prefixes dir ignore (SSet.union help_only (load_ignorelist path)) rest
+    | [("--dir" | "--ignore" | "--help-only") as flag] -> missing_value flag
+    | prefix :: rest -> go (prefix :: prefixes) dir ignore help_only rest in
+  go [] (default_store_path ()) SSet.empty SSet.empty args
+
+(* split the arguments of complete/query/dump into store locations and the
+ * remaining positional arguments.
+ * returns (user_dir, system_dirs, rest):
+ *   - user_dir: the last --dir PATH given, else default_store_path ()
+ *   - system_dirs: every --system-dir PATH, most recently given first
+ *   - rest: all other arguments, in their original order
+ * a "--dir" or "--system-dir" token followed by another token is consumed as
+ * an option wherever it appears in the list, including among the tokens of
+ * the command line being completed. *)
+let parse_dir_args args =
+  let rec consume acc tokens =
+    let (user_dir, system_dirs, others) = acc in
+    match tokens with
+    | "--dir" :: path :: tl -> consume (path, system_dirs, others) tl
+    | "--system-dir" :: path :: tl ->
+      consume (user_dir, path :: system_dirs, others) tl
+    | tok :: tl -> consume (user_dir, system_dirs, tok :: others) tl
+    | [] -> (user_dir, system_dirs, List.rev others)
+  in
+  consume (default_store_path (), [], []) args
+
+(* --- entry point ---
+ * the first command-line argument selects the subcommand; the remaining
+ * arguments are parsed by that subcommand's own argument parser. anything
+ * unrecognised falls through to usage (). *)
+let () =
+  let cli_args = List.tl (Array.to_list Sys.argv) in
+  match cli_args with
+  | "index" :: rest ->
+    let (prefixes, dir, ignorelist, help_only) = parse_index_args rest in
+    (match prefixes with
+     | [] ->
+       Printf.eprintf "error: index requires at least one prefix dir\n";
+       exit 1
+     | _ :: _ -> ());
+    (* each prefix contributes PREFIX/bin and PREFIX/share/man, pairwise *)
+    let under sub = List.map (fun p -> Filename.concat p sub) prefixes in
+    let bindirs = under "bin" in
+    let mandirs = under "share/man" in
+    cmd_index bindirs mandirs ignorelist help_only dir
+  | "complete" :: rest ->
+    let (user_dir, system_dirs, spans) = parse_dir_args rest in
+    cmd_complete spans user_dir system_dirs
+  | "query" :: rest ->
+    let (user_dir, system_dirs, args) = parse_dir_args rest in
+    begin match args with
+      | [cmd] -> cmd_query cmd (user_dir :: system_dirs)
+      | _ ->
+        Printf.eprintf "error: query CMD [--dir PATH] [--system-dir PATH]\n";
+        exit 1
+    end
+  | "dump" :: rest ->
+    let (user_dir, system_dirs, _) = parse_dir_args rest in
+    cmd_dump (user_dir :: system_dirs)
+  | ["manpage"; file] -> cmd_manpage file
+  | ["manpage-dir"; dir] -> cmd_manpage_dir dir
+  | _ -> usage ()
--- a/doc/nixos.md
+++ b/doc/nixos.md
@ -0,0 +1,192 @@
+# nixos integration
+
+inshellah provides a nixos module that automatically indexes nushell
+completions for all installed packages at system build time.
+
+## enabling
+
+```nix
+# in your flake.nix outputs:
+{
+  nixosConfigurations.myhost = nixpkgs.lib.nixosSystem {
+    modules = [
+      inshellah.nixosModules.default
+      {
+        programs.inshellah.enable = true;
+      }
+    ];
+  };
+}
+```
+
+or if importing the module directly:
+
+```nix
+# configuration.nix
+{ pkgs, ... }: {
+  imports = [ ./path/to/inshellah/nix/module.nix ];
+  programs.inshellah = {
+    enable = true;
+    package = pkgs.inshellah;  # or your local build
+  };
+}
+```
+
+## what happens at build time
+
+the module hooks into `environment.extraSetup`, which runs during the
+system profile build (the `buildEnv` that creates `/run/current-system/sw`).
+at that point, all system packages are merged, so `$out/bin` contains every
+executable and `$out/share/man` contains every manpage.
+
+inshellah runs a single command:
+
+```
+inshellah index "$out" --dir $out/share/inshellah
+```
+
+this executes a three-phase pipeline:
+
+### phase 1: native completion detection (parallel)
+
+for each executable, inshellah scans the elf binary for the string
+`completion`. if found, it probes common patterns like
+`CMD completions nushell` to see if the program can generate its own
+nushell completions. native output is used verbatim — these are always
+higher quality than parsed completions.
+
+programs like `niri`, and any clap/cobra tool with nushell support,
+are handled this way.
+
+### phase 2: manpage parsing (sequential)
+
+for commands not covered by phase 1, inshellah parses manpages from
+man1 (user commands) and man8 (sysadmin commands). it handles:
+
+- gnu `.TP` style (coreutils, help2man)
+- `.IP` style (curl, hand-written)
+- `.PP`+`.RS`/`.RE` style (git, docbook)
+- nix3 bullet+hyperlink style (`nix run`, `nix build`, etc.)
+- mdoc (bsd) format
+- deroff fallback for unusual formats
+
+synopsis sections are parsed to detect subcommands: `git-commit.1`
+generates `export extern "git commit"`, not `export extern "git-commit"`.
+
+### phase 3: --help fallback (parallel)
+
+remaining executables without manpages get `--help` (or `-h`) called
+with a 200ms timeout. elf binaries are pre-scanned for the `-h` string
+to skip those that don't support help flags. shell scripts are run
+directly (they're fast). execution is parallelized to available cores.
+
+### output
+
+each command gets its own file in `/share/inshellah` under the system
+profile. native generators produce `.nu` files; parsed results produce
+`.json` files. the `complete` command reads both formats.
+
+nushell built-in commands (ls, cd, cp, mv, etc.) are excluded since
+nushell provides its own completions.
+
+### performance
+
+on a typical nixos system (~950 executables, ~1600 manpages):
+- total time: ~4-10 seconds
+- native gzip decompression (camlzip, no process spawning)
+- parallel --help with core-scaled forking
+- elf string scanning to skip ~15% of binaries
+
+## module options
+
+```nix
+programs.inshellah = {
+  enable = true;
+
+  # the inshellah package (set automatically by the flake module)
+  package = pkgs.inshellah;
+
+  # where to place indexed completion files under the system profile
+  # default: "/share/inshellah"
+  completionsPath = "/share/inshellah";
+
+  # commands to skip entirely during indexing
+  ignoreCommands = [ "problematic-tool" ];
+
+  # commands to skip manpage parsing for (uses --help instead)
+  helpOnlyCommands = [ "nix" ];
+};
+```
+
+## using the completer
+
+the flake module sets a read-only `snippet` option containing the nushell
+config needed to wire up the completer. you can access it via
+`config.programs.inshellah.snippet` and paste it into your nushell config,
+or source it from a file generated by your nixos config.
+
+the snippet sets up the external completer pointing at the system index
+at `/run/current-system/sw/share/inshellah`:
+
+```nu
+let inshellah_complete = {|spans|
+    inshellah complete ...$spans --system-dir /run/current-system/sw/share/inshellah | from json
+}
+$env.config.completions.external = {
+    enable: true
+    max_results: 100
+    completer: $inshellah_complete
+}
+```
+
+## home manager and other user-level package managers
+
+the nixos module only indexes packages installed at the system level
+(those that end up in `/run/current-system/sw`). if you use home-manager,
+nix-env, or another user-level package manager, those binaries and
+manpages live elsewhere — typically under `/etc/profiles/per-user/<name>`
+or `~/.nix-profile`.
+
+to get completions for user-installed packages, run `inshellah index`
+against those prefixes separately:
+
+```sh
+# home-manager / per-user profile
+inshellah index /etc/profiles/per-user/$USER
+
+# classic nix-env profile
+inshellah index ~/.nix-profile
+```
+
+this indexes into the default user cache (`$XDG_CACHE_HOME/inshellah`),
+which the completer searches automatically. you can re-run this after
+installing new packages, or add it to a home-manager activation script.
+
+if you want to automate this in home-manager:
+
+```nix
+# home.nix
+home.activation.inshellah-index = lib.hm.dag.entryAfter [ "writeBoundary" ] ''
+  ${pkgs.inshellah}/bin/inshellah index /etc/profiles/per-user/$USER 2>/dev/null || true
+'';
+```
+
+the completer will then search both the system index (`--system-dir`)
+and the user cache, so completions from both sources are available.
+
+## troubleshooting
+
+**completions not appearing**: ensure the completer is configured in
+your nushell config (see above). check that the system index exists:
+`ls /run/current-system/sw/share/inshellah/`.
+
+**missing completions for a specific command**: check if it's a nushell
+built-in (`help commands | where name == "thecommand"`). built-ins are
+excluded because nushell serves its own completions for them.
+
+**stale completions after update**: completions regenerate on every
+`nixos-rebuild`. if a command changed its flags, rebuild to pick up
+the changes.
+
+**build-time errors**: indexing failures are non-fatal (`|| true`).
+check `journalctl` for the build log if completions are missing.
--- a/doc/nushell-integration.md
+++ b/doc/nushell-integration.md
@ -0,0 +1,184 @@
+# using inshellah completions in nushell
+
+inshellah indexes completions from three sources (in priority order):
+1. **native generators** — programs that can emit nushell completions directly
+2. **manpages** — groff/troff/mdoc manpage parsing
+3. **`--help` output** — parsing help text as a fallback
+
+indexed data is stored as `.json` and `.nu` files in a directory that the
+`complete` command reads from at tab-completion time.
+
+## quick start
+
+index completions from a system prefix:
+
+```sh
+# index from a prefix containing bin/ and share/man/
+inshellah index /usr
+
+# index from multiple prefixes
+inshellah index /usr /usr/local
+
+# store in a custom directory
+inshellah index /usr --dir ~/my-completions
+```
+
+parse a single manpage:
+
+```sh
+inshellah manpage /usr/share/man/man1/git.1.gz
+```
+
+batch-process all manpages under a directory (man1 and man8):
+
+```sh
+inshellah manpage-dir /usr/share/man
+```
+
+## commands
+
+```
+inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
+    index completions into a directory of json/nu files.
+    PREFIX is a directory containing bin/ and share/man/.
+    default dir: $XDG_CACHE_HOME/inshellah
+    --ignore FILE     skip listed commands entirely
+    --help-only FILE  skip manpages for listed commands, use --help instead
+
+inshellah complete CMD [ARGS...] [--dir PATH] [--system-dir PATH]
+    nushell custom completer. outputs json completion candidates.
+    falls back to --help resolution if command is not indexed.
+
+inshellah query CMD [--dir PATH] [--system-dir PATH]
+    print stored completion data for CMD.
+
+inshellah dump [--dir PATH] [--system-dir PATH]
+    list indexed commands.
+
+inshellah manpage FILE
+    parse a manpage and emit nushell extern block.
+
+inshellah manpage-dir DIR
+    batch-process manpages under DIR (man1 and man8 sections).
+```
+
+## the index pipeline
+
+the `index` command runs a three-phase pipeline over all executables
+in each `PREFIX/bin`:
+
+### phase 1: native completion detection (parallel)
+
+for each executable, inshellah scans the elf binary for the string
+`completion`. if found, it probes common patterns like
+`CMD completions nushell` to see if the program can generate its own
+nushell completions. native output is used verbatim — these are always
+higher quality than parsed completions.
+
+programs like `niri`, and any clap/cobra tool with nushell support,
+are handled this way.
+
+### phase 2: manpage parsing (sequential)
+
+for commands not covered by phase 1, inshellah parses manpages from
+man1 (user commands) and man8 (sysadmin commands). it handles:
+
+- gnu `.TP` style (coreutils, help2man)
+- `.IP` style (curl, hand-written)
+- `.PP`+`.RS`/`.RE` style (git, docbook)
+- nix3 bullet+hyperlink style (`nix run`, `nix build`, etc.)
+- mdoc (bsd) format
+- deroff fallback for unusual formats
+
+synopsis sections are parsed to detect subcommands: `git-commit.1`
+generates `export extern "git commit"`, not `export extern "git-commit"`.
+
+### phase 3: --help fallback (parallel)
+
+remaining executables without manpages get `--help` (or `-h`) called
+with a 200ms timeout. elf binaries are pre-scanned for the `-h` string
+to skip those that don't support help flags. shell scripts are run
+directly (they're fast). execution is parallelized to available cores.
+
+subcommands are recursively resolved — if `--help` output lists
+subcommands, inshellah runs `CMD SUBCMD --help` for each.
+
+### output
+
+each command gets its own file in the index directory. native generators
+produce `.nu` files; parsed results produce `.json` files. the `complete`
+command reads both formats.
+
+nushell built-in commands (ls, cd, cp, mv, etc.) are excluded since
+nushell provides its own completions.
+
+### performance
+
+on a typical nixos system (~950 executables, ~1600 manpages):
+- total time: ~4-10 seconds
+- native gzip decompression (camlzip, no process spawning)
+- parallel --help with core-scaled forking
+- elf string scanning to skip ~15% of binaries
+
+## the completer
+
+the `complete` command is designed to be wired into nushell as an
+external completer. it reads from the index directory (`--dir`) and
+optional system directories (`--system-dir`), performs fuzzy matching,
+and outputs json completion candidates.
+
+if a command is not indexed, `complete` falls back to on-the-fly
+`--help` resolution — it runs the command's help, caches the result
+in the user directory, and returns completions immediately.
+
+### setting up the completer
+
+```nu
+# ~/.config/nushell/config.nu
+$env.config.completions.external = {
+    enable: true
+    completer: {|spans|
+        inshellah complete ...$spans
+        | from json
+    }
+}
+```
+
+with the nixos module, use the provided `snippet` option value (see
+[nixos.md](nixos.md)) which points at the system index automatically.
+
+## nixos module
+
+enable automatic completion indexing at system build time:
+
+```nix
+{
+  imports = [ ./path/to/inshellah/nix/module.nix ];
+  programs.inshellah.enable = true;
+}
+```
+
+this runs `inshellah index` during the system profile build. see
+[nixos.md](nixos.md) for full details.
+
+## what gets generated
+
+the `manpage` and `manpage-dir` commands emit nushell `extern` blocks
+with flags, parameter types, and descriptions:
+
+```nu
+export extern "rg" [
+    --regexp(-e): string            # a pattern to search for
+    --file(-f): path                # search for patterns from the given file
+    --count(-c)                     # only show the count of matching lines
+    --color: string                 # controls when to use color
+    --max-depth: int                # limit the depth of directory traversal
+]
+```
+
+subcommand manpages (e.g. `git-commit.1`) are detected via synopsis
+parsing and generate the correct nushell name (`git commit` not
+`git-commit`).
+
+nushell built-in commands (ls, cd, mv, etc.) are excluded since nushell
+provides its own completions for these.
--- a/doc/runtime-completions.md
+++ b/doc/runtime-completions.md
@ -0,0 +1,84 @@
+# runtime completion resolution
+
+the `complete` command has built-in on-the-fly resolution: when a command
+is not found in the index, it falls back to running `--help`, caches the
+result, and returns completions immediately. this means commands installed
+outside the system profile (via cargo, pip, npm, go, etc.) get completions
+on first tab-press with no manual setup.
+
+## how it works
+
+when you type `docker compose up --<TAB>`:
+
+1. nushell calls `inshellah complete docker compose up --`
+2. inshellah looks up the index for the longest matching prefix
+3. if found, it fuzzy-matches flags and subcommands against the partial input
+4. if not found, it locates the binary in `$PATH`, runs `--help`,
+   recursively resolves subcommands, caches the results in the user
+   directory (`$XDG_CACHE_HOME/inshellah`), and returns completions
+
+all subsequent completions for that command are instant (served from cache).
+
+## setup
+
+the completer works with no extra configuration beyond the basic setup:
+
+```nu
+# ~/.config/nushell/config.nu
+$env.config.completions.external = {
+    enable: true
+    completer: {|spans|
+        inshellah complete ...$spans
+        | from json
+    }
+}
+```
+
+with the nixos module, add `--system-dir` to also search the system index:
+
+```nu
+$env.config.completions.external = {
+    enable: true
+    completer: {|spans|
+        inshellah complete ...$spans --system-dir /run/current-system/sw/share/inshellah
+        | from json
+    }
+}
+```
+
+or use the `snippet` option provided by the flake module (see
+[nixos.md](nixos.md)).
+
+## cache management
+
+the user cache lives at `$XDG_CACHE_HOME/inshellah` (typically
+`~/.cache/inshellah`).
+
+```sh
+# list cached commands
+inshellah dump
+
+# view cached data for a command
+inshellah query docker
+
+# clear cache
+rm -rf ~/.cache/inshellah/
+
+# re-index from a prefix
+inshellah index /usr --dir ~/.cache/inshellah
+```
+
+## when to use this vs build-time indexing
+
+the nixos module (`programs.inshellah.enable = true`) handles system
+packages at build time. runtime resolution covers:
+
+- commands installed outside the system profile (cargo, pip, npm, go)
+- subcommand completions at arbitrary depth
+- systems without the nixos module
+
+for upfront indexing on non-nixos systems:
+
+```sh
+inshellah index /usr /usr/local
+```
--- a/dune-project
+++ b/dune-project
@ -0,0 +1,28 @@
+(lang dune 3.20)
+
+(name inshellah)
+
+(generate_opam_files true) ; inshellah.opam is generated from this file
+
+(source
+ (github username/reponame)) ; NOTE(review): looks like the dune init placeholder — confirm the real repository
+
+(authors "atagen <boss@atagen.co>")
+
+(maintainers "atagen <boss@atagen.co>")
+
+(license GPL-3.0-or-later)
+
+(package
+ (name inshellah)
+ (synopsis "Nushell completions generator")
+ (description
+  "Inshellah parses manpages and --help switches to generate completions for nushell.")
+ (depends
+  ocaml
+  dune
+  angstrom
+  angstrom-unix
+  camlzip)
+ (tags
+  (shell completions nushell parser angstrom)))
--- a/flake.lock
+++ b/flake.lock
@ -0,0 +1,27 @@
+{
+  "nodes": {
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1773385838,
+        "narHash": "sha256-ylF2AGl08seexxlLvMqj3jd+yZq56W9zicwe51mp0Pw=",
+        "owner": "nixos",
+        "repo": "nixpkgs",
+        "rev": "fef542e7a88eec2b698389e6279464fd479926b6",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nixos",
+        "ref": "nixpkgs-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "nixpkgs": "nixpkgs"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
--- a/flake.nix
+++ b/flake.nix
@ -0,0 +1,71 @@
+{
+  inputs.nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable"; # follows nixpkgs-unstable; the exact revision is pinned in flake.lock
+
+  outputs =
+    { self, nixpkgs }:
+    let
+      forAllSystems = # builds an attrset keyed by system, calling f with that system's pkgs
+        f:
+        nixpkgs.lib.genAttrs [ "x86_64-linux" "aarch64-linux" ] (
+          system: f (import nixpkgs { inherit system; })
+        );
+    in
+    {
+      devShells = forAllSystems (pkgs: { # development shell: build dependencies plus lsp/formatter/repl tooling
+        default = pkgs.mkShell {
+          packages = with pkgs.ocamlPackages; [
+            dune_3
+            ocaml
+            angstrom
+            angstrom-unix
+            camlzip
+            ppx_inline_test
+            ocaml-lsp
+            ocamlformat
+            ocamlformat-rpc-lib
+            utop
+          ];
+        };
+      });
+
+      packages = forAllSystems (pkgs: { # the inshellah executable, built as a dune package
+        default = pkgs.ocamlPackages.buildDunePackage {
+          pname = "inshellah";
+          version = "0.1";
+          src = ./.;
+          nativeBuildInputs = [ pkgs.git ]; # NOTE(review): presumably needed by a build step — confirm it is still required
+          buildInputs = with pkgs.ocamlPackages; [
+            dune_3
+            ocaml
+            angstrom
+            angstrom-unix
+            camlzip
+          ];
+
+          meta.mainProgram = "inshellah";
+        };
+      });
+
+      nixosModules.default = # wraps nix/module.nix, setting the package and the nushell config snippet
+        {
+          pkgs,
+          lib,
+          config,
+          ...
+        }:
+        {
+          imports = [ ./nix/module.nix ];
+          programs.inshellah.package = self.packages.${pkgs.stdenv.hostPlatform.system}.default; # this flake's build for the host system
+          programs.inshellah.snippet = ''
+            let inshellah_complete = {|spans|
+              ${lib.getExe config.programs.inshellah.package} complete ...$spans --system-dir /run/current-system/sw/${config.programs.inshellah.completionsPath} | from json
+            }
+            $env.config.completions.external = {
+              enable: true
+              max_results: 100
+              completer: $inshellah_complete
+            }
+          '';
+        };
+    };
+}
--- a/inshellah.opam
+++ b/inshellah.opam
@ -0,0 +1,35 @@
+# This file is generated by dune, edit dune-project instead
+opam-version: "2.0"
+synopsis: "Nushell completions generator"
+description:
+  "Inshellah parses manpages and --help switches to generate completions for nushell."
+maintainer: ["atagen <boss@atagen.co>"]
+authors: ["atagen <boss@atagen.co>"]
+license: "GPL-3.0-or-later"
+tags: ["shell" "completions" "nushell" "parser" "angstrom"]
+homepage: "https://github.com/username/reponame"
+bug-reports: "https://github.com/username/reponame/issues"
+depends: [
+  "ocaml"
+  "dune" {>= "3.20"}
+  "angstrom"
+  "angstrom-unix"
+  "camlzip"
+  "odoc" {with-doc}
+]
+build: [
+  ["dune" "subst"] {dev}
+  [
+    "dune"
+    "build"
+    "-p"
+    name
+    "-j"
+    jobs
+    "@install"
+    "@runtest" {with-test}
+    "@doc" {with-doc}
+  ]
+]
+dev-repo: "git+https://github.com/username/reponame.git"
+x-maintenance-intent: ["(latest)"]
--- a/lib/.ocamlformat
+++ b/lib/.ocamlformat
--- a/lib/dune
+++ b/lib/dune
@ -0,0 +1,3 @@
+(library
+ (name inshellah)
+ (libraries angstrom angstrom-unix camlzip str unix))
--- a/lib/manpage.ml
+++ b/lib/manpage.ml
--- a/lib/nushell.ml
+++ b/lib/nushell.ml
@ -0,0 +1,242 @@
+(* nushell.ml — generate nushell extern definitions from parsed help data.
+ *
+ * this module is the code generation backend. it takes a help_result (from
+ * the parser or manpage modules) and produces nushell source code that
+ * defines "extern" declarations — nushell's mechanism for teaching the shell
+ * about external commands' flags and subcommands so it can offer completions.
+ *
+ * it also maintains a list of nushell's built-in commands to avoid generating
+ * extern definitions that would shadow them.
+ *
+ * key responsibilities:
+ *   - deduplicating flag entries (same flag from multiple help sources)
+ *   - mapping parameter names to nushell types (path, int, string)
+ *   - formatting flags in nushell syntax: --flag(-f): type  # description
+ *   - handling positional arguments with nushell's ordering constraints
+ *   - escaping special characters for nushell string literals
+ *)
+
+open Parser
+
+module SSet = Set.Make(String)
+module SMap = Map.Make(String)
+module CSet = Set.Make(Char)
+
+(* nushell built-in commands and keywords — we must never generate extern
+ * definitions for these because it would shadow nushell's own implementations.
+ * this list is maintained manually and should be updated with new nushell releases.
+ * entries are grouped by first letter. membership checks go through
+ * builtin_set / is_nushell_builtin below rather than scanning this list. *)
+let nushell_builtins = [
+  "alias"; "all"; "ansi"; "any"; "append"; "ast"; "attr";
+  "bits"; "break"; "bytes";
+  "cal"; "cd"; "char"; "chunk-by"; "chunks"; "clear"; "collect";
+  "columns"; "commandline"; "compact"; "complete"; "config"; "const";
+  "continue"; "cp";
+  "date"; "debug"; "decode"; "def"; "default"; "describe"; "detect";
+  "do"; "drop"; "du";
+  "each"; "echo"; "encode"; "enumerate"; "error"; "every"; "exec";
+  "exit"; "explain"; "explore"; "export"; "export-env"; "extern";
+  "fill"; "filter"; "find"; "first"; "flatten"; "for"; "format"; "from";
+  "generate"; "get"; "glob"; "grid"; "group-by";
+  "hash"; "headers"; "help"; "hide"; "hide-env"; "histogram";
+  "history"; "http";
+  "if"; "ignore"; "input"; "insert"; "inspect"; "interleave"; "into";
+  "is-admin"; "is-empty"; "is-not-empty"; "is-terminal"; "items";
+  "job"; "join";
+  "keybindings"; "kill";
+  "last"; "length"; "let"; "let-env"; "lines"; "load-env"; "loop"; "ls";
+  "match"; "math"; "merge"; "metadata"; "mkdir"; "mktemp"; "module";
+  "move"; "mut"; "mv";
+  "nu-check"; "nu-highlight";
+  "open"; "overlay";
+  "panic"; "par-each"; "parse"; "path"; "plugin"; "port"; "prepend"; "print"; "ps";
+  "query";
+  "random"; "reduce"; "reject"; "rename"; "return"; "reverse"; "rm";
+  "roll"; "rotate"; "run-external";
+  "save"; "schema"; "scope"; "select"; "seq"; "shuffle"; "skip"; "sleep";
+  "slice"; "sort"; "sort-by"; "source"; "source-env"; "split"; "start";
+  "stor"; "str"; "sys";
+  "table"; "take"; "tee"; "term"; "timeit"; "to"; "touch"; "transpose";
+  "try"; "tutor";
+  "ulimit"; "umask"; "uname"; "uniq"; "uniq-by"; "unlet"; "update";
+  "upsert"; "url"; "use";
+  "values"; "version"; "view";
+  "watch"; "where"; "which"; "while"; "whoami"; "window"; "with-env"; "wrap";
+  "zip";
+]
+
+(* set view of nushell_builtins, built the first time it is forced *)
+let builtin_set = lazy (SSet.of_list nushell_builtins)
+
+(* true when [cmd] is exactly one of the names in nushell_builtins *)
+let is_nushell_builtin cmd =
+  let known = Lazy.force builtin_set in
+  SSet.mem cmd known
+
+(* collapse flag entries that describe the same flag.
+ * identity is the long name when there is one ("--name" for Long and Both),
+ * otherwise the short letter ("-c"). among entries sharing an identity the
+ * highest-scoring one wins; on a tie the earliest one is kept:
+ *   - short+long form (Both): +10
+ *   - has a parameter: +5
+ *   - description length / 10, capped at +5
+ *
+ * afterwards a standalone Short flag is dropped when its letter is already
+ * the short half of a winning Both entry, so "-v" and "--verbose(-v)" are
+ * never emitted together (nushell would reject the duplicate).
+ * the result keeps the order in which each identity first appeared. *)
+let dedup_entries entries =
+  let key_of e =
+    match e.switch with
+    | Short c -> Printf.sprintf "-%c" c
+    | Long l | Both (_, l) -> Printf.sprintf "--%s" l
+  in
+  let score e =
+    let form_bonus = match e.switch with Both _ -> 10 | Short _ | Long _ -> 0 in
+    let param_bonus = if Option.is_some e.param then 5 else 0 in
+    let desc_bonus = min 5 (String.length e.desc / 10) in
+    form_bonus + param_bonus + desc_bonus
+  in
+  (* pass 1: best candidate per identity *)
+  let keep_better table candidate =
+    let k = key_of candidate in
+    match SMap.find_opt k table with
+    | Some incumbent when score incumbent >= score candidate -> table
+    | Some _ | None -> SMap.add k candidate table
+  in
+  let best = List.fold_left keep_better SMap.empty entries in
+  (* short letters already carried by a winning Both entry *)
+  let covered =
+    SMap.fold (fun _ winner shorts ->
+      match winner.switch with
+      | Both (c, _) -> CSet.add c shorts
+      | Short _ | Long _ -> shorts
+    ) best CSet.empty
+  in
+  (* pass 2: walk the input again so output follows first-appearance order *)
+  let emit (seen, out) e =
+    let k = key_of e in
+    if SSet.mem k seen then (seen, out)
+    else
+      match e.switch with
+      | Short c when CSet.mem c covered -> (seen, out)
+      | Short _ | Long _ | Both _ -> (SSet.add k seen, SMap.find k best :: out)
+  in
+  let _, kept = List.fold_left emit (SSet.empty, []) entries in
+  List.rev kept
+
+(* infer a nushell parameter type from a parameter's name.
+ * the match is exact and case-sensitive: a fixed set of file/directory
+ * names gives "path" (which enables path completion), a fixed set of
+ * numeric names gives "int", and anything else is "string". *)
+let nushell_type_of_param name =
+  let path_like =
+    [ "FILE"; "file"; "PATH"; "path"; "DIR"; "dir"; "DIRECTORY";
+      "FILENAME"; "PATTERNFILE" ]
+  in
+  let int_like =
+    [ "NUM"; "N"; "COUNT"; "NUMBER"; "int"; "INT"; "COLS"; "WIDTH";
+      "LINES"; "DEPTH"; "depth" ]
+  in
+  if List.mem name path_like then "path"
+  else if List.mem name int_like then "int"
+  else "string"
+
+(* escape a string for use inside a nushell double-quoted string literal:
+ * every double quote and every backslash gets a backslash in front of it.
+ * strings containing neither are returned as-is. *)
+let escape_nu s =
+  let needs_escape c = c = '"' || c = '\\' in
+  if not (String.exists needs_escape s) then s
+  else begin
+    let out = Buffer.create (String.length s + 4) in
+    String.iter (fun c ->
+      if needs_escape c then Buffer.add_char out '\\';
+      Buffer.add_char out c
+    ) s;
+    Buffer.contents out
+  end
+
+(* render one flag entry as a line of a nushell extern signature
+ * (no trailing newline). examples:
+ *   "    --verbose(-v)                       # increase verbosity"
+ *   "    --output(-o): path                  # write output to file"
+ *   "    -n: int                             # number of results"
+ *
+ * a combined short+long flag uses nushell's "--long(-s)" spelling. when the
+ * entry has a description it follows as a "# " comment, padded so that it
+ * starts at column 40 (always at least one space after the flag text). *)
+let format_flag entry =
+  let spelled =
+    match entry.switch with
+    | Short c -> Printf.sprintf "-%c" c
+    | Long long -> Printf.sprintf "--%s" long
+    | Both (c, long) -> Printf.sprintf "--%s(-%c)" long c
+  in
+  let annotation =
+    match entry.param with
+    | None -> ""
+    | Some (Mandatory p | Optional p) -> ": " ^ nushell_type_of_param p
+  in
+  let head = "    " ^ spelled ^ annotation in
+  match entry.desc with
+  | "" -> head
+  | desc ->
+    let gap = max 1 (40 - String.length head) in
+    head ^ String.make gap ' ' ^ "# " ^ desc
+
+(* render one positional argument as a line of a nushell extern signature
+ * (no trailing newline): "...name: type" when variadic, "name?: type" when
+ * optional and not variadic, "name: type" otherwise.
+ * hyphens in the name become underscores; the type is looked up from the
+ * upper-cased original name. *)
+let format_positional p =
+  let ident = String.map (fun c -> if c = '-' then '_' else c) p.pos_name in
+  let typ = nushell_type_of_param (String.uppercase_ascii p.pos_name) in
+  let decorated =
+    if p.variadic then "..." ^ ident
+    else if p.optional then ident ^ "?"
+    else ident
+  in
+  Printf.sprintf "    %s: %s" decorated typ
+
+(* make a positional list acceptable to nushell:
+ *   1. no required positional may follow an optional one
+ *   2. at most one variadic ("rest") parameter is allowed
+ *
+ * a non-variadic positional that comes after an optional one — or after
+ * the variadic one — is silently marked optional. every variadic after the
+ * first is dropped. relative order is otherwise preserved. *)
+let fixup_positionals positionals =
+  let rec walk ~saw_opt ~saw_rest kept = function
+    | [] -> List.rev kept
+    | p :: rest when p.variadic ->
+      if saw_rest then walk ~saw_opt ~saw_rest kept rest
+      else walk ~saw_opt:true ~saw_rest:true (p :: kept) rest
+    | p :: rest when saw_opt ->
+      walk ~saw_opt:true ~saw_rest ({ p with optional = true } :: kept) rest
+    | p :: rest ->
+      walk ~saw_opt:p.optional ~saw_rest (p :: kept) rest
+  in
+  walk ~saw_opt:false ~saw_rest:false [] positionals
+
+(* generate the nushell extern source for a command.
+ * the main block lists the fixed-up positionals first, then the
+ * deduplicated flags:
+ *   export extern "git add" [
+ *     ...pathspec: path
+ *     --verbose(-v)              # be verbose
+ *     --dry-run(-n)              # dry run
+ *   ]
+ *
+ * each entry of result.subcommands then gets a stub extern whose only
+ * content is a comment holding its description:
+ *   export extern "git stash" [  # stash changes
+ *   ]
+ *
+ * NOTE(review): the stub's description goes through escape_nu even though it
+ * lands in a comment, while flag descriptions are emitted unescaped —
+ * confirm whether a reader of these files depends on that before changing it.
+ *)
+let extern_of cmd_name result =
+  let cmd = escape_nu cmd_name in
+  (* render every item on its own newline-terminated line *)
+  let lines_of render items =
+    items |> List.map (fun item -> render item ^ "\n") |> String.concat ""
+  in
+  let pos_block = lines_of format_positional (fixup_positionals result.positionals) in
+  let flag_block = lines_of format_flag (dedup_entries result.entries) in
+  let main = Printf.sprintf "export extern \"%s\" [\n%s%s]\n" cmd pos_block flag_block in
+  let stub (sc : subcommand) =
+    Printf.sprintf "\nexport extern \"%s %s\" [  # %s\n]\n"
+      cmd (escape_nu sc.name) (escape_nu sc.desc)
+  in
+  String.concat "" (main :: List.map stub result.subcommands)
+
+(* public name for extern_of; same arguments, same result *)
+let generate_extern cmd_name result = extern_of cmd_name result
+
+(* derive a nushell module name from a command name: ascii letters, digits,
+ * '-' and '_' are kept, every other character becomes '-', and
+ * "-completions" is appended.
+ * e.g. "git" → "git-completions", "docker-compose" → "docker-compose-completions" *)
+let module_name_of cmd_name =
+  let keep c =
+    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
+    || c = '-' || c = '_'
+  in
+  String.map (fun c -> if keep c then c else '-') cmd_name ^ "-completions"
+
+(* wrap the extern source for a command in a nushell module and import it:
+ *   "module git-completions {\n<externs>}\n\nuse git-completions *\n"
+ * the trailing "use" makes the externs available right away. *)
+let generate_module cmd_name result =
+  let modname = module_name_of cmd_name in
+  let body = extern_of cmd_name result in
+  Printf.sprintf "module %s {\n%s}\n\nuse %s *\n" modname body modname
+
+(* generate an extern from flag entries alone: the help_result handed to
+ * generate_extern has no subcommands, no positionals and an empty
+ * description. *)
+let generate_extern_from_entries cmd_name entries =
+  let flags_only =
+    { entries; subcommands = []; positionals = []; description = "" }
+  in
+  generate_extern cmd_name flags_only
--- a/lib/parser.ml
+++ b/lib/parser.ml
@ -0,0 +1,802 @@
+(* parser.ml — parse --help output into structured flag/subcommand/positional data.
+ *
+ * this module is the core of inshellah's help-text understanding. it takes the
+ * raw text that a cli tool prints when you run `cmd --help` and extracts:
+ *   - flag entries (short/long switches with optional parameters and descriptions)
+ *   - subcommand listings (name + description pairs)
+ *   - positional arguments (from usage lines)
+ *
+ * the parser is built on angstrom (a monadic parser combinator library) for the
+ * structured flag/subcommand extraction, with hand-rolled imperative parsers for
+ * usage-line positional extraction (where the format is too varied for clean
+ * combinator composition).
+ *
+ * key design decisions:
+ *   - the angstrom parser runs in prefix-consume mode — it doesn't need to parse
+ *     the entire input, just extract what it can recognize. unrecognized lines are
+ *     skipped via skip_non_option_line.
+ *   - multi-line descriptions are handled via indentation-based continuation:
+ *     lines indented 8+ spaces that don't start with '-' are folded into the
+ *     previous entry's description.
+ *   - subcommand detection uses a heuristic: lines with a name followed by 2+
+ *     spaces then a description, where the name is at least 2 chars. section
+ *     headers (like "arguments:") toggle whether name-description pairs are
+ *     treated as subcommands or positionals.
+ *   - positional extraction has two paths: usage-line parsing (the common case)
+ *     and cli11's explicit "positionals:" section format.
+ *)
+
+open Angstrom
+
+(* remove terminal escape sequences from --help output before parsing.
+ * many cli tools emit styled output even when piped. an ESC byte that has
+ * at least one byte after it starts a sequence:
+ *   - ESC [ ...  (csi) — dropped up to and including the first byte in '@'..'~'
+ *   - ESC ] ...  (osc) — dropped up to and including a BEL or an ESC \ terminator
+ *   - ESC x      (anything else) — the ESC and the one following byte are dropped
+ * an unterminated csi/osc sequence swallows the rest of the input. an ESC
+ * that is the very last byte is copied through unchanged. *)
+let strip_ansi s =
+  let len = String.length s in
+  let out = Buffer.create len in
+  let esc = '\x1b' in
+  (* index just past the csi sequence whose body starts at [j] *)
+  let rec after_csi j =
+    if j >= len then j
+    else if s.[j] >= '@' && s.[j] <= '~' then j + 1
+    else after_csi (j + 1)
+  in
+  (* index just past the osc sequence whose body starts at [j] *)
+  let rec after_osc j =
+    if j >= len then j
+    else if s.[j] = '\x07' then j + 1
+    else if j + 1 < len && s.[j] = esc && s.[j + 1] = '\\' then j + 2
+    else after_osc (j + 1)
+  in
+  let rec copy_from i =
+    if i < len then begin
+      if i + 1 < len && s.[i] = esc then begin
+        match s.[i + 1] with
+        | '[' -> copy_from (after_csi (i + 2))
+        | ']' -> copy_from (after_osc (i + 2))
+        | _ -> copy_from (i + 2)
+      end else begin
+        Buffer.add_char out s.[i];
+        copy_from (i + 1)
+      end
+    end
+  in
+  copy_from 0;
+  Buffer.contents out
+
+(* --- character class predicates --- *)
+(* small ascii classifiers shared by the angstrom parsers below. *)
+
+(* horizontal whitespace: space or tab *)
+let is_whitespace c = c = ' ' || c = '\t'
+
+(* ascii letter or digit *)
+let is_alphanumeric c =
+  (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
+
+(* characters allowed inside parameter names like FILE, output-dir, etc. *)
+let is_param_char c = is_alphanumeric c || c = '_' || c = '-'
+
+(* used to detect all-caps parameter names like FILE, TIME_STYLE *)
+let is_upper_or_underscore c = (c >= 'A' && c <= 'Z') || c = '_'
+
+(* characters allowed in long flag names (--foo-bar, --enable-feature2) *)
+let is_long_char c = is_alphanumeric c || c = '-'
+
+(* --- core types ---
+ * these types represent the structured output of parsing a help text.
+ * they are shared across the entire codebase (nushell codegen, store, manpage parser).
+ *
+ * switch: a flag can be short-only (-v), long-only (--verbose), or both (-v, --verbose).
+ *   the both variant keeps the pair together so nushell can emit "--verbose(-v)".
+ *
+ * param: flags can take mandatory (--output FILE) or optional (--color[=WHEN]) values.
+ *
+ * entry: one complete flag definition — its switch form, optional parameter, and
+ *   the description text (potentially multi-line, already joined).
+ *
+ * help_result: the complete parsed output for a single command. *)
+(* how a flag is spelled *)
+type switch =
+  | Short of char
+  | Long of string
+  | Both of char * string
+
+(* the value a flag takes, carrying the placeholder name from the help text *)
+type param =
+  | Mandatory of string
+  | Optional of string
+
+(* one flag definition *)
+type entry = {
+  switch : switch;
+  param : param option;
+  desc : string;
+}
+
+(* one line of a subcommand listing *)
+type subcommand = {
+  name : string;
+  desc : string;
+}
+
+(* one positional argument *)
+type positional = {
+  pos_name : string;
+  optional : bool;
+  variadic : bool;
+}
+
+(* everything extracted for a single command *)
+type help_result = {
+  entries : entry list;
+  subcommands : subcommand list;
+  positionals : positional list;
+  description : string;
+}
+
+(* --- low-level angstrom combinators --- *)
+(* these are the building blocks for all the parsers below. *)
+
+(* skip spaces and tabs; never crosses a line boundary and never fails *)
+let inline_ws = skip_while is_whitespace
+
+(* permissive end of line: a newline, or the end of the input.
+ * this is the version used in most places. *)
+let eol = end_of_line <|> end_of_input
+
+(* strict end of line: a real newline must be consumed, so this can never
+ * succeed at eof. used where succeeding without consuming anything would
+ * be a problem (e.g. skip_non_option_line). *)
+let eol_strict = end_of_line
+
+(* --- switch and parameter parsers --- *)
+(* these parse the flag name portion of an option line, e.g. "-v", "--verbose" *)
+
+(* "-x": a dash followed by exactly one letter or digit; yields that character *)
+let short_switch = char '-' *> satisfy is_alphanumeric
+(* "--name": two dashes followed by one or more is_long_char characters;
+ * yields the name without the dashes *)
+let long_switch = string "--" *> take_while1 is_long_char
+(* a comma plus any horizontal whitespace after it (the ", " in "-a, --all") *)
+let comma = char ',' *> inline_ws
+
+(* parameter parsers — these handle the various syntaxes tools use to indicate
+ * that a flag takes a value. the formats are surprisingly diverse:
+ *   --output=FILE        (eq_man_param — mandatory, common in gnu tools)
+ *   --color[=WHEN]       (eq_opt_param — optional with = syntax)
+ *   --depth DEPTH        (space_upper_param — space-separated ALL_CAPS)
+ *   --file <path>        (space_angle_param — angle brackets)
+ *   --file [<path>]      (space_opt_angle_param — optional angle brackets)
+ *   --format string      (space_type_param — go/cobra lowercase type word)
+ *)
+(* "[=WHEN]" — optional value attached with '=' *)
+let eq_opt_param =
+  lift (fun name -> Optional name)
+    (string "[=" *> take_while1 is_param_char <* char ']')
+
+(* "=FILE" — mandatory value attached with '=' *)
+let eq_man_param =
+  lift (fun name -> Mandatory name) (char '=' *> take_while1 is_param_char)
+
+(* space-separated ALL_CAPS param: e.g. " FILE", " TIME_STYLE".
+ * after the single space the next character is peeked and must be an
+ * uppercase letter or underscore; then the whole word (read with
+ * is_param_char) must consist only of uppercase letters, underscores and
+ * digits. this keeps a capitalized description word such as "Do" or "Set"
+ * right after the flag from being taken as a parameter. digits are allowed
+ * (e.g. "SHA256"); lowercase letters and hyphens disqualify the word. *)
+let space_upper_param =
+  let is_digit c = c >= '0' && c <= '9' in
+  let all_caps word =
+    String.length word >= 1
+    && String.for_all (fun c -> is_upper_or_underscore c || is_digit c) word
+  in
+  char ' ' *> peek_char_fail >>= fun first ->
+  if not (is_upper_or_underscore first) then fail "not an uppercase param"
+  else
+    take_while1 is_param_char >>= fun name ->
+    if all_caps name then return (Mandatory name)
+    else fail "not an all-caps param"
+
+(* angle-bracket param: "<file>", "<notation>". the name is everything
+ * between the brackets and must be non-empty. *)
+let angle_param =
+  lift (fun name -> Mandatory name)
+    (char '<' *> take_while1 (fun c -> c <> '>') <* char '>')
+
+(* a single space, then an angle-bracket param *)
+let space_angle_param =
+  char ' ' *> angle_param
+
+(* optional angle-bracket param: "[<file>]" *)
+let opt_angle_param =
+  char '[' *> angle_param <* char ']' >>| function
+  | Mandatory name | Optional name -> Optional name
+
+(* a single space, then an optional angle-bracket param *)
+let space_opt_angle_param =
+  char ' ' *> opt_angle_param
+
+(* go/cobra style: a single space, then a lowercase type word such as
+ * "string", "list", "int" (as in "--timeout duration", "--name string").
+ * the word is a run of a-z only and is rejected when longer than 10
+ * characters — a longer word is almost certainly the start of the
+ * description rather than a type annotation. *)
+let space_type_param =
+  let is_lower c = c >= 'a' && c <= 'z' in
+  char ' ' *> peek_char_fail >>= fun first ->
+  if not (is_lower first) then fail "not a lowercase type param"
+  else
+    take_while1 is_lower >>= fun word ->
+    if String.length word > 10 then fail "too long for type param"
+    else return (Mandatory word)
+
+(* parse the optional parameter that may follow a flag name.
+ * the formats are tried in the order listed and the first one that succeeds
+ * wins; when none matches the result is None and nothing is consumed. *)
+let param_parser =
+  let any_param =
+    choice
+      [ eq_opt_param; eq_man_param;
+        space_opt_angle_param; space_angle_param;
+        space_upper_param; space_type_param ]
+  in
+  option None (any_param >>| Option.some)
+
+(* switch parser — handles the various ways help text presents flag names.
+ * formats handled (in order of attempt):
+ *   -a, --all       (short + comma + long — gnu style)
+ *   -a --all        (short + single space + long — some tools omit the comma)
+ *   --all / -a      (long + slash + short — rare but seen in some tools)
+ *   -a              (short only)
+ *   --all           (long only)
+ *
+ * the ordering is what makes the combined forms reachable: choice returns
+ * the first alternative that succeeds, and the short-only (or long-only)
+ * parser would already succeed on the leading "-a" (or "--all") of a
+ * combined form. a failed alternative does not consume input here —
+ * angstrom rewinds before trying the next one. *)
+let switch_parser =
+  let both s l = Both (s, l) in
+  choice
+    [
+      lift2 both short_switch (comma *> long_switch);
+      lift2 both short_switch (char ' ' *> long_switch);
+      lift2 (fun l s -> both s l)
+        long_switch
+        (inline_ws *> char '/' *> inline_ws *> short_switch);
+      lift (fun s -> Short s) short_switch;
+      lift (fun l -> Long l) long_switch;
+    ]
+
+(* --- description parsing with multi-line continuation ---
+ * descriptions in help text often wrap across multiple lines. the convention
+ * is that continuation lines are deeply indented (8+ spaces) and don't start
+ * with '-' (which would indicate a new flag entry). we peek ahead to check
+ * indentation without consuming, then decide whether to fold the line in. *)
+
+(* everything up to, but not including, the next '\n' or '\r' (may be empty) *)
+let rest_of_line = take_till (function '\n' | '\r' -> true | _ -> false)
+
+(* match one continuation line of a description and return its text.
+ * a line qualifies when its leading indentation is at least 8 columns
+ * (a tab counts as 8, matching typical terminal rendering) and the
+ * upcoming text does not start with '-', which would mean a new flag.
+ * the 8-column threshold was chosen empirically — help formatters usually
+ * indent wrapped descriptions at least that much, while flag lines sit at
+ * 2-4 spaces. the decision is made on a peeked preview of up to 80
+ * characters; input is only consumed once the line has been accepted. *)
+let continuation_line =
+  (* width of the leading blanks; stops at the first non-blank character *)
+  let indent_width s =
+    let rec measure i width =
+      if i >= String.length s then width
+      else
+        match s.[i] with
+        | ' ' -> measure (i + 1) (width + 1)
+        | '\t' -> measure (i + 1) (width + 8)
+        | _ -> width
+    in
+    measure 0 0
+  in
+  (* peek_string 1 fails when no input is left, so nothing matches at eof *)
+  peek_string 1 *> available >>= fun avail ->
+  if avail = 0 then fail "eof"
+  else
+    peek_string (min avail 80) >>= fun preview ->
+    let text = String.trim preview in
+    let dash_first = String.length text > 0 && text.[0] = '-' in
+    if indent_width preview < 8 || dash_first then
+      fail "not a continuation line"
+    else
+      (* accepted: drop the indentation, keep the text, eat the line end *)
+      inline_ws *> rest_of_line <* eol
+
+(* parse a flag's description: the remainder of the flag's own line plus any
+ * continuation lines below it. every line is trimmed, lines that are blank
+ * after trimming are dropped, and the rest are joined with single spaces. *)
+let description =
+  let join_lines lines =
+    lines
+    |> List.filter_map (fun line ->
+         match String.trim line with "" -> None | text -> Some text)
+    |> String.concat " "
+  in
+  lift2 (fun first_line cont_lines -> join_lines (first_line :: cont_lines))
+    (inline_ws *> rest_of_line <* eol)
+    (many continuation_line)
+
+(* description placed entirely below the flag, as in clap's (rust) long
+ * help format:
+ *   --verbose
+ *           increase verbosity
+ * requires at least one continuation line. lines are trimmed, blank ones
+ * dropped, and the rest joined with single spaces. *)
+let description_below =
+  let join_lines lines =
+    lines
+    |> List.filter_map (fun line ->
+         match String.trim line with "" -> None | text -> Some text)
+    |> String.concat " "
+  in
+  lift join_lines (many1 continuation_line)
+
+(* --- line classification for skipping ---
+ * the parser needs to skip lines it doesn't understand (section headers,
+ * blank lines, description paragraphs not attached to a flag, etc.)
+ * without consuming lines that ARE flag entries. *)
+
+(* peek ahead to check if the current line looks like a flag entry.
+ * an option line is optional whitespace followed by '-'. nothing is
+ * consumed; the parser fails at end of input.
+ *
+ * only the current line is inspected: the (up to 40 character) preview is
+ * cut at the first newline before trimming. trimming the raw preview would
+ * also strip leading newlines, so a blank line sitting directly above a
+ * flag line would itself be reported as an option line. *)
+let at_option_line =
+  peek_string 1 >>= fun _ ->
+  available >>= fun avail ->
+  if avail = 0 then fail "eof"
+  else
+    peek_string (min avail 40) >>= fun preview ->
+    let line = match String.index_opt preview '\n' with
+      | Some i -> String.sub preview 0 i
+      | None -> preview in
+    let s = String.trim line in
+    if String.length s > 0 && s.[0] = '-' then return ()
+    else fail "not an option line"
+
+(* skip one whole line (section header, blank, description-only, a flag-like
+ * line the entry parser could not make sense of, ...).
+ * uses eol_strict (not eol), so a real newline must be consumed: the parser
+ * cannot succeed at eof without consuming input, which is what keeps a
+ * surrounding `many` from skipping forever at the end of input.
+ *
+ * this used to read `(at_option_line *> fail ...) <|> skip`, intended to
+ * refuse option lines. that guard never had any effect: its branch always
+ * fails, at_option_line only peeks, and angstrom's <|> then runs the
+ * right-hand side from the same position. the dead branch is removed;
+ * behavior is unchanged. flag lines still get their chance at the entry
+ * parser because it is tried before this one in help_parser's choice. *)
+let skip_non_option_line =
+  rest_of_line *> eol_strict *> return ()
+
+(* --- entry parsing --- *)
+
+(* parse a single flag entry: leading whitespace, the switch, its optional
+ * parameter, then the description.
+ * the description is first tried inline (rest of the same line plus
+ * continuation lines); failing that, the line must end right there and the
+ * description is taken from the lines below (clap long-help format), or is
+ * the empty string when there are none. *)
+let entry =
+  let trailing_text =
+    description <|> (eol *> (description_below <|> return ""))
+  in
+  inline_ws *>
+  lift3 (fun switch param desc -> { switch; param; desc })
+    switch_parser param_parser trailing_text
+
+(* --- subcommand parsing ---
+ * subcommand lines in help text follow the pattern:
+ *   "  name   description"
+ * where the name and description are separated by 2+ spaces.
+ * some tools also include argument placeholders between name and description:
+ *   "  start UNIT...   start one or more units"
+ *   "  list [PATTERN]  list matching units"
+ *)
+
+(* characters allowed in a subcommand name: ascii letters, digits, '-', '_' *)
+let is_subcommand_char c =
+  (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
+  || c = '-' || c = '_'
+
+(* skip argument placeholders like UNIT..., [PATTERN...|PID...], <file>
+ * that appear between the subcommand name and the description.
+ * only consumes single-space gaps — the two-space gap before the
+ * description is left for the main parser to use as the delimiter.
+ *
+ * peculiarity: this is a recursive (fix-point) parser that peeks ahead
+ * to distinguish single-space argument gaps from the double-space
+ * description separator. it accepts tokens that start with [, <, or
+ * are ALL_CAPS (with dots/pipes/commas for variadic syntax).
+ *
+ * the parser never fails: whenever the upcoming text is not a
+ * single-space-separated placeholder it returns () without consuming. *)
+let skip_arg_placeholders =
+  fix (fun self ->
+    (* Peek ahead: single space followed by arg-like token *)
+    available >>= fun avail ->
+    if avail < 2 then return ()
+    else
+    peek_string (min avail 2) >>= fun s2 ->
+    if String.length s2 >= 2 && s2.[0] = ' ' && s2.[1] <> ' ' then
+      (* Single space — could be an arg placeholder *)
+      let next = s2.[1] in
+      if next = '[' || next = '<'
+         || (next >= 'A' && next <= 'Z') then
+        (* Peek the full token to check if it's ALL_CAPS/brackets *)
+        peek_string (min avail 80) >>= fun preview ->
+        (* Extract the token after the single space: it runs up to the next
+           space or line end, or to the end of the 80-character preview *)
+        let tok_start = 1 in
+        let tok_end = ref tok_start in
+        while !tok_end < String.length preview
+              && preview.[!tok_end] <> ' '
+              && preview.[!tok_end] <> '\n'
+              && preview.[!tok_end] <> '\r' do
+          incr tok_end
+        done;
+        let tok = String.sub preview tok_start (!tok_end - tok_start) in
+        (* Accept as placeholder if it starts with [ or < or consists only of
+           uppercase letters, digits and the characters _ - . | ,
+           (tok is non-empty here: its first character is `next`) *)
+        let is_placeholder =
+          tok.[0] = '[' || tok.[0] = '<'
+          || String.for_all (fun c ->
+               (c >= 'A' && c <= 'Z') || c = '_' || c = '-'
+               || c = '.' || c = '|' || c = ',' || (c >= '0' && c <= '9')
+             ) tok
+        in
+        if is_placeholder then
+          (* consume the single space plus the token, then look for another *)
+          advance (1 + String.length tok) *> self
+        else return ()
+      else return ()
+    else return ())
+
+(* parse one subcommand line: "  name [placeholders]  description".
+ * the name must be at least 2 characters long and, after any argument
+ * placeholders, must be followed by at least two spaces. the rest of the
+ * line is the description. the name is lowercased for consistent lookup.
+ *
+ * some tools decorate their listings as "  add   - add a new item"; a
+ * leading "- " on the trimmed description is therefore stripped. *)
+let subcommand_entry =
+  let strip_dash_prefix raw =
+    let text = String.trim raw in
+    if String.length text >= 2 && text.[0] = '-' && text.[1] = ' ' then
+      String.trim (String.sub text 2 (String.length text - 2))
+    else text
+  in
+  inline_ws *> take_while1 is_subcommand_char >>= fun name ->
+  if String.length name < 2 then fail "subcommand name too short"
+  else
+    skip_arg_placeholders *> char ' ' *> char ' ' *> inline_ws *> rest_of_line
+    <* eol
+    >>| fun desc ->
+    { name = String.lowercase_ascii name; desc = strip_dash_prefix desc }
+
+(* --- section header detection ---
+ * section headers are critical for disambiguating subcommands from positional
+ * arguments. lines like "commands:" introduce subcommand sections, while
+ * "arguments:" or "positionals:" introduce argument sections where the same
+ * name+description format should NOT be treated as subcommands. *)
+
+(* true when [s] names a section that lists positional arguments:
+ * "arguments", "args", "positionals" or "positional arguments".
+ * surrounding whitespace and case are ignored, as is one trailing colon
+ * (together with any whitespace just before it). *)
+let is_arg_section s =
+  let lowered = String.lowercase_ascii (String.trim s) in
+  let base =
+    if String.ends_with ~suffix:":" lowered then
+      String.trim (String.sub lowered 0 (String.length lowered - 1))
+    else lowered
+  in
+  List.mem base [ "arguments"; "args"; "positionals"; "positional arguments" ]
+
+(* match a section header line and consume it, newline included.
+ * a header is text that ends with ':' (after trimming, at least 2
+ * characters), does not start with '-', and is preceded by at most 4
+ * leading space/tab characters. the decision is made on a peeked preview of
+ * up to 80 characters, cut at the first newline.
+ *
+ * this must be tried BEFORE subcommand_entry in a choice, otherwise a line
+ * such as "commands:" could be taken for a subcommand.
+ *
+ * the result says whether the header opens an argument section (true, see
+ * is_arg_section) or any other section (false); help_parser uses it to
+ * keep entries listed under argument sections out of the subcommand list. *)
+let section_header =
+  let leading_blanks line =
+    let rec count i =
+      if i < String.length line && (line.[i] = ' ' || line.[i] = '\t')
+      then count (i + 1)
+      else i
+    in
+    count 0
+  in
+  available >>= fun avail ->
+  if avail = 0 then fail "eof"
+  else
+    peek_string (min avail 80) >>= fun preview ->
+    let first_line =
+      match String.index_opt preview '\n' with
+      | Some cut -> String.sub preview 0 cut
+      | None -> preview
+    in
+    let text = String.trim first_line in
+    let n = String.length text in
+    let looks_like_header =
+      n >= 2 && text.[n - 1] = ':' && text.[0] <> '-'
+      && leading_blanks first_line <= 4
+    in
+    if looks_like_header then rest_of_line <* eol_strict >>| is_arg_section
+    else fail "not a section header"
+
+(* --- top-level parser ---
+ * consumes the help text line by line. every line is classified as the
+ * first alternative that matches, in this priority order:
+ *   1. a flag entry
+ *   2. a section header (must come before subcommands, see section_header)
+ *   3. a subcommand line
+ *   4. anything else, which is skipped
+ *
+ * once all lines are classified, the result is assembled:
+ *   - every flag entry is kept, in input order
+ *   - subcommand lines are kept only while the most recent section header
+ *     was not an argument section (before any header they are kept)
+ *   - subcommands sharing a name are collapsed to one; a later occurrence
+ *     replaces an earlier one only if its description is strictly longer
+ *
+ * positionals and description are left empty here; parse_help fills in
+ * the positionals afterwards. *)
+let help_parser =
+  let open Angstrom in
+  fix (fun _self ->
+    (* one alternative per line kind; list order encodes priority *)
+    let line_item =
+      choice
+        [ (entry >>| fun e -> `Entry e);
+          (section_header >>| fun is_arg -> `Section is_arg);
+          (subcommand_entry >>| fun sc -> `Subcommand sc);
+          (skip_non_option_line >>| fun () -> `Skip) ]
+    in
+    many line_item >>| fun items ->
+    let entries =
+      List.filter_map (function `Entry e -> Some e | _ -> None) items
+    in
+    (* walk the items in order, remembering whether the latest section
+     * header was an argument section, and collect the subcommands seen
+     * outside of such sections *)
+    let rec outside_arg_sections in_arg_sec kept = function
+      | [] -> List.rev kept
+      | `Section is_arg :: rest -> outside_arg_sections is_arg kept rest
+      | `Subcommand sc :: rest when not in_arg_sec ->
+        outside_arg_sections in_arg_sec (sc :: kept) rest
+      | _ :: rest -> outside_arg_sections in_arg_sec kept rest
+    in
+    let subcommands =
+      outside_arg_sections false [] items
+      |> List.fold_left (fun best sc ->
+           match List.assoc_opt sc.name best with
+           | Some prev when String.length prev.desc >= String.length sc.desc -> best
+           | _ -> (sc.name, sc) :: List.remove_assoc sc.name best
+         ) []
+      |> List.rev_map snd
+    in
+    { entries; subcommands; positionals = []; description = "" })
+
+(* --- usage line parsing ---
+ * a usage line looks like: "usage: git add [OPTIONS] [--] [<pathspec>...]"
+ * before positionals can be extracted, the command-name prefix ("git add")
+ * has to be skipped.
+ *
+ * skip_command_prefix returns the index in s of the first token that is
+ * not part of the command name (or the position where scanning stopped).
+ * a blank-separated token counts as part of the command name when it:
+ *   - does not start with [, <, (, { or -
+ *   - starts with a word char (letters, digits, '-', '_', '/', '.')
+ *   - contains at least one lowercase ascii letter within its run of word
+ *     chars (an ALL_CAPS token such as FILE is taken to be a positional)
+ *
+ * this is a hand-written index walk rather than an angstrom parser
+ * because it operates on a single, loosely formatted string. *)
+let skip_command_prefix s =
+  let len = String.length s in
+  let is_blank c = c = ' ' || c = '\t' in
+  let is_word_char = function
+    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '/' | '.' -> true
+    | _ -> false
+  in
+  (* first index at or after pos whose char does not satisfy pred *)
+  let rec advance_while pred pos =
+    if pos < len && pred s.[pos] then advance_while pred (pos + 1) else pos
+  in
+  let rec walk pos =
+    let start = advance_while is_blank pos in
+    if start >= len then start
+    else
+      match s.[start] with
+      | '[' | '<' | '(' | '{' | '-' -> start
+      | c when is_word_char c ->
+        let stop = advance_while is_word_char start in
+        let word = String.sub s start (stop - start) in
+        if String.exists (fun ch -> ch >= 'a' && ch <= 'z') word
+        then walk stop
+        else start
+      | _ -> start
+  in
+  walk 0
+
+(* parse the argument portion of a usage line into positional definitions.
+ * handles these syntactic forms:
+ *   <file>          - mandatory positional
+ *   [file]          - optional positional
+ *   FILE            - mandatory positional (ALL_CAPS convention)
+ *   <file>...       - variadic (also handles utf-8 ellipsis)
+ *   [file...]       - optional variadic
+ *   curly-brace alternatives - skipped, not a positional
+ *   -flag           - flags (skipped)
+ *
+ * peculiarity: certain all-caps names are skipped because they're not real
+ * positionals — "OPTIONS", "FLAGS", etc. are section labels that sometimes
+ * appear in usage lines for readability.
+ *
+ * deduplication at the end ensures we don't emit the same positional twice
+ * (can happen when usage lines are reformatted or repeated).
+ *
+ * input:  s, the usage text that follows the command name.
+ * output: positionals in order of first appearance, names lowercased.
+ *
+ * implementation: a single mutable cursor i walks s; every branch of the
+ * main loop either advances i or leaves it at len, so the loop ends. *)
+let parse_usage_args s =
+  let len = String.length s in
+  let i = ref 0 in
+  (* accumulated in reverse order; reversed once after the loop *)
+  let results = ref [] in
+  let skip_ws () =
+    while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done in
+  (* chars allowed in a bare ALL_CAPS positional name *)
+  let is_pos_char c =
+    (c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9') in
+  (* skip blanks, then consume an ellipsis (three ascii dots, or the three
+   * bytes of the utf-8 ellipsis) if one follows. returns true iff an
+   * ellipsis was consumed. note: the blanks stay consumed either way. *)
+  let read_dots () =
+    skip_ws ();
+    if !i + 2 < len && s.[!i] = '.' && s.[!i+1] = '.' && s.[!i+2] = '.' then
+      (i := !i + 3; true)
+    else if !i + 2 < len && s.[!i] = '\xe2' && s.[!i+1] = '\x80' && s.[!i+2] = '\xa6' then
+      (i := !i + 3; true)  (* UTF-8 ellipsis … *)
+    else false
+  in
+  (* placeholder words that are never positionals (case-insensitive) *)
+  let is_skip name =
+    let u = String.uppercase_ascii name in
+    u = "OPTIONS" || u = "OPTION" || u = "FLAGS" || u = "FLAG"
+  in
+  (* a usable name: at least 2 chars, only ascii alphanumerics, '_' or '-' *)
+  let is_clean_name name =
+    String.length name >= 2
+    && String.for_all (fun c ->
+         (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+         || (c >= '0' && c <= '9') || c = '_' || c = '-') name
+  in
+  let is_letter c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in
+  (* if the cursor sits on '{', skip the whole (possibly nested) brace
+   * group plus an optional trailing ellipsis and return true; otherwise
+   * leave the cursor alone and return false *)
+  let skip_braces () =
+    (* Skip {A|c|d|...} alternative blocks *)
+    if !i < len && s.[!i] = '{' then begin
+      let depth = ref 1 in
+      incr i;
+      while !i < len && !depth > 0 do
+        if s.[!i] = '{' then incr depth
+        else if s.[!i] = '}' then decr depth;
+        incr i
+      done;
+      ignore (read_dots ());
+      true
+    end else false
+  in
+  while !i < len do
+    skip_ws ();
+    if !i >= len then ()
+    else if skip_braces () then ()
+    else match s.[!i] with
+    | '[' ->
+      (* optional group: find the matching ']' (nesting-aware) and look
+       * at the text in between *)
+      incr i;
+      let start = !i in
+      let depth = ref 1 in
+      while !i < len && !depth > 0 do
+        if s.[!i] = '[' then incr depth
+        else if s.[!i] = ']' then decr depth;
+        incr i
+      done;
+      (* the cursor is now one past the closing bracket; if the bracket was
+       * never closed the cursor is at len, and the last char is left out
+       * of inner *)
+      let bracket_end = !i - 1 in
+      let inner = String.sub s start (max 0 (bracket_end - start)) |> String.trim in
+      (* only an ascii ellipsis is recognised inside the brackets *)
+      let inner, has_inner_dots =
+        if String.ends_with ~suffix:"..." inner then
+          (String.sub inner 0 (String.length inner - 3) |> String.trim, true)
+        else (inner, false)
+      in
+      (* an ellipsis after the closing bracket also marks it variadic *)
+      let variadic = has_inner_dots || read_dots () in
+      (* groups starting with '-' are optional flags, not positionals *)
+      if String.length inner > 0
+         && inner.[0] <> '-'
+         && (is_letter inner.[0] || inner.[0] = '<') then begin
+        let name =
+          if inner.[0] = '<' then
+            (* take the text between '<' and the first '>' (or the end) *)
+            let e = try String.index inner '>' with Not_found -> String.length inner in
+            String.sub inner 1 (e - 1)
+          else inner
+        in
+        if is_clean_name name && not (is_skip name) then
+          results := { pos_name = String.lowercase_ascii name;
+                       optional = true; variadic } :: !results
+      end
+    | '<' ->
+      (* angle-bracket placeholder: mandatory positional *)
+      incr i;
+      let start = !i in
+      while !i < len && s.[!i] <> '>' do incr i done;
+      let name = String.sub s start (!i - start) in
+      if !i < len then incr i;
+      let variadic = read_dots () in
+      if is_clean_name name && not (is_skip name) then
+        results := { pos_name = String.lowercase_ascii name;
+                     optional = false; variadic } :: !results
+    | '-' ->
+      (* a flag token: skip up to the next blank or closing bracket *)
+      while !i < len && s.[!i] <> ' ' && s.[!i] <> '\t' && s.[!i] <> ']' do incr i done
+    | c when c >= 'A' && c <= 'Z' ->
+      (* bare ALL_CAPS word: mandatory positional *)
+      let start = !i in
+      while !i < len && is_pos_char s.[!i] do incr i done;
+      let name = String.sub s start (!i - start) in
+      let variadic = read_dots () in
+      if String.length name >= 2
+         && String.for_all (fun c ->
+              (c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9')
+            ) name
+         && not (is_skip name) then
+        results := { pos_name = String.lowercase_ascii name;
+                     optional = false; variadic } :: !results
+    | _ ->
+      (* anything else (lowercase words, punctuation) is skipped one char
+       * at a time *)
+      incr i
+  done;
+  (* restore input order, then keep only the first occurrence of each name *)
+  List.rev !results
+  |> List.fold_left (fun (seen, acc) p ->
+       if List.mem p.pos_name seen then (seen, acc)
+       else (p.pos_name :: seen, p :: acc)
+     ) ([], [])
+  |> snd |> List.rev
+
+(* locate the usage line of a help text and extract its positionals.
+ *
+ * lines are examined in order, trimmed and compared case-insensitively.
+ * the first line that starts with "usage:" or is exactly "usage" decides
+ * the outcome:
+ *   - "usage: cmd [OPTIONS] FILE"  -> the text after the colon is used
+ *   - "USAGE:" or "usage" alone    -> the following line is used (clap
+ *     style), provided it is not blank
+ * if that line yields nothing, or no such line exists, the result is [].
+ * the command-name prefix is stripped with skip_command_prefix and the
+ * remainder is handed to parse_usage_args. *)
+let extract_usage_positionals text =
+  (* the line right after a bare usage header, if it has any content *)
+  let following = function
+    | next :: _ ->
+      (match String.trim next with
+       | "" -> None
+       | body -> Some body)
+    | [] -> None
+  in
+  let rec find_usage = function
+    | [] -> None
+    | line :: rest ->
+      let t = String.trim line in
+      let lc = String.lowercase_ascii t in
+      if String.starts_with ~prefix:"usage:" lc then
+        (match String.trim (String.sub t 6 (String.length t - 6)) with
+         | "" -> following rest
+         | inline -> Some inline)
+      else if lc = "usage" then following rest
+      else find_usage rest
+  in
+  match find_usage (String.split_on_char '\n' text) with
+  | None -> []
+  | Some usage ->
+    let cmd_end = skip_command_prefix usage in
+    parse_usage_args (String.sub usage cmd_end (String.length usage - cmd_end))
+
+(* extract positionals from the explicit section that cli11 (a c++
+ * argument parsing library) prints:
+ *   Positionals:
+ *     name TEXT           description here
+ *     count INT           another description
+ *
+ * the section starts after the first line that trims to "POSITIONALS:" or
+ * "Positionals:" and ends at the first empty, whitespace-only or
+ * unindented line. for each indented line:
+ *   - the leading run of name chars (letters, digits, '_', '-') is the
+ *     name; lines whose name is shorter than 2 chars are ignored
+ *   - a following run of uppercase letters (the type word: TEXT, INT, ...)
+ *     is skipped
+ *   - an ascii ellipsis right after that marks the positional as variadic
+ * every positional found here is reported as mandatory, with its name
+ * lowercased. returns [] when the section is absent. *)
+let extract_cli11_positionals text =
+  let is_blank c = c = ' ' || c = '\t' in
+  let is_name_char = function
+    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' | '-' -> true
+    | _ -> false
+  in
+  let is_upper c = c >= 'A' && c <= 'Z' in
+  let parse_one s =
+    let len = String.length s in
+    (* first index at or after pos whose char does not satisfy pred *)
+    let rec span pred pos =
+      if pos < len && pred s.[pos] then span pred (pos + 1) else pos
+    in
+    let name_end = span is_name_char 0 in
+    if name_end < 2 then None
+    else
+      (* blanks, then the type word, then blanks again *)
+      let cursor =
+        name_end |> span is_blank |> span is_upper |> span is_blank
+      in
+      let variadic =
+        cursor + 2 < len
+        && s.[cursor] = '.' && s.[cursor + 1] = '.' && s.[cursor + 2] = '.'
+      in
+      Some { pos_name = String.lowercase_ascii (String.sub s 0 name_end);
+             optional = false; variadic }
+  in
+  let rec collect acc = function
+    | [] -> List.rev acc
+    | line :: rest ->
+      if line = "" || not (is_blank line.[0]) then List.rev acc
+      else
+        (match String.trim line with
+         | "" -> List.rev acc
+         | body ->
+           (match parse_one body with
+            | Some p -> collect (p :: acc) rest
+            | None -> collect acc rest))
+  in
+  let rec find_section = function
+    | [] -> []
+    | line :: rest ->
+      (match String.trim line with
+       | "POSITIONALS:" | "Positionals:" -> collect [] rest
+       | _ -> find_section rest)
+  in
+  find_section (String.split_on_char '\n' text)
+
+(* top-level entry point: turn a --help text into a help_result.
+ *   1. ansi escapes are stripped
+ *   2. help_parser extracts flags and subcommands (prefix-consume mode, so
+ *      the parser does not have to account for every byte)
+ *   3. positionals come from the cli11 section when it yields any,
+ *      otherwise from the usage line
+ * a parser failure is passed through unchanged as Error msg. *)
+let parse_help txt =
+  let clean = strip_ansi txt in
+  Angstrom.parse_string ~consume:Consume.Prefix help_parser clean
+  |> Result.map (fun result ->
+       let positionals =
+         match extract_cli11_positionals clean with
+         | [] -> extract_usage_positionals clean
+         | cli11 -> cli11
+       in
+       { result with positionals })
--- a/lib/store.ml
+++ b/lib/store.ml
@ -0,0 +1,444 @@
+(* store.ml — filesystem-backed cache of parsed completion data.
+ *
+ * this module handles persistence of completion data to disk. each command's
+ * help_result is serialized to json and stored as a file in a cache directory
+ * (default: $XDG_CACHE_HOME/inshellah). commands with native nushell completions
+ * are stored as .nu files instead.
+ *
+ * the store also provides lookup, listing, and subcommand discovery by
+ * scanning filenames in the cache directory.
+ *
+ * file naming convention:
+ *   - spaces in command names become underscores (e.g. "git add" → "git_add.json")
+ *   - subcommands of a parent share the prefix (e.g. "git_add.json", "git_commit.json")
+ *   - .json files contain serialized help_result
+ *   - .nu files contain native nushell extern source code
+ *
+ * the module includes a minimal hand-rolled json parser/serializer because
+ * we only need to handle our own output format (no need for a full json library).
+ *)
+
+open Parser
+
+(* get the default store path: $XDG_CACHE_HOME/inshellah, falling back to
+ * $HOME/.cache/inshellah when XDG_CACHE_HOME is unset OR empty (the xdg
+ * base directory spec treats an empty value like an unset one; using it
+ * verbatim would yield the relative path "inshellah").
+ *
+ * raises Not_found when the fallback is needed and HOME is not set. *)
+let default_store_path () =
+  let cache =
+    match Sys.getenv_opt "XDG_CACHE_HOME" with
+    | Some dir when dir <> "" -> dir
+    | Some _ | None -> Filename.concat (Sys.getenv "HOME") ".cache"
+  in
+  Filename.concat cache "inshellah"
+
+(* recursively create directories (equivalent to mkdir -p).
+ *
+ * missing ancestors are created first, each with mode 0o755. a directory
+ * that appears between the existence check and the mkdir call (another
+ * process creating the same store concurrently) is not an error: EEXIST
+ * is ignored. any other Unix.Unix_error is propagated. *)
+let ensure_dir dir =
+  let rec mkdir_p d =
+    if not (Sys.file_exists d) then begin
+      mkdir_p (Filename.dirname d);
+      try Unix.mkdir d 0o755
+      with Unix.Unix_error (Unix.EEXIST, _, _) -> ()
+    end in
+  mkdir_p dir
+
+(* convert a command name to a safe filename stem: spaces become
+ * underscores; ascii letters, digits, '-', '_' and '.' are kept; every
+ * other byte becomes a hyphen.
+ * e.g. "git add" → "git_add", "docker-compose" → "docker-compose" *)
+let filename_of_command cmd =
+  let sanitize c =
+    match c with
+    | ' ' -> '_'
+    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.' -> c
+    | _ -> '-'
+  in
+  String.map sanitize cmd
+
+(* approximate inverse of filename_of_command: every underscore becomes a
+ * space.
+ * peculiarity: the mapping is lossy — an underscore that was part of the
+ * original command name (e.g. "my_tool") also comes back as a space. this
+ * is accepted because such names are rare and subcommands are named with
+ * spaces. *)
+let command_of_filename base =
+  String.concat " " (String.split_on_char '_' base)
+
+(* --- json serialization of help_result ---
+ * hand-rolled json emitters. we don't use a json library because:
+ *   1. the schema is fixed and simple — we only serialize our own types
+ *   2. avoiding dependencies keeps the binary small
+ *   3. printf-style emission is fast and straightforward for our types *)
+
+(* escape a string for use inside a json string literal: the double quote
+ * and the backslash get a backslash prefix; newline, tab and carriage
+ * return use their short escapes; any other byte below 0x20 is written as
+ * a \u00XX escape. all remaining bytes are copied unchanged. *)
+let escape_json s =
+  let out = Buffer.create (String.length s + 4) in
+  let emit = function
+    | '"' -> Buffer.add_string out "\\\""
+    | '\\' -> Buffer.add_string out "\\\\"
+    | '\n' -> Buffer.add_string out "\\n"
+    | '\t' -> Buffer.add_string out "\\t"
+    | '\r' -> Buffer.add_string out "\\r"
+    | '\x00'..'\x1f' as c -> Printf.bprintf out "\\u%04x" (Char.code c)
+    | c -> Buffer.add_char out c
+  in
+  String.iter emit s;
+  Buffer.contents out
+
+(* a json string literal: the escaped text wrapped in double quotes *)
+let json_string s = String.concat "" [ "\""; escape_json s; "\"" ]
+(* the json null literal *)
+let json_null = "null"
+
+(* serialize a switch as an object tagged by "type": "short" carries
+ * "char", "long" carries "name", "both" carries "char" and "name". the
+ * character is emitted as a one-character json string. *)
+let json_switch_of sw =
+  let ch c = json_string (String.make 1 c) in
+  match sw with
+  | Short c -> Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (ch c)
+  | Long l -> Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string l)
+  | Both (c, l) ->
+    Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}"
+      (ch c) (json_string l)
+
+(* serialize an optional flag parameter: null when absent, otherwise an
+ * object with "kind" ("mandatory" or "optional") and "name". *)
+let json_param_of param =
+  match param with
+  | None -> json_null
+  | Some given ->
+    (match given with
+     | Mandatory p ->
+       Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}" (json_string p)
+     | Optional p ->
+       Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}" (json_string p))
+
+(* serialize one flag entry as an object with "switch", "param", "desc" *)
+let json_entry_of e =
+  let switch = json_switch_of e.switch in
+  let param = json_param_of e.param in
+  let desc = json_string e.desc in
+  Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}" switch param desc
+
+(* serialize one subcommand as an object with "name" and "desc" *)
+let json_subcommand_of sc =
+  let name = json_string sc.name in
+  let desc = json_string sc.desc in
+  Printf.sprintf "{\"name\":%s,\"desc\":%s}" name desc
+
+(* serialize one positional as an object with "name" (string) and the
+ * booleans "optional" and "variadic" *)
+let json_positional_of p =
+  let name = json_string p.pos_name in
+  Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}"
+    name p.optional p.variadic
+
+(* serialize a list as a json array: f renders each element, the results
+ * are joined with commas and wrapped in square brackets *)
+let json_list f items =
+  items |> List.map f |> String.concat "," |> Printf.sprintf "[%s]"
+
+(* serialize a whole help_result as one json object with the fields
+ * "source", "description", "entries", "subcommands" and "positionals".
+ * source records how the data was obtained and defaults to "help". *)
+let json_of_help_result ?(source="help") r =
+  let description = json_string r.description in
+  let entries = json_list json_entry_of r.entries in
+  let subcommands = json_list json_subcommand_of r.subcommands in
+  let positionals = json_list json_positional_of r.positionals in
+  Printf.sprintf "{\"source\":%s,\"description\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}"
+    (json_string source) description entries subcommands positionals
+
+(* --- json deserialization ---
+ * minimal hand-rolled recursive-descent json parser. only handles the subset
+ * we emit: strings, booleans, nulls, arrays, and objects. no number parsing
+ * (we don't emit numbers). this is intentionally minimal — we only read back
+ * our own serialized format, so robustness against arbitrary json is not needed.
+ *
+ * peculiarity: the \u escape handler does basic utf-8 encoding for code points
+ * up to 0xffff but doesn't handle surrogate pairs. this is fine for our use
+ * case since we only escape control characters below 0x20. *)
+
+(* in-memory json value; numbers are deliberately absent because the
+ * serializers in this module never emit them *)
+type json =
+  | Jnull
+  | Jbool of bool
+  | Jstring of string
+  | Jarray of json list
+  | Jobject of (string * json) list
+
+(* json accessor helpers — total functions that fall back to a neutral
+ * default when the key is missing or the value has another type *)
+
+(* value bound to key in an object (first binding wins); Jnull when the
+ * key is absent or the value is not an object *)
+let json_get key j =
+  match j with
+  | Jobject pairs -> Option.value (List.assoc_opt key pairs) ~default:Jnull
+  | Jnull | Jbool _ | Jstring _ | Jarray _ -> Jnull
+
+(* payload of a string, "" for anything else *)
+let json_to_string j = match j with Jstring s -> s | _ -> ""
+(* payload of a boolean, false for anything else *)
+let json_to_bool j = match j with Jbool b -> b | _ -> false
+(* elements of an array, [] for anything else *)
+let json_to_list j = match j with Jarray l -> l | _ -> []
+
+(* raised by parse_json on malformed input; the message names the problem
+ * and the byte offset at which it was detected *)
+exception Json_error of string
+
+(* imperative recursive-descent json parser.
+ * uses a mutable position ref to walk through the string.
+ *
+ * supported values: strings, true, false, null, arrays and objects (no
+ * numbers — the serializers never emit them). input after the first
+ * complete value is ignored.
+ *
+ * every malformed input is reported as Json_error, in particular:
+ *   - a string or escape sequence cut off by the end of input (e.g. a
+ *     truncated cache file) — scanning stops at the end of the input
+ *     instead of running past it
+ *   - a misspelled true / false / null literal
+ *   - a \u escape that is not followed by four hex digits
+ *
+ * \u escapes are decoded to utf-8 for code points up to 0xffff; surrogate
+ * pairs are not combined. *)
+let parse_json s =
+  let len = String.length s in
+  let pos = ref 0 in
+  (* current char, or NUL once the end of input is reached *)
+  let peek () = if !pos < len then s.[!pos] else '\x00' in
+  let advance () = incr pos in
+  let fail_at msg = raise (Json_error (Printf.sprintf "%s at %d" msg !pos)) in
+  let skip_ws () =
+    while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t'
+                         || s.[!pos] = '\n' || s.[!pos] = '\r') do
+      advance ()
+    done in
+  let expect c =
+    skip_ws ();
+    if peek () <> c then
+      raise (Json_error (Printf.sprintf "expected '%c' at %d" c !pos));
+    advance () in
+  (* consume the exact keyword word and return value, or fail *)
+  let literal word value =
+    let n = String.length word in
+    if !pos + n <= len && String.sub s !pos n = word then begin
+      pos := !pos + n;
+      value
+    end else fail_at (Printf.sprintf "expected '%s'" word) in
+  (* read exactly four hex digits at the cursor and move past them *)
+  let hex4 () =
+    if !pos + 4 > len then fail_at "truncated \\u escape";
+    let digit c =
+      match c with
+      | '0'..'9' -> Char.code c - Char.code '0'
+      | 'a'..'f' -> Char.code c - Char.code 'a' + 10
+      | 'A'..'F' -> Char.code c - Char.code 'A' + 10
+      | _ -> fail_at "invalid \\u escape" in
+    let code = ref 0 in
+    for k = 0 to 3 do
+      code := (!code lsl 4) lor digit s.[!pos + k]
+    done;
+    pos := !pos + 4;
+    !code in
+  (* append a code point (at most 0xffff) to buf, encoded as utf-8 *)
+  let add_utf8 buf code =
+    if code < 0x80 then Buffer.add_char buf (Char.chr code)
+    else if code < 0x800 then begin
+      Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6)));
+      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
+    end else begin
+      Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12)));
+      Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f)));
+      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
+    end in
+  let rec parse_value () =
+    skip_ws ();
+    match peek () with
+    | '"' -> Jstring (parse_string ())
+    | '{' -> parse_object ()
+    | '[' -> parse_array ()
+    | 'n' -> literal "null" Jnull
+    | 't' -> literal "true" (Jbool true)
+    | 'f' -> literal "false" (Jbool false)
+    | c -> raise (Json_error (Printf.sprintf "unexpected '%c' at %d" c !pos))
+  and parse_string () =
+    expect '"';
+    let buf = Buffer.create 32 in
+    let rec loop () =
+      (* bounds are checked explicitly: peek cannot tell the end of input
+       * apart from a literal NUL byte *)
+      if !pos >= len then fail_at "unterminated string"
+      else
+        match s.[!pos] with
+        | '"' -> advance () (* closing quote *)
+        | '\\' ->
+          advance ();
+          if !pos >= len then fail_at "unterminated string";
+          let esc = s.[!pos] in
+          advance ();
+          (match esc with
+           | 'n' -> Buffer.add_char buf '\n'
+           | 't' -> Buffer.add_char buf '\t'
+           | 'r' -> Buffer.add_char buf '\r'
+           | 'b' -> Buffer.add_char buf '\b'
+           | 'f' -> Buffer.add_char buf '\012'
+           | 'u' -> add_utf8 buf (hex4 ())
+           (* quote, backslash and slash stand for themselves *)
+           | c -> Buffer.add_char buf c);
+          loop ()
+        | c ->
+          Buffer.add_char buf c;
+          advance ();
+          loop ()
+    in
+    loop ();
+    Buffer.contents buf
+  and parse_object () =
+    expect '{';
+    skip_ws ();
+    if peek () = '}' then (advance (); Jobject [])
+    else begin
+      let pairs = ref [] in
+      let cont = ref true in
+      while !cont do
+        skip_ws ();
+        let key = parse_string () in
+        expect ':';
+        let value = parse_value () in
+        pairs := (key, value) :: !pairs;
+        skip_ws ();
+        if peek () = ',' then advance ()
+        else cont := false
+      done;
+      expect '}';
+      Jobject (List.rev !pairs)
+    end
+  and parse_array () =
+    expect '[';
+    skip_ws ();
+    if peek () = ']' then (advance (); Jarray [])
+    else begin
+      let items = ref [] in
+      let cont = ref true in
+      while !cont do
+        let v = parse_value () in
+        items := v :: !items;
+        skip_ws ();
+        if peek () = ',' then advance ()
+        else cont := false
+      done;
+      expect ']';
+      Jarray (List.rev !items)
+    end
+  in
+  parse_value ()
+
+(* --- json → ocaml type converters ---
+ * these reconstruct our parser types from their json representations.
+ * they mirror the json_*_of serializers above. *)
+
+(* rebuild a switch from its json object (inverse of json_switch_of).
+ * a missing or empty "char" becomes '?'; an unknown "type" becomes
+ * Long "?". *)
+let switch_of_json j =
+  let field key = json_to_string (json_get key j) in
+  let short_char () =
+    match field "char" with
+    | "" -> '?'
+    | c -> c.[0]
+  in
+  match field "type" with
+  | "short" -> Short (short_char ())
+  | "long" -> Long (field "name")
+  | "both" -> Both (short_char (), field "name")
+  | _ -> Long "?"
+
+(* rebuild an optional flag parameter (inverse of json_param_of): null or
+ * an unrecognised "kind" gives None *)
+let param_of_json j =
+  match j with
+  | Jnull -> None
+  | obj ->
+    let field key = json_to_string (json_get key obj) in
+    (match field "kind", field "name" with
+     | "mandatory", name -> Some (Mandatory name)
+     | "optional", name -> Some (Optional name)
+     | _ -> None)
+
+(* rebuild a flag entry from its json object (inverse of json_entry_of) *)
+let entry_of_json j =
+  let field key = json_get key j in
+  { switch = switch_of_json (field "switch");
+    param = param_of_json (field "param");
+    desc = json_to_string (field "desc") }
+
+(* rebuild a subcommand from its json object; missing fields become "" *)
+let subcommand_of_json j =
+  let text key = json_to_string (json_get key j) in
+  { name = text "name"; desc = text "desc" }
+
+(* rebuild a positional from its json object; a missing name becomes ""
+ * and missing flags become false *)
+let positional_of_json j =
+  let flag key = json_to_bool (json_get key j) in
+  { pos_name = json_to_string (json_get "name" j);
+    optional = flag "optional";
+    variadic = flag "variadic" }
+
+(* rebuild a help_result from its json object (inverse of
+ * json_of_help_result). the "source" field is not part of help_result and
+ * is ignored here; missing lists become [] and a missing description
+ * becomes "". *)
+let help_result_of_json j =
+  let items key convert = List.map convert (json_to_list (json_get key j)) in
+  let entries = items "entries" entry_of_json in
+  let subcommands = items "subcommands" subcommand_of_json in
+  let positionals = items "positionals" positional_of_json in
+  let description = json_to_string (json_get "description" j) in
+  { entries; subcommands; positionals; description }
+
+(* --- filesystem operations --- *)
+
+(* write contents to path, creating or truncating the file.
+ * the channel is closed on every path: normally with close_out (so a
+ * failing flush is still reported), and with close_out_noerr before
+ * re-raising when the write itself fails, so no descriptor is leaked.
+ * raises Sys_error when the file cannot be opened or written. *)
+let write_file path contents =
+  let oc = open_out path in
+  match output_string oc contents with
+  | () -> close_out oc
+  | exception e ->
+    close_out_noerr oc;
+    raise e
+
+(* read the whole file at path. returns None when the file cannot be
+ * opened or read (missing, unreadable, shrinking while being read, ...).
+ * once the file has been opened, the channel is always closed, including
+ * when reading fails. *)
+let read_file path =
+  match open_in path with
+  | exception Sys_error _ -> None
+  | ic ->
+    Fun.protect
+      ~finally:(fun () -> close_in_noerr ic)
+      (fun () ->
+         match really_input_string ic (in_channel_length ic) with
+         | contents -> Some contents
+         | exception (Sys_error _ | End_of_file | Invalid_argument _) -> None)
+
+(* serialize a help_result to json and store it in dir under the
+ * command's filename with a .json extension. source is recorded in the
+ * json and defaults to "help". *)
+let write_result ~dir ?(source="help") command result =
+  let file = filename_of_command command ^ ".json" in
+  let json = json_of_help_result ~source result in
+  write_file (Filename.concat dir file) json
+
+(* store native nushell completion source, unchanged, in dir under the
+ * command's filename with a .nu extension *)
+let write_native ~dir command data =
+  let file = filename_of_command command ^ ".nu" in
+  write_file (Filename.concat dir file) data
+
+(* true iff path exists and is a directory. never raises: a path that is
+ * missing, inaccessible, or removed while being checked counts as not a
+ * directory (a separate existence check followed by Sys.is_directory
+ * could still raise Sys_error if the path vanished in between). *)
+let is_dir path =
+  match Sys.is_directory path with
+  | is_directory -> is_directory
+  | exception Sys_error _ -> false
+
+(* locate a command's data file. directories are tried in the given
+ * order; within each directory the .json file is preferred over the .nu
+ * file. returns the path of the first existing file, or None. *)
+let find_file dirs command =
+  let base = filename_of_command command in
+  let candidates dir =
+    List.map (fun ext -> Filename.concat dir (base ^ ext)) [ ".json"; ".nu" ]
+  in
+  List.find_map (fun dir -> List.find_opt Sys.file_exists (candidates dir)) dirs
+
+(* look up a command and deserialize its help_result.
+ * only .json files are considered (.nu files cannot be turned back into a
+ * help_result). directories are tried in order; a directory whose file is
+ * missing, unreadable or fails to parse is skipped and the search moves
+ * on to the next one. returns None when no directory yields a result. *)
+let lookup dirs command =
+  let file = filename_of_command command ^ ".json" in
+  let load dir =
+    Option.bind (read_file (Filename.concat dir file)) (fun data ->
+      try Some (help_result_of_json (parse_json data))
+      with _ -> None)
+  in
+  List.find_map load dirs
+
+(* fetch a command's stored data verbatim, without parsing it: the
+ * contents of the .json file if it can be read, otherwise those of the
+ * .nu file, from the first directory that has either.
+ * used by the "query" command to dump stored data as-is. *)
+let lookup_raw dirs command =
+  let base = filename_of_command command in
+  let read_first dir =
+    List.find_map
+      (fun ext -> read_file (Filename.concat dir (base ^ ext)))
+      [ ".json"; ".nu" ]
+  in
+  List.find_map read_first dirs
+
+(* strip a store extension: Some stem when f ends in .json or .nu (tried
+ * in that order), None for any other filename *)
+let chop_extension f =
+  List.find_map
+    (fun ext ->
+       if Filename.check_suffix f ext then Some (Filename.chop_suffix f ext)
+       else None)
+    [ ".json"; ".nu" ]
+
+(* discover the subcommands of a command from the filenames in the store.
+ * a file counts when its name is the command's filename, an underscore,
+ * a non-empty subcommand name without further underscores, and a .json
+ * or .nu extension (for "git": "git_add.json", "git_commit.nu", ...), so
+ * only immediate subcommands are reported.
+ *
+ * directories are scanned in the given order and the first file seen for
+ * a subcommand name wins. the description is taken from the json
+ * "description" field; it is "" for .nu files and for json files that
+ * cannot be read or parsed. the result is sorted by subcommand name.
+ *
+ * this filesystem-based discovery serves as a fallback when a command's
+ * own help_result lists no subcommands: it picks up subcommands that were
+ * indexed from separate manpages or help runs. *)
+let subcommands_of dirs command =
+  let prefix = filename_of_command command ^ "_" in
+  let plen = String.length prefix in
+  let module SMap = Map.Make(String) in
+  (* description stored in a json file, "" when unavailable *)
+  let describe dir file =
+    if not (Filename.check_suffix file ".json") then ""
+    else
+      match read_file (Filename.concat dir file) with
+      | None -> ""
+      | Some data ->
+        (try json_to_string (json_get "description" (parse_json data))
+         with _ -> "")
+  in
+  let consider dir found file =
+    if not (String.starts_with ~prefix file) then found
+    else
+      match chop_extension file with
+      | None -> found
+      | Some stem ->
+        let sub = String.sub stem plen (String.length stem - plen) in
+        if String.contains sub '_' || sub = "" || SMap.mem sub found then found
+        else SMap.add sub { name = sub; desc = describe dir file } found
+  in
+  let scan found dir =
+    if is_dir dir then Array.fold_left (consider dir) found (Sys.readdir dir)
+    else found
+  in
+  dirs |> List.fold_left scan SMap.empty |> SMap.bindings |> List.map snd
+
+(* list every indexed command found in the given store directories.
+ * each .json or .nu file contributes the command name derived from its
+ * filename (see command_of_filename); entries that are not directories
+ * are ignored. the result is sorted and free of duplicates. *)
+let all_commands dirs =
+  let module SSet = Set.Make(String) in
+  let names_in dir =
+    if is_dir dir then
+      Sys.readdir dir
+      |> Array.to_list
+      |> List.filter_map chop_extension
+      |> List.map command_of_filename
+    else []
+  in
+  dirs |> List.concat_map names_in |> SSet.of_list |> SSet.elements
+
+(* report how a command was indexed. directories are tried in order and
+ * the first one holding a file for the command decides:
+ *   - a .json file: the value of its "source" field ("help", "manpage",
+ *     ...), or "json" when the file cannot be read or parsed
+ *   - otherwise a .nu file: "native"
+ * returns None when no directory has either file.
+ * used by the "dump" command to show provenance. *)
+let file_type_of dirs command =
+  let base = filename_of_command command in
+  let classify dir =
+    let json_path = Filename.concat dir (base ^ ".json") in
+    if Sys.file_exists json_path then
+      Some
+        (match read_file json_path with
+         | None -> "json"
+         | Some data ->
+           (try json_to_string (json_get "source" (parse_json data))
+            with _ -> "json"))
+    else if Sys.file_exists (Filename.concat dir (base ^ ".nu")) then
+      Some "native"
+    else None
+  in
+  List.find_map classify dirs
--- a/nix/module.nix
+++ b/nix/module.nix
@ -0,0 +1,109 @@
+# NixOS module: automatic nushell completion indexing
+#
+# Indexes completions using three strategies in priority order:
+#   1. Native completion generators (e.g. CMD completions nushell)
+#   2. Manpage parsing
+#   3. --help output parsing
+#
+# Produces a directory of .json/.nu files at build time.
+# The `complete` command reads from this directory as a system overlay.
+#
+# Usage:
+#   { pkgs, ... }: {
+#     imports = [ ./path/to/inshellah/nix/module.nix ];
+#     programs.inshellah.enable = true;
+#   }
+
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+
+let
+  cfg = config.programs.inshellah;
+in
+{
+  # user-facing options; all of them live under programs.inshellah.
+  options.programs.inshellah = {
+    enable = lib.mkEnableOption "nushell completion indexing via inshellah";
+
+    # no default is declared here, so a value has to be supplied by the
+    # configuration (or another module) whenever the module is enabled.
+    package = lib.mkOption {
+      type = lib.types.package;
+      description = "package to use for indexing completions";
+    };
+
+    # appended verbatim to $out when the destination directory is built,
+    # so it is expected to start with a slash.
+    completionsPath = lib.mkOption {
+      type = lib.types.str;
+      default = "/share/inshellah";
+      description = ''
+        subdirectory within the system profile where completion files
+        are placed. used as --system-dir for the completer.
+      '';
+    };
+
+    # written one name per line to a file passed to the indexer via --ignore;
+    # the flag is omitted entirely when the list is empty.
+    ignoreCommands = lib.mkOption {
+      type = lib.types.listOf lib.types.str;
+      default = [ ];
+      example = [ "problematic-tool" ];
+      description = ''
+        list of command names to skip during completion indexing
+      '';
+    };
+
+    # written one name per line to a file passed to the indexer via
+    # --help-only; the flag is omitted entirely when the list is empty.
+    helpOnlyCommands = lib.mkOption {
+      type = lib.types.listOf lib.types.str;
+      default = [ ];
+      example = [ "nix" ];
+      description = ''
+        list of command names to skip manpage parsing for,
+        using --help scraping instead
+      '';
+    };
+
+    # NOTE(review): read-only string with no default and no description; the
+    # config block of this module does not assign it — confirm where the
+    # value is defined and what it is meant to contain.
+    snippet = lib.mkOption {
+      type = lib.types.str;
+      readOnly = true;
+    };
+  };
+
+  # everything below only takes effect when programs.inshellah.enable is set.
+  config = lib.mkIf cfg.enable {
+    # make the inshellah package itself part of the system environment.
+    environment.systemPackages = [ cfg.package ];
+    # NOTE(review): the elevation stubs below are written under
+    # share/nushell/vendor/autoload, not share/nushell/autoload — confirm
+    # that this is the path that is meant to be linked.
+    environment.pathsToLink = [ "/share/nushell/autoload" ];
+    # shell fragment that indexes the assembled environment ($out) and drops
+    # the resulting completion files into $out<completionsPath>.
+    environment.extraSetup =
+      let
+        inshellah = "${cfg.package}/bin/inshellah";
+        destDir = "$out${cfg.completionsPath}";
+        # one command name per line; only passed along when non-empty.
+        ignoreFile = pkgs.writeText "inshellah-ignore" (lib.concatStringsSep "\n" cfg.ignoreCommands);
+        ignoreFlag = lib.optionalString (cfg.ignoreCommands != [ ]) " --ignore ${ignoreFile}";
+        helpOnlyFile = pkgs.writeText "inshellah-help-only" (
+          lib.concatStringsSep "\n" cfg.helpOnlyCommands
+        );
+        helpOnlyFlag = lib.optionalString (cfg.helpOnlyCommands != [ ]) " --help-only ${helpOnlyFile}";
+      in
+      ''
+        mkdir -p "${destDir}"
+
+        # indexing is best-effort: its stderr is discarded and a failure
+        # must not break the system build.
+        if [ -d "$out/bin" ] && [ -d "$out/share/man" ]; then
+          ${inshellah} index "$out" --dir "${destDir}"${ignoreFlag}${helpOnlyFlag} \
+            2>/dev/null || true
+        fi
+
+        # remove empty entries left behind by indexing. -mindepth 1 keeps the
+        # destination directory itself from matching -empty and being deleted
+        # when nothing was indexed.
+        find "${destDir}" -mindepth 1 -maxdepth 1 -empty -delete
+
+        # nushell hardcodes sudo and doas to bypass the external completer,
+        # returning command-name completion instead of calling inshellah.
+        # these @complete external stubs override that so inshellah handles
+        # their flags and elevation stripping. placed in the nushell autoload
+        # dir so they are sourced automatically at shell startup.
+        mkdir -p "$out/share/nushell/vendor/autoload"
+        cat > "$out/share/nushell/vendor/autoload/inshellah-elevation.nu" << 'NUSHELL'
+        @complete external
+        extern "sudo" []
+
+        @complete external
+        extern "doas" []
+        NUSHELL
+      '';
+  };
+}
--- a/test/dune
+++ b/test/dune
@ -0,0 +1,3 @@
+; test stanza for test_inshellah.ml; links the inshellah library under test
+; and str, which the test file uses for substring searches.
+(test
+ (name test_inshellah)
+ (libraries inshellah str))
--- a/test/test_inshellah.ml
+++ b/test/test_inshellah.ml
@ -0,0 +1,492 @@
+open Inshellah.Parser
+open Inshellah.Manpage
+open Inshellah.Nushell
+
+(* running totals for the whole test executable. *)
+let failures = ref 0
+let passes = ref 0
+
+(* record the outcome of one named assertion: bump the matching counter and
+ * print an indented PASS/FAIL line carrying the assertion's name. *)
+let check name condition =
+  match condition with
+  | true ->
+    incr passes;
+    Printf.printf "  PASS: %s\n" name
+  | false ->
+    incr failures;
+    Printf.printf "  FAIL: %s\n" name
+
+(* run parse_help on [txt] and unwrap the result; a parse error is turned
+ * into a Failure whose message starts with "parse_help failed: ". *)
+let parse txt =
+  let fail msg = failwith (Printf.sprintf "parse_help failed: %s" msg) in
+  match parse_help txt with
+  | Error msg -> fail msg
+  | Ok parsed -> parsed
+
+(* --- Help parser tests --- *)
+
+(* a single GNU-style "-a, --all" line must yield one entry with both
+ * switches, no parameter and a non-empty description. the per-entry checks
+ * are skipped when no entry was parsed, so a regression is reported through
+ * the "one entry" FAIL instead of List.hd raising and aborting the run. *)
+let test_gnu_basic () =
+  Printf.printf "\n== GNU basic flags ==\n";
+  let r = parse "  -a, --all                  do not ignore entries starting with .\n" in
+  check "one entry" (List.length r.entries = 1);
+  match r.entries with
+  | [] -> ()
+  | e :: _ ->
+    check "both switch" (e.switch = Both ('a', "all"));
+    check "no param" (e.param = None);
+    check "desc" (String.length e.desc > 0)
+
+(* "--flag=VALUE" must parse as a long switch with a mandatory parameter.
+ * the per-entry checks are skipped when nothing was parsed, so the failure
+ * is recorded by "one entry" rather than by an exception from List.hd. *)
+let test_gnu_eq_param () =
+  Printf.printf "\n== GNU = param ==\n";
+  let r = parse "      --block-size=SIZE      scale sizes by SIZE\n" in
+  check "one entry" (List.length r.entries = 1);
+  match r.entries with
+  | [] -> ()
+  | e :: _ ->
+    check "long switch" (e.switch = Long "block-size");
+    check "mandatory param" (e.param = Some (Mandatory "SIZE"))
+
+(* "--flag[=VALUE]" must parse as a long switch with an optional parameter.
+ * the per-entry checks are skipped when nothing was parsed, so the failure
+ * is recorded by "one entry" rather than by an exception from List.hd. *)
+let test_gnu_opt_param () =
+  Printf.printf "\n== GNU optional param ==\n";
+  let r = parse "      --color[=WHEN]         color the output WHEN\n" in
+  check "one entry" (List.length r.entries = 1);
+  match r.entries with
+  | [] -> ()
+  | e :: _ ->
+    check "long switch" (e.switch = Long "color");
+    check "optional param" (e.param = Some (Optional "WHEN"))
+
+(* a parameter name containing an underscore must be kept whole.
+ * the per-entry check is skipped when nothing was parsed, so the failure
+ * is recorded by "one entry" rather than by an exception from List.hd. *)
+let test_underscore_param () =
+  Printf.printf "\n== Underscore in param (TIME_STYLE) ==\n";
+  let r = parse "      --time-style=TIME_STYLE  time/date format\n" in
+  check "one entry" (List.length r.entries = 1);
+  match r.entries with
+  | [] -> ()
+  | e :: _ ->
+    check "param with underscore" (e.param = Some (Mandatory "TIME_STYLE"))
+
+(* a line with only "-v" must yield a short switch.
+ * the switch check is skipped when nothing was parsed, so the failure is
+ * recorded by "one entry" rather than by an exception from List.hd. *)
+let test_short_only () =
+  Printf.printf "\n== Short-only flag ==\n";
+  let r = parse "  -v                       verbose output\n" in
+  check "one entry" (List.length r.entries = 1);
+  match r.entries with
+  | [] -> ()
+  | e :: _ -> check "short switch" (e.switch = Short 'v')
+
+(* a line with only "--help" must yield a long switch.
+ * the switch check is skipped when nothing was parsed, so the failure is
+ * recorded by "one entry" rather than by an exception from List.hd. *)
+let test_long_only () =
+  Printf.printf "\n== Long-only flag ==\n";
+  let r = parse "      --help                 display help\n" in
+  check "one entry" (List.length r.entries = 1);
+  match r.entries with
+  | [] -> ()
+  | e :: _ -> check "long switch" (e.switch = Long "help")
+
+(* a description continued on an indented second line must be folded into
+ * the same entry (checked through the description length).
+ * the description check is skipped when nothing was parsed, so the failure
+ * is recorded by "one entry" rather than by an exception from List.hd. *)
+let test_multiline_desc () =
+  Printf.printf "\n== Multi-line description ==\n";
+  let r = parse {|      --block-size=SIZE      with -l, scale sizes by SIZE when printing them;
+                               e.g., '--block-size=M'; see SIZE format below
+|} in
+  check "one entry" (List.length r.entries = 1);
+  match r.entries with
+  | [] -> ()
+  | e :: _ -> check "desc includes continuation" (String.length e.desc > 50)
+
+(* three consecutive flag lines must produce three entries. *)
+let test_multiple_entries () =
+  let help_text = {|  -a, --all                  do not ignore entries starting with .
+  -A, --almost-all           do not list implied . and ..
+      --author               with -l, print the author of each file
+|} in
+  Printf.printf "\n== Multiple entries ==\n";
+  let parsed = parse help_text in
+  let entry_count = List.length parsed.entries in
+  check "three entries" (entry_count = 3)
+
+(* flags spread over several "SOMETHING OPTIONS:" sections must all be
+ * collected, and the first one keeps its switches and parameter.
+ * the per-entry checks are skipped when nothing was parsed, so the failure
+ * is recorded by "three entries" rather than by an exception from List.hd. *)
+let test_clap_short_sections () =
+  Printf.printf "\n== Clap short with section headers ==\n";
+  let r = parse {|INPUT OPTIONS:
+  -e, --regexp=PATTERN       A pattern to search for.
+  -f, --file=PATTERNFILE     Search for patterns from the given file.
+SEARCH OPTIONS:
+  -s, --case-sensitive       Search case sensitively.
+|} in
+  check "three entries" (List.length r.entries = 3);
+  match r.entries with
+  | [] -> ()
+  | e :: _ ->
+    check "first is regexp" (e.switch = Both ('e', "regexp"));
+    check "first has param" (e.param = Some (Mandatory "PATTERN"))
+
+(* clap's long layout puts the description on the line below the flag; both
+ * flags must be found and the first must pick up its description.
+ * the per-entry checks are skipped when nothing was parsed, so the failure
+ * is recorded by "two entries" rather than by an exception from List.hd. *)
+let test_clap_long_style () =
+  Printf.printf "\n== Clap long style (desc below flag) ==\n";
+  let r = parse {|  -H, --hidden
+          Include hidden directories and files.
+
+      --no-ignore
+          Do not respect ignore files.
+|} in
+  check "two entries" (List.length r.entries = 2);
+  match r.entries with
+  | [] -> ()
+  | e :: _ ->
+    check "hidden switch" (e.switch = Both ('H', "hidden"));
+    check "desc below" (String.length e.desc > 0)
+
+(* "--flag <name>" must parse as a long switch whose mandatory parameter is
+ * the name without the angle brackets.
+ * the per-entry checks are skipped when nothing was parsed, so the failure
+ * is recorded by "one entry" rather than by an exception from List.hd. *)
+let test_clap_long_angle_param () =
+  Printf.printf "\n== Clap long angle bracket param ==\n";
+  let r = parse {|      --nonprintable-notation <notation>
+          Set notation for non-printable characters.
+|} in
+  check "one entry" (List.length r.entries = 1);
+  match r.entries with
+  | [] -> ()
+  | e :: _ ->
+    check "long switch" (e.switch = Long "nonprintable-notation");
+    check "angle param" (e.param = Some (Mandatory "notation"))
+
+(* "--foo FOO" (space-separated, all caps) must be read as a mandatory
+ * parameter rather than as the start of the description.
+ * the per-entry checks are skipped when nothing was parsed, so the failure
+ * is recorded by "one entry" rather than by an exception from List.hd. *)
+let test_space_upper_param () =
+  Printf.printf "\n== Space-separated ALL_CAPS param ==\n";
+  let r = parse "  -f, --foo FOO  foo help\n" in
+  check "one entry" (List.length r.entries = 1);
+  match r.entries with
+  | [] -> ()
+  | e :: _ ->
+    check "switch" (e.switch = Both ('f', "foo"));
+    check "space param" (e.param = Some (Mandatory "FOO"))
+
+(* cobra-style "Flags:" block: three entries, and the second one carries a
+ * lowercase type word ("string") as its mandatory parameter.
+ * the host checks are skipped when fewer than two entries were parsed, so
+ * the failure is recorded by "three flag entries" rather than by an
+ * exception from List.nth. *)
+let test_go_cobra_flags () =
+  Printf.printf "\n== Go/Cobra flags ==\n";
+  let r = parse {|Flags:
+  -D, --debug              Enable debug mode
+  -H, --host string        Daemon socket to connect to
+  -v, --version            Print version information
+|} in
+  check "three flag entries" (List.length r.entries = 3);
+  (* the host flag is the second entry and has a type param *)
+  match r.entries with
+  | _ :: host :: _ ->
+    check "host switch" (host.switch = Both ('H', "host"));
+    check "host type param" (host.param = Some (Mandatory "string"))
+  | [] | [ _ ] -> ()
+
+(* a cobra-style "Common Commands:" block must yield at least one
+ * subcommand. *)
+let test_go_cobra_subcommands () =
+  let help_text = {|Common Commands:
+  run         Create and run a new container from an image
+  exec        Execute a command in a running container
+  build       Build an image from a Dockerfile
+|} in
+  Printf.printf "\n== Go/Cobra subcommands ==\n";
+  let parsed = parse help_text in
+  let found_any =
+    match parsed.subcommands with
+    | [] -> false
+    | _ :: _ -> true
+  in
+  check "has subcommands" found_any
+
+(* busybox separates flag and description with tabs; both lines must parse
+ * and a digit is accepted as a short switch.
+ * the switch check is skipped when nothing was parsed, so the failure is
+ * recorded by "two entries" rather than by an exception from List.hd. *)
+let test_busybox_tab () =
+  Printf.printf "\n== Busybox tab-indented ==\n";
+  let r = parse "\t-1\tOne column output\n\t-a\tInclude names starting with .\n" in
+  check "two entries" (List.length r.entries = 2);
+  match r.entries with
+  | [] -> ()
+  | e :: _ -> check "first is -1" (e.switch = Short '1')
+
+(* placeholder that always passes: per the original note, the old parser
+ * printed "opt param is running" at module load time, so merely reaching
+ * this point without that output is the success condition. *)
+let test_no_debug_prints () =
+  let () = Printf.printf "\n== No debug side effects ==\n" in
+  check "no debug prints" true
+
+(* --- Manpage parser tests --- *)
+
+(* OPTIONS section written with .TP macros: three entries are expected; the
+ * first and third are inspected only when they exist. *)
+let test_manpage_tp_style () =
+  Printf.printf "\n== Manpage .TP style ==\n";
+  let page = {|.SH OPTIONS
+.TP
+\fB\-a\fR, \fB\-\-all\fR
+do not ignore entries starting with .
+.TP
+\fB\-A\fR, \fB\-\-almost\-all\fR
+do not list implied . and ..
+.TP
+\fB\-\-block\-size\fR=\fISIZE\fR
+with \fB\-l\fR, scale sizes by SIZE
+.SH AUTHOR
+Written by someone.
+|} in
+  let parsed = parse_manpage_string page in
+  let entries = parsed.entries in
+  check "three entries" (List.length entries = 3);
+  (match entries with
+   | first :: _ ->
+     check "first is -a/--all" (first.switch = Both ('a', "all"));
+     check "first desc" (String.length first.desc > 0)
+   | [] -> ());
+  match entries with
+  | _ :: _ :: third :: _ ->
+    check "block-size switch" (third.switch = Long "block-size");
+    check "block-size param" (third.param = Some (Mandatory "SIZE"))
+  | _ -> ()
+
+(* OPTIONS section written with .IP macros: two entries are expected and
+ * the first, when present, must carry both -k and --insecure. *)
+let test_manpage_ip_style () =
+  Printf.printf "\n== Manpage .IP style ==\n";
+  let page = {|.SH OPTIONS
+.IP "\fB\-k\fR, \fB\-\-insecure\fR"
+Allow insecure connections.
+.IP "\fB\-o\fR, \fB\-\-output\fR \fIfile\fR"
+Write output to file.
+.SH SEE ALSO
+|} in
+  let parsed = parse_manpage_string page in
+  let entries = parsed.entries in
+  check "two entries" (List.length entries = 2);
+  match entries with
+  | first :: _ ->
+    check "first is -k/--insecure" (first.switch = Both ('k', "insecure"))
+  | [] -> ()
+
+(* strip_groff_escapes must drop font escapes, leave a '-' in the output for
+ * escaped dashes and turn \(aq into a single quote. *)
+let test_manpage_groff_stripping () =
+  Printf.printf "\n== Groff escape stripping ==\n";
+  let stripped = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in
+  let has c = String.contains stripped c in
+  check "font escapes removed" (not (has 'f') || not (has 'B'));
+  check "dashes converted" (has '-');
+  let quoted = strip_groff_escapes {|\(aqhello\(aq|} in
+  check "aq -> quote" (String.contains quoted '\'')
+
+(* a manpage without an OPTIONS section must produce no entries. *)
+let test_manpage_empty_options () =
+  Printf.printf "\n== Manpage with no OPTIONS section ==\n";
+  let page = {|.SH NAME
+foo \- does stuff
+.SH DESCRIPTION
+Does stuff.
+|} in
+  let parsed = parse_manpage_string page in
+  let is_empty =
+    match parsed.entries with
+    | [] -> true
+    | _ :: _ -> false
+  in
+  check "no entries" is_empty
+
+(* "--long / -s" (slash-separated, long form first) must be merged into one
+ * entry carrying both switches and the exact description text.
+ * the per-entry checks are skipped when nothing was parsed, so the failure
+ * is recorded by "one entry" rather than by an exception from List.hd. *)
+let test_slash_switch_separator () =
+  Printf.printf "\n== Slash switch separator (--long / -s) ==\n";
+  let r = parse "  --verbose / -v             Increase verbosity\n" in
+  check "one entry" (List.length r.entries = 1);
+  match r.entries with
+  | [] -> ()
+  | e :: _ ->
+    check "both switch" (e.switch = Both ('v', "verbose"));
+    check "no param" (e.param = None);
+    check "desc" (e.desc = "Increase verbosity")
+
+(* nix3 manpages list options as bullet .IP items wrapping the flag in
+ * .UR/.UE, with the description in a following bare .IP. two entries are
+ * expected; each is inspected only when it exists. *)
+let test_manpage_nix3_style () =
+  Printf.printf "\n== Manpage nix3 style ==\n";
+  let page = {|.SH Options
+.SS Logging-related options
+.IP "\(bu" 3
+.UR #opt-verbose
+\f(CR--verbose\fR
+.UE
+/ \f(CR-v\fR
+.IP
+Increase the logging verbosity level.
+.IP "\(bu" 3
+.UR #opt-quiet
+\f(CR--quiet\fR
+.UE
+.IP
+Decrease the logging verbosity level.
+.SH SEE ALSO
+|} in
+  let parsed = parse_manpage_string page in
+  let entries = parsed.entries in
+  check "two entries" (List.length entries = 2);
+  (match entries with
+   | verbose :: _ ->
+     check "verbose is Both" (verbose.switch = Both ('v', "verbose"));
+     check "verbose desc" (String.length verbose.desc > 0)
+   | [] -> ());
+  match entries with
+  | _ :: quiet :: _ ->
+    check "quiet is Long" (quiet.switch = Long "quiet");
+    check "quiet desc" (String.length quiet.desc > 0)
+  | _ -> ()
+
+(* nix3-style option items whose flags take parameters: --arg must end up
+ * with some parameter, and --include / -I with the mandatory "path". each
+ * entry is inspected only when it exists. *)
+let test_manpage_nix3_with_params () =
+  Printf.printf "\n== Manpage nix3 with params ==\n";
+  let page = {|.SH Options
+.IP "\(bu" 3
+.UR #opt-arg
+\f(CR--arg\fR
+.UE
+\fIname\fR \fIexpr\fR
+.IP
+Pass the value as the argument name to Nix functions.
+.IP "\(bu" 3
+.UR #opt-include
+\f(CR--include\fR
+.UE
+/ \f(CR-I\fR \fIpath\fR
+.IP
+Add path to search path entries.
+.IP
+This option may be given multiple times.
+.SH SEE ALSO
+|} in
+  let parsed = parse_manpage_string page in
+  let entries = parsed.entries in
+  check "two entries" (List.length entries = 2);
+  (match entries with
+   | arg :: _ ->
+     check "arg is Long" (arg.switch = Long "arg");
+     check "arg has param" (Option.is_some arg.param)
+   | [] -> ());
+  match entries with
+  | _ :: incl :: _ ->
+    check "include is Both" (incl.switch = Both ('I', "include"));
+    check "include has path param" (incl.param = Some (Mandatory "path"))
+  | _ -> ()
+
+(* a SYNOPSIS line of two bold words followed by options must be reported
+ * as the two-word command "git commit". *)
+let test_synopsis_subcommand () =
+  Printf.printf "\n== SYNOPSIS subcommand detection ==\n";
+  let page = {|.SH "SYNOPSIS"
+.sp
+.nf
+\fBgit\fR \fBcommit\fR [\fB\-a\fR | \fB\-\-interactive\fR]
+.fi
+.SH "DESCRIPTION"
+|} in
+  let detected =
+    match extract_synopsis_command page with
+    | Some "git commit" -> true
+    | Some _ | None -> false
+  in
+  check "detected git commit" detected
+
+(* a SYNOPSIS naming a single hyphenated command must be reported as that
+ * command, "nix-build". *)
+let test_synopsis_standalone () =
+  Printf.printf "\n== SYNOPSIS standalone command ==\n";
+  let page = {|.SH Synopsis
+.LP
+\f(CRnix-build\fR [\fIpaths\fR]
+.SH Description
+|} in
+  let detected =
+    match extract_synopsis_command page with
+    | Some "nix-build" -> true
+    | Some _ | None -> false
+  in
+  check "detected nix-build" detected
+
+(* a nix3 SYNOPSIS with command and subcommand inside one font span must be
+ * reported as "nix run". *)
+let test_synopsis_nix3 () =
+  Printf.printf "\n== SYNOPSIS nix3 subcommand ==\n";
+  let page = {|.SH Synopsis
+.LP
+\f(CRnix run\fR [\fIoption\fR] \fIinstallable\fR
+.SH Description
+|} in
+  let detected =
+    match extract_synopsis_command page with
+    | Some "nix run" -> true
+    | Some _ | None -> false
+  in
+  check "detected nix run" detected
+
+(* --- Nushell generation tests --- *)
+
+(* true when [sub] occurs somewhere in [s]; [sub] is matched literally, not
+ * as a regular expression. *)
+let contains s sub =
+  match Str.search_forward (Str.regexp_string sub) s 0 with
+  | _ -> true
+  | exception Not_found -> false
+
+(* the extern generated for one "-a, --all" flag must contain the export
+ * header, the combined --all(-a) spelling and the description comment. *)
+let test_nushell_basic () =
+  Printf.printf "\n== Nushell basic extern ==\n";
+  let nu =
+    parse "  -a, --all                  do not ignore entries starting with .\n"
+    |> generate_extern "ls"
+  in
+  List.iter (fun (label, fragment) -> check label (contains nu fragment))
+    [ "has extern", "export extern \"ls\"";
+      "has --all(-a)", "--all(-a)";
+      "has comment", "# do not ignore" ]
+
+(* parameter names are mapped to nushell types in the generated extern:
+ * COLS to int, SIZE to string and FILE to path. *)
+let test_nushell_param_types () =
+  Printf.printf "\n== Nushell param type mapping ==\n";
+  let help_text = {|  -w, --width=COLS           set output width
+      --block-size=SIZE      scale sizes
+  -o, --output FILE          output file
+|} in
+  let nu = generate_extern "ls" (parse help_text) in
+  List.iter (fun (label, fragment) -> check label (contains nu fragment))
+    [ "COLS -> int", "--width(-w): int";
+      "SIZE -> string", "--block-size: string";
+      "FILE -> path", "--output(-o): path" ]
+
+(* help text with both subcommands and flags must produce an extern for the
+ * main command (with its flag) and one extern per subcommand. *)
+let test_nushell_subcommands () =
+  Printf.printf "\n== Nushell subcommands ==\n";
+  let help_text = {|Common Commands:
+  run         Create and run a new container
+  exec        Execute a command
+
+Flags:
+  -D, --debug              Enable debug mode
+|} in
+  let nu = generate_extern "docker" (parse help_text) in
+  List.iter (fun (label, fragment) -> check label (contains nu fragment))
+    [ "has main extern", "export extern \"docker\"";
+      "has --debug", "--debug(-D)";
+      "has run subcommand", "export extern \"docker run\"";
+      "has exec subcommand", "export extern \"docker exec\"" ]
+
+(* entries parsed from a manpage feed generate_extern the same way as
+ * entries parsed from --help output. *)
+let test_nushell_from_manpage () =
+  Printf.printf "\n== Nushell from manpage ==\n";
+  let page = {|.SH OPTIONS
+.TP
+\fB\-a\fR, \fB\-\-all\fR
+do not ignore entries starting with .
+.TP
+\fB\-\-block\-size\fR=\fISIZE\fR
+scale sizes by SIZE
+.SH AUTHOR
+|} in
+  let nu = generate_extern "ls" (parse_manpage_string page) in
+  List.iter (fun (label, fragment) -> check label (contains nu fragment))
+    [ "has extern", "export extern \"ls\"";
+      "has --all(-a)", "--all(-a)";
+      "has --block-size", "--block-size: string" ]
+
+(* generate_module must wrap the extern in a "<command>-completions" module
+ * while keeping the extern and its flags. *)
+let test_nushell_module () =
+  Printf.printf "\n== Nushell module wrapper ==\n";
+  let nu =
+    parse "  -v, --verbose              verbose output\n"
+    |> generate_module "myapp"
+  in
+  List.iter (fun (label, fragment) -> check label (contains nu fragment))
+    [ "has module", "module myapp-completions";
+      "has extern inside", "export extern \"myapp\"";
+      "has flag", "--verbose(-v)" ]
+
+(* the same flag listed three times (combined, long-only, short-only) must
+ * appear once in the generated extern, in its combined form. *)
+let test_dedup_entries () =
+  Printf.printf "\n== Deduplication ==\n";
+  let help_text = {|  -v, --verbose              verbose output
+  --verbose                  verbose mode
+  -v                         be verbose
+|} in
+  let nu = generate_extern "test" (parse help_text) in
+  (* count non-overlapping occurrences of --verbose, resuming each search
+     at the end of the previous match *)
+  let needle = Str.regexp_string "--verbose" in
+  let rec occurrences from seen =
+    match Str.search_forward needle nu from with
+    | _ -> occurrences (Str.match_end ()) (seen + 1)
+    | exception Not_found -> seen
+  in
+  let count = occurrences 0 0 in
+  check "verbose appears once" (count = 1);
+  check "best version kept (Both)" (contains nu "--verbose(-v)")
+
+(* flags that are merely mentioned in DESCRIPTION prose must not add extra
+ * entries next to the one declared under OPTIONS. *)
+let test_dedup_manpage () =
+  Printf.printf "\n== Dedup from manpage ==\n";
+  let page = {|.SH OPTIONS
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+Be verbose.
+.SH DESCRIPTION
+Use \fB\-v\fR for verbose output.
+Use \fB\-\-verbose\fR to see more.
+|} in
+  let nu = generate_extern "test" (parse_manpage_string page) in
+  check "has --verbose(-v)" (contains nu "--verbose(-v)");
+  (* there should be no standalone -v or second --verbose line *)
+  let verbose_lines =
+    nu
+    |> String.split_on_char '\n'
+    |> List.filter (fun line -> contains line "verbose")
+  in
+  check "only one verbose line" (List.length verbose_lines = 1)
+
+(* when one font span ends and another starts immediately, a space must be
+ * inserted between flag and parameter, but not in front of "[=". *)
+let test_font_boundary_spacing () =
+  Printf.printf "\n== Font boundary spacing ==\n";
+  (* \fB--max-results\fR\fIcount\fR is expected to read "--max-results count" *)
+  let spaced = strip_groff_escapes {|\fB\-\-max\-results\fR\fIcount\fR|} in
+  check "has space before param" (contains spaced "--max-results count");
+  (* \fB--color\fR[=\fIWHEN\fR] is expected to stay "--color[=WHEN]" *)
+  let unspaced = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in
+  check "no space before =" (contains unspaced "--color[=WHEN]")
+
+(* test driver: runs every group in a fixed order, prints the totals and
+ * exits with status 1 when at least one check failed. *)
+let () =
+  let run tests = List.iter (fun test -> test ()) tests in
+  Printf.printf "Running help parser tests...\n";
+  run
+    [ test_gnu_basic;
+      test_gnu_eq_param;
+      test_gnu_opt_param;
+      test_underscore_param;
+      test_short_only;
+      test_long_only;
+      test_multiline_desc;
+      test_multiple_entries;
+      test_clap_short_sections;
+      test_clap_long_style;
+      test_clap_long_angle_param;
+      test_space_upper_param;
+      test_go_cobra_flags;
+      test_go_cobra_subcommands;
+      test_busybox_tab;
+      test_no_debug_prints ];
+  Printf.printf "\nRunning manpage parser tests...\n";
+  run
+    [ test_manpage_tp_style;
+      test_manpage_ip_style;
+      test_manpage_groff_stripping;
+      test_manpage_empty_options;
+      test_slash_switch_separator;
+      test_manpage_nix3_style;
+      test_manpage_nix3_with_params;
+      test_synopsis_subcommand;
+      test_synopsis_standalone;
+      test_synopsis_nix3 ];
+  Printf.printf "\nRunning nushell generation tests...\n";
+  run
+    [ test_nushell_basic;
+      test_nushell_param_types;
+      test_nushell_subcommands;
+      test_nushell_from_manpage;
+      test_nushell_module ];
+  Printf.printf "\nRunning dedup and font tests...\n";
+  run
+    [ test_dedup_entries;
+      test_dedup_manpage;
+      test_font_boundary_spacing ];
+  Printf.printf "\n=== Results: %d passed, %d failed ===\n" !passes !failures;
+  if !failures > 0 then exit 1