This commit is contained in:
atagen 2026-03-18 15:40:47 +11:00
commit bbd7c67d0c
22 changed files with 4740 additions and 0 deletions

1
.envrc Normal file
View file

@ -0,0 +1 @@
use flake

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
/target
/_build
/.direnv

11
README.md Normal file
View file

@ -0,0 +1,11 @@
# inshellah
nushell completions engine. indexes completions from manpages, native
generators, and `--help` output, then serves them to nushell's external
completer.
see `doc/` for details:
- [nushell integration](doc/nushell-integration.md) — setup, usage, examples
- [nixos module](doc/nixos.md) — automatic build-time indexing
- [runtime completions](doc/runtime-completions.md) — on-the-fly caching via the completer

0
bin/.ocamlformat Normal file
View file

4
bin/dune Normal file
View file

@ -0,0 +1,4 @@
(executable
(public_name inshellah)
(name main)
(libraries inshellah))

951
bin/main.ml Normal file
View file

@ -0,0 +1,951 @@
(* main.ml — cli entry point for inshellah, a nushell completions engine.
*
* inshellah generates nushell "extern" definitions for external commands by
* parsing their manpages and --help output. it has two main modes:
*
* 1. indexing (batch): scan a prefix directory's bin/ and share/man/,
* extract completions for every binary, and write them to a cache dir.
* this is typically run once per nix profile or system update.
*
* 2. completing (interactive): given a command and its current arguments,
* look up the cached data and return json completion candidates for
* nushell's custom completer protocol.
*
* the indexing pipeline for each binary:
* a. classify the binary (skip? try --help? try native completions?)
* b. if the tool has native nushell completion support, try various
* subcommand patterns ("completions nushell", "--completion nushell", etc.)
* c. otherwise, run the tool with --help/-h and parse the output
* d. recursively resolve subcommands (depth-limited to 5)
* e. after binaries, parse manpages for any commands not yet covered
*
* parallelism: indexing forks per binary, and subcommand resolution forks
* per subcommand. results are marshaled back via pipes. this gives good
* throughput on multi-core systems while keeping the code simple (no threads,
* no async runtime — just unix fork/pipe/waitpid).
*)
open Inshellah.Parser
open Inshellah.Manpage
open Inshellah.Nushell
open Inshellah.Store
module SSet = Set.Make(String)
(* print the usage summary to stderr and terminate with exit status 1.
 * called when no valid subcommand is given. never returns: the final
 * [exit 1] ends the process, so the function can be used in any branch
 * regardless of the type expected there. *)
let usage () =
Printf.eprintf
{|inshellah - nushell completions engine
Usage:
inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
Index completions into a directory of JSON/nu files.
PREFIX is a directory containing bin/ and share/man/.
Default dir: $XDG_CACHE_HOME/inshellah
--ignore FILE skip listed commands entirely
--help-only FILE skip manpages for listed commands, use --help instead
inshellah complete CMD [ARGS...] [--dir PATH] [--system-dir PATH]
Nushell custom completer. Outputs JSON completion candidates.
Falls back to --help resolution if command is not indexed.
inshellah query CMD [--dir PATH] [--system-dir PATH]
Print stored completion data for CMD.
inshellah dump [--dir PATH] [--system-dir PATH]
List indexed commands.
inshellah manpage FILE Parse a manpage and emit nushell extern
inshellah manpage-dir DIR Batch-process manpages under DIR
|};
exit 1
(* manpage sections that contain command documentation.
 * section 1 = user commands, section 8 = system administration commands.
 * each number N is turned into a "manN" subdirectory name by the code that
 * walks a man directory (manpage-dir mode and the indexer). *)
let command_sections = [1; 8]
(* substring test: [contains_str s sub] is true when [sub] occurs somewhere
 * in [s]. [sub] is matched literally (via Str.regexp_string), never as a
 * regular expression. *)
let contains_str s sub =
  let literal = Str.regexp_string sub in
  match Str.search_forward literal s 0 with
  | _ -> true
  | exception Not_found -> false
(* heuristic: does [text] look like nushell source code?
 * anything of 20 characters or fewer is rejected outright so that short
 * error messages are never mistaken for source. longer text qualifies when
 * it contains "export extern", or "export def", or both "module " and
 * "export". *)
let is_nushell_source text =
  let has = contains_str text in
  if String.length text <= 20 then false
  else if has "export extern" then true
  else if has "export def" then true
  else has "module " && has "export"
(* derive the command name from a manpage filename.
 * "ls.1.gz" -> strip ".gz" -> "ls.1" -> drop the extension -> "ls".
 * only the basename of [path] is considered; a name without an extension
 * is returned unchanged. *)
let cmd_name_of_manpage path =
  let file = Filename.basename path in
  let uncompressed =
    match Filename.check_suffix file ".gz" with
    | true -> Filename.chop_suffix file ".gz"
    | false -> file
  in
  (* remove_extension leaves extension-less names alone, which is what the
   * chop_extension + Invalid_argument fallback amounts to *)
  Filename.remove_extension uncompressed
(* sanitized environment for child processes, computed once on first use.
 * it is the current process environment minus the display/session related
 * variables listed below, so that gui tools do not try to open windows (or
 * hang waiting for a display connection) when we run them with --help. *)
let safe_env = lazy (
  let blocked = [
    "DISPLAY=";
    "WAYLAND_DISPLAY=";
    "DBUS_SESSION_BUS_ADDRESS=";
    "XAUTHORITY=";
  ] in
  let is_blocked binding =
    List.exists (fun prefix -> String.starts_with ~prefix binding) blocked in
  Unix.environment ()
  |> Array.to_list
  |> List.filter (fun binding -> not (is_blocked binding))
  |> Array.of_list)
(* non-blocking drain of the pipe fd [rd] into [buf].
 * polls with a zero-timeout select and appends whatever is readable, in
 * chunks of up to 8192 bytes, until the fd has nothing ready, reports
 * end-of-file, or a read fails with a Unix error. safe to call repeatedly;
 * the fork/pipe call sites use it to keep pipes drained so children never
 * block on write. *)
let drain_fd rd buf =
  let chunk = Bytes.create 8192 in
  let rec pump () =
    match Unix.select [rd] [] [] 0.0 with
    | ([], _, _) -> ()
    | (_ :: _, _, _) ->
      (match Unix.read rd chunk 0 8192 with
       | 0 -> ()
       | n ->
         Buffer.add_subbytes buf chunk 0 n;
         pump ()
       | exception Unix.Unix_error _ -> ())
  in
  pump ()
(* run a command with a timeout, capturing its stdout+stderr.
 * [args] is the full argv (program first); [timeout_ms] is the deadline in
 * milliseconds. the child gets stdin from /dev/null, stdout and stderr
 * merged onto one pipe, and the sanitized environment [safe_env].
 *
 * peculiarity: the child is started with /tmp as working directory so that
 * tools which create side-effect files (like ckb-next-dev-detect-report.gz)
 * do not pollute the user's working directory. we chdir to /tmp before
 * spawning and restore the previous directory right after.
 *
 * peculiarity: the select timeout is capped at 0.05s per iteration so the
 * deadline is checked frequently even when no data is available.
 *
 * returns [None] if [args] is empty, if the process could not be started,
 * or if it produced no output. when the deadline passes the child is killed
 * with SIGKILL, and whatever output was captured up to then is still
 * returned (so [None] only if nothing had been read). *)
let run_cmd args timeout_ms =
  match args with
  | [] -> None
  | prog :: _ ->
    let (rd, wr) = Unix.pipe () in
    let devnull = Unix.openfile "/dev/null" [Unix.O_RDONLY] 0 in
    let saved_cwd = Sys.getcwd () in
    Sys.chdir "/tmp";
    (* do not close any fd in the error handler: the parent-side copies are
     * closed exactly once below, whether or not the spawn succeeded *)
    let pid =
      try
        Some (Unix.create_process_env prog (Array.of_list args)
                (Lazy.force safe_env) devnull wr wr)
      with Unix.Unix_error _ -> None in
    Sys.chdir saved_cwd;
    Unix.close wr;
    Unix.close devnull;
    (match pid with
     | None ->
       Unix.close rd;
       None
     | Some pid ->
       let buf = Buffer.create 4096 in
       let deadline =
         Unix.gettimeofday () +. (float_of_int timeout_ms /. 1000.0) in
       let chunk = Bytes.create 8192 in
       (* true  = the child closed its end of the pipe (end-of-file)
        * false = the deadline passed first *)
       let rec pump () =
         let remaining = deadline -. Unix.gettimeofday () in
         if remaining <= 0.0 then false
         else
           match Unix.select [rd] [] [] (min remaining 0.05) with
           | ([], _, _) -> pump ()
           | (_ :: _, _, _) ->
             let n = Unix.read rd chunk 0 (Bytes.length chunk) in
             if n = 0 then true
             else begin
               Buffer.add_subbytes buf chunk 0 n;
               pump ()
             end
           | exception Unix.Unix_error (Unix.EINTR, _, _) -> pump ()
       in
       (* a failing read is handled like a timeout: stop reading, kill the
        * child and reap it, instead of leaking the fd and the process *)
       let saw_eof = try pump () with Unix.Unix_error _ -> false in
       Unix.close rd;
       if not saw_eof then
         (try Unix.kill pid Sys.sigkill with Unix.Unix_error _ -> ());
       ignore (Unix.waitpid [] pid);
       if Buffer.length buf > 0 then Some (Buffer.contents buf) else None)
(* true when [path] (after following symlinks, as Unix.stat does) is a
 * regular file with at least one execute bit set. any stat failure counts
 * as "not executable". *)
let is_executable path =
  match Unix.stat path with
  | { Unix.st_kind = Unix.S_REG; st_perm; _ } -> st_perm land 0o111 <> 0
  | _ -> false
  | exception Unix.Unix_error _ -> false
(* check if a file is a script by looking for a "#!" shebang in its first
 * two bytes. symlinks are resolved via realpath before reading.
 * returns false for files shorter than two bytes and for anything that
 * cannot be resolved, opened or read. the channel is closed on every path,
 * including read errors. *)
let is_script path =
  try
    let ic = open_in_bin (Unix.realpath path) in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () ->
        match really_input_string ic 2 with
        | magic -> String.equal magic "#!"
        | exception End_of_file -> false)
  with Sys_error _ | Unix.Unix_error _ -> false
(* scan an elf binary for string needles without loading the entire file.
 * returns a hashtable whose keys are the needles that were found.
 *
 * the file must start with the elf magic ("\x7fELF"); for any other
 * readable file of at least 4 bytes nothing is reported as found. the body
 * is read in 64kb chunks, and the last max_needle bytes of each chunk are
 * carried over to the front of the next one so needles spanning a chunk
 * boundary are still seen. scanning stops early once every needle has been
 * found.
 *
 * peculiarity: on any failure (path cannot be resolved or opened, read
 * error, file shorter than the 4-byte magic), all needles are marked as
 * found. this is a conservative fallback — we'd rather try --help on an
 * unreadable binary than skip it. the channel is closed on every path.
 *
 * the search is a manual byte comparison rather than Str for performance —
 * this runs on every binary in the prefix, so it needs to be fast. *)
let elf_scan path needles =
  let found = Hashtbl.create 4 in
  let remaining () =
    List.filter (fun n -> not (Hashtbl.mem found n)) needles in
  (* does [needle] occur in [buf] at offset [pos]? stops at first mismatch *)
  let matches_at buf pos needle =
    let nlen = String.length needle in
    let rec go j =
      j = nlen || (Bytes.get buf (pos + j) = needle.[j] && go (j + 1)) in
    go 0 in
  (* does [needle] occur anywhere within the first [total] bytes of [buf]? *)
  let occurs_in buf total needle =
    let last = total - String.length needle in
    let rec go i = i <= last && (matches_at buf i needle || go (i + 1)) in
    go 0 in
  let scan ic =
    if String.equal (really_input_string ic 4) "\x7fELF" then begin
      let max_needle =
        List.fold_left (fun m n -> max m (String.length n)) 0 needles in
      let chunk_size = 65536 in
      let buf = Bytes.create (chunk_size + max_needle) in
      let carry = ref 0 in
      let eof = ref false in
      while not !eof && remaining () <> [] do
        (* new data lands after the carried-over tail of the last chunk *)
        let n = input ic buf !carry chunk_size in
        if n = 0 then eof := true
        else begin
          let total = !carry + n in
          List.iter (fun needle ->
            if occurs_in buf total needle then
              Hashtbl.replace found needle true
          ) (remaining ());
          let new_carry = min max_needle total in
          Bytes.blit buf (total - new_carry) buf 0 new_carry;
          carry := new_carry
        end
      done
    end in
  (try
    let ic = open_in_bin (Unix.realpath path) in
    Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () -> scan ic)
  with _ ->
    (* deliberate catch-all: see the "conservative fallback" note above *)
    List.iter (fun n -> Hashtbl.replace found n true) needles);
  found
(* detect nix-generated c wrappers and extract the real binary path.
 * nix's makeCWrapper creates small c programs that set up the environment
 * and exec the real binary. these wrappers won't contain "-h" or
 * "completion" themselves, so elf_scan would say "skip". this function
 * looks inside the wrapper file for the marker string "makeCWrapper" and
 * the first /nix/store/.../bin/... path, so the caller can tell that a
 * real binary sits behind the wrapper.
 *
 * returns [Some target] only if the marker is present, a store path is
 * found and that path exists on disk; [None] otherwise, including on any
 * error while resolving, opening or reading the file.
 *
 * peculiarity: files larger than 64kb are not read at all, to avoid
 * pulling a large non-wrapper binary into memory. the channel is closed on
 * every path, including read errors. *)
let nix_wrapper_target path =
  try
    let ic = open_in_bin (Unix.realpath path) in
    let contents =
      Fun.protect
        ~finally:(fun () -> close_in_noerr ic)
        (fun () ->
          let n = in_channel_length ic in
          if n > 65536 then None else Some (really_input_string ic n)) in
    match contents with
    | None -> None
    | Some s when not (contains_str s "makeCWrapper") -> None
    | Some s ->
      let re = Str.regexp "/nix/store/[a-z0-9]+-[^' \n\r\x00]+/bin/[a-zA-Z0-9._-]+" in
      (match Str.search_forward re s 0 with
       | _ ->
         let target = Str.matched_string s in
         if Sys.file_exists target then Some target else None
       | exception Not_found -> None)
  with _ -> None
(* heuristic filter for binary names that should never be indexed.
 * true for: the empty name, "-", dotfiles, names starting with "lib",
 * names ending in "-daemon", "-wrapped" or ".so", and names that contain
 * no ascii letter or digit at all (e.g. punctuation-only names). *)
let skip_name name =
  let is_alnum = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
    | _ -> false in
  let has_suffix suffix = String.ends_with ~suffix name in
  match name with
  | "" | "-" -> true
  | _ ->
    name.[0] = '.'
    || String.starts_with ~prefix:"lib" name
    || List.exists has_suffix ["-daemon"; "-wrapped"; ".so"]
    || not (String.exists is_alnum name)
(* classification result for a binary; decides the indexing strategy.
 * Skip                -> don't index this binary at all
 * Try_help            -> only try --help (scripts, binaries without a
 *                        "completion" string)
 * Try_native_and_help -> try native nushell completion first, fall back
 *                        to --help *)
type bin_class = Skip | Try_help | Try_native_and_help
(* classify the binary [name] inside [bindir] to pick an indexing strategy.
 * decision tree, first match wins:
 * 1. nushell builtin or bad name          -> Skip
 * 2. not executable                       -> Skip
 * 3. script (has shebang)                 -> Try_help
 * 4. elf scan reports "completion"        -> Try_native_and_help
 * 5. elf scan reports "-h"                -> Try_help
 * 6. nix wrapper with an existing target  -> Try_help
 * 7. otherwise                            -> Skip *)
let classify_binary bindir name =
  let path = Filename.concat bindir name in
  (* steps 4-7: only reached for executable non-scripts *)
  let classify_compiled () =
    let hits = elf_scan path ["-h"; "completion"] in
    match Hashtbl.mem hits "completion", Hashtbl.mem hits "-h" with
    | true, _ -> Try_native_and_help
    | false, true -> Try_help
    | false, false ->
      (match nix_wrapper_target path with
       | Some _ -> Try_help
       | None -> Skip)
  in
  if is_nushell_builtin name || skip_name name then Skip
  else if not (is_executable path) then Skip
  else if is_script path then Try_help
  else classify_compiled ()
(* number of cpu cores, taken as the count of lines in /proc/cpuinfo that
 * start with "processor" (never less than 1). falls back to 4 when the file
 * cannot be opened or read (e.g. on non-linux). *)
let num_cores () =
  try
    let ic = open_in "/proc/cpuinfo" in
    let rec count seen =
      match input_line ic with
      | line ->
        count (if String.starts_with ~prefix:"processor" line then seen + 1
               else seen)
      | exception End_of_file -> seen in
    let n = count 0 in
    close_in ic;
    max 1 n
  with _ -> 4
(* try to get native nushell completions from the binary at [bin_path].
 * each probe runs "<bin> <verb> nushell" with a 500ms timeout, in the order
 * listed below, and the first output that passes is_nushell_source is
 * returned. returns [None] when no probe yields nushell source.
 *
 * the verbs cover the spellings used by common cli frameworks (cobra,
 * clap, click) and various ad-hoc implementations. *)
let try_native_completion bin_path =
  let verbs = [
    "completions";
    "completion";
    "--completions";
    "--completion";
    "generate-completion";
    "--generate-completion";
    "shell-completions";
  ] in
  let probe verb =
    match run_cmd [bin_path; verb; "nushell"] 500 with
    | Some text when is_nushell_source text -> Some text
    | Some _ | None -> None in
  List.find_map probe verbs
(* parse the manpage [file] into [Some (cmd, result, subs)].
 * [cmd] is the command named in the synopsis, or the name derived from the
 * filename when the synopsis yields none. [result] is the parsed page; when
 * the page has per-subcommand sections, its subcommand list is replaced by
 * the names and descriptions of those sections. [subs] pairs each section's
 * full name ("cmd sub") with that section's own parse result.
 * returns [None] when [cmd] is a nushell builtin. *)
let parse_manpage_for_command file =
  let contents = read_manpage_file file in
  let cmd =
    match extract_synopsis_command contents with
    | Some name -> name
    | None -> cmd_name_of_manpage file in
  if is_nushell_builtin cmd then None
  else begin
    let parsed = parse_manpage_string contents in
    let sub_sections = extract_subcommand_sections contents in
    let result =
      match sub_sections with
      | [] -> parsed
      | _ :: _ ->
        let subcommands =
          List.map (fun (name, desc, _) -> { name; desc }) sub_sections in
        { parsed with subcommands } in
    let subs =
      List.map (fun (name, _, r) -> (cmd ^ " " ^ name, r)) sub_sections in
    Some (cmd, result, subs)
  end
(* "inshellah manpage FILE" — parse one manpage and print the generated
 * nushell extern to stdout. prints nothing when the page is rejected by the
 * parser or yields no flag entries. *)
let cmd_manpage file =
  match parse_manpage_for_command file with
  | None -> ()
  | Some (cmd, result, _) ->
    (match result.entries with
     | [] -> ()
     | _ :: _ -> print_string (generate_extern cmd result))
(* "inshellah manpage-dir DIR" — batch-process every file found in the
 * man<N> subdirectories of [dir], for each N in command_sections.
 * a file that makes cmd_manpage raise is silently skipped so one bad page
 * does not abort the batch. *)
let cmd_manpage_dir dir =
  let emit subdir file =
    try cmd_manpage (Filename.concat subdir file) with _ -> () in
  let process_section section =
    let subdir = Filename.concat dir (Printf.sprintf "man%d" section) in
    if is_dir subdir then Array.iter (emit subdir) (Sys.readdir subdir) in
  List.iter process_section command_sections
(* safety limit for subcommand resolution: once this many results have been
 * accumulated for one binary, newly discovered subcommands are no longer
 * queued for resolution. prevents runaway recursion on tools with enormous
 * subcommand trees. *)
let max_resolve_results = 500
(* exception-safe wrapper around parse_manpage_for_command.
 * returns the parsed triple only when it carries something useful (at
 * least one flag entry or at least one subcommand section); returns [None]
 * for empty results and when parsing raises. *)
let process_manpage file =
  let useful (_, result, subs) = result.entries <> [] || subs <> [] in
  try
    match parse_manpage_for_command file with
    | Some hit -> if useful hit then Some hit else None
    | None -> None
  with _ -> None
(* collect the set of command names that have a manpage under [mandir],
 * looking at the man<N> subdirectory of every section in command_sections
 * and deriving each name from the filename alone (files are not opened).
 * used during indexing to skip --help for commands that the manpage phase
 * will handle instead. *)
let manpaged_commands mandir =
  let add_section names section =
    let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
    if not (is_dir subdir) then names
    else
      Sys.readdir subdir
      |> Array.to_seq
      |> Seq.map cmd_name_of_manpage
      |> Seq.fold_left (fun set cmd -> SSet.add cmd set) names in
  List.fold_left add_section SSet.empty command_sections
(* parallel structured help resolver — recursively resolves a command and
 * all its subcommands by running --help on each, forking a child process
 * per subcommand for parallelism.
 *
 * [cmd] is the executable to run, [rest] the subcommand path to start from
 * (empty for the root) and [name] the display name of that starting point.
 * [timeout] is the per-invocation limit in milliseconds handed to run_cmd.
 * returns (full command name, parsed help) pairs in the order the results
 * were collected.
 *
 * the resolver works as a breadth-first queue:
 * 1. start with the root command in the queue
 * 2. fork a child for each queued item (up to num_cores concurrent)
 * 3. the child runs --help (falling back to -h), parses the output and
 *    marshals the result via pipe
 * 4. the parent collects results and enqueues discovered subcommands
 * 5. repeat until the queue is empty and all children have finished
 *
 * depth is limited to 5 levels and total results to max_resolve_results
 * to prevent runaway recursion on pathological command trees.
 *
 * peculiarity: the child detects "self-listing" — when a subcommand's
 * --help lists itself as a subcommand (e.g. "git help" listing "help" as a
 * subcommand of itself). this would cause infinite recursion, so such
 * results are discarded.
 *
 * peculiarity: children close all pipe fds from other pending children
 * immediately after fork to prevent fd leaks. the parent drains pipes
 * regularly to prevent children from blocking on full pipe buffers.
 *
 * peculiarity: a child always terminates through Unix._exit, never through
 * exit and never by letting an exception escape. exit would run at_exit
 * handlers and flush stdio buffers inherited from the parent (duplicating
 * pending output), and an escaping exception would let the child continue
 * running the parent's code. *)
let help_resolve_par ?(timeout=200) cmd rest name =
  let max_jobs = num_cores () in
  let queue = Queue.create () in
  Queue.push (rest, name, 0) queue;
  let results = ref [] in
  (* pending: (pid, rd, buf, rest, name, depth) *)
  let pending = ref [] in
  (* runs in the child: obtain and parse the help text for one queue item *)
  let probe q_rest q_depth : (help_result * subcommand list) option =
    let text = match run_cmd (cmd :: q_rest @ ["--help"]) timeout with
      | Some _ as r -> r
      | None -> run_cmd (cmd :: q_rest @ ["-h"]) timeout in
    match text with
    | None -> None
    | Some text ->
      (match parse_help text with
       | Error _ -> None
       | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] -> None
       | Ok r ->
         let self_listed = match q_rest with
           | [] -> false
           | _ ->
             let leaf = List.nth q_rest (List.length q_rest - 1) in
             List.exists (fun (sc : subcommand) -> sc.name = leaf) r.subcommands in
         if self_listed then None
         else
           let at_limit = q_depth >= 5 in
           let subs = if at_limit then [] else r.subcommands in
           Some (r, subs)) in
  (* runs in the parent: read a finished child's result, record it and
   * enqueue the subcommands it discovered *)
  let collect rd buf q_rest q_name q_depth =
    drain_fd rd buf;
    (try Unix.close rd with _ -> ());
    let data = Buffer.contents buf in
    let result : (help_result * subcommand list) option =
      if String.length data > 0 then
        try Marshal.from_string data 0 with _ -> None
      else None in
    match result with
    | None -> ()
    | Some (r, subs) ->
      let at_limit =
        q_depth >= 5 || List.length !results >= max_resolve_results in
      results := (q_name, r) :: !results;
      if not at_limit then
        List.iter (fun (sc : subcommand) ->
          Queue.push
            (q_rest @ [sc.name], q_name ^ " " ^ sc.name, q_depth + 1) queue
        ) subs in
  (* drain every pending pipe and collect the children that have exited *)
  let reap () =
    pending := List.filter (fun (pid, rd, buf, q_rest, q_name, q_depth) ->
      drain_fd rd buf;
      match Unix.waitpid [Unix.WNOHANG] pid with
      | (0, _) -> true
      | _ -> collect rd buf q_rest q_name q_depth; false
      | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
        (try Unix.close rd with _ -> ()); false
    ) !pending in
  let wait_for_slot () =
    while List.length !pending >= max_jobs do
      reap ();
      if List.length !pending >= max_jobs then begin
        let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
        ignore (Unix.select fds [] [] 0.05)
      end
    done in
  while not (Queue.is_empty queue) || !pending <> [] do
    while not (Queue.is_empty queue) do
      let (q_rest, q_name, q_depth) = Queue.pop queue in
      wait_for_slot ();
      let (rd, wr) = Unix.pipe () in
      let pid = Unix.fork () in
      if pid = 0 then begin
        (* child: nothing may escape this block except through _exit *)
        (try
          Unix.close rd;
          List.iter (fun (_, prd, _, _, _, _) ->
            try Unix.close prd with _ -> ()) !pending;
          let result = try probe q_rest q_depth with _ -> None in
          let oc = Unix.out_channel_of_descr wr in
          Marshal.to_channel oc
            (result : (help_result * subcommand list) option) [];
          (* close_out flushes the marshalled data; _exit would not *)
          close_out oc
        with _ -> ());
        Unix._exit 0
      end else begin
        Unix.close wr;
        pending :=
          (pid, rd, Buffer.create 4096, q_rest, q_name, q_depth) :: !pending
      end
    done;
    if !pending <> [] then begin
      reap ();
      if !pending <> [] && Queue.is_empty queue then begin
        let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
        ignore (Unix.select fds [] [] 0.05)
      end
    end
  done;
  List.rev !results
(* "inshellah index" — the main indexing command.
 * processes all binaries and manpages in the given prefix directories,
 * writing completion data to the cache dir [dir]. [bindirs] and [mandirs]
 * are walked pairwise (they must have the same length). finishes by
 * printing the number of indexed commands to stdout.
 *
 * the pipeline has two phases per prefix:
 *
 * phase 1 (binaries): fork one child per binary. each child:
 * - tries native nushell completions (if classified as Try_native_and_help)
 * - falls back to help_resolve_par (which itself forks per subcommand)
 * - marshals the result back via pipe as a tagged variant:
 *   `Native of string                        -> raw nushell source
 *   `Parsed of (string * help_result) list   -> parsed flag data
 *   `None                                    -> nothing useful extracted
 *
 * phase 2 (manpages): sequentially parse manpages for commands not yet
 * covered by phase 1.
 *
 * commands on the ignorelist are skipped entirely. commands on the
 * help_only list skip manpage parsing and only use --help. commands
 * with manpages skip --help in phase 1 (they'll be handled in phase 2).
 * a prefix whose bin directory does not exist is skipped as a whole,
 * manpages included.
 *
 * peculiarity: the done_cmds set tracks which commands have already been
 * indexed to prevent duplicates across phases and across multiple prefix
 * directories.
 *
 * peculiarity: a child always terminates through Unix._exit rather than
 * exit. exit would run at_exit handlers and flush stdio buffers inherited
 * from the parent (for example a pending "skipping ..." message), so that
 * output would be written once per child. *)
let cmd_index bindirs mandirs ignorelist help_only dir =
  ensure_dir dir;
  let done_cmds = ref SSet.empty in
  let n_results = ref 0 in
  let index_bindir bindir mandir =
    if not (is_dir bindir) then
      Printf.eprintf "skipping %s (not found)\n" bindir
    else begin
      let bins = Sys.readdir bindir in
      Array.sort String.compare bins;
      let manpaged =
        if is_dir mandir then manpaged_commands mandir else SSet.empty in
      let max_jobs = num_cores () in
      let classified = Array.map (fun name ->
        if SSet.mem name ignorelist then (name, Skip)
        else if SSet.mem name help_only then (name, classify_binary bindir name)
        else if SSet.mem name manpaged then (name, Skip)
        else (name, classify_binary bindir name)
      ) bins in
      (* pending: (pid, rd, buf, name) *)
      let pending = ref [] in
      (* parent side: read a finished child's result and write it out *)
      let process_result name rd buf =
        drain_fd rd buf;
        (try Unix.close rd with _ -> ());
        let data = Buffer.contents buf in
        if String.length data > 0 then begin
          let result : [`Native of string | `Parsed of (string * help_result) list | `None] =
            try Marshal.from_string data 0 with _ -> `None in
          (match result with
           | `Native src ->
             write_native ~dir name src;
             incr n_results
           | `Parsed pairs ->
             List.iter (fun (cmd_name, r) ->
               if not (SSet.mem cmd_name !done_cmds) then begin
                 write_result ~dir ~source:"help" cmd_name r;
                 done_cmds := SSet.add cmd_name !done_cmds;
                 incr n_results
               end
             ) pairs
           | `None -> ())
        end;
        (* mark the binary as handled even when nothing was extracted *)
        done_cmds := SSet.add name !done_cmds in
      let reap () =
        pending := List.filter (fun (pid, rd, buf, name) ->
          drain_fd rd buf;
          match Unix.waitpid [Unix.WNOHANG] pid with
          | (0, _) -> true
          | _ ->
            process_result name rd buf;
            false
          | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
            (try Unix.close rd with _ -> ()); false
        ) !pending in
      let wait_for_slot () =
        while List.length !pending >= max_jobs do
          reap ();
          if List.length !pending >= max_jobs then begin
            let fds = List.map (fun (_, rd, _, _) -> rd) !pending in
            ignore (Unix.select fds [] [] 0.05)
          end
        done in
      (* Phase 1: binaries *)
      Array.iter (fun (name, cls) ->
        match cls with
        | Skip -> ()
        | Try_help | Try_native_and_help ->
          wait_for_slot ();
          let (rd, wr) = Unix.pipe () in
          let pid = Unix.fork () in
          if pid = 0 then begin
            (* child: nothing may escape this block except through _exit *)
            (try
              Unix.close rd;
              List.iter (fun (_, prd, _, _) ->
                try Unix.close prd with _ -> ()) !pending;
              let result =
                try
                  let path = Filename.concat bindir name in
                  let native = match cls with
                    | Try_native_and_help -> try_native_completion path
                    | Try_help | Skip -> None in
                  (match native with
                   | Some src -> `Native src
                   | None ->
                     let pairs = help_resolve_par ~timeout:200 path [] name in
                     if pairs <> [] then `Parsed pairs else `None)
                with _ -> `None in
              let oc = Unix.out_channel_of_descr wr in
              Marshal.to_channel oc
                (result : [`Native of string | `Parsed of (string * help_result) list | `None]) [];
              (* close_out flushes the marshalled data; _exit would not *)
              close_out oc
            with _ -> ());
            Unix._exit 0
          end else begin
            Unix.close wr;
            pending := (pid, rd, Buffer.create 4096, name) :: !pending
          end
      ) classified;
      while !pending <> [] do
        reap ();
        if !pending <> [] then begin
          let fds = List.map (fun (_, rd, _, _) -> rd) !pending in
          ignore (Unix.select fds [] [] 0.05)
        end
      done;
      (* Phase 2: manpages *)
      if is_dir mandir then
        List.iter (fun section ->
          let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
          if is_dir subdir then begin
            let files = Sys.readdir subdir in
            Array.sort String.compare files;
            Array.iter (fun file ->
              let base_cmd = cmd_name_of_manpage file in
              if SSet.mem base_cmd help_only then ()
              else match process_manpage (Filename.concat subdir file) with
                | None -> ()
                | Some (cmd, result, subs) ->
                  if not (SSet.mem cmd !done_cmds) then begin
                    write_result ~dir ~source:"manpage" cmd result;
                    done_cmds := SSet.add cmd !done_cmds;
                    incr n_results
                  end;
                  List.iter (fun (sub_cmd, sub_result) ->
                    if not (SSet.mem sub_cmd !done_cmds) then begin
                      write_result ~dir ~source:"manpage" sub_cmd sub_result;
                      done_cmds := SSet.add sub_cmd !done_cmds;
                      incr n_results
                    end
                  ) subs
            ) files
          end
        ) command_sections
    end in
  List.iter2 index_bindir bindirs mandirs;
  Printf.printf "indexed %d commands into %s\n" !n_results dir
(* "inshellah dump" — print the number of indexed commands found in [dirs],
 * then one line per command with its source type in brackets ("?" when the
 * store reports no type for it). *)
let cmd_dump dirs =
  let cmds = all_commands dirs in
  Printf.printf "%d commands\n" (List.length cmds);
  let show cmd =
    let src = Option.value (file_type_of dirs cmd) ~default:"?" in
    Printf.printf " %-40s [%s]\n" cmd src in
  List.iter show cmds
(* search $PATH for an executable called [name] and return the first hit,
 * walking the ':'-separated entries in order. returns [None] when PATH is
 * unset or no entry contains such an executable.
 * used during completion to find binaries for on-the-fly resolution. *)
let find_in_path name =
  match Sys.getenv_opt "PATH" with
  | None -> None
  | Some path_var ->
    let candidate dir =
      let p = Filename.concat dir name in
      if is_executable p then Some p else None in
    List.find_map candidate (String.split_on_char ':' path_var)
(* resolve completions for [name] (executable at [path]) on the fly and
 * cache them under [dir]. called during "complete" when a command isn't in
 * the index. runs help_resolve_par with a 200ms per-invocation timeout;
 * when it yields results, [dir] is created if needed, every result is
 * written there, and the result list is returned. returns [None] when
 * nothing could be resolved (nothing is written in that case). *)
let resolve_and_cache ~dir name path =
  match help_resolve_par ~timeout:200 path [] name with
  | [] -> None
  | pairs ->
    ensure_dir dir;
    List.iter (fun (cmd_name, r) -> write_result ~dir cmd_name r) pairs;
    Some pairs
(* format one completion candidate as a json object with a "value" and a
 * "description" field, as consumed by nushell's completer protocol.
 * both strings are passed through escape_json before being embedded. *)
let completion_json value desc =
  let value = escape_json value in
  let desc = escape_json desc in
  Printf.sprintf "{\"value\":\"%s\",\"description\":\"%s\"}" value desc
(* fuzzy matching: returns a score > 0 if [needle] is a subsequence of
 * [haystack] (compared case-insensitively), 0 otherwise.
 * higher scores = better match. scoring tiers:
 * - empty needle: 1 (everything matches, nothing is preferred)
 * - exact, case-sensitive match: 1000
 * - prefix match: 900 + percentage of the haystack covered by the needle
 * - subsequence: per matched character, 50 if it sits on a word boundary
 *   (start of string, after '-' or '_', or a lower->upper camelCase
 *   transition), else 10; plus 20 if it directly follows the previously
 *   matched character. a match at index 0 also receives the 20.
 *
 * this drives the completion candidate ranking. users typing "ser" should
 * see "--server" ranked above "--preserve" even though both contain "ser"
 * as a subsequence. the word-boundary bonus achieves this. *)
let fuzzy_score needle haystack =
  let nlen = String.length needle and hlen = String.length haystack in
  if nlen = 0 then 1
  else if nlen > hlen then 0
  else if String.equal needle haystack then 1000
  else begin
    let pat = String.lowercase_ascii needle in
    let folded = String.lowercase_ascii haystack in
    if String.starts_with ~prefix:pat folded then 900 + (nlen * 100 / hlen)
    else begin
      (* boundaries are judged on the original-case haystack so camelCase
       * transitions are still visible *)
      let starts_word pos =
        pos = 0
        || (match haystack.[pos - 1] with
            | '-' | '_' -> true
            | 'a' .. 'z' ->
              (match haystack.[pos] with 'A' .. 'Z' -> true | _ -> false)
            | _ -> false)
      in
      (* greedy left-to-right subsequence walk over the folded haystack *)
      let rec walk pos matched last_hit total =
        if matched = nlen then total
        else if pos = hlen then 0
        else if folded.[pos] = pat.[matched] then begin
          let base = if starts_word pos then 50 else 10 in
          let streak = if last_hit = pos - 1 then 20 else 0 in
          walk (pos + 1) (matched + 1) pos (total + base + streak)
        end
        else walk (pos + 1) matched last_hit total
      in
      walk 0 0 (-1) 0
    end
  end
(* "inshellah complete CMD [ARGS...]" — the nushell custom completer.
* this is the hot path called every time the user presses tab in nushell.
*
* the completion logic:
* 1. try to find the command (or longest subcommand prefix) in the store
* 2. if not found, try on-the-fly resolution (find in $PATH, run --help, cache)
* 3. score all candidate completions against the partial input using fuzzy_score
* 4. output scored candidates as a json array
*
* subcommand resolution: the lookup tries longest prefix first.
* for "git add --", it first looks for "git add", then "git".
* this ensures subcommand-specific flags are shown.
*
* peculiarity: nushell sends a trailing empty token when the cursor is after
* a space ("git add "). in this case all_tokens includes the empty string.
* when the last token is non-empty, the user is still typing it, so we use
* it as the fuzzy filter. when empty, we show all candidates.
*
* peculiarity: if only a parent command matched (e.g. "git" matched but not
* "git add"), we suppress subcommand suggestions and only show flags. this
* prevents showing sibling subcommands when the user has already committed
* to a specific subcommand path. *)
let cmd_complete spans user_dir system_dirs =
  (* answer one completion request: [spans] is the command line split into
   * tokens (command name first). prints a json array of candidates on stdout,
   * best score first; prints "[]" when there are no spans at all.
   * lookups search [user_dir] first, then [system_dirs]. *)
  match spans with
  | [] -> print_string "[]\n"
  | cmd_name :: rest ->
    let dirs = user_dir :: system_dirs in
    (* longest-prefix lookup: for ["git"; "add"] try "git add" before "git".
     * returns the matched name, its stored data and the number of tokens
     * that made up the matched name. *)
    let find_result tokens =
      let rec probe rev_prefix =
        match rev_prefix with
        | [] -> None
        | _ :: shorter ->
          let try_name = String.concat " " (List.rev rev_prefix) in
          (match lookup dirs try_name with
           | Some r -> Some (try_name, r, List.length rev_prefix)
           | None -> probe shorter)
      in
      probe (List.rev tokens)
    in
    (* the last token only counts as a finished subcommand when nushell sends
     * a trailing empty token (cursor sits after a space). otherwise the user
     * is still typing it, so it becomes the partial text to match against and
     * is left out of the lookup. *)
    let partial, lookup_tokens =
      match List.rev rest with
      | [] -> ("", [cmd_name])
      | "" :: _ -> ("", cmd_name :: rest)
      | typing :: before -> (typing, cmd_name :: List.rev before)
    in
    let n_lookup = List.length lookup_tokens in
    let cached = find_result lookup_tokens in
    let result =
      match cached with
      | Some (_, _, depth) when depth >= n_lookup - 1 ->
        (* exact match, or a match missing only the final token — use it *)
        cached
      | _ ->
        (* nothing cached, or only a distant parent matched: find the binary
         * on PATH, resolve and cache it in the user dir, then look up again *)
        (match find_in_path cmd_name with
         | None -> cached
         | Some path ->
           (match resolve_and_cache ~dir:user_dir cmd_name path with
            | Some _pairs -> find_result lookup_tokens
            | None -> cached))
    in
    let candidates =
      match result with
      | None -> []
      | Some (matched_name, r, depth) ->
        (* when the match is shallower than what was requested, the user has
         * already typed a subcommand beyond the matched level — sibling
         * subcommands would be misleading there, so only flags are offered *)
        let sub_candidates =
          if depth < n_lookup - 1 then []
          else
            let subs =
              match r.subcommands with
              | [] -> subcommands_of dirs matched_name
              | listed -> listed
            in
            List.filter_map (fun (sc : subcommand) ->
              let s = fuzzy_score partial sc.name in
              if s > 0 then Some (s, completion_json sc.name sc.desc) else None
            ) subs
        in
        (* description text for a flag: the stored description followed by the
         * parameter name, in angle brackets when mandatory and square
         * brackets when optional *)
        let describe (e : entry) =
          let tagged tag = if e.desc <> "" then e.desc ^ " " ^ tag else tag in
          match e.param with
          | Some (Mandatory p) -> tagged ("<" ^ p ^ ">")
          | Some (Optional p) -> tagged ("[" ^ p ^ "]")
          | None -> e.desc
        in
        (* one candidate per flag. a flag with both forms is shown in whichever
         * form scores better against the partial input: the short form only
         * when it scores strictly higher, the long form otherwise (including
         * an empty partial). the other form is noted as "(aka ...)". *)
        let present (e : entry) =
          let base_desc = describe e in
          match e.switch with
          | Long l -> ("--" ^ l, base_desc)
          | Short c -> (Printf.sprintf "-%c" c, base_desc)
          | Both (c, l) ->
            let long_flag = "--" ^ l in
            let short_flag = Printf.sprintf "-%c" c in
            let long_score = fuzzy_score partial long_flag in
            let short_score = fuzzy_score partial short_flag in
            let shown, other =
              if short_score > long_score then (short_flag, long_flag)
              else (long_flag, short_flag)
            in
            (shown, Printf.sprintf "(aka %s) %s" other base_desc)
        in
        let flag_candidates =
          List.filter_map (fun (e : entry) ->
            let flag, desc = present e in
            let s = fuzzy_score partial flag in
            if s > 0 then Some (s, completion_json flag desc) else None
          ) r.entries
        in
        (* highest score first; the scores themselves are not printed *)
        sub_candidates @ flag_candidates
        |> List.sort (fun (a, _) (b, _) -> compare b a)
        |> List.map snd
    in
    Printf.printf "[%s]\n" (String.concat "," candidates)
(* "inshellah query CMD" — dump whatever is stored for a command.
 * the raw data found in [dirs] goes to stdout followed by a newline; when
 * nothing is stored, "not found: CMD" goes to stderr and the process exits
 * with status 1. *)
let cmd_query cmd dirs =
  match lookup_raw dirs cmd with
  | Some data -> print_endline data
  | None ->
    Printf.eprintf "not found: %s\n" cmd;
    exit 1
(* load a newline-separated list of command names to ignore.
 * each line is trimmed; blank lines and lines starting with '#' are skipped.
 * returns the names as a set.
 *
 * a file that cannot be opened or read (Sys_error) yields the empty set, so
 * a missing list is simply treated as "ignore nothing". only Sys_error is
 * handled: a catch-all here would also hide unrelated failures such as
 * Out_of_memory or an interrupt. *)
let load_ignorelist path =
  match In_channel.with_open_text path In_channel.input_all with
  | contents ->
    contents
    |> String.split_on_char '\n'
    |> List.filter_map (fun line ->
      let line = String.trim line in
      if line <> "" && line.[0] <> '#' then Some line else None)
    |> SSet.of_list
  | exception Sys_error _ -> SSet.empty
(* parse the arguments of the "index" subcommand.
 * returns (prefixes, dir, ignore set, help-only set):
 *   --dir PATH        overrides the store directory (default_store_path ()
 *                     otherwise); the last occurrence wins
 *   --ignore FILE     adds the names listed in FILE to the ignore set
 *   --help-only FILE  adds the names listed in FILE to the help-only set
 * every other argument is a prefix directory, kept in command-line order.
 * an option given as the very last argument, with no value after it, falls
 * through to the final case and is treated as a prefix. *)
let parse_index_args args =
  let with_list set path = SSet.union set (load_ignorelist path) in
  let rec scan prefixes dir skipped help_only remaining =
    match remaining with
    | [] -> (List.rev prefixes, dir, skipped, help_only)
    | "--dir" :: path :: tl -> scan prefixes path skipped help_only tl
    | "--ignore" :: path :: tl ->
      scan prefixes dir (with_list skipped path) help_only tl
    | "--help-only" :: path :: tl ->
      scan prefixes dir skipped (with_list help_only path) tl
    | prefix :: tl -> scan (prefix :: prefixes) dir skipped help_only tl
  in
  scan [] (default_store_path ()) SSet.empty SSet.empty args
(* parse the --dir/--system-dir options shared by complete, query and dump.
 * returns (user_dir, system_dirs, remaining args in their original order).
 *   --dir PATH         sets the user directory (default_store_path ()
 *                      otherwise); the last occurrence wins
 *   --system-dir PATH  adds a system directory; each new one is put in front
 *                      of the list, so the result is in reverse
 *                      command-line order
 * NOTE(review): the options are recognised anywhere in the argument list, so
 * a completion span that is literally "--dir" or "--system-dir" and is
 * followed by another token is consumed here rather than passed through. *)
let parse_dir_args args =
  let rec scan user_dir system_dirs kept remaining =
    match remaining with
    | [] -> (user_dir, system_dirs, List.rev kept)
    | "--dir" :: path :: tl -> scan path system_dirs kept tl
    | "--system-dir" :: path :: tl -> scan user_dir (path :: system_dirs) kept tl
    | other :: tl -> scan user_dir system_dirs (other :: kept) tl
  in
  scan (default_store_path ()) [] [] args
(* --- entry point ---
 * the first command-line argument selects the subcommand; the rest is handed
 * to that subcommand's argument parser. anything unrecognised prints usage. *)
let () =
  let argv = List.tl (Array.to_list Sys.argv) in
  match argv with
  | "index" :: rest ->
    let (prefixes, dir, ignorelist, help_only) = parse_index_args rest in
    (match prefixes with
     | [] ->
       Printf.eprintf "error: index requires at least one prefix dir\n";
       exit 1
     | _ :: _ -> ());
    (* every prefix contributes PREFIX/bin and PREFIX/share/man *)
    let under sub = List.map (fun p -> Filename.concat p sub) prefixes in
    let bindirs = under "bin" in
    let mandirs = under "share/man" in
    cmd_index bindirs mandirs ignorelist help_only dir
  | "complete" :: rest ->
    let (user_dir, system_dirs, spans) = parse_dir_args rest in
    cmd_complete spans user_dir system_dirs
  | "query" :: rest ->
    let (user_dir, system_dirs, args) = parse_dir_args rest in
    begin match args with
      | [cmd] -> cmd_query cmd (user_dir :: system_dirs)
      | _ ->
        Printf.eprintf "error: query CMD [--dir PATH] [--system-dir PATH]\n";
        exit 1
    end
  | "dump" :: rest ->
    (* positional arguments to dump are ignored *)
    let (user_dir, system_dirs, _) = parse_dir_args rest in
    cmd_dump (user_dir :: system_dirs)
  | ["manpage"; file] -> cmd_manpage file
  | ["manpage-dir"; dir] -> cmd_manpage_dir dir
  | _ -> usage ()

192
doc/nixos.md Normal file
View file

@ -0,0 +1,192 @@
# nixos integration
inshellah provides a nixos module that automatically indexes nushell
completions for all installed packages at system build time.
## enabling
```nix
# in your flake.nix outputs:
{
nixosConfigurations.myhost = nixpkgs.lib.nixosSystem {
modules = [
inshellah.nixosModules.default
{
programs.inshellah.enable = true;
}
];
};
}
```
or if importing the module directly:
```nix
# configuration.nix
{ pkgs, ... }: {
imports = [ ./path/to/inshellah/nix/module.nix ];
programs.inshellah = {
enable = true;
package = pkgs.inshellah; # or your local build
};
}
```
## what happens at build time
the module hooks into `environment.extraSetup`, which runs during the
system profile build (the `buildEnv` that creates `/run/current-system/sw`).
at that point, all system packages are merged, so `$out/bin` contains every
executable and `$out/share/man` contains every manpage.
inshellah runs a single command:
```
inshellah index "$out" --dir $out/share/inshellah
```
this executes a three-phase pipeline:
### phase 1: native completion detection (parallel)
for each executable, inshellah scans the elf binary for the string
`completion`. if found, it probes common patterns like
`CMD completions nushell` to see if the program can generate its own
nushell completions. native output is used verbatim — these are always
higher quality than parsed completions.
programs like `niri`, and any clap/cobra tool with nushell support,
are handled this way.
### phase 2: manpage parsing (sequential)
for commands not covered by phase 1, inshellah parses manpages from
man1 (user commands) and man8 (sysadmin commands). it handles:
- gnu `.TP` style (coreutils, help2man)
- `.IP` style (curl, hand-written)
- `.PP`+`.RS`/`.RE` style (git, docbook)
- nix3 bullet+hyperlink style (`nix run`, `nix build`, etc.)
- mdoc (bsd) format
- deroff fallback for unusual formats
synopsis sections are parsed to detect subcommands: `git-commit.1`
generates `export extern "git commit"`, not `export extern "git-commit"`.
### phase 3: --help fallback (parallel)
remaining executables without manpages get `--help` (or `-h`) called
with a 200ms timeout. elf binaries are pre-scanned for the `-h` string
to skip those that don't support help flags. shell scripts are run
directly (they're fast). execution is parallelized to available cores.
### output
each command gets its own file in `/share/inshellah` under the system
profile. native generators produce `.nu` files; parsed results produce
`.json` files. the `complete` command reads both formats.
nushell built-in commands (ls, cd, cp, mv, etc.) are excluded since
nushell provides its own completions.
### performance
on a typical nixos system (~950 executables, ~1600 manpages):
- total time: ~4-10 seconds
- native gzip decompression (camlzip, no process spawning)
- parallel --help with core-scaled forking
- elf string scanning to skip ~15% of binaries
## module options
```nix
programs.inshellah = {
enable = true;
# the inshellah package (set automatically by the flake module)
package = pkgs.inshellah;
# where to place indexed completion files under the system profile
# default: "/share/inshellah"
completionsPath = "/share/inshellah";
# commands to skip entirely during indexing
ignoreCommands = [ "problematic-tool" ];
# commands to skip manpage parsing for (uses --help instead)
helpOnlyCommands = [ "nix" ];
};
```
## using the completer
the flake module sets a read-only `snippet` option containing the nushell
config needed to wire up the completer. you can access it via
`config.programs.inshellah.snippet` and paste it into your nushell config,
or source it from a file generated by your nixos config.
the snippet sets up the external completer pointing at the system index
at `/run/current-system/sw/share/inshellah`:
```nu
let inshellah_complete = {|spans|
inshellah complete ...$spans --system-dir /run/current-system/sw/share/inshellah | from json
}
$env.config.completions.external = {
enable: true
max_results: 100
completer: $inshellah_complete
}
```
## home manager and other user-level package managers
the nixos module only indexes packages installed at the system level
(those that end up in `/run/current-system/sw`). if you use home-manager,
nix-env, or another user-level package manager, those binaries and
manpages live elsewhere — typically under `/etc/profiles/per-user/<name>`
or `~/.nix-profile`.
to get completions for user-installed packages, run `inshellah index`
against those prefixes separately:
```sh
# home-manager / per-user profile
inshellah index /etc/profiles/per-user/$USER
# classic nix-env profile
inshellah index ~/.nix-profile
```
this indexes into the default user cache (`$XDG_CACHE_HOME/inshellah`),
which the completer searches automatically. you can re-run this after
installing new packages, or add it to a home-manager activation script.
if you want to automate this in home-manager:
```nix
# home.nix
home.activation.inshellah-index = lib.hm.dag.entryAfter [ "writeBoundary" ] ''
${pkgs.inshellah}/bin/inshellah index /etc/profiles/per-user/$USER 2>/dev/null || true
'';
```
the completer will then search both the system index (`--system-dir`)
and the user cache, so completions from both sources are available.
## troubleshooting
**completions not appearing**: ensure the completer is configured in
your nushell config (see above). check that the system index exists:
`ls /run/current-system/sw/share/inshellah/`.
**missing completions for a specific command**: check if it's a nushell
built-in (`help commands | where name == "thecommand"`). built-ins are
excluded because nushell serves its own completions for them.
**stale completions after update**: completions regenerate on every
`nixos-rebuild`. if a command changed its flags, rebuild to pick up
the changes.
**build-time errors**: indexing failures are non-fatal (`|| true`).
check `journalctl` for the build log if completions are missing.

184
doc/nushell-integration.md Normal file
View file

@ -0,0 +1,184 @@
# using inshellah completions in nushell
inshellah indexes completions from three sources (in priority order):
1. **native generators** — programs that can emit nushell completions directly
2. **manpages** — groff/troff/mdoc manpage parsing
3. **`--help` output** — parsing help text as a fallback
indexed data is stored as `.json` and `.nu` files in a directory that the
`complete` command reads from at tab-completion time.
## quick start
index completions from a system prefix:
```sh
# index from a prefix containing bin/ and share/man/
inshellah index /usr
# index from multiple prefixes
inshellah index /usr /usr/local
# store in a custom directory
inshellah index /usr --dir ~/my-completions
```
parse a single manpage:
```sh
inshellah manpage /usr/share/man/man1/git.1.gz
```
batch-process all manpages under a directory (man1 and man8):
```sh
inshellah manpage-dir /usr/share/man
```
## commands
```
inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
index completions into a directory of json/nu files.
PREFIX is a directory containing bin/ and share/man/.
default dir: $XDG_CACHE_HOME/inshellah
--ignore FILE skip listed commands entirely
--help-only FILE skip manpages for listed commands, use --help instead
inshellah complete CMD [ARGS...] [--dir PATH] [--system-dir PATH]
nushell custom completer. outputs json completion candidates.
falls back to --help resolution if command is not indexed.
inshellah query CMD [--dir PATH] [--system-dir PATH]
print stored completion data for CMD.
inshellah dump [--dir PATH] [--system-dir PATH]
list indexed commands.
inshellah manpage FILE
parse a manpage and emit nushell extern block.
inshellah manpage-dir DIR
batch-process manpages under DIR (man1 and man8 sections).
```
## the index pipeline
the `index` command runs a three-phase pipeline over all executables
in each `PREFIX/bin`:
### phase 1: native completion detection (parallel)
for each executable, inshellah scans the elf binary for the string
`completion`. if found, it probes common patterns like
`CMD completions nushell` to see if the program can generate its own
nushell completions. native output is used verbatim — these are always
higher quality than parsed completions.
programs like `niri`, and any clap/cobra tool with nushell support,
are handled this way.
### phase 2: manpage parsing (sequential)
for commands not covered by phase 1, inshellah parses manpages from
man1 (user commands) and man8 (sysadmin commands). it handles:
- gnu `.TP` style (coreutils, help2man)
- `.IP` style (curl, hand-written)
- `.PP`+`.RS`/`.RE` style (git, docbook)
- nix3 bullet+hyperlink style (`nix run`, `nix build`, etc.)
- mdoc (bsd) format
- deroff fallback for unusual formats
synopsis sections are parsed to detect subcommands: `git-commit.1`
generates `export extern "git commit"`, not `export extern "git-commit"`.
### phase 3: --help fallback (parallel)
remaining executables without manpages get `--help` (or `-h`) called
with a 200ms timeout. elf binaries are pre-scanned for the `-h` string
to skip those that don't support help flags. shell scripts are run
directly (they're fast). execution is parallelized to available cores.
subcommands are recursively resolved — if `--help` output lists
subcommands, inshellah runs `CMD SUBCMD --help` for each.
### output
each command gets its own file in the index directory. native generators
produce `.nu` files; parsed results produce `.json` files. the `complete`
command reads both formats.
nushell built-in commands (ls, cd, cp, mv, etc.) are excluded since
nushell provides its own completions.
### performance
on a typical nixos system (~950 executables, ~1600 manpages):
- total time: ~4-10 seconds
- native gzip decompression (camlzip, no process spawning)
- parallel --help with core-scaled forking
- elf string scanning to skip ~15% of binaries
## the completer
the `complete` command is designed to be wired into nushell as an
external completer. it reads from the index directory (`--dir`) and
optional system directories (`--system-dir`), performs fuzzy matching,
and outputs json completion candidates.
if a command is not indexed, `complete` falls back to on-the-fly
`--help` resolution — it runs the command's help, caches the result
in the user directory, and returns completions immediately.
### setting up the completer
```nu
# ~/.config/nushell/config.nu
$env.config.completions.external = {
enable: true
completer: {|spans|
inshellah complete ...$spans
| from json
}
}
```
with the nixos module, use the provided `snippet` option value (see
[nixos.md](nixos.md)) which points at the system index automatically.
## nixos module
enable automatic completion indexing at system build time:
```nix
{
imports = [ ./path/to/inshellah/nix/module.nix ];
programs.inshellah.enable = true;
}
```
this runs `inshellah index` during the system profile build. see
[nixos.md](nixos.md) for full details.
## what gets generated
the `manpage` and `manpage-dir` commands emit nushell `extern` blocks
with flags, parameter types, and descriptions:
```nu
export extern "rg" [
--regexp(-e): string # a pattern to search for
--file(-f): path # search for patterns from the given file
--count(-c) # only show the count of matching lines
--color: string # controls when to use color
--max-depth: int # limit the depth of directory traversal
]
```
subcommand manpages (e.g. `git-commit.1`) are detected via synopsis
parsing and generate the correct nushell name (`git commit` not
`git-commit`).
nushell built-in commands (ls, cd, mv, etc.) are excluded since nushell
provides its own completions for these.

View file

@ -0,0 +1,84 @@
# runtime completion resolution
the `complete` command has built-in on-the-fly resolution: when a command
is not found in the index, it falls back to running `--help`, caches the
result, and returns completions immediately. this means commands installed
outside the system profile (via cargo, pip, npm, go, etc.) get completions
on first tab-press with no manual setup.
## how it works
when you type `docker compose up --<TAB>`:
1. nushell calls `inshellah complete docker compose up --`
2. inshellah looks up the index for the longest matching prefix
3. if found, it fuzzy-matches flags and subcommands against the partial input
4. if not found, it locates the binary in `$PATH`, runs `--help`,
recursively resolves subcommands, caches the results in the user
directory (`$XDG_CACHE_HOME/inshellah`), and returns completions
all subsequent completions for that command are instant (served from cache).
## setup
the completer works with no extra configuration beyond the basic setup:
```nu
# ~/.config/nushell/config.nu
$env.config.completions.external = {
enable: true
completer: {|spans|
inshellah complete ...$spans
| from json
}
}
```
with the nixos module, add `--system-dir` to also search the system index:
```nu
$env.config.completions.external = {
enable: true
completer: {|spans|
inshellah complete ...$spans --system-dir /run/current-system/sw/share/inshellah
| from json
}
}
```
or use the `snippet` option provided by the flake module (see
[nixos.md](nixos.md)).
## cache management
the user cache lives at `$XDG_CACHE_HOME/inshellah` (typically
`~/.cache/inshellah`).
```sh
# list cached commands
inshellah dump
# view cached data for a command
inshellah query docker
# clear cache
rm -rf ~/.cache/inshellah/
# re-index from a prefix
inshellah index /usr --dir ~/.cache/inshellah
```
## when to use this vs build-time indexing
the nixos module (`programs.inshellah.enable = true`) handles system
packages at build time. runtime resolution covers:
- commands installed outside the system profile (cargo, pip, npm, go)
- subcommand completions at arbitrary depth
- systems without the nixos module
for upfront indexing on non-nixos systems:
```sh
inshellah index /usr /usr/local
```

28
dune-project Normal file
View file

@ -0,0 +1,28 @@
; dune project description for inshellah.
; generate_opam_files makes dune write inshellah.opam from the package
; stanza below, so package metadata is edited here and not in the opam file.
(lang dune 3.20)
(name inshellah)
(generate_opam_files true)
; NOTE(review): "username/reponame" looks like an unfilled placeholder; it
; ends up in the homepage, bug-reports and dev-repo fields of the generated
; opam file. confirm the real repository coordinates.
(source
(github username/reponame))
(authors "atagen <boss@atagen.co>")
(maintainers "atagen <boss@atagen.co>")
(license GPL-3.0-or-later)
(package
(name inshellah)
(synopsis "Nushell completions generator")
(description
"Inshellah parses manpages and --help switches to generate completions for nushell.")
; third-party libraries; these match the libraries linked in lib/dune
(depends
ocaml
dune
angstrom
angstrom-unix
camlzip)
(tags
(shell completions nushell parser angstrom)))

27
flake.lock generated Normal file
View file

@ -0,0 +1,27 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1773385838,
"narHash": "sha256-ylF2AGl08seexxlLvMqj3jd+yZq56W9zicwe51mp0Pw=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "fef542e7a88eec2b698389e6279464fd479926b6",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

71
flake.nix Normal file
View file

@ -0,0 +1,71 @@
# nix flake for inshellah. it provides, for each supported system:
#   - devShells.default: a development shell with the ocaml toolchain
#   - packages.default:  the inshellah executable built with dune
# and, system-independently:
#   - nixosModules.default: the nixos module from ./nix/module.nix, wired to
#     this flake's package and to a ready-made nushell config snippet
{
inputs.nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
outputs =
{ self, nixpkgs }:
let
# build an attribute set with one entry per supported system, calling f
# with the nixpkgs package set imported for that system
forAllSystems =
f:
nixpkgs.lib.genAttrs [ "x86_64-linux" "aarch64-linux" ] (
system: f (import nixpkgs { inherit system; })
);
in
{
# development shell: the build dependencies plus test, editor and repl tooling
devShells = forAllSystems (pkgs: {
default = pkgs.mkShell {
packages = with pkgs.ocamlPackages; [
dune_3
ocaml
angstrom
angstrom-unix
camlzip
ppx_inline_test
ocaml-lsp
ocamlformat
ocamlformat-rpc-lib
utop
];
};
});
# the inshellah package, built from this source tree
packages = forAllSystems (pkgs: {
default = pkgs.ocamlPackages.buildDunePackage {
pname = "inshellah";
version = "0.1";
src = ./.;
nativeBuildInputs = [ pkgs.git ];
buildInputs = with pkgs.ocamlPackages; [
dune_3
ocaml
angstrom
angstrom-unix
camlzip
];
# lets lib.getExe (used for the snippet below) find the binary
meta.mainProgram = "inshellah";
};
});
# nixos module wrapper: imports ./nix/module.nix, sets the package option
# to this flake's build for the host platform, and sets the snippet option
# to nushell config that calls the completer against the system index
nixosModules.default =
{
pkgs,
lib,
config,
...
}:
{
imports = [ ./nix/module.nix ];
programs.inshellah.package = self.packages.${pkgs.stdenv.hostPlatform.system}.default;
# the snippet interpolates the absolute path of the executable and appends
# completionsPath to /run/current-system/sw/ for --system-dir
programs.inshellah.snippet = ''
let inshellah_complete = {|spans|
${lib.getExe config.programs.inshellah.package} complete ...$spans --system-dir /run/current-system/sw/${config.programs.inshellah.completionsPath} | from json
}
$env.config.completions.external = {
enable: true
max_results: 100
completer: $inshellah_complete
}
'';
};
};
}

35
inshellah.opam Normal file
View file

@ -0,0 +1,35 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "Nushell completions generator"
description:
"Inshellah parses manpages and --help switches to generate completions for nushell."
maintainer: ["atagen <boss@atagen.co>"]
authors: ["atagen <boss@atagen.co>"]
license: "GPL-3.0-or-later"
tags: ["shell" "completions" "nushell" "parser" "angstrom"]
homepage: "https://github.com/username/reponame"
bug-reports: "https://github.com/username/reponame/issues"
depends: [
"ocaml"
"dune" {>= "3.20"}
"angstrom"
"angstrom-unix"
"camlzip"
"odoc" {with-doc}
]
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
dev-repo: "git+https://github.com/username/reponame.git"
x-maintenance-intent: ["(latest)"]

0
lib/.ocamlformat Normal file
View file

3
lib/dune Normal file
View file

@ -0,0 +1,3 @@
; the inshellah library, linked by the executable in bin/.
; str and unix ship with the ocaml distribution, which is why they appear
; here but not in the depends list of dune-project.
(library
(name inshellah)
(libraries angstrom angstrom-unix camlzip str unix))

1071
lib/manpage.ml Normal file

File diff suppressed because it is too large Load diff

242
lib/nushell.ml Normal file
View file

@ -0,0 +1,242 @@
(* nushell.ml — generate nushell extern definitions from parsed help data.
*
* this module is the code generation backend. it takes a help_result (from
* the parser or manpage modules) and produces nushell source code that
* defines "extern" declarations — nushell's mechanism for teaching the shell
* about external commands' flags and subcommands so it can offer completions.
*
* it also maintains a list of nushell's built-in commands to avoid generating
* extern definitions that would shadow them.
*
* key responsibilities:
* - deduplicating flag entries (same flag from multiple help sources)
* - mapping parameter names to nushell types (path, int, string)
* - formatting flags in nushell syntax: --flag(-f): type # description
* - handling positional arguments with nushell's ordering constraints
* - escaping special characters for nushell string literals
*)
open Parser
module SSet = Set.Make(String)
module SMap = Map.Make(String)
module CSet = Set.Make(Char)
(* nushell built-in commands and keywords — we must never generate extern
 * definitions for these because it would shadow nushell's own implementations.
 * this list is maintained manually and should be updated with new nushell releases.
 *
 * entries are grouped by first letter and listed alphabetically within each
 * group; keep it that way when adding names so duplicates stay easy to spot.
 * lookups go through builtin_set / is_nushell_builtin, which compare the
 * whole name exactly (case-sensitive). *)
let nushell_builtins = [
"alias"; "all"; "ansi"; "any"; "append"; "ast"; "attr";
"bits"; "break"; "bytes";
"cal"; "cd"; "char"; "chunk-by"; "chunks"; "clear"; "collect";
"columns"; "commandline"; "compact"; "complete"; "config"; "const";
"continue"; "cp";
"date"; "debug"; "decode"; "def"; "default"; "describe"; "detect";
"do"; "drop"; "du";
"each"; "echo"; "encode"; "enumerate"; "error"; "every"; "exec";
"exit"; "explain"; "explore"; "export"; "export-env"; "extern";
"fill"; "filter"; "find"; "first"; "flatten"; "for"; "format"; "from";
"generate"; "get"; "glob"; "grid"; "group-by";
"hash"; "headers"; "help"; "hide"; "hide-env"; "histogram";
"history"; "http";
"if"; "ignore"; "input"; "insert"; "inspect"; "interleave"; "into";
"is-admin"; "is-empty"; "is-not-empty"; "is-terminal"; "items";
"job"; "join";
"keybindings"; "kill";
"last"; "length"; "let"; "let-env"; "lines"; "load-env"; "loop"; "ls";
"match"; "math"; "merge"; "metadata"; "mkdir"; "mktemp"; "module";
"move"; "mut"; "mv";
"nu-check"; "nu-highlight";
"open"; "overlay";
"panic"; "par-each"; "parse"; "path"; "plugin"; "port"; "prepend"; "print"; "ps";
"query";
"random"; "reduce"; "reject"; "rename"; "return"; "reverse"; "rm";
"roll"; "rotate"; "run-external";
"save"; "schema"; "scope"; "select"; "seq"; "shuffle"; "skip"; "sleep";
"slice"; "sort"; "sort-by"; "source"; "source-env"; "split"; "start";
"stor"; "str"; "sys";
"table"; "take"; "tee"; "term"; "timeit"; "to"; "touch"; "transpose";
"try"; "tutor";
"ulimit"; "umask"; "uname"; "uniq"; "uniq-by"; "unlet"; "update";
"upsert"; "url"; "use";
"values"; "version"; "view";
"watch"; "where"; "which"; "while"; "whoami"; "window"; "with-env"; "wrap";
"zip";
]
(* the built-in names as a set, built on first use and then reused *)
let builtin_set =
  lazy (List.fold_left (fun names name -> SSet.add name names)
          SSet.empty nushell_builtins)

(* true when [cmd] is exactly one of the names in nushell_builtins *)
let is_nushell_builtin cmd =
  let (lazy names) = builtin_set in
  SSet.mem cmd names
(* collapse flag entries that describe the same flag.
 *
 * entries are keyed by their long name ("--name"), or by "-c" when they only
 * have a short form. when a key occurs more than once the highest-scoring
 * entry is kept, the earliest one winning ties:
 *   - has both short and long form: +10
 *   - takes a parameter:            +5
 *   - description length / 10:      up to +5
 *
 * a short-only entry is dropped altogether when its letter already belongs
 * to a kept entry that has both forms, so "-v" is never emitted next to
 * "--verbose(-v)", which nushell would reject as a duplicate.
 *
 * the result follows the order in which each key first appears in the input. *)
let dedup_entries entries =
  let key_of e =
    match e.switch with
    | Short c -> Printf.sprintf "-%c" c
    | Long l | Both (_, l) -> "--" ^ l
  in
  let score e =
    let form_bonus = match e.switch with Both _ -> 10 | Short _ | Long _ -> 0 in
    let param_bonus = if Option.is_some e.param then 5 else 0 in
    let desc_bonus = min 5 (String.length e.desc / 10) in
    form_bonus + param_bonus + desc_bonus
  in
  (* best entry per key *)
  let best =
    List.fold_left (fun chosen e ->
      SMap.update (key_of e) (function
        | Some prev when score prev >= score e -> Some prev
        | _ -> Some e) chosen
    ) SMap.empty entries
  in
  (* short letters that already appear as part of a combined entry *)
  let covered =
    SMap.fold (fun _ e letters ->
      match e.switch with
      | Both (c, _) -> CSet.add c letters
      | Short _ | Long _ -> letters
    ) best CSet.empty
  in
  let rec emit seen kept = function
    | [] -> List.rev kept
    | e :: tl ->
      let k = key_of e in
      let redundant_short =
        match e.switch with
        | Short c -> CSet.mem c covered
        | Long _ | Both _ -> false
      in
      if SSet.mem k seen || redundant_short then emit seen kept tl
      else emit (SSet.add k seen) (SMap.find k best :: kept) tl
  in
  emit SSet.empty [] entries
(* pick the nushell type for a parameter from its name.
 * names that denote files or directories give "path" (which enables path
 * completion), names that denote numbers give "int", anything else gives
 * "string". the comparison is exact and case-sensitive: only the spellings
 * listed below are recognised. *)
let nushell_type_of_param name =
  let path_names =
    [ "FILE"; "file"; "PATH"; "path"; "DIR"; "dir"; "DIRECTORY";
      "FILENAME"; "PATTERNFILE" ] in
  let int_names =
    [ "NUM"; "N"; "COUNT"; "NUMBER"; "int"; "INT"; "COLS"; "WIDTH";
      "LINES"; "DEPTH"; "depth" ] in
  if List.mem name path_names then "path"
  else if List.mem name int_names then "int"
  else "string"
(* make a string safe to place inside a nushell double-quoted literal by
 * putting a backslash in front of every double quote and every backslash.
 * a string containing neither is returned as is. *)
let escape_nu s =
  let special c = c = '"' || c = '\\' in
  if not (String.exists special s) then s
  else begin
    let out = Buffer.create (String.length s + 4) in
    String.iter (fun c ->
      if special c then Buffer.add_char out '\\';
      Buffer.add_char out c
    ) s;
    Buffer.contents out
  end
(* render one flag entry as a line of a nushell extern parameter list.
 * the line is made of:
 *   - the flag name: "--long(-s)" when both forms exist, else "--long" or "-s"
 *   - ": type" when the flag takes a parameter (mandatory or optional), the
 *     type coming from nushell_type_of_param
 *   - when the description is non-empty: padding up to column 40 (always at
 *     least one space) followed by "# description" *)
let format_flag entry =
  let name =
    match entry.switch with
    | Short s -> Printf.sprintf "-%c" s
    | Long l -> "--" ^ l
    | Both (s, l) -> Printf.sprintf "--%s(-%c)" l s
  in
  let typed =
    match entry.param with
    | None -> ""
    | Some (Mandatory p | Optional p) -> ": " ^ nushell_type_of_param p
  in
  let flag = String.concat "" [" "; name; typed] in
  match entry.desc with
  | "" -> flag
  | desc ->
    let pad_len = max 1 (40 - String.length flag) in
    String.concat "" [flag; String.make pad_len ' '; "# "; desc]
(* render one positional argument as a line of a nushell extern parameter
 * list: "...name: type" when variadic, "name?: type" when optional, plain
 * "name: type" otherwise. a variadic argument never gets the "?" marker.
 * hyphens in the name become underscores (nushell identifiers can't contain
 * hyphens); the type is looked up from the upper-cased original name. *)
let format_positional p =
  let ident = String.map (fun c -> if c = '-' then '_' else c) p.pos_name in
  let typ = nushell_type_of_param (String.uppercase_ascii p.pos_name) in
  let decorated =
    if p.variadic then "..." ^ ident
    else if p.optional then ident ^ "?"
    else ident
  in
  Printf.sprintf " %s: %s" decorated typ
(* make a positional list acceptable to nushell, which requires that
 *   1. no required positional follows an optional one
 *   2. there is at most one variadic ("rest") parameter
 *
 * walking the list in order: the first variadic is kept and every later one
 * is dropped; once an optional or a variadic has been seen, every following
 * non-variadic positional is marked optional. order is otherwise preserved. *)
let fixup_positionals positionals =
  let rec walk ~saw_opt ~saw_rest kept = function
    | [] -> List.rev kept
    | p :: tl when p.variadic ->
      if saw_rest then walk ~saw_opt ~saw_rest kept tl
      else walk ~saw_opt:true ~saw_rest:true (p :: kept) tl
    | p :: tl when saw_opt ->
      walk ~saw_opt:true ~saw_rest ({ p with optional = true } :: kept) tl
    | p :: tl ->
      walk ~saw_opt:p.optional ~saw_rest (p :: kept) tl
  in
  walk ~saw_opt:false ~saw_rest:false [] positionals
(* build the nushell source for one command: an extern block listing the
 * positionals (after fixup_positionals) and then the flags (after
 * dedup_entries), for example
 *   export extern "git add" [
 *    ...pathspec: path
 *    --verbose(-v) # be verbose
 *    --dry-run(-n) # dry run
 *   ]
 *
 * each subcommand listed in the result is then given a stub extern of its
 * own, carrying only its description as a comment:
 *   export extern "git stash" [ # stash changes
 *   ]
 *
 * the command name, subcommand names and subcommand descriptions all pass
 * through escape_nu. *)
let extern_of cmd_name result =
  let entries = dedup_entries result.entries in
  let positionals = fixup_positionals result.positionals in
  let cmd = escape_nu cmd_name in
  let out = Buffer.create 256 in
  let add_line text =
    Buffer.add_string out text;
    Buffer.add_char out '\n'
  in
  Printf.bprintf out "export extern \"%s\" [\n" cmd;
  List.iter (fun p -> add_line (format_positional p)) positionals;
  List.iter (fun e -> add_line (format_flag e)) entries;
  Buffer.add_string out "]\n";
  List.iter (fun (sc : subcommand) ->
    Printf.bprintf out "\nexport extern \"%s %s\" [ # %s\n]\n"
      cmd (escape_nu sc.name) (escape_nu sc.desc)
  ) result.subcommands;
  Buffer.contents out

(* public name for extern_of *)
let generate_extern cmd_name result = extern_of cmd_name result
(* turn a command name into a nushell module name: ascii letters, digits,
 * '-' and '_' are kept, every other character becomes '-', and
 * "-completions" is appended.
 * e.g. "git" -> "git-completions",
 *      "docker-compose" -> "docker-compose-completions" *)
let module_name_of cmd_name =
  let allowed = function
    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' -> true
    | _ -> false
  in
  String.map (fun c -> if allowed c then c else '-') cmd_name ^ "-completions"
(* wrap the extern definitions for a command in a nushell module and import
 * it straight away:
 *   module git-completions {
 *   ...externs...
 *   }
 *
 *   use git-completions *
 * the module name comes from module_name_of; the trailing "use" brings the
 * externs into scope as soon as the text is evaluated. *)
let generate_module cmd_name result =
  let m = module_name_of cmd_name in
  let body = extern_of cmd_name result in
  String.concat "" ["module "; m; " {\n"; body; "}\n\nuse "; m; " *\n"]
(* shortcut for callers that only have flag entries: builds a help_result
 * with no subcommands, no positionals and an empty description, then
 * renders it with generate_extern. *)
let generate_extern_from_entries cmd_name entries =
  let flags_only =
    { entries; subcommands = []; positionals = []; description = "" }
  in
  generate_extern cmd_name flags_only

802
lib/parser.ml Normal file
View file

@ -0,0 +1,802 @@
(* parser.ml — parse --help output into structured flag/subcommand/positional data.
*
* this module is the core of inshellah's help-text understanding. it takes the
* raw text that a cli tool prints when you run `cmd --help` and extracts:
* - flag entries (short/long switches with optional parameters and descriptions)
* - subcommand listings (name + description pairs)
* - positional arguments (from usage lines)
*
* the parser is built on angstrom (a monadic parser combinator library) for the
* structured flag/subcommand extraction, with hand-rolled imperative parsers for
* usage-line positional extraction (where the format is too varied for clean
* combinator composition).
*
* key design decisions:
* - the angstrom parser runs in prefix-consume mode: it doesn't need to parse
* the entire input, just extract what it can recognize. unrecognized lines are
* skipped via skip_non_option_line.
* - multi-line descriptions are handled via indentation-based continuation:
* lines indented 8+ spaces that don't start with '-' are folded into the
* previous entry's description.
* - subcommand detection uses a heuristic: lines with a name followed by 2+
* spaces then a description, where the name is at least 2 chars. section
* headers (like "arguments:") toggle whether name-description pairs are
* treated as subcommands or positionals.
* - positional extraction has two paths: usage-line parsing (the common case)
* and cli11's explicit "positionals:" section format.
*)
open Angstrom
(* strip ansi escape sequences and osc hyperlinks from --help output.
 * many modern cli tools emit colored/styled output even when piped,
 * so the text is cleaned before parsing. handled forms:
 *   - csi sequences (esc [ ... final_byte): colors, cursor movement, etc.
 *     the final byte is the first byte in '@'..'~'.
 *   - osc sequences (esc ] ... terminator): hyperlinks, window titles, etc.
 *     terminated by bel (0x07) or by esc followed by a backslash.
 *   - any other escape: esc, then zero or more intermediate bytes
 *     (0x20..0x2f), then one final byte. this covers plain two-byte escapes
 *     as well as three-byte ones such as the charset selection esc ( B,
 *     which would otherwise leave a stray letter in the text.
 *
 * an unterminated sequence swallows the rest of the input. an esc that is
 * the very last byte is kept as-is.
 *
 * returns a new string; the input is not modified. *)
let strip_ansi s =
  let len = String.length s in
  let buf = Buffer.create len in
  (* index just past the csi sequence whose body starts at [i] *)
  let rec skip_csi i =
    if i >= len then len
    else if s.[i] >= '@' && s.[i] <= '~' then i + 1
    else skip_csi (i + 1)
  in
  (* index just past the osc sequence whose body starts at [i] *)
  let rec skip_osc i =
    if i >= len then len
    else if s.[i] = '\x07' then i + 1
    else if s.[i] = '\x1b' && i + 1 < len && s.[i + 1] = '\\' then i + 2
    else skip_osc (i + 1)
  in
  (* index of the first byte at or after [i] that is not an intermediate *)
  let rec skip_intermediates i =
    if i < len && s.[i] >= ' ' && s.[i] <= '/' then skip_intermediates (i + 1)
    else i
  in
  let rec go i =
    if i < len then begin
      if s.[i] = '\x1b' && i + 1 < len then begin
        let next =
          match s.[i + 1] with
          | '[' -> skip_csi (i + 2)
          | ']' -> skip_osc (i + 2)
          | _ ->
            (* intermediates (possibly none) plus exactly one final byte *)
            min len (skip_intermediates (i + 1) + 1)
        in
        go next
      end else begin
        Buffer.add_char buf s.[i];
        go (i + 1)
      end
    end
  in
  go 0;
  Buffer.contents buf
(* --- character class predicates --- *)
(* small classifiers shared by the angstrom parsers below. *)

(* horizontal whitespace only: space or tab *)
let is_whitespace c = c = ' ' || c = '\t'

(* ascii letters and digits *)
let is_alphanumeric c =
  (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')

(* characters allowed inside parameter names like FILE, output-dir, etc. *)
let is_param_char c = is_alphanumeric c || c = '_' || c = '-'

(* used to detect all-caps parameter names like FILE, TIME_STYLE *)
let is_upper_or_underscore c = (c >= 'A' && c <= 'Z') || c = '_'

(* characters allowed in long flag names (--foo-bar, --enable-feature2) *)
let is_long_char c = is_alphanumeric c || c = '-'
(* --- core types ---
 * the structured result of parsing a help text. per the original notes these
 * types are shared with the nushell codegen, the store and the manpage parser.
 *)

(* how a flag is spelled: short-only (-v), long-only (--verbose), or both.
 * Both keeps the pair together so codegen can emit "--verbose(-v)". *)
type switch =
  | Short of char
  | Long of string
  | Both of char * string

(* a flag's value: mandatory (--output FILE) or optional (--color[=WHEN]). *)
type param =
  | Mandatory of string
  | Optional of string

(* one complete flag definition: its switch form, optional parameter, and
 * description text (multi-line descriptions are already joined). *)
type entry = {
  switch : switch;
  param : param option;
  desc : string;
}

(* a subcommand name with its one-line description *)
type subcommand = {
  name : string;
  desc : string;
}

(* a positional argument and whether it may be omitted or repeated *)
type positional = {
  pos_name : string;
  optional : bool;
  variadic : bool;
}

(* everything parsed for a single command *)
type help_result = {
  entries : entry list;
  subcommands : subcommand list;
  positionals : positional list;
  description : string;
}
(* --- low-level angstrom combinators --- *)
(* building blocks for all the parsers below. *)

(* consume spaces and tabs; never crosses a line boundary *)
let inline_ws = skip_while is_whitespace

(* permissive end of line: a newline, or the end of the input *)
let eol = end_of_line <|> end_of_input

(* strict end of line: an actual newline must be present.
 * skip_non_option_line relies on this so that it cannot succeed at eof. *)
let eol_strict = end_of_line

(* --- switch and parameter parsers --- *)
(* the flag-name portion of an option line, e.g. "-v" or "--verbose" *)

(* "-" followed by one letter or digit; yields that character *)
let short_switch = char '-' >>= fun _ -> satisfy is_alphanumeric

(* "--" followed by one or more long-name characters; yields the name *)
let long_switch = string "--" >>= fun _ -> take_while1 is_long_char

(* a comma plus any horizontal whitespace after it *)
let comma = char ',' >>= fun _ -> inline_ws
(* parameter parsers: the syntaxes tools use to show that a flag takes a
 * value. the formats are diverse:
 *   --output=FILE     (eq_man_param: mandatory, common in gnu tools)
 *   --color[=WHEN]    (eq_opt_param: optional with = syntax)
 *   --depth DEPTH     (space_upper_param: space-separated ALL_CAPS)
 *   --file <path>     (space_angle_param: angle brackets)
 *   --file [<path>]   (space_opt_angle_param: optional angle brackets)
 *   --format string   (space_type_param: go/cobra lowercase type word)
 *)

(* "[=NAME]" yields Optional NAME *)
let eq_opt_param =
  let name = string "[=" *> take_while1 is_param_char <* char ']' in
  name >>| fun n -> Optional n

(* "=NAME" yields Mandatory NAME *)
let eq_man_param =
  let name = char '=' *> take_while1 is_param_char in
  name >>| fun n -> Mandatory n
(* space-separated ALL_CAPS param, e.g. " FILE" or " TIME_STYLE".
 * after the single space the next character is peeked: unless it is an
 * uppercase letter or underscore the parser fails straight away. the whole
 * word is then read and must consist only of uppercase letters, underscores
 * and digits (so "SHA256" passes, while "Do" or "Set" are rejected). this
 * keeps a description word that directly follows the flag from being taken
 * as a parameter name. *)
let space_upper_param =
  let is_caps_or_digit c = is_upper_or_underscore c || (c >= '0' && c <= '9') in
  let all_caps_word =
    take_while1 is_param_char >>= fun word ->
    if String.for_all is_caps_or_digit word then return (Mandatory word)
    else fail "not an all-caps param"
  in
  char ' ' *> peek_char_fail >>= fun first ->
  if not (is_upper_or_underscore first) then fail "not an uppercase param"
  else all_caps_word
(* angle-bracket param, e.g. "<file>" or "<notation>"; the name is whatever
 * sits between the brackets *)
let angle_param =
  let inside = take_while1 (fun c -> c <> '>') in
  char '<' *> inside <* char '>' >>| fun name -> Mandatory name

(* a single space followed by an angle-bracket param *)
let space_angle_param = char ' ' >>= fun _ -> angle_param

(* optional angle-bracket param: "[<file>]" *)
let opt_angle_param =
  let inside = take_while1 (fun c -> c <> '>') in
  char '[' *> char '<' *> inside <* char '>' <* char ']'
  >>| fun name -> Optional name

(* a single space followed by an optional angle-bracket param *)
let space_opt_angle_param = char ' ' >>= fun _ -> opt_angle_param
(* go/cobra style: a space plus a lowercase type word such as "string",
 * "list" or "int" (as in "--timeout duration" or "--name string").
 * the word is read as a run of lowercase ascii letters and is rejected when
 * longer than 10 characters, on the assumption that a longer word is the
 * start of the description rather than a type annotation. *)
let space_type_param =
  let is_lower c = c >= 'a' && c <= 'z' in
  let short_word =
    take_while1 is_lower >>= fun word ->
    if String.length word > 10 then fail "too long for type param"
    else return (Mandatory word)
  in
  char ' ' *> peek_char_fail >>= fun first ->
  if is_lower first then short_word
  else fail "not a lowercase type param"
(* try each parameter format, most specific first, and yield None when no
 * format matches. the order is significant:
 *   - eq_opt_param precedes eq_man_param so "[=WHEN]" is tried as a whole
 *   - space_opt_angle_param precedes space_angle_param so "[<file>]" is
 *     tried before "<file>"
 *   - the loose space-separated word forms come last *)
let param_parser =
  let any_param =
    choice
      [ eq_opt_param;
        eq_man_param;
        space_opt_angle_param;
        space_angle_param;
        space_upper_param;
        space_type_param ]
  in
  option None (any_param >>| Option.some)
(* switch parser: the ways help text presents flag names, in the order tried:
 *   -a, --all     (short + comma + long: gnu style)
 *   -a --all      (short + one space + long: comma omitted)
 *   --all / -a    (long + slash + short)
 *   -a            (short only)
 *   --all         (long only)
 *
 * the first alternative that succeeds wins, so the combined forms are listed
 * ahead of the single forms: short-only would otherwise succeed on the "-a"
 * of "-a, --all" and leave the long name unparsed. *)
let switch_parser =
  let both s l = Both (s, l) in
  let short_comma_long = lift2 both (short_switch <* comma) long_switch in
  let short_space_long = lift2 both (short_switch <* char ' ') long_switch in
  let long_slash_short =
    lift2
      (fun l s -> Both (s, l))
      (long_switch <* inline_ws <* char '/' <* inline_ws)
      short_switch
  in
  let short_only = short_switch >>| fun s -> Short s in
  let long_only = long_switch >>| fun l -> Long l in
  choice
    [ short_comma_long;
      short_space_long;
      long_slash_short;
      short_only;
      long_only ]
(* --- description parsing with multi-line continuation ---
 * descriptions often wrap across several lines. a continuation line is
 * deeply indented (8+ columns) and does not start with '-', which would mark
 * a new flag entry. indentation is inspected by peeking, without consuming
 * input, before deciding whether to fold the line in. *)

(* everything up to, but not including, the next '\n' or '\r' *)
let rest_of_line =
  take_till (function '\n' | '\r' -> true | _ -> false)
(* parse one continuation line of a wrapped description and yield its text.
 * a line qualifies when its leading indentation is at least 8 columns
 * (a tab counts as 8) and the first non-blank character seen is not '-'.
 * flag lines are normally indented far less, which is what the threshold
 * relies on.
 *
 * the decision is made on a peeked window of at most 80 bytes; nothing is
 * consumed unless the line qualifies. the '-' test trims that whole window,
 * so when the current line is blank apart from its indentation the character
 * tested comes from a following line.
 * fails at end of input. *)
let continuation_line =
  (* width of the leading run of spaces and tabs *)
  let indent_width s =
    let rec measure k acc =
      if k >= String.length s then acc
      else
        match s.[k] with
        | ' ' -> measure (k + 1) (acc + 1)
        | '\t' -> measure (k + 1) (acc + 8)
        | _ -> acc
    in
    measure 0 0
  in
  peek_string 1 *> available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 80) >>= fun preview ->
    let deep_enough = indent_width preview >= 8 in
    let dash_first =
      let body = String.trim preview in
      body <> "" && body.[0] = '-'
    in
    if deep_enough && not dash_first then inline_ws *> rest_of_line <* eol
    else fail "not a continuation line"
(* description text: the remainder of the current line plus any continuation
 * lines that follow. every line is trimmed, blank ones are dropped, and the
 * rest are joined with single spaces. *)
let description =
  let squash lines =
    lines
    |> List.map String.trim
    |> List.filter (fun l -> l <> "")
    |> String.concat " "
  in
  let first_line = inline_ws *> rest_of_line <* eol in
  lift2 (fun head tail -> squash (head :: tail)) first_line (many continuation_line)

(* description that sits entirely below the flag line, as in the clap (rust)
 * long help format:
 *   --verbose
 *       increase verbosity
 * at least one continuation line is required; lines are joined the same way
 * as in description. *)
let description_below =
  let squash lines =
    lines
    |> List.map String.trim
    |> List.filter (fun l -> l <> "")
    |> String.concat " "
  in
  many1 continuation_line >>| squash
(* --- line classification for skipping ---
 * lines the parser does not understand (section headers, blank lines, free
 * paragraphs) have to be skipped without eating lines that are flag entries. *)

(* succeed, consuming nothing, when the upcoming text looks like a flag entry:
 * the first non-blank character within the next 40 bytes is '-'.
 * fails at end of input. *)
let at_option_line =
  peek_string 1 *> available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 40) >>= fun preview ->
    match String.trim preview with
    | "" -> fail "not an option line"
    | s when s.[0] = '-' -> return ()
    | _ -> fail "not an option line"
(* skip one line that no other parser claimed (section header, blank line,
 * free text, ...).
 * the consuming branch ends with eol_strict rather than eol, so it cannot
 * succeed at eof; that keeps the enclosing many from looping forever at the
 * end of the input.
 *
 * NOTE(review): the left branch is meant to refuse lines that look like
 * options. with angstrom's alternative operator a failing left branch falls
 * through to the right one, so the guard looks like a no-op and dash lines
 * that entry rejected still get skipped here. confirm before relying on
 * either reading. *)
let skip_non_option_line =
  let reject_option_line = at_option_line *> fail "this is an option line" in
  let consume_line = rest_of_line *> eol_strict *> return () in
  reject_option_line <|> consume_line
(* --- entry parsing --- *)
(* one flag entry: leading whitespace, the switch with its optional
 * parameter, then a description.
 * the description is taken inline first; failing that, the line must end
 * and the description may follow on the next lines (clap long-help format),
 * or be absent altogether, in which case it is the empty string. *)
let entry =
  let pair a b = (a, b) in
  let flag = lift2 pair switch_parser param_parser in
  let below_or_empty = eol *> (description_below <|> return "") in
  let desc_text = description <|> below_or_empty in
  inline_ws *>
  (flag >>= fun (switch, param) ->
   desc_text >>| fun desc -> { switch; param; desc })
(* --- subcommand parsing ---
 * subcommand lines follow the pattern
 *   "  name    description"
 * with the name and the description separated by 2+ spaces. some tools put
 * argument placeholders between the two:
 *   "  start UNIT...       start one or more units"
 *   "  list [PATTERN]      list matching units"
 *)

(* characters allowed in a subcommand name: letters, digits, '-' and '_' *)
let is_subcommand_char c =
  match c with
  | '-' | '_' -> true
  | _ -> (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
(* skip argument placeholders like UNIT..., [PATTERN...|PID...], <file>
* that appear between the subcommand name and the description.
* only single-space gaps are consumed; the two-space gap before the
* description is left in place so the caller can use it as the delimiter.
*
* this is a recursive (fix-point) parser: each round peeks at the next two
* bytes, and only when they are one space followed by a non-space does it
* look at the token that follows. a token is skipped when it starts with
* '[' or '<', or when it consists solely of uppercase letters, digits and
* the characters _ - . | , (the punctuation covers variadic and alternative
* syntax). the parser always succeeds with unit; when nothing qualifies it
* consumes nothing. *)
let skip_arg_placeholders =
fix (fun self ->
(* fewer than two bytes left: nothing that could be " X" *)
available >>= fun avail ->
if avail < 2 then return ()
else
peek_string (min avail 2) >>= fun s2 ->
if String.length s2 >= 2 && s2.[0] = ' ' && s2.[1] <> ' ' then
(* exactly one space, then a token: possibly an arg placeholder *)
let next = s2.[1] in
if next = '[' || next = '<'
|| (next >= 'A' && next <= 'Z') then
(* peek up to 80 bytes so the whole token can be inspected *)
peek_string (min avail 80) >>= fun preview ->
(* the token starts after the single space (index 1) and runs to the
next space, newline, carriage return, or the end of the preview *)
let tok_start = 1 in
let tok_end = ref tok_start in
while !tok_end < String.length preview
&& preview.[!tok_end] <> ' '
&& preview.[!tok_end] <> '\n'
&& preview.[!tok_end] <> '\r' do
incr tok_end
done;
let tok = String.sub preview tok_start (!tok_end - tok_start) in
(* accept as placeholder if it starts with [ or <, or is ALL_CAPS
(possibly with digits, underscores, dashes, dots, pipes, commas) *)
let is_placeholder =
tok.[0] = '[' || tok.[0] = '<'
|| String.for_all (fun c ->
(c >= 'A' && c <= 'Z') || c = '_' || c = '-'
|| c = '.' || c = '|' || c = ',' || (c >= '0' && c <= '9')
) tok
in
if is_placeholder then
advance (1 + String.length tok) *> self
else return ()
else return ()
else return ())
(* parse one subcommand line into a subcommand record.
 * the name must be at least 2 characters long and, after any argument
 * placeholders, must be followed by two or more spaces and then the
 * description. the name is lowercased for consistent lookup.
 *
 * a leading "- " on the description is decorative and is removed, so that
 *   "  add  - add a new item"
 * yields the description "add a new item". *)
let subcommand_entry =
  let strip_dash_prefix raw =
    let t = String.trim raw in
    if String.starts_with ~prefix:"- " t then
      String.trim (String.sub t 2 (String.length t - 2))
    else t
  in
  let after_name name =
    skip_arg_placeholders *> char ' ' *> char ' ' *> inline_ws *> rest_of_line
    <* eol
    >>| fun raw ->
    { name = String.lowercase_ascii name; desc = strip_dash_prefix raw }
  in
  inline_ws *> take_while1 is_subcommand_char >>= fun name ->
  if String.length name >= 2 then after_name name
  else fail "subcommand name too short"
(* --- section header detection ---
 * section headers disambiguate subcommands from positional arguments.
 * "commands:" introduces subcommands, whereas "arguments:" or "positionals:"
 * introduce listings in the same name+description layout that must NOT be
 * read as subcommands. *)

(* true when [s] names a positional-argument section. the comparison ignores
 * case and surrounding whitespace, and a single trailing colon is dropped. *)
let is_arg_section s =
  let lc = String.lowercase_ascii (String.trim s) in
  let base =
    match String.ends_with ~suffix:":" lc with
    | true -> String.trim (String.sub lc 0 (String.length lc - 1))
    | false -> lc
  in
  List.mem base [ "arguments"; "args"; "positionals"; "positional arguments" ]
(* a section header: a line indented by at most 4 characters whose trimmed
 * text is at least 2 characters long, ends with ':' and does not start with
 * '-'. in help_parser this is tried before subcommand_entry so that a line
 * like "commands:" is not taken for a subcommand.
 *
 * the line is classified from a peeked window of at most 80 bytes. when it
 * qualifies, the whole line and its newline are consumed and the result says
 * whether it is an argument section (true) or any other section (false).
 * help_parser uses that flag to keep entries under argument sections out of
 * the subcommand list. a header on the final line with no trailing newline
 * is not accepted, since a real newline is required. *)
let section_header =
  available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 80) >>= fun preview ->
    (* only the first line of the window matters *)
    let first_line = List.hd (String.split_on_char '\n' preview) in
    let rec leading k =
      if k < String.length first_line
         && (first_line.[k] = ' ' || first_line.[k] = '\t')
      then leading (k + 1)
      else k
    in
    let indent = leading 0 in
    let t = String.trim first_line in
    let n = String.length t in
    let looks_like_header =
      n >= 2 && t.[n - 1] = ':' && t.[0] <> '-' && indent <= 4
    in
    if looks_like_header then rest_of_line <* eol_strict >>| is_arg_section
    else fail "not a section header"
(* --- top-level parser ---
 * walks the help text line by line, reading each line as the first of:
 *   1. a flag entry
 *   2. a section header (lightly indented text ending with ':')
 *   3. a subcommand line (name + 2+ spaces + description)
 *   4. anything else, which is skipped
 * section headers are tried before subcommands so that a header is not
 * misread as a subcommand.
 *
 * post-processing of the collected items:
 *   - subcommands seen while the most recent section header was an argument
 *     section are dropped
 *   - subcommands sharing a name are collapsed to one; a later duplicate
 *     replaces the earlier one only when its description is strictly longer
 *
 * positionals and description are left empty here; parse_help fills in the
 * positionals afterwards. *)
let help_parser =
  let item =
    choice
      [ (entry >>| fun e -> `Entry e);
        (section_header >>| fun is_arg -> `Section is_arg);
        (subcommand_entry >>| fun sc -> `Subcommand sc);
        (skip_non_option_line >>| fun () -> `Skip) ]
  in
  let collect_entries items =
    List.filter_map (function `Entry e -> Some e | _ -> None) items
  in
  (* subcommands in input order, minus those under an argument section *)
  let visible_subcommands items =
    let step (in_arg_sec, acc) = function
      | `Section is_arg -> (is_arg, acc)
      | `Subcommand sc when not in_arg_sec -> (in_arg_sec, sc :: acc)
      | _ -> (in_arg_sec, acc)
    in
    List.fold_left step (false, []) items |> snd |> List.rev
  in
  (* one record per name, preferring the longer description *)
  let keep_richest subs =
    let merge acc (sc : subcommand) =
      match List.assoc_opt sc.name acc with
      | Some (prev : subcommand)
        when String.length prev.desc >= String.length sc.desc -> acc
      | _ -> (sc.name, sc) :: List.remove_assoc sc.name acc
    in
    List.fold_left merge [] subs |> List.rev_map snd
  in
  many item >>| fun items ->
  { entries = collect_entries items;
    subcommands = keep_richest (visible_subcommands items);
    positionals = [];
    description = "" }
(* --- usage line parsing ---
 * usage lines look like: "usage: git add [OPTIONS] [--] [<pathspec>...]".
 * before positionals can be extracted, the command-name prefix ("git add")
 * has to be skipped.
 *
 * skip_command_prefix returns the index in [s] where the argument portion
 * starts. it walks whitespace-separated words and treats a word as part of
 * the command name while the word:
 *   - is made of word characters (letters, digits, '-', '_', '/', '.')
 *   - does not begin with '[', '<', '(', '{' or '-'
 *   - contains at least one lowercase letter, which tells it apart from
 *     ALL_CAPS positional names such as FILE
 * the walk stops at the first word that breaks one of these rules and the
 * index of that word is returned; the string length is returned when the
 * whole string is command name. *)
let skip_command_prefix s =
  let n = String.length s in
  let is_word_char = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' | '/' | '.' -> true
    | _ -> false
  in
  let rec past_blanks k =
    if k < n && (s.[k] = ' ' || s.[k] = '\t') then past_blanks (k + 1) else k
  in
  let rec past_word k =
    if k < n && is_word_char s.[k] then past_word (k + 1) else k
  in
  (* does s[lo..hi) contain a lowercase ascii letter? *)
  let has_lowercase lo hi =
    let rec scan k = k < hi && ((s.[k] >= 'a' && s.[k] <= 'z') || scan (k + 1)) in
    scan lo
  in
  let rec walk k =
    let k = past_blanks k in
    if k >= n then k
    else
      match s.[k] with
      | '[' | '<' | '(' | '{' | '-' -> k
      | c when is_word_char c ->
        let stop = past_word k in
        if has_lowercase k stop then walk stop else k
      | _ -> k
  in
  walk 0
(* parse the argument portion of a usage line into positional definitions.
 * recognised forms:
 *   <file>      - mandatory positional
 *   [file]      - optional positional
 *   FILE        - mandatory positional (ALL_CAPS convention, 2+ characters)
 *   <file>...   - variadic (a utf-8 ellipsis is accepted as well)
 *   [file...]   - optional variadic
 *   {a|b|c}     - alternative block, skipped
 *   -flag       - flag, skipped
 *
 * OPTIONS, OPTION, FLAGS and FLAG (any case) are never reported: they are
 * section labels that appear in usage lines for readability.
 * bracketed and angle-bracketed names must be 2+ characters of letters,
 * digits, '_' or '-'; anything else inside the brackets is ignored.
 *
 * names are lowercased. the result is in order of first appearance, with
 * later repeats of the same name dropped.
 *
 * an unclosed '[' (for instance when the usage line was wrapped) is read up
 * to the end of the string, so "[file" still yields "file". *)
let parse_usage_args s =
  let len = String.length s in
  let i = ref 0 in
  let results = ref [] in
  let emit name ~optional ~variadic =
    results :=
      { pos_name = String.lowercase_ascii name; optional; variadic } :: !results
  in
  let skip_ws () =
    while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done
  in
  let is_pos_char c =
    (c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9')
  in
  (* consume optional whitespace and then "..." or a utf-8 ellipsis;
   * the whitespace stays consumed even when no dots follow *)
  let read_dots () =
    skip_ws ();
    if !i + 2 < len && s.[!i] = '.' && s.[!i + 1] = '.' && s.[!i + 2] = '.' then
      (i := !i + 3; true)
    else if !i + 2 < len
            && s.[!i] = '\xe2' && s.[!i + 1] = '\x80' && s.[!i + 2] = '\xa6' then
      (i := !i + 3; true)
    else false
  in
  let is_skip name =
    match String.uppercase_ascii name with
    | "OPTIONS" | "OPTION" | "FLAGS" | "FLAG" -> true
    | _ -> false
  in
  let is_clean_name name =
    String.length name >= 2
    && String.for_all
         (fun c ->
            (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
            || (c >= '0' && c <= '9') || c = '_' || c = '-')
         name
  in
  let is_letter c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in
  (* with the opener already consumed, advance past the matching closer
   * (nesting aware). returns false when the string ended first. *)
  let skip_group opener closer =
    let depth = ref 1 in
    while !i < len && !depth > 0 do
      if s.[!i] = opener then incr depth
      else if s.[!i] = closer then decr depth;
      incr i
    done;
    !depth = 0
  in
  while !i < len do
    skip_ws ();
    if !i < len then begin
      match s.[!i] with
      | '{' ->
        incr i;
        ignore (skip_group '{' '}' : bool);
        ignore (read_dots () : bool)
      | '[' ->
        incr i;
        let start = !i in
        let closed = skip_group '[' ']' in
        (* when the closer was found, !i is one past it; otherwise the
         * content runs to the end of the string *)
        let stop = if closed then !i - 1 else !i in
        let inner = String.sub s start (stop - start) |> String.trim in
        let inner, has_inner_dots =
          if String.ends_with ~suffix:"..." inner then
            (String.sub inner 0 (String.length inner - 3) |> String.trim, true)
          else (inner, false)
        in
        let variadic = has_inner_dots || read_dots () in
        if String.length inner > 0
           && (is_letter inner.[0] || inner.[0] = '<') then begin
          let name =
            if inner.[0] = '<' then
              let close =
                (match String.index_opt inner '>' with
                 | Some e -> e
                 | None -> String.length inner)
              in
              String.sub inner 1 (close - 1)
            else inner
          in
          if is_clean_name name && not (is_skip name) then
            emit name ~optional:true ~variadic
        end
      | '<' ->
        incr i;
        let start = !i in
        while !i < len && s.[!i] <> '>' do incr i done;
        let name = String.sub s start (!i - start) in
        if !i < len then incr i;
        let variadic = read_dots () in
        if is_clean_name name && not (is_skip name) then
          emit name ~optional:false ~variadic
      | '-' ->
        while !i < len && s.[!i] <> ' ' && s.[!i] <> '\t' && s.[!i] <> ']' do
          incr i
        done
      | 'A' .. 'Z' ->
        let start = !i in
        while !i < len && is_pos_char s.[!i] do incr i done;
        let name = String.sub s start (!i - start) in
        let variadic = read_dots () in
        if String.length name >= 2 && not (is_skip name) then
          emit name ~optional:false ~variadic
      | _ -> incr i
    end
  done;
  (* keep the first occurrence of each name, preserving order *)
  List.rev !results
  |> List.fold_left
       (fun (seen, acc) p ->
          if List.mem p.pos_name seen then (seen, acc)
          else (p.pos_name :: seen, p :: acc))
       ([], [])
  |> snd
  |> List.rev
(* locate the usage line in a help text and extract positionals from it.
 * lines are scanned top to bottom for the first one whose trimmed text
 * starts with "usage:" (case-insensitive) or is exactly "usage".
 *   - inline form:  "usage: cmd [OPTIONS] FILE" uses the text after the colon
 *   - clap form:    "USAGE:" alone, then the usage text on the next line
 *   - bare header:  "usage" alone, then the usage text on the next line
 * when the header carries no text and the next line is blank or missing,
 * the search ends and no positionals are returned.
 * the command-name prefix is removed before the arguments are parsed. *)
let extract_usage_positionals text =
  let nonblank_next = function
    | [] -> None
    | next :: _ ->
      let next = String.trim next in
      if next = "" then None else Some next
  in
  let rec find = function
    | [] -> None
    | line :: rest ->
      let t = String.trim line in
      let lc = String.lowercase_ascii t in
      if String.starts_with ~prefix:"usage:" lc then begin
        let after = String.sub t 6 (String.length t - 6) |> String.trim in
        if after <> "" then Some after else nonblank_next rest
      end
      else if lc = "usage" then nonblank_next rest
      else find rest
  in
  match find (String.split_on_char '\n' text) with
  | None -> []
  | Some usage ->
    let cmd_end = skip_command_prefix usage in
    parse_usage_args (String.sub usage cmd_end (String.length usage - cmd_end))
(* extract positionals from cli11's explicit section. cli11 (a c++ argument
 * parsing library) prints:
 *   Positionals:
 *     name TEXT     description here
 *     count INT     another description
 *
 * the section starts at the first line whose trimmed text is exactly
 * "POSITIONALS:" or "Positionals:". the lines after it are read while they
 * begin with a space or tab and are not blank; the first line that does not
 * ends the section. on each line:
 *   - the name is the leading run of letters, digits, '_' and '-'; lines
 *     whose name is shorter than 2 characters are ignored
 *   - a run of uppercase letters after the name (TEXT, INT, FLOAT, ...) is
 *     stepped over
 *   - a "..." right after that marks the positional as variadic
 * every positional is reported as mandatory, with a lowercased name.
 * returns [] when the section is absent. *)
let extract_cli11_positionals text =
  let is_blank c = c = ' ' || c = '\t' in
  let is_name_char = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' | '-' -> true
    | _ -> false
  in
  let parse_one row =
    let n = String.length row in
    (* index of the first character at or after k that fails pred *)
    let rec span pred k =
      if k < n && pred row.[k] then span pred (k + 1) else k
    in
    let name_end = span is_name_char 0 in
    if name_end < 2 then None
    else begin
      let after_type =
        name_end
        |> span is_blank
        |> span (fun c -> c >= 'A' && c <= 'Z')
        |> span is_blank
      in
      let variadic = after_type + 2 < n && String.sub row after_type 3 = "..." in
      Some
        { pos_name = String.lowercase_ascii (String.sub row 0 name_end);
          optional = false;
          variadic }
    end
  in
  let rec collect acc = function
    | [] -> List.rev acc
    | line :: rest ->
      let indented = line <> "" && is_blank line.[0] in
      let body = String.trim line in
      if (not indented) || body = "" then List.rev acc
      else begin
        let acc =
          (match parse_one body with
           | Some p -> p :: acc
           | None -> acc)
        in
        collect acc rest
      end
  in
  let rec find_section = function
    | [] -> []
    | line :: rest ->
      (match String.trim line with
       | "POSITIONALS:" | "Positionals:" -> collect [] rest
       | _ -> find_section rest)
  in
  find_section (String.split_on_char '\n' text)
(* top-level entry point: turn a --help text into a help_result.
 *   1. strip ansi escapes (colors, hyperlinks, ...)
 *   2. run help_parser for flags and subcommands
 *   3. take positionals from the cli11 section when it yields any,
 *      otherwise from the usage line
 *   4. store the positionals in the result
 * the parser runs in angstrom's prefix-consume mode, so it does not have to
 * consume every byte of the input.
 * returns Error with angstrom's message when the parser fails. *)
let parse_help txt =
  let clean = strip_ansi txt in
  let with_positionals parsed =
    let positionals =
      match extract_cli11_positionals clean with
      | [] -> extract_usage_positionals clean
      | from_cli11 -> from_cli11
    in
    { parsed with positionals }
  in
  Angstrom.parse_string ~consume:Consume.Prefix help_parser clean
  |> Result.map with_positionals

444
lib/store.ml Normal file
View file

@ -0,0 +1,444 @@
(* store.ml — filesystem-backed cache of parsed completion data.
*
* this module handles persistence of completion data to disk. each command's
* help_result is serialized to json and stored as a file in a cache directory
* (default: $XDG_CACHE_HOME/inshellah). commands with native nushell completions
* are stored as .nu files instead.
*
* the store also provides lookup, listing, and subcommand discovery by
* scanning filenames in the cache directory.
*
* file naming convention:
* - spaces in command names become underscores (e.g. "git add" -> "git_add.json")
* - subcommands of a parent share the prefix (e.g. "git_add.json", "git_commit.json")
* - .json files contain serialized help_result
* - .nu files contain native nushell extern source code
*
* the module includes a minimal hand-rolled json parser/serializer because
* we only need to handle our own output format (no need for a full json library).
*)
open Parser
(* default location of the store: $XDG_CACHE_HOME/inshellah.
 * following the xdg base directory rules, XDG_CACHE_HOME is ignored when it
 * is unset, empty, or a relative path; in those cases $HOME/.cache/inshellah
 * is used instead.
 * raises Not_found when the fallback is needed and HOME is not set. *)
let default_store_path () =
  let cache =
    match Sys.getenv_opt "XDG_CACHE_HOME" with
    | Some dir when dir <> "" && not (Filename.is_relative dir) -> dir
    | Some _ | None -> Filename.concat (Sys.getenv "HOME") ".cache"
  in
  Filename.concat cache "inshellah"
(* create [dir] and any missing parents (like mkdir -p).
 * a path that already exists is left alone; new directories are created
 * with mode 0o755. *)
let rec ensure_dir dir =
  if not (Sys.file_exists dir) then begin
    ensure_dir (Filename.dirname dir);
    Unix.mkdir dir 0o755
  end
(* map a command name onto a safe filename, one character at a time:
 * a space becomes '_', ascii letters, digits, '-', '_' and '.' are kept,
 * and every other character becomes '-'.
 * e.g. "git add" → "git_add", "docker-compose" → "docker-compose" *)
let filename_of_command cmd =
  let sanitize ch =
    match ch with
    | ' ' -> '_'
    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.' -> ch
    | _ -> '-' in
  String.map sanitize cmd
(* inverse of filename_of_command: every underscore becomes a space.
 * peculiarity: this is lossy. underscores that were part of the original
 * command name (e.g. "my_tool") also come back as spaces. in practice this
 * doesn't matter because tools with underscores in names are rare,
 * and subcommands use space-separated naming. *)
let command_of_filename base =
  String.split_on_char '_' base |> String.concat " "
(* --- json serialization of help_result ---
* hand-rolled json emitters. we don't use a json library because:
 * 1. the schema is fixed and simple: we only serialize our own types
* 2. avoiding dependencies keeps the binary small
* 3. printf-style emission is fast and straightforward for our types *)
(* escape a string for embedding between json double quotes.
 * quote and backslash get a backslash prefix; newline, tab and carriage
 * return use their short escapes; any remaining byte below 0x20 is written
 * as a \u00XX escape. all other bytes are copied through unchanged. *)
let escape_json s =
  let out = Buffer.create (String.length s + 4) in
  let emit ch =
    match ch with
    | '"' -> Buffer.add_string out "\\\""
    | '\\' -> Buffer.add_string out "\\\\"
    | '\n' -> Buffer.add_string out "\\n"
    | '\t' -> Buffer.add_string out "\\t"
    | '\r' -> Buffer.add_string out "\\r"
    | '\x00'..'\x1f' -> Printf.bprintf out "\\u%04x" (Char.code ch)
    | _ -> Buffer.add_char out ch in
  String.iter emit s;
  Buffer.contents out
(* a json string literal: the escaped text wrapped in double quotes *)
let json_string s = escape_json s |> Printf.sprintf "\"%s\""

(* the json null literal, emitted for absent optional values *)
let json_null = "null"
(* serialize a switch as an object tagged by "type" ("short", "long" or
 * "both"); short flags are emitted as one-character json strings *)
let json_switch_of sw =
  let flag_char c = json_string (String.make 1 c) in
  match sw with
  | Short c ->
    Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (flag_char c)
  | Long name ->
    Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string name)
  | Both (c, name) ->
    Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}"
      (flag_char c) (json_string name)
(* serialize an optional flag parameter: null when absent, otherwise an
 * object carrying its kind ("mandatory" / "optional") and its name *)
let json_param_of param =
  let render = function
    | Mandatory name ->
      Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}" (json_string name)
    | Optional name ->
      Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}" (json_string name) in
  Option.fold ~none:json_null ~some:render param
(* serialize one flag entry: its switch, optional parameter and description *)
let json_entry_of { switch; param; desc } =
  let switch_json = json_switch_of switch in
  let param_json = json_param_of param in
  Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}"
    switch_json param_json (json_string desc)
(* serialize a subcommand as a {name, desc} object *)
let json_subcommand_of sc =
  let name_json = json_string sc.name in
  let desc_json = json_string sc.desc in
  Printf.sprintf "{\"name\":%s,\"desc\":%s}" name_json desc_json
(* serialize a positional argument; the two flags are printed with %b and
 * therefore appear as bare json booleans *)
let json_positional_of { pos_name; optional; variadic } =
  Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}"
    (json_string pos_name) optional variadic
(* render a json array: each item is serialized with [f] and the results
 * are joined with commas inside square brackets *)
let json_list f items =
  let body = items |> List.map f |> String.concat "," in
  String.concat "" [ "["; body; "]" ]
(* serialize a whole help_result as one json object. [source] records how
 * the data was obtained and defaults to "help". *)
let json_of_help_result ?(source="help") r =
  let { description; entries; subcommands; positionals } = r in
  let entries_json = json_list json_entry_of entries in
  let subcommands_json = json_list json_subcommand_of subcommands in
  let positionals_json = json_list json_positional_of positionals in
  Printf.sprintf "{\"source\":%s,\"description\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}"
    (json_string source)
    (json_string description)
    entries_json
    subcommands_json
    positionals_json
(* --- json deserialization ---
 * minimal hand-rolled recursive-descent json parser. only handles the subset
 * we emit: strings, booleans, nulls, arrays, and objects. no number parsing
 * (we don't emit numbers). the parser is meant for reading back our own
 * serialized format, but cache files can be truncated or corrupted on disk,
 * so malformed input must fail with Json_error instead of hanging.
 *
 * peculiarity: the \u escape handler does basic utf-8 encoding for code points
 * up to 0xffff but doesn't handle surrogate pairs. this is fine for our use
 * case since we only escape control characters below 0x20. *)
type json =
  | Jnull
  | Jbool of bool
  | Jstring of string
  | Jarray of json list
  | Jobject of (string * json) list

(* json accessor helpers — return sensible defaults for missing/wrong types *)

(* value bound to [key] in an object (first binding wins); Jnull when the
 * key is absent or the value is not an object *)
let json_get key = function
  | Jobject pairs -> Option.value (List.assoc_opt key pairs) ~default:Jnull
  | _ -> Jnull

(* string payload, or "" for any non-string value *)
let json_to_string = function Jstring s -> s | _ -> ""

(* boolean payload, or false for any non-boolean value *)
let json_to_bool = function Jbool b -> b | _ -> false

(* array elements, or [] for any non-array value *)
let json_to_list = function Jarray l -> l | _ -> []

(* raised by parse_json on malformed or truncated input *)
exception Json_error of string

(* imperative recursive-descent json parser.
 * uses a mutable position ref to walk through the string and returns the
 * first json value found; anything after that value is ignored.
 * raises Json_error on malformed input, including an unterminated string,
 * a truncated or non-hex \u escape, and a misspelled true/false/null. *)
let parse_json s =
  let len = String.length s in
  let pos = ref 0 in
  let fail msg = raise (Json_error msg) in
  (* '\x00' doubles as the end-of-input marker for single-character lookahead *)
  let peek () = if !pos < len then s.[!pos] else '\x00' in
  let advance () = incr pos in
  let skip_ws () =
    while !pos < len
          && (match s.[!pos] with
              | ' ' | '\t' | '\n' | '\r' -> true
              | _ -> false) do
      advance ()
    done in
  let expect c =
    skip_ws ();
    if peek () <> c then
      fail (Printf.sprintf "expected '%c' at %d" c !pos);
    advance () in
  (* consume the exact keyword [word] at the current position *)
  let literal word value =
    let n = String.length word in
    if !pos + n <= len && String.sub s !pos n = word then begin
      pos := !pos + n;
      value
    end else
      fail (Printf.sprintf "unexpected '%c' at %d" (peek ()) !pos) in
  (* decode the four hex digits after a 'u'; on return !pos sits on the
   * last digit so the caller's single advance steps past the escape *)
  let hex4 () =
    if !pos + 4 >= len then
      fail (Printf.sprintf "truncated \\u escape at %d" !pos);
    let digits = String.sub s (!pos + 1) 4 in
    let is_hex = function
      | '0'..'9' | 'a'..'f' | 'A'..'F' -> true
      | _ -> false in
    if not (String.for_all is_hex digits) then
      fail (Printf.sprintf "invalid \\u escape at %d" !pos);
    pos := !pos + 4;
    int_of_string ("0x" ^ digits) in
  (* append a code point (at most 0xffff) to [buf] as utf-8 *)
  let add_utf8 buf code =
    if code < 0x80 then Buffer.add_char buf (Char.chr code)
    else if code < 0x800 then begin
      Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end else begin
      Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12)));
      Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end in
  let rec parse_value () =
    skip_ws ();
    match peek () with
    | '"' -> Jstring (parse_string ())
    | '{' -> parse_object ()
    | '[' -> parse_array ()
    | 'n' -> literal "null" Jnull
    | 't' -> literal "true" (Jbool true)
    | 'f' -> literal "false" (Jbool false)
    | c -> fail (Printf.sprintf "unexpected '%c' at %d" c !pos)
  and parse_string () =
    expect '"';
    let buf = Buffer.create 32 in
    let rec scan () =
      (* end of input is checked explicitly: peek's '\x00' sentinel never
       * equals '"', so relying on it would loop forever on truncated data *)
      if !pos >= len then
        fail (Printf.sprintf "unterminated string at %d" !pos);
      match s.[!pos] with
      | '"' -> advance () (* closing quote *)
      | '\\' ->
        advance ();
        if !pos >= len then
          fail (Printf.sprintf "unterminated string at %d" !pos);
        (match s.[!pos] with
         | 'n' -> Buffer.add_char buf '\n'
         | 't' -> Buffer.add_char buf '\t'
         | 'r' -> Buffer.add_char buf '\r'
         | 'b' -> Buffer.add_char buf '\b'
         | 'f' -> Buffer.add_char buf '\012'
         | 'u' -> add_utf8 buf (hex4 ())
         (* covers \" \\ and \/ : the escaped character stands for itself *)
         | c -> Buffer.add_char buf c);
        advance ();
        scan ()
      | c ->
        Buffer.add_char buf c;
        advance ();
        scan ()
    in
    scan ();
    Buffer.contents buf
  and parse_object () =
    expect '{';
    skip_ws ();
    if peek () = '}' then (advance (); Jobject [])
    else begin
      let rec members acc =
        skip_ws ();
        let key = parse_string () in
        expect ':';
        let value = parse_value () in
        let acc = (key, value) :: acc in
        skip_ws ();
        if peek () = ',' then (advance (); members acc)
        else (expect '}'; Jobject (List.rev acc)) in
      members []
    end
  and parse_array () =
    expect '[';
    skip_ws ();
    if peek () = ']' then (advance (); Jarray [])
    else begin
      let rec items acc =
        let acc = parse_value () :: acc in
        skip_ws ();
        if peek () = ',' then (advance (); items acc)
        else (expect ']'; Jarray (List.rev acc)) in
      items []
    end
  in
  parse_value ()
(* --- json → ocaml type converters ---
* these reconstruct our parser types from their json representations.
* they mirror the json_*_of serializers above. *)
(* rebuild a switch from its json object. a missing or empty "char" becomes
 * '?', and an unknown "type" tag yields Long "?". *)
let switch_of_json j =
  let field key = json_to_string (json_get key j) in
  let flag_char () =
    match field "char" with
    | "" -> '?'
    | c -> c.[0] in
  match field "type" with
  | "short" -> Short (flag_char ())
  | "long" -> Long (field "name")
  | "both" ->
    let c = flag_char () in
    Both (c, field "name")
  | _ -> Long "?"
(* rebuild an optional parameter: json null and any unrecognised "kind"
 * both map to None *)
let param_of_json j =
  match j with
  | Jnull -> None
  | _ ->
    let name = json_to_string (json_get "name" j) in
    let kind = json_to_string (json_get "kind" j) in
    if kind = "mandatory" then Some (Mandatory name)
    else if kind = "optional" then Some (Optional name)
    else None
(* rebuild a flag entry from its json object *)
let entry_of_json j =
  let field key = json_get key j in
  let switch = switch_of_json (field "switch") in
  let param = param_of_json (field "param") in
  let desc = json_to_string (field "desc") in
  { switch; param; desc }
(* rebuild a subcommand from its json object; missing fields become "" *)
let subcommand_of_json j =
  let text key = json_to_string (json_get key j) in
  { name = text "name"; desc = text "desc" }
(* rebuild a positional argument; missing flags default to false *)
let positional_of_json j =
  let flag key = json_to_bool (json_get key j) in
  let pos_name = json_to_string (json_get "name" j) in
  { pos_name; optional = flag "optional"; variadic = flag "variadic" }
(* rebuild a help_result from its json object. a missing list field is read
 * as empty and a missing description as "". the "source" field is not part
 * of help_result and is ignored here. *)
let help_result_of_json j =
  let items key convert = json_get key j |> json_to_list |> List.map convert in
  let entries = items "entries" entry_of_json in
  let subcommands = items "subcommands" subcommand_of_json in
  let positionals = items "positionals" positional_of_json in
  let description = json_to_string (json_get "description" j) in
  { entries; subcommands; positionals; description }
(* --- filesystem operations --- *)
(* write [contents] to [path], creating or truncating the file.
 * the channel is closed even when the write fails, so a failed write no
 * longer leaks a file descriptor. close_out runs inside the protected body
 * so flush errors are still reported; the finalizer is then a no-op.
 * raises Sys_error if the file cannot be opened or written. *)
let write_file path contents =
  let oc = open_out_bin path in
  Fun.protect ~finally:(fun () -> close_out_noerr oc) (fun () ->
    output_string oc contents;
    close_out oc)
(* read the whole file at [path]; None if it cannot be opened or read.
 * the channel is always closed, including when the length query or the
 * read fails (e.g. [path] is a directory); previously that leaked the
 * descriptor. the file is opened in binary mode so the byte count from
 * in_channel_length matches what is read. *)
let read_file path =
  match open_in_bin path with
  | exception Sys_error _ -> None
  | ic ->
    Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () ->
      match really_input_string ic (in_channel_length ic) with
      | contents -> Some contents
      | exception (Sys_error _ | End_of_file) -> None)
(* persist a parsed help_result as "<dir>/<command filename>.json".
 * [source] is stored in the json and defaults to "help". *)
let write_result ~dir ?(source="help") command result =
  let file = filename_of_command command ^ ".json" in
  json_of_help_result ~source result
  |> write_file (Filename.concat dir file)
(* persist native nushell completion source verbatim as
 * "<dir>/<command filename>.nu" *)
let write_native ~dir command data =
  let file = filename_of_command command ^ ".nu" in
  write_file (Filename.concat dir file) data
(* true only when [path] exists and is a directory. Sys.is_directory raises
 * Sys_error for a path that cannot be stat'ed, which is mapped to false. *)
let is_dir path =
  try Sys.is_directory path with Sys_error _ -> false
(* locate a command's data file across several store directories.
 * within each directory the .json file is preferred over the .nu file;
 * directories are tried in the order given and the first hit is returned. *)
let find_file dirs command =
  let base = filename_of_command command in
  let candidates dir =
    [ Filename.concat dir (base ^ ".json");
      Filename.concat dir (base ^ ".nu") ] in
  List.find_map (fun dir -> List.find_opt Sys.file_exists (candidates dir)) dirs
(* load and deserialize a command's help_result.
 * only .json files are considered (.nu files cannot be turned back into a
 * help_result). a directory whose file is missing, unreadable or fails to
 * parse is skipped and the next one is tried; None if none succeeds. *)
let lookup dirs command =
  let file = filename_of_command command ^ ".json" in
  let load dir =
    Option.bind (read_file (Filename.concat dir file)) (fun data ->
      try Some (help_result_of_json (parse_json data))
      with _ -> None) in
  List.find_map load dirs
(* fetch a command's stored data as-is, without parsing it.
 * per directory the .json file is tried first, then the .nu file.
 * used by the "query" command to dump stored data. *)
let lookup_raw dirs command =
  let base = filename_of_command command in
  let read_with dir ext = read_file (Filename.concat dir (base ^ ext)) in
  List.find_map (fun dir ->
    match read_with dir ".json" with
    | None -> read_with dir ".nu"
    | found -> found
  ) dirs
(* strip a store extension (".json" or ".nu") from a filename;
 * None when the name carries neither *)
let chop_extension f =
  List.find_map (fun ext ->
    if Filename.check_suffix f ext then Some (Filename.chop_suffix f ext)
    else None
  ) [ ".json"; ".nu" ]
(* discover subcommands of a command by scanning filenames in the store.
 * a file qualifies when its name starts with the command's filename + "_"
 * (e.g. for "git": "git_add.json", "git_commit.json") and it carries a
 * store extension.
 *
 * only immediate subcommands are returned: the part after the prefix must
 * be non-empty and contain no further underscore. the first file seen for
 * a given subcommand wins; later duplicates are ignored. for .json files
 * the description comes from the "description" field, with "" used when
 * the file cannot be read or parsed; .nu files always get "".
 * the result is ordered by subcommand name.
 *
 * peculiarity: this filesystem-based discovery is used as a fallback when the
 * command's own help_result doesn't list subcommands. it enables completion
 * for subcommands that were indexed from separate manpages or help runs. *)
let subcommands_of dirs command =
  let prefix = filename_of_command command ^ "_" in
  let plen = String.length prefix in
  let module SMap = Map.Make(String) in
  let description_of dir file =
    if not (Filename.check_suffix file ".json") then ""
    else
      match read_file (Filename.concat dir file) with
      | None -> ""
      | Some data ->
        (try json_to_string (json_get "description" (parse_json data))
         with _ -> "") in
  let consider dir found file =
    if not (String.starts_with ~prefix file) then found
    else
      match chop_extension file with
      | None -> found
      | Some stem ->
        let rest = String.sub stem plen (String.length stem - plen) in
        if rest = "" || String.contains rest '_' || SMap.mem rest found
        then found
        else SMap.add rest { name = rest; desc = description_of dir file } found in
  let scan found dir =
    if is_dir dir then Array.fold_left (consider dir) found (Sys.readdir dir)
    else found in
  List.fold_left scan SMap.empty dirs
  |> SMap.bindings
  |> List.map snd
(* list every indexed command across all store directories.
 * entries without a store extension are ignored, as are paths in [dirs]
 * that are not directories. returns a sorted, deduplicated list. *)
let all_commands dirs =
  let module SSet = Set.Make(String) in
  let files_in dir =
    if is_dir dir then Array.to_list (Sys.readdir dir) else [] in
  dirs
  |> List.concat_map files_in
  |> List.filter_map chop_extension
  |> List.map command_of_filename
  |> SSet.of_list
  |> SSet.elements
(* report how a command was indexed.
 * for a .json file this is its "source" field, with "json" used when the
 * file cannot be read or parsed; for a .nu file it is "native". directories
 * are tried in order and json takes precedence within each. None when the
 * command is not in any store. used by the "dump" command. *)
let file_type_of dirs command =
  let base = filename_of_command command in
  let probe dir =
    let in_dir ext = Filename.concat dir (base ^ ext) in
    let json_path = in_dir ".json" in
    if Sys.file_exists json_path then
      Some (match read_file json_path with
            | None -> "json"
            | Some data ->
              (try json_to_string (json_get "source" (parse_json data))
               with _ -> "json"))
    else if Sys.file_exists (in_dir ".nu") then Some "native"
    else None in
  List.find_map probe dirs

92
nix/module.nix Normal file
View file

@ -0,0 +1,92 @@
# NixOS module: automatic nushell completion indexing
#
# Indexes completions using three strategies in priority order:
# 1. Native completion generators (e.g. CMD completions nushell)
# 2. Manpage parsing
# 3. --help output parsing
#
# Produces a directory of .json/.nu files at build time.
# The `complete` command reads from this directory as a system overlay.
#
# Usage:
# { pkgs, ... }: {
# imports = [ ./path/to/inshellah/nix/module.nix ];
# programs.inshellah.enable = true;
# }
{
  config,
  lib,
  pkgs,
  ...
}:
let
  cfg = config.programs.inshellah;
in
{
  options.programs.inshellah = {
    enable = lib.mkEnableOption "nushell completion indexing via inshellah";

    # no default: the package must be set by whoever enables the module
    package = lib.mkOption {
      type = lib.types.package;
      description = "package to use for indexing completions";
    };

    completionsPath = lib.mkOption {
      type = lib.types.str;
      default = "/share/inshellah";
      description = ''
        subdirectory within the system profile where completion files
        are placed. used as --system-dir for the completer.
      '';
    };

    ignoreCommands = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [ ];
      example = [ "problematic-tool" ];
      description = ''
        list of command names to skip during completion indexing
      '';
    };

    helpOnlyCommands = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [ ];
      example = [ "nix" ];
      description = ''
        list of command names to skip manpage parsing for,
        using --help scraping instead
      '';
    };

    # NOTE(review): read-only option with no default and no definition in
    # this module — presumably set elsewhere; verify before relying on it.
    snippet = lib.mkOption {
      type = lib.types.str;
      readOnly = true;
    };
  };

  config = lib.mkIf cfg.enable {
    # use the same cfg binding as the rest of the module
    environment.systemPackages = [ cfg.package ];

    # runs while the system profile is assembled; $out is the profile root
    environment.extraSetup =
      let
        inshellah = "${cfg.package}/bin/inshellah";
        destDir = "$out${cfg.completionsPath}";
        # the lists are passed as newline-separated files, and only when non-empty
        ignoreFile = pkgs.writeText "inshellah-ignore" (lib.concatStringsSep "\n" cfg.ignoreCommands);
        ignoreFlag = lib.optionalString (cfg.ignoreCommands != [ ]) " --ignore ${ignoreFile}";
        helpOnlyFile = pkgs.writeText "inshellah-help-only" (lib.concatStringsSep "\n" cfg.helpOnlyCommands);
        helpOnlyFlag = lib.optionalString (cfg.helpOnlyCommands != [ ]) " --help-only ${helpOnlyFile}";
      in
      ''
        mkdir -p "${destDir}"
        if [ -d "$out/bin" ] && [ -d "$out/share/man" ]; then
          # best effort: an indexing failure must not break the system build
          ${inshellah} index "$out" --dir "${destDir}"${ignoreFlag}${helpOnlyFlag} \
            2>/dev/null || true
        fi
        # drop empty outputs (including the directory itself when nothing was indexed)
        find "${destDir}" -maxdepth 1 -empty -delete
      '';
  };
}

3
test/dune Normal file
View file

@ -0,0 +1,3 @@
; test stanza for test_inshellah.ml. links the inshellah library under test
; plus str, which the test file uses for Str-based substring searches.
(test
(name test_inshellah)
(libraries inshellah str))

492
test/test_inshellah.ml Normal file
View file

@ -0,0 +1,492 @@
open Inshellah.Parser
open Inshellah.Manpage
open Inshellah.Nushell
(* running tallies of failed and passed checks *)
let failures = ref 0
let passes = ref 0

(* record one named assertion: bump the matching tally and print a
 * PASS/FAIL line carrying the check's name *)
let check name condition =
  match condition with
  | true ->
    incr passes;
    Printf.printf " PASS: %s\n" name
  | false ->
    incr failures;
    Printf.printf " FAIL: %s\n" name
(* run parse_help and unwrap the result; a parser error aborts the test
 * run via Failure with the parser's message *)
let parse txt =
  parse_help txt
  |> Result.fold
       ~ok:Fun.id
       ~error:(fun msg -> failwith (Printf.sprintf "parse_help failed: %s" msg))
(* --- Help parser tests --- *)
(* gnu-style "-a, --all" line: one entry, combined short/long switch, no
 * param. the per-entry checks are guarded so that an empty parse shows up
 * as a FAIL of "one entry" instead of aborting the run with Failure "hd". *)
let test_gnu_basic () =
  Printf.printf "\n== GNU basic flags ==\n";
  let r = parse " -a, --all do not ignore entries starting with .\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "both switch" (e.switch = Both ('a', "all"));
    check "no param" (e.param = None);
    check "desc" (String.length e.desc > 0)
(* "--flag=VALUE" must yield a long switch with a mandatory parameter.
 * entry checks are skipped (not crashed on) when nothing was parsed. *)
let test_gnu_eq_param () =
  Printf.printf "\n== GNU = param ==\n";
  let r = parse " --block-size=SIZE scale sizes by SIZE\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "long switch" (e.switch = Long "block-size");
    check "mandatory param" (e.param = Some (Mandatory "SIZE"))
(* "--flag[=VALUE]" must yield a long switch with an optional parameter.
 * entry checks are skipped (not crashed on) when nothing was parsed. *)
let test_gnu_opt_param () =
  Printf.printf "\n== GNU optional param ==\n";
  let r = parse " --color[=WHEN] color the output WHEN\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "long switch" (e.switch = Long "color");
    check "optional param" (e.param = Some (Optional "WHEN"))
(* parameter names may contain underscores (TIME_STYLE).
 * the entry check is skipped (not crashed on) when nothing was parsed. *)
let test_underscore_param () =
  Printf.printf "\n== Underscore in param (TIME_STYLE) ==\n";
  let r = parse " --time-style=TIME_STYLE time/date format\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "param with underscore" (e.param = Some (Mandatory "TIME_STYLE"))
(* a lone "-v" line must parse as a Short switch.
 * the switch check is skipped (not crashed on) when nothing was parsed. *)
let test_short_only () =
  Printf.printf "\n== Short-only flag ==\n";
  let r = parse " -v verbose output\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ -> check "short switch" (e.switch = Short 'v')
(* a lone "--help" line must parse as a Long switch.
 * the switch check is skipped (not crashed on) when nothing was parsed. *)
let test_long_only () =
  Printf.printf "\n== Long-only flag ==\n";
  let r = parse " --help display help\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ -> check "long switch" (e.switch = Long "help")
(* a description continued on the next line must be folded into one entry.
 * the length check is skipped (not crashed on) when nothing was parsed. *)
let test_multiline_desc () =
  Printf.printf "\n== Multi-line description ==\n";
  let r = parse {| --block-size=SIZE with -l, scale sizes by SIZE when printing them;
e.g., '--block-size=M'; see SIZE format below
|} in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ -> check "desc includes continuation" (String.length e.desc > 50)
(* three consecutive flag lines must produce three entries *)
let test_multiple_entries () =
  Printf.printf "\n== Multiple entries ==\n";
  let parsed = parse {| -a, --all do not ignore entries starting with .
-A, --almost-all do not list implied . and ..
--author with -l, print the author of each file
|} in
  check "three entries" (List.compare_length_with parsed.entries 3 = 0)
(* flags grouped under section headers ("INPUT OPTIONS:") must all be
 * collected. first-entry checks are skipped (not crashed on) when nothing
 * was parsed. *)
let test_clap_short_sections () =
  Printf.printf "\n== Clap short with section headers ==\n";
  let r = parse {|INPUT OPTIONS:
-e, --regexp=PATTERN A pattern to search for.
-f, --file=PATTERNFILE Search for patterns from the given file.
SEARCH OPTIONS:
-s, --case-sensitive Search case sensitively.
|} in
  check "three entries" (List.length r.entries = 3);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "first is regexp" (e.switch = Both ('e', "regexp"));
    check "first has param" (e.param = Some (Mandatory "PATTERN"))
(* clap's long layout puts the description on the line below the flag.
 * first-entry checks are skipped (not crashed on) when nothing was parsed. *)
let test_clap_long_style () =
  Printf.printf "\n== Clap long style (desc below flag) ==\n";
  let r = parse {| -H, --hidden
Include hidden directories and files.
--no-ignore
Do not respect ignore files.
|} in
  check "two entries" (List.length r.entries = 2);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "hidden switch" (e.switch = Both ('H', "hidden"));
    check "desc below" (String.length e.desc > 0)
(* "--flag <value>" must yield a mandatory parameter named without the
 * angle brackets. entry checks are skipped (not crashed on) when nothing
 * was parsed. *)
let test_clap_long_angle_param () =
  Printf.printf "\n== Clap long angle bracket param ==\n";
  let r = parse {| --nonprintable-notation <notation>
Set notation for non-printable characters.
|} in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "long switch" (e.switch = Long "nonprintable-notation");
    check "angle param" (e.param = Some (Mandatory "notation"))
(* an ALL_CAPS word separated from the flag by a space is its parameter.
 * entry checks are skipped (not crashed on) when nothing was parsed. *)
let test_space_upper_param () =
  Printf.printf "\n== Space-separated ALL_CAPS param ==\n";
  let r = parse " -f, --foo FOO foo help\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "switch" (e.switch = Both ('f', "foo"));
    check "space param" (e.param = Some (Mandatory "FOO"))
(* cobra "Flags:" section: the type word after a flag ("string") is its
 * parameter. the second entry is fetched with nth_opt so a short parse is
 * reported by "three flag entries" instead of raising Failure "nth". *)
let test_go_cobra_flags () =
  Printf.printf "\n== Go/Cobra flags ==\n";
  let r = parse {|Flags:
-D, --debug Enable debug mode
-H, --host string Daemon socket to connect to
-v, --version Print version information
|} in
  check "three flag entries" (List.length r.entries = 3);
  (* Check the host flag has a type param *)
  match List.nth_opt r.entries 1 with
  | None -> ()
  | Some host ->
    check "host switch" (host.switch = Both ('H', "host"));
    check "host type param" (host.param = Some (Mandatory "string"))
(* a cobra "Common Commands:" section must yield at least one subcommand *)
let test_go_cobra_subcommands () =
  Printf.printf "\n== Go/Cobra subcommands ==\n";
  let parsed = parse {|Common Commands:
run Create and run a new container from an image
exec Execute a command in a running container
build Build an image from a Dockerfile
|} in
  let found_any =
    match parsed.subcommands with
    | [] -> false
    | _ :: _ -> true in
  check "has subcommands" found_any
(* busybox separates flag and description with tabs, and flags may be digits.
 * the switch check is skipped (not crashed on) when nothing was parsed. *)
let test_busybox_tab () =
  Printf.printf "\n== Busybox tab-indented ==\n";
  let r = parse "\t-1\tOne column output\n\t-a\tInclude names starting with .\n" in
  check "two entries" (List.length r.entries = 2);
  match r.entries with
  | [] -> ()
  | e :: _ -> check "first is -1" (e.switch = Short '1')
(* placeholder that always passes: it only documents that loading the
 * parser module should print nothing. *)
let test_no_debug_prints () =
  let () = Printf.printf "\n== No debug side effects ==\n" in
  (* The old parser had print_endline at module load time.
     If we got here without "opt param is running" on stdout, we're good. *)
  check "no debug prints" true
(* --- Manpage parser tests --- *)
(* .TP-style OPTIONS section: each tagged paragraph is one entry. the
 * per-entry checks only run when the entry in question exists. *)
let test_manpage_tp_style () =
  Printf.printf "\n== Manpage .TP style ==\n";
  let page = {|.SH OPTIONS
.TP
\fB\-a\fR, \fB\-\-all\fR
do not ignore entries starting with .
.TP
\fB\-A\fR, \fB\-\-almost\-all\fR
do not list implied . and ..
.TP
\fB\-\-block\-size\fR=\fISIZE\fR
with \fB\-l\fR, scale sizes by SIZE
.SH AUTHOR
Written by someone.
|} in
  let parsed = parse_manpage_string page in
  check "three entries" (List.length parsed.entries = 3);
  (match parsed.entries with
   | first :: _ ->
     check "first is -a/--all" (first.switch = Both ('a', "all"));
     check "first desc" (String.length first.desc > 0)
   | [] -> ());
  (match List.nth_opt parsed.entries 2 with
   | Some third ->
     check "block-size switch" (third.switch = Long "block-size");
     check "block-size param" (third.param = Some (Mandatory "SIZE"))
   | None -> ())
(* .IP-style OPTIONS section: the quoted tag holds the flags. the switch
 * check only runs when at least one entry was produced. *)
let test_manpage_ip_style () =
  Printf.printf "\n== Manpage .IP style ==\n";
  let page = {|.SH OPTIONS
.IP "\fB\-k\fR, \fB\-\-insecure\fR"
Allow insecure connections.
.IP "\fB\-o\fR, \fB\-\-output\fR \fIfile\fR"
Write output to file.
.SH SEE ALSO
|} in
  let parsed = parse_manpage_string page in
  check "two entries" (List.length parsed.entries = 2);
  match parsed.entries with
  | first :: _ ->
    check "first is -k/--insecure" (first.switch = Both ('k', "insecure"))
  | [] -> ()
(* strip_groff_escapes must drop font escapes, turn \- into '-' and
 * \(aq into an apostrophe *)
let test_manpage_groff_stripping () =
  Printf.printf "\n== Groff escape stripping ==\n";
  let stripped = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in
  let has ch = String.contains stripped ch in
  check "font escapes removed" (not (has 'f') || not (has 'B'));
  check "dashes converted" (has '-');
  let quoted = strip_groff_escapes {|\(aqhello\(aq|} in
  check "aq -> quote" (String.contains quoted '\'')
(* a page without an OPTIONS section must produce no entries *)
let test_manpage_empty_options () =
  Printf.printf "\n== Manpage with no OPTIONS section ==\n";
  let page = {|.SH NAME
foo \- does stuff
.SH DESCRIPTION
Does stuff.
|} in
  let parsed = parse_manpage_string page in
  check "no entries" (List.compare_length_with parsed.entries 0 = 0)
(* "--long / -s" (slash-separated, long form first) must parse as one Both
 * switch. entry checks are skipped (not crashed on) when nothing was parsed. *)
let test_slash_switch_separator () =
  Printf.printf "\n== Slash switch separator (--long / -s) ==\n";
  let r = parse " --verbose / -v Increase verbosity\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "both switch" (e.switch = Both ('v', "verbose"));
    check "no param" (e.param = None);
    check "desc" (e.desc = "Increase verbosity")
(* nix3 manpages render each option as a bullet .IP with the flag inside a
 * .UR/.UE link and the description in a following bare .IP paragraph. the
 * per-entry checks only run when the entry in question exists. *)
let test_manpage_nix3_style () =
  Printf.printf "\n== Manpage nix3 style ==\n";
  let page = {|.SH Options
.SS Logging-related options
.IP "\(bu" 3
.UR #opt-verbose
\f(CR--verbose\fR
.UE
/ \f(CR-v\fR
.IP
Increase the logging verbosity level.
.IP "\(bu" 3
.UR #opt-quiet
\f(CR--quiet\fR
.UE
.IP
Decrease the logging verbosity level.
.SH SEE ALSO
|} in
  let parsed = parse_manpage_string page in
  check "two entries" (List.length parsed.entries = 2);
  (match parsed.entries with
   | verbose :: _ ->
     check "verbose is Both" (verbose.switch = Both ('v', "verbose"));
     check "verbose desc" (String.length verbose.desc > 0)
   | [] -> ());
  (match List.nth_opt parsed.entries 1 with
   | Some quiet ->
     check "quiet is Long" (quiet.switch = Long "quiet");
     check "quiet desc" (String.length quiet.desc > 0)
   | None -> ())
(* nix3-style options whose parameters follow the .UE link, with or without
 * a "/ -X" short alias. the per-entry checks only run when the entry in
 * question exists. *)
let test_manpage_nix3_with_params () =
  Printf.printf "\n== Manpage nix3 with params ==\n";
  let page = {|.SH Options
.IP "\(bu" 3
.UR #opt-arg
\f(CR--arg\fR
.UE
\fIname\fR \fIexpr\fR
.IP
Pass the value as the argument name to Nix functions.
.IP "\(bu" 3
.UR #opt-include
\f(CR--include\fR
.UE
/ \f(CR-I\fR \fIpath\fR
.IP
Add path to search path entries.
.IP
This option may be given multiple times.
.SH SEE ALSO
|} in
  let parsed = parse_manpage_string page in
  check "two entries" (List.length parsed.entries = 2);
  (match parsed.entries with
   | arg :: _ ->
     check "arg is Long" (arg.switch = Long "arg");
     check "arg has param" (arg.param <> None)
   | [] -> ());
  (match List.nth_opt parsed.entries 1 with
   | Some incl ->
     check "include is Both" (incl.switch = Both ('I', "include"));
     check "include has path param" (incl.param = Some (Mandatory "path"))
   | None -> ())
(* two bold words at the start of the SYNOPSIS line name a subcommand *)
let test_synopsis_subcommand () =
  Printf.printf "\n== SYNOPSIS subcommand detection ==\n";
  let page = {|.SH "SYNOPSIS"
.sp
.nf
\fBgit\fR \fBcommit\fR [\fB\-a\fR | \fB\-\-interactive\fR]
.fi
.SH "DESCRIPTION"
|} in
  let detected = extract_synopsis_command page in
  check "detected git commit" (Option.equal String.equal detected (Some "git commit"))
(* a single hyphenated name in the synopsis is a standalone command *)
let test_synopsis_standalone () =
  Printf.printf "\n== SYNOPSIS standalone command ==\n";
  let page = {|.SH Synopsis
.LP
\f(CRnix-build\fR [\fIpaths\fR]
.SH Description
|} in
  let detected = extract_synopsis_command page in
  check "detected nix-build" (Option.equal String.equal detected (Some "nix-build"))
(* nix3 pages put "command subcommand" inside one font span *)
let test_synopsis_nix3 () =
  Printf.printf "\n== SYNOPSIS nix3 subcommand ==\n";
  let page = {|.SH Synopsis
.LP
\f(CRnix run\fR [\fIoption\fR] \fIinstallable\fR
.SH Description
|} in
  let detected = extract_synopsis_command page in
  check "detected nix run" (Option.equal String.equal detected (Some "nix run"))
(* --- Nushell generation tests --- *)
(* true when [sub] occurs somewhere in [s]; [sub] is matched literally,
 * not as a regular expression *)
let contains s sub =
  match Str.search_forward (Str.regexp_string sub) s 0 with
  | _ -> true
  | exception Not_found -> false
(* a single flag becomes an extern with "--long(-s)" syntax and a comment *)
let test_nushell_basic () =
  Printf.printf "\n== Nushell basic extern ==\n";
  let parsed = parse " -a, --all do not ignore entries starting with .\n" in
  let generated = generate_extern "ls" parsed in
  let emits = contains generated in
  check "has extern" (emits "export extern \"ls\"");
  check "has --all(-a)" (emits "--all(-a)");
  check "has comment" (emits "# do not ignore")
(* parameter names are mapped onto nushell types: COLS -> int,
 * SIZE -> string, FILE -> path *)
let test_nushell_param_types () =
  Printf.printf "\n== Nushell param type mapping ==\n";
  let parsed = parse {| -w, --width=COLS set output width
--block-size=SIZE scale sizes
-o, --output FILE output file
|} in
  let generated = generate_extern "ls" parsed in
  let emits = contains generated in
  check "COLS -> int" (emits "--width(-w): int");
  check "SIZE -> string" (emits "--block-size: string");
  check "FILE -> path" (emits "--output(-o): path")
(* every listed subcommand gets its own "parent sub" extern next to the
 * parent's extern *)
let test_nushell_subcommands () =
  Printf.printf "\n== Nushell subcommands ==\n";
  let parsed = parse {|Common Commands:
run Create and run a new container
exec Execute a command
Flags:
-D, --debug Enable debug mode
|} in
  let generated = generate_extern "docker" parsed in
  let emits = contains generated in
  check "has main extern" (emits "export extern \"docker\"");
  check "has --debug" (emits "--debug(-D)");
  check "has run subcommand" (emits "export extern \"docker run\"");
  check "has exec subcommand" (emits "export extern \"docker exec\"")
(* end-to-end: manpage source -> parsed entries -> nushell extern *)
let test_nushell_from_manpage () =
  Printf.printf "\n== Nushell from manpage ==\n";
  let page = {|.SH OPTIONS
.TP
\fB\-a\fR, \fB\-\-all\fR
do not ignore entries starting with .
.TP
\fB\-\-block\-size\fR=\fISIZE\fR
scale sizes by SIZE
.SH AUTHOR
|} in
  let generated = generate_extern "ls" (parse_manpage_string page) in
  let emits = contains generated in
  check "has extern" (emits "export extern \"ls\"");
  check "has --all(-a)" (emits "--all(-a)");
  check "has --block-size" (emits "--block-size: string")
(* generate_module wraps the extern in a "<cmd>-completions" module *)
let test_nushell_module () =
  Printf.printf "\n== Nushell module wrapper ==\n";
  let parsed = parse " -v, --verbose verbose output\n" in
  let generated = generate_module "myapp" parsed in
  let emits = contains generated in
  check "has module" (emits "module myapp-completions");
  check "has extern inside" (emits "export extern \"myapp\"");
  check "has flag" (emits "--verbose(-v)")
(* the same flag listed three ways (both forms, long only, short only) must
 * be emitted once, keeping the combined form *)
let test_dedup_entries () =
  Printf.printf "\n== Deduplication ==\n";
  let parsed = parse {| -v, --verbose verbose output
--verbose verbose mode
-v be verbose
|} in
  let generated = generate_extern "test" parsed in
  (* Count occurrences of --verbose *)
  let needle = Str.regexp_string "--verbose" in
  let rec count_from start seen =
    match Str.search_forward needle generated start with
    | _ -> count_from (Str.match_end ()) (seen + 1)
    | exception Not_found -> seen in
  let occurrences = count_from 0 0 in
  check "verbose appears once" (occurrences = 1);
  check "best version kept (Both)" (contains generated "--verbose(-v)")
(* flags mentioned again in running text outside OPTIONS must not create
 * additional entries in the generated extern *)
let test_dedup_manpage () =
  Printf.printf "\n== Dedup from manpage ==\n";
  let page = {|.SH OPTIONS
.TP
\fB\-v\fR, \fB\-\-verbose\fR
Be verbose.
.SH DESCRIPTION
Use \fB\-v\fR for verbose output.
Use \fB\-\-verbose\fR to see more.
|} in
  let generated = generate_extern "test" (parse_manpage_string page) in
  check "has --verbose(-v)" (contains generated "--verbose(-v)");
  (* Should not have standalone -v or duplicate --verbose *)
  let mentioning_verbose =
    String.split_on_char '\n' generated
    |> List.filter (fun line -> contains line "verbose") in
  check "only one verbose line" (List.compare_length_with mentioning_verbose 1 = 0)
(* adjacent font runs get a separating space, except before "=" *)
let test_font_boundary_spacing () =
  Printf.printf "\n== Font boundary spacing ==\n";
  (* \fB--max-results\fR\fIcount\fR should become "--max-results count" *)
  let spaced = strip_groff_escapes {|\fB\-\-max\-results\fR\fIcount\fR|} in
  check "has space before param" (contains spaced "--max-results count");
  (* \fB--color\fR[=\fIWHEN\fR] should NOT insert space before = *)
  let unspaced = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in
  check "no space before =" (contains unspaced "--color[=WHEN]")
(* test driver: run every group in order, print the tally, and exit with
 * status 1 when any check failed *)
let () =
  let run_all tests = List.iter (fun test -> test ()) tests in
  Printf.printf "Running help parser tests...\n";
  run_all [
    test_gnu_basic;
    test_gnu_eq_param;
    test_gnu_opt_param;
    test_underscore_param;
    test_short_only;
    test_long_only;
    test_multiline_desc;
    test_multiple_entries;
    test_clap_short_sections;
    test_clap_long_style;
    test_clap_long_angle_param;
    test_space_upper_param;
    test_go_cobra_flags;
    test_go_cobra_subcommands;
    test_busybox_tab;
    test_no_debug_prints;
  ];
  Printf.printf "\nRunning manpage parser tests...\n";
  run_all [
    test_manpage_tp_style;
    test_manpage_ip_style;
    test_manpage_groff_stripping;
    test_manpage_empty_options;
    test_slash_switch_separator;
    test_manpage_nix3_style;
    test_manpage_nix3_with_params;
    test_synopsis_subcommand;
    test_synopsis_standalone;
    test_synopsis_nix3;
  ];
  Printf.printf "\nRunning nushell generation tests...\n";
  run_all [
    test_nushell_basic;
    test_nushell_param_types;
    test_nushell_subcommands;
    test_nushell_from_manpage;
    test_nushell_module;
  ];
  Printf.printf "\nRunning dedup and font tests...\n";
  run_all [
    test_dedup_entries;
    test_dedup_manpage;
    test_font_boundary_spacing;
  ];
  Printf.printf "\n=== Results: %d passed, %d failed ===\n" !passes !failures;
  if !failures > 0 then exit 1