inshellah/bin/main.ml

1196 lines
52 KiB
OCaml

(* main.ml — cli entry point for inshellah, a nushell completions engine.
*
* inshellah generates nushell "extern" definitions for external commands by
* parsing their manpages and --help output. it has two main modes:
*
* 1. indexing (batch): scan a prefix directory's bin/ and share/man/,
* extract completions for every binary, and write them to a cache dir.
* this is typically run once per nix profile or system update.
*
* 2. completing (interactive): given a command and its current arguments,
* look up the cached data and return JSON completion candidates for
* nushell's custom completer protocol.
*
* the indexing pipeline for each binary:
* a. classify the binary (skip? try --help? try native completions?)
* b. if the tool has native nushell completion support, try various
* subcommand patterns ("completions nushell", "--completion nushell", etc.)
* c. otherwise, run the tool with --help/-h and parse the output
* d. recursively resolve subcommands (depth-limited to 5)
* e. after binaries, parse manpages for any commands not yet covered
*
* parallelism: indexing forks per binary, and subcommand resolution forks
* per subcommand. results are marshaled back via pipes. this gives good
* throughput on multi-core systems while keeping the code simple (no threads,
* no async runtime — just unix fork/pipe/waitpid).
*)
open Inshellah.Parser
open Inshellah.Manpage
open Inshellah.Nushell
open Inshellah.Store
module SSet = Set.Make(String)
(* emit the usage banner on stderr and terminate with exit status 1.
 * reached whenever the command line names no valid subcommand.
 * the banner contains no format directives, so it is written verbatim. *)
let usage () =
  prerr_string
{|inshellah - nushell completions engine
Usage:
inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
Index completions into a directory of JSON/nu files.
PREFIX is a directory containing bin/ and share/man/.
Default dir: $XDG_CACHE_HOME/inshellah
--ignore FILE skip listed commands entirely
--help-only FILE skip manpages for listed commands, use --help instead
inshellah complete CMD [ARGS...] [--dir PATH[:PATH...]]
Nushell custom completer. Outputs JSON completion candidates.
Falls back to --help resolution if command is not indexed.
--dir takes colon-separated paths. The first path is the writable
user cache; additional paths are read-only system directories.
Manpages are found via sibling share/man of system dir paths.
inshellah query CMD [--dir PATH[:PATH...]]
Print stored completion data for CMD.
inshellah dump [--dir PATH[:PATH...]]
List indexed commands.
inshellah manpage FILE Parse a manpage and emit nushell extern
inshellah manpage-dir DIR Batch-process manpages under DIR
|};
  exit 1
(* the manpage sections scanned for command documentation:
 * 1 holds user commands, 8 holds system administration commands. *)
let command_sections : int list = [1; 8]
(* literal (non-regex) substring test built on Str: true when needle occurs
 * anywhere in haystack. regexp_string quotes the needle, so characters such
 * as '.' only match themselves. *)
let contains_str haystack needle =
  let literal = Str.regexp_string needle in
  match Str.search_forward literal haystack 0 with
  | _ -> true
  | exception Not_found -> false
(* heuristic: does this text look like nushell source code?
 * anything of 20 characters or fewer is rejected up front (short error
 * messages would otherwise produce false positives). longer text qualifies
 * when it contains "export extern", or "export def", or both "module " and
 * "export". *)
let is_nushell_source text =
  if String.length text <= 20 then false
  else begin
    let mentions = contains_str text in
    mentions "export extern"
    || mentions "export def"
    || (mentions "module " && mentions "export")
  end
(* derive the command name from a manpage path.
 * the directory part is dropped, then an optional ".gz" suffix, then the
 * last remaining extension (the section): "ls.1.gz" -> "ls.1" -> "ls".
 * a name with no extension is returned unchanged. *)
let cmd_name_of_manpage path =
  let file = Filename.basename path in
  let uncompressed =
    match Filename.chop_suffix_opt ~suffix:".gz" file with
    | Some stem -> stem
    | None -> file in
  Filename.remove_extension uncompressed
(* sanitized environment handed to child processes.
 * the current environment minus DISPLAY, WAYLAND_DISPLAY,
 * DBUS_SESSION_BUS_ADDRESS and XAUTHORITY, so gui tools run with --help
 * cannot open windows, pop up dialogs or hang waiting for a display
 * connection. computed lazily, once, on first use. *)
let safe_env = lazy (
  let stripped_prefixes =
    ["DISPLAY="; "WAYLAND_DISPLAY="; "DBUS_SESSION_BUS_ADDRESS="; "XAUTHORITY="] in
  let is_stripped binding =
    List.exists (fun prefix -> String.starts_with ~prefix binding) stripped_prefixes in
  Unix.environment ()
  |> Array.to_list
  |> List.filter (fun binding -> not (is_stripped binding))
  |> Array.of_list)
(* move whatever is currently readable on rd into buf without ever blocking.
 * select() with a zero timeout gates every read; the drain stops as soon as
 * nothing is ready, read returns 0 (eof), or read fails. safe to call
 * repeatedly — the fork/pipe sites use it to keep pipes empty so children
 * never stall on write. *)
let drain_fd rd buf =
  let chunk = Bytes.create 8192 in
  let rec pump () =
    match Unix.select [rd] [] [] 0.0 with
    | ([], _, _) -> ()
    | (_ :: _, _, _) ->
      (match Unix.read rd chunk 0 8192 with
       | 0 -> ()
       | got -> Buffer.add_subbytes buf chunk 0 got; pump ()
       | exception Unix.Unix_error _ -> ())
  in
  pump ()
(* run a command with a timeout, capturing its stdout+stderr.
 * the child gets stdin from /dev/null and stdout+stderr merged onto a pipe.
 * the pipe is polled with select() until eof or until the deadline passes;
 * each select waits at most 0.05s so the deadline is checked frequently
 * even when no data arrives. a child still writing at the deadline is
 * killed with SIGKILL; the child is always reaped.
 *
 * the child is started from /tmp so tools that drop side-effect files do
 * not pollute the caller's working directory; the previous cwd is restored
 * afterwards, whether or not the spawn succeeded.
 *
 * args is the full argv (program first); timeout_ms is in milliseconds.
 * returns None when args is empty, when the process could not be started,
 * or when nothing was captured. output captured before a timeout kill is
 * still returned.
 *
 * every descriptor is closed exactly once on every path: a failed spawn
 * must not close the pipe twice, which would raise EBADF instead of
 * returning None. *)
let run_cmd args timeout_ms =
  match args with
  | [] -> None
  | prog :: _ ->
    let close_quietly fd = try Unix.close fd with Unix.Unix_error _ -> () in
    let (rd, wr) = Unix.pipe () in
    let spawn () =
      let saved_cwd = Sys.getcwd () in
      let devnull = Unix.openfile "/dev/null" [Unix.O_RDONLY] 0 in
      Fun.protect
        ~finally:(fun () ->
          (* best effort: if the old cwd vanished there is nowhere to go back to *)
          (try Sys.chdir saved_cwd with Sys_error _ -> ());
          close_quietly devnull)
        (fun () ->
          Sys.chdir "/tmp";
          Unix.create_process_env prog (Array.of_list args)
            (Lazy.force safe_env) devnull wr wr) in
    let pid =
      match spawn () with
      | pid -> Some pid
      | exception (Unix.Unix_error _ | Sys_error _) -> None in
    (* the parent never writes; keeping wr open would prevent eof on rd *)
    close_quietly wr;
    match pid with
    | None -> close_quietly rd; None
    | Some pid ->
      let buf = Buffer.create 4096 in
      let chunk = Bytes.create 8192 in
      let deadline =
        Unix.gettimeofday () +. (float_of_int timeout_ms /. 1000.0) in
      (* returns true when the deadline passed with the pipe still open,
       * false on eof or on an unrecoverable read/select error *)
      let rec pump () =
        let remaining = deadline -. Unix.gettimeofday () in
        if remaining <= 0.0 then true
        else
          match Unix.select [rd] [] [] (min remaining 0.05) with
          | ([], _, _) -> pump ()
          | (_ :: _, _, _) ->
            (match Unix.read rd chunk 0 (Bytes.length chunk) with
             | 0 -> false
             | got -> Buffer.add_subbytes buf chunk 0 got; pump ()
             | exception Unix.Unix_error (Unix.EINTR, _, _) -> pump ()
             | exception Unix.Unix_error _ -> false)
          | exception Unix.Unix_error (Unix.EINTR, _, _) -> pump ()
          | exception Unix.Unix_error _ -> false in
      let timed_out = pump () in
      close_quietly rd;
      if timed_out then
        (try Unix.kill pid Sys.sigkill with Unix.Unix_error _ -> ());
      let rec reap () =
        match Unix.waitpid [] pid with
        | _ -> ()
        | exception Unix.Unix_error (Unix.EINTR, _, _) -> reap ()
        | exception Unix.Unix_error _ -> () in
      reap ();
      if Buffer.length buf > 0 then Some (Buffer.contents buf) else None
(* true when path (symlinks followed by stat) is a regular file with at
 * least one execute bit set for user, group or other. any stat failure
 * counts as "not executable". *)
let is_executable path =
  match Unix.stat path with
  | { Unix.st_kind = Unix.S_REG; st_perm; _ } -> st_perm land 0o111 <> 0
  | _ -> false
  | exception Unix.Unix_error _ -> false
(* check whether a file is a script, i.e. its first two bytes are "#!".
 * symlinks are resolved with realpath before reading. returns false for
 * files shorter than two bytes and for anything that cannot be resolved,
 * opened or read. the channel is closed on every path, including read
 * errors (e.g. when the path is a directory), so probing many paths cannot
 * exhaust file descriptors. *)
let is_script path =
  match open_in_bin (Unix.realpath path) with
  | exception (Unix.Unix_error _ | Sys_error _) -> false
  | ic ->
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () ->
        match really_input_string ic 2 with
        | magic -> magic = "#!"
        | exception (End_of_file | Sys_error _) -> false)
(* scan an elf binary for string needles without loading the whole file.
 * returns a table holding one entry per needle that was found.
 *
 * the path is resolved with realpath and must start with the 4-byte elf
 * magic; any other file yields an empty table. the body is read in 64kb
 * chunks, and the last max_needle bytes of each chunk are carried over to
 * the front of the next one so needles spanning a chunk boundary are still
 * seen. scanning stops early once every needle has been found.
 *
 * on any open/read failure — including a file shorter than the magic — all
 * needles are marked as found. this is a conservative fallback: we'd rather
 * try --help on an unreadable binary than skip it.
 *
 * the channel is closed on every path, so failures do not leak
 * descriptors. the byte comparison is hand-rolled and stops at the first
 * mismatch because this runs on every binary in the prefix. an empty needle
 * matches trivially. *)
let elf_scan path needles =
  let found = Hashtbl.create 4 in
  let missing () =
    List.filter (fun needle -> not (Hashtbl.mem found needle)) needles in
  (* does needle occur within the first total bytes of buf? *)
  let occurs_in buf total needle =
    let nlen = String.length needle in
    let rec matches_at pos j =
      j = nlen
      || (Bytes.get buf (pos + j) = needle.[j] && matches_at pos (j + 1)) in
    let rec search pos =
      pos <= total - nlen && (matches_at pos 0 || search (pos + 1)) in
    search 0 in
  (try
    let ic = open_in_bin (Unix.realpath path) in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () ->
        if really_input_string ic 4 = "\x7fELF" then begin
          let max_needle =
            List.fold_left (fun widest needle -> max widest (String.length needle))
              0 needles in
          let chunk_size = 65536 in
          let buf = Bytes.create (chunk_size + max_needle) in
          let carry = ref 0 in
          let eof = ref false in
          while not !eof && missing () <> [] do
            (* new bytes land after the carried-over tail of the last chunk *)
            let bytes_read = input ic buf !carry chunk_size in
            if bytes_read = 0 then eof := true
            else begin
              let total = !carry + bytes_read in
              List.iter (fun needle ->
                if occurs_in buf total needle then
                  Hashtbl.replace found needle true
              ) (missing ());
              let keep = min max_needle total in
              Bytes.blit buf (total - keep) buf 0 keep;
              carry := keep
            end
          done
        end)
  with End_of_file | Sys_error _ | Unix.Unix_error _ ->
    List.iter (fun needle -> Hashtbl.replace found needle true) needles);
  found
(* detect a nix makeCWrapper shim and return the binary it wraps.
 * such wrappers only set up the environment and exec the real program, so
 * they do not contain "-h" or "completion" themselves and elf_scan alone
 * would classify them as skippable.
 *
 * the file (symlinks resolved) is read whole, but only when it is at most
 * 64kb — this avoids pulling a large non-wrapper binary into memory. it
 * must contain the marker "makeCWrapper"; the first /nix/store/.../bin/...
 * path found in it is the candidate, returned only if it exists on disk.
 * every failure yields None. *)
let nix_wrapper_target path =
  try
    let ic = open_in_bin (Unix.realpath path) in
    let size = in_channel_length ic in
    if size <= 65536 then begin
      let contents = really_input_string ic size in
      close_in ic;
      if contains_str contents "makeCWrapper" then begin
        let store_bin =
          Str.regexp "/nix/store/[a-z0-9]+-[^' \n\r\x00]+/bin/[a-zA-Z0-9._-]+" in
        match Str.search_forward store_bin contents 0 with
        | exception Not_found -> None
        | _ ->
          let target = Str.matched_string contents in
          if Sys.file_exists target then Some target else None
      end else None
    end else begin
      close_in ic;
      None
    end
  with _ -> None
(* detect a nix shell-script wrapper and return the binary it execs.
 * nix sometimes emits small scripts that export a few variables and then
 * hand over to the real program, e.g.:
 *   #!/nix/store/.../bash -e
 *   export FOO=...
 *   exec -a "$0" "/nix/store/.../bin/.foo-wrapped" "$@"
 * only files of at most 4096 bytes that mention "exec" are considered. the
 * exec target is pulled out with a regex and resolved through realpath; a
 * target that does not resolve, like any other failure, yields None. *)
let nix_script_wrapper_target path =
  try
    let ic = open_in (Unix.realpath path) in
    let size = in_channel_length ic in
    if size <= 4096 then begin
      let contents = really_input_string ic size in
      close_in ic;
      if contains_str contents "exec" then begin
        let exec_line =
          Str.regexp "exec[ \t]+\\(-a[ \t]+\"\\$0\"[ \t]+\\)?\"?\\(/nix/store/[a-z0-9]+-[^\" \t\n]+/bin/[a-zA-Z0-9._-]+\\)\"?" in
        match Str.search_forward exec_line contents 0 with
        | exception Not_found -> None
        | _ ->
          let target = Unix.realpath (Str.matched_group 2 contents) in
          if Sys.file_exists target then Some target else None
      end else None
    end else begin
      close_in ic;
      None
    end
  with _ -> None
(* name-based filter for binaries that are never indexed. rejects:
 *   - the empty name and "-"
 *   - dotfiles
 *   - names starting with "lib"
 *   - names ending in "-daemon", "-wrapped" or ".so"
 *   - names without a single ascii letter or digit *)
let skip_name name =
  let is_alnum = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
    | _ -> false in
  let has_suffix suffix = String.ends_with ~suffix name in
  name = "" || name = "-" || name.[0] = '.'
  || String.starts_with ~prefix:"lib" name
  || List.exists has_suffix ["-daemon"; "-wrapped"; ".so"]
  || not (String.exists is_alnum name)
(* classification result for a binary; it selects the indexing strategy.
 * Skip — don't index this binary at all
 * Try_help — only run the --help/-h resolver (used for scripts, nix
 *   wrappers, and elf binaries that contain "-h" but not "completion")
 * Try_native_and_help — the binary contains the string "completion": ask it
 *   for native nushell completions first, fall back to the --help resolver *)
type bin_class = Skip | Try_help | Try_native_and_help
(* classify an elf binary by the strings it contains: "completion" wins and
 * selects Try_native_and_help, otherwise "-h" selects Try_help, otherwise
 * the binary is skipped. *)
let classify_elf path =
  let hits = elf_scan path ["-h"; "completion"] in
  match Hashtbl.mem hits "completion", Hashtbl.mem hits "-h" with
  | true, _ -> Try_native_and_help
  | false, true -> Try_help
  | false, false -> Skip
(* pick the indexing strategy for bindir/name.
 * decision tree:
 *  1. nushell builtin or rejected by skip_name -> Skip
 *  2. not an executable regular file -> Skip
 *  3. script (has a shebang): when it is a nix script wrapper, classify the
 *     wrapped elf target and use that verdict unless it is Skip; in every
 *     other case -> Try_help
 *  4. elf binary containing "completion" -> Try_native_and_help
 *  5. elf binary containing "-h" -> Try_help
 *  6. nix c wrapper -> Try_help (the wrapper itself is only an exec shim)
 *  7. anything else -> Skip (no help infrastructure detected) *)
let classify_binary bindir name =
  if is_nushell_builtin name || skip_name name then Skip
  else begin
    let path = Filename.concat bindir name in
    if not (is_executable path) then Skip
    else if is_script path then begin
      match nix_script_wrapper_target path with
      | None -> Try_help
      | Some target ->
        (match classify_elf target with
         | Skip -> Try_help
         | (Try_help | Try_native_and_help) as verdict -> verdict)
    end else begin
      match classify_elf path with
      | (Try_help | Try_native_and_help) as verdict -> verdict
      | Skip ->
        (match nix_wrapper_target path with
         | Some _ -> Try_help
         | None -> Skip)
    end
  end
(* number of cpu cores, taken as the count of lines in /proc/cpuinfo that
 * start with "processor" (never less than 1). when the file cannot be
 * opened or read (e.g. on non-linux systems) the answer is 4. *)
let num_cores () =
  let rec count_processors ic seen =
    match input_line ic with
    | line ->
      let seen =
        if String.starts_with ~prefix:"processor" line then seen + 1 else seen in
      count_processors ic seen
    | exception End_of_file -> seen in
  try
    let ic = open_in "/proc/cpuinfo" in
    let found = count_processors ic 0 in
    close_in ic;
    max 1 found
  with _ -> 4
(* ask a binary for its own nushell completions.
 * each argument pattern below is tried in order with a 500ms timeout —
 * generous for most tools while still bounding hangs — and the first output
 * that is_nushell_source accepts is returned. None when no pattern yields
 * nushell source.
 *
 * the patterns cover cobra (go), clap (rust), click (python) and various
 * ad-hoc implementations. *)
let try_native_completion bin_path =
  let attempt extra_args =
    Option.bind (run_cmd (bin_path :: extra_args) 500) (fun text ->
      if is_nushell_source text then Some text else None) in
  List.find_map attempt [
    ["completions"; "nushell"];
    ["completion"; "nushell"];
    ["--completions"; "nushell"];
    ["--completion"; "nushell"];
    ["generate-completion"; "nushell"];
    ["--generate-completion"; "nushell"];
    ["gen-completions"; "nushell"];
    ["shell-completions"; "nushell"];
  ]
(* parse one manpage file.
 * returns Some (cmd, result, subs) where cmd is the command name, result
 * the parsed page, and subs the per-subcommand sections, each named
 * "<cmd> <sub>". returns None when cmd is a nushell builtin.
 *
 * cmd comes from the synopsis when one can be extracted, else from the file
 * name. the file name also bounds the word count: "git-stash" has two
 * hyphen-separated parts, so a synopsis name is cut to its first two words.
 * this reins in synopses that greedily list subcommand variants.
 *
 * when sub-sections exist, they replace result.subcommands. *)
let parse_manpage_for_command file =
  let contents = read_manpage_file file in
  let fallback = cmd_name_of_manpage file in
  let max_words = List.length (String.split_on_char '-' fallback) in
  let cmd =
    match extract_synopsis_command contents with
    | None -> fallback
    | Some synopsis_name ->
      let words = String.split_on_char ' ' synopsis_name in
      if List.length words <= max_words then synopsis_name
      else
        words
        |> List.filteri (fun idx _ -> idx < max_words)
        |> String.concat " " in
  if is_nushell_builtin cmd then None
  else begin
    let parsed = parse_manpage_string contents in
    let sub_sections = extract_subcommand_sections contents in
    let result =
      match sub_sections with
      | [] -> parsed
      | _ :: _ ->
        { parsed with subcommands = List.map (fun (name, desc, _) ->
            { name; desc }) sub_sections } in
    let subs =
      List.map (fun (sub, _, sub_result) -> (cmd ^ " " ^ sub, sub_result))
        sub_sections in
    Some (cmd, result, subs)
  end
(* "inshellah manpage FILE" — parse a single manpage and print the nushell
 * extern generated for it on stdout. prints nothing when the page cannot
 * be used or has no entries. *)
let cmd_manpage file =
  match parse_manpage_for_command file with
  | None -> ()
  | Some (cmd, result, _) ->
    if result.entries <> [] then print_string (generate_extern cmd result)
(* "inshellah manpage-dir DIR" — run cmd_manpage over every file found in
 * DIR/man<N> for each command section. a page that raises is skipped so
 * the rest of the batch still runs. *)
let cmd_manpage_dir dir =
  let process_section section =
    let subdir = Filename.concat dir (Printf.sprintf "man%d" section) in
    if is_dir subdir then
      Sys.readdir subdir
      |> Array.iter (fun file ->
           try cmd_manpage (Filename.concat subdir file) with _ -> ()) in
  List.iter process_section command_sections
(* detect rendered manpage output. when --help delegates to man(1) the text
 * opens with a header such as "GIT-STASH(1) ... GIT-STASH(1)".
 * the test looks at the first non-blank line, trimmed: its first '(' must
 * not be the first character and must be followed by exactly one digit and
 * a ')'. *)
let is_rendered_manpage text =
  let first_nonblank =
    String.split_on_char '\n' text
    |> List.find_map (fun line ->
         match String.trim line with
         | "" -> None
         | trimmed -> Some trimmed) in
  match first_nonblank with
  | None -> false
  | Some header ->
    (match String.index_opt header '(' with
     | None -> false
     | Some paren ->
       let is_digit c = c >= '0' && c <= '9' in
       paren > 0
       && paren + 2 < String.length header
       && is_digit header.[paren + 1]
       && header.[paren + 2] = ')')
(* locate the raw manpage file for a hyphenated command name such as
 * "git-stash".
 * candidates are <mandir>/man<N>/<name>.<N> and the same with ".gz", tried
 * per man directory, per command section, in that order. when none exists
 * — or no man directory was given — "man -w <name>" is asked (200ms
 * timeout) and its trimmed output is accepted if it names an existing
 * file. *)
let find_manpage_path mandirs hyphenated_name =
  let candidates mandir =
    List.concat_map (fun section ->
      let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
      List.map (fun ext ->
        Filename.concat subdir
          (Printf.sprintf "%s.%d%s" hyphenated_name section ext)
      ) [""; ".gz"]
    ) command_sections in
  let on_disk =
    mandirs |> List.concat_map candidates |> List.find_opt Sys.file_exists in
  match on_disk with
  | Some _ -> on_disk
  | None ->
    Option.bind (run_cmd ["man"; "-w"; hyphenated_name] 200) (fun raw ->
      let path = String.trim raw in
      if Sys.file_exists path then Some path else None)
(* used when --help printed a rendered manpage: locate the raw manpage for
 * cmd_name and parse that instead. returns the main result together with
 * the sub-section results (e.g. "git stash push" flags taken from the
 * git-stash page). None when no page is found, the page is unusable, or it
 * yields neither entries nor sub-sections. *)
let try_manpage_fallback mandirs cmd_name =
  Option.bind (find_manpage_path mandirs cmd_name) (fun path ->
    match parse_manpage_for_command path with
    | None -> None
    | Some (_, result, subs) ->
      if result.entries = [] && subs = [] then None
      else Some (result, subs))
(* safety cap for subcommand resolution: once a binary has accumulated this
 * many results, no further subcommands are enqueued for it. keeps tools
 * with enormous subcommand trees from running away. *)
let max_resolve_results : int = 500
(* exception-proof wrapper around parse_manpage_for_command: any exception
 * becomes None, and so does a page with neither entries nor sub-sections. *)
let process_manpage file =
  try
    match parse_manpage_for_command file with
    | None -> None
    | Some (_, result, subs) as parsed ->
      if result.entries = [] && subs = [] then None else parsed
  with _ -> None
(* set of command names that have a manpage under mandir, gathered from the
 * file names in man<N> for every command section. indexing uses it to skip
 * --help for commands the manpage phase will cover (manpages are more
 * reliable than --help). *)
let manpaged_commands mandir =
  let add_section known section =
    let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
    if not (is_dir subdir) then known
    else
      Array.fold_left
        (fun names file -> SSet.add (cmd_name_of_manpage file) names)
        known (Sys.readdir subdir) in
  List.fold_left add_section SSet.empty command_sections
(* parallel structured help resolver — recursively resolves a command and
 * all its subcommands by running --help on each, forking a child process
 * per subcommand for parallelism.
 *
 * arguments: cmd is the program to execute, rest the arguments that select
 * the starting (sub)command, name the display name results are stored
 * under. timeout (ms) bounds every --help/-h run; mandirs are searched when
 * --help turns out to be a rendered manpage.
 * returns (command name, help_result) pairs in discovery order.
 *
 * the resolver works as a breadth-first queue:
 *  1. start with the root command in the queue
 *  2. fork a child for each queued item (up to num_cores concurrent)
 *  3. the child runs --help (then -h), parses the output and marshals the
 *     result back over a pipe
 *  4. the parent collects results and enqueues discovered subcommands
 *  5. repeat until the queue is empty and all children have finished
 *
 * depth is limited to 5 levels and total results to max_resolve_results
 * to prevent runaway recursion on pathological command trees.
 *
 * the child detects "self-listing" — a subcommand whose --help lists itself
 * as a subcommand (e.g. "git help" listing "help"). following it would
 * recurse forever, so such results are discarded.
 *
 * children close the pipe fds of other pending children right after fork.
 * a child never lets an exception escape into the parent's code path, and
 * it leaves through Unix._exit so stdio buffers inherited from the parent
 * are not flushed a second time. the parent drains pipes regularly so
 * children never block on a full pipe buffer. *)
let help_resolve_par ?(timeout=200) ?(mandirs=[]) cmd rest name =
  let max_jobs = num_cores () in
  let max_depth = 5 in
  let queue = Queue.create () in
  Queue.push (rest, name, 0) queue;
  let results = ref [] in
  (* pending: (pid, rd, buf, cmd_args, cmd_name, depth) *)
  let pending = ref [] in
  (* parent side: decode a finished child's payload, record it, and enqueue
   * the subcommands it reported *)
  let collect rd buf cmd_args cmd_name depth =
    drain_fd rd buf;
    (try Unix.close rd with _ -> ());
    let data = Buffer.contents buf in
    let result : (help_result * subcommand list * (string * help_result) list) option =
      if String.length data > 0 then
        try Marshal.from_string data 0 with _ -> None
      else None in
    match result with
    | None -> ()
    | Some (r, subs, extras) ->
      let at_limit =
        depth >= max_depth || List.length !results >= max_resolve_results in
      results := (cmd_name, r) :: !results;
      (* extras are fully-parsed sub-results from manpage sub-sections —
       * add them directly without enqueueing for further resolution *)
      List.iter (fun (sub_name, sub_r) ->
        if not (List.exists (fun (existing, _) -> existing = sub_name) !results) then
          results := (sub_name, sub_r) :: !results
      ) extras;
      if not at_limit then begin
        (* only enqueue subcommands that weren't already covered by extras *)
        let extra_names = List.map fst extras in
        List.iter (fun (sc : subcommand) ->
          let full = cmd_name ^ " " ^ sc.name in
          if not (List.mem full extra_names) then
            Queue.push (cmd_args @ [sc.name], full, depth + 1) queue
        ) subs
      end in
  let reap () =
    pending := List.filter (fun (pid, rd, buf, cmd_args, cmd_name, depth) ->
      drain_fd rd buf;
      match Unix.waitpid [Unix.WNOHANG] pid with
      | (0, _) -> true
      | _ -> collect rd buf cmd_args cmd_name depth; false
      | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
        (try Unix.close rd with _ -> ()); false
    ) !pending in
  let wait_for_slot () =
    while List.length !pending >= max_jobs do
      reap ();
      if List.length !pending >= max_jobs then begin
        let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
        ignore (Unix.select fds [] [] 0.05)
      end
    done in
  (* child side: run --help (then -h) for one queue item and parse it *)
  let resolve_in_child cmd_args cmd_name depth
    : (help_result * subcommand list * (string * help_result) list) option =
    (* subcommands worth enqueueing; none once the depth limit is reached *)
    let enqueueable (r : help_result) =
      if depth >= max_depth then [] else r.subcommands in
    let parse_text ~reject_self_listing text =
      match parse_help text with
      | Error _ -> None
      | Ok r ->
        if r.entries = [] && r.subcommands = [] && r.positionals = [] then None
        else begin
          let self_listed =
            reject_self_listing
            && (match List.rev cmd_args with
                | [] -> false
                | leaf :: _ ->
                  List.exists (fun (sc : subcommand) -> sc.name = leaf)
                    r.subcommands) in
          if self_listed then None else Some (r, enqueueable r, [])
        end in
    let text =
      match run_cmd (cmd :: cmd_args @ ["--help"]) timeout with
      | Some _ as out -> out
      | None -> run_cmd (cmd :: cmd_args @ ["-h"]) timeout in
    match text with
    | None -> None
    | Some text ->
      if not (is_rendered_manpage text) then
        parse_text ~reject_self_listing:true text
      else begin
        (* check for rendered manpage first — when --help delegates to
         * man(1), the raw groff source has richer structure than the
         * rendered text. parse_help would partially succeed on rendered
         * manpage output (extracting flags from OPTIONS) but miss
         * subcommands from the COMMANDS section. *)
        let hyphenated =
          String.concat "-" (Filename.basename cmd :: cmd_args) in
        match try_manpage_fallback mandirs hyphenated with
        | Some (r, subs) ->
          (* parse_manpage_for_command already names each sub-section
           * "<manpage cmd> <sub>", so prefixing that with cmd_name would
           * yield "git stash git stash push". when sub-sections exist they
           * are also r.subcommands, in the same order, so the leaf names
           * are taken from there and re-qualified under cmd_name. if the
           * two lists ever disagree, the manpage's own names are kept. *)
          let extras =
            if List.compare_lengths subs r.subcommands = 0 then
              List.map2 (fun (sc : subcommand) (_, sub_r) ->
                (cmd_name ^ " " ^ sc.name, sub_r)) r.subcommands subs
            else subs in
          Some (r, enqueueable r, extras)
        | None ->
          (* manpage file not found — fall back to parsing rendered text *)
          parse_text ~reject_self_listing:false text
      end in
  while not (Queue.is_empty queue) || !pending <> [] do
    while not (Queue.is_empty queue) do
      let (cmd_args, cmd_name, depth) = Queue.pop queue in
      wait_for_slot ();
      let (rd, wr) = Unix.pipe () in
      match Unix.fork () with
      | 0 ->
        Unix.close rd;
        List.iter (fun (_, prd, _, _, _, _) ->
          try Unix.close prd with _ -> ()) !pending;
        (* nothing may escape from here: an exception would unwind into the
         * parent's loop inside the child process *)
        let result = try resolve_in_child cmd_args cmd_name depth with _ -> None in
        (try
          let oc = Unix.out_channel_of_descr wr in
          Marshal.to_channel oc
            (result : (help_result * subcommand list * (string * help_result) list) option) [];
          close_out oc
        with _ -> ());
        (* _exit skips at_exit, so inherited stdio buffers are not re-flushed *)
        Unix._exit 0
      | pid ->
        Unix.close wr;
        pending := (pid, rd, Buffer.create 4096, cmd_args, cmd_name, depth) :: !pending
    done;
    if !pending <> [] then begin
      reap ();
      if !pending <> [] && Queue.is_empty queue then begin
        let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
        ignore (Unix.select fds [] [] 0.05)
      end
    end
  done;
  List.rev !results
(* "inshellah index" — the main indexing command.
* processes all binaries and manpages in the given prefix directories,
* writing completion data to the cache dir.
*
* the pipeline has two phases:
*
* phase 1 (binaries): fork one child per binary. each child:
* - tries native nushell completions (if classified as Try_native_and_help)
* - falls back to help_resolve_par (which itself forks per subcommand)
* - marshals the result back via pipe as a tagged variant:
* `Native of string — raw nushell source
* `Parsed of (string * help_result) list — parsed flag data
* `None — nothing useful extracted
*
* phase 2 (manpages): sequentially parse manpages for commands not yet
* covered by phase 1. manpages are more reliable than --help for many
* gnu tools, but slower to process.
*
* commands on the ignorelist are skipped entirely. commands on the
* help_only list skip manpage parsing and only use --help. commands
* with manpages skip --help in phase 1 (they'll be handled in phase 2).
*
* the done_cmds set tracks which commands have already been indexed to
* prevent duplicates across phases and across multiple prefix directories. *)
(* known privilege-escalation wrappers. declared ahead of cmd_index and
 * cmd_complete because both need the list: cmd_index writes @complete
 * external stubs for these names, and cmd_complete strips the wrapper to
 * reach the real command. *)
let elevation_commands : string list = [
  "sudo"; "run0"; "doas"; "pkexec";
  "su"; "calife"; "sux"; "sudoedit";
  "please"; "super"; "priv";
]
(* "inshellah index" entry point.
 * bindirs and mandirs are parallel lists (one bin/ and one share/man/ per
 * prefix); ignorelist and help_only are sets of command names; dir is the
 * cache directory that receives the results.
 *
 * phase 1 forks one child per classified binary. the child tries native
 * nushell completions when the classification allows it, otherwise runs
 * help_resolve_par, and marshals a tagged result back over a pipe.
 * phase 2 parses manpages for commands not yet covered.
 *
 * ignorelisted commands are skipped in both phases. help_only commands
 * skip phase 2. commands that have a manpage skip phase 1 unless they are
 * help_only. done_cmds prevents duplicates across phases and prefixes.
 *
 * forked children leave through Unix._exit: a plain exit would run at_exit
 * and flush stdio buffers inherited from the parent, repeating e.g. a
 * buffered "skipping ..." line once per child. *)
let cmd_index bindirs mandirs ignorelist help_only dir =
  ensure_dir dir;
  let done_cmds = ref SSet.empty in
  let result_count = ref 0 in
  (* write one parsed result unless that command name is already indexed *)
  let record ~source cmd_name r =
    if not (SSet.mem cmd_name !done_cmds) then begin
      write_result ~dir ~source cmd_name r;
      done_cmds := SSet.add cmd_name !done_cmds;
      incr result_count
    end in
  let index_bindir bindir mandir =
    if not (is_dir bindir) then
      Printf.eprintf "skipping %s (not found)\n" bindir
    else begin
      let bins = Sys.readdir bindir in
      Array.sort String.compare bins;
      let manpaged = if is_dir mandir
        then manpaged_commands mandir else SSet.empty in
      let max_jobs = num_cores () in
      let classify name =
        if SSet.mem name ignorelist then Skip
        else if SSet.mem name manpaged && not (SSet.mem name help_only) then Skip
        else classify_binary bindir name in
      let classified = Array.map (fun name -> (name, classify name)) bins in
      (* pending: (pid, rd, buf, name) *)
      let pending = ref [] in
      let process_result name rd buf =
        drain_fd rd buf;
        (try Unix.close rd with _ -> ());
        let data = Buffer.contents buf in
        if String.length data > 0 then begin
          let result : [`Native of string | `Parsed of (string * help_result) list | `None] =
            try Marshal.from_string data 0 with _ -> `None in
          match result with
          | `Native src ->
            write_native ~dir name src;
            incr result_count
          | `Parsed pairs ->
            List.iter (fun (cmd_name, r) -> record ~source:"help" cmd_name r) pairs
          | `None -> ()
        end;
        done_cmds := SSet.add name !done_cmds in
      let reap () =
        pending := List.filter (fun (pid, rd, buf, name) ->
          drain_fd rd buf;
          match Unix.waitpid [Unix.WNOHANG] pid with
          | (0, _) -> true
          | _ ->
            process_result name rd buf;
            false
          | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
            (try Unix.close rd with _ -> ()); false
        ) !pending in
      let wait_for_slot () =
        while List.length !pending >= max_jobs do
          reap ();
          if List.length !pending >= max_jobs then begin
            let fds = List.map (fun (_, rd, _, _) -> rd) !pending in
            ignore (Unix.select fds [] [] 0.05)
          end
        done in
      (* phase 1: binaries *)
      Array.iter (fun (name, classification) ->
        match classification with
        | Skip -> ()
        | Try_help | Try_native_and_help ->
          wait_for_slot ();
          let (rd, wr) = Unix.pipe () in
          (match Unix.fork () with
           | 0 ->
             Unix.close rd;
             List.iter (fun (_, prd, _, _) ->
               try Unix.close prd with _ -> ()) !pending;
             let result : [`Native of string | `Parsed of (string * help_result) list | `None] =
               try
                 let path = Filename.concat bindir name in
                 let native =
                   match classification with
                   | Try_native_and_help -> try_native_completion path
                   | Skip | Try_help -> None in
                 match native with
                 | Some src -> `Native src
                 | None ->
                   let pairs = help_resolve_par ~timeout:200 ~mandirs path [] name in
                   if pairs <> [] then `Parsed pairs else `None
               with _ -> `None in
             (* a failed write must not let the child fall back into the
              * parent's iteration *)
             (try
               let oc = Unix.out_channel_of_descr wr in
               Marshal.to_channel oc result [];
               close_out oc
             with _ -> ());
             Unix._exit 0
           | pid ->
             Unix.close wr;
             pending := (pid, rd, Buffer.create 4096, name) :: !pending)
      ) classified;
      while !pending <> [] do
        reap ();
        if !pending <> [] then begin
          let fds = List.map (fun (_, rd, _, _) -> rd) !pending in
          ignore (Unix.select fds [] [] 0.05)
        end
      done;
      (* phase 2: manpages *)
      if is_dir mandir then
        List.iter (fun section ->
          let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
          if is_dir subdir then begin
            let files = Sys.readdir subdir in
            Array.sort String.compare files;
            Array.iter (fun file ->
              let base_cmd = cmd_name_of_manpage file in
              (* ignored commands are skipped here too, not only in phase 1 *)
              if SSet.mem base_cmd ignorelist || SSet.mem base_cmd help_only then ()
              else
                match process_manpage (Filename.concat subdir file) with
                | None -> ()
                | Some (cmd, result, subs) ->
                  record ~source:"manpage" cmd result;
                  List.iter (fun (sub_cmd, sub_result) ->
                    record ~source:"manpage" sub_cmd sub_result
                  ) subs
            ) files
          end
        ) command_sections
    end in
  List.iter2 index_bindir bindirs mandirs;
  (* write @complete external stubs for elevation commands (sudo, doas, etc.)
   * so nushell routes their completions through the external completer.
   * without this, nushell hardcodes sudo/doas to show command-name completion
   * and never calls the external completer for their own flags. *)
  List.iter (fun cmd ->
    let json_path = Filename.concat dir (filename_of_command cmd ^ ".json") in
    if Sys.file_exists json_path then
      write_native ~dir cmd
        (Printf.sprintf "@complete external\nextern \"%s\" []\n" cmd)
  ) elevation_commands;
  Printf.printf "indexed %d commands into %s\n" !result_count dir
(* "inshellah dump" — print the number of indexed commands, then one line
 * per command with its source label ("?" when the label is unknown). *)
let cmd_dump dirs =
  let cmds = all_commands dirs in
  Printf.printf "%d commands\n" (List.length cmds);
  let print_entry cmd =
    let label = Option.value (file_type_of dirs cmd) ~default:"?" in
    Printf.printf " %-40s [%s]\n" cmd label in
  List.iter print_entry cmds
(* locate an executable called [name] by probing every directory listed in
 * $PATH, in order. yields the first candidate accepted by is_executable, or
 * None when nothing matches or $PATH is not set.
 * used during completion to find binaries for on-the-fly resolution. *)
let find_in_path name =
  let probe search_dir =
    let candidate = Filename.concat search_dir name in
    if is_executable candidate then Some candidate else None in
  match List.find_map probe (String.split_on_char ':' (Sys.getenv "PATH")) with
  | hit -> hit
  | exception Not_found -> None
(* on-the-fly resolution for a command that is missing from the index.
 * called from "complete": runs help_resolve_par on the binary at [path]
 * and, when it yields anything, stores every (command, result) pair in the
 * cache directory [dir] (created on demand).
 * returns Some pairs when something was resolved, None otherwise. *)
let resolve_and_cache ~dir ~mandirs name path =
  match help_resolve_par ~timeout:200 ~mandirs path [] name with
  | [] -> None
  | resolved ->
    ensure_dir dir;
    List.iter
      (fun (cmd_name, parsed) -> write_result ~dir cmd_name parsed)
      resolved;
    Some resolved
(* render one completion candidate as the JSON object nushell's completer
 * protocol expects: {"value": ..., "description": ...}. both fields are
 * passed through escape_json before being embedded. *)
let completion_json value desc =
  let value_json = escape_json value in
  let desc_json = escape_json desc in
  Printf.sprintf "{\"value\":\"%s\",\"description\":\"%s\"}"
    value_json desc_json
(* fuzzy matching: returns a score > 0 if needle is a subsequence of haystack
 * (compared case-insensitively), and 0 otherwise. higher scores = better
 * match. scoring tiers:
 *   - empty needle: 1 (everything matches, nothing is preferred)
 *   - needle longer than haystack: 0
 *   - exact (case-sensitive) match: 1000
 *   - case-insensitive prefix match: 900 + percentage of the haystack covered
 *   - subsequence: every matched character is worth
 *       - 50 when it sits on a word boundary (start of the haystack, right
 *         after '-' or '_', or on a lower-to-upper camelCase transition),
 *         10 otherwise
 *       - plus 20 when the previous haystack character was matched as well
 *
 * this drives the completion candidate ranking. users typing "ser" should see
 * "--server" ranked above "--preserve" even though both contain "ser" as a
 * subsequence. the word-boundary bonus achieves this. *)
let fuzzy_score needle haystack =
  let needle_len = String.length needle
  and haystack_len = String.length haystack in
  if needle_len = 0 then 1
  else if needle_len > haystack_len then 0
  else if needle = haystack then 1000
  else
    let needle_lc = String.lowercase_ascii needle
    and haystack_lc = String.lowercase_ascii haystack in
    if String.starts_with ~prefix:needle_lc haystack_lc then
      900 + (needle_len * 100 / haystack_len)
    else
      (* boundaries are detected on the original-case haystack so that
       * camelCase transitions are still visible after lowercasing *)
      let is_boundary hay_idx =
        hay_idx = 0
        || (match haystack.[hay_idx - 1], haystack.[hay_idx] with
            | ('-' | '_'), _ -> true
            | 'a' .. 'z', 'A' .. 'Z' -> true
            | _ -> false) in
      (* walk the haystack, greedily matching needle chars as a subsequence.
       * prev_match is None until the first match: a match at index 0 has no
       * predecessor and must not earn the "consecutive" bonus. *)
      let needle_idx, score, _, _ =
        String.fold_left (fun (needle_idx, score, hay_idx, prev_match) c ->
          if needle_idx < needle_len && c = needle_lc.[needle_idx] then
            let bonus =
              (if is_boundary hay_idx then 50 else 10)
              + (if prev_match = Some (hay_idx - 1) then 20 else 0) in
            (needle_idx + 1, score + bonus, hay_idx + 1, Some hay_idx)
          else (needle_idx, score, hay_idx + 1, prev_match)
        ) (0, 0, 0, None) haystack_lc in
      if needle_idx = needle_len then score else 0
(* skip over an elevation command's own flags and arguments to locate the
 * command it is going to run. [is_command] decides whether a token names a
 * known command; it is only consulted for tokens that do not start with '-'.
 * a literal "--" ends the scan: everything after it is returned as-is
 * (possibly the empty list).
 * returns Some (real_cmd :: args), or None if no command was found. *)
let find_real_command is_command args =
  let looks_like_flag token =
    String.length token > 0 && token.[0] = '-' in
  let rec scan tokens =
    match tokens with
    | [] -> None
    | "--" :: after -> Some after
    | token :: after ->
      if looks_like_flag token then scan after
      else if is_command token then Some tokens
      else scan after
  in
  scan args
(* "inshellah complete CMD [ARGS...]" — the nushell custom completer.
* this is the hot path — called every time the user presses tab in nushell.
*
* the completion logic:
* 1. try to find the command (or longest subcommand prefix) in the store
* 2. if not found, try on-the-fly resolution (find in $PATH, run --help, cache)
* 3. score all candidate completions against the partial input using fuzzy_score
* 4. output scored candidates as a JSON array
*
* subcommand resolution: the lookup tries longest prefix first.
* for "git add --", it first looks for "git add", then "git".
* this ensures subcommand-specific flags are shown.
*
* nushell sends a trailing empty token when the cursor is after a space
* ("git add "). in this case all_tokens includes the empty string.
* when the last token is non-empty, the user is still typing it, so we use
* it as the fuzzy filter. when empty, we show all candidates.
*
* if only a parent command matched (e.g. "git" matched but not "git add"),
* we suppress subcommand suggestions and only show flags. this prevents
* showing sibling subcommands when the user has already committed to a
* specific subcommand path.
*
* file completions: nushell's external completer protocol is either/or —
* you either return custom candidates or fall back to native file completions
* (via null), but can't mix both. we return null (triggering nushell's native
* file completer with colors, sorting, quoting) when:
* - the user is at a leaf command (no subcommands) and not mid-flag
* - or we have no candidates at all
* this ensures file completions appear with full nushell UX. when the user
* IS typing a flag (partial starts with "-"), we return our flag candidates.
*
* parameters:
* - spans: the command line split into tokens, command name first
* - user_dir: store directory that on-the-fly results are written to
* - system_dirs: further store directories; lookups consult
* user_dir :: system_dirs
* - mandirs: passed through to resolve_and_cache
*
* output: prints either "null" or a JSON array of candidates to stdout.
*
* NOTE(review): the on-the-fly branch runs whenever the matched prefix is more
* than one token shorter than lookup_tokens and the command is found in $PATH.
* that is also the case when the extra tokens are positional arguments rather
* than subcommands, so resolve_and_cache is invoked again on each such call —
* confirm this is intended for a hot path. *)
let cmd_complete spans user_dir system_dirs mandirs =
let dirs = user_dir :: system_dirs in
(* if the command line starts with a privilege-escalation wrapper, scan past
* it to find the real command. we identify the command by checking the store
* and $PATH — this avoids needing per-command option tables which are fragile
* across different implementations. if no real command is found, fall back to
* completing the elevation command itself. *)
let spans = match spans with
| cmd :: rest when List.mem cmd elevation_commands ->
let is_command name =
name <> "" && (lookup dirs name <> None || find_in_path name <> None)
in
(match find_real_command is_command rest with
| Some (_ :: _ as real_spans) -> real_spans
| _ -> spans)
| _ -> spans in
match spans with
| [] -> print_string "null\n"
| cmd_name :: rest ->
(* try longest prefix match: "git add" before "git".
* yields Some (matched name, stored result, number of tokens matched). *)
let find_result tokens =
let num_tokens = List.length tokens in
List.init num_tokens Fun.id |> List.find_map (fun drop ->
let prefix = List.filteri (fun i _ -> i < num_tokens - drop) tokens in
match prefix with
| [] -> None
| _ ->
let try_name = String.concat " " prefix in
match lookup dirs try_name with
| Some r -> Some (try_name, r, List.length prefix)
| None -> None) in
let all_tokens = cmd_name :: rest in
(* last element of rest, or "" when the command has no arguments *)
let last_token = match rest with
| [] -> "" | _ -> List.nth rest (List.length rest - 1) in
(* only treat the last token as a completed subcommand when nushell
* sends a trailing empty token (cursor is after a space).
* otherwise the user is still typing and we treat it as partial. *)
let lookup_tokens = if last_token = "" then all_tokens
else match rest with
| _ :: _ -> cmd_name :: List.rev (List.tl (List.rev rest))
| _ -> [cmd_name] in
(* pairs the lookup result for tokens with the partial token; both arms
* carry the find_result outcome through unchanged *)
let resolve tokens partial =
match find_result tokens with
| Some _ as found -> (found, partial)
| None -> (None, partial) in
let found, partial = resolve lookup_tokens last_token in
(* try on-the-fly resolution when no match or only a parent matched.
* a match is accepted as-is when at most one trailing token of
* lookup_tokens is left unmatched; note that lookup_tokens still contains
* the trailing empty token when the cursor follows a space. *)
let lookup_depth = List.length lookup_tokens in
let result, partial = match found with
| Some (_, _, depth) when depth >= lookup_depth - 1 ->
(* exact or near-exact match — use it *)
(found, partial)
| _ ->
(* no match, or only a parent matched — try on-the-fly resolution *)
(match find_in_path cmd_name with
| Some path ->
(match resolve_and_cache ~dir:user_dir ~mandirs cmd_name path with
| Some _pairs -> resolve lookup_tokens last_token
| None -> (found, partial))
| None -> (found, partial)) in
let candidates = match result with
| None -> []
| Some (_matched_name, r, depth) ->
(* when the match is shallower than requested, the user already
* typed a subcommand beyond the matched level — don't show
* sibling subcommands, only flags *)
let sub_candidates = if depth < lookup_depth - 1 then [] else
let subs = match r.subcommands with
| _ :: _ -> r.subcommands
| [] -> subcommands_of dirs _matched_name in
List.filter_map (fun (subcommand : subcommand) ->
let score = fuzzy_score partial subcommand.name in
if score > 0 then Some (score, completion_json subcommand.name subcommand.desc) else None
) subs in
(* build flag completion candidates from the entry list.
* for flags with both short and long forms (Both), we pick which form
* to display based on what the user is currently typing:
* - if the partial input matches the short flag better, show the short
* flag as the value and note the long form in the description
* - otherwise (including empty partial), prefer the long flag and note
* the short form in the description
*
* parameter names are appended to descriptions in angle brackets for
* mandatory params and square brackets for optional ones, matching the
* conventions users expect from cli help text. *)
let flag_candidates = List.filter_map (fun (entry : entry) ->
let base_desc = match entry.param with
| Some (Mandatory p) -> if entry.desc <> "" then entry.desc ^ " <" ^ p ^ ">" else "<" ^ p ^ ">"
| Some (Optional p) -> if entry.desc <> "" then entry.desc ^ " [" ^ p ^ "]" else "[" ^ p ^ "]"
| None -> entry.desc in
let flag, desc = match entry.switch with
| Long l -> ("--" ^ l, base_desc)
| Short c -> (Printf.sprintf "-%c" c, base_desc)
| Both (c, l) ->
(* score the partial against both forms to decide which to present.
* e.g. typing "-s" scores higher against "-s" than "--squeeze-blank",
* so we show "-s (aka --squeeze-blank)". when the partial is empty or
* matches the long form better, we default to the long form. *)
let long_flag = "--" ^ l in
let short_flag = Printf.sprintf "-%c" c in
let long_score = fuzzy_score partial long_flag in
let short_score = fuzzy_score partial short_flag in
if short_score > long_score then
(short_flag, Printf.sprintf "(aka %s) %s" long_flag base_desc)
else
(long_flag, Printf.sprintf "(aka %s) %s" short_flag base_desc) in
let score = fuzzy_score partial flag in
if score > 0 then Some (score, completion_json flag desc) else None
) r.entries in
(* subcommands and flags are ranked together, highest score first *)
let scored = sub_candidates @ flag_candidates in
List.sort (fun (a, _) (b, _) -> compare b a) scored
|> List.map snd in
(* determine whether to return our candidates or fall back to nushell's
* native file completer (via null). nushell's protocol is either/or:
* returning candidates suppresses file completions, returning null
* enables them with full nushell UX (colors, sorting, quoting).
*
* we return null when:
* - we have no candidates at all (unknown command, no match)
* - the user is at a leaf command and not typing a flag — this is
* the position where file arguments are expected, so hand off to
* nushell's native file completer for the best experience *)
let typing_flag = String.length partial > 0 && partial.[0] = '-' in
let has_subcommands = match result with
| Some (matched_name, r, _) ->
r.subcommands <> [] || subcommands_of dirs matched_name <> []
| None -> false in
let want_files = (not typing_flag) && (not has_subcommands) in
if want_files then print_string "null\n"
else if candidates = [] then print_string "null\n"
else Printf.printf "[%s]\n" (String.concat "," candidates)
(* "inshellah query CMD" — dump the stored data for [cmd] exactly as
 * lookup_raw returns it, followed by a newline. when the command is not
 * in any of [dirs], report that on stderr and exit with status 1. *)
let cmd_query cmd dirs =
  match lookup_raw dirs cmd with
  | Some data -> print_endline data
  | None ->
    Printf.eprintf "not found: %s\n" cmd;
    exit 1
(* load a newline-separated list of command names from the file at [path]
 * (used for both --ignore and --help-only lists).
 * every line is trimmed; blank lines and lines starting with '#' are skipped.
 *
 * loading stays best-effort: if the file cannot be opened or read, a
 * warning is printed on stderr and the empty set is returned, so a missing
 * or mistyped path is visible instead of being silently treated as an empty
 * list. only Sys_error is handled; other exceptions are left to propagate. *)
let load_ignorelist path =
  let keep_entry raw_line =
    match String.trim raw_line with
    | "" -> None
    | entry when entry.[0] = '#' -> None
    | entry -> Some entry in
  match In_channel.with_open_text path In_channel.input_all with
  | contents ->
    contents
    |> String.split_on_char '\n'
    |> List.filter_map keep_entry
    |> SSet.of_list
  | exception Sys_error reason ->
    (* the Sys_error message normally already names the offending file *)
    Printf.eprintf "warning: ignoring unreadable list file: %s\n" reason;
    SSet.empty
(* parse the arguments of the "index" subcommand.
 * every argument is a prefix directory unless it is one of the options
 * --dir, --ignore or --help-only, each of which consumes the argument that
 * follows it as its value (whatever that argument looks like). an option
 * left dangling at the very end of the list is dropped.
 * --dir replaces the output directory (last one wins); --ignore and
 * --help-only accumulate the names loaded from each listed file.
 * returns (prefixes in command-line order, dir, ignore set, help-only set). *)
let parse_index_args args =
  let rec consume prefixes_rev dir skip_set help_only_set = function
    | [] -> (List.rev prefixes_rev, dir, skip_set, help_only_set)
    | "--dir" :: value :: remaining ->
      consume prefixes_rev value skip_set help_only_set remaining
    | "--ignore" :: file :: remaining ->
      let skip_set = SSet.union skip_set (load_ignorelist file) in
      consume prefixes_rev dir skip_set help_only_set remaining
    | "--help-only" :: file :: remaining ->
      let help_only_set = SSet.union help_only_set (load_ignorelist file) in
      consume prefixes_rev dir skip_set help_only_set remaining
    | [ ("--dir" | "--ignore" | "--help-only") ] ->
      (* option without a value: nothing left to consume *)
      consume prefixes_rev dir skip_set help_only_set []
    | prefix :: remaining ->
      consume (prefix :: prefixes_rev) dir skip_set help_only_set remaining
  in
  consume [] (default_store_path ()) SSet.empty SSet.empty args
(* map a store directory to the "man" directory that sits next to it, i.e.
 * replace the last path component with "man".
 * e.g. "/run/current-system/sw/share/inshellah" -> "/run/current-system/sw/share/man" *)
let man_dir_of_system_dir path =
  let parent_dir = Filename.dirname path in
  Filename.concat parent_dir "man"
(* parse common --dir arguments for complete/query/dump commands.
 * --dir takes a colon-separated list of paths. the first path is the writable
 * user cache dir; additional paths are read-only system directories.
 * when --dir is given more than once the last occurrence wins; a dangling
 * --dir with no value is ignored. all other arguments are returned in their
 * original order.
 *
 * empty path components are never used as directories: an empty first
 * component (including an empty --dir value) falls back to
 * default_store_path (), and empty system components are dropped.
 * String.split_on_char never returns the empty list, so without this an
 * empty component would become the directory "" and store paths built from
 * it would be relative to the current directory.
 *
 * NOTE(review): "--dir" is recognised anywhere in [args], so a "--dir" token
 * that belongs to the command line being completed is consumed here as
 * well — confirm how the nushell side separates the two.
 *
 * returns (user_dir, system_dirs, remaining positional arguments). *)
let parse_dir_args args =
  let rec split_options dir_value positional_rev = function
    | [] -> (dir_value, List.rev positional_rev)
    | "--dir" :: value :: remaining ->
      split_options (Some value) positional_rev remaining
    | [ "--dir" ] -> (dir_value, List.rev positional_rev)
    | arg :: remaining ->
      split_options dir_value (arg :: positional_rev) remaining
  in
  let dir_value, positional = split_options None [] args in
  let paths = match dir_value with
    | None -> []
    | Some value -> String.split_on_char ':' value in
  let user_dir = match paths with
    | first :: _ when first <> "" -> first
    | _ -> default_store_path () in
  let system_dirs = match paths with
    | [] -> []
    | _ :: rest -> List.filter (fun path -> path <> "") rest in
  (user_dir, system_dirs, positional)
(* --- entry point ---
 * look at the first command-line argument and hand the remaining ones to
 * the matching subcommand handler; anything unrecognised prints the usage
 * text. *)
let () =
  let cli_args = List.tl (Array.to_list Sys.argv) in
  match cli_args with
  | "index" :: index_args ->
    let (prefixes, dir, ignorelist, help_only) = parse_index_args index_args in
    (match prefixes with
     | [] ->
       Printf.eprintf "error: index requires at least one prefix dir\n";
       exit 1
     | _ :: _ -> ());
    (* every prefix contributes its bin/ and share/man/ directories *)
    let under sub = List.map (fun prefix -> Filename.concat prefix sub) prefixes in
    let bindirs = under "bin" in
    let mandirs = under "share/man" in
    cmd_index bindirs mandirs ignorelist help_only dir
  | "complete" :: complete_args ->
    let (user_dir, system_dirs, spans) = parse_dir_args complete_args in
    (* keep only the sibling man directories that actually exist *)
    let existing_man_dir store_dir =
      let candidate = man_dir_of_system_dir store_dir in
      if is_dir candidate then Some candidate else None in
    let man_dirs = List.filter_map existing_man_dir system_dirs in
    cmd_complete spans user_dir system_dirs man_dirs
  | "query" :: query_args ->
    (match parse_dir_args query_args with
     | (user_dir, system_dirs, [cmd]) ->
       cmd_query cmd (user_dir :: system_dirs)
     | _ ->
       Printf.eprintf "error: query CMD [--dir PATH[:PATH...]]\n";
       exit 1)
  | "dump" :: dump_args ->
    let (user_dir, system_dirs, _) = parse_dir_args dump_args in
    cmd_dump (user_dir :: system_dirs)
  | ["manpage"; file] -> cmd_manpage file
  | ["manpage-dir"; dir] -> cmd_manpage_dir dir
  | _ -> usage ()