This commit is contained in:
atagen 2026-03-18 15:40:47 +11:00
commit d16ece28e2
22 changed files with 4798 additions and 0 deletions

0
bin/.ocamlformat Normal file
View file

4
bin/dune Normal file
View file

@ -0,0 +1,4 @@
(executable
(public_name inshellah)
(name main)
(libraries inshellah))

992
bin/main.ml Normal file
View file

@ -0,0 +1,992 @@
(* main.ml — cli entry point for inshellah, a nushell completions engine.
*
* inshellah generates nushell "extern" definitions for external commands by
* parsing their manpages and --help output. it has two main modes:
*
* 1. indexing (batch): scan a prefix directory's bin/ and share/man/,
* extract completions for every binary, and write them to a cache dir.
* this is typically run once per nix profile or system update.
*
* 2. completing (interactive): given a command and its current arguments,
* look up the cached data and return json completion candidates for
* nushell's custom completer protocol.
*
* the indexing pipeline for each binary:
* a. classify the binary (skip? try --help? try native completions?)
* b. if the tool has native nushell completion support, try various
* subcommand patterns ("completions nushell", "--completion nushell", etc.)
* c. otherwise, run the tool with --help/-h and parse the output
* d. recursively resolve subcommands (depth-limited to 5)
* e. after binaries, parse manpages for any commands not yet covered
*
* parallelism: indexing forks per binary, and subcommand resolution forks
* per subcommand. results are marshaled back via pipes. this gives good
* throughput on multi-core systems while keeping the code simple (no threads,
* no async runtime - just unix fork/pipe/waitpid).
*)
open Inshellah.Parser
open Inshellah.Manpage
open Inshellah.Nushell
open Inshellah.Store
module SSet = Set.Make(String)
(* write the usage text to stderr and terminate with exit status 1.
 * reached whenever the command line does not name a valid subcommand. *)
let usage () =
  prerr_string
    {|inshellah - nushell completions engine
Usage:
inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
Index completions into a directory of JSON/nu files.
PREFIX is a directory containing bin/ and share/man/.
Default dir: $XDG_CACHE_HOME/inshellah
--ignore FILE skip listed commands entirely
--help-only FILE skip manpages for listed commands, use --help instead
inshellah complete CMD [ARGS...] [--dir PATH] [--system-dir PATH]
Nushell custom completer. Outputs JSON completion candidates.
Falls back to --help resolution if command is not indexed.
inshellah query CMD [--dir PATH] [--system-dir PATH]
Print stored completion data for CMD.
inshellah dump [--dir PATH] [--system-dir PATH]
List indexed commands.
inshellah manpage FILE Parse a manpage and emit nushell extern
inshellah manpage-dir DIR Batch-process manpages under DIR
|};
  exit 1
(* manpage sections that contain command documentation.
 * section 1 = user commands, section 8 = system administration commands.
 * each number is turned into a "man<N>" subdirectory name by the code that
 * walks a man directory (cmd_manpage_dir, manpaged_commands, cmd_index). *)
let command_sections = [1; 8]
(* literal substring test built on Str: true when [sub] occurs anywhere in
 * [s]. regexp_string quotes [sub], so regex metacharacters have no special
 * meaning here. *)
let contains_str s sub =
  match Str.search_forward (Str.regexp_string sub) s 0 with
  | _ -> true
  | exception Not_found -> false
(* heuristic: does [text] look like nushell source code?
 * the text must be longer than 20 characters (short error messages are
 * rejected up front) and contain one of the usual declaration markers:
 * "export extern", "export def", or both "module " and "export". *)
let is_nushell_source text =
  if String.length text <= 20 then false
  else
    let has marker = contains_str text marker in
    has "export extern"
    || has "export def"
    || (has "module " && has "export")
(* derive the command name from a manpage path.
 * "ls.1.gz" -> strip ".gz" -> "ls.1" -> drop the extension -> "ls".
 * a name without any extension is returned as-is. *)
let cmd_name_of_manpage path =
  let file = Filename.basename path in
  let uncompressed =
    match Filename.chop_suffix_opt ~suffix:".gz" file with
    | Some stem -> stem
    | None -> file in
  Filename.remove_extension uncompressed
(* sanitized environment handed to child processes (computed once, lazily).
 * every binding of the current environment is kept, in order, except the
 * display/session-related ones (DISPLAY, WAYLAND_DISPLAY,
 * DBUS_SESSION_BUS_ADDRESS, XAUTHORITY). dropping those keeps gui tools from
 * opening windows, or hanging on a display connection, when they are run
 * with --help (ckb-next was one such tool). *)
let safe_env = lazy (
  let blocked =
    ["DISPLAY="; "WAYLAND_DISPLAY="; "DBUS_SESSION_BUS_ADDRESS="; "XAUTHORITY="] in
  let is_blocked binding =
    List.exists (fun prefix -> String.starts_with ~prefix binding) blocked in
  Unix.environment ()
  |> Array.to_list
  |> List.filter (fun binding -> not (is_blocked binding))
  |> Array.of_list)
(* non-blocking drain of a pipe fd into a buffer.
 * polls [rd] with a zero timeout and appends whatever is readable to [buf],
 * stopping as soon as nothing is pending, end-of-file is reached, or a read
 * fails. safe to call repeatedly; the fork/pipe call sites use it to keep
 * pipes empty so children never block on write. *)
let drain_fd rd buf =
  let size = 8192 in
  let scratch = Bytes.create size in
  let rec pump () =
    match Unix.select [rd] [] [] 0.0 with
    | ([], _, _) -> ()
    | _ ->
      (match Unix.read rd scratch 0 size with
       | 0 -> ()
       | got ->
         Buffer.add_subbytes buf scratch 0 got;
         pump ()
       | exception Unix.Unix_error _ -> ())
  in
  pump ()
(* run a command with a timeout, capturing its stdout+stderr.
 *
 * [args] is the full argv (program first); [timeout_ms] is the wall-clock
 * budget in milliseconds. the child gets /dev/null as stdin, the sanitized
 * safe_env environment, and stdout+stderr merged onto one pipe. the pipe is
 * read with select() polling until the child closes it or the deadline
 * passes; on deadline (or on a read error) the child is killed with SIGKILL.
 * the child is always reaped before returning.
 *
 * peculiarity: the child is started with /tmp as working directory so tools
 * that drop side-effect files (like ckb-next-dev-detect-report.gz) don't
 * pollute the user's working directory. we chdir to /tmp just for the spawn
 * and restore the previous directory right after, on success and on failure.
 *
 * peculiarity: the select timeout is capped at 0.05s per iteration so the
 * deadline is checked frequently even when no data arrives.
 *
 * returns None if [args] is empty, if the process could not be started, or
 * if nothing was captured. output captured before a timeout kill is still
 * returned as Some. *)
let run_cmd args timeout_ms =
  match args with
  | [] -> None
  | prog :: _ ->
    let (rd, wr) = Unix.pipe () in
    let devnull = Unix.openfile "/dev/null" [Unix.O_RDONLY] 0 in
    let saved_cwd = Sys.getcwd () in
    Sys.chdir "/tmp";
    let spawned =
      match
        Unix.create_process_env prog (Array.of_list args)
          (Lazy.force safe_env) devnull wr wr
      with
      | pid -> Some pid
      | exception Unix.Unix_error _ -> None in
    Sys.chdir saved_cwd;
    (* the parent's copies are closed exactly once, whether or not the spawn
     * succeeded; closing them a second time would raise EBADF *)
    Unix.close wr;
    Unix.close devnull;
    match spawned with
    | None -> Unix.close rd; None
    | Some pid ->
      let buf = Buffer.create 4096 in
      let chunk = Bytes.create 8192 in
      let deadline =
        Unix.gettimeofday () +. (float_of_int timeout_ms /. 1000.0) in
      (* returns true when the child has to be killed (deadline passed or the
       * pipe became unreadable), false on a clean end-of-file *)
      let rec pump () =
        let remaining = deadline -. Unix.gettimeofday () in
        if remaining <= 0.0 then true
        else
          match Unix.select [rd] [] [] (min remaining 0.05) with
          | ([], _, _) -> pump ()
          | _ ->
            (match Unix.read rd chunk 0 (Bytes.length chunk) with
             | 0 -> false
             | n -> Buffer.add_subbytes buf chunk 0 n; pump ()
             | exception Unix.Unix_error (Unix.EINTR, _, _) -> pump ()
             | exception Unix.Unix_error _ -> true)
          | exception Unix.Unix_error (Unix.EINTR, _, _) -> pump ()
      in
      let must_kill = pump () in
      Unix.close rd;
      if must_kill then
        (try Unix.kill pid Sys.sigkill with Unix.Unix_error _ -> ());
      (* always reap so no zombie is left behind; retry if interrupted *)
      let rec reap () =
        match Unix.waitpid [] pid with
        | _ -> ()
        | exception Unix.Unix_error (Unix.EINTR, _, _) -> reap ()
        | exception Unix.Unix_error _ -> () in
      reap ();
      if Buffer.length buf > 0 then Some (Buffer.contents buf) else None
(* true when [path] (symlinks followed by stat) is a regular file with at
 * least one execute bit set; false for anything else, including paths that
 * cannot be stat'ed. *)
let is_executable path =
  match Unix.stat path with
  | { Unix.st_kind = Unix.S_REG; st_perm; _ } -> st_perm land 0o111 <> 0
  | _ -> false
  | exception Unix.Unix_error _ -> false
(* check whether a file is a script, i.e. starts with a "#!" shebang.
 * symlinks are resolved with realpath before the file is opened.
 * returns false when the path cannot be resolved, opened or read, or when
 * the file is shorter than two bytes. the channel is closed on every path,
 * including read failures. *)
let is_script path =
  match open_in_bin (Unix.realpath path) with
  | exception (Unix.Unix_error _ | Sys_error _) -> false
  | ic ->
    Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () ->
      match really_input_string ic 2 with
      | magic -> String.equal magic "#!"
      | exception (End_of_file | Sys_error _) -> false)
(* scan an elf binary for string needles without loading the entire file.
 *
 * returns a table whose keys are the needles that were found. files that do
 * not start with the elf magic are not scanned and yield an empty table.
 * the file is read in 64kb chunks; the last max_needle bytes of each round
 * are carried over to the front of the buffer so needles that straddle a
 * chunk boundary are still seen. scanning stops early once every needle has
 * been found.
 *
 * peculiarity: on any open/read failure (including a file shorter than the
 * 4-byte magic) all needles are marked as found. this is a conservative
 * fallback - we'd rather try --help on an unreadable binary than skip it.
 * the channel is closed on every path, including those failures.
 *
 * the search is a manual byte-by-byte comparison rather than Str - this
 * runs on every binary in the prefix, so it needs to be fast. *)
let elf_scan path needles =
  let found = Hashtbl.create 4 in
  (* does [needle] occur in buf.[0 .. total-1]? *)
  let occurs buf total needle =
    let nlen = String.length needle in
    let rec same pos j =
      j >= nlen || (Bytes.get buf (pos + j) = needle.[j] && same pos (j + 1)) in
    let rec from pos = pos <= total - nlen && (same pos 0 || from (pos + 1)) in
    from 0 in
  (try
     let ic = open_in_bin (Unix.realpath path) in
     Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () ->
       if String.equal (really_input_string ic 4) "\x7fELF" then begin
         let max_needle =
           List.fold_left (fun m n -> max m (String.length n)) 0 needles in
         let chunk_size = 65536 in
         let buf = Bytes.create (chunk_size + max_needle) in
         (* [carry] bytes from the previous round sit at the front of buf *)
         let rec scan carry missing =
           if missing <> [] then begin
             let n = input ic buf carry chunk_size in
             if n > 0 then begin
               let total = carry + n in
               List.iter (fun needle ->
                 if occurs buf total needle then
                   Hashtbl.replace found needle true
               ) missing;
               let keep = min max_needle total in
               Bytes.blit buf (total - keep) buf 0 keep;
               scan keep
                 (List.filter (fun n -> not (Hashtbl.mem found n)) missing)
             end
           end in
         scan 0 needles
       end)
   with Unix.Unix_error _ | Sys_error _ | End_of_file ->
     List.iter (fun n -> Hashtbl.replace found n true) needles);
  found
(* detect nix-generated c wrappers and extract the real binary path.
 * nix's makeCWrapper creates small c programs that set up the environment
 * and exec the real binary. these wrappers won't contain "-h" or "completion"
 * themselves, so elf_scan alone would say "skip". this function looks for
 * the "makeCWrapper" marker in the file and, if present, returns the first
 * /nix/store/.../bin/... path found in it, provided that path exists.
 *
 * returns None when the file cannot be resolved/opened/read, is larger than
 * 64kb, carries no marker, or names no existing target.
 *
 * peculiarity: the read is capped at 64kb to avoid pulling a large
 * non-wrapper binary into memory. the channel is closed on every path,
 * including read failures. *)
let nix_wrapper_target path =
  let max_wrapper_size = 65536 in
  let read_if_small file =
    let ic = open_in_bin file in
    Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () ->
      let len = in_channel_length ic in
      if len > max_wrapper_size then None
      else Some (really_input_string ic len)) in
  match read_if_small (Unix.realpath path) with
  | exception (Unix.Unix_error _ | Sys_error _ | End_of_file) -> None
  | None -> None
  | Some text ->
    if not (contains_str text "makeCWrapper") then None
    else
      let re = Str.regexp "/nix/store/[a-z0-9]+-[^' \n\r\x00]+/bin/[a-zA-Z0-9._-]+" in
      (match Str.search_forward re text 0 with
       | _ ->
         let target = Str.matched_string text in
         if Sys.file_exists target then Some target else None
       | exception Not_found -> None)
(* heuristic filter for binary names that should never be indexed.
 * rejects: the empty name, "-", dotfiles, anything starting with "lib",
 * names ending in "-daemon", "-wrapped" or ".so", and names that contain no
 * ascii letter or digit at all (punctuation-only names). *)
let skip_name name =
  let is_alnum = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
    | _ -> false in
  let has_suffix suffix = String.ends_with ~suffix name in
  match name with
  | "" | "-" -> true
  | _ ->
    name.[0] = '.'
    || String.starts_with ~prefix:"lib" name
    || List.exists has_suffix ["-daemon"; "-wrapped"; ".so"]
    || not (String.exists is_alnum name)
(* classification result for a binary, as produced by classify_binary:
 * Skip                - don't index this binary at all
 * Try_help            - only try --help (scripts, binaries without a
 *                       "completion" string, nix wrappers)
 * Try_native_and_help - try native nushell completion first, fall back to
 *                       --help *)
type bin_class = Skip | Try_help | Try_native_and_help
(* choose the indexing strategy for the binary [name] inside [bindir].
 * checks run in this order, first hit wins:
 * 1. nushell builtin or rejected by skip_name -> Skip
 * 2. not an executable regular file           -> Skip
 * 3. script (has a shebang)                   -> Try_help
 * 4. elf_scan reports "completion"            -> Try_native_and_help
 * 5. elf_scan reports "-h"                    -> Try_help
 * 6. nix wrapper with a resolvable target     -> Try_help
 * 7. anything else                            -> Skip *)
let classify_binary bindir name =
  if is_nushell_builtin name || skip_name name then Skip
  else begin
    let path = Filename.concat bindir name in
    match is_executable path with
    | false -> Skip
    | true when is_script path -> Try_help
    | true ->
      let hits = elf_scan path ["-h"; "completion"] in
      let hit needle = Hashtbl.mem hits needle in
      if hit "completion" then Try_native_and_help
      else if hit "-h" then Try_help
      else
        (match nix_wrapper_target path with
         | Some _ -> Try_help
         | None -> Skip)
  end
(* number of cpu cores, taken as the count of lines in /proc/cpuinfo that
 * start with "processor" (never less than 1).
 * falls back to 4 when /proc/cpuinfo cannot be opened or read
 * (e.g. on non-linux systems). *)
let num_cores () =
  try
    let ic = open_in "/proc/cpuinfo" in
    let rec count seen =
      match input_line ic with
      | line ->
        count (if String.starts_with ~prefix:"processor" line then seen + 1 else seen)
      | exception End_of_file -> seen in
    let processors = count 0 in
    close_in ic;
    max 1 processors
  with _ -> 4
(* ask a binary for its own nushell completions.
 * runs [bin_path] with each of the argument patterns below, in order, giving
 * every attempt a 500ms timeout, and returns the first output that
 * is_nushell_source accepts. returns None when no attempt qualifies.
 *
 * the patterns are meant to cover the usual generators - cobra (go),
 * clap (rust), click (python) - plus various ad-hoc implementations. *)
let try_native_completion bin_path =
  let patterns = [
    ["completions"; "nushell"];
    ["completion"; "nushell"];
    ["--completions"; "nushell"];
    ["--completion"; "nushell"];
    ["generate-completion"; "nushell"];
    ["--generate-completion"; "nushell"];
    ["shell-completions"; "nushell"];
  ] in
  let attempt extra =
    match run_cmd (bin_path :: extra) 500 with
    | Some text when is_nushell_source text -> Some text
    | Some _ | None -> None in
  List.find_map attempt patterns
(* parse one manpage file.
 * the command name comes from the synopsis when extract_synopsis_command
 * finds one, otherwise from the file name. returns None when that name is a
 * nushell builtin; otherwise Some (cmd, parsed, subs) where:
 * - parsed is the result of parse_manpage_string, with its subcommand list
 *   replaced by the per-subcommand sections when any were found
 * - subs pairs "<cmd> <subcommand>" with the parse result of each section *)
let parse_manpage_for_command file =
  let contents = read_manpage_file file in
  let fallback = cmd_name_of_manpage file in
  let cmd = Option.value (extract_synopsis_command contents) ~default:fallback in
  if is_nushell_builtin cmd then None
  else begin
    let parsed = parse_manpage_string contents in
    let sections = extract_subcommand_sections contents in
    let parsed =
      match sections with
      | [] -> parsed
      | _ :: _ ->
        { parsed with
          subcommands =
            List.map (fun (name, desc, _) -> { name; desc }) sections } in
    let per_sub =
      List.map (fun (sub, _, sub_result) -> (cmd ^ " " ^ sub, sub_result))
        sections in
    Some (cmd, parsed, per_sub)
  end
(* "inshellah manpage FILE" - parse one manpage and print the generated
 * nushell extern on stdout. prints nothing when the page yields no command
 * or no entries. *)
let cmd_manpage file =
  match parse_manpage_for_command file with
  | None -> ()
  | Some (cmd, parsed, _) ->
    if parsed.entries <> [] then print_string (generate_extern cmd parsed)
(* "inshellah manpage-dir DIR" - run cmd_manpage on every file found in the
 * man<N> subdirectories of DIR, for each section in command_sections.
 * a file that fails to process is skipped silently (best effort). *)
let cmd_manpage_dir dir =
  let emit_section section =
    let subdir = Filename.concat dir (Printf.sprintf "man%d" section) in
    if is_dir subdir then
      Sys.readdir subdir
      |> Array.iter (fun entry ->
        try cmd_manpage (Filename.concat subdir entry) with _ -> ()) in
  List.iter emit_section command_sections
(* safety limit for subcommand resolution, guarding against runaway recursion
 * on tools with enormous subcommand trees. once help_resolve_par has
 * collected this many results it stops enqueueing newly discovered
 * subcommands. work that is already queued still completes, so the final
 * result count can exceed this number - it is a soft limit. *)
let max_resolve_results = 500
(* exception-safe wrapper around parse_manpage_for_command.
 * returns the parse only when it carries something useful (at least one
 * entry or one per-subcommand section); any exception yields None. *)
let process_manpage file =
  try
    match parse_manpage_for_command file with
    | None -> None
    | Some (_, parsed, subs) as hit ->
      if parsed.entries = [] && subs = [] then None else hit
  with _ -> None
(* set of command names that have a manpage under [mandir], derived from the
 * file names in each man<N> section directory listed in command_sections.
 * indexing uses it to skip --help for commands that the manpage phase will
 * handle instead (manpages are more reliable than --help). *)
let manpaged_commands mandir =
  let add_section names section =
    let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
    if not (is_dir subdir) then names
    else
      Sys.readdir subdir
      |> Array.fold_left
           (fun names entry -> SSet.add (cmd_name_of_manpage entry) names)
           names in
  List.fold_left add_section SSet.empty command_sections
(* parallel structured help resolver - resolves a command and all of its
 * subcommands by running --help (falling back to -h) on each one, forking a
 * child process per subcommand for parallelism.
 *
 * arguments: [cmd] is the executable to run, [rest] the subcommand path to
 * start from, [name] the display name recorded for that starting point, and
 * [timeout] the per-invocation budget in milliseconds (default 200).
 * returns (name, help_result) pairs in the order results were collected.
 *
 * the resolver works as a breadth-first queue:
 * 1. start with the root command in the queue
 * 2. fork a child for each queued item (up to num_cores concurrent)
 * 3. the child runs --help, parses the output, marshals the result via pipe
 * 4. the parent collects results and enqueues discovered subcommands
 * 5. repeat until the queue is empty and all children have finished
 *
 * depth is limited to 5 levels, and once max_resolve_results results have
 * been collected no further subcommands are enqueued, to prevent runaway
 * recursion on pathological command trees.
 *
 * peculiarity: the child detects "self-listing" - a subcommand whose --help
 * lists itself as a subcommand (e.g. "git help" listing "help"). following
 * it would recurse forever, so such results are discarded.
 *
 * peculiarity: children close the pipe fds of all other pending children
 * right after fork to prevent fd leaks. the parent drains pipes regularly
 * so children never block on a full pipe buffer.
 *
 * peculiarity: children terminate with Unix._exit, not exit. exit would run
 * at_exit handlers and flush stdio buffers inherited from the parent, which
 * can emit the parent's pending output a second time. a child that hits an
 * exception reports None instead of dying with an uncaught-exception trace. *)
let help_resolve_par ?(timeout=200) cmd rest name =
  let max_depth = 5 in
  let max_jobs = num_cores () in
  let queue = Queue.create () in
  Queue.push (rest, name, 0) queue;
  let results = ref [] in
  (* running count of [results], so the limit check is O(1) *)
  let n_results = ref 0 in
  (* pending: (pid, rd, buf, rest, name, depth) *)
  let pending = ref [] in
  let close_quietly fd = try Unix.close fd with Unix.Unix_error _ -> () in
  (* sleep up to 50ms, waking early when any pending pipe becomes readable *)
  let idle () =
    let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
    try ignore (Unix.select fds [] [] 0.05)
    with Unix.Unix_error (Unix.EINTR, _, _) -> () in
  let collect rd buf q_rest q_name q_depth =
    drain_fd rd buf;
    close_quietly rd;
    let data = Buffer.contents buf in
    let result : (help_result * subcommand list) option =
      if String.length data > 0 then
        try Marshal.from_string data 0 with _ -> None
      else None in
    match result with
    | None -> ()
    | Some (r, subs) ->
      let at_limit =
        q_depth >= max_depth || !n_results >= max_resolve_results in
      results := (q_name, r) :: !results;
      incr n_results;
      if not at_limit then
        List.iter (fun (sc : subcommand) ->
          Queue.push
            (q_rest @ [sc.name], q_name ^ " " ^ sc.name, q_depth + 1) queue
        ) subs in
  let reap () =
    pending := List.filter (fun (pid, rd, buf, q_rest, q_name, q_depth) ->
      drain_fd rd buf;
      match Unix.waitpid [Unix.WNOHANG] pid with
      | (0, _) -> true
      | _ -> collect rd buf q_rest q_name q_depth; false
      | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
        close_quietly rd; false
    ) !pending in
  let wait_for_slot () =
    while List.length !pending >= max_jobs do
      reap ();
      if List.length !pending >= max_jobs then idle ()
    done in
  (* body of a forked child: resolve one queue item *)
  let resolve_one q_rest q_depth =
    let text = match run_cmd (cmd :: q_rest @ ["--help"]) timeout with
      | Some _ as r -> r
      | None -> run_cmd (cmd :: q_rest @ ["-h"]) timeout in
    match text with
    | None -> None
    | Some text ->
      (match parse_help text with
       | Error _ -> None
       | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] ->
         None
       | Ok r ->
         let self_listed = match q_rest with
           | [] -> false
           | _ ->
             let leaf = List.nth q_rest (List.length q_rest - 1) in
             List.exists (fun (sc : subcommand) -> sc.name = leaf)
               r.subcommands in
         if self_listed then None
         else
           let subs = if q_depth >= max_depth then [] else r.subcommands in
           Some (r, subs)) in
  while not (Queue.is_empty queue) || !pending <> [] do
    while not (Queue.is_empty queue) do
      let (q_rest, q_name, q_depth) = Queue.pop queue in
      wait_for_slot ();
      let (rd, wr) = Unix.pipe () in
      let pid = Unix.fork () in
      if pid = 0 then begin
        close_quietly rd;
        List.iter (fun (_, prd, _, _, _, _) -> close_quietly prd) !pending;
        let result = try resolve_one q_rest q_depth with _ -> None in
        (try
           let oc = Unix.out_channel_of_descr wr in
           Marshal.to_channel oc
             (result : (help_result * subcommand list) option) [];
           close_out oc
         with _ -> ());
        Unix._exit 0
      end else begin
        Unix.close wr;
        pending :=
          (pid, rd, Buffer.create 4096, q_rest, q_name, q_depth) :: !pending
      end
    done;
    if !pending <> [] then begin
      reap ();
      if !pending <> [] && Queue.is_empty queue then idle ()
    end
  done;
  List.rev !results
(* "inshellah index" - the main indexing command.
 * processes the binaries and manpages of each (bindir, mandir) pair and
 * writes completion data to [dir]. [bindirs] and [mandirs] are walked in
 * lockstep and must have the same length (List.iter2 raises
 * Invalid_argument otherwise).
 *
 * the pipeline has two phases per prefix:
 *
 * phase 1 (binaries): fork one child per binary. each child:
 * - tries native nushell completions (if classified as Try_native_and_help)
 * - falls back to help_resolve_par (which itself forks per subcommand)
 * - marshals the result back via pipe as a tagged variant:
 *     `Native of string                         raw nushell source
 *     `Parsed of (string * help_result) list    parsed flag data
 *     `None                                     nothing useful extracted
 *
 * phase 2 (manpages): sequentially parse manpages for commands not yet
 * covered by phase 1.
 *
 * commands in [ignorelist] are skipped entirely. commands in [help_only]
 * skip manpage parsing and only use --help. commands with manpages skip
 * --help in phase 1 (they are handled in phase 2).
 *
 * peculiarity: done_cmds tracks which commands have already been indexed so
 * nothing is written or counted twice across phases or across prefix
 * directories. this applies to native results as well as parsed ones.
 *
 * peculiarity: children terminate with Unix._exit, not exit. exit would run
 * at_exit handlers and flush stdio buffers inherited from the parent (such
 * as a pending "skipping ..." message), duplicating that output. *)
let cmd_index bindirs mandirs ignorelist help_only dir =
  ensure_dir dir;
  let done_cmds = ref SSet.empty in
  let n_results = ref 0 in
  let close_quietly fd = try Unix.close fd with Unix.Unix_error _ -> () in
  let index_bindir bindir mandir =
    if not (is_dir bindir) then
      Printf.eprintf "skipping %s (not found)\n" bindir
    else begin
      let bins = Sys.readdir bindir in
      Array.sort String.compare bins;
      let manpaged = if is_dir mandir
        then manpaged_commands mandir else SSet.empty in
      let max_jobs = num_cores () in
      let classified = Array.map (fun name ->
        if SSet.mem name ignorelist then (name, Skip)
        else if SSet.mem name help_only then (name, classify_binary bindir name)
        else if SSet.mem name manpaged then (name, Skip)
        else (name, classify_binary bindir name)
      ) bins in
      (* pending: (pid, rd, buf, name) *)
      let pending = ref [] in
      let process_result name rd buf =
        drain_fd rd buf;
        close_quietly rd;
        let data = Buffer.contents buf in
        if String.length data > 0 then begin
          let result : [`Native of string | `Parsed of (string * help_result) list | `None] =
            try Marshal.from_string data 0 with _ -> `None in
          (match result with
           | `Native src ->
             (* same first-wins rule as for parsed results *)
             if not (SSet.mem name !done_cmds) then begin
               write_native ~dir name src;
               incr n_results
             end
           | `Parsed pairs ->
             List.iter (fun (cmd_name, r) ->
               if not (SSet.mem cmd_name !done_cmds) then begin
                 write_result ~dir ~source:"help" cmd_name r;
                 done_cmds := SSet.add cmd_name !done_cmds;
                 incr n_results
               end
             ) pairs
           | `None -> ())
        end;
        done_cmds := SSet.add name !done_cmds in
      let reap () =
        pending := List.filter (fun (pid, rd, buf, name) ->
          drain_fd rd buf;
          match Unix.waitpid [Unix.WNOHANG] pid with
          | (0, _) -> true
          | _ ->
            process_result name rd buf;
            false
          | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
            close_quietly rd; false
        ) !pending in
      let wait_for_slot () =
        while List.length !pending >= max_jobs do
          reap ();
          if List.length !pending >= max_jobs then begin
            let fds = List.map (fun (_, rd, _, _) -> rd) !pending in
            ignore (Unix.select fds [] [] 0.05)
          end
        done in
      Array.iter (fun (name, cls) ->
        match cls with
        | Skip -> ()
        | Try_help | Try_native_and_help ->
          wait_for_slot ();
          let (rd, wr) = Unix.pipe () in
          let pid = Unix.fork () in
          if pid = 0 then begin
            close_quietly rd;
            List.iter (fun (_, prd, _, _) -> close_quietly prd) !pending;
            let result =
              try
                let path = Filename.concat bindir name in
                let native = match cls with
                  | Try_native_and_help -> try_native_completion path
                  | Skip | Try_help -> None in
                match native with
                | Some src -> `Native src
                | None ->
                  let pairs = help_resolve_par ~timeout:200 path [] name in
                  if pairs <> [] then `Parsed pairs else `None
              with _ -> `None in
            (try
               let oc = Unix.out_channel_of_descr wr in
               Marshal.to_channel oc
                 (result : [`Native of string | `Parsed of (string * help_result) list | `None]) [];
               close_out oc
             with _ -> ());
            Unix._exit 0
          end else begin
            Unix.close wr;
            pending := (pid, rd, Buffer.create 4096, name) :: !pending
          end
      ) classified;
      while !pending <> [] do
        reap ();
        if !pending <> [] then begin
          let fds = List.map (fun (_, rd, _, _) -> rd) !pending in
          ignore (Unix.select fds [] [] 0.05)
        end
      done;
      (* Phase 2: manpages *)
      if is_dir mandir then
        List.iter (fun section ->
          let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
          if is_dir subdir then begin
            let files = Sys.readdir subdir in
            Array.sort String.compare files;
            Array.iter (fun file ->
              let base_cmd = cmd_name_of_manpage file in
              if SSet.mem base_cmd help_only then ()
              else match process_manpage (Filename.concat subdir file) with
                | None -> ()
                | Some (cmd, result, subs) ->
                  if not (SSet.mem cmd !done_cmds) then begin
                    write_result ~dir ~source:"manpage" cmd result;
                    done_cmds := SSet.add cmd !done_cmds;
                    incr n_results
                  end;
                  List.iter (fun (sub_cmd, sub_result) ->
                    if not (SSet.mem sub_cmd !done_cmds) then begin
                      write_result ~dir ~source:"manpage" sub_cmd sub_result;
                      done_cmds := SSet.add sub_cmd !done_cmds;
                      incr n_results
                    end
                  ) subs
            ) files
          end
        ) command_sections
    end in
  List.iter2 index_bindir bindirs mandirs;
  Printf.printf "indexed %d commands into %s\n" !n_results dir
(* "inshellah dump" - print the number of indexed commands, then one line
 * per command with its source type in brackets ("?" when unknown). *)
let cmd_dump dirs =
  let cmds = all_commands dirs in
  Printf.printf "%d commands\n" (List.length cmds);
  let print_one cmd =
    let src = Option.value (file_type_of dirs cmd) ~default:"?" in
    Printf.printf " %-40s [%s]\n" cmd src in
  List.iter print_one cmds
(* look [name] up in $PATH: returns the first "<dir>/<name>" that
 * is_executable accepts, walking the directories in $PATH order, or None
 * when there is no such file or $PATH is unset.
 * used during completion to find binaries for on-the-fly resolution. *)
let find_in_path name =
  match Sys.getenv_opt "PATH" with
  | None -> None
  | Some path_var ->
    let candidate dir =
      let full = Filename.concat dir name in
      if is_executable full then Some full else None in
    List.find_map candidate (String.split_on_char ':' path_var)
(* resolve a command's completions on the fly and cache them.
 * called during "complete" when a command isn't in the index: runs
 * help_resolve_par on [path] and, when it produced anything, makes sure
 * [dir] exists and writes every resolved (sub)command into it.
 * returns the resolved pairs, or None when nothing was resolved. *)
let resolve_and_cache ~dir name path =
  match help_resolve_par ~timeout:200 path [] name with
  | [] -> None
  | pairs ->
    ensure_dir dir;
    List.iter (fun (cmd_name, parsed) -> write_result ~dir cmd_name parsed) pairs;
    Some pairs
(* render one completion candidate as a json object with a "value" and a
 * "description" field, both passed through escape_json, as expected by
 * nushell's completer protocol. *)
let completion_json value desc =
  let value_field = escape_json value in
  let desc_field = escape_json desc in
  Printf.sprintf "{\"value\":\"%s\",\"description\":\"%s\"}"
    value_field desc_field
(* fuzzy matching: returns a score > 0 if needle is a subsequence of
 * haystack (compared case-insensitively), 0 otherwise. an empty needle
 * matches everything with score 1. higher scores = better match:
 * - exact match: 1000
 * - prefix match: 900 + length bonus (share of the haystack covered, 0-100)
 * - subsequence: per matched char, 50 when it sits on a word boundary
 *   (start of string, after '-' or '_', or a lower->upper camelCase step),
 *   10 otherwise, plus 20 when it directly follows the previous matched char
 *
 * this drives the completion candidate ranking. users typing "ser" should
 * see "--server" ranked above "--preserve" even though both contain "ser"
 * as a subsequence. the word-boundary bonus achieves this.
 *
 * the "previous match" is tracked as an option: the first matched character
 * has no predecessor and therefore never earns the adjacency bonus, even
 * when it sits at index 0. *)
let fuzzy_score needle haystack =
  let nlen = String.length needle and hlen = String.length haystack in
  if nlen = 0 then 1
  else if nlen > hlen then 0
  else if needle = haystack then 1000
  else
    let needle = String.lowercase_ascii needle
    and haystack_lc = String.lowercase_ascii haystack in
    if String.starts_with ~prefix:needle haystack_lc then
      900 + (nlen * 100 / hlen)
    else
      let is_lower c = c >= 'a' && c <= 'z'
      and is_upper c = c >= 'A' && c <= 'Z' in
      (* boundaries are judged on the original casing so camelCase counts *)
      let is_boundary hi =
        hi = 0
        || (match haystack.[hi - 1] with
            | '-' | '_' -> true
            | prev -> is_lower prev && is_upper haystack.[hi]) in
      (* walk the haystack, greedily matching needle chars as a subsequence *)
      let ni, score, _, _ =
        String.fold_left (fun (ni, score, hi, prev_match) c ->
          if ni < nlen && c = needle.[ni] then
            let adjacent = match prev_match with
              | Some p -> p = hi - 1
              | None -> false in
            let bonus = (if is_boundary hi then 50 else 10)
                        + (if adjacent then 20 else 0) in
            (ni + 1, score + bonus, hi + 1, Some hi)
          else (ni, score, hi + 1, prev_match)
        ) (0, 0, 0, None) haystack_lc in
      if ni = nlen then score else 0
(* known privilege-escalation wrappers. when one of these is the first token,
 * we strip it and its options before completing the real command.
 *
 * rather than maintaining per-command option tables (fragile - e.g. sudo's
 * -h is --help not --host, and flags differ across implementations), we find
 * the real command by scanning for the first non-flag token that is a known
 * command (exists in the completion store or in $PATH). tokens like "root"
 * in "sudo -u root" are skipped because they aren't commands. *)
let elevation_commands =
["sudo"; "run0"; "doas"; "pkexec"; "su"; "calife"; "sux"; "sudoedit";
"please"; "super"; "priv"]
(* skip over an elevation command's flags and flag arguments to locate the
 * real command. [is_command] decides whether a token names a known command.
 * walking [args] left to right:
 * - "--" ends the scan: everything after it is returned (possibly empty)
 * - a token starting with '-' is a flag and is skipped
 * - the first token accepted by [is_command] ends the scan: it is returned
 *   together with everything after it
 * - any other token (e.g. a flag's argument) is skipped
 * returns None when the tokens run out first. *)
let find_real_command is_command args =
  let is_flag tok = String.starts_with ~prefix:"-" tok in
  let rec scan tokens =
    match tokens with
    | [] -> None
    | "--" :: after -> Some after
    | tok :: after ->
      if is_flag tok then scan after
      else if is_command tok then Some tokens
      else scan after
  in
  scan args
(* "inshellah complete CMD [ARGS...]" — the nushell custom completer.
* this is the hot path called every time the user presses tab in nushell.
*
* the completion logic:
* 1. try to find the command (or longest subcommand prefix) in the store
* 2. if not found, try on-the-fly resolution (find in $PATH, run --help, cache)
* 3. score all candidate completions against the partial input using fuzzy_score
* 4. output scored candidates as a json array
*
* subcommand resolution: the lookup tries longest prefix first.
* for "git add --", it first looks for "git add", then "git".
* this ensures subcommand-specific flags are shown.
*
* peculiarity: nushell sends a trailing empty token when the cursor is after
* a space ("git add "). in this case all_tokens includes the empty string.
* when the last token is non-empty, the user is still typing it, so we use
* it as the fuzzy filter. when empty, we show all candidates.
*
* peculiarity: if only a parent command matched (e.g. "git" matched but not
* "git add"), we suppress subcommand suggestions and only show flags. this
* prevents showing sibling subcommands when the user has already committed
* to a specific subcommand path. *)
let cmd_complete spans user_dir system_dirs =
  (* [spans] are the command-line tokens being completed. [user_dir] is placed
   * ahead of [system_dirs] in the list handed to every lookup, and is also the
   * ~dir passed to resolve_and_cache. the candidates are printed to stdout as
   * a bracketed, comma-separated list, highest fuzzy score first. *)
  let dirs = user_dir :: system_dirs in
  (* if the command line starts with a privilege-escalation wrapper, scan past
   * it to find the real command. we identify the command by checking the store
   * and $PATH — this avoids needing per-command option tables which are fragile
   * across different implementations. if no real command is found, fall back to
   * completing the elevation command itself. *)
  let spans =
    match spans with
    | cmd :: rest when List.mem cmd elevation_commands ->
      let is_command name =
        name <> ""
        && (Option.is_some (lookup dirs name)
            || Option.is_some (find_in_path name))
      in
      (match find_real_command is_command rest with
       | Some (_ :: _ as real_spans) -> real_spans
       | Some [] | None -> spans)
    | _ -> spans
  in
  match spans with
  | [] -> print_string "[]\n"
  | cmd_name :: rest ->
    (* longest prefix match: "git add" is tried before "git". yields the
     * matched name, its stored data and how many tokens formed the name. *)
    let find_result tokens =
      let rec longest rev_prefix len =
        match rev_prefix with
        | [] -> None
        | _ :: shorter ->
          let try_name = String.concat " " (List.rev rev_prefix) in
          (match lookup dirs try_name with
           | Some r -> Some (try_name, r, len)
           | None -> longest shorter (len - 1))
      in
      longest (List.rev tokens) (List.length tokens)
    in
    let all_tokens = cmd_name :: rest in
    (* split the arguments into the token under the cursor and everything
     * before it, with a single reversal. no arguments means an empty partial. *)
    let last_token, completed_args =
      match List.rev rest with
      | [] -> ("", [])
      | last :: rev_init -> (last, List.rev rev_init)
    in
    let partial = last_token in
    (* only treat the last token as a completed subcommand when nushell sends
     * a trailing empty token (cursor is after a space). otherwise the user is
     * still typing it, so it is the fuzzy filter and not part of the lookup. *)
    let lookup_tokens =
      if last_token = "" then all_tokens else cmd_name :: completed_args
    in
    let n_lookup = List.length lookup_tokens in
    let found = find_result lookup_tokens in
    (* NOTE(review): lookup_tokens carries a trailing "" only when the cursor
     * follows a space, so [n_lookup - 1] is one token stricter in that case
     * than when the user is mid-token — confirm this asymmetry is intended. *)
    let result =
      match found with
      | Some (_, _, depth) when depth >= n_lookup - 1 ->
        (* exact or near-exact match — use it *)
        found
      | _ ->
        (* no match, or only a parent matched — try on-the-fly resolution and,
         * if it produced something, repeat the lookup *)
        (match find_in_path cmd_name with
         | None -> found
         | Some path ->
           (match resolve_and_cache ~dir:user_dir cmd_name path with
            | Some _ -> find_result lookup_tokens
            | None -> found))
    in
    let candidates =
      match result with
      | None -> []
      | Some (matched_name, r, depth) ->
        (* when the match is shallower than requested, the user already typed
         * a subcommand beyond the matched level — don't show sibling
         * subcommands, only flags *)
        let sub_candidates =
          if depth < n_lookup - 1 then []
          else
            let subs =
              match r.subcommands with
              | [] -> subcommands_of dirs matched_name
              | stored -> stored
            in
            List.filter_map
              (fun (sc : subcommand) ->
                let s = fuzzy_score partial sc.name in
                if s > 0 then Some (s, completion_json sc.name sc.desc)
                else None)
              subs
        in
        (* parameter names are appended to descriptions in angle brackets for
         * mandatory params and square brackets for optional ones. *)
        let with_param desc rendered =
          if desc <> "" then desc ^ " " ^ rendered else rendered
        in
        (* prefix a description with the alternate spelling of the flag; an
         * empty description yields just the alias, without a trailing space. *)
        let with_alias alias desc =
          if desc = "" then Printf.sprintf "(aka %s)" alias
          else Printf.sprintf "(aka %s) %s" alias desc
        in
        (* one candidate per entry. for flags with both a short and a long
         * form, the partial is scored against both: the short form is shown
         * only when it scores strictly higher, otherwise (including an empty
         * partial) the long form wins. the other form goes in the
         * description. *)
        let flag_candidates =
          List.filter_map
            (fun (e : entry) ->
              let base_desc =
                match e.param with
                | Some (Mandatory p) -> with_param e.desc ("<" ^ p ^ ">")
                | Some (Optional p) -> with_param e.desc ("[" ^ p ^ "]")
                | None -> e.desc
              in
              let flag, desc =
                match e.switch with
                | Long l -> ("--" ^ l, base_desc)
                | Short c -> (Printf.sprintf "-%c" c, base_desc)
                | Both (c, l) ->
                  let long_flag = "--" ^ l in
                  let short_flag = Printf.sprintf "-%c" c in
                  let long_score = fuzzy_score partial long_flag in
                  let short_score = fuzzy_score partial short_flag in
                  if short_score > long_score then
                    (short_flag, with_alias long_flag base_desc)
                  else (long_flag, with_alias short_flag base_desc)
              in
              let s = fuzzy_score partial flag in
              if s > 0 then Some (s, completion_json flag desc) else None)
            r.entries
        in
        let scored = sub_candidates @ flag_candidates in
        (* descending by score *)
        scored
        |> List.sort (fun (a, _) (b, _) -> Int.compare b a)
        |> List.map snd
    in
    Printf.printf "[%s]\n" (String.concat "," candidates)
(* "inshellah query CMD" — dump the raw stored data for [cmd] to stdout,
 * terminated by a newline. [dirs] is the list of store directories handed to
 * lookup_raw. when nothing is stored for [cmd], a message goes to stderr and
 * the process exits with status 1. *)
let cmd_query cmd dirs =
  match lookup_raw dirs cmd with
  | Some data -> print_endline data
  | None ->
    Printf.eprintf "not found: %s\n" cmd;
    exit 1
(* load a newline-separated list of command names to ignore.
 * every line is trimmed; blank lines and lines starting with '#' are skipped.
 * a file that cannot be opened or read yields the empty set (best effort).
 * only Sys_error is handled that way, so unrelated exceptions are no longer
 * silently swallowed. *)
let load_ignorelist path =
  let keep line =
    match String.trim line with
    | "" -> None
    | name when name.[0] = '#' -> None
    | name -> Some name
  in
  match In_channel.with_open_text path In_channel.input_all with
  | contents ->
    contents
    |> String.split_on_char '\n'
    |> List.filter_map keep
    |> SSet.of_list
  | exception Sys_error _ -> SSet.empty
(* parse "index" subcommand arguments: prefix dirs + optional --dir, --ignore,
 * --help-only.
 *
 * returns (prefixes, dir, ignore, help_only):
 *   - prefixes: every non-option argument, in the order given
 *   - dir: the last --dir value, or default_store_path () when absent
 *   - ignore / help_only: union of the lists loaded from every --ignore /
 *     --help-only file
 *
 * an option given as the very last argument has no value to consume; rather
 * than letting it fall through and be indexed as a prefix directory, report
 * the mistake on stderr and exit with status 1. *)
let parse_index_args args =
  let missing_value flag =
    Printf.eprintf "error: %s requires an argument\n" flag;
    exit 1
  in
  let rec go prefixes dir ignored help_only = function
    | [] -> (List.rev prefixes, dir, ignored, help_only)
    | [ ("--dir" | "--ignore" | "--help-only") as flag ] -> missing_value flag
    | "--dir" :: path :: rest -> go prefixes path ignored help_only rest
    | "--ignore" :: path :: rest ->
      go prefixes dir (SSet.union ignored (load_ignorelist path)) help_only rest
    | "--help-only" :: path :: rest ->
      go prefixes dir ignored (SSet.union help_only (load_ignorelist path)) rest
    | prefix :: rest -> go (prefix :: prefixes) dir ignored help_only rest
  in
  go [] (default_store_path ()) SSet.empty SSet.empty args
(* parse the --dir/--system-dir options shared by complete/query/dump.
 *
 * returns (user_dir, system_dirs, positional):
 *   - user_dir: the last --dir value, or default_store_path () when absent
 *   - system_dirs: every --system-dir value, most recently given first
 *   - positional: all remaining arguments in their original order
 *
 * options are recognised at any position, so a literal "--dir X" or
 * "--system-dir X" pair among the positional arguments is consumed as an
 * option. an option name that is the final argument has no value to take and
 * is kept as a positional argument. *)
let parse_dir_args args =
  let rec scan ~user_dir ~system_dirs ~positional remaining =
    match remaining with
    | "--dir" :: path :: tail ->
      scan ~user_dir:path ~system_dirs ~positional tail
    | "--system-dir" :: path :: tail ->
      scan ~user_dir ~system_dirs:(path :: system_dirs) ~positional tail
    | arg :: tail ->
      scan ~user_dir ~system_dirs ~positional:(arg :: positional) tail
    | [] -> (user_dir, system_dirs, List.rev positional)
  in
  scan ~user_dir:(default_store_path ()) ~system_dirs:[] ~positional:[] args
(* --- entry point ---
 * the first command-line argument selects the subcommand and the remaining
 * ones are handed to that subcommand's argument parser. anything that is not
 * recognised falls through to usage (). *)
let () =
  let argv = List.tl (Array.to_list Sys.argv) in
  match argv with
  | [ "manpage"; file ] -> cmd_manpage file
  | [ "manpage-dir"; dir ] -> cmd_manpage_dir dir
  | "index" :: rest ->
    let prefixes, dir, ignorelist, help_only = parse_index_args rest in
    (match prefixes with
     | [] ->
       Printf.eprintf "error: index requires at least one prefix dir\n";
       exit 1
     | _ :: _ ->
       (* each prefix contributes its bin/ and share/man/ subdirectories *)
       let under sub = List.map (fun p -> Filename.concat p sub) prefixes in
       let bindirs = under "bin" in
       let mandirs = under "share/man" in
       cmd_index bindirs mandirs ignorelist help_only dir)
  | "complete" :: rest ->
    let user_dir, system_dirs, spans = parse_dir_args rest in
    cmd_complete spans user_dir system_dirs
  | "query" :: rest ->
    (* query takes exactly one positional argument: the command name *)
    (match parse_dir_args rest with
     | user_dir, system_dirs, [ cmd ] -> cmd_query cmd (user_dir :: system_dirs)
     | _ ->
       Printf.eprintf "error: query CMD [--dir PATH] [--system-dir PATH]\n";
       exit 1)
  | "dump" :: rest ->
    (* positional arguments, if any, are ignored by dump *)
    let user_dir, system_dirs, _ = parse_dir_args rest in
    cmd_dump (user_dir :: system_dirs)
  | _ -> usage ()