(* main.ml — cli entry point for inshellah, a nushell completions engine.
 *
 * inshellah generates nushell "extern" definitions for external commands by
 * parsing their manpages and --help output. it has two main modes:
 *
 *   1. indexing (batch): scan a prefix directory's bin/ and share/man/,
 *      extract completions for every binary, and write them to a cache dir.
 *      this is typically run once per nix profile or system update.
 *
 *   2. completing (interactive): given a command and its current arguments,
 *      look up the cached data and return JSON completion candidates for
 *      nushell's custom completer protocol.
 *
 * the indexing pipeline for each binary:
 *   a. classify the binary (skip? try --help? try native completions?)
 *   b. if the tool has native nushell completion support, run --help and
 *      discover subcommands containing "complet", then try them with "nushell"
 *   c. otherwise, run the tool with --help/-h and parse the output
 *   d. recursively resolve subcommands (depth-limited to 5)
 *   e. after binaries, parse manpages for any commands not yet covered
 *
 * parallelism: indexing forks per binary, and subcommand resolution forks
 * per subcommand. results are marshaled back via pipes. this gives good
 * throughput on multi-core systems while keeping the code simple (no threads,
 * no async runtime — just unix fork/pipe/waitpid). *)

open Inshellah.Parser
open Inshellah.Manpage
open Inshellah.Nushell
open Inshellah.Store

module SSet = Set.Make(String)

(* write the usage text to stderr and terminate with status 1.
 * reached when no valid subcommand is given. the text below is emitted
 * exactly as written, so it is deliberately left untouched. *)
let usage () =
  Printf.eprintf {|inshellah - nushell completions engine Usage: inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE] Index completions into a directory of JSON/nu files. PREFIX is a directory containing bin/ and share/man/. Default dir: $XDG_CACHE_HOME/inshellah --ignore FILE skip listed commands entirely --help-only FILE skip manpages for listed commands, use --help instead inshellah complete CMD [ARGS...] 
[--dir PATH[:PATH...]] Nushell custom completer. Outputs JSON completion candidates. Falls back to --help resolution if command is not indexed. --dir takes colon-separated paths. The first path is the writable user cache; additional paths are read-only system directories. Manpages are found via sibling share/man of system dir paths. inshellah query CMD [--dir PATH[:PATH...]] Print stored completion data for CMD. inshellah dump [--dir PATH[:PATH...]] List indexed commands. inshellah manpage FILE Parse a manpage and emit nushell extern inshellah manpage-dir DIR Batch-process manpages under DIR inshellah completions Generate nushell completions for inshellah |};
  exit 1

(* manpage sections scanned for command documentation:
 * 1 = user commands, 8 = system administration commands. *)
let command_sections = [1; 8]

(* true when needle occurs anywhere in haystack (literal match via Str,
 * no regex metacharacters are interpreted). *)
let contains_str haystack needle =
  match Str.search_forward (Str.regexp_string needle) haystack 0 with
  | _ -> true
  | exception Not_found -> false

(* heuristic: does text look like nushell source?
 * requires more than 20 characters (so short error messages do not qualify)
 * plus one of the usual declaration keywords. *)
let is_nushell_source text =
  let has = contains_str text in
  String.length text > 20
  && (has "export extern"
      || has "export def"
      || (has "module " && has "export"))

(* command name encoded in a manpage filename.
 * "ls.1.gz" -> drop ".gz" -> "ls.1" -> drop the section extension -> "ls".
 * a name without any extension is returned as is. *)
let cmd_name_of_manpage path =
  let base = Filename.basename path in
  let stem =
    if Filename.check_suffix base ".gz" then Filename.chop_suffix base ".gz"
    else base
  in
  match Filename.chop_extension stem with
  | name -> name
  | exception Invalid_argument _ -> stem

(* sanitized environment for child processes.
 * strips display-related variables (DISPLAY, WAYLAND_DISPLAY, etc.) to prevent
 * gui tools from trying to open windows when we run them with --help.
* without this, some tools would pop up dialogs or hang waiting for a
 * display connection. computed lazily, once, from the current environment. *)
let safe_env =
  lazy (
    let stripped_prefixes =
      [ "DISPLAY=";
        "WAYLAND_DISPLAY=";
        "DBUS_SESSION_BUS_ADDRESS=";
        "XAUTHORITY=" ]
    in
    let is_stripped binding =
      List.exists
        (fun prefix -> String.starts_with ~prefix binding)
        stripped_prefixes
    in
    Unix.environment ()
    |> Array.to_list
    |> List.filter (fun binding -> not (is_stripped binding))
    |> Array.of_list)

(* move whatever is currently readable on rd into buf without blocking.
 * polls with a zero select timeout and stops as soon as nothing is ready,
 * the peer has closed (read returns 0), or read fails with a unix error.
 * safe to call repeatedly; the fork/pipe call sites use it to keep pipes
 * drained so children never block on write. *)
let drain_fd rd buf =
  let chunk = Bytes.create 8192 in
  let rec pump () =
    match Unix.select [rd] [] [] 0.0 with
    | (_ :: _, _, _) ->
      (match Unix.read rd chunk 0 8192 with
       | 0 -> ()
       | got ->
         Buffer.add_subbytes buf chunk 0 got;
         pump ()
       | exception Unix.Unix_error _ -> ())
    | _ -> ()
  in
  pump ()

(* run a command with a timeout, capturing its stdout+stderr.
 * starts a child process, redirects stdin from /dev/null, and merges
 * stdout+stderr onto a pipe. reads from the pipe with select() polling
 * until either the child closes the pipe or the deadline is reached.
 *
 * the child is run in /tmp to prevent tools that create side-effect files
 * from polluting the user's working directory. we chdir to /tmp before
 * starting the child and restore the previous directory afterwards.
 *
 * the select timeout is capped at 0.05s per iteration to ensure we check
 * the deadline frequently even when no data is available.
 *
 * returns none if the process couldn't be started or produced no output.
 * on timeout the child is killed and any output captured so far is returned.
*)
(* notes on run_cmd:
 *   - args is the full argv (program first); an empty list yields None
 *     before any descriptor is opened.
 *   - timeout_ms is the total time budget in milliseconds.
 *   - every descriptor is closed exactly once. the start-failure path only
 *     reports -1 and leaves the closing to the common code below; closing in
 *     both places would raise EBADF or close an unrelated, reused fd. *)
let run_cmd args timeout_ms =
  match args with
  | [] -> None
  | prog :: _ ->
    let (rd, wr) = Unix.pipe () in
    let devnull = Unix.openfile "/dev/null" [Unix.O_RDONLY] 0 in
    let argv = Array.of_list args in
    (* run subprocesses in /tmp so commands that write side-effect files
     * do not pollute the working directory *)
    let saved_cwd = Sys.getcwd () in
    Sys.chdir "/tmp";
    let pid =
      try Unix.create_process_env prog argv (Lazy.force safe_env) devnull wr wr
      with Unix.Unix_error _ -> -1
    in
    Sys.chdir saved_cwd;
    (* the parent keeps only the read end *)
    Unix.close wr;
    Unix.close devnull;
    if pid < 0 then begin
      Unix.close rd;
      None
    end else begin
      let buf = Buffer.create 4096 in
      let deadline =
        Unix.gettimeofday () +. (float_of_int timeout_ms /. 1000.0) in
      let chunk = Bytes.create 8192 in
      let timed_out = ref false in
      let eof = ref false in
      while not !timed_out && not !eof do
        let remaining = deadline -. Unix.gettimeofday () in
        if remaining <= 0.0 then timed_out := true
        else begin
          match Unix.select [rd] [] [] (min remaining 0.05) with
          | (_ :: _, _, _) ->
            let got = Unix.read rd chunk 0 (Bytes.length chunk) in
            if got = 0 then eof := true
            else Buffer.add_subbytes buf chunk 0 got
          | _ -> ()
        end
      done;
      Unix.close rd;
      (* a child that outlived the deadline is killed before being reaped *)
      if !timed_out then
        (try Unix.kill pid Sys.sigkill with Unix.Unix_error _ -> ());
      ignore (Unix.waitpid [] pid);
      if Buffer.length buf > 0 then Some (Buffer.contents buf) else None
    end

(* true when path is a regular file with at least one execute bit set.
 * stat follows symlinks; any stat failure counts as not executable. *)
let is_executable path =
  match Unix.stat path with
  | { Unix.st_kind = Unix.S_REG; st_perm; _ } -> st_perm land 0o111 <> 0
  | _ -> false
  | exception Unix.Unix_error _ -> false

(* true when the file starts with a #! shebang.
 * follows symlinks via realpath before reading. files shorter than two
 * bytes and any i/o failure count as not a script. the channel is closed
 * on every path. *)
let is_script path =
  try
    let ic = open_in_bin (Unix.realpath path) in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () ->
        match really_input_string ic 2 with
        | head -> head = "#!"
        | exception End_of_file -> false)
  with _ -> false

(* scan an elf binary for string needles without loading the entire file.
 * returns a table whose keys are the needles that were found.
 *
 * the file is read in 64kb chunks. the last max_needle bytes of each chunk
 * are carried over to the front of the next one so needles that span a chunk
 * boundary are still seen. scanning stops early once every needle is found.
 * files that do not start with the elf magic yield an empty table.
 *
 * on read failure (e.g. if the path resolves to something unreadable, or the
 * file is shorter than the 4-byte magic), all needles are marked as found.
 * this is a conservative fallback — we would rather try --help on an
 * unreadable binary than skip it.
 *
 * matching is a manual byte comparison rather than Str for performance —
 * this runs on every binary in the prefix. the channel is closed on every
 * path, including the failure path. *)
let elf_scan path needles =
  let found = Hashtbl.create 4 in
  let remaining () =
    List.filter (fun needle -> not (Hashtbl.mem found needle)) needles in
  (* does needle occur at offset pos of buf? *)
  let matches_at buf pos needle =
    let nlen = String.length needle in
    let rec go j =
      j >= nlen || (Bytes.get buf (pos + j) = needle.[j] && go (j + 1)) in
    go 0
  in
  (* does needle occur anywhere in the first total bytes of buf? *)
  let occurs_in buf total needle =
    let last = total - String.length needle in
    let rec scan pos =
      pos <= last && (matches_at buf pos needle || scan (pos + 1)) in
    scan 0
  in
  (try
    let ic = open_in_bin (Unix.realpath path) in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () ->
        let magic = really_input_string ic 4 in
        if magic = "\x7fELF" then begin
          let max_needle =
            List.fold_left
              (fun m needle -> max m (String.length needle)) 0 needles in
          let chunk_size = 65536 in
          let buf = Bytes.create (chunk_size + max_needle) in
          let carry = ref 0 in
          let eof = ref false in
          while not !eof && remaining () <> [] do
            let bytes_read = input ic buf !carry chunk_size in
            if bytes_read = 0 then eof := true
            else begin
              let total = !carry + bytes_read in
              List.iter
                (fun needle ->
                  if occurs_in buf total needle then
                    Hashtbl.replace found needle true)
                (remaining ());
              (* keep the tail so boundary-spanning needles are not missed *)
              let new_carry = min max_needle total in
              Bytes.blit buf (total - new_carry) buf 0 new_carry;
              carry := new_carry
            end
          done
        end)
  with _ ->
    List.iter (fun needle -> Hashtbl.replace found needle true) needles);
  found

(* detect nix-generated c wrappers and extract the real binary path.
 * nix's makeCWrapper creates small c programs that set up the environment
 * and exec the real binary. these wrappers will not contain "-h" or "complet"
 * themselves, so elf_scan would say "skip". this function looks inside the
 * wrapper file for the marker "makeCWrapper" and the first
 * /nix/store/.../bin/... path, so we can try --help on the real binary.
 *
 * returns None when the file is larger than 64kb (to avoid reading a large
 * non-wrapper binary into memory), has no marker, has no matching path, the
 * matched path does not exist, or anything fails. the channel is closed on
 * every path. *)
let nix_wrapper_target path =
  try
    let ic = open_in_bin (Unix.realpath path) in
    let contents =
      Fun.protect
        ~finally:(fun () -> close_in_noerr ic)
        (fun () ->
          let size = in_channel_length ic in
          if size > 65536 then None else Some (really_input_string ic size))
    in
    match contents with
    | None -> None
    | Some contents ->
      if not (contains_str contents "makeCWrapper") then None
      else begin
        let re =
          Str.regexp "/nix/store/[a-z0-9]+-[^' \n\r\x00]+/bin/[a-zA-Z0-9._-]+" in
        match Str.search_forward re contents 0 with
        | _ ->
          let target = Str.matched_string contents in
          if Sys.file_exists target then Some target else None
        | exception Not_found -> None
      end
  with _ -> None

(* detect nix bash/sh wrapper scripts that exec a real binary.
 * nix sometimes generates small shell scripts (e.g. to set env vars like
 * XDG_CONFIG_HOME) that exec the real binary. these look like:
 *   #!/nix/store/.../bash -e
 *   export FOO=...
 *   exec -a "$0" "/nix/store/.../bin/.foo-wrapped" "$@"
 * we extract the exec target path and resolve through it.
*)
(* returns the realpath of the exec target, or None when the script is larger
 * than 4096 bytes, has no matching exec line, or anything fails. *)
let nix_script_wrapper_target path =
  try
    let ic = open_in (Unix.realpath path) in
    let size = in_channel_length ic in
    if size > 4096 then begin
      close_in ic;
      None
    end else begin
      let script = really_input_string ic size in
      close_in ic;
      if not (contains_str script "exec") then None
      else begin
        let exec_re =
          Str.regexp "exec[ \t]+\\(-a[ \t]+\"\\$0\"[ \t]+\\)?\"?\\(/nix/store/[a-z0-9]+-[^\" \t\n]+/bin/[a-zA-Z0-9._-]+\\)\"?" in
        match Str.search_forward exec_re script 0 with
        | _ ->
          (* group 2 is the store path itself, without surrounding quotes *)
          let resolved = Unix.realpath (Str.matched_group 2 script) in
          if Sys.file_exists resolved then Some resolved else None
        | exception Not_found -> None
      end
    end
  with _ -> None

(* heuristic filter for binary names that should never be indexed.
 * rejects: the empty name, "-", dotfiles, names starting with "lib",
 * names ending in "-daemon", "-wrapped" or ".so", and names without a
 * single ascii letter or digit. *)
let skip_name name =
  let is_alnum c =
    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
  in
  let has_skipped_suffix =
    List.exists
      (fun suffix -> String.ends_with ~suffix name)
      ["-daemon"; "-wrapped"; ".so"]
  in
  name = ""
  || name = "-"
  || name.[0] = '.'
  || String.starts_with ~prefix:"lib" name
  || has_skipped_suffix
  || not (String.exists is_alnum name)

(* how a binary should be indexed.
 *   Skip                — do not index this binary at all
 *   Try_help            — only try --help
 *   Try_native_and_help — try native nushell completions first, then --help *)
type bin_class = Skip | Try_help | Try_native_and_help

(* classify by the strings elf_scan reports for the file at path:
 * "complet" wins over "-h"; neither means Skip. *)
let classify_elf path =
  let hits = elf_scan path ["-h"; "complet"] in
  match Hashtbl.mem hits "complet", Hashtbl.mem hits "-h" with
  | true, _ -> Try_native_and_help
  | false, true -> Try_help
  | false, false -> Skip

(* classify a binary to decide the indexing strategy.
 * decision tree:
 *   1. nushell builtin or bad name -> Skip
 *   2. not executable -> Skip
 *   3. script (has shebang) -> classify the target of a nix script wrapper
 *      if there is one and that yields something; otherwise Try_help
 *   4. elf binary containing "complet" -> Try_native_and_help
 *   5. elf binary containing "-h" -> Try_help
 *   6. nix c wrapper -> Try_help (the wrapper itself is just an exec shim)
 *   7. otherwise -> Skip (binary has no help infrastructure) *)
let classify_binary bindir name =
  if is_nushell_builtin name || skip_name name then Skip
  else begin
    let path = Filename.concat bindir name in
    if not (is_executable path) then Skip
    else if is_script path then begin
      match nix_script_wrapper_target path with
      | None -> Try_help
      | Some target ->
        (match classify_elf target with
         | Skip -> Try_help
         | (Try_help | Try_native_and_help) as cls -> cls)
    end else begin
      match classify_elf path with
      | (Try_help | Try_native_and_help) as cls -> cls
      | Skip ->
        (match nix_wrapper_target path with
         | Some _ -> Try_help
         | None -> Skip)
    end
  end

(* number of cpu cores, counted as lines of /proc/cpuinfo that start with
 * "processor". never less than 1; falls back to 4 when the file cannot be
 * read (e.g. on non-linux). *)
let num_cores () =
  try
    let ic = open_in "/proc/cpuinfo" in
    let rec count_processors seen =
      match input_line ic with
      | line ->
        count_processors
          (if String.starts_with ~prefix:"processor" line then seen + 1
           else seen)
      | exception End_of_file -> seen
    in
    let total = count_processors 0 in
    close_in ic;
    max 1 total
  with _ -> 4

(* extract words from text that contain any of the given substrings.
 * words are sequences of [a-zA-Z0-9_-] optionally prefixed with --.
 * returns a deduplicated list.
*)
(* details: a word starts at an ascii letter or '-' and continues through
 * letters, digits, '-' and '_'. needles are matched against the lowercased
 * word, but the word is returned with its original case. the result is in
 * ascending string order. *)
let extract_matching_words text needles =
  let module SSet = Set.Make(String) in
  let len = String.length text in
  let is_letter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') in
  let starts_word c = is_letter c || c = '-' in
  let continues_word c =
    starts_word c || (c >= '0' && c <= '9') || c = '_' in
  let mentions_needle lowered =
    List.exists
      (fun needle ->
        match Str.search_forward (Str.regexp_string needle) lowered 0 with
        | _ -> true
        | exception Not_found -> false)
      needles
  in
  (* first index at or after i whose character fails pred (or len) *)
  let rec skip_while pred i =
    if i < len && pred text.[i] then skip_while pred (i + 1) else i in
  let rec gather acc i =
    if i >= len then acc
    else begin
      let start = skip_while (fun c -> not (starts_word c)) i in
      let stop = skip_while continues_word start in
      let acc =
        if stop > start then begin
          let word = String.sub text start (stop - start) in
          if mentions_needle (String.lowercase_ascii word)
          then SSet.add word acc
          else acc
        end else acc
      in
      gather acc stop
    end
  in
  SSet.elements (gather SSet.empty 0)

(* try to get native nushell completions from a binary.
 * runs --help, scans the output for words containing completion-related
 * substrings ("complet"), then tries each match as a subcommand or flag
 * with "nushell" as the argument.
 *
 * this catches arbitrary patterns (completions, generate-completions,
 * shell-completion, gen-completions, etc.) without maintaining a hardcoded
 * list. the worst case is a few failed attempts before falling back to
 * manpage/--help parsing.
*)
(* returns the first output that looks like nushell source, or None. each
 * attempt runs with a 500ms budget. a word that does not already start with
 * "--" is tried both as a subcommand and as a long flag. *)
let try_native_completion bin_path =
  match run_cmd [bin_path; "--help"] 500 with
  | None | Some "" -> None
  | Some help_text ->
    extract_matching_words help_text ["complet"]
    |> List.find_map (fun word ->
      let as_given = [bin_path; word; "nushell"] in
      let attempts =
        if String.starts_with ~prefix:"--" word then [as_given]
        else [as_given; [bin_path; "--" ^ word; "nushell"]]
      in
      List.find_map
        (fun args ->
          match run_cmd args 500 with
          | Some text when is_nushell_source text -> Some text
          | Some _ | None -> None)
        attempts)

(* parse a manpage file into (command name, parsed result, sub-results).
 * the sub-results are per-subcommand sections, each keyed by the full
 * "command subcommand" name. when such sections exist they also replace the
 * subcommand list of the main result.
 * returns None when the command is a nushell builtin. *)
let parse_manpage_for_command file =
  let contents = read_manpage_file file in
  let fallback = cmd_name_of_manpage file in
  (* the filename encodes the command boundary: "git-stash" = 2 words.
   * use this to clamp the synopsis-extracted name, which can be too greedy
   * when the synopsis lists subcommand variants. *)
  let max_words = List.length (String.split_on_char '-' fallback) in
  let clamp_cmd name =
    let words = String.split_on_char ' ' name in
    if List.length words <= max_words then name
    else String.concat " " (List.filteri (fun i _ -> i < max_words) words)
  in
  let cmd =
    match extract_synopsis_command contents with
    | None -> fallback
    | Some name -> clamp_cmd name
  in
  if is_nushell_builtin cmd then None
  else begin
    let parsed = parse_manpage_string contents in
    let sub_sections = extract_subcommand_sections contents in
    let result =
      match sub_sections with
      | [] -> parsed
      | _ :: _ ->
        { parsed with
          subcommands =
            List.map (fun (name, desc, _) -> { name; desc }) sub_sections }
    in
    let subs =
      List.map (fun (name, _desc, r) -> (cmd ^ " " ^ name, r)) sub_sections in
    Some (cmd, result, subs)
  end

(* "inshellah manpage FILE" — parse one manpage and print the nushell extern.
 * prints nothing when the page yields no entries. *)
let cmd_manpage file =
  match parse_manpage_for_command file with
  | None -> ()
  | Some (cmd, result, _) ->
    if result.entries <> [] then print_string (generate_extern cmd result)

(* "inshellah manpage-dir DIR" — batch-process all manpages under a directory.
 * visits DIR/manN for each command section; a page that fails to process
 * is skipped so one bad file does not abort the batch. *)
let cmd_manpage_dir dir =
  let process_section section =
    let subdir = Filename.concat dir (Printf.sprintf "man%d" section) in
    if is_dir subdir then
      Sys.readdir subdir
      |> Array.iter (fun file ->
        try cmd_manpage (Filename.concat subdir file) with _ -> ())
  in
  List.iter process_section command_sections

(* detect rendered manpage output — when --help delegates to man(1), the
 * output starts with a header line like "GIT-STASH(1) ... GIT-STASH(1)".
 * we check if the first non-blank line matches that pattern.
*)
(* concretely: the first '(' of that line must not be the first character,
 * must be followed by a single digit and ')', and something must follow
 * the ')'. *)
let is_rendered_manpage text =
  let is_digit c = c >= '0' && c <= '9' in
  let first_non_blank =
    String.split_on_char '\n' text
    |> List.find_opt (fun line -> String.trim line <> "")
  in
  match first_non_blank with
  | None -> false
  | Some line ->
    let header = String.trim line in
    (match String.index_opt header '(' with
     | None -> false
     | Some paren ->
       paren > 0
       && paren + 2 < String.length header
       && is_digit header.[paren + 1]
       && header.[paren + 2] = ')')

(* find the raw manpage file for a hyphenated command name like "git-stash".
 * looks for NAME.SECTION and NAME.SECTION.gz under manSECTION of every given
 * man directory, in order. when that finds nothing (or no directories were
 * given) it asks "man -w" and accepts the answer if that file exists. *)
let find_manpage_path mandirs hyphenated_name =
  let candidate mandir section ext =
    let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
    let path =
      Filename.concat subdir
        (Printf.sprintf "%s.%d%s" hyphenated_name section ext) in
    if Sys.file_exists path then Some path else None
  in
  let from_dirs =
    List.find_map
      (fun mandir ->
        List.find_map
          (fun section -> List.find_map (candidate mandir section) [""; ".gz"])
          command_sections)
      mandirs
  in
  match from_dirs with
  | Some _ -> from_dirs
  | None ->
    (* fallback to man -w when no man dirs provided or file not found *)
    Option.bind (run_cmd ["man"; "-w"; hyphenated_name] 200) (fun raw ->
      let path = String.trim raw in
      if Sys.file_exists path then Some path else None)

(* when --help output is a rendered manpage, find and parse the raw manpage
 * source instead. returns the main result plus any sub-section results
 * (e.g. "git stash push" flags parsed from the git-stash manpage).
*)
(* returns None when no manpage is found, the page is rejected by the parser,
 * or it yields neither entries nor sub-section results. *)
let try_manpage_fallback mandirs cmd_name =
  Option.bind (find_manpage_path mandirs cmd_name) (fun path ->
    match parse_manpage_for_command path with
    | None -> None
    | Some (_, result, subs) ->
      if result.entries = [] && subs = [] then None else Some (result, subs))

(* safety limit: stop enqueueing further subcommands once this many results
 * have been collected for one binary. prevents runaway recursion on tools
 * with enormous subcommand trees. *)
let max_resolve_results = 500

(* exception-safe wrapper around parse_manpage_for_command.
 * returns the parse only when it carries entries or sub-section results;
 * any exception is turned into None. *)
let process_manpage file =
  try
    match parse_manpage_for_command file with
    | None -> None
    | Some (_, result, subs) as parsed ->
      if result.entries = [] && subs = [] then None else parsed
  with _ -> None

(* collect the set of command names that have manpages in a given man
 * directory (all files under manN for each command section).
 * used during indexing to skip --help for commands that will be handled by
 * the manpage parsing phase instead. *)
let manpaged_commands mandir =
  let add_section names section =
    let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
    if not (is_dir subdir) then names
    else
      Array.fold_left
        (fun names file -> SSet.add (cmd_name_of_manpage file) names)
        names
        (Sys.readdir subdir)
  in
  List.fold_left add_section SSet.empty command_sections

(* parallel structured help resolver — recursively resolves a command and
 * all its subcommands by running --help on each, forking a child process
 * per subcommand for parallelism.
 *
 * the resolver works as a breadth-first queue:
 *   1. start with the root command in the queue
 *   2. fork a child for each queued item (up to num_cores concurrent)
 *   3. the child runs --help, parses the output, marshals the result via pipe
 *   4. the parent collects results and enqueues discovered subcommands
 *   5. repeat until queue is empty and all children have finished
 *
 * depth is limited to 5 levels and total results to max_resolve_results
 * to prevent runaway recursion on pathological command trees.
*
 * the child process detects "self-listing" — when a subcommand's --help
 * lists itself as a subcommand (e.g. "git help" listing "help" as a
 * subcommand of itself). this would cause infinite recursion, so such
 * results are discarded.
 *
 * children close all pipe fds from other pending children immediately
 * after fork to prevent fd leaks. the parent drains pipes regularly to
 * prevent children from blocking on full pipe buffers.
 *
 * a child never returns into the caller's code: any exception raised while
 * resolving is turned into an empty result, and the child terminates with
 * Unix._exit so that at_exit handlers and stdio buffers inherited from the
 * parent are not run or flushed a second time.
 *
 * arguments:
 *   timeout  per-invocation budget in milliseconds (default 200)
 *   mandirs  man directories searched when --help turns out to be a
 *            rendered manpage (default none)
 *   cmd      path of the executable to run
 *   rest     arguments that select the starting subcommand (usually [])
 *   name     display name of the starting command
 *
 * returns (command name, parsed result) pairs in collection order. *)
let help_resolve_par ?(timeout=200) ?(mandirs=[]) cmd rest name =
  let max_depth = 5 in
  let max_jobs = num_cores () in
  let queue = Queue.create () in
  Queue.push (rest, name, 0) queue;
  let results = ref [] in
  (* pending: (pid, rd, buf, cmd_args, cmd_name, depth) *)
  let pending = ref [] in

  (* work done inside a forked child: run help for one (sub)command and
   * return (result, subcommands to enqueue, fully parsed extra results). *)
  let resolve_one cmd_args cmd_name depth
    : (help_result * subcommand list * (string * help_result) list) option =
    let subs_to_enqueue (r : help_result) =
      if depth >= max_depth then [] else r.subcommands in
    let from_help_text ~reject_self_listing text =
      match parse_help text with
      | Error _ -> None
      | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] ->
        None
      | Ok r ->
        let self_listed =
          reject_self_listing
          && (match List.rev cmd_args with
              | [] -> false
              | leaf :: _ ->
                List.exists
                  (fun (sc : subcommand) -> sc.name = leaf) r.subcommands)
        in
        if self_listed then None else Some (r, subs_to_enqueue r, [])
    in
    (* sub-section names produced by the manpage parser are already fully
     * qualified ("git stash push"). only prefix them when they are not, so
     * the name is never doubled and the parent can match it against the
     * subcommands it is about to enqueue. *)
    let qualify sub_name =
      let prefix = cmd_name ^ " " in
      if String.starts_with ~prefix sub_name then sub_name
      else prefix ^ sub_name
    in
    let text =
      match run_cmd (cmd :: cmd_args @ ["--help"]) timeout with
      | Some _ as out -> out
      | None -> run_cmd (cmd :: cmd_args @ ["-h"]) timeout
    in
    match text with
    | None -> None
    | Some text when is_rendered_manpage text ->
      (* check for rendered manpage first — when --help delegates to
       * man(1), the raw groff source has richer structure than the
       * rendered text. parse_help would partially succeed on rendered
       * manpage output (extracting flags from OPTIONS) but miss
       * subcommands from the COMMANDS section. *)
      let hyphenated =
        String.concat "-" (Filename.basename cmd :: cmd_args) in
      (match try_manpage_fallback mandirs hyphenated with
       | Some (r, subs) ->
         let extras =
           List.map (fun (sub_name, sub_r) -> (qualify sub_name, sub_r)) subs in
         Some (r, subs_to_enqueue r, extras)
       | None ->
         (* manpage file not found — fall back to parsing rendered text *)
         from_help_text ~reject_self_listing:false text)
    | Some text -> from_help_text ~reject_self_listing:true text
  in

  (* parent side: decode one finished child and enqueue its subcommands *)
  let collect rd buf cmd_args cmd_name depth =
    drain_fd rd buf;
    (try Unix.close rd with _ -> ());
    let data = Buffer.contents buf in
    let result
      : (help_result * subcommand list * (string * help_result) list) option =
      if String.length data > 0 then
        (try Marshal.from_string data 0 with _ -> None)
      else None
    in
    match result with
    | None -> ()
    | Some (r, subs, extras) ->
      let at_limit =
        depth >= max_depth || List.length !results >= max_resolve_results in
      results := (cmd_name, r) :: !results;
      (* extras are fully-parsed sub-results from manpage sub-sections —
       * add them directly without enqueueing for further resolution *)
      List.iter
        (fun (sub_name, sub_r) ->
          if not (List.exists (fun (existing, _) -> existing = sub_name)
                    !results)
          then results := (sub_name, sub_r) :: !results)
        extras;
      if not at_limit then begin
        (* only enqueue subcommands that were not already covered by extras *)
        let extra_names = List.map fst extras in
        List.iter
          (fun (sc : subcommand) ->
            let full = cmd_name ^ " " ^ sc.name in
            if not (List.mem full extra_names) then
              Queue.push (cmd_args @ [sc.name], full, depth + 1) queue)
          subs
      end
  in

  (* collect every child that has exited; keep the rest pending *)
  let reap () =
    pending :=
      List.filter
        (fun (pid, rd, buf, cmd_args, cmd_name, depth) ->
          drain_fd rd buf;
          match Unix.waitpid [Unix.WNOHANG] pid with
          | (0, _) -> true
          | _ -> collect rd buf cmd_args cmd_name depth; false
          | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
            (try Unix.close rd with _ -> ()); false)
        !pending
  in
  (* sleep up to 50ms or until some pending pipe becomes readable *)
  let wait_readable () =
    let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
    ignore (Unix.select fds [] [] 0.05)
  in
  let wait_for_slot () =
    while List.length !pending >= max_jobs do
      reap ();
      if List.length !pending >= max_jobs then wait_readable ()
    done
  in

  while not (Queue.is_empty queue) || !pending <> [] do
    while not (Queue.is_empty queue) do
      let (cmd_args, cmd_name, depth) = Queue.pop queue in
      wait_for_slot ();
      let (rd, wr) = Unix.pipe () in
      match Unix.fork () with
      | 0 ->
        Unix.close rd;
        List.iter
          (fun (_, prd, _, _, _, _) -> try Unix.close prd with _ -> ())
          !pending;
        (* nothing may escape from here: an exception would otherwise unwind
         * into the code of whoever called help_resolve_par, in the child *)
        let result = try resolve_one cmd_args cmd_name depth with _ -> None in
        (try
          let oc = Unix.out_channel_of_descr wr in
          Marshal.to_channel oc
            (result
             : (help_result * subcommand list * (string * help_result) list)
                 option)
            [];
          close_out oc
        with _ -> ());
        Unix._exit 0
      | pid ->
        Unix.close wr;
        pending :=
          (pid, rd, Buffer.create 4096, cmd_args, cmd_name, depth) :: !pending
    done;
    if !pending <> [] then begin
      reap ();
      if !pending <> [] && Queue.is_empty queue then wait_readable ()
    end
  done;
  List.rev !results

(* "inshellah index" — the main indexing command.
 * processes all binaries and manpages in the given prefix directories,
 * writing completion data to the cache dir.
 *
 * the pipeline has two phases:
 *
 * phase 1 (binaries): fork one child per binary.
each child: * - tries native nushell completions (if classified as Try_native_and_help) * - falls back to help_resolve_par (which itself forks per subcommand) * - marshals the result back via pipe as a tagged variant: * `Native of string — raw nushell source * `Parsed of (string * help_result) list — parsed flag data * `None — nothing useful extracted * * phase 2 (manpages): sequentially parse manpages for commands not yet * covered by phase 1. manpages are more reliable than --help for many * gnu tools, but slower to process. * * commands on the ignorelist are skipped entirely. commands on the * help_only list skip manpage parsing and only use --help. commands * with manpages skip --help in phase 1 (they'll be handled in phase 2). * * the done_cmds set tracks which commands have already been indexed to * prevent duplicates across phases and across multiple prefix directories. *) (* known privilege-escalation wrappers — defined here (before cmd_index and * cmd_complete) because both need the list: cmd_index writes @complete * external stubs, and cmd_complete strips the wrapper to find the real command. 
*)
let elevation_commands =
  ["sudo"; "run0"; "doas"; "pkexec"; "su"; "calife"; "sux"; "sudoedit";
   "please"; "super"; "priv"]

(* index every prefix and write the results into dir.
 *
 *   bindirs     bin directories, one per prefix
 *   mandirs     man directories, paired positionally with bindirs (the two
 *               lists must have the same length); the whole list is also
 *               handed to the help resolver for manpage lookups
 *   ignorelist  command names that are never indexed
 *   help_only   command names indexed from --help only, never from manpages
 *   dir         output directory, created if missing
 *
 * a command name is written at most once per run: the first prefix (and the
 * first phase) that produces it wins. forked children terminate with
 * Unix._exit so that text still sitting in the inherited stdout/stderr
 * buffers is not flushed once more by every child. *)
let cmd_index bindirs mandirs ignorelist help_only dir =
  ensure_dir dir;
  let done_cmds = ref SSet.empty in
  let result_count = ref 0 in
  (* write a parsed result unless that command was already indexed *)
  let write_once ~source cmd_name r =
    if not (SSet.mem cmd_name !done_cmds) then begin
      write_result ~dir ~source cmd_name r;
      done_cmds := SSet.add cmd_name !done_cmds;
      incr result_count
    end
  in
  let index_bindir bindir mandir =
    if not (is_dir bindir) then
      Printf.eprintf "skipping %s (not found)\n" bindir
    else begin
      let bins = Sys.readdir bindir in
      Array.sort String.compare bins;
      let manpaged =
        if is_dir mandir then manpaged_commands mandir else SSet.empty in
      let max_jobs = num_cores () in
      (* commands with a manpage are left to phase 2 unless they are on the
       * help_only list *)
      let classified =
        Array.map
          (fun name ->
            if SSet.mem name ignorelist then (name, Skip)
            else if SSet.mem name help_only then
              (name, classify_binary bindir name)
            else if SSet.mem name manpaged then (name, Skip)
            else (name, classify_binary bindir name))
          bins
      in
      (* pending: (pid, rd, buf, name) *)
      let pending = ref [] in
      let process_result name rd buf =
        drain_fd rd buf;
        (try Unix.close rd with _ -> ());
        let data = Buffer.contents buf in
        if String.length data > 0 then begin
          let result
            : [ `Native of string
              | `Parsed of (string * help_result) list
              | `None ] =
            try Marshal.from_string data 0 with _ -> `None
          in
          match result with
          | `Native src ->
            (* same first-wins rule as for parsed results *)
            if not (SSet.mem name !done_cmds) then begin
              write_native ~dir name src;
              incr result_count
            end
          | `Parsed pairs ->
            List.iter
              (fun (cmd_name, r) -> write_once ~source:"help" cmd_name r)
              pairs
          | `None -> ()
        end;
        done_cmds := SSet.add name !done_cmds
      in
      let reap () =
        pending :=
          List.filter
            (fun (pid, rd, buf, name) ->
              drain_fd rd buf;
              match Unix.waitpid [Unix.WNOHANG] pid with
              | (0, _) -> true
              | _ -> process_result name rd buf; false
              | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
                (try Unix.close rd with _ -> ()); false)
            !pending
      in
      (* sleep up to 50ms or until some pending pipe becomes readable *)
      let wait_readable () =
        let fds = List.map (fun (_, rd, _, _) -> rd) !pending in
        ignore (Unix.select fds [] [] 0.05)
      in
      let wait_for_slot () =
        while List.length !pending >= max_jobs do
          reap ();
          if List.length !pending >= max_jobs then wait_readable ()
        done
      in
      (* phase 1: binaries, one forked child each *)
      Array.iter
        (fun (name, classification) ->
          match classification with
          | Skip -> ()
          | Try_help | Try_native_and_help ->
            wait_for_slot ();
            let (rd, wr) = Unix.pipe () in
            (match Unix.fork () with
             | 0 ->
               Unix.close rd;
               List.iter
                 (fun (_, prd, _, _) -> try Unix.close prd with _ -> ())
                 !pending;
               let result
                 : [ `Native of string
                   | `Parsed of (string * help_result) list
                   | `None ] =
                 try
                   let path = Filename.concat bindir name in
                   let native =
                     match classification with
                     | Try_native_and_help -> try_native_completion path
                     | Skip | Try_help -> None
                   in
                   (match native with
                    | Some src -> `Native src
                    | None ->
                      (match
                         help_resolve_par ~timeout:200 ~mandirs path [] name
                       with
                       | [] -> `None
                       | pairs -> `Parsed pairs))
                 with _ -> `None
               in
               (try
                 let oc = Unix.out_channel_of_descr wr in
                 Marshal.to_channel oc result [];
                 close_out oc
               with _ -> ());
               Unix._exit 0
             | pid ->
               Unix.close wr;
               pending := (pid, rd, Buffer.create 4096, name) :: !pending))
        classified;
      while !pending <> [] do
        reap ();
        if !pending <> [] then wait_readable ()
      done;
      (* phase 2: manpages *)
      if is_dir mandir then
        List.iter
          (fun section ->
            let subdir =
              Filename.concat mandir (Printf.sprintf "man%d" section) in
            if is_dir subdir then begin
              let files = Sys.readdir subdir in
              Array.sort String.compare files;
              Array.iter
                (fun file ->
                  let base_cmd = cmd_name_of_manpage file in
                  if not (SSet.mem base_cmd help_only) then
                    match process_manpage (Filename.concat subdir file) with
                    | None -> ()
                    | Some (cmd, result, subs) ->
                      write_once ~source:"manpage" cmd result;
                      List.iter
                        (fun (sub_cmd, sub_result) ->
                          write_once ~source:"manpage" sub_cmd sub_result)
                        subs)
                files
            end)
          command_sections
    end
  in
  List.iter2 index_bindir bindirs mandirs;
  (* write @complete external stubs for elevation commands (sudo, doas, etc.)
   * so nushell routes their completions through the external completer.
   * without this, nushell hardcodes sudo/doas to show command-name completion
   * and never calls the external completer for their own flags. a stub is
   * only written for commands that have a JSON entry in dir. *)
  List.iter
    (fun cmd ->
      let json_path =
        Filename.concat dir (filename_of_command cmd ^ ".json") in
      if Sys.file_exists json_path then
        write_native ~dir cmd
          (Printf.sprintf "@complete external\nextern \"%s\" []\n" cmd))
    elevation_commands;
  Printf.printf "indexed %d commands into %s\n" !result_count dir

(* "inshellah dump" — list all indexed commands with their source type.
 * a command whose type cannot be determined is shown with "?". *)
let cmd_dump dirs =
  let cmds = all_commands dirs in
  Printf.printf "%d commands\n" (List.length cmds);
  List.iter
    (fun cmd ->
      let src = Option.value (file_type_of dirs cmd) ~default:"?" in
      Printf.printf "  %-40s [%s]\n" cmd src)
    cmds

(* search $PATH for an executable with the given name and return the first
 * hit. returns None when PATH is unset or nothing matches.
 * used during completion to find binaries for on-the-fly resolution. *)
let find_in_path name =
  match Sys.getenv_opt "PATH" with
  | None -> None
  | Some path ->
    String.split_on_char ':' path
    |> List.find_map (fun dir ->
      let candidate = Filename.concat dir name in
      if is_executable candidate then Some candidate else None)

(* resolve a command's completions on-the-fly and cache the results.
 * called during "complete" when a command isn't in the index.
 * runs help_resolve_par and writes results to the user's cache dir.
*)
let resolve_and_cache ~dir ~mandirs name path =
  match help_resolve_par ~timeout:200 ~mandirs path [] name with
  | [] ->
    (* nothing could be resolved: leave the cache untouched *)
    None
  | resolved ->
    ensure_dir dir;
    List.iter (fun (cmd_name, r) -> write_result ~dir cmd_name r) resolved;
    Some resolved

(* render one completion candidate as a JSON object with the "value" and
 * "description" keys of nushell's completer protocol. both strings are
 * passed through escape_json first. *)
let completion_json value desc =
  let escaped_value = escape_json value in
  let escaped_desc = escape_json desc in
  Printf.sprintf "{\"value\":\"%s\",\"description\":\"%s\"}" escaped_value escaped_desc

(* fuzzy matching: returns a score > 0 if needle is a subsequence of haystack
 * (compared case-insensitively). higher scores = better match. scoring tiers:
 * - exact match: 1000
 * - prefix match: 900 + length bonus (how much of the haystack is covered)
 * - subsequence: every matched character scores 10, or 50 when it sits on a
 *   word boundary (start of the string, after '-' or '_', or a camelCase
 *   transition), plus 20 when it directly follows the previous match
 * an empty needle scores 1 (it matches everything); no match scores 0.
 *
 * this drives the completion candidate ranking. users typing "ser" should see
 * "--server" ranked above "--preserve" even though both contain "ser" as a
 * subsequence. the word-boundary bonus achieves this.
*)
(* implementation notes:
 * - the exact-match test is case-sensitive; the prefix and subsequence tests
 *   run on lowercased copies of both strings.
 * - the subsequence walk is greedy: every needle character is matched at the
 *   first position where it can be matched.
 * - the consecutive bonus is only granted when a previous needle character
 *   has really been matched. the "no previous match" marker is -1, which
 *   would otherwise look adjacent to haystack index 0 and hand a match at
 *   the very first character an undeserved +20. *)
let fuzzy_score needle haystack =
  let needle_len = String.length needle
  and haystack_len = String.length haystack in
  if needle_len = 0 then 1
  else if needle_len > haystack_len then 0
  else if needle = haystack then 1000
  else
    let needle_lc = String.lowercase_ascii needle
    and haystack_lc = String.lowercase_ascii haystack in
    if String.starts_with ~prefix:needle_lc haystack_lc then
      900 + (needle_len * 100 / haystack_len)
    else
      (* boundaries are detected on the original haystack because the
       * camelCase test needs the original letter case *)
      let is_boundary hay_idx =
        hay_idx = 0
        || haystack.[hay_idx - 1] = '-'
        || haystack.[hay_idx - 1] = '_'
        || (haystack.[hay_idx - 1] >= 'a' && haystack.[hay_idx - 1] <= 'z'
            && haystack.[hay_idx] >= 'A' && haystack.[hay_idx] <= 'Z') in
      (* walk haystack matching needle chars as a subsequence.
       * state: (next needle index, score, haystack index, index of the
       * previous match or -1 when nothing has matched yet) *)
      let needle_idx, score, _, _ =
        String.fold_left (fun (needle_idx, score, hay_idx, prev_match) c ->
          if needle_idx >= needle_len then
            (needle_idx, score, hay_idx + 1, prev_match)
          else if c = needle_lc.[needle_idx] then
            let consecutive = prev_match >= 0 && prev_match = hay_idx - 1 in
            let bonus =
              (if is_boundary hay_idx then 50 else 10)
              + (if consecutive then 20 else 0) in
            (needle_idx + 1, score + bonus, hay_idx + 1, hay_idx)
          else
            (needle_idx, score, hay_idx + 1, prev_match)
        ) (0, 0, 0, -1) haystack_lc in
      (* only a complete subsequence match counts *)
      if needle_idx = needle_len then score else 0

(* scan past the elevation command's flags and arguments to find the real
 * command. is_command checks whether a token names a known command.
 * returns Some (real_cmd :: args) or None if no command was found.
 * a "--" token ends the scan: everything after it is returned as-is, which
 * may be the empty list. tokens starting with '-' are never tested with
 * is_command. *)
let find_real_command is_command args =
  let rec scan = function
    | [] -> None
    | "--" :: rest -> Some rest
    | arg :: rest when String.length arg > 0 && arg.[0] = '-' -> scan rest
    | arg :: _ as cmd_and_rest when is_command arg -> Some cmd_and_rest
    | _ :: rest -> scan rest in
  scan args

(* "inshellah complete CMD [ARGS...]" — the nushell custom completer.
 * this is the hot path — called every time the user presses tab in nushell.
 *
 * the completion logic:
 * 1. try to find the command (or longest subcommand prefix) in the store
 * 2.
if not found, try on-the-fly resolution (find in $PATH, run --help, cache)
 * 3. score all candidate completions against the partial input using fuzzy_score
 * 4. output scored candidates as a JSON array
 *
 * subcommand resolution: the lookup tries longest prefix first.
 * for "git add --", it first looks for "git add", then "git".
 * this ensures subcommand-specific flags are shown.
 *
 * nushell sends a trailing empty token when the cursor is after a space
 * ("git add "). in this case all_tokens includes the empty string.
 * when the last token is non-empty, the user is still typing it, so we use
 * it as the fuzzy filter. when empty, we show all candidates.
 *
 * if only a parent command matched (e.g. "git" matched but not "git add"),
 * we suppress subcommand suggestions and only show flags. this prevents
 * showing sibling subcommands when the user has already committed to a
 * specific subcommand path.
 *
 * file completions: nushell's external completer protocol is either/or —
 * you either return custom candidates or fall back to native file completions
 * (via null), but can't mix both. we return null (triggering nushell's native
 * file completer with colors, sorting, quoting) when:
 * - the user is at a leaf command (no subcommands) and not mid-flag
 * - or we have no candidates at all
 * this ensures file completions appear with full nushell UX. when the user
 * IS typing a flag (partial starts with "-"), we return our flag candidates.
 *
 * arguments:
 * - spans: the command-line tokens, command name first
 * - user_dir: searched first by lookup; also the directory on-the-fly
 *   results are written to (passed to resolve_and_cache as ~dir)
 * - system_dirs: further directories searched after user_dir
 * - mandirs: forwarded unchanged to resolve_and_cache
 * output: a single line on stdout, either null or a JSON array.
 *)
let cmd_complete spans user_dir system_dirs mandirs =
  let dirs = user_dir :: system_dirs in
  (* if the command line starts with a privilege-escalation wrapper, scan past
   * it to find the real command. we identify the command by checking the store
   * and $PATH — this avoids needing per-command option tables which are fragile
   * across different implementations. if no real command is found, fall back to
   * completing the elevation command itself.
   *)
  let spans = match spans with
    | cmd :: rest when List.mem cmd elevation_commands ->
      let is_command name =
        name <> "" && (lookup dirs name <> None || find_in_path name <> None) in
      (* an empty result (nothing after "--") also falls back to spans *)
      (match find_real_command is_command rest with
       | Some (_ :: _ as real_spans) -> real_spans
       | _ -> spans)
    | _ -> spans in
  match spans with
  | [] -> print_string "null\n"
  | cmd_name :: rest ->
    (* try longest prefix match: "git add" before "git".
     * drop counts how many trailing tokens are left out of the candidate
     * name. returns (matched name, stored result, tokens in the match). *)
    let find_result tokens =
      let num_tokens = List.length tokens in
      List.init num_tokens Fun.id
      |> List.find_map (fun drop ->
        let prefix = List.filteri (fun i _ -> i < num_tokens - drop) tokens in
        match prefix with
        | [] -> None
        | _ ->
          let try_name = String.concat " " prefix in
          match lookup dirs try_name with
          | Some r -> Some (try_name, r, List.length prefix)
          | None -> None) in
    let all_tokens = cmd_name :: rest in
    (* the token under the cursor; empty when only the command was given *)
    let last_token = match rest with
      | [] -> ""
      | _ -> List.nth rest (List.length rest - 1) in
    (* only treat the last token as a completed subcommand when nushell
     * sends a trailing empty token (cursor is after a space).
     * otherwise the user is still typing and we treat it as partial.
     *)
    let lookup_tokens =
      if last_token = "" then all_tokens
      else match rest with
        | _ :: _ -> cmd_name :: List.rev (List.tl (List.rev rest))
        | _ -> [cmd_name] in
    (* pairs the lookup result for tokens with the partial, unchanged *)
    let resolve tokens partial =
      match find_result tokens with
      | Some _ as found -> (found, partial)
      | None -> (None, partial) in
    let found, partial = resolve lookup_tokens last_token in
    (* try on-the-fly resolution when no match or only a parent matched *)
    let lookup_depth = List.length lookup_tokens in
    (* the "- 1" tolerates one unmatched trailing token; with a trailing
     * empty token that is the empty token itself *)
    let result, partial = match found with
      | Some (_, _, depth) when depth >= lookup_depth - 1 ->
        (* exact or near-exact match — use it *)
        (found, partial)
      | _ ->
        (* no match, or only a parent matched — try on-the-fly resolution *)
        (match find_in_path cmd_name with
         | Some path ->
           (* on success the store is looked up again, now that
            * resolve_and_cache has written into user_dir *)
           (match resolve_and_cache ~dir:user_dir ~mandirs cmd_name path with
            | Some _pairs -> resolve lookup_tokens last_token
            | None -> (found, partial))
         | None -> (found, partial)) in
    let candidates = match result with
      | None -> []
      | Some (_matched_name, r, depth) ->
        (* when the match is shallower than requested, the user already
         * typed a subcommand beyond the matched level — don't show
         * sibling subcommands, only flags *)
        let sub_candidates =
          if depth < lookup_depth - 1 then []
          else
            (* prefer the subcommands stored on the entry itself; ask the
             * store via subcommands_of only when that list is empty *)
            let subs = match r.subcommands with
              | _ :: _ -> r.subcommands
              | [] -> subcommands_of dirs _matched_name in
            List.filter_map (fun (subcommand : subcommand) ->
              let score = fuzzy_score partial subcommand.name in
              if score > 0 then
                Some (score, completion_json subcommand.name subcommand.desc)
              else None
            ) subs in
        (* build flag completion candidates from the entry list.
         * for flags with both short and long forms (Both), we pick which form
         * to display based on what the user is currently typing:
         * - if the partial input matches the short flag better, show the short
         *   flag as the value and note the long form in the description
         * - otherwise (including empty partial), prefer the long flag and note
         *   the short form in the description
         *
         * parameter names are appended to descriptions in angle brackets for
         * mandatory params and square brackets for optional ones, matching the
         * conventions users expect from cli help text. *)
        let flag_candidates = List.filter_map (fun (entry : entry) ->
          let base_desc = match entry.param with
            | Some (Mandatory p) ->
              if entry.desc <> "" then entry.desc ^ " <" ^ p ^ ">"
              else "<" ^ p ^ ">"
            | Some (Optional p) ->
              if entry.desc <> "" then entry.desc ^ " [" ^ p ^ "]"
              else "[" ^ p ^ "]"
            | None -> entry.desc in
          let flag, desc = match entry.switch with
            | Long l -> ("--" ^ l, base_desc)
            | Short c -> (Printf.sprintf "-%c" c, base_desc)
            | Both (c, l) ->
              (* score the partial against both forms to decide which to present.
               * e.g. typing "-s" scores higher against "-s" than "--squeeze-blank",
               * so we show "-s (aka --squeeze-blank)". when the partial is empty or
               * matches the long form better, we default to the long form. *)
              let long_flag = "--" ^ l in
              let short_flag = Printf.sprintf "-%c" c in
              let long_score = fuzzy_score partial long_flag in
              let short_score = fuzzy_score partial short_flag in
              if short_score > long_score then
                (short_flag, Printf.sprintf "(aka %s) %s" long_flag base_desc)
              else
                (long_flag, Printf.sprintf "(aka %s) %s" short_flag base_desc) in
          (* the chosen form is scored once more; a score of 0 drops it *)
          let score = fuzzy_score partial flag in
          if score > 0 then Some (score, completion_json flag desc) else None
        ) r.entries in
        let scored = sub_candidates @ flag_candidates in
        (* highest score first, then keep only the JSON text *)
        List.sort (fun (a, _) (b, _) -> compare b a) scored
        |> List.map snd in
    (* determine whether to return our candidates or fall back to nushell's
     * native file completer (via null). nushell's protocol is either/or:
     * returning candidates suppresses file completions, returning null
     * enables them with full nushell UX (colors, sorting, quoting).
     *
     * we return null when:
     * - we have no candidates at all (unknown command, no match)
     * - the user is at a leaf command and not typing a flag — this is
     *   the position where file arguments are expected, so hand off to
     *   nushell's native file completer for the best experience *)
    let typing_flag = String.length partial > 0 && partial.[0] = '-' in
    let has_subcommands = match result with
      | Some (matched_name, r, _) ->
        r.subcommands <> [] || subcommands_of dirs matched_name <> []
      | None -> false in
    (* note: with no result at all, has_subcommands is false, so null is
     * printed unless the user is typing a flag; the empty candidate list
     * then yields null through the second test *)
    let want_files = (not typing_flag) && (not has_subcommands) in
    if want_files then print_string "null\n"
    else if candidates = [] then print_string "null\n"
    else Printf.printf "[%s]\n" (String.concat "," candidates)

(* "inshellah query CMD" — print the raw stored data for a command.
 * the data is written to stdout followed by a newline. when lookup_raw finds
 * nothing, an error is printed on stderr and the process exits with status 1. *)
let cmd_query cmd dirs =
  match lookup_raw dirs cmd with
  | None -> Printf.eprintf "not found: %s\n" cmd; exit 1
  | Some data -> print_string data; print_newline ()

(* load a newline-separated list of command names to ignore.
 * blank lines and lines starting with '#' are skipped.
 * every line is trimmed first, so indented comments are skipped as well.
 * best-effort: any exception (for instance a missing or unreadable file)
 * results in the empty set. *)
let load_ignorelist path =
  try
    In_channel.with_open_text path In_channel.input_all
    |> String.split_on_char '\n'
    |> List.filter_map (fun line ->
      let line = String.trim line in
      if String.length line > 0 && line.[0] <> '#' then Some line else None)
    |> SSet.of_list
  with _ -> SSet.empty

(* parse "index" subcommand arguments: prefix dirs + optional --dir, --ignore, --help-only.
 * uses a fold over the argument list, accumulating prefixes and option values.
*)
let parse_index_args args =
  (* one fold step. pending holds the option name whose value is expected
   * next, if any. whatever follows an option name is taken as its value,
   * even when it looks like another option. *)
  let step (prefixes_rev, dir, ignored, help_only, pending) arg =
    match pending, arg with
    | Some "--dir", path ->
      (prefixes_rev, path, ignored, help_only, None)
    | Some "--ignore", file ->
      (prefixes_rev, dir, SSet.union ignored (load_ignorelist file), help_only, None)
    | Some "--help-only", file ->
      (prefixes_rev, dir, ignored, SSet.union help_only (load_ignorelist file), None)
    | Some _, _ ->
      (prefixes_rev, dir, ignored, help_only, None)
    | None, ("--dir" | "--ignore" | "--help-only") ->
      (prefixes_rev, dir, ignored, help_only, Some arg)
    | None, prefix ->
      (prefix :: prefixes_rev, dir, ignored, help_only, None) in
  let initial = ([], default_store_path (), SSet.empty, SSet.empty, None) in
  (* an option name left pending at the end of the list is dropped *)
  let (prefixes_rev, dir, ignored, help_only, _) =
    List.fold_left step initial args in
  (List.rev prefixes_rev, dir, ignored, help_only)

(* map a store directory to the "man" directory next to it.
 * e.g. "/run/current-system/sw/share/inshellah" -> "/run/current-system/sw/share/man" *)
let man_dir_of_system_dir path =
  let parent = Filename.dirname path in
  Filename.concat parent "man"

(* parse common --dir arguments for complete/query/dump commands.
 * --dir takes a colon-separated list of paths. the first path is the writable
 * user cache dir; additional paths are read-only system directories.
 * man directories are derived from system dir paths as siblings
 * (share/inshellah -> share/man). uses a fold over the argument list.
*)
let parse_dir_args args =
  (* fold state: (last --dir value seen, other arguments in reverse order,
   * whether the next argument is the value of --dir) *)
  let step (chosen, others_rev, awaiting_value) arg =
    if awaiting_value then (Some arg, others_rev, false)
    else if arg = "--dir" then (chosen, others_rev, true)
    else (chosen, arg :: others_rev, false) in
  let (chosen, others_rev, _) = List.fold_left step (None, [], false) args in
  let positional = List.rev others_rev in
  (* the first path is the user dir, the remaining ones are system dirs *)
  match Option.map (String.split_on_char ':') chosen with
  | Some (user_dir :: system_dirs) -> (user_dir, system_dirs, positional)
  | Some [] | None -> (default_store_path (), [], positional)

(* "inshellah completions nushell" — emit native nushell extern for inshellah itself.
 * prints one generate_extern block for the top-level command and one for
 * each of its subcommands, in a fixed order. *)
let cmd_completions_nushell () =
  (* small constructors for the records described below *)
  let flag name placeholder desc =
    { switch = Long name; param = Some (Mandatory placeholder); desc } in
  let positional ~optional ~variadic pos_name =
    { pos_name; optional; variadic } in
  let required pos_name = positional ~optional:false ~variadic:false pos_name in
  let spec ~entries ~positionals description =
    { entries; subcommands = []; positionals; description } in
  (* complete, query and dump all take the same --dir flag *)
  let cache_dirs = flag "dir" "PATH" "colon-separated cache paths" in
  let externs = [
    ("inshellah",
     spec ~entries:[] ~positionals:[] "nushell completions engine");
    ("inshellah index",
     spec
       ~entries:[
         flag "dir" "PATH" "output directory for cached completions";
         flag "ignore" "FILE" "skip listed commands entirely";
         flag "help-only" "FILE" "skip manpages for listed commands, use --help instead";
       ]
       ~positionals:[ positional ~optional:false ~variadic:true "prefix" ]
       "index completions from prefix directories");
    ("inshellah complete",
     spec
       ~entries:[ cache_dirs ]
       ~positionals:[
         required "cmd";
         positional ~optional:true ~variadic:true "args";
       ]
       "nushell custom completer, outputs JSON candidates");
    ("inshellah query",
     spec
       ~entries:[ cache_dirs ]
       ~positionals:[ required "cmd" ]
       "print stored completion data for a command");
    ("inshellah dump",
     spec ~entries:[ cache_dirs ] ~positionals:[] "list indexed commands");
    ("inshellah manpage",
     spec
       ~entries:[]
       ~positionals:[ required "file" ]
       "parse a manpage and emit nushell extern");
    ("inshellah manpage-dir",
     spec
       ~entries:[]
       ~positionals:[ required "dir" ]
       "batch-process manpages under a directory");
    ("inshellah completions",
     spec ~entries:[] ~positionals:[] "generate nushell completions for inshellah");
  ] in
  List.iter (fun (command, result) ->
    print_string (generate_extern command result)
  ) externs

(* --- entry point ---
 * dispatch on the first argument to the appropriate subcommand handler.
*)
let () =
  (* drop the program name and dispatch on what is left *)
  match List.tl (Array.to_list Sys.argv) with
  | "index" :: index_args ->
    let (prefixes, dir, ignorelist, help_only) = parse_index_args index_args in
    if prefixes = [] then begin
      Printf.eprintf "error: index requires at least one prefix dir\n";
      exit 1
    end;
    (* every prefix contributes its bin/ and its share/man/ directory *)
    let bindirs = List.map (fun prefix -> Filename.concat prefix "bin") prefixes in
    let mandirs = List.map (fun prefix -> Filename.concat prefix "share/man") prefixes in
    cmd_index bindirs mandirs ignorelist help_only dir
  | "complete" :: complete_args ->
    let (user_dir, system_dirs, spans) = parse_dir_args complete_args in
    (* keep only the sibling man directories that really exist *)
    let existing_man_dir store_dir =
      let candidate = man_dir_of_system_dir store_dir in
      if is_dir candidate then Some candidate else None in
    let man_dirs = List.filter_map existing_man_dir system_dirs in
    cmd_complete spans user_dir system_dirs man_dirs
  | "query" :: query_args ->
    begin match parse_dir_args query_args with
      | (user_dir, system_dirs, [cmd]) ->
        cmd_query cmd (user_dir :: system_dirs)
      | _ ->
        Printf.eprintf "error: query CMD [--dir PATH[:PATH...]]\n";
        exit 1
    end
  | "dump" :: dump_args ->
    (* positional arguments are ignored by dump *)
    let (user_dir, system_dirs, _) = parse_dir_args dump_args in
    cmd_dump (user_dir :: system_dirs)
  | ["manpage"; file] -> cmd_manpage file
  | ["manpage-dir"; dir] -> cmd_manpage_dir dir
  | ["completions"] -> cmd_completions_nushell ()
  | _ -> usage ()