(* main.ml — cli entry point for inshellah, a nushell completions engine.
 *
 * inshellah generates nushell "extern" definitions for external commands by
 * parsing their manpages and --help output. it has two main modes:
 *
 *   1. indexing (batch): scan a prefix directory's bin/ and share/man/,
 *      extract completions for every binary, and write them to a cache dir.
 *      this is typically run once per nix profile or system update.
 *
 *   2. completing (interactive): given a command and its current arguments,
 *      look up the cached data and return json completion candidates for
 *      nushell's custom completer protocol.
 *
 * the indexing pipeline for each binary:
 *   a. classify the binary (skip? try --help? try native completions?)
 *   b. if the tool has native nushell completion support, try various
 *      subcommand patterns ("completions nushell", "--completion nushell", etc.)
 *   c. otherwise, run the tool with --help/-h and parse the output
 *   d. recursively resolve subcommands (depth-limited to 5)
 *   e. after binaries, parse manpages for any commands not yet covered
 *
 * parallelism: indexing forks per binary, and subcommand resolution forks
 * per subcommand. results are marshaled back via pipes. this gives good
 * throughput on multi-core systems while keeping the code simple (no threads,
 * no async runtime — just unix fork/pipe/waitpid). *)

open Inshellah.Parser
open Inshellah.Manpage
open Inshellah.Nushell
open Inshellah.Store

module SSet = Set.Make(String)

(* print the usage text to stderr and exit with status 1.
 * called when no valid subcommand is given. the text is laid out one
 * subcommand per paragraph so it stays readable in a terminal. *)
let usage () =
  Printf.eprintf {|inshellah - nushell completions engine

Usage:
  inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
      Index completions into a directory of JSON/nu files.
      PREFIX is a directory containing bin/ and share/man/.
      Default dir: $XDG_CACHE_HOME/inshellah
      --ignore FILE     skip listed commands entirely
      --help-only FILE  skip manpages for listed commands, use --help instead
  inshellah complete CMD [ARGS...] [--dir PATH] [--system-dir PATH]
      Nushell custom completer. Outputs JSON completion candidates.
      Falls back to --help resolution if command is not indexed.
  inshellah query CMD [--dir PATH] [--system-dir PATH]
      Print stored completion data for CMD.
  inshellah dump [--dir PATH] [--system-dir PATH]
      List indexed commands.
  inshellah manpage FILE
      Parse a manpage and emit nushell extern
  inshellah manpage-dir DIR
      Batch-process manpages under DIR
|};
  exit 1

(* manpage sections that contain command documentation.
 * section 1 = user commands, section 8 = system administration commands. *)
let command_sections = [1; 8]

(* literal substring search using Str: true when sub occurs anywhere in s.
 * sub is matched verbatim (Str.regexp_string), never as a regex. *)
let contains_str s sub =
  match Str.search_forward (Str.regexp_string sub) s 0 with
  | _ -> true
  | exception Not_found -> false

(* heuristic to detect whether text is valid nushell source code.
 * checks for common nushell declaration keywords. the length > 20
 * check avoids false positives on short error messages. *)
let is_nushell_source text =
  String.length text > 20
  && (contains_str text "export extern"
      || contains_str text "export def"
      || (contains_str text "module " && contains_str text "export"))

(* extract command name from a manpage filename.
 * "ls.1.gz" → strip .gz → "ls.1" → drop extension → "ls".
 * a name without any extension is returned unchanged. *)
let cmd_name_of_manpage path =
  let base = Filename.basename path in
  let base =
    Option.value (Filename.chop_suffix_opt ~suffix:".gz" base) ~default:base
  in
  Filename.remove_extension base

(* sanitized environment for child processes.
 * strips display-related variables (DISPLAY, WAYLAND_DISPLAY, etc.) to prevent
 * gui tools from trying to open windows when we run them with --help.
 * without this, some tools (e.g. ckb-next) would pop up dialogs or hang
 * waiting for a display connection.
*)
let safe_env =
  lazy
    (let stripped_prefixes =
       [ "DISPLAY="; "WAYLAND_DISPLAY="; "DBUS_SESSION_BUS_ADDRESS=";
         "XAUTHORITY=" ]
     in
     let is_stripped binding =
       List.exists
         (fun prefix -> String.starts_with ~prefix binding)
         stripped_prefixes
     in
     Unix.environment ()
     |> Array.to_list
     |> List.filter (fun binding -> not (is_stripped binding))
     |> Array.of_list)

(* non-blocking drain of a pipe fd into a buffer. safe to call repeatedly;
 * reads whatever is available without blocking (select with a zero timeout
 * guards every read). stops at end-of-file, on a read error, or as soon as
 * nothing is readable. used by all fork-pipe sites to keep pipes drained so
 * children never block on write. *)
let drain_fd rd buf =
  let chunk = Bytes.create 8192 in
  let rec loop () =
    match Unix.select [ rd ] [] [] 0.0 with
    | _ :: _, _, _ ->
      (match Unix.read rd chunk 0 (Bytes.length chunk) with
       | 0 -> ()
       | n ->
         Buffer.add_subbytes buf chunk 0 n;
         loop ()
       | exception Unix.Unix_error _ -> ())
    | [], _, _ -> ()
  in
  loop ()

(* run a command with a timeout, capturing its stdout+stderr.
 * spawns the program with stdin redirected from /dev/null and stdout+stderr
 * merged onto a pipe, then reads from the pipe with select() polling until
 * either end-of-file or the deadline is reached.
 *
 * args is the full argv (program first); timeout_ms is the overall budget
 * in milliseconds.
 *
 * peculiarity: the child is run in /tmp to prevent tools that create
 * side-effect files (like ckb-next-dev-detect-report.gz) from polluting
 * the user's working directory. we chdir to /tmp before spawning and
 * restore the previous directory afterwards (best effort).
 *
 * peculiarity: the select timeout is capped at 0.05s per iteration to ensure
 * we check the deadline frequently even when no data is available.
 *
 * the child is always reaped: if it is still alive once the deadline has
 * passed (even after closing its output) it is killed with SIGKILL.
 *
 * returns None if args is empty, the process couldn't be started, or it
 * produced no output. output captured before a timeout is still returned. *)
let run_cmd args timeout_ms =
  match args with
  | [] -> None
  | prog :: _ ->
    let close_quietly fd = try Unix.close fd with Unix.Unix_error _ -> () in
    let rd, wr = Unix.pipe () in
    (* every descriptor and the cwd are released exactly once, whatever
     * fails along the way. *)
    let spawn () =
      let devnull = Unix.openfile "/dev/null" [ Unix.O_RDONLY ] 0 in
      Fun.protect
        ~finally:(fun () -> close_quietly devnull)
        (fun () ->
          let saved_cwd = Sys.getcwd () in
          Sys.chdir "/tmp";
          Fun.protect
            ~finally:(fun () ->
              (* can only fail if the old directory vanished meanwhile *)
              try Sys.chdir saved_cwd with Sys_error _ -> ())
            (fun () ->
              Unix.create_process_env prog (Array.of_list args)
                (Lazy.force safe_env) devnull wr wr))
    in
    let pid =
      match spawn () with
      | pid -> Some pid
      | exception (Unix.Unix_error _ | Sys_error _) -> None
    in
    (* the parent never writes; closing our copy lets reads see EOF *)
    close_quietly wr;
    (match pid with
     | None ->
       close_quietly rd;
       None
     | Some pid ->
       let deadline =
         Unix.gettimeofday () +. (float_of_int timeout_ms /. 1000.0)
       in
       let buf = Buffer.create 4096 in
       let chunk = Bytes.create 8192 in
       (* read until EOF, a read error, or the deadline *)
       let rec read_output () =
         let remaining = deadline -. Unix.gettimeofday () in
         if remaining > 0.0 then begin
           match Unix.select [ rd ] [] [] (min remaining 0.05) with
           | _ :: _, _, _ ->
             (match Unix.read rd chunk 0 (Bytes.length chunk) with
              | 0 -> ()
              | n ->
                Buffer.add_subbytes buf chunk 0 n;
                read_output ()
              | exception Unix.Unix_error (Unix.EINTR, _, _) -> read_output ()
              | exception Unix.Unix_error _ -> ())
           | [], _, _ -> read_output ()
           | exception Unix.Unix_error (Unix.EINTR, _, _) -> read_output ()
         end
       in
       (* wait for the child without ever blocking past the deadline *)
       let rec reap () =
         match Unix.waitpid [ Unix.WNOHANG ] pid with
         | 0, _ ->
           if Unix.gettimeofday () < deadline then begin
             Unix.sleepf 0.005;
             reap ()
           end else begin
             (try Unix.kill pid Sys.sigkill with Unix.Unix_error _ -> ());
             (try ignore (Unix.waitpid [] pid) with Unix.Unix_error _ -> ())
           end
         | _ -> ()
         | exception Unix.Unix_error (Unix.EINTR, _, _) -> reap ()
         | exception Unix.Unix_error _ -> ()
       in
       read_output ();
       close_quietly rd;
       reap ();
       if Buffer.length buf > 0 then Some (Buffer.contents buf) else None)

(* check if a path is a regular file with at least one execute bit set.
 * symlinks are followed (Unix.stat); any stat failure counts as false. *)
let is_executable path =
  match Unix.stat path with
  | { Unix.st_kind = Unix.S_REG; st_perm; _ } -> st_perm land 0o111 <> 0
  | _ -> false
  | exception Unix.Unix_error _ -> false

(* check if a file is a script by looking for a #! shebang.
 * follows symlinks via realpath before reading. files shorter than two
 * bytes and unreadable files are not scripts. the channel is closed on
 * every path. *)
let is_script path =
  try
    In_channel.with_open_bin (Unix.realpath path) (fun ic ->
      match In_channel.really_input_string ic 2 with
      | Some "#!" -> true
      | Some _ | None -> false)
  with Unix.Unix_error _ | Sys_error _ -> false

(* scan an elf binary for string needles without loading the entire file.
 * reads the file in 64kb chunks, searching each chunk for the needle strings.
 * uses a sliding window (carry) of max_needle bytes between chunks to handle
 * needles that span chunk boundaries.
*
 * peculiarity: on read failure (e.g. if the path resolves to something
 * unreadable), all needles are marked as found. this is a conservative
 * fallback — we'd rather try --help on an unreadable binary than skip it.
 * a file too short to hold the 4-byte elf magic counts as a read failure;
 * a readable file that is not elf yields no hits at all.
 *
 * the inner loop is a manual byte-by-byte comparison rather than using
 * String.contains or Str for performance — this runs on every binary
 * in the prefix, so it needs to be fast.
 *
 * returns a table whose keys are the needles that were found. the input
 * channel is closed on every path, including failures. *)
let elf_scan path needles =
  let found = Hashtbl.create 4 in
  let mark needle = Hashtbl.replace found needle true in
  let remaining () =
    List.filter (fun n -> not (Hashtbl.mem found n)) needles
  in
  let chunk_size = 65536 in
  let max_needle =
    List.fold_left (fun m n -> max m (String.length n)) 0 needles
  in
  let scan ic =
    (* room for one chunk plus the bytes carried over from the previous one *)
    let buf = Bytes.create (chunk_size + max_needle) in
    (* does needle occur within the first total bytes of buf? *)
    let occurs_in ~total needle =
      let nlen = String.length needle in
      let rec matches_at i j =
        j >= nlen
        || (Bytes.get buf (i + j) = needle.[j] && matches_at i (j + 1))
      in
      let rec search i =
        i <= total - nlen && (matches_at i 0 || search (i + 1))
      in
      search 0
    in
    let rec loop carry =
      match remaining () with
      | [] -> ()
      | todo ->
        let n = input ic buf carry chunk_size in
        if n > 0 then begin
          let total = carry + n in
          List.iter
            (fun needle -> if occurs_in ~total needle then mark needle)
            todo;
          (* keep the tail so a needle straddling two chunks is still seen *)
          let new_carry = min max_needle total in
          Bytes.blit buf (total - new_carry) buf 0 new_carry;
          loop new_carry
        end
    in
    loop 0
  in
  (try
     let ic = open_in_bin (Unix.realpath path) in
     Fun.protect
       ~finally:(fun () -> close_in_noerr ic)
       (fun () ->
         match really_input_string ic 4 with
         | "\x7fELF" -> scan ic
         | _ -> ())
   with _ -> List.iter mark needles);
  found

(* detect nix-generated c wrapper scripts and extract the real binary path.
 * nix's makeCWrapper creates small c programs that set up the environment
 * and exec the real binary.
these wrappers won't contain "-h" or "completion"
 * in their own binary (they're just wrappers), so elf_scan would say "skip".
 * this function reads the wrapper source to find the actual /nix/store/.../bin/...
 * target path, so we can try --help on the real binary instead.
 *
 * peculiarity: caps the read at 64kb to avoid accidentally reading a large
 * non-wrapper binary into memory.
 *
 * returns Some target only when the file mentions makeCWrapper, contains a
 * store path of that shape, and the path exists on disk. any failure to
 * resolve, open or read the file yields None; the channel is closed on
 * every path. *)
let nix_wrapper_target path =
  let read_if_small real =
    In_channel.with_open_bin real (fun ic ->
      let n = in_channel_length ic in
      if n > 65536 then None else Some (really_input_string ic n))
  in
  match read_if_small (Unix.realpath path) with
  | None -> None
  | Some contents ->
    if not (contains_str contents "makeCWrapper") then None
    else begin
      let re =
        Str.regexp "/nix/store/[a-z0-9]+-[^' \n\r\x00]+/bin/[a-zA-Z0-9._-]+"
      in
      match Str.search_forward re contents 0 with
      | _ ->
        let target = Str.matched_string contents in
        if Sys.file_exists target then Some target else None
      | exception Not_found -> None
    end
  | exception (Unix.Unix_error _ | Sys_error _ | End_of_file) -> None

(* heuristic filter for binary names that should never be indexed.
 * skips: empty names, "-", dotfiles, libraries (lib-prefix), daemon wrappers
 * (suffixes -daemon, -wrapped), shared objects (.so suffix), and names with no
 * alphanumeric characters (e.g. punctuation-only names). *)
let skip_name name =
  let is_alnum c =
    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
  in
  String.length name = 0
  || name = "-"
  || name.[0] = '.'
  || String.starts_with ~prefix:"lib" name
  || String.ends_with ~suffix:"-daemon" name
  || String.ends_with ~suffix:"-wrapped" name
  || String.ends_with ~suffix:".so" name
  || not (String.exists is_alnum name)

(* classification result for a binary.
 *   Skip                — don't index this binary at all
 *   Try_help            — only try --help (scripts, binaries without
 *                         "completion" string)
 *   Try_native_and_help — try native nushell completion first, fall back
 *                         to --help *)
type bin_class = Skip | Try_help | Try_native_and_help

(* classify a binary to decide the indexing strategy.
 * decision tree:
 * 1.
nushell builtin or bad name → Skip
 * 2. not executable → Skip
 * 3. script (has shebang) → Try_help (scripts can't have native completions)
 * 4. elf binary containing "completion" → Try_native_and_help
 * 5. elf binary containing "-h" → Try_help
 * 6. nix wrapper → Try_help (the wrapper itself is just an exec shim)
 * 7. otherwise → Skip (binary has no help infrastructure) *)
let classify_binary bindir name =
  (* steps 3-7: the file is known to be an executable regular file *)
  let classify_executable path =
    if is_script path then Try_help
    else begin
      let hits = elf_scan path ["-h"; "completion"] in
      match Hashtbl.mem hits "completion", Hashtbl.mem hits "-h" with
      | true, _ -> Try_native_and_help
      | false, true -> Try_help
      | false, false ->
        (match nix_wrapper_target path with
         | Some _ -> Try_help
         | None -> Skip)
    end
  in
  match is_nushell_builtin name || skip_name name with
  | true -> Skip
  | false ->
    let path = Filename.concat bindir name in
    if is_executable path then classify_executable path else Skip

(* detect available cpu cores by counting "processor" lines in /proc/cpuinfo.
 * never returns less than 1, and falls back to 4 if /proc/cpuinfo can't be
 * read (e.g. on non-linux). *)
let num_cores () =
  try
    let ic = open_in "/proc/cpuinfo" in
    let rec count seen =
      match input_line ic with
      | line ->
        count
          (if String.starts_with ~prefix:"processor" line then seen + 1
           else seen)
      | exception End_of_file -> seen
    in
    let processors = count 0 in
    close_in ic;
    max 1 processors
  with _ -> 4

(* try to get native nushell completions from a binary.
 * tries several common subcommand patterns that tools use for shell completions.
 * returns the first one that produces valid nushell source code.
 * the 500ms timeout is generous enough for most tools but prevents hangs.
 *
 * the patterns cover: cobra (go), clap (rust), click (python), and various
 * ad-hoc implementations.
*)
let try_native_completion bin_path =
  (* run one candidate invocation; keep its output only if it looks like
   * nushell source *)
  let probe extra_args =
    match run_cmd (bin_path :: extra_args) 500 with
    | Some output -> if is_nushell_source output then Some output else None
    | None -> None
  in
  List.find_map probe
    [ ["completions"; "nushell"];
      ["completion"; "nushell"];
      ["--completions"; "nushell"];
      ["--completion"; "nushell"];
      ["generate-completion"; "nushell"];
      ["--generate-completion"; "nushell"];
      ["shell-completions"; "nushell"] ]

(* parse a manpage file, extracting the command name, its flags/subcommands,
 * and any clap-style per-subcommand sections.
 * the command name comes from the synopsis when one is found, otherwise
 * from the file name. returns None for nushell builtins; otherwise
 * Some (cmd, result, subs) where subs pairs each "cmd sub" name with the
 * result parsed from that subcommand's section. *)
let parse_manpage_for_command file =
  let contents = read_manpage_file file in
  let fallback = cmd_name_of_manpage file in
  let cmd =
    Option.value (extract_synopsis_command contents) ~default:fallback
  in
  match is_nushell_builtin cmd with
  | true -> None
  | false ->
    let parsed = parse_manpage_string contents in
    let sections = extract_subcommand_sections contents in
    let result =
      match sections with
      | [] -> parsed
      | _ :: _ ->
        (* per-subcommand sections replace the parsed subcommand list *)
        { parsed with
          subcommands =
            List.map (fun (name, desc, _) -> { name; desc }) sections }
    in
    let subs =
      List.map (fun (name, _, sub_result) -> (cmd ^ " " ^ name, sub_result))
        sections
    in
    Some (cmd, result, subs)

(* "inshellah manpage FILE" — parse one manpage and print the nushell extern.
 * prints nothing when the page yields no entries. *)
let cmd_manpage file =
  match parse_manpage_for_command file with
  | None -> ()
  | Some (cmd, result, _) ->
    if result.entries <> [] then print_string (generate_extern cmd result)

(* "inshellah manpage-dir DIR" — batch-process all manpages under a directory.
 * visits DIR/manN for every command section; a page that fails to parse is
 * skipped so one bad file cannot abort the batch. *)
let cmd_manpage_dir dir =
  let process_section section =
    let subdir = Filename.concat dir (Printf.sprintf "man%d" section) in
    if is_dir subdir then
      Sys.readdir subdir
      |> Array.iter (fun file ->
           try cmd_manpage (Filename.concat subdir file) with _ -> ())
  in
  List.iter process_section command_sections

(* safety limit: don't accumulate more than 500 subcommand resolution results
 * per binary.
prevents runaway recursion on tools with enormous subcommand trees. *)
let max_resolve_results = 500

(* safe wrapper around parse_manpage_for_command that catches all exceptions.
 * a page with neither entries nor subcommand sections counts as a failed
 * parse and yields None. *)
let process_manpage file =
  try
    match parse_manpage_for_command file with
    | None -> None
    | Some (_, result, subs) as parsed ->
      if result.entries <> [] || subs <> [] then parsed else None
  with _ -> None

(* collect the set of command names that have manpages in a given man directory.
 * used during indexing to skip --help for commands that will be handled by
 * the manpage parsing phase instead (manpages are more reliable than --help). *)
let manpaged_commands mandir =
  let add_section names section =
    let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
    if not (is_dir subdir) then names
    else
      Sys.readdir subdir
      |> Array.fold_left
           (fun names file -> SSet.add (cmd_name_of_manpage file) names)
           names
  in
  List.fold_left add_section SSet.empty command_sections

(* parallel structured help resolver — recursively resolves a command and
 * all its subcommands by running --help on each, forking a child process
 * per subcommand for parallelism.
 *
 * the resolver works as a breadth-first queue:
 *   1. start with the root command in the queue
 *   2. fork a child for each queued item (up to num_cores concurrent)
 *   3. the child runs --help, parses the output, marshals the result via pipe
 *   4. the parent collects results and enqueues discovered subcommands
 *   5. repeat until queue is empty and all children have finished
 *
 * depth is limited to 5 levels and total results to max_resolve_results
 * to prevent runaway recursion on pathological command trees.
 *
 * peculiarity: the child process detects "self-listing" — when a subcommand's
 * --help lists itself as a subcommand (e.g. "git help" listing "help" as a
 * subcommand of itself). this would cause infinite recursion, so such results
 * are discarded.
 *
 * peculiarity: children close all pipe fds from other pending children
 * immediately after fork to prevent fd leaks.
the parent drains pipes
 * regularly to prevent children from blocking on full pipe buffers.
 *
 * peculiarity: a child never returns into the caller's code. everything it
 * does is wrapped so that any exception turns into an empty result, and it
 * leaves through Unix._exit, which skips at_exit handlers — a plain exit
 * would flush stdio buffers inherited from the parent and print their
 * contents a second time.
 *
 * arguments: cmd is the binary to run, rest the subcommand path to start
 * from, name the display name of that starting point, timeout the per-run
 * budget in milliseconds. returns (name, help_result) pairs in the order
 * the results were collected. *)
let help_resolve_par ?(timeout=200) cmd rest name =
  let max_depth = 5 in
  let max_jobs = num_cores () in
  let queue = Queue.create () in
  Queue.push (rest, name, 0) queue;
  let results = ref [] in
  (* number of entries in results, kept alongside to avoid List.length *)
  let n_results = ref 0 in
  (* pending: (pid, rd, buf, rest, name, depth) *)
  let pending = ref [] in
  let close_quietly fd = try Unix.close fd with _ -> () in
  let pending_fds () = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
  (* child side: run --help (falling back to -h) for one queue item and
   * parse it. None means nothing useful, or a self-listing subcommand. *)
  let resolve_one q_rest q_depth : (help_result * subcommand list) option =
    let text =
      match run_cmd (cmd :: q_rest @ ["--help"]) timeout with
      | Some _ as r -> r
      | None -> run_cmd (cmd :: q_rest @ ["-h"]) timeout
    in
    match text with
    | None -> None
    | Some text ->
      (match parse_help text with
       | Error _ -> None
       | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] ->
         None
       | Ok r ->
         let self_listed =
           match List.rev q_rest with
           | [] -> false
           | leaf :: _ ->
             List.exists (fun (sc : subcommand) -> sc.name = leaf)
               r.subcommands
         in
         if self_listed then None
         else
           let subs = if q_depth >= max_depth then [] else r.subcommands in
           Some (r, subs))
  in
  (* parent side: a child has exited; decode what it sent, record the
   * result and enqueue its subcommands unless a limit was reached. *)
  let collect rd buf q_rest q_name q_depth =
    drain_fd rd buf;
    close_quietly rd;
    let data = Buffer.contents buf in
    let result : (help_result * subcommand list) option =
      if String.length data > 0 then
        try Marshal.from_string data 0 with _ -> None
      else None
    in
    match result with
    | None -> ()
    | Some (r, subs) ->
      let at_limit =
        q_depth >= max_depth || !n_results >= max_resolve_results
      in
      results := (q_name, r) :: !results;
      incr n_results;
      if not at_limit then
        List.iter (fun (sc : subcommand) ->
          Queue.push
            (q_rest @ [sc.name], q_name ^ " " ^ sc.name, q_depth + 1)
            queue
        ) subs
  in
  (* drain every pending pipe and collect the children that have exited *)
  let reap () =
    pending := List.filter (fun (pid, rd, buf, q_rest, q_name, q_depth) ->
      drain_fd rd buf;
      match Unix.waitpid [Unix.WNOHANG] pid with
      | (0, _) -> true
      | _ -> collect rd buf q_rest q_name q_depth; false
      | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
        close_quietly rd; false
    ) !pending
  in
  (* block until fewer than max_jobs children are running *)
  let wait_for_slot () =
    while List.length !pending >= max_jobs do
      reap ();
      if List.length !pending >= max_jobs then
        ignore (Unix.select (pending_fds ()) [] [] 0.05)
    done
  in
  while not (Queue.is_empty queue) || !pending <> [] do
    while not (Queue.is_empty queue) do
      let (q_rest, q_name, q_depth) = Queue.pop queue in
      wait_for_slot ();
      let (rd, wr) = Unix.pipe () in
      (match Unix.fork () with
       | 0 ->
         (try
            Unix.close rd;
            (* read ends belonging to sibling children are of no use here *)
            List.iter (fun (_, prd, _, _, _, _) -> close_quietly prd) !pending;
            let result = try resolve_one q_rest q_depth with _ -> None in
            let oc = Unix.out_channel_of_descr wr in
            Marshal.to_channel oc
              (result : (help_result * subcommand list) option) [];
            close_out oc
          with _ -> ());
         Unix._exit 0
       | pid ->
         Unix.close wr;
         pending :=
           (pid, rd, Buffer.create 4096, q_rest, q_name, q_depth) :: !pending)
    done;
    if !pending <> [] then begin
      reap ();
      (* nothing new to start: sleep until some pipe becomes readable *)
      if !pending <> [] && Queue.is_empty queue then
        ignore (Unix.select (pending_fds ()) [] [] 0.05)
    end
  done;
  List.rev !results

(* "inshellah index" — the main indexing command.
 * processes all binaries and manpages in the given prefix directories,
 * writing completion data to the cache dir.
 *
 * the pipeline has two phases:
 *
 * phase 1 (binaries): fork one child per binary. each child:
 *   - tries native nushell completions (if classified as Try_native_and_help)
 *   - falls back to help_resolve_par (which itself forks per subcommand)
 *   - marshals the result back via pipe as a tagged variant:
 *       `Native of string — raw nushell source
 *       `Parsed of (string * help_result) list — parsed flag data
 *       `None — nothing useful extracted
 *
 * phase 2 (manpages): sequentially parse manpages for commands not yet
 * covered by phase 1. manpages are more reliable than --help for many
 * gnu tools, but slower to process.
 *
 * commands on the ignorelist are skipped entirely. commands on the
 * help_only list skip manpage parsing and only use --help. commands
 * with manpages skip --help in phase 1 (they'll be handled in phase 2).
*
 * peculiarity: the done_cmds set tracks which commands have already been
 * indexed to prevent duplicates across phases and across multiple prefix
 * directories.
 *
 * peculiarity: stderr is flushed right after the "skipping" message and
 * forked children leave through Unix._exit rather than exit. a child
 * inherits a copy of the parent's stdio buffers, so unflushed text would
 * otherwise be printed again by every child when it terminates.
 *
 * bindirs and mandirs are walked pairwise (List.iter2), so they must have
 * the same length. *)
let cmd_index bindirs mandirs ignorelist help_only dir =
  ensure_dir dir;
  let done_cmds = ref SSet.empty in
  let n_results = ref 0 in
  let close_quietly fd = try Unix.close fd with _ -> () in
  (* store one parsed result unless that command name is already indexed *)
  let record ~source cmd_name r =
    if not (SSet.mem cmd_name !done_cmds) then begin
      write_result ~dir ~source cmd_name r;
      done_cmds := SSet.add cmd_name !done_cmds;
      incr n_results
    end
  in
  let index_bindir bindir mandir =
    if not (is_dir bindir) then
      Printf.eprintf "skipping %s (not found)\n%!" bindir
    else begin
      let bins = Sys.readdir bindir in
      Array.sort String.compare bins;
      let manpaged =
        if is_dir mandir then manpaged_commands mandir else SSet.empty
      in
      let max_jobs = num_cores () in
      (* ignored commands are skipped; commands with a manpage are left to
       * phase 2 unless they are on the help_only list *)
      let classified = Array.map (fun name ->
        let cls =
          if SSet.mem name ignorelist then Skip
          else if not (SSet.mem name help_only) && SSet.mem name manpaged
          then Skip
          else classify_binary bindir name
        in
        (name, cls)
      ) bins in
      (* pending: (pid, rd, buf, name) *)
      let pending = ref [] in
      let pending_fds () = List.map (fun (_, rd, _, _) -> rd) !pending in
      (* a child has exited: decode what it sent and write it out *)
      let process_result name rd buf =
        drain_fd rd buf;
        close_quietly rd;
        let data = Buffer.contents buf in
        if String.length data > 0 then begin
          let result :
            [ `Native of string
            | `Parsed of (string * help_result) list
            | `None ] =
            try Marshal.from_string data 0 with _ -> `None
          in
          (match result with
           | `Native src -> write_native ~dir name src; incr n_results
           | `Parsed pairs ->
             List.iter (fun (cmd_name, r) -> record ~source:"help" cmd_name r)
               pairs
           | `None -> ())
        end;
        (* mark the binary as handled even when nothing was extracted *)
        done_cmds := SSet.add name !done_cmds
      in
      let reap () =
        pending := List.filter (fun (pid, rd, buf, name) ->
          drain_fd rd buf;
          match Unix.waitpid [Unix.WNOHANG] pid with
          | (0, _) -> true
          | _ -> process_result name rd buf; false
          | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
            close_quietly rd; false
        ) !pending
      in
      let wait_for_slot () =
        while List.length !pending >= max_jobs do
          reap ();
          if List.length !pending >= max_jobs then
            ignore (Unix.select (pending_fds ()) [] [] 0.05)
        done
      in
      (* Phase 1: binaries *)
      Array.iter (fun (name, cls) ->
        match cls with
        | Skip -> ()
        | Try_help | Try_native_and_help ->
          wait_for_slot ();
          let (rd, wr) = Unix.pipe () in
          (match Unix.fork () with
           | 0 ->
             (try
                Unix.close rd;
                List.iter (fun (_, prd, _, _) -> close_quietly prd) !pending;
                let result :
                  [ `Native of string
                  | `Parsed of (string * help_result) list
                  | `None ] =
                  try
                    let path = Filename.concat bindir name in
                    let native =
                      match cls with
                      | Try_native_and_help -> try_native_completion path
                      | Skip | Try_help -> None
                    in
                    (match native with
                     | Some src -> `Native src
                     | None ->
                       let pairs =
                         help_resolve_par ~timeout:200 path [] name
                       in
                       if pairs <> [] then `Parsed pairs else `None)
                  with _ -> `None
                in
                let oc = Unix.out_channel_of_descr wr in
                Marshal.to_channel oc result [];
                close_out oc
              with _ -> ());
             Unix._exit 0
           | pid ->
             Unix.close wr;
             pending := (pid, rd, Buffer.create 4096, name) :: !pending)
      ) classified;
      while !pending <> [] do
        reap ();
        if !pending <> [] then
          ignore (Unix.select (pending_fds ()) [] [] 0.05)
      done;
      (* Phase 2: manpages *)
      if is_dir mandir then
        List.iter (fun section ->
          let subdir =
            Filename.concat mandir (Printf.sprintf "man%d" section)
          in
          if is_dir subdir then begin
            let files = Sys.readdir subdir in
            Array.sort String.compare files;
            Array.iter (fun file ->
              let base_cmd = cmd_name_of_manpage file in
              if not (SSet.mem base_cmd help_only) then
                match process_manpage (Filename.concat subdir file) with
                | None -> ()
                | Some (cmd, result, subs) ->
                  record ~source:"manpage" cmd result;
                  List.iter (fun (sub_cmd, sub_result) ->
                    record ~source:"manpage" sub_cmd sub_result
                  ) subs
            ) files
          end
        ) command_sections
    end
  in
  List.iter2 index_bindir bindirs mandirs;
  Printf.printf "indexed %d commands into %s\n" !n_results dir

(* "inshellah dump" — list all indexed commands with their source type.
 * a command whose source type is unknown is shown with "?". *)
let cmd_dump dirs =
  let cmds = all_commands dirs in
  Printf.printf "%d commands\n" (List.length cmds);
  List.iter (fun cmd ->
    let src = Option.value (file_type_of dirs cmd) ~default:"?" in
    Printf.printf " %-40s [%s]\n" cmd src
  ) cmds

(* search $PATH for an executable with the given name.
 * used during completion to find binaries for on-the-fly resolution.
 * returns the first hit in $PATH order, or None when $PATH is unset or
 * holds no such executable. *)
let find_in_path name =
  match Sys.getenv_opt "PATH" with
  | None -> None
  | Some path ->
    String.split_on_char ':' path
    |> List.find_map (fun dir ->
         let candidate = Filename.concat dir name in
         if is_executable candidate then Some candidate else None)

(* resolve a command's completions on-the-fly and cache the results.
 * called during "complete" when a command isn't in the index.
 * runs help_resolve_par and writes results to the user's cache dir.
 * returns the resolved pairs, or None when nothing could be resolved (in
 * which case the cache dir is left untouched). *)
let resolve_and_cache ~dir name path =
  match help_resolve_par ~timeout:200 path [] name with
  | [] -> None
  | pairs ->
    ensure_dir dir;
    List.iter (fun (cmd_name, r) -> write_result ~dir cmd_name r) pairs;
    Some pairs

(* format a single completion candidate as json for nushell's completer
 * protocol. both fields are passed through escape_json. *)
let completion_json value desc =
  Printf.sprintf "{\"value\":\"%s\",\"description\":\"%s\"}"
    (escape_json value) (escape_json desc)

(* fuzzy matching: returns a score > 0 if needle is a subsequence of haystack.
 * higher scores = better match. scoring tiers:
 *   - exact match: 1000
 *   - prefix match: 900 + length bonus (how much of the haystack is covered)
 *   - subsequence: base 10 per char + bonuses for:
 *       - word boundary alignment (50): matching at '-', '_', or camelCase transitions
 *       - consecutive matches (20): matching adjacent characters
 *
 * this drives the completion candidate ranking. users typing "ser" should see
 * "--server" ranked above "--preserve" even though both contain "ser" as a
 * subsequence.
the word-boundary bonus achieves this.
 *
 * scoring detail for the subsequence tier: a matched character earns 50
 * when it sits on a word boundary and 10 otherwise, plus 20 when it
 * directly follows the previously matched character. the first matched
 * character has no predecessor and never earns the 20.
 *
 * an empty needle scores 1 (everything matches weakly); a needle longer
 * than the haystack, or one that is not a subsequence, scores 0. apart
 * from the exact-match test, matching ignores ascii case. *)
let fuzzy_score needle haystack =
  let nlen = String.length needle and hlen = String.length haystack in
  if nlen = 0 then 1
  else if nlen > hlen then 0
  else if needle = haystack then 1000
  else begin
    let needle = String.lowercase_ascii needle in
    let haystack_lc = String.lowercase_ascii haystack in
    if String.starts_with ~prefix:needle haystack_lc then
      900 + (nlen * 100 / hlen)
    else begin
      (* boundaries are judged on the original spelling so that camelCase
       * transitions are still visible *)
      let is_boundary hi =
        hi = 0
        || haystack.[hi - 1] = '-'
        || haystack.[hi - 1] = '_'
        || (haystack.[hi - 1] >= 'a' && haystack.[hi - 1] <= 'z'
            && haystack.[hi] >= 'A' && haystack.[hi] <= 'Z')
      in
      (* walk the haystack, greedily matching needle chars as a subsequence.
       * prev is the index of the last matched char, or -1 before any match *)
      let rec walk ni hi prev score =
        if ni = nlen then score
        else if hi = hlen then 0
        else if haystack_lc.[hi] = needle.[ni] then begin
          let base = if is_boundary hi then 50 else 10 in
          let streak = if prev >= 0 && prev = hi - 1 then 20 else 0 in
          walk (ni + 1) (hi + 1) hi (score + base + streak)
        end
        else walk ni (hi + 1) prev score
      in
      walk 0 0 (-1) 0
    end
  end

(* known privilege-escalation wrappers. when one of these is the first token,
 * we strip it and its options before completing the real command.
 *
 * rather than maintaining per-command option tables (fragile — e.g. sudo's
 * -h is --help not --host, flags differ across implementations), we find the
 * real command by scanning for the first non-flag token that is a known
 * command (exists in the completion store or in $PATH). tokens like "root"
 * in "sudo -u root" are skipped because they aren't commands. *)
let elevation_commands =
  ["sudo"; "run0"; "doas"; "pkexec"; "su"; "calife"; "sux"; "sudoedit";
   "please"; "super"; "priv"]

(* scan past the elevation command's flags and arguments to find the real
 * command. is_command checks whether a token names a known command.
 * returns Some (real_cmd :: args) or None if no command was found.
*) let find_real_command is_command args = let rec scan = function | [] -> None | "--" :: rest -> Some rest | arg :: rest when String.length arg > 0 && arg.[0] = '-' -> scan rest | arg :: _ as cmd_and_rest when is_command arg -> Some cmd_and_rest | _ :: rest -> scan rest in scan args (* "inshellah complete CMD [ARGS...]" — the nushell custom completer. * this is the hot path — called every time the user presses tab in nushell. * * the completion logic: * 1. try to find the command (or longest subcommand prefix) in the store * 2. if not found, try on-the-fly resolution (find in $PATH, run --help, cache) * 3. score all candidate completions against the partial input using fuzzy_score * 4. output scored candidates as a json array * * subcommand resolution: the lookup tries longest prefix first. * for "git add --", it first looks for "git add", then "git". * this ensures subcommand-specific flags are shown. * * peculiarity: nushell sends a trailing empty token when the cursor is after * a space ("git add "). in this case all_tokens includes the empty string. * when the last token is non-empty, the user is still typing it, so we use * it as the fuzzy filter. when empty, we show all candidates. * * peculiarity: if only a parent command matched (e.g. "git" matched but not * "git add"), we suppress subcommand suggestions and only show flags. this * prevents showing sibling subcommands when the user has already committed * to a specific subcommand path. *) let cmd_complete spans user_dir system_dirs = let dirs = user_dir :: system_dirs in (* if the command line starts with a privilege-escalation wrapper, scan past * it to find the real command. we identify the command by checking the store * and $PATH — this avoids needing per-command option tables which are fragile * across different implementations. if no real command is found, fall back to * completing the elevation command itself. 
*) let spans = match spans with | cmd :: rest when List.mem cmd elevation_commands -> let is_command name = name <> "" && (lookup dirs name <> None || find_in_path name <> None) in (match find_real_command is_command rest with | Some (_ :: _ as real_spans) -> real_spans | _ -> spans) | _ -> spans in match spans with | [] -> print_string "[]\n" | cmd_name :: rest -> (* Try longest prefix match: "git add" before "git" *) let find_result tokens = let n = List.length tokens in List.init n Fun.id |> List.find_map (fun drop -> let prefix = List.filteri (fun i _ -> i < n - drop) tokens in match prefix with | [] -> None | _ -> let try_name = String.concat " " prefix in match lookup dirs try_name with | Some r -> Some (try_name, r, List.length prefix) | None -> None) in let all_tokens = cmd_name :: rest in let last_token = match rest with | [] -> "" | _ -> List.nth rest (List.length rest - 1) in (* Only treat the last token as a completed subcommand when nushell sends a trailing empty token (cursor is after a space). Otherwise the user is still typing and we treat it as partial. 
*) let lookup_tokens = if last_token = "" then all_tokens else match rest with | _ :: _ -> cmd_name :: List.rev (List.tl (List.rev rest)) | _ -> [cmd_name] in let resolve tokens partial = match find_result tokens with | Some _ as found -> (found, partial) | None -> (None, partial) in let found, partial = resolve lookup_tokens last_token in (* Try on-the-fly resolution when no match or only a parent matched *) let n_lookup = List.length lookup_tokens in let result, partial = match found with | Some (_, _, depth) when depth >= n_lookup - 1 -> (* Exact or near-exact match — use it *) (found, partial) | _ -> (* No match, or only a parent matched — try on-the-fly resolution *) (match find_in_path cmd_name with | Some path -> (match resolve_and_cache ~dir:user_dir cmd_name path with | Some _pairs -> resolve lookup_tokens last_token | None -> (found, partial)) | None -> (found, partial)) in let candidates = match result with | None -> [] | Some (_matched_name, r, depth) -> (* When the match is shallower than requested, the user already typed a subcommand beyond the matched level — don't show sibling subcommands, only flags *) let sub_candidates = if depth < n_lookup - 1 then [] else let subs = match r.subcommands with | _ :: _ -> r.subcommands | [] -> subcommands_of dirs _matched_name in List.filter_map (fun (sc : subcommand) -> let s = fuzzy_score partial sc.name in if s > 0 then Some (s, completion_json sc.name sc.desc) else None ) subs in (* build flag completion candidates from the entry list. 
* for flags with both short and long forms (Both), we pick which form * to display based on what the user is currently typing: * - if the partial input matches the short flag better, show the short * flag as the value and note the long form in the description * - otherwise (including empty partial), prefer the long flag and note * the short form in the description * this keeps the candidate list clean (one entry per flag) while still * surfacing the alternate form so the user knows about it. * * parameter names are appended to descriptions in angle brackets for * mandatory params and square brackets for optional ones, matching the * conventions users expect from cli help text. *) let flag_candidates = List.filter_map (fun (e : entry) -> let base_desc = match e.param with | Some (Mandatory p) -> if e.desc <> "" then e.desc ^ " <" ^ p ^ ">" else "<" ^ p ^ ">" | Some (Optional p) -> if e.desc <> "" then e.desc ^ " [" ^ p ^ "]" else "[" ^ p ^ "]" | None -> e.desc in let flag, desc = match e.switch with | Long l -> ("--" ^ l, base_desc) | Short c -> (Printf.sprintf "-%c" c, base_desc) | Both (c, l) -> (* score the partial against both forms to decide which to present. * e.g. typing "-s" scores higher against "-s" than "--squeeze-blank", * so we show "-s (aka --squeeze-blank)". when the partial is empty or * matches the long form better, we default to the long form. 
*)
              let long_flag = "--" ^ l in
              let short_flag = Printf.sprintf "-%c" c in
              let long_score = fuzzy_score partial long_flag in
              let short_score = fuzzy_score partial short_flag in
              if short_score > long_score then
                (short_flag, Printf.sprintf "(aka %s) %s" long_flag base_desc)
              else
                (long_flag, Printf.sprintf "(aka %s) %s" short_flag base_desc)
          in
          let s = fuzzy_score partial flag in
          if s > 0 then Some (s, completion_json flag desc) else None
        ) r.entries in
      let scored = sub_candidates @ flag_candidates in
      List.sort (fun (a, _) (b, _) -> compare b a) scored
      |> List.map snd
    in
    Printf.printf "[%s]\n" (String.concat "," candidates)

(* "inshellah query CMD" — dump whatever is stored for CMD.
 * the command is looked up with lookup_raw across [dirs]. on a hit the stored
 * text is written to stdout followed by a newline; on a miss a "not found"
 * message goes to stderr and the process exits with status 1. *)
let cmd_query cmd dirs =
  match lookup_raw dirs cmd with
  | Some data -> print_endline data
  | None ->
    Printf.eprintf "not found: %s\n" cmd;
    exit 1

(* load a newline-separated list of command names from [path].
 * every line is trimmed; blank lines and lines starting with '#' are dropped.
 * reading is best-effort: when the file cannot be opened or read (Sys_error),
 * a warning is printed to stderr and the empty set is returned, so a bad
 * --ignore or --help-only path does not abort the run. any other exception
 * propagates to the caller. *)
let load_ignorelist path =
  let keep raw =
    let line = String.trim raw in
    if line = "" || line.[0] = '#' then None else Some line
  in
  match In_channel.with_open_text path In_channel.input_all with
  | contents ->
    contents
    |> String.split_on_char '\n'
    |> List.filter_map keep
    |> SSet.of_list
  | exception Sys_error msg ->
    Printf.eprintf "warning: cannot read command list %s: %s\n" path msg;
    SSet.empty

(* parse the arguments of the "index" subcommand.
 * returns (prefixes, dir, ignore, help_only):
 *   prefixes  — positional arguments, in command-line order
 *   dir       — value of the last --dir, or default_store_path () if absent
 *   ignore    — union of the lists loaded from every --ignore FILE
 *   help_only — union of the lists loaded from every --help-only FILE
 * an option given as the very last argument has no value to consume; that is
 * reported on stderr and the process exits with status 1, rather than the
 * option name being collected as a prefix directory. *)
let parse_index_args args =
  let rec go prefixes dir ignore help_only = function
    | [] -> (List.rev prefixes, dir, ignore, help_only)
    | "--dir" :: path :: rest -> go prefixes path ignore help_only rest
    | "--ignore" :: path :: rest ->
      go prefixes dir (SSet.union ignore (load_ignorelist path)) help_only rest
    | "--help-only" :: path :: rest ->
      go prefixes dir ignore (SSet.union help_only (load_ignorelist path)) rest
    | [ ("--dir" | "--ignore" | "--help-only") as flag ] ->
      (* only reachable when the option is the final argument: the two-element
       * patterns above win whenever a value follows *)
      Printf.eprintf "error: %s requires an argument\n" flag;
      exit 1
    | prefix :: rest -> go (prefix :: prefixes) dir ignore help_only rest
  in
  go [] (default_store_path ()) SSet.empty SSet.empty args

(* parse the --dir / --system-dir options shared by complete, query and dump.
 * returns (user_dir, system_dirs, rest):
 *   user_dir    — value of the last --dir, or default_store_path () if absent
 *   system_dirs — every --system-dir value; built by prepending, so the list
 *                 is in reverse command-line order
 *   rest        — all remaining arguments, in their original order
 * an option that is the final argument (no value after it) is kept in rest
 * as an ordinary argument.
 * NOTE(review): a "--dir X" or "--system-dir X" pair is consumed wherever it
 * appears, including inside the spans handed to "complete" — confirm this is
 * intended for commands that themselves accept such flags. *)
let parse_dir_args args =
  let rec scan user sys_rev others_rev = function
    | "--dir" :: path :: tl -> scan path sys_rev others_rev tl
    | "--system-dir" :: path :: tl -> scan user (path :: sys_rev) others_rev tl
    | other :: tl -> scan user sys_rev (other :: others_rev) tl
    | [] -> (user, sys_rev, List.rev others_rev)
  in
  scan (default_store_path ()) [] [] args

(* --- entry point ---
 * dispatch on the first command-line argument to the matching subcommand
 * handler; anything unrecognised falls through to usage (). *)
let () =
  (* drop the program name. an empty argv is treated as "no subcommand"
   * instead of letting List.tl raise. *)
  let cli_args =
    match Array.to_list Sys.argv with
    | [] -> []
    | _ :: tl -> tl
  in
  match cli_args with
  | "index" :: rest ->
    let (prefixes, dir, ignorelist, help_only) = parse_index_args rest in
    if prefixes = [] then begin
      Printf.eprintf "error: index requires at least one prefix dir\n";
      exit 1
    end;
    (* each prefix contributes PREFIX/bin and PREFIX/share/man *)
    let under sub = List.map (fun p -> Filename.concat p sub) prefixes in
    let bindirs = under "bin" in
    let mandirs = under "share/man" in
    cmd_index bindirs mandirs ignorelist help_only dir
  | "complete" :: rest ->
    let (user_dir, system_dirs, spans) = parse_dir_args rest in
    cmd_complete spans user_dir system_dirs
  | "query" :: rest ->
    let (user_dir, system_dirs, args) = parse_dir_args rest in
    (match args with
     | [cmd] -> cmd_query cmd (user_dir :: system_dirs)
     | _ ->
       Printf.eprintf "error: query CMD [--dir PATH] [--system-dir PATH]\n";
       exit 1)
  | "dump" :: rest ->
    (* positional arguments to dump are ignored *)
    let (user_dir, system_dirs, _) = parse_dir_args rest in
    cmd_dump (user_dir :: system_dirs)
  | ["manpage"; file] -> cmd_manpage file
  | ["manpage-dir"; dir] -> cmd_manpage_dir dir
  | _ -> usage ()