init
This commit is contained in:
commit
6ddbd4185e
22 changed files with 3985 additions and 0 deletions
1
.envrc
Normal file
1
.envrc
Normal file
|
|
@ -0,0 +1 @@
|
|||
use flake
|
||||
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
/target
|
||||
/_build
|
||||
/.direnv
|
||||
11
README.md
Normal file
11
README.md
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# inshellah
|
||||
|
||||
nushell completions engine. indexes completions from manpages, native
|
||||
generators, and `--help` output, then serves them to nushell's external
|
||||
completer.
|
||||
|
||||
see `doc/` for details:
|
||||
|
||||
- [nushell integration](doc/nushell-integration.md) — setup, usage, examples
|
||||
- [nixos module](doc/nixos.md) — automatic build-time indexing
|
||||
- [runtime completions](doc/runtime-completions.md) — on-the-fly caching via the completer
|
||||
0
bin/.ocamlformat
Normal file
0
bin/.ocamlformat
Normal file
4
bin/dune
Normal file
4
bin/dune
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
(executable
|
||||
(public_name inshellah)
|
||||
(name main)
|
||||
(libraries inshellah))
|
||||
728
bin/main.ml
Normal file
728
bin/main.ml
Normal file
|
|
@ -0,0 +1,728 @@
|
|||
open Inshellah.Parser
|
||||
open Inshellah.Manpage
|
||||
open Inshellah.Nushell
|
||||
open Inshellah.Store
|
||||
|
||||
module SSet = Set.Make(String)
|
||||
|
||||
(* Print the command-line usage summary on stderr and terminate the process
   with exit status 1. *)
let usage () =
|
||||
Printf.eprintf
|
||||
{|inshellah - nushell completions engine
|
||||
|
||||
Usage:
|
||||
inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
|
||||
Index completions into a directory of JSON/nu files.
|
||||
PREFIX is a directory containing bin/ and share/man/.
|
||||
Default dir: $XDG_CACHE_HOME/inshellah
|
||||
--ignore FILE skip listed commands entirely
|
||||
--help-only FILE skip manpages for listed commands, use --help instead
|
||||
inshellah complete CMD [ARGS...] [--dir PATH] [--system-dir PATH]
|
||||
Nushell custom completer. Outputs JSON completion candidates.
|
||||
Falls back to --help resolution if command is not indexed.
|
||||
inshellah query CMD [--dir PATH] [--system-dir PATH]
|
||||
Print stored completion data for CMD.
|
||||
inshellah dump [--dir PATH] [--system-dir PATH]
|
||||
List indexed commands.
|
||||
inshellah manpage FILE Parse a manpage and emit nushell extern
|
||||
inshellah manpage-dir DIR Batch-process manpages under DIR
|
||||
|
||||
|};
|
||||
exit 1
|
||||
|
||||
(* Manual section numbers that are scanned for command manpages; each one is
   turned into a "man<N>" subdirectory name by the manpage walkers below. *)
let command_sections = [1; 8]
|
||||
|
||||
(* [contains_str s sub] is [true] when [sub] occurs literally (no regex
   interpretation) somewhere in [s]. *)
let contains_str s sub =
  let literal = Str.regexp_string sub in
  match Str.search_forward literal s 0 with
  | _ -> true
  | exception Not_found -> false
|
||||
|
||||
(* Heuristic check that [text] is nushell source: it must be longer than 20
   bytes and mention "export extern", "export def", or both "module " and
   "export". *)
let is_nushell_source text =
  let mentions = contains_str text in
  if String.length text <= 20 then false
  else
    mentions "export extern"
    || mentions "export def"
    || (mentions "module " && mentions "export")
|
||||
|
||||
(* Derive a command name from a manpage path: take the basename, drop a
   trailing ".gz" if present, then drop the last extension (the section
   suffix), e.g. "man1/ls.1.gz" gives "ls". *)
let cmd_name_of_manpage path =
  let strip_gz file =
    if Filename.check_suffix file ".gz" then Filename.chop_suffix file ".gz"
    else file in
  path |> Filename.basename |> strip_gz |> Filename.remove_extension
|
||||
|
||||
(* The process environment with the display / session-bus variables removed
   (DISPLAY, WAYLAND_DISPLAY, DBUS_SESSION_BUS_ADDRESS, XAUTHORITY).
   Computed once, on first use; passed to subprocesses started by run_cmd. *)
let safe_env = lazy (
  let stripped_prefixes =
    ["DISPLAY="; "WAYLAND_DISPLAY="; "DBUS_SESSION_BUS_ADDRESS="; "XAUTHORITY="] in
  let keep binding =
    not (List.exists (fun prefix -> String.starts_with ~prefix binding)
           stripped_prefixes) in
  Unix.environment () |> Array.to_list |> List.filter keep |> Array.of_list)
|
||||
|
||||
(* Append whatever is currently readable on [rd] to [buf] without blocking.
   Polls with a zero timeout and stops as soon as nothing is ready, on
   end-of-file, or on a read error. Safe to call repeatedly; the fork-pipe
   sites use it to keep pipes drained so children never block on write. *)
let drain_fd rd buf =
  let chunk = Bytes.create 8192 in
  let rec pump () =
    match Unix.select [rd] [] [] 0.0 with
    | ([], _, _) -> ()
    | _ ->
      (* A failed read is treated like end-of-file: stop draining. *)
      let n = try Unix.read rd chunk 0 8192 with Unix.Unix_error _ -> 0 in
      if n > 0 then begin
        Buffer.add_subbytes buf chunk 0 n;
        pump ()
      end in
  pump ()
|
||||
|
||||
(* Run [args] (program path first) as a subprocess and capture its combined
   stdout and stderr.

   The child gets /dev/null as stdin, the filtered [safe_env] environment and
   /tmp as working directory, so commands that write side-effect files
   (e.g. ckb-next-dev-detect-report.gz) don't pollute the caller's directory.
   Output is collected until end-of-file or until [timeout_ms] milliseconds
   have elapsed; on timeout (or a read failure) the child is killed with
   SIGKILL. The child is always reaped and every descriptor opened here is
   closed exactly once.

   Returns [Some output] when at least one byte was captured; [None] when
   nothing was captured, the process could not be started, or [args] is
   empty. *)
let run_cmd args timeout_ms =
  let close_quiet fd = try Unix.close fd with Unix.Unix_error _ -> () in
  match args with
  | [] -> None
  | prog :: _ ->
    let argv = Array.of_list args in
    let (rd, wr) = Unix.pipe () in
    (* Start the child from /tmp, restoring the working directory and
       releasing /dev/null whatever happens. *)
    let spawn () =
      let devnull = Unix.openfile "/dev/null" [Unix.O_RDONLY] 0 in
      let saved_cwd = Sys.getcwd () in
      Fun.protect
        ~finally:(fun () ->
          (try Sys.chdir saved_cwd with Sys_error _ -> ());
          close_quiet devnull)
        (fun () ->
          Sys.chdir "/tmp";
          try
            Some (Unix.create_process_env prog argv
                    (Lazy.force safe_env) devnull wr wr)
          with Unix.Unix_error _ -> None) in
    let spawned =
      (* The parent never writes: drop its copy of the write end so that
         end-of-file is observable once the child exits. *)
      Fun.protect ~finally:(fun () -> close_quiet wr)
        (fun () -> try spawn () with e -> close_quiet rd; raise e) in
    match spawned with
    | None -> close_quiet rd; None
    | Some pid ->
      let buf = Buffer.create 4096 in
      let chunk = Bytes.create 8192 in
      let deadline =
        Unix.gettimeofday () +. (float_of_int timeout_ms /. 1000.0) in
      (* [true] once the child closed its output, [false] on timeout or on
         an unrecoverable select/read error. *)
      let rec pump () =
        let remaining = deadline -. Unix.gettimeofday () in
        if remaining <= 0.0 then false
        else
          match Unix.select [rd] [] [] (min remaining 0.05) with
          | ([], _, _) -> pump ()
          | _ ->
            (match Unix.read rd chunk 0 (Bytes.length chunk) with
             | 0 -> true
             | n -> Buffer.add_subbytes buf chunk 0 n; pump ()
             | exception Unix.Unix_error (Unix.EINTR, _, _) -> pump ()
             | exception Unix.Unix_error _ -> false)
          | exception Unix.Unix_error (Unix.EINTR, _, _) -> pump ()
          | exception Unix.Unix_error _ -> false in
      let finished = pump () in
      close_quiet rd;
      if not finished then
        (try Unix.kill pid Sys.sigkill with Unix.Unix_error _ -> ());
      let rec reap () =
        match Unix.waitpid [] pid with
        | _ -> ()
        | exception Unix.Unix_error (Unix.EINTR, _, _) -> reap ()
        | exception Unix.Unix_error _ -> () in
      reap ();
      if Buffer.length buf > 0 then Some (Buffer.contents buf) else None
|
||||
|
||||
(* [true] when [path] (after following symlinks) is a regular file with at
   least one execute permission bit set; [false] otherwise, including when
   the path cannot be stat'ed. *)
let is_executable path =
  match Unix.stat path with
  | { Unix.st_kind = Unix.S_REG; st_perm; _ } -> st_perm land 0o111 <> 0
  | _ -> false
  | exception Unix.Unix_error _ -> false
|
||||
|
||||
(* [true] when the file behind [path] (symlinks resolved) starts with the
   two bytes "#!". Returns [false] when the file is shorter than two bytes
   or cannot be resolved, opened or read. The channel is closed on every
   path, including read failures. *)
let is_script path =
  try
    In_channel.with_open_bin (Unix.realpath path) (fun ic ->
      match In_channel.really_input_string ic 2 with
      | Some "#!" -> true
      | Some _ | None -> false)
  with Unix.Unix_error _ | Sys_error _ -> false
|
||||
|
||||
(* Scan the file behind [path] for the literal byte strings in [needles].

   Returns a table whose keys are the needles that were found. Only files
   starting with the ELF magic are scanned; for any other readable file the
   table is empty. If the file cannot be resolved, opened or read (including
   being shorter than the 4-byte magic), every needle is reported as found,
   so callers err on the side of probing the binary.

   The file is read in 64 KiB chunks; the tail of each chunk is carried over
   so matches spanning a chunk boundary are not missed. Scanning stops early
   once all needles have been seen. The channel is always closed. *)
let elf_scan path needles =
  let found = Hashtbl.create 4 in
  let missing () =
    List.filter (fun n -> not (Hashtbl.mem found n)) needles in
  (* Does [needle] occur in [buf] at offset [pos]? Caller guarantees that
     [pos + String.length needle] stays within the valid data. *)
  let occurs_at buf pos needle =
    let len = String.length needle in
    let rec go j =
      j >= len || (Bytes.get buf (pos + j) = needle.[j] && go (j + 1)) in
    go 0 in
  (* Search the first [total] bytes of [buf] and record a hit. *)
  let scan_window buf total needle =
    let last = total - String.length needle in
    let rec go i = i <= last && (occurs_at buf i needle || go (i + 1)) in
    if go 0 then Hashtbl.replace found needle true in
  let scan ic =
    let magic = really_input_string ic 4 in
    if magic = "\x7fELF" then begin
      let max_needle =
        List.fold_left (fun m n -> max m (String.length n)) 0 needles in
      let chunk_size = 65536 in
      let buf = Bytes.create (chunk_size + max_needle) in
      let rec loop carry =
        if missing () <> [] then
          match input ic buf carry chunk_size with
          | 0 -> ()
          | n ->
            let total = carry + n in
            List.iter (scan_window buf total) (missing ());
            (* Keep the tail so a needle split across chunks is still seen. *)
            let keep = min max_needle total in
            Bytes.blit buf (total - keep) buf 0 keep;
            loop keep in
      loop 0
    end in
  (try In_channel.with_open_bin (Unix.realpath path) scan
   with Unix.Unix_error _ | Sys_error _ | End_of_file ->
     List.iter (fun n -> Hashtbl.replace found n true) needles);
  found
|
||||
|
||||
(* Detect a small nix wrapper binary and return the wrapped program.

   The file behind [path] is read only if it is at most 65536 bytes. If it
   contains the string "makeCWrapper", the first "/nix/store/<hash>-.../bin/
   <name>" path embedded in it is returned, provided that path exists on
   disk. Returns [None] in every other case, including when the file cannot
   be resolved, opened or read. The channel is always closed. *)
let nix_wrapper_target path =
  let store_bin =
    Str.regexp "/nix/store/[a-z0-9]+-[^' \n\r\x00]+/bin/[a-zA-Z0-9._-]+" in
  let read_if_small real =
    In_channel.with_open_bin real (fun ic ->
      let n = in_channel_length ic in
      if n > 65536 then None else Some (really_input_string ic n)) in
  try
    match read_if_small (Unix.realpath path) with
    | None -> None
    | Some s when not (contains_str s "makeCWrapper") -> None
    | Some s ->
      (match Str.search_forward store_bin s 0 with
       | _ ->
         let target = Str.matched_string s in
         if Sys.file_exists target then Some target else None
       | exception Not_found -> None)
  with Unix.Unix_error _ | Sys_error _ | End_of_file -> None
|
||||
|
||||
(* Name-based filter for files that are never worth probing: empty names,
   "-", dotfiles, "lib*" names, "*-daemon", "*-wrapped", "*.so", and names
   without a single ASCII letter or digit. *)
let skip_name name =
  let is_alnum = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
    | _ -> false in
  match name with
  | "" | "-" -> true
  | _ ->
    name.[0] = '.'
    || String.starts_with ~prefix:"lib" name
    || List.exists (fun suffix -> String.ends_with ~suffix name)
         ["-daemon"; "-wrapped"; ".so"]
    || not (String.exists is_alnum name)
|
||||
|
||||
(* How the indexer treats a binary: [Skip] it, resolve it through its help
   output only ([Try_help]), or first try a native nushell completion
   generator and fall back to help output ([Try_native_and_help]). *)
type bin_class = Skip | Try_help | Try_native_and_help
|
||||
|
||||
(* Decide how [bindir]/[name] should be probed.

   Nushell builtins, names rejected by [skip_name] and non-executables are
   skipped. Scripts (shebang files) are resolved via help output. Other
   files are scanned with [elf_scan]: a "completion" string selects native
   completion with help fallback, a "-h" string selects help only, and as a
   last resort a recognised nix wrapper is resolved via help. *)
let classify_binary bindir name =
  let path = Filename.concat bindir name in
  if is_nushell_builtin name || skip_name name then Skip
  else if not (is_executable path) then Skip
  else if is_script path then Try_help
  else begin
    let hits = elf_scan path ["-h"; "completion"] in
    match Hashtbl.mem hits "completion", Hashtbl.mem hits "-h" with
    | true, _ -> Try_native_and_help
    | false, true -> Try_help
    | false, false ->
      (match nix_wrapper_target path with
       | Some _ -> Try_help
       | None -> Skip)
  end
|
||||
|
||||
(* Number of parallel jobs to run: the count of lines in /proc/cpuinfo that
   start with "processor" (at least 1), or 4 when that file cannot be opened
   or read. The channel is closed on every path. *)
let num_cores () =
  try
    In_channel.with_open_text "/proc/cpuinfo" (fun ic ->
      let rec count n =
        match In_channel.input_line ic with
        | None -> n
        | Some line ->
          count (if String.starts_with ~prefix:"processor" line then n + 1
                 else n) in
      max 1 (count 0))
  with Sys_error _ -> 4
|
||||
|
||||
(* Ask [bin_path] to generate its own nushell completions by trying a fixed
   list of common invocations in order, each with a 500 ms timeout. Returns
   the first output that [is_nushell_source] accepts, or [None]. *)
let try_native_completion bin_path =
  let probes = [
    ["completions"; "nushell"];
    ["completion"; "nushell"];
    ["--completions"; "nushell"];
    ["--completion"; "nushell"];
    ["generate-completion"; "nushell"];
    ["--generate-completion"; "nushell"];
    ["shell-completions"; "nushell"];
  ] in
  let attempt extra_args =
    match run_cmd (bin_path :: extra_args) 500 with
    | Some text when is_nushell_source text -> Some text
    | Some _ | None -> None in
  List.find_map attempt probes
|
||||
|
||||
(* Parse the manpage [file].

   The command name comes from the synopsis when one can be extracted,
   otherwise from the file name. Returns [None] for nushell builtins;
   otherwise [Some (cmd, result, subs)] where [result] is the parsed page
   (its subcommand list replaced by the page's subcommand sections when
   there are any) and [subs] pairs each "cmd sub" name with that section's
   own parse result. *)
let parse_manpage_for_command file =
  let contents = read_manpage_file file in
  let cmd =
    match extract_synopsis_command contents with
    | Some name -> name
    | None -> cmd_name_of_manpage file in
  if is_nushell_builtin cmd then None
  else begin
    let parsed = parse_manpage_string contents in
    let sections = extract_subcommand_sections contents in
    let result =
      match sections with
      | [] -> parsed
      | _ :: _ ->
        { parsed with
          subcommands =
            List.map (fun (name, desc, _) -> { name; desc }) sections } in
    let subs =
      List.map (fun (sub, _desc, sub_result) -> (cmd ^ " " ^ sub, sub_result))
        sections in
    Some (cmd, result, subs)
  end
|
||||
|
||||
(* "manpage FILE" subcommand: parse one manpage and print the generated
   nushell extern on stdout. Prints nothing when the page is skipped or
   yields no flag entries. *)
let cmd_manpage file =
  match parse_manpage_for_command file with
  | None -> ()
  | Some (cmd, result, _) ->
    if result.entries <> [] then print_string (generate_extern cmd result)
|
||||
|
||||
(* "manpage-dir DIR" subcommand: run [cmd_manpage] on every file in the
   "man<N>" subdirectories of [dir] for each section in [command_sections].
   A page that raises is skipped so one bad file does not abort the batch. *)
let cmd_manpage_dir dir =
  let emit_section section =
    let subdir = Filename.concat dir (Printf.sprintf "man%d" section) in
    if is_dir subdir then
      Sys.readdir subdir
      |> Array.iter (fun file ->
        try cmd_manpage (Filename.concat subdir file) with _ -> ()) in
  List.iter emit_section command_sections
|
||||
|
||||
(* Once this many results have been collected by the help resolver, it stops
   queueing further subcommands. *)
let max_resolve_results = 500
|
||||
|
||||
(* Exception-safe wrapper around [parse_manpage_for_command]: returns the
   parse only when it carries at least one flag entry or one subcommand
   section, and [None] when parsing fails or finds nothing useful. *)
let process_manpage file =
  match parse_manpage_for_command file with
  | Some (_, result, subs) as parsed
    when result.entries <> [] || subs <> [] -> parsed
  | Some _ | None -> None
  | exception _ -> None
|
||||
|
||||
(* Set of command names (derived from file names only) that have a manpage
   in one of the [command_sections] subdirectories of [mandir]. *)
let manpaged_commands mandir =
  let add_section names section =
    let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in
    if not (is_dir subdir) then names
    else
      let pages = Sys.readdir subdir in
      SSet.add_seq (Seq.map cmd_name_of_manpage (Array.to_seq pages)) names in
  List.fold_left add_section SSet.empty command_sections
|
||||
|
||||
(* Parallel structured help resolver.

   Starting from [cmd] invoked with the argument prefix [rest] (displayed as
   [name]), runs "--help" (falling back to "-h") for the command and then
   for every subcommand that its parsed help lists, using a work queue. Each
   probe runs in a forked child that marshals a
   [(help_result * subcommand list) option] back over a pipe; at most
   [num_cores ()] children run at once.

   [timeout] is the per-invocation limit in milliseconds handed to
   [run_cmd]. Returns [(display_name, help_result)] pairs in the order the
   results were collected. Subcommands stop being queued at depth 5 or once
   [max_resolve_results] results have been gathered.

   Children never return into the caller's code: any failure inside a child
   is reported as "no result", and the child leaves through [Unix._exit] so
   that it neither runs [at_exit] handlers nor flushes stdio buffers
   inherited from the parent. *)
let help_resolve_par ?(timeout=200) cmd rest name =
  let max_jobs = num_cores () in
  let queue = Queue.create () in
  Queue.push (rest, name, 0) queue;
  let results = ref [] in
  let n_collected = ref 0 in
  (* pending: (pid, rd, buf, rest, name, depth) *)
  let pending = ref [] in
  let close_quiet fd = try Unix.close fd with Unix.Unix_error _ -> () in
  let pending_fds () = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in
  (* Parent side: decode a finished child's payload and queue follow-ups. *)
  let collect rd buf q_rest q_name q_depth =
    drain_fd rd buf;
    close_quiet rd;
    let data = Buffer.contents buf in
    let result : (help_result * subcommand list) option =
      if String.length data > 0 then
        try Marshal.from_string data 0 with _ -> None
      else None in
    match result with
    | None -> ()
    | Some (r, subs) ->
      let at_limit =
        q_depth >= 5 || !n_collected >= max_resolve_results in
      results := (q_name, r) :: !results;
      incr n_collected;
      if not at_limit then
        List.iter (fun (sc : subcommand) ->
          Queue.push
            (q_rest @ [sc.name], q_name ^ " " ^ sc.name, q_depth + 1) queue
        ) subs in
  let reap () =
    pending := List.filter (fun (pid, rd, buf, q_rest, q_name, q_depth) ->
      drain_fd rd buf;
      match Unix.waitpid [Unix.WNOHANG] pid with
      | (0, _) -> true
      | _ -> collect rd buf q_rest q_name q_depth; false
      | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
        close_quiet rd; false
    ) !pending in
  let wait_for_slot () =
    while List.length !pending >= max_jobs do
      reap ();
      if List.length !pending >= max_jobs then
        ignore (Unix.select (pending_fds ()) [] [] 0.05)
    done in
  (* Child side: run the help command for one queue entry and parse it. *)
  let probe q_rest q_depth : (help_result * subcommand list) option =
    let text =
      match run_cmd (cmd :: q_rest @ ["--help"]) timeout with
      | Some _ as r -> r
      | None -> run_cmd (cmd :: q_rest @ ["-h"]) timeout in
    match text with
    | None -> None
    | Some text ->
      (match parse_help text with
       | Error _ -> None
       | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] ->
         None
       | Ok r ->
         (* A subcommand whose help lists itself is treated as the parent's
            help echoed back and is discarded. *)
         let self_listed =
           match List.rev q_rest with
           | [] -> false
           | leaf :: _ ->
             List.exists (fun (sc : subcommand) -> sc.name = leaf)
               r.subcommands in
         if self_listed then None
         else Some (r, if q_depth >= 5 then [] else r.subcommands)) in
  let run_child rd wr q_rest q_depth =
    (try
       close_quiet rd;
       List.iter close_quiet (pending_fds ());
       let result = try probe q_rest q_depth with _ -> None in
       let oc = Unix.out_channel_of_descr wr in
       Marshal.to_channel oc
         (result : (help_result * subcommand list) option) [];
       close_out oc
     with _ -> ());
    Unix._exit 0 in
  while not (Queue.is_empty queue) || !pending <> [] do
    while not (Queue.is_empty queue) do
      let (q_rest, q_name, q_depth) = Queue.pop queue in
      wait_for_slot ();
      let (rd, wr) = Unix.pipe () in
      let pid =
        try Unix.fork ()
        with e -> close_quiet rd; close_quiet wr; raise e in
      if pid = 0 then run_child rd wr q_rest q_depth
      else begin
        Unix.close wr;
        pending :=
          (pid, rd, Buffer.create 4096, q_rest, q_name, q_depth) :: !pending
      end
    done;
    if !pending <> [] then begin
      reap ();
      if !pending <> [] && Queue.is_empty queue then
        ignore (Unix.select (pending_fds ()) [] [] 0.05)
    end
  done;
  List.rev !results
|
||||
|
||||
(* "index" subcommand: fork-per-binary pattern with pipe-based result
   marshaling.

   [bindirs] and [mandirs] are parallel lists (one pair per prefix). For
   each pair:
   - Phase 1 classifies every file in the bin directory and, for the ones
     worth probing, forks a child (at most [num_cores ()] at a time) that
     tries native completion generation and/or help resolution via
     [help_resolve_par] and marshals the outcome back over a pipe. Commands
     in [ignorelist] are skipped; commands with a manpage are skipped unless
     they are listed in [help_only].
   - Phase 2 parses the manpages, skipping commands listed in [help_only].
   A command name is written at most once; whichever source records it
   first wins. Results are written into [dir], and a summary line is
   printed on stdout at the end.

   Children never return into the caller's code: any failure inside a child
   is reported as "no result", and the child leaves through [Unix._exit] so
   that it neither runs [at_exit] handlers nor re-emits stdio output that
   was still buffered in the parent when it forked. *)
let cmd_index bindirs mandirs ignorelist help_only dir =
  ensure_dir dir;
  let done_cmds = ref SSet.empty in
  let n_results = ref 0 in
  let close_quiet fd = try Unix.close fd with Unix.Unix_error _ -> () in
  (* Write a parsed result unless that command name was already handled. *)
  let store_once ~source cmd_name r =
    if not (SSet.mem cmd_name !done_cmds) then begin
      write_result ~dir ~source cmd_name r;
      done_cmds := SSet.add cmd_name !done_cmds;
      incr n_results
    end in
  let index_bindir bindir mandir =
    if not (is_dir bindir) then
      Printf.eprintf "skipping %s (not found)\n" bindir
    else begin
      let bins = Sys.readdir bindir in
      Array.sort String.compare bins;
      let manpaged =
        if is_dir mandir then manpaged_commands mandir else SSet.empty in
      let max_jobs = num_cores () in
      let classified = Array.map (fun name ->
        if SSet.mem name ignorelist then (name, Skip)
        else if SSet.mem name help_only then (name, classify_binary bindir name)
        else if SSet.mem name manpaged then (name, Skip)
        else (name, classify_binary bindir name)
      ) bins in
      (* pending: (pid, rd, buf, name) *)
      let pending = ref [] in
      let pending_fds () = List.map (fun (_, rd, _, _) -> rd) !pending in
      (* Parent side: decode a finished child's payload and store it. *)
      let process_result name rd buf =
        drain_fd rd buf;
        close_quiet rd;
        let data = Buffer.contents buf in
        if String.length data > 0 then begin
          let result :
            [`Native of string
            | `Parsed of (string * help_result) list
            | `None] =
            try Marshal.from_string data 0 with _ -> `None in
          match result with
          | `Native src ->
            write_native ~dir name src;
            incr n_results
          | `Parsed pairs ->
            List.iter (fun (cmd_name, r) ->
              store_once ~source:"help" cmd_name r) pairs
          | `None -> ()
        end;
        (* Mark the binary as handled even when it produced nothing, so the
           manpage phase does not write it again. *)
        done_cmds := SSet.add name !done_cmds in
      let reap () =
        pending := List.filter (fun (pid, rd, buf, name) ->
          drain_fd rd buf;
          match Unix.waitpid [Unix.WNOHANG] pid with
          | (0, _) -> true
          | _ -> process_result name rd buf; false
          | exception Unix.Unix_error (Unix.ECHILD, _, _) ->
            close_quiet rd; false
        ) !pending in
      let wait_for_slot () =
        while List.length !pending >= max_jobs do
          reap ();
          if List.length !pending >= max_jobs then
            ignore (Unix.select (pending_fds ()) [] [] 0.05)
        done in
      (* Child side: resolve one binary completely. *)
      let probe cls name =
        let path = Filename.concat bindir name in
        let native =
          match cls with
          | Try_native_and_help -> try_native_completion path
          | Try_help | Skip -> None in
        match native with
        | Some src -> `Native src
        | None ->
          let pairs = help_resolve_par ~timeout:200 path [] name in
          if pairs <> [] then `Parsed pairs else `None in
      let run_child rd wr cls name =
        (try
           close_quiet rd;
           List.iter close_quiet (pending_fds ());
           let result = try probe cls name with _ -> `None in
           let oc = Unix.out_channel_of_descr wr in
           Marshal.to_channel oc
             (result :
                [`Native of string
                | `Parsed of (string * help_result) list
                | `None]) [];
           close_out oc
         with _ -> ());
        Unix._exit 0 in
      Array.iter (fun (name, cls) ->
        match cls with
        | Skip -> ()
        | Try_help | Try_native_and_help ->
          wait_for_slot ();
          let (rd, wr) = Unix.pipe () in
          let pid =
            try Unix.fork ()
            with e -> close_quiet rd; close_quiet wr; raise e in
          if pid = 0 then run_child rd wr cls name
          else begin
            Unix.close wr;
            pending := (pid, rd, Buffer.create 4096, name) :: !pending
          end
      ) classified;
      while !pending <> [] do
        reap ();
        if !pending <> [] then
          ignore (Unix.select (pending_fds ()) [] [] 0.05)
      done;
      (* Phase 2: manpages *)
      if is_dir mandir then
        List.iter (fun section ->
          let subdir =
            Filename.concat mandir (Printf.sprintf "man%d" section) in
          if is_dir subdir then begin
            let files = Sys.readdir subdir in
            Array.sort String.compare files;
            Array.iter (fun file ->
              let base_cmd = cmd_name_of_manpage file in
              if not (SSet.mem base_cmd help_only) then
                match process_manpage (Filename.concat subdir file) with
                | None -> ()
                | Some (cmd, result, subs) ->
                  store_once ~source:"manpage" cmd result;
                  List.iter (fun (sub_cmd, sub_result) ->
                    store_once ~source:"manpage" sub_cmd sub_result) subs
            ) files
          end
        ) command_sections
    end in
  List.iter2 index_bindir bindirs mandirs;
  Printf.printf "indexed %d commands into %s\n" !n_results dir
|
||||
|
||||
(* "dump" subcommand: print the number of indexed commands found in [dirs],
   then one line per command with its stored file type ("?" if unknown). *)
let cmd_dump dirs =
  let cmds = all_commands dirs in
  Printf.printf "%d commands\n" (List.length cmds);
  let show cmd =
    let src = Option.value (file_type_of dirs cmd) ~default:"?" in
    Printf.printf " %-40s [%s]\n" cmd src in
  List.iter show cmds
|
||||
|
||||
(* Return the first "<dir>/<name>" over the colon-separated $PATH entries
   that [is_executable] accepts, or [None] when there is none or PATH is
   unset. *)
let find_in_path name =
  match Sys.getenv_opt "PATH" with
  | None -> None
  | Some path ->
    let candidate_in dir =
      let candidate = Filename.concat dir name in
      if is_executable candidate then Some candidate else None in
    List.find_map candidate_in (String.split_on_char ':' path)
|
||||
|
||||
(* Resolve [name] (the executable at [path]) through its help output and
   write every resulting command into [dir], creating the directory first.
   Returns [Some pairs] when anything was resolved, [None] otherwise (in
   which case nothing is written). *)
let resolve_and_cache ~dir name path =
  match help_resolve_par ~timeout:200 path [] name with
  | [] -> None
  | pairs ->
    ensure_dir dir;
    List.iter (fun (cmd_name, r) -> write_result ~dir cmd_name r) pairs;
    Some pairs
|
||||
|
||||
(* Render one completion candidate as a JSON object with "value" and
   "description" fields, both passed through [escape_json]. *)
let completion_json value desc =
  let value = escape_json value in
  let desc = escape_json desc in
  Printf.sprintf "{\"value\":\"%s\",\"description\":\"%s\"}" value desc
|
||||
|
||||
(* Fuzzy matching: returns a score > 0 if [needle] is a (case-insensitive)
   subsequence of [haystack], 0 otherwise. Higher scores = better match.
   - Empty needle: 1 (matches everything, weakly)
   - Exact (case-sensitive) match: 1000
   - Case-insensitive prefix match: 900 + 100 * |needle| / |haystack|
   - Subsequence: per matched character, 50 if it sits on a word boundary
     (start of string, after '-' or '_', or a lower-to-upper camelCase
     step) and 10 otherwise, plus 20 when it directly follows the previous
     matched character.
   The first matched character never earns the "follows previous match"
   bonus, since there is no previous match. *)
let fuzzy_score needle haystack =
  let nlen = String.length needle and hlen = String.length haystack in
  if nlen = 0 then 1
  else if nlen > hlen then 0
  else if needle = haystack then 1000
  else
    let needle = String.lowercase_ascii needle
    and haystack_lc = String.lowercase_ascii haystack in
    if String.starts_with ~prefix:needle haystack_lc then
      900 + (nlen * 100 / hlen)
    else
      (* Boundaries are judged on the original-case haystack so camelCase
         steps are still visible. *)
      let is_boundary hi =
        hi = 0 || haystack.[hi - 1] = '-' || haystack.[hi - 1] = '_'
        || (haystack.[hi - 1] >= 'a' && haystack.[hi - 1] <= 'z'
            && haystack.[hi] >= 'A' && haystack.[hi] <= 'Z') in
      (* Walk haystack matching needle chars as a subsequence. The state is
         (needle index, score, haystack index, index of previous match). *)
      let ni, score, _, _ =
        String.fold_left (fun (ni, score, hi, prev_match) c ->
          if ni >= nlen then (ni, score, hi + 1, prev_match)
          else if c = needle.[ni] then
            let bonus = (if is_boundary hi then 50 else 10)
                        + (if prev_match = Some (hi - 1) then 20 else 0) in
            (ni + 1, score + bonus, hi + 1, Some hi)
          else (ni, score, hi + 1, prev_match)
        ) (0, 0, 0, None) haystack_lc in
      if ni = nlen then score else 0
|
||||
|
||||
(* "complete" subcommand: nushell external completer.

   [spans] is the command line split into tokens. The longest stored
   command prefix is looked up in [user_dir] and [system_dirs]; when nothing
   (or only a shallower parent) is stored and the command exists on PATH, it
   is resolved on the fly and cached in [user_dir]. The matching subcommands
   and flags are fuzzy-filtered against the token being typed, sorted by
   descending score, and printed on stdout as a JSON array. *)
let cmd_complete spans user_dir system_dirs =
  match spans with
  | [] -> print_string "[]\n"
  | cmd_name :: rest ->
    let dirs = user_dir :: system_dirs in
    (* Try longest prefix match: "git add" before "git" *)
    let find_result tokens =
      let rec attempt keep =
        if keep <= 0 then None
        else
          let prefix = List.filteri (fun i _ -> i < keep) tokens in
          let try_name = String.concat " " prefix in
          match lookup dirs try_name with
          | Some r -> Some (try_name, r, keep)
          | None -> attempt (keep - 1) in
      attempt (List.length tokens) in
    let last_token, rest_but_last =
      match List.rev rest with
      | [] -> ("", [])
      | last :: before -> (last, List.rev before) in
    (* Only treat the last token as a completed subcommand when nushell
       sends a trailing empty token (cursor is after a space).
       Otherwise the user is still typing and we treat it as partial. *)
    let lookup_tokens =
      if last_token = "" then cmd_name :: rest
      else cmd_name :: rest_but_last in
    let partial = last_token in
    let n_lookup = List.length lookup_tokens in
    let found = find_result lookup_tokens in
    let deep_enough =
      match found with
      | Some (_, _, depth) -> depth >= n_lookup - 1
      | None -> false in
    let result =
      (* Exact or near-exact match: use it as is. *)
      if deep_enough then found
      else
        (* No match, or only a parent matched: try on-the-fly resolution *)
        match find_in_path cmd_name with
        | None -> found
        | Some path ->
          (match resolve_and_cache ~dir:user_dir cmd_name path with
           | Some _pairs -> find_result lookup_tokens
           | None -> found) in
    let candidates =
      match result with
      | None -> []
      | Some (matched_name, r, depth) ->
        let scored_json label desc =
          let s = fuzzy_score partial label in
          if s > 0 then Some (s, completion_json label desc) else None in
        (* When the match is shallower than requested, the user already
           typed a subcommand beyond the matched level: don't show
           sibling subcommands, only flags *)
        let sub_candidates =
          if depth < n_lookup - 1 then []
          else
            let subs =
              match r.subcommands with
              | _ :: _ -> r.subcommands
              | [] -> subcommands_of dirs matched_name in
            List.filter_map
              (fun (sc : subcommand) -> scored_json sc.name sc.desc) subs in
        let flag_candidates =
          List.filter_map (fun (e : entry) ->
            let with_param shown =
              if e.desc <> "" then e.desc ^ " " ^ shown else shown in
            let desc =
              match e.param with
              | Some (Mandatory p) -> with_param ("<" ^ p ^ ">")
              | Some (Optional p) -> with_param ("[" ^ p ^ "]")
              | None -> e.desc in
            let flag =
              match e.switch with
              | Long l | Both (_, l) -> "--" ^ l
              | Short c -> Printf.sprintf "-%c" c in
            scored_json flag desc
          ) r.entries in
        sub_candidates @ flag_candidates
        |> List.sort (fun (a, _) (b, _) -> compare b a)
        |> List.map snd in
    Printf.printf "[%s]\n" (String.concat "," candidates)
|
||||
|
||||
(* "query" subcommand: print the raw stored data for [cmd] followed by a
   newline, or report "not found" on stderr and exit with status 1. *)
let cmd_query cmd dirs =
  match lookup_raw dirs cmd with
  | Some data -> print_endline data
  | None ->
    Printf.eprintf "not found: %s\n" cmd;
    exit 1
|
||||
|
||||
(* Read a list file: one command name per line, surrounding whitespace
   trimmed, blank lines and lines starting with '#' ignored. Returns the
   names as a set. If the file cannot be read, a warning is printed on
   stderr (and flushed immediately, so it cannot be replayed by forked
   children) and the empty set is returned. *)
let load_ignorelist path =
  let entry_of_line line =
    let line = String.trim line in
    if String.length line > 0 && line.[0] <> '#' then Some line else None in
  match In_channel.with_open_text path In_channel.input_all with
  | contents ->
    contents
    |> String.split_on_char '\n'
    |> List.filter_map entry_of_line
    |> SSet.of_list
  | exception Sys_error msg ->
    Printf.eprintf "warning: cannot read list file: %s\n%!" msg;
    SSet.empty
|
||||
|
||||
(* Parse the arguments of "index". "--dir PATH" replaces the output
   directory (default: [default_store_path ()]); "--ignore FILE" and
   "--help-only FILE" each merge a list file into the respective set; every
   other argument is a prefix directory. Returns
   [(prefixes in order, dir, ignore set, help-only set)]. *)
let parse_index_args args =
  let merge names path = SSet.union names (load_ignorelist path) in
  let rec scan ~prefixes ~dir ~ignored ~help_only remaining =
    match remaining with
    | [] -> (List.rev prefixes, dir, ignored, help_only)
    | "--dir" :: path :: tl ->
      scan ~prefixes ~dir:path ~ignored ~help_only tl
    | "--ignore" :: path :: tl ->
      scan ~prefixes ~dir ~ignored:(merge ignored path) ~help_only tl
    | "--help-only" :: path :: tl ->
      scan ~prefixes ~dir ~ignored ~help_only:(merge help_only path) tl
    | prefix :: tl ->
      scan ~prefixes:(prefix :: prefixes) ~dir ~ignored ~help_only tl in
  scan ~prefixes:[] ~dir:(default_store_path ())
    ~ignored:SSet.empty ~help_only:SSet.empty args
|
||||
|
||||
(* Split the store-selection flags out of [args]. "--dir PATH" replaces the
   user directory (default: [default_store_path ()]); each
   "--system-dir PATH" is prepended to the system directory list, so the
   last one given comes first. Returns
   [(user_dir, system_dirs, remaining arguments in order)]. *)
let parse_dir_args args =
  let rec scan ~user_dir ~system_dirs ~others remaining =
    match remaining with
    | [] -> (user_dir, system_dirs, List.rev others)
    | "--dir" :: path :: tl ->
      scan ~user_dir:path ~system_dirs ~others tl
    | "--system-dir" :: path :: tl ->
      scan ~user_dir ~system_dirs:(path :: system_dirs) ~others tl
    | arg :: tl ->
      scan ~user_dir ~system_dirs ~others:(arg :: others) tl in
  scan ~user_dir:(default_store_path ()) ~system_dirs:[] ~others:[] args
|
||||
|
||||
(* Entry point: dispatch on the first command-line argument; anything
   unrecognised prints the usage text and exits with status 1. *)
let () =
  let argv = List.tl (Array.to_list Sys.argv) in
  match argv with
  | "index" :: rest ->
    let (prefixes, dir, ignorelist, help_only) = parse_index_args rest in
    (match prefixes with
     | [] ->
       Printf.eprintf "error: index requires at least one prefix dir\n";
       exit 1
     | _ :: _ -> ());
    let under sub = List.map (fun p -> Filename.concat p sub) prefixes in
    let bindirs = under "bin" in
    let mandirs = under "share/man" in
    cmd_index bindirs mandirs ignorelist help_only dir
  | "complete" :: rest ->
    let (user_dir, system_dirs, spans) = parse_dir_args rest in
    cmd_complete spans user_dir system_dirs
  | "query" :: rest ->
    let (user_dir, system_dirs, args) = parse_dir_args rest in
    begin match args with
      | [cmd] -> cmd_query cmd (user_dir :: system_dirs)
      | _ ->
        Printf.eprintf "error: query CMD [--dir PATH] [--system-dir PATH]\n";
        exit 1
    end
  | "dump" :: rest ->
    let (user_dir, system_dirs, _) = parse_dir_args rest in
    cmd_dump (user_dir :: system_dirs)
  | ["manpage"; file] -> cmd_manpage file
  | ["manpage-dir"; dir] -> cmd_manpage_dir dir
  | _ -> usage ()
|
||||
192
doc/nixos.md
Normal file
192
doc/nixos.md
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
# nixos integration
|
||||
|
||||
inshellah provides a nixos module that automatically indexes nushell
|
||||
completions for all installed packages at system build time.
|
||||
|
||||
## enabling
|
||||
|
||||
```nix
|
||||
# in your flake.nix outputs:
|
||||
{
|
||||
nixosConfigurations.myhost = nixpkgs.lib.nixosSystem {
|
||||
modules = [
|
||||
inshellah.nixosModules.default
|
||||
{
|
||||
programs.inshellah.enable = true;
|
||||
}
|
||||
];
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
or if importing the module directly:
|
||||
|
||||
```nix
|
||||
# configuration.nix
|
||||
{ pkgs, ... }: {
|
||||
imports = [ ./path/to/inshellah/nix/module.nix ];
|
||||
programs.inshellah = {
|
||||
enable = true;
|
||||
package = pkgs.inshellah; # or your local build
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
## what happens at build time
|
||||
|
||||
the module hooks into `environment.extraSetup`, which runs during the
|
||||
system profile build (the `buildEnv` that creates `/run/current-system/sw`).
|
||||
at that point, all system packages are merged, so `$out/bin` contains every
|
||||
executable and `$out/share/man` contains every manpage.
|
||||
|
||||
inshellah runs a single command:
|
||||
|
||||
```
|
||||
inshellah index "$out" --dir $out/share/inshellah
|
||||
```
|
||||
|
||||
this executes a three-phase pipeline:
|
||||
|
||||
### phase 1: native completion detection (parallel)
|
||||
|
||||
for each executable, inshellah scans the elf binary for the string
|
||||
`completion`. if found, it probes common patterns like
|
||||
`CMD completions nushell` to see if the program can generate its own
|
||||
nushell completions. native output is used verbatim — these are always
|
||||
higher quality than parsed completions.
|
||||
|
||||
programs like `niri`, as well as any clap/cobra tool with nushell support,
|
||||
are handled this way.
|
||||
|
||||
### phase 2: manpage parsing (sequential)
|
||||
|
||||
for commands not covered by phase 1, inshellah parses manpages from
|
||||
man1 (user commands) and man8 (sysadmin commands). it handles:
|
||||
|
||||
- gnu `.TP` style (coreutils, help2man)
|
||||
- `.IP` style (curl, hand-written)
|
||||
- `.PP`+`.RS`/`.RE` style (git, docbook)
|
||||
- nix3 bullet+hyperlink style (`nix run`, `nix build`, etc.)
|
||||
- mdoc (bsd) format
|
||||
- deroff fallback for unusual formats
|
||||
|
||||
synopsis sections are parsed to detect subcommands: `git-commit.1`
|
||||
generates `export extern "git commit"`, not `export extern "git-commit"`.
|
||||
|
||||
### phase 3: --help fallback (parallel)
|
||||
|
||||
remaining executables without manpages get `--help` (or `-h`) called
|
||||
with a 200ms timeout. elf binaries are pre-scanned for the `-h` string
|
||||
to skip those that don't support help flags. shell scripts are run
|
||||
directly (they're fast). execution is parallelized to available cores.
|
||||
|
||||
### output
|
||||
|
||||
each command gets its own file in `/share/inshellah` under the system
|
||||
profile. native generators produce `.nu` files; parsed results produce
|
||||
`.json` files. the `complete` command reads both formats.
|
||||
|
||||
nushell built-in commands (ls, cd, cp, mv, etc.) are excluded since
|
||||
nushell provides its own completions.
|
||||
|
||||
### performance
|
||||
|
||||
on a typical nixos system (~950 executables, ~1600 manpages):
|
||||
- total time: ~4-10 seconds
|
||||
- native gzip decompression (camlzip, no process spawning)
|
||||
- parallel --help with core-scaled forking
|
||||
- elf string scanning to skip ~15% of binaries
|
||||
|
||||
## module options
|
||||
|
||||
```nix
|
||||
programs.inshellah = {
|
||||
enable = true;
|
||||
|
||||
# the inshellah package (set automatically by the flake module)
|
||||
package = pkgs.inshellah;
|
||||
|
||||
# where to place indexed completion files under the system profile
|
||||
# default: "/share/inshellah"
|
||||
completionsPath = "/share/inshellah";
|
||||
|
||||
# commands to skip entirely during indexing
|
||||
ignoreCommands = [ "problematic-tool" ];
|
||||
|
||||
# commands to skip manpage parsing for (uses --help instead)
|
||||
helpOnlyCommands = [ "nix" ];
|
||||
};
|
||||
```
|
||||
|
||||
## using the completer
|
||||
|
||||
the flake module sets a read-only `snippet` option containing the nushell
|
||||
config needed to wire up the completer. you can access it via
|
||||
`config.programs.inshellah.snippet` and paste it into your nushell config,
|
||||
or source it from a file generated by your nixos config.
|
||||
|
||||
the snippet sets up the external completer pointing at the system index
|
||||
at `/run/current-system/sw/share/inshellah`:
|
||||
|
||||
```nu
|
||||
let inshellah_complete = {|spans|
|
||||
inshellah complete ...$spans --system-dir /run/current-system/sw/share/inshellah | from json
|
||||
}
|
||||
$env.config.completions.external = {
|
||||
enable: true
|
||||
max_results: 100
|
||||
completer: $inshellah_complete
|
||||
}
|
||||
```
|
||||
|
||||
## home manager and other user-level package managers
|
||||
|
||||
the nixos module only indexes packages installed at the system level
|
||||
(those that end up in `/run/current-system/sw`). if you use home-manager,
|
||||
nix-env, or another user-level package manager, those binaries and
|
||||
manpages live elsewhere — typically under `/etc/profiles/per-user/<name>`
|
||||
or `~/.nix-profile`.
|
||||
|
||||
to get completions for user-installed packages, run `inshellah index`
|
||||
against those prefixes separately:
|
||||
|
||||
```sh
|
||||
# home-manager / per-user profile
|
||||
inshellah index /etc/profiles/per-user/$USER
|
||||
|
||||
# classic nix-env profile
|
||||
inshellah index ~/.nix-profile
|
||||
```
|
||||
|
||||
this indexes into the default user cache (`$XDG_CACHE_HOME/inshellah`),
|
||||
which the completer searches automatically. you can re-run this after
|
||||
installing new packages, or add it to a home-manager activation script.
|
||||
|
||||
if you want to automate this in home-manager:
|
||||
|
||||
```nix
|
||||
# home.nix
|
||||
home.activation.inshellah-index = lib.hm.dag.entryAfter [ "writeBoundary" ] ''
|
||||
${pkgs.inshellah}/bin/inshellah index /etc/profiles/per-user/$USER 2>/dev/null || true
|
||||
'';
|
||||
```
|
||||
|
||||
the completer will then search both the system index (`--system-dir`)
|
||||
and the user cache, so completions from both sources are available.
|
||||
|
||||
## troubleshooting
|
||||
|
||||
**completions not appearing**: ensure the completer is configured in
|
||||
your nushell config (see above). check that the system index exists:
|
||||
`ls /run/current-system/sw/share/inshellah/`.
|
||||
|
||||
**missing completions for a specific command**: check if it's a nushell
|
||||
built-in (`help commands | where name == "thecommand"`). built-ins are
|
||||
excluded because nushell serves its own completions for them.
|
||||
|
||||
**stale completions after update**: completions regenerate on every
|
||||
`nixos-rebuild`. if a command changed its flags, rebuild to pick up
|
||||
the changes.
|
||||
|
||||
**build-time errors**: indexing failures are non-fatal (`|| true`).
|
||||
check `journalctl` for the build log if completions are missing.
|
||||
184
doc/nushell-integration.md
Normal file
184
doc/nushell-integration.md
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
# using inshellah completions in nushell
|
||||
|
||||
inshellah indexes completions from three sources (in priority order):
|
||||
1. **native generators** — programs that can emit nushell completions directly
|
||||
2. **manpages** — groff/troff/mdoc manpage parsing
|
||||
3. **`--help` output** — parsing help text as a fallback
|
||||
|
||||
indexed data is stored as `.json` and `.nu` files in a directory that the
|
||||
`complete` command reads from at tab-completion time.
|
||||
|
||||
## quick start
|
||||
|
||||
index completions from a system prefix:
|
||||
|
||||
```sh
|
||||
# index from a prefix containing bin/ and share/man/
|
||||
inshellah index /usr
|
||||
|
||||
# index from multiple prefixes
|
||||
inshellah index /usr /usr/local
|
||||
|
||||
# store in a custom directory
|
||||
inshellah index /usr --dir ~/my-completions
|
||||
```
|
||||
|
||||
parse a single manpage:
|
||||
|
||||
```sh
|
||||
inshellah manpage /usr/share/man/man1/git.1.gz
|
||||
```
|
||||
|
||||
batch-process all manpages under a directory (man1 and man8):
|
||||
|
||||
```sh
|
||||
inshellah manpage-dir /usr/share/man
|
||||
```
|
||||
|
||||
## commands
|
||||
|
||||
```
|
||||
inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
|
||||
index completions into a directory of json/nu files.
|
||||
PREFIX is a directory containing bin/ and share/man/.
|
||||
default dir: $XDG_CACHE_HOME/inshellah
|
||||
--ignore FILE skip listed commands entirely
|
||||
--help-only FILE skip manpages for listed commands, use --help instead
|
||||
|
||||
inshellah complete CMD [ARGS...] [--dir PATH] [--system-dir PATH]
|
||||
nushell custom completer. outputs json completion candidates.
|
||||
falls back to --help resolution if command is not indexed.
|
||||
|
||||
inshellah query CMD [--dir PATH] [--system-dir PATH]
|
||||
print stored completion data for CMD.
|
||||
|
||||
inshellah dump [--dir PATH] [--system-dir PATH]
|
||||
list indexed commands.
|
||||
|
||||
inshellah manpage FILE
|
||||
parse a manpage and emit nushell extern block.
|
||||
|
||||
inshellah manpage-dir DIR
|
||||
batch-process manpages under DIR (man1 and man8 sections).
|
||||
```
|
||||
|
||||
## the index pipeline
|
||||
|
||||
the `index` command runs a three-phase pipeline over all executables
|
||||
in each `PREFIX/bin`:
|
||||
|
||||
### phase 1: native completion detection (parallel)
|
||||
|
||||
for each executable, inshellah scans the elf binary for the string
|
||||
`completion`. if found, it probes common patterns like
|
||||
`CMD completions nushell` to see if the program can generate its own
|
||||
nushell completions. native output is used verbatim — these are always
|
||||
higher quality than parsed completions.
|
||||
|
||||
programs like `niri`, as well as any clap/cobra tool with nushell support,
|
||||
are handled this way.
|
||||
|
||||
### phase 2: manpage parsing (sequential)
|
||||
|
||||
for commands not covered by phase 1, inshellah parses manpages from
|
||||
man1 (user commands) and man8 (sysadmin commands). it handles:
|
||||
|
||||
- gnu `.TP` style (coreutils, help2man)
|
||||
- `.IP` style (curl, hand-written)
|
||||
- `.PP`+`.RS`/`.RE` style (git, docbook)
|
||||
- nix3 bullet+hyperlink style (`nix run`, `nix build`, etc.)
|
||||
- mdoc (bsd) format
|
||||
- deroff fallback for unusual formats
|
||||
|
||||
synopsis sections are parsed to detect subcommands: `git-commit.1`
|
||||
generates `export extern "git commit"`, not `export extern "git-commit"`.
|
||||
|
||||
### phase 3: --help fallback (parallel)
|
||||
|
||||
remaining executables without manpages get `--help` (or `-h`) called
|
||||
with a 200ms timeout. elf binaries are pre-scanned for the `-h` string
|
||||
to skip those that don't support help flags. shell scripts are run
|
||||
directly (they're fast). execution is parallelized to available cores.
|
||||
|
||||
subcommands are recursively resolved — if `--help` output lists
|
||||
subcommands, inshellah runs `CMD SUBCMD --help` for each.
|
||||
|
||||
### output
|
||||
|
||||
each command gets its own file in the index directory. native generators
|
||||
produce `.nu` files; parsed results produce `.json` files. the `complete`
|
||||
command reads both formats.
|
||||
|
||||
nushell built-in commands (ls, cd, cp, mv, etc.) are excluded since
|
||||
nushell provides its own completions.
|
||||
|
||||
### performance
|
||||
|
||||
on a typical nixos system (~950 executables, ~1600 manpages):
|
||||
- total time: ~4-10 seconds
|
||||
- native gzip decompression (camlzip, no process spawning)
|
||||
- parallel --help with core-scaled forking
|
||||
- elf string scanning to skip ~15% of binaries
|
||||
|
||||
## the completer
|
||||
|
||||
the `complete` command is designed to be wired into nushell as an
|
||||
external completer. it reads from the index directory (`--dir`) and
|
||||
optional system directories (`--system-dir`), performs fuzzy matching,
|
||||
and outputs json completion candidates.
|
||||
|
||||
if a command is not indexed, `complete` falls back to on-the-fly
|
||||
`--help` resolution — it runs the command's help, caches the result
|
||||
in the user directory, and returns completions immediately.
|
||||
|
||||
### setting up the completer
|
||||
|
||||
```nu
|
||||
# ~/.config/nushell/config.nu
|
||||
$env.config.completions.external = {
|
||||
enable: true
|
||||
completer: {|spans|
|
||||
inshellah complete ...$spans
|
||||
| from json
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
with the nixos module, use the provided `snippet` option value (see
|
||||
[nixos.md](nixos.md)) which points at the system index automatically.
|
||||
|
||||
## nixos module
|
||||
|
||||
enable automatic completion indexing at system build time:
|
||||
|
||||
```nix
|
||||
{
|
||||
imports = [ ./path/to/inshellah/nix/module.nix ];
|
||||
programs.inshellah.enable = true;
|
||||
}
|
||||
```
|
||||
|
||||
this runs `inshellah index` during the system profile build. see
|
||||
[nixos.md](nixos.md) for full details.
|
||||
|
||||
## what gets generated
|
||||
|
||||
the `manpage` and `manpage-dir` commands emit nushell `extern` blocks
|
||||
with flags, parameter types, and descriptions:
|
||||
|
||||
```nu
|
||||
export extern "rg" [
|
||||
--regexp(-e): string # a pattern to search for
|
||||
--file(-f): path # search for patterns from the given file
|
||||
--count(-c) # only show the count of matching lines
|
||||
--color: string # controls when to use color
|
||||
--max-depth: int # limit the depth of directory traversal
|
||||
]
|
||||
```
|
||||
|
||||
subcommand manpages (e.g. `git-commit.1`) are detected via synopsis
|
||||
parsing and generate the correct nushell name (`git commit` not
|
||||
`git-commit`).
|
||||
|
||||
nushell built-in commands (ls, cd, mv, etc.) are excluded since nushell
|
||||
provides its own completions for these.
|
||||
84
doc/runtime-completions.md
Normal file
84
doc/runtime-completions.md
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
# runtime completion resolution
|
||||
|
||||
the `complete` command has built-in on-the-fly resolution: when a command
|
||||
is not found in the index, it falls back to running `--help`, caches the
|
||||
result, and returns completions immediately. this means commands installed
|
||||
outside the system profile (via cargo, pip, npm, go, etc.) get completions
|
||||
on first tab-press with no manual setup.
|
||||
|
||||
## how it works
|
||||
|
||||
when you type `docker compose up --<TAB>`:
|
||||
|
||||
1. nushell calls `inshellah complete docker compose up --`
|
||||
2. inshellah looks up the index for the longest matching prefix
|
||||
3. if found, it fuzzy-matches flags and subcommands against the partial input
|
||||
4. if not found, it locates the binary in `$PATH`, runs `--help`,
|
||||
recursively resolves subcommands, caches the results in the user
|
||||
directory (`$XDG_CACHE_HOME/inshellah`), and returns completions
|
||||
|
||||
all subsequent completions for that command are instant (served from cache).
|
||||
|
||||
## setup
|
||||
|
||||
the completer works with no extra configuration beyond the basic setup:
|
||||
|
||||
```nu
|
||||
# ~/.config/nushell/config.nu
|
||||
$env.config.completions.external = {
|
||||
enable: true
|
||||
completer: {|spans|
|
||||
inshellah complete ...$spans
|
||||
| from json
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
with the nixos module, add `--system-dir` to also search the system index:
|
||||
|
||||
```nu
|
||||
$env.config.completions.external = {
|
||||
enable: true
|
||||
completer: {|spans|
|
||||
inshellah complete ...$spans --system-dir /run/current-system/sw/share/inshellah
|
||||
| from json
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
or use the `snippet` option provided by the flake module (see
|
||||
[nixos.md](nixos.md)).
|
||||
|
||||
## cache management
|
||||
|
||||
the user cache lives at `$XDG_CACHE_HOME/inshellah` (typically
|
||||
`~/.cache/inshellah`).
|
||||
|
||||
```sh
|
||||
# list cached commands
|
||||
inshellah dump
|
||||
|
||||
# view cached data for a command
|
||||
inshellah query docker
|
||||
|
||||
# clear cache
|
||||
rm -rf ~/.cache/inshellah/
|
||||
|
||||
# re-index from a prefix
|
||||
inshellah index /usr --dir ~/.cache/inshellah
|
||||
```
|
||||
|
||||
## when to use this vs build-time indexing
|
||||
|
||||
the nixos module (`programs.inshellah.enable = true`) handles system
|
||||
packages at build time. runtime resolution covers:
|
||||
|
||||
- commands installed outside the system profile (cargo, pip, npm, go)
|
||||
- subcommand completions at arbitrary depth
|
||||
- systems without the nixos module
|
||||
|
||||
for upfront indexing on non-nixos systems:
|
||||
|
||||
```sh
|
||||
inshellah index /usr /usr/local
|
||||
```
|
||||
28
dune-project
Normal file
28
dune-project
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
(lang dune 3.20)
|
||||
|
||||
(name inshellah)
|
||||
|
||||
(generate_opam_files true)
|
||||
|
||||
(source
|
||||
(github username/reponame))
|
||||
|
||||
(authors "atagen <boss@atagen.co>")
|
||||
|
||||
(maintainers "atagen <boss@atagen.co>")
|
||||
|
||||
(license GPL-3.0-or-later)
|
||||
|
||||
(package
|
||||
(name inshellah)
|
||||
(synopsis "Nushell completions generator")
|
||||
(description
|
||||
"Inshellah parses manpages and --help switches to generate completions for nushell.")
|
||||
(depends
|
||||
ocaml
|
||||
dune
|
||||
angstrom
|
||||
angstrom-unix
|
||||
camlzip)
|
||||
(tags
|
||||
(shell completions nushell parser angstrom)))
|
||||
27
flake.lock
generated
Normal file
27
flake.lock
generated
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
{
|
||||
"nodes": {
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1773385838,
|
||||
"narHash": "sha256-ylF2AGl08seexxlLvMqj3jd+yZq56W9zicwe51mp0Pw=",
|
||||
"owner": "nixos",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "fef542e7a88eec2b698389e6279464fd479926b6",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nixos",
|
||||
"ref": "nixpkgs-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
||||
71
flake.nix
Normal file
71
flake.nix
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
{
  inputs.nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";

  outputs =
    { self, nixpkgs }:
    let
      # Build an attribute set keyed by system, calling `f` with that
      # system's nixpkgs package set.
      forAllSystems =
        f:
        nixpkgs.lib.genAttrs [ "x86_64-linux" "aarch64-linux" ] (
          system: f (import nixpkgs { inherit system; })
        );
    in
    {
      # Development shell: compiler, build tool, libraries and editor tooling.
      devShells = forAllSystems (pkgs: {
        default = pkgs.mkShell {
          packages = with pkgs.ocamlPackages; [
            dune_3
            ocaml
            angstrom
            angstrom-unix
            camlzip
            ppx_inline_test
            ocaml-lsp
            ocamlformat
            ocamlformat-rpc-lib
            utop
          ];
        };
      });

      # The inshellah executable itself.
      packages = forAllSystems (pkgs: {
        default = pkgs.ocamlPackages.buildDunePackage {
          pname = "inshellah";
          version = "0.1";
          src = ./.;
          nativeBuildInputs = [ pkgs.git ];
          buildInputs = with pkgs.ocamlPackages; [
            dune_3
            ocaml
            angstrom
            angstrom-unix
            camlzip
          ];

          meta.mainProgram = "inshellah";
        };
      });

      # NixOS module wrapper: imports the module proper, pins the package to
      # this flake's build and fills in the nushell config snippet.
      nixosModules.default =
        {
          pkgs,
          lib,
          config,
          ...
        }:
        {
          imports = [ ./nix/module.nix ];
          programs.inshellah.package = self.packages.${pkgs.stdenv.hostPlatform.system}.default;
          # completionsPath is documented with a leading slash
          # ("/share/inshellah"); drop it before joining so the snippet
          # points at .../sw/share/inshellah rather than .../sw//share/inshellah.
          programs.inshellah.snippet = ''
            let inshellah_complete = {|spans|
              ${lib.getExe config.programs.inshellah.package} complete ...$spans --system-dir /run/current-system/sw/${lib.removePrefix "/" config.programs.inshellah.completionsPath} | from json
            }
            $env.config.completions.external = {
              enable: true
              max_results: 100
              completer: $inshellah_complete
            }
          '';
        };
    };
}
|
||||
35
inshellah.opam
Normal file
35
inshellah.opam
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
# This file is generated by dune, edit dune-project instead
|
||||
opam-version: "2.0"
|
||||
synopsis: "Nushell completions generator"
|
||||
description:
|
||||
"Inshellah parses manpages and --help switches to generate completions for nushell."
|
||||
maintainer: ["atagen <boss@atagen.co>"]
|
||||
authors: ["atagen <boss@atagen.co>"]
|
||||
license: "GPL-3.0-or-later"
|
||||
tags: ["shell" "completions" "nushell" "parser" "angstrom"]
|
||||
homepage: "https://github.com/username/reponame"
|
||||
bug-reports: "https://github.com/username/reponame/issues"
|
||||
depends: [
|
||||
"ocaml"
|
||||
"dune" {>= "3.20"}
|
||||
"angstrom"
|
||||
"angstrom-unix"
|
||||
"camlzip"
|
||||
"odoc" {with-doc}
|
||||
]
|
||||
build: [
|
||||
["dune" "subst"] {dev}
|
||||
[
|
||||
"dune"
|
||||
"build"
|
||||
"-p"
|
||||
name
|
||||
"-j"
|
||||
jobs
|
||||
"@install"
|
||||
"@runtest" {with-test}
|
||||
"@doc" {with-doc}
|
||||
]
|
||||
]
|
||||
dev-repo: "git+https://github.com/username/reponame.git"
|
||||
x-maintenance-intent: ["(latest)"]
|
||||
0
lib/.ocamlformat
Normal file
0
lib/.ocamlformat
Normal file
3
lib/dune
Normal file
3
lib/dune
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
(library
|
||||
(name inshellah)
|
||||
(libraries angstrom angstrom-unix camlzip str unix))
|
||||
911
lib/manpage.ml
Normal file
911
lib/manpage.ml
Normal file
|
|
@ -0,0 +1,911 @@
|
|||
open Parser
|
||||
|
||||
(* --- Groff escape/formatting stripper --- *)
|
||||
|
||||
(* Remove groff escape sequences from [s] and return the visible text.

   Handled escapes:
   - font changes (\fX, \f(XX, \f[...]) are dropped; a space is inserted
     before a switch to italic when the previous output character is
     alphanumeric, so an option and its argument do not run together
   - \- becomes a plain hyphen; \e and a doubled backslash become one
     backslash; backslash-space becomes a space
   - \(XX and \[name] special characters: aq becomes an apostrophe and
     lq/Lq/rq/Rq a double quote; \(em and \(en become a hyphen; other
     names are dropped
   - size (\sN, \s+N, \s-N), colour (\m[...]), device control (\X
     followed by a quoted argument), string (\*) and number register (\n)
     escapes are dropped
   - any other escape drops the backslash and the character after it

   The quoted size form is not recognised: only the \s itself is removed.
   A lone backslash at the very end of [s] is copied through unchanged. *)
let strip_groff_escapes s =
  let len = String.length s in
  let buf = Buffer.create len in
  let i = ref 0 in
  (* Last character written, consulted by the italic word-boundary rule. *)
  let last = ref '\000' in
  let put c = Buffer.add_char buf c; last := c in
  let is_alnum c =
    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
  in
  let is_digit_at pos = pos < len && s.[pos] >= '0' && s.[pos] <= '9' in
  (* Advance [i] up to and past the next [closing] character, or to the
     end of the string when there is none. *)
  let skip_past closing =
    while !i < len && s.[!i] <> closing do incr i done;
    if !i < len then incr i
  in
  (* Skip the register name that follows \* or \n, with [i] on its first
     character. The paren form is the paren plus a two-character name,
     i.e. three characters; advancing by only two would leak the second
     name character into the output. *)
  let skip_register_name () =
    if !i < len then begin
      match s.[!i] with
      | '(' -> i := !i + 3
      | '[' -> incr i; skip_past ']'
      | _ -> incr i
    end
  in
  while !i < len do
    if s.[!i] = '\\' && !i + 1 < len then begin
      match s.[!i + 1] with
      | 'f' ->
        if !i + 2 < len then begin
          let fc = s.[!i + 2] in
          (* Keep a word boundary when bold text is directly followed by
             an italic argument name. *)
          if fc = 'I' && is_alnum !last then put ' ';
          (match fc with
           | '(' -> i := !i + 5
           | '[' -> i := !i + 3; skip_past ']'
           | _ -> i := !i + 3)
        end else
          i := !i + 2
      | ('-' | ' ') as c ->
        put c;
        i := !i + 2
      | 'e' | '\\' ->
        put '\\';
        i := !i + 2
      | '&' | '/' | ',' ->
        (* Zero-width characters *)
        i := !i + 2
      | '(' ->
        (* Two-character named glyph *)
        if !i + 3 < len then begin
          (match String.sub s (!i + 2) 2 with
           | "aq" -> put '\''
           | "lq" | "Lq" | "rq" | "Rq" -> put '"'
           | "em" | "en" -> put '-'
           | _ -> ());
          i := !i + 4
        end else
          i := !i + 2
      | '[' ->
        (* Bracketed named glyph; nothing is emitted when the closing
           bracket is missing. *)
        i := !i + 2;
        let start = !i in
        while !i < len && s.[!i] <> ']' do incr i done;
        if !i < len then begin
          (match String.sub s start (!i - start) with
           | "aq" -> put '\''
           | "lq" | "Lq" | "rq" | "Rq" -> put '"'
           | _ -> ());
          incr i
        end
      | 's' ->
        (* Size change: optional sign, then at most two digits *)
        i := !i + 2;
        if !i < len && (s.[!i] = '+' || s.[!i] = '-') then incr i;
        if is_digit_at !i then incr i;
        if is_digit_at !i then incr i
      | 'm' ->
        (* Colour escape with a bracketed argument *)
        i := !i + 2;
        if !i < len && s.[!i] = '[' then begin
          incr i;
          skip_past ']'
        end
      | 'X' ->
        (* Device control with an apostrophe-delimited argument *)
        i := !i + 2;
        if !i < len && s.[!i] = '\'' then begin
          incr i;
          skip_past '\''
        end
      | '*' | 'n' ->
        (* String variable or number register interpolation *)
        i := !i + 2;
        skip_register_name ()
      | _ ->
        (* Unknown escape, skip *)
        i := !i + 2
    end else begin
      put s.[!i];
      incr i
    end
  done;
  Buffer.contents buf
|
||||
|
||||
(* Flatten the argument text of an alternating-font macro (.BI, .BR, .IR
   and friends) into one string: unquoted spaces and tabs are removed so
   the arguments are concatenated, double-quote characters are dropped,
   and everything between a pair of double quotes is kept verbatim,
   whitespace included. An unterminated quote keeps the rest of the
   input. *)
let strip_inline_macro_args s =
  let out = Buffer.create (String.length s) in
  let quoted = ref false in
  String.iter
    (fun c ->
       match c with
       | '"' -> quoted := not !quoted
       | (' ' | '\t') when not !quoted -> ()
       | c -> Buffer.add_char out c)
    s;
  Buffer.contents out
|
||||
|
||||
(* Strip groff escapes from [line], then trim surrounding whitespace. *)
let strip_groff line = line |> strip_groff_escapes |> String.trim
|
||||
|
||||
(* --- Line classification --- *)
|
||||
|
||||
(* One classified line of manpage source, as produced by classify_line. *)
type groff_line =
|
||||
| Macro of string * string (* macro name and its argument text, e.g. ("SH", "OPTIONS") or ("TP", "") *)
|
||||
| Text of string (* plain text after groff stripping *)
|
||||
| Blank (* empty line, or text that is empty once stripped *)
|
||||
| Comment (* roff comment line *)
|
||||
|
||||
(* Classify one raw manpage line.

   - the empty string is Blank
   - a line starting with dot-backslash followed by a double quote (or
     consisting of just dot-backslash), or a line of at least three
     characters starting with backslash and a double quote, is Comment
   - a line starting with a dot or an apostrophe is a Macro: the text
     after the control character is trimmed and split at its first space
     (or, when it has no space, its first tab) into name and arguments;
     one pair of double quotes enclosing the whole argument text is
     removed
   - anything else is Text after groff stripping, or Blank when nothing
     is left *)
let classify_line line =
  let n = String.length line in
  let dot_comment () =
    n >= 2 && line.[0] = '.' && line.[1] = '\\' && (n < 3 || line.[2] = '"')
  in
  let bare_comment () = n >= 3 && line.[0] = '\\' && line.[1] = '"' in
  if n = 0 then Blank
  else if dot_comment () || bare_comment () then Comment
  else
    match line.[0] with
    | '.' | '\'' ->
      let body = String.trim (String.sub line 1 (n - 1)) in
      (* A space takes precedence over a tab as the name terminator. *)
      let split_at =
        match String.index_opt body ' ' with
        | Some _ as hit -> hit
        | None -> String.index_opt body '\t'
      in
      (match split_at with
       | None -> Macro (body, "")
       | Some cut ->
         let name = String.sub body 0 cut in
         let raw =
           String.trim
             (String.sub body (cut + 1) (String.length body - cut - 1))
         in
         let k = String.length raw in
         let args =
           if k >= 2 && raw.[0] = '"' && raw.[k - 1] = '"'
           then String.sub raw 1 (k - 2)
           else raw
         in
         Macro (name, args))
    | _ ->
      (match strip_groff line with
       | "" -> Blank
       | text -> Text text)
|
||||
|
||||
(* True when [line] begins with a roff comment introducer: either
   dot, backslash, double quote, or just backslash, double quote. *)
let is_comment_line line =
  let begins_with prefix =
    let k = String.length prefix in
    String.length line >= k && String.sub line 0 k = prefix
  in
  begins_with ".\\\"" || begins_with "\\\""
|
||||
|
||||
(* Shadows the earlier classify_line: comment lines recognised by
   is_comment_line are short-circuited to Comment, everything else is
   delegated to the previous definition. *)
let classify_line line =
  match is_comment_line line with
  | true -> Comment
  | false -> classify_line line
|
||||
|
||||
(* --- Section extraction --- *)
|
||||
|
||||
(* Classify [lines] and return the body of the section that should hold
   the option list: the first .SH section whose title contains OPTION
   (case-insensitively), or failing that the first DESCRIPTION section.
   The body runs up to, but excludes, the next .SH macro. Returns the
   empty list when neither section exists. *)
let extract_options_section lines =
  (* Lines of a section body, stopping before the next .SH. *)
  let rec body_of collected = function
    | [] | Macro ("SH", _) :: _ -> List.rev collected
    | line :: tail -> body_of (line :: collected) tail
  in
  let normalise title = String.uppercase_ascii (String.trim title) in
  (* Plain substring test for OPTION in the normalised title. *)
  let names_options title =
    let up = normalise title in
    let needle = "OPTION" in
    let k = String.length needle in
    let rec scan i =
      i + k <= String.length up
      && (String.sub up i k = needle || scan (i + 1))
    in
    scan 0
  in
  let names_description title = normalise title = "DESCRIPTION" in
  (* Body of the first .SH section whose title satisfies [wanted]. *)
  let rec first_section wanted = function
    | [] -> None
    | Macro ("SH", title) :: tail when wanted title -> Some (body_of [] tail)
    | _ :: tail -> first_section wanted tail
  in
  let classified = List.map classify_line lines in
  match first_section names_options classified with
  | Some body -> body
  | None ->
    (match first_section names_description classified with
     | Some body -> body
     | None -> [])
|
||||
|
||||
(* --- Strategy-based entry extraction --- *)
|
||||
|
||||
(* Consume the leading run of Text lines from [lines]. [acc] holds
   already-collected pieces in reverse order. Returns the pieces joined
   with single spaces, paired with the remaining lines, which start at
   the first non-Text line. *)
let collect_text_lines lines acc =
  let rec gather pieces = function
    | Text piece :: tail -> gather (piece :: pieces) tail
    | remaining -> (String.concat " " (List.rev pieces), remaining)
  in
  gather acc lines
|
||||
|
||||
(* Turn an option tag line plus its description into an entry. The tag is
   stripped of groff escapes and trimmed, then switch_parser followed by
   param_parser is run over a prefix of it. Returns None when the tag
   does not parse. *)
let parse_tag_to_entry tag desc =
  let cleaned = String.trim (strip_groff_escapes tag) in
  let switch_then_param =
    Angstrom.lift2
      (fun switch param -> (switch, param))
      switch_parser param_parser
  in
  Angstrom.parse_string ~consume:Angstrom.Consume.Prefix
    switch_then_param cleaned
  |> Result.to_option
  |> Option.map (fun (switch, param) -> { switch; param; desc })
|
||||
|
||||
(* Text of a font macro line used as an option tag. The single-font
   macros B and I keep the spaces between their arguments; every other
   macro name has its arguments concatenated first. The result is
   stripped of groff escapes and trimmed. *)
let tag_of_macro name args =
  let joined =
    if name = "B" || name = "I" then args
    else strip_inline_macro_args args
  in
  String.trim (strip_groff_escapes joined)
|
||||
|
||||
(* Strategy A: .TP style (most common: GNU coreutils, help2man).
   The line after each .TP is the tag, given either as plain text or as
   a B/I/BI/BR/IR font macro; the run of text lines after the tag is the
   description. A .TP followed by anything else is skipped, as are tags
   that do not parse. Entries are returned in document order. *)
let strategy_tp lines =
  (* Tag text and the lines after it, when the line can carry a tag. *)
  let tag_after_tp = function
    | Text tag :: tail -> Some (tag, tail)
    | Macro (("B" | "I" | "BI" | "BR" | "IR") as font, args) :: tail ->
      Some (tag_of_macro font args, tail)
    | _ -> None
  in
  let rec scan found = function
    | [] -> List.rev found
    | Macro ("TP", _) :: after ->
      (match tag_after_tp after with
       | None -> scan found after
       | Some (tag, tail) ->
         let desc, remaining = collect_text_lines tail [] in
         let found =
           match parse_tag_to_entry tag desc with
           | Some entry -> entry :: found
           | None -> found
         in
         scan found remaining)
    | _ :: after -> scan found after
  in
  scan [] lines
|
||||
|
||||
(* Strategy B: .IP style (curl, hand-written pages).
   The argument of each .IP macro is the tag and the run of text lines
   that follows is its description. Tags that do not parse are dropped.
   Entries are returned in document order. *)
let strategy_ip lines =
  let rec scan found = function
    | [] -> List.rev found
    | Macro ("IP", raw_tag) :: following ->
      let tag = strip_groff_escapes raw_tag in
      let desc, remaining = collect_text_lines following [] in
      let found =
        match parse_tag_to_entry tag desc with
        | Some entry -> entry :: found
        | None -> found
      in
      scan found remaining
    | _ :: following -> scan found following
  in
  scan [] lines
|
||||
|
||||
(* Strategy C: .PP + .RS/.RE style (git, DocBook).
   A .PP immediately followed by a text line starts an entry with that
   line as the tag. The description is any text directly after the tag
   plus the text inside the following .RS block; inside the block other
   macros are skipped and collection ends at .RE (consumed), at a .PP or
   .SH (left in place), or at end of input. Tags that do not parse are
   dropped. Entries are returned in document order. *)
let strategy_pp_rs lines =
  let finish pieces = String.concat " " (List.rev pieces) in
  (* Inside an .RS block. *)
  let rec inside_rs pieces = function
    | [] -> (finish pieces, [])
    | Macro ("RE", _) :: tail -> (finish pieces, tail)
    | Text piece :: tail -> inside_rs (piece :: pieces) tail
    | (Macro (("PP" | "SH"), _) :: _) as stop -> (finish pieces, stop)
    | _ :: tail -> inside_rs pieces tail
  in
  (* Between the tag and a possible .RS: text here also counts as
     description; anything other than text or .RS ends the entry. *)
  let rec before_rs pieces = function
    | Macro ("RS", _) :: tail -> inside_rs pieces tail
    | Text piece :: tail -> before_rs (piece :: pieces) tail
    | stop -> (finish pieces, stop)
  in
  let rec scan found = function
    | [] -> List.rev found
    | Macro ("PP", _) :: Text tag :: tail ->
      let desc, remaining = before_rs [] tail in
      let found =
        match parse_tag_to_entry tag desc with
        | Some entry -> entry :: found
        | None -> found
      in
      scan found remaining
    | _ :: tail -> scan found tail
  in
  scan [] lines
|
||||
|
||||
(* Strategy D: deroff fallback.
   Rebuilds a plain-text rendering of the section and feeds it to the
   --help parser (parse_help). Text lines are copied as they are, font
   macros (B, I, BI, BR, IR) contribute their cleaned arguments, blank
   lines are kept, and every other line is dropped. Returns the parsed
   entries, or the empty list when parse_help fails. *)
let strategy_deroff_lines lines =
  let buf = Buffer.create 256 in
  let emit_line s =
    Buffer.add_string buf s;
    Buffer.add_char buf '\n'
  in
  List.iter
    (function
      | Text s -> emit_line s
      | Macro (("BI" | "BR" | "IR" | "B" | "I"), args) ->
        emit_line (strip_groff_escapes (strip_inline_macro_args args))
      | Blank -> Buffer.add_char buf '\n'
      | _ -> ())
    lines;
  match parse_help (Buffer.contents buf) with
  | Ok result -> result.entries
  | Error _ -> []
|
||||
|
||||
(* Strategy E: Nix3-style bullet .IP items with .UR/.UE hyperlinks.
   An .IP with a non-blank argument is a bullet. The tag is the text that
   follows it, with .UR/.UE lines skipped. The description is introduced by
   an .IP with a blank argument; if that marker is missing the description
   is empty. Description text runs until the next bullet .IP, .SS or .SH.
   Nested .RS/.RE blocks inside the description are skipped entirely. *)
let strategy_nix lines =
  let is_bullet_ip args = String.trim args <> "" in
  let join parts = String.concat " " (List.rev parts) in
  let rec collect_tag parts = function
    | Macro (("UR" | "UE"), _) :: rest -> collect_tag parts rest
    | Text s :: rest -> collect_tag (s :: parts) rest
    | rest -> (join parts, rest)
  in
  let rec desc_text parts = function
    | [] -> (join parts, [])
    | Text s :: rest -> desc_text (s :: parts) rest
    | (Macro ("IP", args) :: _) as stop when is_bullet_ip args ->
      (join parts, stop)
    | (Macro (("SS" | "SH"), _) :: _) as stop -> (join parts, stop)
    | Macro ("RS", _) :: rest -> skip_rs parts 1 rest
    (* Any other macro (including a blank .IP, which continues the
       paragraph), blank line or comment is passed over. *)
    | (Macro _ | Blank | Comment) :: rest -> desc_text parts rest
  and skip_rs parts depth = function
    | [] -> (join parts, [])
    | Macro ("RE", _) :: rest ->
      if depth <= 1 then desc_text parts rest
      else skip_rs parts (depth - 1) rest
    | Macro ("RS", _) :: rest -> skip_rs parts (depth + 1) rest
    | _ :: rest -> skip_rs parts depth rest
  in
  let collect_desc = function
    | Macro ("IP", dargs) :: rest when not (is_bullet_ip dargs) ->
      desc_text [] rest
    | rest -> ("", rest)
  in
  let rec walk acc = function
    | [] -> List.rev acc
    | Macro ("IP", args) :: rest when is_bullet_ip args ->
      let tag, after_tag = collect_tag [] rest in
      let desc, remaining = collect_desc after_tag in
      (match parse_tag_to_entry tag desc with
       | Some e -> walk (e :: acc) remaining
       | None -> walk acc remaining)
    | _ :: rest -> walk acc rest
  in
  walk [] lines
|
||||
|
||||
(* Number of classified lines that are the macro called [name]. *)
let count_macro name lines =
  lines
  |> List.filter (function Macro (m, _) -> m = name | _ -> false)
  |> List.length
|
||||
|
||||
(* Run every strategy whose macros occur in [lines] and keep the result
   with the most entries. Strategies are considered in the order TP, IP,
   PP+RS, nix; on a tie the later one wins. The deroff fallback is used
   only when no specialised strategy produced any entry. *)
let extract_entries lines =
  let has name = count_macro name lines > 0 in
  let ip = has "IP" in
  let run_if applicable strategy = if applicable then strategy lines else [] in
  let specialized =
    [ run_if (has "TP") strategy_tp;
      run_if ip strategy_ip;
      run_if (has "PP" && has "RS") strategy_pp_rs;
      run_if (has "UR" && ip) strategy_nix ]
    |> List.filter (fun entries -> entries <> [])
  in
  let candidates =
    match specialized with
    | [] -> [strategy_deroff_lines lines]
    | found -> found
  in
  List.fold_left
    (fun best entries ->
       if List.length entries >= List.length best then entries else best)
    [] candidates
|
||||
|
||||
(* --- NAME section description extraction --- *)
|
||||
|
||||
(* One-line description taken from the NAME section of a manpage.
   The section body (up to the next .SH) is flattened to a single string:
   text lines as they are, font macros and mdoc .Nm by their cleaned
   arguments, and mdoc .Nd prefixed with a backslash-dash separator. The
   string is then split once at that separator (or at a dash surrounded by
   spaces) and the right-hand side is returned. None when there is no NAME
   section or no separator. *)
let extract_name_description contents =
  let push s acc = if s = "" then acc else s :: acc in
  let finish acc =
    let full = String.concat " " (List.rev acc) |> String.trim in
    (* NAME lines look like: git-add \- Add file contents to the index *)
    let sep = Str.regexp {| *\\- *\| +- +|} in
    match Str.bounded_split sep full 2 with
    | [_; desc] -> Some (String.trim desc)
    | _ -> None
  in
  let rec collect acc = function
    | [] | Macro ("SH", _) :: _ -> finish acc
    | Text s :: rest -> collect (s :: acc) rest
    | Macro (("B" | "BI" | "BR" | "I" | "IR"), args) :: rest ->
      let s =
        strip_inline_macro_args args |> strip_groff_escapes |> String.trim
      in
      collect (push s acc) rest
    | Macro ("Nm", args) :: rest ->
      collect (push (strip_groff_escapes args |> String.trim) acc) rest
    | Macro ("Nd", args) :: rest ->
      let s = strip_groff_escapes args |> String.trim in
      collect (if s = "" then acc else ("\\- " ^ s) :: acc) rest
    | _ :: rest -> collect acc rest
  in
  let rec find = function
    | [] -> None
    | Macro ("SH", args) :: rest
      when String.uppercase_ascii (String.trim args) = "NAME" ->
      collect [] rest
    | _ :: rest -> find rest
  in
  contents |> String.split_on_char '\n' |> List.map classify_line |> find
|
||||
|
||||
(* --- SYNOPSIS command name extraction --- *)
|
||||
|
||||
(* Command name (possibly several words, e.g. a command plus subcommand)
   read from the first usable line of the SYNOPSIS section.
   Only the first text line or B/BI/BR macro after the header is examined.
   Leading words are kept while they consist solely of letters, digits,
   dash, underscore and dot and do not start with one of [ - < ( {.
   None when there is no SYNOPSIS section, when the first candidate line is
   empty, or when no leading word qualifies. *)
let extract_synopsis_command_lines lines =
  let is_cmd_char = function
    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.' -> true
    | _ -> false
  in
  let starts_argument w =
    w <> ""
    && (match w.[0] with
        | '[' | '-' | '<' | '(' | '{' -> true
        | _ -> false)
  in
  let rec leading_words = function
    | w :: rest when not (starts_argument w) && String.for_all is_cmd_char w ->
      w :: leading_words rest
    | _ -> []
  in
  let extract_cmd line =
    let words =
      String.trim line
      |> String.split_on_char ' '
      |> List.filter (fun w -> w <> "")
    in
    match leading_words words with
    | [] -> None
    | cmd -> Some (String.concat " " cmd)
  in
  let command_of s = if s = "" then None else extract_cmd s in
  let rec collect = function
    | [] | Macro ("SH", _) :: _ -> None
    | Text s :: _ -> command_of (String.trim s)
    | Macro (("B" | "BI" | "BR"), args) :: _ ->
      command_of
        (strip_inline_macro_args args |> strip_groff_escapes |> String.trim)
    | _ :: rest -> collect rest
  in
  let rec find = function
    | [] -> None
    | Macro ("SH", args) :: rest
      when String.uppercase_ascii (String.trim args) = "SYNOPSIS" ->
      collect rest
    | _ :: rest -> find rest
  in
  find (List.map classify_line lines)
|
||||
|
||||
(* Same as extract_synopsis_command_lines, starting from the whole manpage
   text, which is split at newline characters. *)
let extract_synopsis_command contents =
  contents |> String.split_on_char '\n' |> extract_synopsis_command_lines
|
||||
|
||||
(* --- SYNOPSIS positional extraction --- *)
|
||||
|
||||
(* Positional arguments described by the SYNOPSIS section.
   The section is read up to the next .SH, .SS or .br. Text lines and font
   macros are cleaned and joined with spaces into one usage string; the
   command prefix (as located by skip_command_prefix) is cut off and the
   remainder is handed to parse_usage_args. Returns the empty list when
   there is no SYNOPSIS section or it has no usable text. *)
let extract_synopsis_positionals_lines lines =
  let keep s acc = if s = "" then acc else s :: acc in
  let finish acc =
    let full = String.concat " " (List.rev acc) |> String.trim in
    if full = "" then []
    else
      let cmd_end = skip_command_prefix full in
      parse_usage_args (String.sub full cmd_end (String.length full - cmd_end))
  in
  let rec collect acc = function
    | [] -> finish acc
    | Macro (("SH" | "SS" | "br"), _) :: _ -> finish acc
    | Text s :: rest ->
      collect (keep (strip_groff_escapes s |> String.trim) acc) rest
    | Macro (("B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI"), args)
      :: rest ->
      let s =
        strip_inline_macro_args args |> strip_groff_escapes |> String.trim
      in
      collect (keep s acc) rest
    | _ :: rest -> collect acc rest
  in
  let rec find = function
    | [] -> []
    | Macro ("SH", args) :: rest
      when String.uppercase_ascii (String.trim args) = "SYNOPSIS" ->
      collect [] rest
    | _ :: rest -> find rest
  in
  find (List.map classify_line lines)
|
||||
|
||||
(* --- mdoc (BSD) format support --- *)
|
||||
|
||||
(* A page is treated as mdoc (BSD) when any raw line classifies as the
   mdoc section macro Sh (as opposed to the man macro SH). *)
let is_mdoc lines =
  let is_sh_macro raw =
    match classify_line raw with
    | Macro ("Sh", _) -> true
    | _ -> false
  in
  List.exists is_sh_macro lines
|
||||
|
||||
(* Descriptive text carried by one classified mdoc line, if any.
   Text lines yield their content with groff escapes stripped. The
   structural macros listed below yield nothing. Any other macro yields its
   cleaned, trimmed argument string unless that is empty. All remaining
   line kinds yield nothing. *)
let mdoc_text_of = function
  | Text s -> Some (strip_groff_escapes s)
  | Macro
      ( ( "Pp" | "Bl" | "El" | "Sh" | "Ss" | "Os" | "Dd" | "Dt"
        | "Oo" | "Oc" | "Op" ),
        _ ) ->
    None
  | Macro (_, args) ->
    (match strip_groff_escapes args |> String.trim with
     | "" -> None
     | s -> Some s)
  | _ -> None
|
||||
|
||||
(* Turn the argument string of an mdoc .It line into an option entry with
   an empty description.
   The string is split at spaces; empty words and the Ns macro are ignored.
   Recognised shapes are Fl followed by a single alphanumeric character
   (short option) and Fl followed by a word starting with a dash (long
   option, the dash removed). When the third word is Ar, the fourth is
   recorded as a mandatory parameter. Anything else yields None. *)
let parse_mdoc_it args =
  let words =
    List.filter
      (fun w -> w <> "" && w <> "Ns")
      (String.split_on_char ' ' args)
  in
  let param =
    match words with
    | _ :: _ :: "Ar" :: p :: _ -> Some (Mandatory p)
    | _ -> None
  in
  match words with
  | "Fl" :: flag :: _ ->
    let n = String.length flag in
    if n = 1 && is_alphanumeric flag.[0] then
      Some { switch = Short flag.[0]; param; desc = "" }
    else if n > 1 && flag.[0] = '-' then
      Some { switch = Long (String.sub flag 1 (n - 1)); param; desc = "" }
    else None
  | _ -> None
|
||||
|
||||
(* Build a positional argument from the argument string of an mdoc .Ar
   line, or from the argument string of an .Op line that wraps an Ar.
   [optional] is stored in the result as given. The positional is named
   after the first word (lowercased) and is variadic when a word equal to
   three dots is present. Names shorter than two characters are rejected
   with None.

   When called for an .Op line the string still begins with the macro name
   Ar (for example: Ar file ...). That leading word is dropped first;
   otherwise every optional positional would be named after the macro
   instead of the argument. *)
let positional_of_mdoc_line optional args =
  let words =
    String.split_on_char ' ' args |> List.filter (fun w -> w <> "")
  in
  let words =
    match words with
    | "Ar" :: rest -> rest
    | _ -> words
  in
  match words with
  | name :: _ when String.length name >= 2 ->
    Some { pos_name = String.lowercase_ascii name;
           optional;
           variadic = List.mem "..." words }
  | _ -> None
|
||||
|
||||
(* Parse an mdoc (BSD) manpage given as raw lines.
   Options come from .Bl lists whose first .It starts with Fl: every .It
   up to the closing .El is parsed with parse_mdoc_it, and the lines up to
   the next It/El/Sh/Ss form its description. Lists of any other kind are
   skipped up to their .El. Positionals come from .Ar lines and from .Op
   lines starting with Ar inside the SYNOPSIS section; duplicates (by name)
   keep their first occurrence. Subcommands and description are left
   empty. *)
let parse_mdoc_lines lines =
  let words_of s =
    String.split_on_char ' ' s |> List.filter (fun w -> w <> "")
  in
  let rec skip_to_el = function
    | [] -> []
    | Macro ("El", _) :: rest -> rest
    | _ :: rest -> skip_to_el rest
  in
  let desc_of lines =
    let rec gather parts = function
      | [] -> (parts, [])
      | (Macro (("It" | "El" | "Sh" | "Ss"), _) :: _) as stop -> (parts, stop)
      | line :: rest ->
        let parts =
          match mdoc_text_of line with
          | Some s -> s :: parts
          | None -> parts
        in
        gather parts rest
    in
    let parts, rest = gather [] lines in
    (String.trim (String.concat " " (List.rev parts)), rest)
  in
  let parse_it args rest entries =
    let desc, rest = desc_of rest in
    let entries =
      match parse_mdoc_it args with
      | Some e -> { e with desc } :: entries
      | None -> entries
    in
    (entries, rest)
  in
  let rec parse_option_list entries = function
    | [] -> (entries, [])
    | Macro ("El", _) :: rest -> (entries, rest)
    | Macro ("It", args) :: rest ->
      let entries, rest = parse_it args rest entries in
      parse_option_list entries rest
    | _ :: rest -> parse_option_list entries rest
  in
  let add_positional optional args positionals =
    match positional_of_mdoc_line optional args with
    | Some p -> p :: positionals
    | None -> positionals
  in
  let rec parse_synopsis positionals = function
    | [] -> (positionals, [])
    | (Macro ("Sh", _) :: _) as rest -> (positionals, rest)
    | Macro ("Ar", args) :: rest ->
      parse_synopsis (add_positional false args positionals) rest
    | Macro ("Op", args) :: rest ->
      let positionals =
        match words_of args with
        | "Ar" :: _ -> add_positional true args positionals
        | _ -> positionals
      in
      parse_synopsis positionals rest
    | _ :: rest -> parse_synopsis positionals rest
  in
  let rec scan entries positionals = function
    | [] -> (entries, positionals)
    | Macro ("Bl", _) :: Macro ("It", it_args) :: rest ->
      (match words_of it_args with
       | "Fl" :: _ ->
         let entries, rest = parse_it it_args rest entries in
         let entries, rest = parse_option_list entries rest in
         scan entries positionals rest
       | _ -> scan entries positionals (skip_to_el rest))
    | Macro ("Bl", _) :: rest -> scan entries positionals (skip_to_el rest)
    | Macro ("Sh", args) :: rest
      when String.uppercase_ascii (String.trim args) = "SYNOPSIS" ->
      let positionals, rest = parse_synopsis positionals rest in
      scan entries positionals rest
    | _ :: rest -> scan entries positionals rest
  in
  let entries, positionals = scan [] [] (List.map classify_line lines) in
  (* Both accumulators were built by consing, so they are in reverse
     document order here. *)
  let _, unique =
    List.fold_left
      (fun (seen, kept) p ->
         if List.mem p.pos_name seen then (seen, kept)
         else (p.pos_name :: seen, p :: kept))
      ([], [])
      (List.rev positionals)
  in
  { entries = List.rev entries;
    subcommands = [];
    positionals = List.rev unique;
    description = "" }
|
||||
|
||||
(* --- COMMANDS section subcommand extraction --- *)
|
||||
|
||||
(* Classified lines of every COMMANDS / COMMAND section, concatenated in
   document order. Each section runs from its .SH header (exclusive) to the
   next .SH or the end of the page.
   These sections use .PP + bold name + .RS/.RE blocks, e.g.:
     .PP
     \fBstart\fR \fIUNIT\fR...
     .RS 4
     Start (activate) one or more units.
     .RE *)
let extract_commands_section lines =
  let is_commands_section name =
    match String.uppercase_ascii (String.trim name) with
    | "COMMANDS" | "COMMAND" -> true
    | _ -> false
  in
  let rec section_body acc = function
    | [] | Macro ("SH", _) :: _ -> List.rev acc
    | line :: rest -> section_body (line :: acc) rest
  in
  let rec find_commands acc = function
    | [] -> List.rev acc
    | Macro ("SH", args) :: rest when is_commands_section args ->
      find_commands (section_body [] rest :: acc) rest
    | _ :: rest -> find_commands acc rest
  in
  lines |> List.map classify_line |> find_commands [] |> List.concat
|
||||
|
||||
(* Extract a subcommand name from a tag line such as
     \fBlist\-units\fR [\fIPATTERN\fR...]
   which yields list-units.

   When the trimmed text starts with the bold font escape, the candidate is
   everything up to the next font-change escape (a backslash followed by f),
   with groff escapes stripped. The span must not be cut at the first
   backslash: names containing an escaped hyphen would be truncated to the
   part before the hyphen. Otherwise the candidate is the first
   space-delimited word of the stripped text.

   A candidate is accepted only when it looks like a subcommand: at least
   two characters, not starting with a dash, and made only of lowercase
   letters, digits, dash and underscore. Returns None otherwise. *)
let extract_bold_command_name text =
  let s = String.trim text in
  let len = String.length s in
  let looks_like_subcommand name =
    String.length name >= 2
    && name.[0] <> '-'
    && String.for_all
         (function
           | 'a'..'z' | '0'..'9' | '-' | '_' -> true
           | _ -> false)
         name
  in
  let candidate =
    if len >= 4 && s.[0] = '\\' && s.[1] = 'f' && s.[2] = 'B' then begin
      (* Index of the next font-change escape at or after [i], or [len]
         when the bold span is never closed. *)
      let rec font_change i =
        if i + 1 >= len then len
        else if s.[i] = '\\' && s.[i + 1] = 'f' then i
        else font_change (i + 1)
      in
      let stop = font_change 3 in
      strip_groff_escapes ("\\fB" ^ String.sub s 3 (stop - 3) ^ "\\fR")
      |> String.trim
    end else
      (* No leading bold escape: try the already-plain first word. *)
      match String.split_on_char ' ' (strip_groff_escapes s) with
      | w :: _ -> w
      | [] -> ""
  in
  if looks_like_subcommand candidate then Some candidate else None
|
||||
|
||||
(* Subcommands listed in an already-classified COMMANDS section.
   A .PP directly followed by a text line is a candidate; it is kept when
   extract_bold_command_name accepts the text as a subcommand name. The
   description is the text before .RS plus the text lines inside the .RS
   block (ended by .RE, or by the next PP/SH/SS), cut down to the part
   before the first full stop. *)
let extract_subcommands_from_commands lines =
  let join parts = String.concat " " (List.rev parts) in
  let rec before_rs parts = function
    | Macro ("RS", _) :: rest -> inside_rs parts rest
    | Text s :: rest -> before_rs (s :: parts) rest
    | rest -> (join parts, rest)
  and inside_rs parts = function
    | [] -> (join parts, [])
    | Macro ("RE", _) :: rest -> (join parts, rest)
    | Text s :: rest -> inside_rs (s :: parts) rest
    | (Macro (("PP" | "SH" | "SS"), _) :: _) as stop -> (join parts, stop)
    | _ :: rest -> inside_rs parts rest
  in
  let first_sentence desc =
    match String.split_on_char '.' desc with
    | first :: _ when first <> "" -> String.trim first
    | _ -> desc
  in
  let rec walk acc = function
    | [] -> List.rev acc
    | Macro ("PP", _) :: Text tag :: rest ->
      (match extract_bold_command_name tag with
       | None -> walk acc rest
       | Some name ->
         let desc, remaining = before_rs [] rest in
         let sc : subcommand =
           { name; desc = first_sentence (String.trim desc) }
         in
         walk (sc :: acc) remaining)
    | _ :: rest -> walk acc rest
  in
  walk [] lines
|
||||
|
||||
(* --- Top-level API --- *)
|
||||
|
||||
(* Parse a manpage given as raw lines.
   mdoc pages are delegated to parse_mdoc_lines. For man pages, options
   come from the options section, positionals from SYNOPSIS and
   subcommands from the COMMANDS sections. The description is left empty
   here. *)
let parse_manpage_lines lines =
  if is_mdoc lines then parse_mdoc_lines lines
  else
    let entries = extract_entries (extract_options_section lines) in
    let positionals = extract_synopsis_positionals_lines lines in
    let subcommands =
      extract_subcommands_from_commands (extract_commands_section lines)
    in
    { entries; subcommands; positionals; description = "" }
|
||||
|
||||
(* Parse a whole manpage held in a string. Same as parse_manpage_lines,
   with the description filled in from the NAME section (empty when none
   is found). *)
let parse_manpage_string contents =
  let parsed =
    contents |> String.split_on_char '\n' |> parse_manpage_lines
  in
  let description =
    match extract_name_description contents with
    | None -> ""
    | Some d -> d
  in
  { parsed with description }
|
||||
|
||||
(* --- Clap-style SUBCOMMAND section extraction --- *)
|
||||
(* Manpages generated by clap (Rust) put each subcommand under its own
   .SH SUBCOMMAND header with a Usage: line giving the name.
   For every SUBCOMMAND / SUBCOMMANDS section this returns a triple of the
   name found on the first Usage: line, the description (the text lines
   that precede that Usage: line, cleaned, with backtick quoting removed),
   and a help_result holding the entries extracted from the section.
   Sections without a Usage: line are dropped. *)
let extract_subcommand_sections contents =
  let classified =
    contents |> String.split_on_char '\n' |> List.map classify_line
  in
  (* Close the section being accumulated, if it is one we care about. *)
  let flush acc current_name current_lines =
    match current_name with
    | Some n -> (n, List.rev current_lines) :: acc
    | None -> acc
  in
  let rec collect_sections acc current_name current_lines = function
    | [] -> List.rev (flush acc current_name current_lines)
    | Macro ("SH", args) :: rest ->
      let acc = flush acc current_name current_lines in
      let name = String.uppercase_ascii (String.trim args) in
      let next =
        if name = "SUBCOMMAND" || name = "SUBCOMMANDS" then Some name
        else None
      in
      collect_sections acc next [] rest
    | line :: rest ->
      collect_sections acc current_name (line :: current_lines) rest
  in
  let sections = collect_sections [] None [] classified in
  let usage_re = Str.regexp {|Usage: \([a-zA-Z0-9_-]+\)|} in
  let backtick_re = Str.regexp "`\\([^`]*\\)`" in
  let matches_usage s =
    try
      ignore (Str.search_forward usage_re s 0);
      Some (Str.matched_group 1 s)
    with Not_found -> None
  in
  (* Scan up to the first Usage: line, remembering the text lines seen
     before it (most recent first). *)
  let rec find_usage desc_lines = function
    | [] -> (None, desc_lines)
    | Text s :: rest ->
      (match matches_usage s with
       | Some _ as found -> (found, desc_lines)
       | None -> find_usage (s :: desc_lines) rest)
    | Macro (("TP" | "B" | "BI" | "BR"), args) :: rest ->
      let s =
        strip_inline_macro_args args |> strip_groff_escapes |> String.trim
      in
      (match matches_usage s with
       | Some _ as found -> (found, desc_lines)
       | None -> find_usage desc_lines rest)
    | _ :: rest -> find_usage desc_lines rest
  in
  List.filter_map
    (fun (_header, section_lines) ->
       match find_usage [] section_lines with
       | None, _ -> None
       | Some subcmd_name, desc_lines ->
         let entries = extract_entries section_lines in
         let desc =
           String.concat " " (List.rev desc_lines)
           |> strip_groff_escapes
           |> String.trim
         in
         let desc = Str.global_replace backtick_re "\\1" desc in
         Some
           ( subcmd_name,
             desc,
             { entries; subcommands = []; positionals = []; description = desc }
           ))
    sections
|
||||
|
||||
(* Read a manpage from [path] and return its full contents.
   Files ending in .gz are decompressed through Gzip; everything else is
   read verbatim. The channel is closed even when reading raises, so a
   corrupt archive or a read error does not leak a file descriptor.
   Exceptions from opening or reading the file propagate to the caller. *)
let read_manpage_file path =
  if Filename.check_suffix path ".gz" then begin
    let ic = Gzip.open_in path in
    Fun.protect
      ~finally:(fun () -> Gzip.close_in ic)
      (fun () ->
         let buf = Buffer.create 8192 in
         let chunk = Bytes.create 8192 in
         (* A zero-length read and End_of_file both signal the end of the
            compressed stream. *)
         let rec drain () =
           match Gzip.input ic chunk 0 (Bytes.length chunk) with
           | 0 -> ()
           | n ->
             Buffer.add_subbytes buf chunk 0 n;
             drain ()
           | exception End_of_file -> ()
         in
         drain ();
         Buffer.contents buf)
  end else begin
    (* Binary mode keeps in_channel_length consistent with the number of
       bytes actually read on platforms that translate line endings. *)
    let ic = open_in_bin path in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () -> really_input_string ic (in_channel_length ic))
  end
|
||||
|
||||
(* Read the manpage at [path] (gzip-compressed or plain) and parse it. *)
let parse_manpage_file path =
  parse_manpage_string (read_manpage_file path)
|
||||
163
lib/nushell.ml
Normal file
163
lib/nushell.ml
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
open Parser
|
||||
|
||||
module SSet = Set.Make(String)
|
||||
module SMap = Map.Make(String)
|
||||
module CSet = Set.Make(Char)
|
||||
|
||||
(* Names of Nushell built-in commands and keywords, in alphabetical order,
   one initial letter per group. *)
let nushell_builtins = [
  (* a *)
  "alias"; "all"; "ansi"; "any"; "append"; "ast"; "attr";
  (* b *)
  "bits"; "break"; "bytes";
  (* c *)
  "cal"; "cd"; "char"; "chunk-by"; "chunks"; "clear"; "collect";
  "columns"; "commandline"; "compact"; "complete"; "config"; "const";
  "continue"; "cp";
  (* d *)
  "date"; "debug"; "decode"; "def"; "default"; "describe"; "detect";
  "do"; "drop"; "du";
  (* e *)
  "each"; "echo"; "encode"; "enumerate"; "error"; "every"; "exec";
  "exit"; "explain"; "explore"; "export"; "export-env"; "extern";
  (* f *)
  "fill"; "filter"; "find"; "first"; "flatten"; "for"; "format"; "from";
  (* g *)
  "generate"; "get"; "glob"; "grid"; "group-by";
  (* h *)
  "hash"; "headers"; "help"; "hide"; "hide-env"; "histogram";
  "history"; "http";
  (* i *)
  "if"; "ignore"; "input"; "insert"; "inspect"; "interleave"; "into";
  "is-admin"; "is-empty"; "is-not-empty"; "is-terminal"; "items";
  (* j *)
  "job"; "join";
  (* k *)
  "keybindings"; "kill";
  (* l *)
  "last"; "length"; "let"; "let-env"; "lines"; "load-env"; "loop"; "ls";
  (* m *)
  "match"; "math"; "merge"; "metadata"; "mkdir"; "mktemp"; "module";
  "move"; "mut"; "mv";
  (* n *)
  "nu-check"; "nu-highlight";
  (* o *)
  "open"; "overlay";
  (* p *)
  "panic"; "par-each"; "parse"; "path"; "plugin"; "port"; "prepend";
  "print"; "ps";
  (* q *)
  "query";
  (* r *)
  "random"; "reduce"; "reject"; "rename"; "return"; "reverse"; "rm";
  "roll"; "rotate"; "run-external";
  (* s *)
  "save"; "schema"; "scope"; "select"; "seq"; "shuffle"; "skip"; "sleep";
  "slice"; "sort"; "sort-by"; "source"; "source-env"; "split"; "start";
  "stor"; "str"; "sys";
  (* t *)
  "table"; "take"; "tee"; "term"; "timeit"; "to"; "touch"; "transpose";
  "try"; "tutor";
  (* u *)
  "ulimit"; "umask"; "uname"; "uniq"; "uniq-by"; "unlet"; "update";
  "upsert"; "url"; "use";
  (* v *)
  "values"; "version"; "view";
  (* w *)
  "watch"; "where"; "which"; "while"; "whoami"; "window"; "with-env";
  "wrap";
  (* z *)
  "zip";
]
|
||||
|
||||
(* Set view of nushell_builtins, built on first use. *)
let builtin_set =
  lazy
    (List.fold_left
       (fun set name -> SSet.add name set)
       SSet.empty nushell_builtins)
|
||||
|
||||
(* True when [cmd] is one of the names in nushell_builtins. *)
let is_nushell_builtin cmd =
  let builtins = Lazy.force builtin_set in
  SSet.mem cmd builtins
|
||||
|
||||
(* Remove duplicate option entries, keeping first-occurrence order.
   Entries are keyed by their long name when they have one and by their
   short letter otherwise. Among entries sharing a key the highest-scoring
   one is kept (the earlier one on a tie). The score favours entries that
   carry both spellings (10), a parameter (5) and a longer description (one
   point per ten characters, at most 5). A short-only entry is dropped when
   its letter is already covered by a kept entry with both spellings. *)
let dedup_entries entries =
  let key_of entry =
    match entry.switch with
    | Short c -> Printf.sprintf "-%c" c
    | Long l | Both (_, l) -> Printf.sprintf "--%s" l
  in
  let score entry =
    let switch_pts =
      match entry.switch with
      | Both _ -> 10
      | Short _ | Long _ -> 0
    in
    let param_pts =
      match entry.param with
      | Some _ -> 5
      | None -> 0
    in
    switch_pts + param_pts + min 5 (String.length entry.desc / 10)
  in
  let remember table e =
    let k = key_of e in
    match SMap.find_opt k table with
    | Some prev when score prev >= score e -> table
    | _ -> SMap.add k e table
  in
  let best = List.fold_left remember SMap.empty entries in
  let covered =
    SMap.fold
      (fun _ e shorts ->
         match e.switch with
         | Both (c, _) -> CSet.add c shorts
         | Short _ | Long _ -> shorts)
      best CSet.empty
  in
  let _, kept =
    List.fold_left
      (fun (seen, kept) e ->
         let k = key_of e in
         let redundant_short =
           match e.switch with
           | Short c -> CSet.mem c covered
           | Long _ | Both _ -> false
         in
         if SSet.mem k seen || redundant_short then (seen, kept)
         else (SSet.add k seen, SMap.find k best :: kept))
      (SSet.empty, []) entries
  in
  List.rev kept
|
||||
|
||||
(* Nushell type for a parameter placeholder name: path for file-like
   names, int for count-like names, string for everything else. Matching
   is exact and case-sensitive. *)
let nushell_type_of_param param =
  let path_like =
    [ "FILE"; "file"; "PATH"; "path"; "DIR"; "dir"; "DIRECTORY";
      "FILENAME"; "PATTERNFILE" ]
  and int_like =
    [ "NUM"; "N"; "COUNT"; "NUMBER"; "int"; "INT"; "COLS"; "WIDTH";
      "LINES"; "DEPTH"; "depth" ]
  in
  if List.mem param path_like then "path"
  else if List.mem param int_like then "int"
  else "string"
|
||||
|
||||
(* Escape [s] for use inside a double-quoted Nushell string: every double
   quote and every backslash gets a backslash in front of it. Strings
   containing neither are returned unchanged. *)
let escape_nu s =
  let special c = c = '"' || c = '\\' in
  if not (String.contains s '"' || String.contains s '\\') then s
  else begin
    let out = Buffer.create (String.length s + 4) in
    String.iter
      (fun c ->
         if special c then Buffer.add_char out '\\';
         Buffer.add_char out c)
      s;
    Buffer.contents out
  end
|
||||
|
||||
(* Render one option as a line of a Nushell extern signature: the flag
   spelling, an optional type annotation derived from the parameter name,
   and, when there is a description, a trailing comment padded so that it
   starts at column 40 where the flag is short enough (at least one space
   otherwise). *)
let format_flag entry =
  let name =
    match entry.switch with
    | Short s -> Printf.sprintf "-%c" s
    | Long l -> Printf.sprintf "--%s" l
    | Both (s, l) -> Printf.sprintf "--%s(-%c)" l s
  in
  let typed =
    match entry.param with
    | None -> ""
    | Some (Mandatory p | Optional p) -> ": " ^ nushell_type_of_param p
  in
  let flag = String.concat "" [" "; name; typed] in
  if entry.desc = "" then flag
  else
    let pad_len = max 1 (40 - String.length flag) in
    String.concat "" [flag; String.make pad_len ' '; "# "; entry.desc]
|
||||
|
||||
(* Render one positional as a line of a Nushell extern signature.
   Dashes in the name become underscores, variadic positionals get a
   leading three-dot prefix, and optional non-variadic ones a trailing
   question mark. The type is looked up from the uppercased name. *)
let format_positional p =
  let name = String.map (fun c -> if c = '-' then '_' else c) p.pos_name in
  let prefix, suffix =
    match p.variadic, p.optional with
    | true, _ -> ("...", "")
    | false, true -> ("", "?")
    | false, false -> ("", "")
  in
  let typ = nushell_type_of_param (String.uppercase_ascii p.pos_name) in
  Printf.sprintf " %s%s%s: %s" prefix name suffix typ
|
||||
|
||||
(* Adjust a positional list to Nushell's signature rules: no required
   positional after an optional one, and only one rest parameter.
   Only the first variadic positional is kept. Every non-variadic
   positional that follows an optional or variadic one is made optional. *)
let fixup_positionals positionals =
  let rec go saw_opt saw_rest acc = function
    | [] -> List.rev acc
    | p :: rest when p.variadic ->
      if saw_rest then go saw_opt saw_rest acc rest
      else go true true (p :: acc) rest
    | p :: rest when saw_opt ->
      go true saw_rest ({ p with optional = true } :: acc) rest
    | p :: rest -> go p.optional saw_rest (p :: acc) rest
  in
  go false false [] positionals
|
||||
|
||||
(* Nushell source declaring [cmd_name] as an extern: one export extern
   block listing the fixed-up positionals followed by the de-duplicated
   flags, then one empty export extern block per subcommand with its
   description as a comment. *)
let extern_of cmd_name result =
  let cmd = escape_nu cmd_name in
  let render_lines render items =
    String.concat "" (List.map (fun item -> render item ^ "\n") items)
  in
  let positional_block =
    render_lines format_positional (fixup_positionals result.positionals)
  in
  let flag_block = render_lines format_flag (dedup_entries result.entries) in
  let main =
    Printf.sprintf "export extern \"%s\" [\n%s%s]\n"
      cmd positional_block flag_block
  in
  let subs =
    List.map
      (fun (sc : subcommand) ->
         Printf.sprintf "\nexport extern \"%s %s\" [ # %s\n]\n"
           cmd (escape_nu sc.name) (escape_nu sc.desc))
      result.subcommands
  in
  String.concat "" (main :: subs)
|
||||
|
||||
(* Public name for extern_of. *)
let generate_extern cmd_name result = extern_of cmd_name result
|
||||
|
||||
(* Module name used for a command's completions: the command name with
   every character other than ASCII letters, digits, dash and underscore
   replaced by a dash, followed by the suffix -completions. *)
let module_name_of cmd_name =
  let sanitize c =
    match c with
    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' -> c
    | _ -> '-'
  in
  String.map sanitize cmd_name ^ "-completions"
|
||||
|
||||
(* Wrap the externs for [cmd_name] in a Nushell module named by
   module_name_of, followed by a use line that imports everything from
   it. *)
let generate_module cmd_name result =
  let module_name = module_name_of cmd_name in
  let externs = extern_of cmd_name result in
  Printf.sprintf "module %s {\n%s}\n\nuse %s *\n"
    module_name externs module_name
|
||||
|
||||
(* Extern declaration for a command known only by its option entries: no
   subcommands, no positionals, empty description. *)
let generate_extern_from_entries cmd_name entries =
  let result =
    { entries; subcommands = []; positionals = []; description = "" }
  in
  generate_extern cmd_name result
|
||||
587
lib/parser.ml
Normal file
587
lib/parser.ml
Normal file
|
|
@ -0,0 +1,587 @@
|
|||
open Angstrom
|
||||
|
||||
(* Strip ANSI escape sequences and OSC hyperlinks from --help output.
   Three forms are removed, each introduced by ESC (0x1b):
   - CSI: ESC [ up to and including the first byte in the range @ .. ~
   - OSC: ESC ] up to and including a BEL, or an ESC followed by backslash
   - anything else: ESC plus the one character after it
   Unterminated CSI/OSC sequences swallow the rest of the string. An ESC
   that is the very last character is kept. *)
let strip_ansi s =
  let len = String.length s in
  let out = Buffer.create len in
  let is_esc i = Char.code s.[i] = 0x1b in
  (* Index just past the final byte of a CSI sequence whose body starts
     at [i]. *)
  let rec skip_csi i =
    if i >= len then i
    else if s.[i] >= '@' && s.[i] <= '~' then i + 1
    else skip_csi (i + 1)
  in
  (* Index just past the terminator of an OSC sequence whose body starts
     at [i]. *)
  let rec skip_osc i =
    if i >= len then i
    else if s.[i] = '\x07' then i + 1
    else if i + 1 < len && is_esc i && s.[i + 1] = '\\' then i + 2
    else skip_osc (i + 1)
  in
  let rec go i =
    if i < len then begin
      if i + 1 < len && is_esc i then
        (match s.[i + 1] with
         | '[' -> go (skip_csi (i + 2))
         | ']' -> go (skip_osc (i + 2))
         | _ -> go (i + 2))
      else begin
        Buffer.add_char out s.[i];
        go (i + 1)
      end
    end
  in
  go 0;
  Buffer.contents out
|
||||
|
||||
(* Character classes used by the help-text parsers. *)

(* Space or tab (newlines are not included). *)
let is_whitespace c = c = ' ' || c = '\t'

(* ASCII letter or digit. *)
let is_alphanumeric c =
  (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')

(* Characters allowed in a parameter name: ASCII alphanumerics, '_' and '-'. *)
let is_param_char c =
  (c >= 'A' && c <= 'Z')
  || (c >= 'a' && c <= 'z')
  || (c >= '0' && c <= '9')
  || c = '_'
  || c = '-'

(* Upper-case ASCII letter or '_'. *)
let is_upper_or_underscore c = (c >= 'A' && c <= 'Z') || c = '_'

(* Characters allowed in a long option name: ASCII alphanumerics and '-'
   (underscore is not accepted). *)
let is_long_char c =
  (c >= 'A' && c <= 'Z')
  || (c >= 'a' && c <= 'z')
  || (c >= '0' && c <= '9')
  || c = '-'
|
||||
|
||||
(* How a flag is spelled: short only (-a), long only (--all), or both. *)
type switch =
  | Short of char
  | Long of string
  | Both of char * string

(* Value taken by a flag, carrying the placeholder name. *)
type param =
  | Mandatory of string
  | Optional of string

(* One flag line: its spelling, optional value and description text. *)
type entry = {
  switch : switch;
  param : param option;
  desc : string;
}

(* One subcommand line: name and description. *)
type subcommand = {
  name : string;
  desc : string;
}

(* One positional argument with its optional/variadic markers. *)
type positional = {
  pos_name : string;
  optional : bool;
  variadic : bool;
}

(* Everything extracted from one help text. *)
type help_result = {
  entries : entry list;
  subcommands : subcommand list;
  positionals : positional list;
  description : string;
}
|
||||
|
||||
(* --- Low-level combinators --- *)
|
||||
|
||||
(* Skip a (possibly empty) run of spaces and tabs. *)
let inline_ws = skip_while (fun c -> c = ' ' || c = '\t')

(* End of the current line: a newline sequence, or the end of the input. *)
let eol = end_of_line <|> end_of_input

(* Like [eol] but must consume a newline; does not match at end of input. *)
let eol_strict = end_of_line

(* "-x": a dash followed by one alphanumeric character, which is returned. *)
let short_switch = char '-' >>= fun _ -> satisfy is_alphanumeric

(* "--name": returns the name without the leading dashes. *)
let long_switch = string "--" >>= fun _ -> take_while1 is_long_char

(* A comma followed by optional inline whitespace. *)
let comma = char ',' >>= fun _ -> inline_ws
|
||||
|
||||
(* Parameter parsers *)
|
||||
(* "[=NAME]" attached directly to the switch: an optional value. *)
let eq_opt_param =
  lift
    (fun name -> Optional name)
    (string "[=" *> take_while1 is_param_char <* char ']')

(* "=NAME" attached directly to the switch: a mandatory value. *)
let eq_man_param =
  lift (fun name -> Mandatory name) (char '=' *> take_while1 is_param_char)
|
||||
|
||||
(* Space-separated ALL_CAPS param, e.g. " FILE", " TIME_STYLE".
   The token is read with [is_param_char] and then rejected unless every
   character is an upper-case letter, '_' or a digit, so that a capitalised
   description word such as "Do" is not mistaken for a parameter. *)
let space_upper_param =
  let is_caps_char c = is_upper_or_underscore c || (c >= '0' && c <= '9') in
  char ' ' *> peek_char_fail >>= fun first ->
  if not (is_upper_or_underscore first) then fail "not an uppercase param"
  else
    take_while1 is_param_char >>= fun name ->
    if String.for_all is_caps_char name then return (Mandatory name)
    else fail "not an all-caps param"
|
||||
|
||||
(* Angle-bracket param, e.g. "<file>", "<notation>": mandatory value named by
   the text between the brackets. *)
let angle_param =
  let inside = take_while1 (fun c -> c <> '>') in
  lift (fun name -> Mandatory name) (char '<' *> inside <* char '>')

(* A single space followed by an angle-bracket param. *)
let space_angle_param = char ' ' >>= fun _ -> angle_param

(* Optional angle-bracket param: "[<file>]". *)
let opt_angle_param =
  let inside = take_while1 (fun c -> c <> '>') in
  lift
    (fun name -> Optional name)
    (char '[' *> char '<' *> inside <* char '>' <* char ']')

(* A single space followed by an optional angle-bracket param. *)
let space_opt_angle_param = char ' ' >>= fun _ -> opt_angle_param
|
||||
|
||||
(* Go/Cobra style: a space followed by a lowercase type word such as
   "string", "list" or "int". Words longer than 10 characters are rejected. *)
let space_type_param =
  let is_lower c = c >= 'a' && c <= 'z' in
  char ' ' *> peek_char_fail >>= fun first ->
  if not (is_lower first) then fail "not a lowercase type param"
  else
    take_while1 is_lower >>= fun word ->
    if String.length word > 10 then fail "too long for type param"
    else return (Mandatory word)
|
||||
|
||||
(* Optional parameter following a switch. The alternatives are tried in this
   order; when none matches the result is [None] and nothing is consumed. *)
let param_parser =
  let alternatives =
    [ eq_opt_param;
      eq_man_param;
      space_opt_angle_param;
      space_angle_param;
      space_upper_param;
      space_type_param ]
  in
  option None (choice alternatives >>| Option.some)
|
||||
|
||||
(* Switch parser. Accepted spellings, tried in this order:
     -a, --all   |   -a --all   |   --all / -a   |   -a   |   --all *)
let switch_parser =
  let both short long = Both (short, long) in
  let short_comma_long = lift2 both short_switch (comma *> long_switch) in
  let short_space_long = lift2 both short_switch (char ' ' *> long_switch) in
  let long_slash_short =
    lift2
      (fun long short -> Both (short, long))
      long_switch
      (inline_ws *> char '/' *> inline_ws *> short_switch)
  in
  let short_only = lift (fun c -> Short c) short_switch in
  let long_only = lift (fun name -> Long name) long_switch in
  choice
    [ short_comma_long;
      short_space_long;
      long_slash_short;
      short_only;
      long_only ]
|
||||
|
||||
(* --- Description parsing with multi-line continuation --- *)
|
||||
|
||||
(* Everything up to, but not including, the next '\n' or '\r'
   (the line terminator itself is left unconsumed). *)
let rest_of_line = take_till (function '\n' | '\r' -> true | _ -> false)
|
||||
|
||||
(* A continuation line of a multi-line description: indented by at least 8
   columns (a tab counts as 8) and not starting with '-'.
   Looks ahead at up to 80 characters of input; on success consumes the leading
   whitespace, returns the remaining text of the line and consumes the line
   end. Fails at end of input. *)
let continuation_line =
  (* Width of the leading run of spaces/tabs of [s], starting at index [i]. *)
  let rec indent_width s i acc =
    if i >= String.length s then acc
    else
      match s.[i] with
      | ' ' -> indent_width s (i + 1) (acc + 1)
      | '\t' -> indent_width s (i + 1) (acc + 8)
      | _ -> acc
  in
  let begins_with_dash s =
    let t = String.trim s in
    t <> "" && t.[0] = '-'
  in
  peek_string 1 *> available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 80) >>= fun preview ->
    if indent_width preview 0 0 >= 8 && not (begins_with_dash preview) then
      inline_ws *> rest_of_line <* eol
    else fail "not a continuation line"
|
||||
|
||||
(* Description text on the same line as the flag, plus any continuation lines
   below it. Each piece is trimmed, blank pieces are dropped and the rest is
   joined with single spaces. *)
let description =
  inline_ws *> rest_of_line <* eol >>= fun first_line ->
  many continuation_line >>| fun cont_lines ->
  first_line :: cont_lines
  |> List.map String.trim
  |> List.filter (fun piece -> piece <> "")
  |> String.concat " "
|
||||
|
||||
(* Description that appears on separate lines below the flag (Clap long
   style): one or more continuation lines, trimmed, with blank ones dropped,
   joined with single spaces. *)
let description_below =
  many1 continuation_line >>| fun lines ->
  lines
  |> List.map String.trim
  |> List.filter (fun piece -> piece <> "")
  |> String.concat " "
|
||||
|
||||
(* --- Line classification for skipping --- *)
|
||||
|
||||
(* Lookahead only (consumes nothing): succeeds when the next up-to-40
   characters of input, once trimmed, start with '-'. Fails at end of input. *)
let at_option_line =
  let begins_with_dash s =
    let t = String.trim s in
    t <> "" && t.[0] = '-'
  in
  peek_string 1 *> available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 40) >>= fun preview ->
    if begins_with_dash preview then return ()
    else fail "not an option line"
|
||||
|
||||
(* Skip one line (section header, blank, description-only, etc.); requires a
   terminating newline, so it never matches at end of input.

   NOTE(review): the first alternative is meant to refuse option lines, but
   it fails in both cases (lookahead miss, or the explicit [fail]) and [<|>]
   then tries the second alternative, so this appears to consume option lines
   as well. Confirm whether that fall-through is intended before changing it:
   the top-level parser relies on this parser to get past lines that no other
   alternative accepts. *)
let skip_non_option_line =
  let reject_option_line = at_option_line *> fail "this is an option line" in
  let consume_line = rest_of_line *> eol_strict *> return () in
  reject_option_line <|> consume_line
|
||||
|
||||
(* --- Entry parsing --- *)
|
||||
|
||||
(* Parse a single flag entry: leading whitespace, the switch, an optional
   parameter, then a description that is either on the same line, on the
   following continuation lines, or absent (empty string). *)
let entry =
  let flag_and_param =
    lift2 (fun sw p -> (sw, p)) switch_parser param_parser
  in
  let desc_inline_or_below =
    description <|> (eol *> (description_below <|> return ""))
  in
  inline_ws *> flag_and_param >>= fun (switch, param) ->
  desc_inline_or_below >>| fun desc -> { switch; param; desc }
|
||||
|
||||
(* --- Subcommand parsing --- *)
|
||||
|
||||
(* A subcommand line: " name description"
|
||||
Also handles argument placeholders: " name UNIT... description" *)
|
||||
(* Characters allowed in a subcommand name: ASCII alphanumerics, '-' and '_'. *)
let is_subcommand_char c =
  (c >= 'a' && c <= 'z')
  || (c >= 'A' && c <= 'Z')
  || (c >= '0' && c <= '9')
  || c = '-'
  || c = '_'
|
||||
|
||||
(* Skip argument placeholders like UNIT..., [PATTERN...|PID...], <file>
   that appear between the subcommand name and the description.
   Only single-space gaps are consumed; a two-space gap (the one before the
   description) is left for the caller. A token counts as a placeholder when
   it starts with '[' or '<', or consists solely of upper-case letters,
   digits and the characters "_-.|,". Always succeeds. *)
let skip_arg_placeholders =
  let is_caps_token_char c =
    (c >= 'A' && c <= 'Z')
    || (c >= '0' && c <= '9')
    || c = '_' || c = '-' || c = '.' || c = '|' || c = ','
  in
  (* Index of the first space / newline / CR at or after [i], or the end. *)
  let rec token_end preview i =
    if i < String.length preview
       && (match preview.[i] with ' ' | '\n' | '\r' -> false | _ -> true)
    then token_end preview (i + 1)
    else i
  in
  fix (fun self ->
    available >>= fun avail ->
    if avail < 2 then return ()
    else
      peek_string 2 >>= fun head ->
      let next = head.[1] in
      let single_space = head.[0] = ' ' && next <> ' ' in
      let arg_like =
        next = '[' || next = '<' || (next >= 'A' && next <= 'Z')
      in
      if not (single_space && arg_like) then return ()
      else
        (* Look at the whole token (within an 80-char window) to decide. *)
        peek_string (min avail 80) >>= fun preview ->
        let tok = String.sub preview 1 (token_end preview 1 - 1) in
        let is_placeholder =
          tok.[0] = '[' || tok.[0] = '<'
          || String.for_all is_caps_token_char tok
        in
        if is_placeholder then advance (1 + String.length tok) *> self
        else return ())
|
||||
|
||||
(* One subcommand line: optional indentation, a name of at least two
   characters, optional argument placeholders, a gap of two or more spaces,
   then the description up to the end of the line. The name is lower-cased;
   the description is trimmed and a leading "- " separator is removed. *)
let subcommand_entry =
  let clean_desc raw =
    let t = String.trim raw in
    if String.length t >= 2 && t.[0] = '-' && t.[1] = ' ' then
      String.trim (String.sub t 2 (String.length t - 2))
    else t
  in
  let gap = char ' ' *> char ' ' *> inline_ws in
  inline_ws *> take_while1 is_subcommand_char >>= fun raw_name ->
  if String.length raw_name < 2 then fail "subcommand name too short"
  else
    skip_arg_placeholders *> gap *> rest_of_line <* eol >>| fun text ->
    { name = String.lowercase_ascii raw_name; desc = clean_desc text }
|
||||
|
||||
(* --- Section header detection --- *)
|
||||
|
||||
(* Recognise headings such as "Arguments:" or "POSITIONALS:" that introduce
   positional-argument sections (where name+desc lines are NOT subcommands).
   The comparison is case-insensitive, ignores surrounding whitespace and a
   single trailing ':'. *)
let is_arg_section s =
  let heading = String.lowercase_ascii (String.trim s) in
  let heading =
    if String.ends_with ~suffix:":" heading then
      String.trim (String.sub heading 0 (String.length heading - 1))
    else heading
  in
  List.mem heading
    [ "arguments"; "args"; "positionals"; "positional arguments" ]
|
||||
|
||||
(* A section header: a line indented by at most 4 blanks whose trimmed text is
   at least two characters long, ends with ':' and does not start with '-'.
   Must be tried BEFORE subcommand_entry in a choice.
   On success the whole line (newline required) is consumed and the result
   says whether the heading introduces a positional-argument section. *)
let section_header =
  let leading_blanks line =
    let rec count i =
      if i < String.length line && (line.[i] = ' ' || line.[i] = '\t')
      then count (i + 1)
      else i
    in
    count 0
  in
  let looks_like_header line =
    let t = String.trim line in
    let n = String.length t in
    n >= 2 && t.[n - 1] = ':' && t.[0] <> '-' && leading_blanks line <= 4
  in
  available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 80) >>= fun preview ->
    (* Only the first line of the lookahead window is inspected. *)
    let first_line =
      match String.index_opt preview '\n' with
      | Some nl -> String.sub preview 0 nl
      | None -> preview
    in
    if looks_like_header first_line then
      rest_of_line <* eol_strict >>| is_arg_section
    else fail "not a section header"
|
||||
|
||||
(* --- Top-level parser --- *)
|
||||
|
||||
(* The main help parser: walks the text line by line. Each line is tried as a
   flag entry, a section header, a subcommand, and finally skipped.
   The result contains all flag entries in order and the subcommands that were
   not listed under an Arguments/Positionals heading; when a subcommand name
   appears more than once, the occurrence with the longest description wins
   (the earlier one on ties). [positionals] and [description] are left empty
   here. *)
let help_parser =
  let item =
    choice
      [ (entry >>| fun e -> `Entry e);
        (section_header >>| fun is_arg -> `Section is_arg);
        (subcommand_entry >>| fun sc -> `Subcommand sc);
        (skip_non_option_line >>| fun () -> `Skip) ]
  in
  (* Subcommands in document order, dropping those inside an argument section. *)
  let outside_arg_sections items =
    let _, collected =
      List.fold_left
        (fun (in_arg_sec, acc) item ->
          match item with
          | `Section is_arg -> (is_arg, acc)
          | `Subcommand sc when not in_arg_sec -> (in_arg_sec, sc :: acc)
          | _ -> (in_arg_sec, acc))
        (false, []) items
    in
    List.rev collected
  in
  (* Keep one record per name, preferring the longer description. *)
  let dedup subs =
    List.fold_left
      (fun kept sc ->
        match List.assoc_opt sc.name kept with
        | Some prev when String.length prev.desc >= String.length sc.desc ->
          kept
        | Some _ | None -> (sc.name, sc) :: List.remove_assoc sc.name kept)
      [] subs
    |> List.rev_map snd
  in
  many item >>| fun items ->
  let entries =
    List.filter_map (function `Entry e -> Some e | _ -> None) items
  in
  let subcommands = dedup (outside_arg_sections items) in
  { entries; subcommands; positionals = []; description = "" }
|
||||
|
||||
(* Given the text of a usage line (after "Usage:"), return the index at which
   the argument part starts, i.e. just past the leading command words.
   Words made of [A-Za-z0-9_./-] that contain at least one lower-case letter
   are treated as command words and skipped together with the blanks that
   follow. Scanning stops at the end of the string, at a '[', '<', '(', '{'
   or '-', at any other non-word character, or at the start of a word with no
   lower-case letter (taken to be a placeholder such as FILE). *)
let skip_command_prefix s =
  let len = String.length s in
  let is_word_char = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' | '/' | '.' -> true
    | _ -> false
  in
  let rec skip_blank i =
    if i < len && (s.[i] = ' ' || s.[i] = '\t') then skip_blank (i + 1) else i
  in
  let rec word_end i =
    if i < len && is_word_char s.[i] then word_end (i + 1) else i
  in
  let has_lowercase first stop =
    String.exists
      (fun c -> c >= 'a' && c <= 'z')
      (String.sub s first (stop - first))
  in
  let rec go i =
    let i = skip_blank i in
    if i >= len then i
    else
      match s.[i] with
      | '[' | '<' | '(' | '{' | '-' -> i
      | c when is_word_char c ->
        let stop = word_end i in
        if has_lowercase i stop then go stop else i
      | _ -> i
  in
  go 0
|
||||
|
||||
(* Extract positional arguments from the argument part of a usage line.

   Recognised forms:
   - [NAME] / [<name>] / [NAME...]   optional positional;
   - <name>                          mandatory positional;
   - NAME (all caps, >= 2 chars)     mandatory positional;
   each optionally followed by "..." (or the UTF-8 ellipsis) to mark it
   variadic. {a|b|c} alternative groups and tokens starting with '-' are
   skipped, as are the names OPTION(S) and FLAG(S). Names are lower-cased and
   only the first occurrence of each name is kept, in order of appearance. *)
let parse_usage_args s =
  let len = String.length s in
  let pos = ref 0 in
  let found = ref [] in
  let is_blank c = c = ' ' || c = '\t' in
  let is_upper c = c >= 'A' && c <= 'Z' in
  let is_lower c = c >= 'a' && c <= 'z' in
  let is_digit c = c >= '0' && c <= '9' in
  let is_pos_char c = is_upper c || is_digit c || c = '_' || c = '-' in
  let skip_ws () = while !pos < len && is_blank s.[!pos] do incr pos done in
  let looking_at lit =
    let n = String.length lit in
    !pos + n <= len && String.sub s !pos n = lit
  in
  (* Consume blanks, then "..." or the UTF-8 ellipsis if present. *)
  let read_dots () =
    skip_ws ();
    if looking_at "..." || looking_at "\xe2\x80\xa6" then begin
      pos := !pos + 3;
      true
    end else false
  in
  let is_skip name =
    List.mem (String.uppercase_ascii name)
      [ "OPTIONS"; "OPTION"; "FLAGS"; "FLAG" ]
  in
  let is_clean_name name =
    String.length name >= 2
    && String.for_all
         (fun c -> is_upper c || is_lower c || is_digit c || c = '_' || c = '-')
         name
  in
  let record ~optional ~variadic name =
    found :=
      { pos_name = String.lowercase_ascii name; optional; variadic } :: !found
  in
  (* With [pos] just past an opening delimiter, advance past its matching
     closing delimiter (or to the end of the string if unbalanced). *)
  let skip_balanced opening closing =
    let depth = ref 1 in
    while !pos < len && !depth > 0 do
      let c = s.[!pos] in
      if c = opening then incr depth else if c = closing then decr depth;
      incr pos
    done
  in
  while !pos < len do
    skip_ws ();
    if !pos < len then begin
      match s.[!pos] with
      | '{' ->
        (* {A|c|d|...} alternative block *)
        incr pos;
        skip_balanced '{' '}';
        ignore (read_dots ())
      | '[' ->
        incr pos;
        let start = !pos in
        skip_balanced '[' ']';
        let raw =
          String.trim (String.sub s start (max 0 (!pos - 1 - start)))
        in
        let inner, inner_dots =
          if String.ends_with ~suffix:"..." raw then
            (String.trim (String.sub raw 0 (String.length raw - 3)), true)
          else (raw, false)
        in
        let variadic = inner_dots || read_dots () in
        if inner <> "" && (is_upper inner.[0] || is_lower inner.[0]
                           || inner.[0] = '<') then begin
          let name =
            if inner.[0] = '<' then begin
              let close =
                match String.index_opt inner '>' with
                | Some k -> k
                | None -> String.length inner
              in
              String.sub inner 1 (close - 1)
            end else inner
          in
          if is_clean_name name && not (is_skip name) then
            record ~optional:true ~variadic name
        end
      | '<' ->
        incr pos;
        let start = !pos in
        while !pos < len && s.[!pos] <> '>' do incr pos done;
        let name = String.sub s start (!pos - start) in
        if !pos < len then incr pos;
        let variadic = read_dots () in
        if is_clean_name name && not (is_skip name) then
          record ~optional:false ~variadic name
      | '-' ->
        (* A flag token: skip up to the next blank or ']'. *)
        while !pos < len && not (is_blank s.[!pos]) && s.[!pos] <> ']' do
          incr pos
        done
      | 'A' .. 'Z' ->
        let start = !pos in
        while !pos < len && is_pos_char s.[!pos] do incr pos done;
        let name = String.sub s start (!pos - start) in
        let variadic = read_dots () in
        if String.length name >= 2 && not (is_skip name) then
          record ~optional:false ~variadic name
      | _ -> incr pos
    end
  done;
  (* [found] is newest-first; restore document order and keep first hits. *)
  let _, unique =
    List.fold_left
      (fun (seen, kept) p ->
        if List.mem p.pos_name seen then (seen, kept)
        else (p.pos_name :: seen, p :: kept))
      ([], []) (List.rev !found)
  in
  List.rev unique
|
||||
|
||||
(* Find the first usage line in [text] and extract its positionals.
   A usage line is either "usage: <text>" (case-insensitive) on one line, or a
   bare "usage:" / "usage" heading whose text is on the following line (Clap
   style). Only the first such heading is considered; if it yields no text the
   result is the empty list. *)
let extract_usage_positionals text =
  let non_empty line =
    match String.trim line with
    | "" -> None
    | trimmed -> Some trimmed
  in
  let following = function
    | [] -> None
    | next :: _ -> non_empty next
  in
  let rec find_usage = function
    | [] -> None
    | line :: rest ->
      let t = String.trim line in
      let lc = String.lowercase_ascii t in
      if String.starts_with ~prefix:"usage:" lc then begin
        match non_empty (String.sub t 6 (String.length t - 6)) with
        | Some _ as same_line -> same_line
        | None -> following rest
      end
      else if lc = "usage" then following rest
      else find_usage rest
  in
  match find_usage (String.split_on_char '\n' text) with
  | None -> []
  | Some usage ->
    let cmd_end = skip_command_prefix usage in
    parse_usage_args (String.sub usage cmd_end (String.length usage - cmd_end))
|
||||
|
||||
(* Extract positionals from a CLI11-style "POSITIONALS:" / "Positionals:"
   section. Lines are read until the first empty, unindented or
   whitespace-only line. Each line contributes its leading name (at least two
   characters of [A-Za-z0-9_-]); a following upper-case type word (TEXT, INT,
   ...) is skipped and a "..." after it marks the positional variadic.
   All results are reported as non-optional, names lower-cased. *)
let extract_cli11_positionals text =
  let is_blank c = c = ' ' || c = '\t' in
  let is_upper c = c >= 'A' && c <= 'Z' in
  let is_name_char = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' | '-' -> true
    | _ -> false
  in
  let parse_one s =
    let len = String.length s in
    let rec span pred i =
      if i < len && pred s.[i] then span pred (i + 1) else i
    in
    let name_end = span is_name_char 0 in
    if name_end < 2 then None
    else begin
      let after_type =
        name_end |> span is_blank |> span is_upper |> span is_blank
      in
      let variadic =
        after_type + 2 < len && String.sub s after_type 3 = "..."
      in
      Some
        { pos_name = String.lowercase_ascii (String.sub s 0 name_end);
          optional = false;
          variadic }
    end
  in
  let rec collect acc = function
    | [] -> List.rev acc
    | line :: rest ->
      let indented = line <> "" && is_blank line.[0] in
      let t = String.trim line in
      if (not indented) || t = "" then List.rev acc
      else begin
        match parse_one t with
        | Some p -> collect (p :: acc) rest
        | None -> collect acc rest
      end
  in
  let rec find_section = function
    | [] -> []
    | line :: rest ->
      (match String.trim line with
       | "POSITIONALS:" | "Positionals:" -> collect [] rest
       | _ -> find_section rest)
  in
  find_section (String.split_on_char '\n' text)
|
||||
|
||||
(* Parse raw --help output. Escape sequences are stripped first, then flags
   and subcommands are parsed with [help_parser] (a prefix parse), and
   positionals are taken from a CLI11 positionals section when there is one,
   otherwise from the usage line. Returns [Error msg] if the parser fails. *)
let parse_help txt =
  let clean = strip_ansi txt in
  Angstrom.parse_string ~consume:Consume.Prefix help_parser clean
  |> Result.map (fun result ->
       let positionals =
         match extract_cli11_positionals clean with
         | [] -> extract_usage_positionals clean
         | cli11 -> cli11
       in
       { result with positionals })
|
||||
366
lib/store.ml
Normal file
366
lib/store.ml
Normal file
|
|
@ -0,0 +1,366 @@
|
|||
open Parser
|
||||
|
||||
(* Default directory for the completion store: [$XDG_CACHE_HOME/inshellah],
   falling back to [$HOME/.cache/inshellah].

   An XDG_CACHE_HOME that is set but empty is treated as unset, as the XDG
   Base Directory specification requires; previously it produced the relative
   path "inshellah".

   @raise Not_found if the fallback is needed and HOME is not set. *)
let default_store_path () =
  let non_empty var =
    match Sys.getenv_opt var with
    | Some value when value <> "" -> Some value
    | Some _ | None -> None
  in
  let cache =
    match non_empty "XDG_CACHE_HOME" with
    | Some dir -> dir
    | None -> Filename.concat (Sys.getenv "HOME") ".cache"
  in
  Filename.concat cache "inshellah"
|
||||
|
||||
(* Create [dir] and any missing parents (like [mkdir -p]) with mode 0o755.
   Paths that already exist are left alone.

   A directory that appears between the existence check and the [mkdir] call
   (e.g. created by a concurrent process) is no longer an error: EEXIST is
   ignored. Other [Unix.Unix_error]s still propagate. *)
let ensure_dir dir =
  let rec mkdir_p d =
    if not (Sys.file_exists d) then begin
      mkdir_p (Filename.dirname d);
      try Unix.mkdir d 0o755
      with Unix.Unix_error (Unix.EEXIST, _, _) -> ()
    end
  in
  mkdir_p dir
|
||||
|
||||
(* Map a command (possibly "cmd sub sub") to a file base name: spaces become
   '_', characters in [A-Za-z0-9_.-] are kept, anything else becomes '-'. *)
let filename_of_command cmd =
  let encode ch =
    match ch with
    | ' ' -> '_'
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' | '.' -> ch
    | _ -> '-'
  in
  String.map encode cmd
|
||||
|
||||
(* Turn a file base name back into a command string by replacing every '_'
   with a space. *)
let command_of_filename base =
  let decode ch = if ch = '_' then ' ' else ch in
  String.map decode base
|
||||
|
||||
(* --- JSON serialization of help_result --- *)
|
||||
|
||||
(* Escape [s] for inclusion inside a JSON string literal (without the
   surrounding quotes): '"', '\\', newline, tab and CR get their short
   escapes, other bytes below 0x20 become \u00XX, everything else is copied
   unchanged. *)
let escape_json s =
  let out = Buffer.create (String.length s + 4) in
  for i = 0 to String.length s - 1 do
    match s.[i] with
    | '"' -> Buffer.add_string out "\\\""
    | '\\' -> Buffer.add_string out "\\\\"
    | '\n' -> Buffer.add_string out "\\n"
    | '\t' -> Buffer.add_string out "\\t"
    | '\r' -> Buffer.add_string out "\\r"
    | ch ->
      if Char.code ch < 0x20 then
        Buffer.add_string out (Printf.sprintf "\\u%04x" (Char.code ch))
      else Buffer.add_char out ch
  done;
  Buffer.contents out
|
||||
|
||||
(* [s] as a quoted, escaped JSON string literal. *)
let json_string s =
  let escaped = escape_json s in
  Printf.sprintf "\"%s\"" escaped

(* The JSON null literal. *)
let json_null = "null"
|
||||
|
||||
(* Serialize a switch as an object tagged by "type" ("short", "long" or
   "both"), with "char" and/or "name" fields as applicable. *)
let json_switch_of sw =
  let char_json c = json_string (String.make 1 c) in
  match sw with
  | Short c ->
    Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (char_json c)
  | Long name ->
    Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string name)
  | Both (c, name) ->
    Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}"
      (char_json c) (json_string name)
|
||||
|
||||
(* Serialize an optional flag parameter: [null] when absent, otherwise an
   object with "kind" ("mandatory" / "optional") and "name". *)
let json_param_of param =
  match param with
  | None -> json_null
  | Some (Mandatory name) ->
    Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}" (json_string name)
  | Some (Optional name) ->
    Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}" (json_string name)
|
||||
|
||||
(* Serialize one flag entry as {"switch":..,"param":..,"desc":..}. *)
let json_entry_of e =
  let switch = json_switch_of e.switch in
  let param = json_param_of e.param in
  let desc = json_string e.desc in
  Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}" switch param desc

(* Serialize one subcommand as {"name":..,"desc":..}. *)
let json_subcommand_of sc =
  let name = json_string sc.name in
  let desc = json_string sc.desc in
  Printf.sprintf "{\"name\":%s,\"desc\":%s}" name desc

(* Serialize one positional as {"name":..,"optional":..,"variadic":..}. *)
let json_positional_of p =
  let name = json_string p.pos_name in
  Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}"
    name p.optional p.variadic
|
||||
|
||||
(* Serialize [items] as a JSON array, rendering each element with [f]. *)
let json_list f items =
  let rendered = List.map f items in
  Printf.sprintf "[%s]" (String.concat "," rendered)
|
||||
|
||||
(* Serialize a help result as one JSON object with the fields "source",
   "description", "entries", "subcommands" and "positionals", in that order.
   [source] defaults to "help". *)
let json_of_help_result ?(source="help") r =
  let source_json = json_string source in
  let description = json_string r.description in
  let entries = json_list json_entry_of r.entries in
  let subcommands = json_list json_subcommand_of r.subcommands in
  let positionals = json_list json_positional_of r.positionals in
  Printf.sprintf "{\"source\":%s,\"description\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}"
    source_json description entries subcommands positionals
|
||||
|
||||
(* --- JSON deserialization --- *)
|
||||
|
||||
(* Minimal JSON reader — just enough for the documents this module writes.
   Supported: objects, arrays, strings, true/false/null. Numbers are not
   supported. Text after the first complete value is ignored. *)

type json =
  | Jnull
  | Jbool of bool
  | Jstring of string
  | Jarray of json list
  | Jobject of (string * json) list

(* [json_get key j] is the value bound to [key] when [j] is an object that
   has it (first binding wins), and [Jnull] otherwise. *)
let json_get key = function
  | Jobject pairs -> (try List.assoc key pairs with Not_found -> Jnull)
  | _ -> Jnull

(* Lenient projections: a value of the wrong kind yields "", false or []. *)
let json_to_string = function Jstring s -> s | _ -> ""
let json_to_bool = function Jbool b -> b | _ -> false
let json_to_list = function Jarray l -> l | _ -> []

exception Json_error of string

(* Parse the first JSON value in [s].

   All malformed input is reported as [Json_error]:
   - an unterminated string or escape (previously this looped forever at the
     end of the input while growing the buffer);
   - a misspelled [null] / [true] / [false] literal (previously any word
     starting with n, t or f was accepted);
   - a truncated or non-hex \uXXXX escape (previously [Invalid_argument] or
     [Failure]).
   The \b and \f escapes now decode to backspace and form feed.

   \uXXXX escapes are decoded to UTF-8 one code unit at a time; surrogate
   pairs are not combined.

   @raise Json_error on malformed input. *)
let parse_json s =
  let len = String.length s in
  let pos = ref 0 in
  let error fmt = Printf.ksprintf (fun msg -> raise (Json_error msg)) fmt in
  let peek () = if !pos < len then s.[!pos] else '\x00' in
  let advance () = incr pos in
  let skip_ws () =
    while
      !pos < len
      && (match s.[!pos] with ' ' | '\t' | '\n' | '\r' -> true | _ -> false)
    do
      advance ()
    done
  in
  let expect c =
    skip_ws ();
    if peek () <> c then error "expected '%c' at %d" c !pos;
    advance ()
  in
  (* Consume exactly [word] at the current position and return [value]. *)
  let literal word value =
    let n = String.length word in
    if !pos + n <= len && String.sub s !pos n = word then begin
      pos := !pos + n;
      value
    end
    else error "invalid literal at %d" !pos
  in
  (* Read four hex digits starting at [pos]; leaves [pos] just past them. *)
  let read_hex4 () =
    let is_hex = function
      | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
      | _ -> false
    in
    if !pos + 4 > len then error "truncated \\u escape at %d" !pos
    else begin
      let hex = String.sub s !pos 4 in
      if not (String.for_all is_hex hex) then
        error "invalid \\u escape at %d" !pos
      else begin
        pos := !pos + 4;
        int_of_string ("0x" ^ hex)
      end
    end
  in
  (* Append [code] (0..0xFFFF) to [buf] as UTF-8. *)
  let add_code_point buf code =
    if code < 0x80 then Buffer.add_char buf (Char.chr code)
    else if code < 0x800 then begin
      Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end
    else begin
      Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12)));
      Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end
  in
  let rec parse_value () =
    skip_ws ();
    if !pos >= len then error "unexpected end of input at %d" !pos
    else
      match s.[!pos] with
      | '"' -> Jstring (parse_string ())
      | '{' -> parse_object ()
      | '[' -> parse_array ()
      | 'n' -> literal "null" Jnull
      | 't' -> literal "true" (Jbool true)
      | 'f' -> literal "false" (Jbool false)
      | c -> error "unexpected '%c' at %d" c !pos
  and parse_string () =
    expect '"';
    let buf = Buffer.create 32 in
    let rec loop () =
      (* Running off the end of the input is an error, never a silent stop. *)
      if !pos >= len then error "unterminated string at %d" !pos
      else
        match s.[!pos] with
        | '"' -> advance ()
        | '\\' ->
          advance ();
          if !pos >= len then error "unterminated escape at %d" !pos;
          (match s.[!pos] with
           | 'u' ->
             advance ();
             add_code_point buf (read_hex4 ())
           | c ->
             let decoded =
               match c with
               | 'n' -> '\n'
               | 't' -> '\t'
               | 'r' -> '\r'
               | 'b' -> '\b'
               | 'f' -> '\x0c'
               | other -> other (* covers '"', '\\' and '/' *)
             in
             Buffer.add_char buf decoded;
             advance ());
          loop ()
        | c ->
          Buffer.add_char buf c;
          advance ();
          loop ()
    in
    loop ();
    Buffer.contents buf
  and parse_object () =
    expect '{';
    skip_ws ();
    if peek () = '}' then begin
      advance ();
      Jobject []
    end
    else begin
      let pairs = ref [] in
      let continue = ref true in
      while !continue do
        skip_ws ();
        let key = parse_string () in
        expect ':';
        let value = parse_value () in
        pairs := (key, value) :: !pairs;
        skip_ws ();
        if peek () = ',' then advance () else continue := false
      done;
      expect '}';
      Jobject (List.rev !pairs)
    end
  and parse_array () =
    expect '[';
    skip_ws ();
    if peek () = ']' then begin
      advance ();
      Jarray []
    end
    else begin
      let items = ref [] in
      let continue = ref true in
      while !continue do
        let v = parse_value () in
        items := v :: !items;
        skip_ws ();
        if peek () = ',' then advance () else continue := false
      done;
      expect ']';
      Jarray (List.rev !items)
    end
  in
  parse_value ()
|
||||
|
||||
(* Decode a switch object written by [json_switch_of]. A missing or empty
   "char" decodes as '?'; an unknown "type" decodes as [Long "?"]. *)
let switch_of_json j =
  let field key = json_to_string (json_get key j) in
  let short_char () =
    match field "char" with
    | "" -> '?'
    | c -> c.[0]
  in
  match field "type" with
  | "short" -> Short (short_char ())
  | "long" -> Long (field "name")
  | "both" ->
    let c = short_char () in
    Both (c, field "name")
  | _ -> Long "?"
|
||||
|
||||
(* Decode a parameter written by [json_param_of]: [None] for null or for an
   unrecognised "kind". *)
let param_of_json j =
  let field key = json_to_string (json_get key j) in
  match j, field "kind" with
  | Jnull, _ -> None
  | _, "mandatory" -> Some (Mandatory (field "name"))
  | _, "optional" -> Some (Optional (field "name"))
  | _, _ -> None
|
||||
|
||||
(* Decode one flag entry; missing fields fall back to the lenient defaults of
   the individual decoders. *)
let entry_of_json j =
  let switch = switch_of_json (json_get "switch" j) in
  let param = param_of_json (json_get "param" j) in
  let desc = json_to_string (json_get "desc" j) in
  { switch; param; desc }

(* Decode one subcommand; missing fields become "". *)
let subcommand_of_json j =
  let text key = json_to_string (json_get key j) in
  { name = text "name"; desc = text "desc" }

(* Decode one positional; a missing name becomes "" and missing flags
   become false. *)
let positional_of_json j =
  let flag key = json_to_bool (json_get key j) in
  { pos_name = json_to_string (json_get "name" j);
    optional = flag "optional";
    variadic = flag "variadic" }
|
||||
|
||||
(* Decode a whole help result from the object written by
   [json_of_help_result]. Missing list fields decode as [] and a missing
   description as "". The "source" field is not read. *)
let help_result_of_json j =
  let items key decode = List.map decode (json_to_list (json_get key j)) in
  { entries = items "entries" entry_of_json;
    subcommands = items "subcommands" subcommand_of_json;
    positionals = items "positionals" positional_of_json;
    description = json_to_string (json_get "description" j) }
|
||||
|
||||
(* --- Filesystem operations --- *)
|
||||
|
||||
(* Write [contents] to [path], creating or truncating the file.
   The channel is closed even if writing fails; a failure while flushing on
   close is still reported.

   @raise Sys_error if the file cannot be opened or written. *)
let write_file path contents =
  let oc = open_out path in
  Fun.protect
    ~finally:(fun () -> close_out_noerr oc)
    (fun () ->
      output_string oc contents;
      (* Explicit close so that flush errors surface; the [finally] above is
         then a harmless no-op. *)
      close_out oc)
|
||||
|
||||
(* Read the whole of [path]; [None] if it cannot be opened or read.
   The channel is always closed, including when reading fails part-way. The
   file is opened in binary mode so the length reported for the channel
   matches the number of bytes read. Only I/O failures are turned into
   [None]; unrelated exceptions are not swallowed. *)
let read_file path =
  match open_in_bin path with
  | exception Sys_error _ -> None
  | ic ->
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () ->
        match really_input_string ic (in_channel_length ic) with
        | contents -> Some contents
        | exception (Sys_error _ | End_of_file | Invalid_argument _) -> None)
|
||||
|
||||
(* Store a parsed help result for [command] as "<dir>/<file>.json".
   [source] (default "help") is recorded in the JSON document. *)
let write_result ~dir ?(source="help") command result =
  let file = filename_of_command command ^ ".json" in
  json_of_help_result ~source result |> write_file (Filename.concat dir file)

(* Store natively generated completion text for [command] verbatim as
   "<dir>/<file>.nu". *)
let write_native ~dir command data =
  let file = filename_of_command command ^ ".nu" in
  write_file (Filename.concat dir file) data
|
||||
|
||||
(* True when [path] exists and is a directory. *)
let is_dir path =
  if Sys.file_exists path then Sys.is_directory path else false
|
||||
|
||||
(* Path of the stored file for [command]: the first directory in [dirs] that
   contains either "<base>.json" or "<base>.nu" wins, with the .json file
   preferred within a directory. [None] if no directory has one. *)
let find_file dirs command =
  let base = filename_of_command command in
  let existing dir ext =
    let path = Filename.concat dir (base ^ ext) in
    if Sys.file_exists path then Some path else None
  in
  List.find_map
    (fun dir ->
      match existing dir ".json" with
      | Some _ as hit -> hit
      | None -> existing dir ".nu")
    dirs
|
||||
|
||||
(* Load the parsed help result for [command] from the first directory in
   [dirs] whose "<base>.json" can be read and decoded. Unreadable or
   malformed files are skipped (best effort) and the search continues. *)
let lookup dirs command =
  let file = filename_of_command command ^ ".json" in
  let load dir =
    Option.bind
      (read_file (Filename.concat dir file))
      (fun data ->
        try Some (help_result_of_json (parse_json data)) with _ -> None)
  in
  List.find_map load dirs
|
||||
|
||||
(* Raw contents of the stored file for [command]: for each directory in
   [dirs], "<base>.json" is tried first and "<base>.nu" second; the first file
   that can be read wins. *)
let lookup_raw dirs command =
  let base = filename_of_command command in
  let read dir ext = read_file (Filename.concat dir (base ^ ext)) in
  List.find_map
    (fun dir ->
      match read dir ".json" with
      | Some _ as contents -> contents
      | None -> read dir ".nu")
    dirs
|
||||
|
||||
(* [chop_extension f] strips a trailing ".json" or ".nu" from [f] and returns
   the remainder, or [None] when [f] has neither suffix.  ".json" is tested
   first. *)
let chop_extension f =
  [ ".json"; ".nu" ]
  |> List.find_opt (Filename.check_suffix f)
  |> Option.map (Filename.chop_suffix f)
|
||||
|
||||
(* [subcommands_of dirs command] lists the direct subcommands of [command]
   that have a completion file in one of [dirs].

   A file counts when its name is "<command file name>_<sub>" plus a ".json"
   or ".nu" extension, where <sub> is non-empty and contains no further '_'.
   The first file seen for a given <sub> wins.  The description is read from
   the "description" field of ".json" files and is "" for ".nu" files or when
   the file cannot be read or decoded.  The result is sorted by name. *)
let subcommands_of dirs command =
  let module SMap = Map.Make(String) in
  let prefix = filename_of_command command ^ "_" in
  let plen = String.length prefix in
  let description_of path =
    match read_file path with
    | None -> ""
    | Some data ->
      (try json_to_string (json_get "description" (parse_json data))
       with _ -> "")
  in
  let visit dir found file =
    if not (String.starts_with ~prefix file) then found
    else
      match chop_extension file with
      | None -> found
      | Some stem ->
        let rest = String.sub stem plen (String.length stem - plen) in
        if String.contains rest '_' || String.length rest = 0 then found
        else if SMap.mem rest found then found
        else
          let desc =
            if Filename.check_suffix file ".json"
            then description_of (Filename.concat dir file)
            else ""
          in
          SMap.add rest { name = rest; desc } found
  in
  let found =
    List.fold_left (fun found dir ->
      if is_dir dir then Array.fold_left (visit dir) found (Sys.readdir dir)
      else found
    ) SMap.empty dirs
  in
  SMap.bindings found |> List.map snd
|
||||
|
||||
(* [all_commands dirs] returns the sorted, duplicate-free list of command
   names that have a ".json" or ".nu" file in any existing directory of
   [dirs].  File names are mapped back to command names with
   [command_of_filename]. *)
let all_commands dirs =
  let module Names = Set.Make(String) in
  let names_in dir =
    if not (is_dir dir) then []
    else
      Sys.readdir dir
      |> Array.to_list
      |> List.filter_map chop_extension
      |> List.map command_of_filename
  in
  dirs
  |> List.concat_map names_in
  |> Names.of_list
  |> Names.elements
|
||||
|
||||
(* [file_type_of dirs command] describes where the completion data of
   [command] comes from, using the first directory of [dirs] that has a file
   for it:
   - a ".json" file yields its "source" field, or "json" when the file cannot
     be read or the field cannot be extracted;
   - otherwise a ".nu" file yields "native";
   - [None] when no directory has either file. *)
let file_type_of dirs command =
  let base = filename_of_command command in
  let source_of_json path =
    match read_file path with
    | None -> "json"
    | Some data ->
      (try json_to_string (json_get "source" (parse_json data))
       with _ -> "json")
  in
  let classify dir =
    let candidate ext = Filename.concat dir (base ^ ext) in
    if Sys.file_exists (candidate ".json") then
      Some (source_of_json (candidate ".json"))
    else if Sys.file_exists (candidate ".nu") then Some "native"
    else None
  in
  List.find_map classify dirs
|
||||
92
nix/module.nix
Normal file
92
nix/module.nix
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# NixOS module: automatic nushell completion indexing
|
||||
#
|
||||
# Indexes completions using three strategies in priority order:
|
||||
# 1. Native completion generators (e.g. CMD completions nushell)
|
||||
# 2. Manpage parsing
|
||||
# 3. --help output parsing
|
||||
#
|
||||
# Produces a directory of .json/.nu files at build time.
|
||||
# The `complete` command reads from this directory as a system overlay.
|
||||
#
|
||||
# Usage:
|
||||
# { pkgs, ... }: {
|
||||
# imports = [ ./path/to/inshellah/nix/module.nix ];
|
||||
# programs.inshellah.enable = true;
|
||||
# }
|
||||
|
||||
{
  config,
  lib,
  pkgs,
  ...
}:

let
  cfg = config.programs.inshellah;
in
{
  options.programs.inshellah = {
    enable = lib.mkEnableOption "nushell completion indexing via inshellah";

    # No default: the importing configuration must supply the package.
    package = lib.mkOption {
      type = lib.types.package;
      description = "package to use for indexing completions";
    };

    completionsPath = lib.mkOption {
      type = lib.types.str;
      default = "/share/inshellah";
      description = ''
        subdirectory within the system profile where completion files
        are placed. used as --system-dir for the completer.
      '';
    };

    ignoreCommands = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [ ];
      example = [ "problematic-tool" ];
      description = ''
        list of command names to skip during completion indexing
      '';
    };

    helpOnlyCommands = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [ ];
      example = [ "nix" ];
      description = ''
        list of command names to skip manpage parsing for,
        using --help scraping instead
      '';
    };

    # NOTE(review): declared read-only but never assigned in this module;
    # presumably the value is defined elsewhere - confirm before reading it.
    snippet = lib.mkOption {
      type = lib.types.str;
      readOnly = true;
    };
  };

  config = lib.mkIf cfg.enable {
    environment.systemPackages = [ cfg.package ];

    # Runs while the system profile is assembled: "$out" is the profile being
    # built, so the index covers everything installed into it.
    environment.extraSetup =
      let
        inshellah = "${cfg.package}/bin/inshellah";
        destDir = "$out${cfg.completionsPath}";
        # The list files are only passed on the command line when non-empty.
        ignoreFile = pkgs.writeText "inshellah-ignore" (lib.concatStringsSep "\n" cfg.ignoreCommands);
        ignoreFlag = lib.optionalString (cfg.ignoreCommands != [ ]) " --ignore ${ignoreFile}";
        helpOnlyFile = pkgs.writeText "inshellah-help-only" (lib.concatStringsSep "\n" cfg.helpOnlyCommands);
        helpOnlyFlag = lib.optionalString (cfg.helpOnlyCommands != [ ]) " --help-only ${helpOnlyFile}";
      in
      # destDir is quoted everywhere because completionsPath is user-settable.
      ''
        mkdir -p "${destDir}"

        if [ -d "$out/bin" ] && [ -d "$out/share/man" ]; then
          # Best effort: an indexing failure must not break the system build.
          ${inshellah} index "$out" --dir "${destDir}"${ignoreFlag}${helpOnlyFlag} \
            2>/dev/null || true
        fi

        # Drop empty results (including destDir itself when nothing was indexed).
        find "${destDir}" -maxdepth 1 -empty -delete
      '';
  };
}
|
||||
3
test/dune
Normal file
3
test/dune
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
; Test executable for the inshellah library; `str` is linked for the
; substring helpers used by the tests.
(test
 (name test_inshellah)
 (libraries inshellah str))
|
||||
492
test/test_inshellah.ml
Normal file
492
test/test_inshellah.ml
Normal file
|
|
@ -0,0 +1,492 @@
|
|||
open Inshellah.Parser
|
||||
open Inshellah.Manpage
|
||||
open Inshellah.Nushell
|
||||
|
||||
(* Running totals of failed and passed checks, reported at the end of the
   run. *)
let failures = ref 0
let passes = ref 0

(* [check name condition] records one assertion: it bumps [passes] or
   [failures] and prints a PASS/FAIL line labelled with [name].  It never
   raises, so later checks still run after a failure. *)
let check name condition =
  match condition with
  | true ->
    incr passes;
    Printf.printf "  PASS: %s\n" name
  | false ->
    incr failures;
    Printf.printf "  FAIL: %s\n" name
|
||||
|
||||
(* [parse txt] runs [parse_help] on [txt] and returns the parsed result.
   Raises [Failure] carrying the parser's message when parsing fails. *)
let parse txt =
  let fail msg = failwith (Printf.sprintf "parse_help failed: %s" msg) in
  match parse_help txt with
  | Error msg -> fail msg
  | Ok parsed -> parsed
|
||||
|
||||
(* --- Help parser tests --- *)
|
||||
|
||||
(* GNU-style "-a, --all" line: one entry carrying both switches, no parameter
   and a non-empty description.  The per-entry checks are skipped when nothing
   was parsed, so an empty result is reported by "one entry" instead of
   aborting the whole run. *)
let test_gnu_basic () =
  Printf.printf "\n== GNU basic flags ==\n";
  let r = parse "  -a, --all                  do not ignore entries starting with .\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "both switch" (e.switch = Both ('a', "all"));
    check "no param" (e.param = None);
    check "desc" (String.length e.desc > 0)
|
||||
|
||||
(* "--name=VALUE" is parsed as a long switch with a mandatory parameter.
   Per-entry checks are skipped (not crashed on) when no entry was parsed. *)
let test_gnu_eq_param () =
  Printf.printf "\n== GNU = param ==\n";
  let r = parse "      --block-size=SIZE      scale sizes by SIZE\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "long switch" (e.switch = Long "block-size");
    check "mandatory param" (e.param = Some (Mandatory "SIZE"))
|
||||
|
||||
(* "--name[=VALUE]" is parsed as a long switch with an optional parameter.
   Per-entry checks are skipped (not crashed on) when no entry was parsed. *)
let test_gnu_opt_param () =
  Printf.printf "\n== GNU optional param ==\n";
  let r = parse "      --color[=WHEN]         color the output WHEN\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "long switch" (e.switch = Long "color");
    check "optional param" (e.param = Some (Optional "WHEN"))
|
||||
|
||||
(* Parameter names may contain underscores (TIME_STYLE).  The per-entry check
   is skipped (not crashed on) when no entry was parsed. *)
let test_underscore_param () =
  Printf.printf "\n== Underscore in param (TIME_STYLE) ==\n";
  let r = parse "      --time-style=TIME_STYLE  time/date format\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "param with underscore" (e.param = Some (Mandatory "TIME_STYLE"))
|
||||
|
||||
(* A lone "-v" line yields a short-only switch.  The switch check is skipped
   (not crashed on) when no entry was parsed. *)
let test_short_only () =
  Printf.printf "\n== Short-only flag ==\n";
  let r = parse "  -v  verbose output\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ -> check "short switch" (e.switch = Short 'v')
|
||||
|
||||
(* A lone "--help" line yields a long-only switch.  The switch check is
   skipped (not crashed on) when no entry was parsed. *)
let test_long_only () =
  Printf.printf "\n== Long-only flag ==\n";
  let r = parse "      --help     display help\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ -> check "long switch" (e.switch = Long "help")
|
||||
|
||||
(* A description continued on an indented second line is folded into the
   same entry.  The length check is skipped (not crashed on) when no entry
   was parsed. *)
let test_multiline_desc () =
  Printf.printf "\n== Multi-line description ==\n";
  let r = parse {|      --block-size=SIZE      with -l, scale sizes by SIZE when printing them;
                             e.g., '--block-size=M'; see SIZE format below
|} in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ -> check "desc includes continuation" (String.length e.desc > 50)
|
||||
|
||||
(* Three consecutive option lines produce three entries. *)
let test_multiple_entries () =
  print_string "\n== Multiple entries ==\n";
  let help_text = {|  -a, --all                  do not ignore entries starting with .
  -A, --almost-all           do not list implied . and ..
      --author               with -l, print the author of each file
|} in
  let parsed = parse help_text in
  check "three entries" (List.compare_length_with parsed.entries 3 = 0)
|
||||
|
||||
(* Section headers ("INPUT OPTIONS:") between option lines do not break
   parsing: all three options are found and the first keeps its parameter.
   Per-entry checks are skipped (not crashed on) when no entry was parsed. *)
let test_clap_short_sections () =
  Printf.printf "\n== Clap short with section headers ==\n";
  let r = parse {|INPUT OPTIONS:
  -e, --regexp=PATTERN     A pattern to search for.
  -f, --file=PATTERNFILE   Search for patterns from the given file.
SEARCH OPTIONS:
  -s, --case-sensitive     Search case sensitively.
|} in
  check "three entries" (List.length r.entries = 3);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "first is regexp" (e.switch = Both ('e', "regexp"));
    check "first has param" (e.param = Some (Mandatory "PATTERN"))
|
||||
|
||||
(* Clap "long" layout: the description sits on the line below the flag.
   Per-entry checks are skipped (not crashed on) when no entry was parsed. *)
let test_clap_long_style () =
  Printf.printf "\n== Clap long style (desc below flag) ==\n";
  let r = parse {|  -H, --hidden
          Include hidden directories and files.

      --no-ignore
          Do not respect ignore files.
|} in
  check "two entries" (List.length r.entries = 2);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "hidden switch" (e.switch = Both ('H', "hidden"));
    check "desc below" (String.length e.desc > 0)
|
||||
|
||||
(* "--flag <value>" is parsed as a long switch with a mandatory parameter
   named by the text inside the angle brackets.  Per-entry checks are skipped
   (not crashed on) when no entry was parsed. *)
let test_clap_long_angle_param () =
  Printf.printf "\n== Clap long angle bracket param ==\n";
  let r = parse {|      --nonprintable-notation <notation>
          Set notation for non-printable characters.
|} in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "long switch" (e.switch = Long "nonprintable-notation");
    check "angle param" (e.param = Some (Mandatory "notation"))
|
||||
|
||||
(* "--foo FOO": a space-separated ALL_CAPS word after the switch is its
   mandatory parameter.  Per-entry checks are skipped (not crashed on) when
   no entry was parsed. *)
let test_space_upper_param () =
  Printf.printf "\n== Space-separated ALL_CAPS param ==\n";
  let r = parse "  -f, --foo FOO    foo help\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "switch" (e.switch = Both ('f', "foo"));
    check "space param" (e.param = Some (Mandatory "FOO"))
|
||||
|
||||
(* Go/Cobra "Flags:" block: three flags, the second ("--host string") carries
   a type word as its mandatory parameter.  The host checks are skipped (not
   crashed on) when fewer than two entries were parsed. *)
let test_go_cobra_flags () =
  Printf.printf "\n== Go/Cobra flags ==\n";
  let r = parse {|Flags:
  -D, --debug              Enable debug mode
  -H, --host string        Daemon socket to connect to
  -v, --version            Print version information
|} in
  check "three flag entries" (List.length r.entries = 3);
  (* Check the host flag has a type param *)
  match r.entries with
  | _ :: host :: _ ->
    check "host switch" (host.switch = Both ('H', "host"));
    check "host type param" (host.param = Some (Mandatory "string"))
  | _ -> ()
|
||||
|
||||
(* A Go/Cobra "Common Commands:" block yields at least one subcommand. *)
let test_go_cobra_subcommands () =
  print_string "\n== Go/Cobra subcommands ==\n";
  let help_text = {|Common Commands:
  run         Create and run a new container from an image
  exec        Execute a command in a running container
  build       Build an image from a Dockerfile
|} in
  let parsed = parse help_text in
  check "has subcommands" (parsed.subcommands <> [])
|
||||
|
||||
(* Busybox-style help: tab-indented short flags, including a digit flag.
   The switch check is skipped (not crashed on) when no entry was parsed. *)
let test_busybox_tab () =
  Printf.printf "\n== Busybox tab-indented ==\n";
  let r = parse "\t-1\tOne column output\n\t-a\tInclude names starting with .\n" in
  check "two entries" (List.length r.entries = 2);
  match r.entries with
  | [] -> ()
  | e :: _ -> check "first is -1" (e.switch = Short '1')
|
||||
|
||||
(* Placeholder check that always passes; the real signal is the absence of
   stray output before this point. *)
let test_no_debug_prints () =
  print_string "\n== No debug side effects ==\n";
  (* The old parser had print_endline at module load time.
     If we got here without "opt param is running" on stdout, we're good. *)
  check "no debug prints" true
|
||||
|
||||
(* --- Manpage parser tests --- *)
|
||||
|
||||
(* .TP-formatted OPTIONS section: three entries; the first carries both
   switches, the third a long switch with a mandatory parameter.  Per-entry
   checks only run when the entry exists. *)
let test_manpage_tp_style () =
  print_string "\n== Manpage .TP style ==\n";
  let groff = {|.SH OPTIONS
.TP
\fB\-a\fR, \fB\-\-all\fR
do not ignore entries starting with .
.TP
\fB\-A\fR, \fB\-\-almost\-all\fR
do not list implied . and ..
.TP
\fB\-\-block\-size\fR=\fISIZE\fR
with \fB\-l\fR, scale sizes by SIZE
.SH AUTHOR
Written by someone.
|} in
  let result = parse_manpage_string groff in
  check "three entries" (List.length result.entries = 3);
  (match result.entries with
   | first :: _ ->
     check "first is -a/--all" (first.switch = Both ('a', "all"));
     check "first desc" (String.length first.desc > 0)
   | [] -> ());
  match result.entries with
  | _ :: _ :: third :: _ ->
    check "block-size switch" (third.switch = Long "block-size");
    check "block-size param" (third.param = Some (Mandatory "SIZE"))
  | _ -> ()
|
||||
|
||||
(* .IP-formatted OPTIONS section: two entries, the first carrying both
   switches.  The per-entry check only runs when the entry exists. *)
let test_manpage_ip_style () =
  print_string "\n== Manpage .IP style ==\n";
  let groff = {|.SH OPTIONS
.IP "\fB\-k\fR, \fB\-\-insecure\fR"
Allow insecure connections.
.IP "\fB\-o\fR, \fB\-\-output\fR \fIfile\fR"
Write output to file.
.SH SEE ALSO
|} in
  let result = parse_manpage_string groff in
  check "two entries" (List.length result.entries = 2);
  match result.entries with
  | first :: _ ->
    check "first is -k/--insecure" (first.switch = Both ('k', "insecure"))
  | [] -> ()
|
||||
|
||||
(* [strip_groff_escapes] drops font escapes, turns "\-" into "-" and
   "\(aq" into an apostrophe. *)
let test_manpage_groff_stripping () =
  print_string "\n== Groff escape stripping ==\n";
  let stripped = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in
  let has c = String.contains stripped c in
  check "font escapes removed" (not (has 'f') || not (has 'B'));
  check "dashes converted" (has '-');
  let quoted = strip_groff_escapes {|\(aqhello\(aq|} in
  check "aq -> quote" (String.contains quoted '\'')
|
||||
|
||||
(* A manpage without an OPTIONS section yields no entries. *)
let test_manpage_empty_options () =
  print_string "\n== Manpage with no OPTIONS section ==\n";
  let groff = {|.SH NAME
foo \- does stuff
.SH DESCRIPTION
Does stuff.
|} in
  let result = parse_manpage_string groff in
  check "no entries" (result.entries = [])
|
||||
|
||||
(* "--long / -s": a slash between the long and the short form is accepted as
   a switch separator.  Per-entry checks are skipped (not crashed on) when no
   entry was parsed. *)
let test_slash_switch_separator () =
  Printf.printf "\n== Slash switch separator (--long / -s) ==\n";
  let r = parse "  --verbose / -v   Increase verbosity\n" in
  check "one entry" (List.length r.entries = 1);
  match r.entries with
  | [] -> ()
  | e :: _ ->
    check "both switch" (e.switch = Both ('v', "verbose"));
    check "no param" (e.param = None);
    check "desc" (e.desc = "Increase verbosity")
|
||||
|
||||
(* nix3-style manpage: each option is a bullet .IP holding a .UR/.UE link,
   with the description in a following bare .IP.  Per-entry checks only run
   when the entry exists. *)
let test_manpage_nix3_style () =
  print_string "\n== Manpage nix3 style ==\n";
  let groff = {|.SH Options
.SS Logging-related options
.IP "\(bu" 3
.UR #opt-verbose
\f(CR--verbose\fR
.UE
/ \f(CR-v\fR
.IP
Increase the logging verbosity level.
.IP "\(bu" 3
.UR #opt-quiet
\f(CR--quiet\fR
.UE
.IP
Decrease the logging verbosity level.
.SH SEE ALSO
|} in
  let result = parse_manpage_string groff in
  check "two entries" (List.length result.entries = 2);
  (match result.entries with
   | verbose :: _ ->
     check "verbose is Both" (verbose.switch = Both ('v', "verbose"));
     check "verbose desc" (String.length verbose.desc > 0)
   | [] -> ());
  match result.entries with
  | _ :: quiet :: _ ->
    check "quiet is Long" (quiet.switch = Long "quiet");
    check "quiet desc" (String.length quiet.desc > 0)
  | _ -> ()
|
||||
|
||||
(* nix3-style options with parameters, including a "--long / -s param" form
   and a description spread over two .IP paragraphs.  Per-entry checks only
   run when the entry exists. *)
let test_manpage_nix3_with_params () =
  print_string "\n== Manpage nix3 with params ==\n";
  let groff = {|.SH Options
.IP "\(bu" 3
.UR #opt-arg
\f(CR--arg\fR
.UE
\fIname\fR \fIexpr\fR
.IP
Pass the value as the argument name to Nix functions.
.IP "\(bu" 3
.UR #opt-include
\f(CR--include\fR
.UE
/ \f(CR-I\fR \fIpath\fR
.IP
Add path to search path entries.
.IP
This option may be given multiple times.
.SH SEE ALSO
|} in
  let result = parse_manpage_string groff in
  check "two entries" (List.length result.entries = 2);
  (match result.entries with
   | arg :: _ ->
     check "arg is Long" (arg.switch = Long "arg");
     check "arg has param" (arg.param <> None)
   | [] -> ());
  match result.entries with
  | _ :: incl :: _ ->
    check "include is Both" (incl.switch = Both ('I', "include"));
    check "include has path param" (incl.param = Some (Mandatory "path"))
  | _ -> ()
|
||||
|
||||
(* The SYNOPSIS line "git commit [...]" is recognised as the two-word command
   "git commit". *)
let test_synopsis_subcommand () =
  print_string "\n== SYNOPSIS subcommand detection ==\n";
  let groff = {|.SH "SYNOPSIS"
.sp
.nf
\fBgit\fR \fBcommit\fR [\fB\-a\fR | \fB\-\-interactive\fR]
.fi
.SH "DESCRIPTION"
|} in
  check "detected git commit"
    (extract_synopsis_command groff = Some "git commit")
|
||||
|
||||
(* A single-word SYNOPSIS command ("nix-build") is returned as is. *)
let test_synopsis_standalone () =
  print_string "\n== SYNOPSIS standalone command ==\n";
  let groff = {|.SH Synopsis
.LP
\f(CRnix-build\fR [\fIpaths\fR]
.SH Description
|} in
  check "detected nix-build"
    (extract_synopsis_command groff = Some "nix-build")
|
||||
|
||||
(* A nix3 SYNOPSIS with both words in one font span ("nix run") is returned
   as the two-word command. *)
let test_synopsis_nix3 () =
  print_string "\n== SYNOPSIS nix3 subcommand ==\n";
  let groff = {|.SH Synopsis
.LP
\f(CRnix run\fR [\fIoption\fR] \fIinstallable\fR
.SH Description
|} in
  check "detected nix run"
    (extract_synopsis_command groff = Some "nix run")
|
||||
|
||||
(* --- Nushell generation tests --- *)
|
||||
|
||||
(* [contains s sub] is [true] iff [sub] occurs literally somewhere in [s]. *)
let contains s sub =
  match Str.search_forward (Str.regexp_string sub) s 0 with
  | _ -> true
  | exception Not_found -> false
|
||||
|
||||
(* A single flag renders as an "export extern" with the "--long(-s)" form
   and the description as a comment. *)
let test_nushell_basic () =
  print_string "\n== Nushell basic extern ==\n";
  let r = parse "  -a, --all                  do not ignore entries starting with .\n" in
  let nu = generate_extern "ls" r in
  List.iter (fun (label, needle) -> check label (contains nu needle))
    [ "has extern", "export extern \"ls\"";
      "has --all(-a)", "--all(-a)";
      "has comment", "# do not ignore" ]
|
||||
|
||||
(* Parameter names map to nushell types: COLS -> int, SIZE -> string,
   FILE -> path. *)
let test_nushell_param_types () =
  print_string "\n== Nushell param type mapping ==\n";
  let r = parse {|  -w, --width=COLS           set output width
      --block-size=SIZE      scale sizes
  -o, --output FILE          output file
|} in
  let nu = generate_extern "ls" r in
  List.iter (fun (label, needle) -> check label (contains nu needle))
    [ "COLS -> int", "--width(-w): int";
      "SIZE -> string", "--block-size: string";
      "FILE -> path", "--output(-o): path" ]
|
||||
|
||||
(* Subcommands found in the help text get their own "export extern" next to
   the main command's extern and flags. *)
let test_nushell_subcommands () =
  print_string "\n== Nushell subcommands ==\n";
  let r = parse {|Common Commands:
  run         Create and run a new container
  exec        Execute a command

Flags:
  -D, --debug   Enable debug mode
|} in
  let nu = generate_extern "docker" r in
  List.iter (fun (label, needle) -> check label (contains nu needle))
    [ "has main extern", "export extern \"docker\"";
      "has --debug", "--debug(-D)";
      "has run subcommand", "export extern \"docker run\"";
      "has exec subcommand", "export extern \"docker exec\"" ]
|
||||
|
||||
(* Entries parsed from a manpage render the same way as entries parsed from
   --help output. *)
let test_nushell_from_manpage () =
  print_string "\n== Nushell from manpage ==\n";
  let groff = {|.SH OPTIONS
.TP
\fB\-a\fR, \fB\-\-all\fR
do not ignore entries starting with .
.TP
\fB\-\-block\-size\fR=\fISIZE\fR
scale sizes by SIZE
.SH AUTHOR
|} in
  let nu = generate_extern "ls" (parse_manpage_string groff) in
  List.iter (fun (label, needle) -> check label (contains nu needle))
    [ "has extern", "export extern \"ls\"";
      "has --all(-a)", "--all(-a)";
      "has --block-size", "--block-size: string" ]
|
||||
|
||||
(* [generate_module] wraps the extern in a "<name>-completions" module. *)
let test_nushell_module () =
  print_string "\n== Nushell module wrapper ==\n";
  let r = parse "  -v, --verbose    verbose output\n" in
  let nu = generate_module "myapp" r in
  List.iter (fun (label, needle) -> check label (contains nu needle))
    [ "has module", "module myapp-completions";
      "has extern inside", "export extern \"myapp\"";
      "has flag", "--verbose(-v)" ]
|
||||
|
||||
(* The same flag listed three ways (both forms, long only, short only) is
   emitted once, keeping the most complete "--verbose(-v)" form. *)
let test_dedup_entries () =
  print_string "\n== Deduplication ==\n";
  let r = parse {|  -v, --verbose    verbose output
      --verbose    verbose mode
  -v               be verbose
|} in
  let nu = generate_extern "test" r in
  (* Count occurrences of --verbose *)
  let needle = Str.regexp_string "--verbose" in
  let rec occurrences from seen =
    match Str.search_forward needle nu from with
    | _ -> occurrences (Str.match_end ()) (seen + 1)
    | exception Not_found -> seen
  in
  let count = occurrences 0 0 in
  check "verbose appears once" (count = 1);
  check "best version kept (Both)" (contains nu "--verbose(-v)")
|
||||
|
||||
(* Flags merely mentioned in the DESCRIPTION prose do not create extra
   entries next to the one defined under OPTIONS. *)
let test_dedup_manpage () =
  print_string "\n== Dedup from manpage ==\n";
  let groff = {|.SH OPTIONS
.TP
\fB\-v\fR, \fB\-\-verbose\fR
Be verbose.
.SH DESCRIPTION
Use \fB\-v\fR for verbose output.
Use \fB\-\-verbose\fR to see more.
|} in
  let nu = generate_extern "test" (parse_manpage_string groff) in
  check "has --verbose(-v)" (contains nu "--verbose(-v)");
  (* Should not have standalone -v or duplicate --verbose *)
  let verbose_lines =
    nu
    |> String.split_on_char '\n'
    |> List.filter (fun line -> contains line "verbose")
  in
  check "only one verbose line" (List.length verbose_lines = 1)
|
||||
|
||||
(* Adjacent font changes get a separating space between a flag and its
   parameter, but not in front of "[=". *)
let test_font_boundary_spacing () =
  print_string "\n== Font boundary spacing ==\n";
  (* \fB--max-results\fR\fIcount\fR should become "--max-results count" *)
  let flag_then_param = strip_groff_escapes {|\fB\-\-max\-results\fR\fIcount\fR|} in
  check "has space before param" (contains flag_then_param "--max-results count");
  (* \fB--color\fR[=\fIWHEN\fR] should NOT insert space before = *)
  let optional_param = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in
  check "no space before =" (contains optional_param "--color[=WHEN]")
|
||||
|
||||
(* Test driver: runs every section in order, prints the totals and exits
   with status 1 when at least one check failed. *)
let () =
  let run_section header tests =
    print_string header;
    List.iter (fun test -> test ()) tests
  in
  run_section "Running help parser tests...\n"
    [ test_gnu_basic;
      test_gnu_eq_param;
      test_gnu_opt_param;
      test_underscore_param;
      test_short_only;
      test_long_only;
      test_multiline_desc;
      test_multiple_entries;
      test_clap_short_sections;
      test_clap_long_style;
      test_clap_long_angle_param;
      test_space_upper_param;
      test_go_cobra_flags;
      test_go_cobra_subcommands;
      test_busybox_tab;
      test_no_debug_prints ];
  run_section "\nRunning manpage parser tests...\n"
    [ test_manpage_tp_style;
      test_manpage_ip_style;
      test_manpage_groff_stripping;
      test_manpage_empty_options;
      test_slash_switch_separator;
      test_manpage_nix3_style;
      test_manpage_nix3_with_params;
      test_synopsis_subcommand;
      test_synopsis_standalone;
      test_synopsis_nix3 ];
  run_section "\nRunning nushell generation tests...\n"
    [ test_nushell_basic;
      test_nushell_param_types;
      test_nushell_subcommands;
      test_nushell_from_manpage;
      test_nushell_module ];
  run_section "\nRunning dedup and font tests...\n"
    [ test_dedup_entries;
      test_dedup_manpage;
      test_font_boundary_spacing ];
  Printf.printf "\n=== Results: %d passed, %d failed ===\n" !passes !failures;
  if !failures > 0 then exit 1
|
||||
Loading…
Add table
Add a link
Reference in a new issue