diff --git a/bin/main.ml b/bin/main.ml index ce1a179..fd1e665 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -1,25 +1,28 @@ open Inshellah.Parser open Inshellah.Manpage open Inshellah.Nushell +open Inshellah.Store module SSet = Set.Make(String) let usage () = Printf.eprintf - {|inshellah - generate nushell completions + {|inshellah - nushell completions engine Usage: - inshellah generate BINDIR MANDIR -o OUTDIR - Full generation: native completions, manpages, and --help fallback. - One .nu file per command. + inshellah index PREFIX... [--dir PATH] + Index completions into a directory of JSON/nu files. + PREFIX is a directory containing bin/ and share/man/. + Default dir: $XDG_CACHE_HOME/inshellah + inshellah complete CMD [ARGS...] [--dir PATH] [--system-dir PATH] + Nushell custom completer. Outputs JSON completion candidates. + Falls back to --help resolution if command is not indexed. + inshellah query CMD [--dir PATH] [--system-dir PATH] + Print stored completion data for CMD. + inshellah dump [--dir PATH] [--system-dir PATH] + List indexed commands. inshellah manpage FILE Parse a manpage and emit nushell extern inshellah manpage-dir DIR Batch-process manpages under DIR - inshellah help [--iterative] CMD [ARGS...] - Run CMD ARGS --help, parse and emit extern. - Recursively resolves subcommands unless - --iterative is given. - inshellah parse-help CMD Read --help text from stdin, emit extern - inshellah demo Run built-in demo |}; exit 1 @@ -36,13 +39,6 @@ let is_nushell_source text = || contains_str text "export def" || (contains_str text "module " && contains_str text "export")) -let filename_of_cmd cmd = - String.map (function - | ('a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.') as c -> c | _ -> '-') cmd - -let write_file path contents = - let oc = open_out path in output_string oc contents; close_out oc - let cmd_name_of_manpage path = let base = Filename.basename path in let base = @@ -59,15 +55,37 @@ let safe_env = lazy ( || String.starts_with ~prefix:"XAUTHORITY=" s)) (Array.to_list (Unix.environment ())))) +(* Non-blocking drain of a pipe fd into a buffer. Safe to call repeatedly; + reads whatever is available without blocking. Used by all fork-pipe sites + to keep pipes drained so children never block on write. *) +let drain_fd rd buf = + let chunk = Bytes.create 8192 in + let continue = ref true in + while !continue do + match Unix.select [rd] [] [] 0.0 with + | (_ :: _, _, _) -> + (try + let n = Unix.read rd chunk 0 8192 in + if n = 0 then continue := false + else Buffer.add_subbytes buf chunk 0 n + with Unix.Unix_error _ -> continue := false) + | _ -> continue := false + done + let run_cmd args timeout_ms = let (rd, wr) = Unix.pipe () in let devnull = Unix.openfile "/dev/null" [Unix.O_RDONLY] 0 in let argv = Array.of_list args in + (* Run subprocesses in /tmp so commands that write side-effect files + (e.g. ckb-next-dev-detect-report.gz) don't pollute the working dir *) + let saved_cwd = Sys.getcwd () in + Sys.chdir "/tmp"; let pid = try Unix.create_process_env (List.hd args) argv (Lazy.force safe_env) devnull wr wr with Unix.Unix_error _ -> Unix.close rd; Unix.close wr; Unix.close devnull; -1 in + Sys.chdir saved_cwd; Unix.close wr; Unix.close devnull; if pid < 0 then (Unix.close rd; None) else begin @@ -250,124 +268,7 @@ let cmd_manpage_dir dir = ) (Sys.readdir subdir) ) command_sections -let help_resolve ?(timeout=10_000) cmd rest name = - let max_jobs = num_cores () in - let queue = Queue.create () in - Queue.push (rest, name, 0, "") queue; - let results = ref [] in - (* pending: (pid, rd, rest, name, depth, desc) *) - let pending = ref [] in - let collect rd p_rest p_name p_depth p_desc = - let ic = Unix.in_channel_of_descr rd in - let[@warning "-8"] result : (string * (string * string) list) option = - try Marshal.from_channel ic with _ -> None in - close_in ic; - match result with - | None -> - if p_desc <> "" then - results := Printf.sprintf "export extern \"%s\" [ # %s\n]\n" - (escape_nu p_name) (escape_nu p_desc) :: !results - | Some (code, subs) -> - results := code :: !results; - if p_depth < 5 then - List.iter (fun (sc_name, sc_desc) -> - Queue.push - (p_rest @ [sc_name], p_name ^ " " ^ sc_name, - p_depth + 1, sc_desc) queue - ) subs in - let reap () = - pending := List.filter (fun (pid, rd, p_rest, p_name, p_depth, p_desc) -> - match Unix.waitpid [Unix.WNOHANG] pid with - | (0, _) -> true - | _ -> collect rd p_rest p_name p_depth p_desc; false - | exception Unix.Unix_error (Unix.ECHILD, _, _) -> - collect rd p_rest p_name p_depth p_desc; false - ) !pending in - let wait_slot () = - while List.length !pending >= max_jobs do - reap (); - if List.length !pending >= max_jobs then - (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); - reap () - done in - while not (Queue.is_empty queue) || !pending <> [] do - while not (Queue.is_empty queue) do - let (q_rest, q_name, q_depth, q_desc) = Queue.pop queue in - wait_slot (); - let (rd, wr) = Unix.pipe () in - let pid = Unix.fork () in - if pid = 0 then begin - Unix.close rd; - List.iter (fun (_, prd, _, _, _, _) -> - try Unix.close prd with _ -> ()) !pending; - let result = - let text = match run_cmd (cmd :: q_rest @ ["--help"]) timeout with - | Some _ as r -> r - | None -> run_cmd (cmd :: q_rest @ ["-h"]) timeout in - match text with - | None -> None - | Some text -> - (match parse_help text with - | Error _ -> None - | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] -> None - | Ok r -> - let at_limit = q_depth >= 5 in - let code = generate_extern q_name - (if at_limit then r else { r with subcommands = [] }) in - let subs = if at_limit then [] - else List.map (fun (sc : subcommand) -> (sc.name, sc.desc)) - r.subcommands in - Some (code, subs)) in - let oc = Unix.out_channel_of_descr wr in - Marshal.to_channel oc (result : (string * (string * string) list) option) []; - close_out oc; - exit 0 - end else begin - Unix.close wr; - pending := (pid, rd, q_rest, q_name, q_depth, q_desc) :: !pending - end - done; - if !pending <> [] then begin - reap (); - if !pending <> [] && Queue.is_empty queue then begin - (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); - reap () - end - end - done; - match !results with - | [] -> None - | rs -> Some (String.concat "\n" (List.rev rs)) - -let cmd_help args = - let iterative, cmd_args = match args with - | "--iterative" :: rest -> (true, rest) - | _ -> (false, args) - in - match cmd_args with - | [] -> Printf.eprintf "error: help requires a command name\n"; exit 1 - | cmd :: rest -> - let name = String.concat " " (Filename.basename cmd :: rest) in - if iterative then - (match run_cmd (cmd :: rest @ ["--help"]) 10_000 with - | None -> Printf.eprintf "no output from %s --help\n" name; exit 1 - | Some text -> - (match parse_help text with - | Ok r -> print_string (generate_extern name r) - | Error msg -> Printf.eprintf "parse error for %s: %s\n" name msg; exit 1)) - else - (match help_resolve cmd rest name with - | None -> Printf.eprintf "no output from %s --help\n" name; exit 1 - | Some output -> print_string output) - -let cmd_parse_help cmd = - let buf = Buffer.create 4096 in - (try while true do - Buffer.add_string buf (input_line stdin); Buffer.add_char buf '\n' - done with End_of_file -> ()); - (match parse_help (Buffer.contents buf) with - | Ok r -> print_string (generate_extern cmd r) - | Error msg -> Printf.eprintf "parse error for %s: %s\n" cmd msg; exit 1) +let max_resolve_results = 500 let process_manpage file = try @@ -390,124 +291,393 @@ let manpaged_commands mandir = else acc ) SSet.empty command_sections -let cmd_generate bindir mandir outdir = - let done_cmds = ref SSet.empty in - let bins = Sys.readdir bindir in - Array.sort String.compare bins; - let manpaged = manpaged_commands mandir in +(* Parallel structured help resolver — returns (name, help_result) pairs + like the old sequential version but forks per subcommand for parallelism. *) +let help_resolve_par ?(timeout=200) cmd rest name = let max_jobs = num_cores () in - let classified = Array.map (fun name -> - if SSet.mem name manpaged then (name, Skip) - else (name, classify_binary bindir name) - ) bins in + let queue = Queue.create () in + Queue.push (rest, name, 0) queue; + let results = ref [] in + (* pending: (pid, rd, buf, rest, name, depth) *) let pending = ref [] in + let collect rd buf q_rest q_name q_depth = + drain_fd rd buf; + (try Unix.close rd with _ -> ()); + let data = Buffer.contents buf in + let result : (help_result * subcommand list) option = + if String.length data > 0 then + try Marshal.from_string data 0 with _ -> None + else None in + match result with + | None -> () + | Some (r, subs) -> + let at_limit = q_depth >= 5 || List.length !results >= max_resolve_results in + results := (q_name, r) :: !results; + if not at_limit then + List.iter (fun (sc : subcommand) -> + Queue.push (q_rest @ [sc.name], q_name ^ " " ^ sc.name, q_depth + 1) queue + ) subs in let reap () = - pending := List.filter (fun pid -> + pending := List.filter (fun (pid, rd, buf, q_rest, q_name, q_depth) -> + drain_fd rd buf; match Unix.waitpid [Unix.WNOHANG] pid with - | (0, _) -> true | _ -> false - | exception Unix.Unix_error (Unix.ECHILD, _, _) -> false + | (0, _) -> true + | _ -> collect rd buf q_rest q_name q_depth; false + | exception Unix.Unix_error (Unix.ECHILD, _, _) -> + (try Unix.close rd with _ -> ()); false ) !pending in - let wait_slot () = + let wait_for_slot () = while List.length !pending >= max_jobs do reap (); - if List.length !pending >= max_jobs then - (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); - reap () + if List.length !pending >= max_jobs then begin + let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in + ignore (Unix.select fds [] [] 0.05) + end done in - Array.iter (fun (name, cls) -> - match cls with - | Skip -> () - | Try_help | Try_native_and_help -> - wait_slot (); + while not (Queue.is_empty queue) || !pending <> [] do + while not (Queue.is_empty queue) do + let (q_rest, q_name, q_depth) = Queue.pop queue in + wait_for_slot (); + let (rd, wr) = Unix.pipe () in let pid = Unix.fork () in if pid = 0 then begin - (try - let path = Filename.concat bindir name in - let native_ok = match cls with - | Try_native_and_help -> - (match try_native_completion path with - | Some src -> - write_file (Filename.concat outdir (filename_of_cmd name ^ ".nu")) src; - true - | None -> false) - | _ -> false in - if not native_ok then begin - match help_resolve ~timeout:200 path [] name with - | Some content when String.length content > 0 -> - let m = module_name_of name in - let src = Printf.sprintf "module %s {\n%s}\n\nuse %s *\n" m content m in - write_file (Filename.concat outdir (filename_of_cmd name ^ ".nu")) src - | _ -> () - end; - exit 0 - with _ -> exit 1) + Unix.close rd; + List.iter (fun (_, prd, _, _, _, _) -> + try Unix.close prd with _ -> ()) !pending; + let result = + let text = match run_cmd (cmd :: q_rest @ ["--help"]) timeout with + | Some _ as r -> r + | None -> run_cmd (cmd :: q_rest @ ["-h"]) timeout in + match text with + | None -> None + | Some text -> + (match parse_help text with + | Error _ -> None + | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] -> None + | Ok r -> + let self_listed = match q_rest with + | [] -> false + | _ -> + let leaf = List.nth q_rest (List.length q_rest - 1) in + List.exists (fun (sc : subcommand) -> sc.name = leaf) r.subcommands in + if self_listed then None + else + let at_limit = q_depth >= 5 in + let subs = if at_limit then [] else r.subcommands in + Some ({ r with subcommands = [] }, subs)) in + let oc = Unix.out_channel_of_descr wr in + Marshal.to_channel oc (result : (help_result * subcommand list) option) []; + close_out oc; + exit 0 end else begin - pending := pid :: !pending; - done_cmds := SSet.add name !done_cmds + Unix.close wr; + pending := (pid, rd, Buffer.create 4096, q_rest, q_name, q_depth) :: !pending + end + done; + if !pending <> [] then begin + reap (); + if !pending <> [] && Queue.is_empty queue then begin + let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in + ignore (Unix.select fds [] [] 0.05) end - ) classified; - while !pending <> [] do - (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); - reap () - done; - List.iter (fun section -> - let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in - if Sys.file_exists subdir && Sys.is_directory subdir then begin - let files = Sys.readdir subdir in - Array.sort String.compare files; - Array.iter (fun file -> - match process_manpage (Filename.concat subdir file) with - | None -> () - | Some (cmd, result) -> - let base = List.hd (String.split_on_char ' ' cmd) in - if SSet.mem cmd !done_cmds then () - else begin - done_cmds := SSet.add cmd !done_cmds; - let outpath = Filename.concat outdir (filename_of_cmd base ^ ".nu") in - if Sys.file_exists outpath then begin - let existing = - let ic = open_in outpath in - let n = in_channel_length ic in - let s = Bytes.create n in - really_input ic s 0 n; close_in ic; - Bytes.to_string s in - let mod_name = module_name_of base in - let use_line = Printf.sprintf "\nuse %s *\n" mod_name in - let base_content = - if contains_str existing use_line then - String.sub existing 0 - (Str.search_forward (Str.regexp_string use_line) existing 0) - else existing in - write_file outpath - (String.concat "" [base_content; generate_extern cmd result; use_line]) - end else - write_file outpath (generate_module base result) - end - ) files end - ) command_sections + done; + List.rev !results -let cmd_demo () = - Printf.printf "# Generated by: inshellah demo\n\n"; - match parse_help - {|Usage: ls [OPTION]... [FILE]... - -a, --all do not ignore entries starting with . - -A, --almost-all do not list implied . and .. - --block-size=SIZE with -l, scale sizes by SIZE when printing - --color[=WHEN] color the output WHEN - -h, --human-readable with -l and -s, print sizes like 1K 234M 2G etc. - --help display this help and exit - --version output version information and exit -|} with - | Ok r -> print_string (generate_extern "ls" r) - | Error msg -> Printf.eprintf "parse error: %s\n" msg +(* Index: fork-per-binary pattern with pipe-based result marshaling. + Each child handles one binary completely (including subcommand resolution) + and marshals results back via pipe. Children use help_resolve_par + which forks per subcommand for parallelism. *) +let cmd_index bindirs mandirs ignorelist dir = + ensure_dir dir; + let done_cmds = ref SSet.empty in + let n_results = ref 0 in + let index_bindir bindir mandir = + if not (Sys.file_exists bindir && Sys.is_directory bindir) then + Printf.eprintf "skipping %s (not found)\n" bindir + else begin + let bins = Sys.readdir bindir in + Array.sort String.compare bins; + let manpaged = if Sys.file_exists mandir && Sys.is_directory mandir + then manpaged_commands mandir else SSet.empty in + let max_jobs = num_cores () in + let classified = Array.map (fun name -> + if SSet.mem name manpaged || SSet.mem name ignorelist then (name, Skip) + else (name, classify_binary bindir name) + ) bins in + let pending = ref [] in + let process_result name rd buf = + drain_fd rd buf; + (try Unix.close rd with _ -> ()); + let data = Buffer.contents buf in + if String.length data > 0 then begin + let result : [`Native of string | `Parsed of (string * help_result) list | `None] = + try Marshal.from_string data 0 with _ -> `None in + (match result with + | `Native src -> + write_native ~dir name src; + incr n_results + | `Parsed pairs -> + List.iter (fun (cmd_name, r) -> + if not (SSet.mem cmd_name !done_cmds) then begin + write_result ~dir ~source:"help" cmd_name r; + done_cmds := SSet.add cmd_name !done_cmds; + incr n_results + end + ) pairs + | `None -> ()) + end; + done_cmds := SSet.add name !done_cmds in + let reap () = + pending := List.filter (fun (pid, rd, buf, name) -> + drain_fd rd buf; + match Unix.waitpid [Unix.WNOHANG] pid with + | (0, _) -> true + | _ -> + process_result name rd buf; + false + | exception Unix.Unix_error (Unix.ECHILD, _, _) -> + (try Unix.close rd with _ -> ()); false + ) !pending in + let wait_for_slot () = + while List.length !pending >= max_jobs do + reap (); + if List.length !pending >= max_jobs then begin + let fds = List.map (fun (_, rd, _, _) -> rd) !pending in + ignore (Unix.select fds [] [] 0.05) + end + done in + Array.iter (fun (name, cls) -> + match cls with + | Skip -> () + | Try_help | Try_native_and_help -> + wait_for_slot (); + let (rd, wr) = Unix.pipe () in + let pid = Unix.fork () in + if pid = 0 then begin + Unix.close rd; + List.iter (fun (_, prd, _, _) -> + try Unix.close prd with _ -> ()) !pending; + let result = + try + let path = Filename.concat bindir name in + let native = match cls with + | Try_native_and_help -> + (match try_native_completion path with + | Some src -> Some src | None -> None) + | _ -> None in + match native with + | Some src -> `Native src + | None -> + let pairs = help_resolve_par ~timeout:200 path [] name in + if pairs <> [] then `Parsed pairs else `None + with _ -> `None in + let oc = Unix.out_channel_of_descr wr in + Marshal.to_channel oc + (result : [`Native of string | `Parsed of (string * help_result) list | `None]) []; + close_out oc; + exit 0 + end else begin + Unix.close wr; + pending := (pid, rd, Buffer.create 4096, name) :: !pending + end + ) classified; + while !pending <> [] do + reap (); + if !pending <> [] then begin + let fds = List.map (fun (_, rd, _, _) -> rd) !pending in + ignore (Unix.select fds [] [] 0.05) + end + done; + (* Phase 2: manpages *) + if Sys.file_exists mandir && Sys.is_directory mandir then + List.iter (fun section -> + let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in + if Sys.file_exists subdir && Sys.is_directory subdir then begin + let files = Sys.readdir subdir in + Array.sort String.compare files; + Array.iter (fun file -> + match process_manpage (Filename.concat subdir file) with + | None -> () + | Some (cmd, result) -> + if not (SSet.mem cmd !done_cmds) then begin + write_result ~dir ~source:"manpage" cmd result; + done_cmds := SSet.add cmd !done_cmds; + incr n_results + end + ) files + end + ) command_sections + end in + List.iter2 index_bindir bindirs mandirs; + Printf.printf "indexed %d commands into %s\n" !n_results dir + +let cmd_dump dirs = + let cmds = all_commands dirs in + Printf.printf "%d commands\n" (List.length cmds); + List.iter (fun cmd -> + let src = match file_type_of dirs cmd with + | Some s -> s | None -> "?" in + Printf.printf " %-40s [%s]\n" cmd src + ) cmds + +let find_in_path name = + try + let path_var = Sys.getenv "PATH" in + let dirs = String.split_on_char ':' path_var in + let rec go = function + | [] -> None + | dir :: rest -> + let p = Filename.concat dir name in + if is_executable p then Some p else go rest in + go dirs + with Not_found -> None + +let resolve_and_cache ~dir name path = + let pairs = help_resolve_par ~timeout:200 path [] name in + if pairs <> [] then begin + ensure_dir dir; + List.iter (fun (cmd_name, r) -> write_result ~dir cmd_name r) pairs; + Some pairs + end else None + +let completion_json value desc = + Printf.sprintf "{\"value\":\"%s\",\"description\":\"%s\"}" + (escape_json value) (escape_json desc) + +let flag_completions prefix entries = + let candidates = ref [] in + List.iter (fun (e : entry) -> + let desc = match e.param with + | Some (Mandatory p) -> if e.desc <> "" then e.desc ^ " <" ^ p ^ ">" else "<" ^ p ^ ">" + | Some (Optional p) -> if e.desc <> "" then e.desc ^ " [" ^ p ^ "]" else "[" ^ p ^ "]" + | None -> e.desc in + (match e.switch with + | Long l -> + let flag = "--" ^ l in + if String.starts_with ~prefix flag then + candidates := completion_json flag desc :: !candidates + | Short c -> + let flag = Printf.sprintf "-%c" c in + if String.starts_with ~prefix flag then + candidates := completion_json flag desc :: !candidates + | Both (c, l) -> + let long = "--" ^ l in + let short = Printf.sprintf "-%c" c in + if String.starts_with ~prefix long then + candidates := completion_json long desc :: !candidates + else if String.starts_with ~prefix short then + candidates := completion_json short desc :: !candidates) + ) entries; + List.rev !candidates + +let cmd_complete spans user_dir system_dirs = + match spans with + | [] -> print_string "[]\n" + | cmd_name :: rest -> + let dirs = user_dir :: system_dirs in + (* Try longest subcommand match first: "git add" before "git" *) + let rec find_result tokens = + match tokens with + | [] -> None + | _ -> + let try_name = String.concat " " tokens in + match lookup dirs try_name with + | Some r -> Some (try_name, r, List.length tokens) + | None -> + find_result (List.rev (List.tl (List.rev tokens))) in + let all_tokens = cmd_name :: (match rest with + | _ :: _ when List.length rest >= 1 -> + (* exclude the partial last token from subcommand lookup *) + List.rev (List.tl (List.rev rest)) + | _ -> []) in + let found = find_result all_tokens in + (* If not found at all, try on-the-fly resolution for the base command *) + let result = match found with + | Some _ -> found + | None -> + (match find_in_path cmd_name with + | Some path -> + (match resolve_and_cache ~dir:user_dir cmd_name path with + | Some _pairs -> + find_result all_tokens + | None -> None) + | None -> None) in + let partial = match rest with + | [] -> "" + | _ -> List.nth rest (List.length rest - 1) in + (match result with + | None -> print_string "[]\n" + | Some (_matched_name, r, _depth) -> + let candidates = ref [] in + if String.starts_with ~prefix:"-" partial then + candidates := flag_completions partial r.entries + else begin + List.iter (fun (sc : subcommand) -> + if partial = "" || String.starts_with ~prefix:partial sc.name then + candidates := completion_json sc.name sc.desc :: !candidates + ) r.subcommands; + candidates := List.rev !candidates; + if partial = "" || !candidates = [] then + candidates := !candidates @ flag_completions partial r.entries + end; + Printf.printf "[%s]\n" (String.concat "," !candidates)) + +let cmd_query cmd dirs = + match lookup_raw dirs cmd with + | None -> + Printf.eprintf "not found: %s\n" cmd; exit 1 + | Some data -> + print_string data; print_newline () + +let load_ignorelist path = + try + let ic = open_in path in + let lines = ref [] in + (try while true do + let line = String.trim (input_line ic) in + if String.length line > 0 && line.[0] <> '#' then + lines := line :: !lines + done with End_of_file -> ()); + close_in ic; + SSet.of_list !lines + with _ -> SSet.empty + +let parse_index_args args = + let rec go prefixes dir ignore = function + | [] -> (List.rev prefixes, dir, ignore) + | "--dir" :: path :: rest -> go prefixes path ignore rest + | "--ignore" :: path :: rest -> go prefixes dir (SSet.union ignore (load_ignorelist path)) rest + | prefix :: rest -> go (prefix :: prefixes) dir ignore rest in + go [] (default_store_path ()) SSet.empty args + +let parse_dir_args args = + let rec go user_dir system_dirs rest_args = function + | [] -> (user_dir, system_dirs, List.rev rest_args) + | "--dir" :: path :: rest -> go path system_dirs rest_args rest + | "--system-dir" :: path :: rest -> go user_dir (path :: system_dirs) rest_args rest + | arg :: rest -> go user_dir system_dirs (arg :: rest_args) rest in + go (default_store_path ()) [] [] args let () = match Array.to_list Sys.argv |> List.tl with - | ["generate"; bindir; mandir; "-o"; outdir] -> cmd_generate bindir mandir outdir + | "index" :: rest -> + let (prefixes, dir, ignorelist) = parse_index_args rest in + if prefixes = [] then (Printf.eprintf "error: index requires at least one prefix dir\n"; exit 1); + let bindirs = List.map (fun p -> Filename.concat p "bin") prefixes in + let mandirs = List.map (fun p -> Filename.concat p "share/man") prefixes in + cmd_index bindirs mandirs ignorelist dir + | "complete" :: rest -> + let (user_dir, system_dirs, spans) = parse_dir_args rest in + cmd_complete spans user_dir system_dirs + | "query" :: rest -> + let (user_dir, system_dirs, args) = parse_dir_args rest in + (match args with + | [cmd] -> cmd_query cmd (user_dir :: system_dirs) + | _ -> Printf.eprintf "error: query CMD [--dir PATH] [--system-dir PATH]\n"; exit 1) + | "dump" :: rest -> + let (user_dir, system_dirs, _) = parse_dir_args rest in + cmd_dump (user_dir :: system_dirs) | ["manpage"; file] -> cmd_manpage file | ["manpage-dir"; dir] -> cmd_manpage_dir dir - | "help" :: rest -> cmd_help rest - | ["parse-help"; cmd] -> cmd_parse_help cmd - | ["demo"] -> cmd_demo () | _ -> usage () diff --git a/dune-project b/dune-project index 514770f..89cddbb 100644 --- a/dune-project +++ b/dune-project @@ -24,6 +24,7 @@ angstrom angstrom-unix camlzip + sqlite3 (ppx_inline_test :with-test)) (tags (shell completions nushell parser angstrom))) diff --git a/flake.nix b/flake.nix index 90a94f9..64ec763 100644 --- a/flake.nix +++ b/flake.nix @@ -7,89 +7,66 @@ forAllSystems = f: nixpkgs.lib.genAttrs [ "x86_64-linux" "aarch64-linux" ] ( - system: f (import nixpkgs { inherit system; }) system + system: f (import nixpkgs { inherit system; }) ); in { - devShells = forAllSystems ( - pkgs: sys: { - default = pkgs.mkShell { - packages = with pkgs.ocamlPackages; [ - dune_3 - ocaml - angstrom - angstrom-unix - camlzip - ppx_inline_test - ocaml-lsp - ocamlformat - ocamlformat-rpc-lib - utop - ]; - }; - } - ); + devShells = forAllSystems (pkgs: { + default = pkgs.mkShell { + packages = with pkgs.ocamlPackages; [ + dune_3 + ocaml + angstrom + angstrom-unix + camlzip + ppx_inline_test + ocaml-lsp + ocamlformat + ocamlformat-rpc-lib + utop + ]; + }; + }); - packages = forAllSystems ( - pkgs: sys: { - default = pkgs.ocamlPackages.buildDunePackage { - pname = "inshellah"; - version = "0.1"; - src = ./.; - nativeBuildInputs = [ pkgs.git ]; - buildInputs = with pkgs.ocamlPackages; [ - dune_3 - ocaml - angstrom - angstrom-unix - camlzip - ]; + packages = forAllSystems (pkgs: { + default = pkgs.ocamlPackages.buildDunePackage { + pname = "inshellah"; + version = "0.1"; + src = ./.; + nativeBuildInputs = [ pkgs.git ]; + buildInputs = with pkgs.ocamlPackages; [ + dune_3 + ocaml + angstrom + angstrom-unix + camlzip + ]; - meta.mainProgram = "inshellah"; - - }; - } - ); - - checks = forAllSystems ( - pkgs: sys: - let - # Evaluate a minimal NixOS config that enables the module. - # If the module has infinite recursion, this evaluation will fail. - mockSystem = nixpkgs.lib.nixosSystem { - system = sys; - modules = [ - self.nixosModules.default - { - # Minimal config to make NixOS evaluation happy - boot.loader.grub.device = "nodev"; - fileSystems."/" = { device = "/dev/sda1"; fsType = "ext4"; }; - programs.inshellah.enable = true; - environment.systemPackages = [ pkgs.hello ]; - } - ]; - }; - in - { - module-no-infinite-recursion = pkgs.runCommandLocal "inshellah-module-test" { - # Force evaluation of extraSetup and systemPackages at eval time. - # If the module has infinite recursion, this derivation can't even - # be instantiated. - extraSetupLen = builtins.stringLength mockSystem.config.environment.extraSetup; - syspkgCount = builtins.length mockSystem.config.environment.systemPackages; - } '' - echo "environment.extraSetup length: $extraSetupLen" - echo "environment.systemPackages count: $syspkgCount" - touch $out - ''; - } - ); + meta.mainProgram = "inshellah"; + }; + }); nixosModules.default = - { pkgs, ... }: + { + pkgs, + lib, + config, + ... + }: { imports = [ ./nix/module.nix ]; programs.inshellah.package = self.packages.${pkgs.stdenv.hostPlatform.system}.default; + programs.inshellah.snippet = '' + let cache_home = if ('XDG_CACHE_HOME' in $env) { "--dir " + $env.XDG_CACHE_HOME } else if ('HOME' in $env) { "--dir " + $env.HOME + '/.cache/inshellah' } else { "" } + let inshellah_complete = {|spans| + ${lib.getExe config.programs.inshellah.package} complete $spans $cache_home --system-dir /run/current-system/sw/${config.programs.inshellah.completionsPath} + } + $env.config.completions.external = { + enable: true + max_results: 100 + completer: $inshellah_complete + } + ''; }; }; } diff --git a/inshellah.opam b/inshellah.opam index 8d020f1..cf4885a 100644 --- a/inshellah.opam +++ b/inshellah.opam @@ -15,6 +15,7 @@ depends: [ "angstrom" "angstrom-unix" "camlzip" + "sqlite3" "ppx_inline_test" {with-test} "odoc" {with-doc} ] diff --git a/lib/parser.ml b/lib/parser.ml index 81bf201..3fcc807 100644 --- a/lib/parser.ml +++ b/lib/parser.ml @@ -234,7 +234,43 @@ let subcommand_entry = else char ' ' *> char ' ' *> inline_ws *> rest_of_line <* eol >>| fun desc -> - { name; desc = String.trim desc } + { name = String.lowercase_ascii name; + desc = let t = String.trim desc in + if String.length t >= 2 && t.[0] = '-' && t.[1] = ' ' then + String.trim (String.sub t 2 (String.length t - 2)) + else t } + +(* --- Section header detection --- *) + +(* Detect lines like "Arguments:", "POSITIONALS:", etc. that introduce + positional-argument sections (where name+desc lines are NOT subcommands) *) +let is_arg_section s = + let lc = String.lowercase_ascii (String.trim s) in + let base = if String.ends_with ~suffix:":" lc + then String.sub lc 0 (String.length lc - 1) |> String.trim + else lc in + base = "arguments" || base = "args" || base = "positionals" + || base = "positional arguments" + +(* A section header: left-aligned (or lightly indented) text ending with ':', + not starting with '-'. Must be consumed BEFORE subcommand_entry in choice. *) +let section_header = + available >>= fun avail -> + if avail = 0 then fail "eof" + else + peek_string (min avail 80) >>= fun preview -> + (* Extract just the first line from the preview *) + let first_line = match String.index_opt preview '\n' with + | Some i -> String.sub preview 0 i + | None -> preview in + let t = String.trim first_line in + let len = String.length t in + let indent = let i = ref 0 in + while !i < String.length first_line && (first_line.[!i] = ' ' || first_line.[!i] = '\t') do incr i done; + !i in + if len >= 2 && t.[len - 1] = ':' && t.[0] <> '-' && indent <= 4 then + rest_of_line <* eol_strict >>| fun line -> is_arg_section line + else fail "not a section header" (* --- Top-level parser --- *) @@ -247,6 +283,10 @@ let help_parser = let try_entry = entry >>| fun e -> `Entry e in + (* Detect section headers to track arg vs command sections *) + let try_section = + section_header >>| fun is_arg -> `Section is_arg + in (* Try to parse a subcommand *) let try_subcommand = subcommand_entry >>| fun sc -> `Subcommand sc @@ -255,10 +295,17 @@ let help_parser = let try_skip = skip_non_option_line >>| fun () -> `Skip in - many (choice [ try_entry; try_subcommand; try_skip ]) >>| fun items -> + many (choice [ try_entry; try_section; try_subcommand; try_skip ]) >>| fun items -> let entries = List.filter_map (function `Entry e -> Some e | _ -> None) items in + (* Only keep subcommands that didn't appear under an Arguments/Positionals section *) let subcommands = - List.filter_map (function `Subcommand sc -> Some sc | _ -> None) items + List.fold_left (fun (in_arg_sec, acc) item -> + match item with + | `Section is_arg -> (is_arg, acc) + | `Subcommand sc when not in_arg_sec -> (in_arg_sec, sc :: acc) + | _ -> (in_arg_sec, acc) + ) (false, []) items + |> snd |> List.rev |> List.fold_left (fun acc sc -> match List.assoc_opt sc.name acc with | Some prev when String.length prev.desc >= String.length sc.desc -> acc diff --git a/lib/store.ml b/lib/store.ml new file mode 100644 index 0000000..467b798 --- /dev/null +++ b/lib/store.ml @@ -0,0 +1,353 @@ +open Parser + +let default_store_path () = + let cache = try Sys.getenv "XDG_CACHE_HOME" + with Not_found -> Filename.concat (Sys.getenv "HOME") ".cache" in + Filename.concat cache "inshellah" + +let ensure_dir dir = + let rec mkdir_p d = + if Sys.file_exists d then () + else begin mkdir_p (Filename.dirname d); Unix.mkdir d 0o755 end in + mkdir_p dir + +let filename_of_command cmd = + String.map (function + | ' ' -> '_' + | ('a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.') as c -> c + | _ -> '-') cmd + +let command_of_filename base = + String.map (function '_' -> ' ' | c -> c) base + +(* --- JSON serialization of help_result --- *) + +let escape_json s = + let buf = Buffer.create (String.length s + 4) in + String.iter (fun c -> match c with + | '"' -> Buffer.add_string buf "\\\"" + | '\\' -> Buffer.add_string buf "\\\\" + | '\n' -> Buffer.add_string buf "\\n" + | '\t' -> Buffer.add_string buf "\\t" + | '\r' -> Buffer.add_string buf "\\r" + | c when Char.code c < 0x20 -> + Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c)) + | c -> Buffer.add_char buf c + ) s; + Buffer.contents buf + +let json_string s = Printf.sprintf "\"%s\"" (escape_json s) +let json_null = "null" + +let json_switch_of = function + | Short c -> Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (json_string (String.make 1 c)) + | Long l -> Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string l) + | Both (c, l) -> + Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}" + (json_string (String.make 1 c)) (json_string l) + +let json_param_of = function + | None -> json_null + | Some (Mandatory p) -> + Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}" (json_string p) + | Some (Optional p) -> + Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}" (json_string p) + +let json_entry_of e = + Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}" + (json_switch_of e.switch) (json_param_of e.param) (json_string e.desc) + +let json_subcommand_of sc = + Printf.sprintf "{\"name\":%s,\"desc\":%s}" (json_string sc.name) (json_string sc.desc) + +let json_positional_of p = + Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}" + (json_string p.pos_name) p.optional p.variadic + +let json_list f items = + "[" ^ String.concat "," (List.map f items) ^ "]" + +let json_of_help_result ?(source="help") r = + Printf.sprintf "{\"source\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}" + (json_string source) + (json_list json_entry_of r.entries) + (json_list json_subcommand_of r.subcommands) + (json_list json_positional_of r.positionals) + +(* --- JSON deserialization --- *) + +(* Minimal JSON parser — just enough for our own output *) + +type json = + | Jnull + | Jbool of bool + | Jstring of string + | Jarray of json list + | Jobject of (string * json) list + +let json_get key = function + | Jobject pairs -> (try List.assoc key pairs with Not_found -> Jnull) + | _ -> Jnull + +let json_to_string = function Jstring s -> s | _ -> "" +let json_to_bool = function Jbool b -> b | _ -> false +let json_to_list = function Jarray l -> l | _ -> [] + +exception Json_error of string + +let parse_json s = + let len = String.length s in + let pos = ref 0 in + let peek () = if !pos < len then s.[!pos] else '\x00' in + let advance () = incr pos in + let skip_ws () = + while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t' + || s.[!pos] = '\n' || s.[!pos] = '\r') do + advance () + done in + let expect c = + skip_ws (); + if peek () <> c then + raise (Json_error (Printf.sprintf "expected '%c' at %d" c !pos)); + advance () in + let rec parse_value () = + skip_ws (); + match peek () with + | '"' -> Jstring (parse_string ()) + | '{' -> parse_object () + | '[' -> parse_array () + | 'n' -> advance (); advance (); advance (); advance (); Jnull + | 't' -> advance (); advance (); advance (); advance (); Jbool true + | 'f' -> + advance (); advance (); advance (); advance (); advance (); Jbool false + | c -> raise (Json_error (Printf.sprintf "unexpected '%c' at %d" c !pos)) + and parse_string () = + expect '"'; + let buf = Buffer.create 32 in + while peek () <> '"' do + if peek () = '\\' then begin + advance (); + (match peek () with + | '"' -> Buffer.add_char buf '"' + | '\\' -> Buffer.add_char buf '\\' + | 'n' -> Buffer.add_char buf '\n' + | 't' -> Buffer.add_char buf '\t' + | 'r' -> Buffer.add_char buf '\r' + | 'u' -> + advance (); + let hex = String.sub s !pos 4 in + pos := !pos + 3; + let code = int_of_string ("0x" ^ hex) in + if code < 128 then Buffer.add_char buf (Char.chr code) + else begin + (* UTF-8 encode *) + if code < 0x800 then begin + Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6))); + Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f))) + end else begin + Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12))); + Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f))); + Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f))) + end + end + | c -> Buffer.add_char buf c); + advance () + end else begin + Buffer.add_char buf (peek ()); + advance () + end + done; + advance (); (* closing quote *) + Buffer.contents buf + and parse_object () = + expect '{'; + skip_ws (); + if peek () = '}' then (advance (); Jobject []) + else begin + let pairs = ref [] in + let cont = ref true in + while !cont do + skip_ws (); + let key = parse_string () in + expect ':'; + let value = parse_value () in + pairs := (key, value) :: !pairs; + skip_ws (); + if peek () = ',' then advance () + else cont := false + done; + expect '}'; + Jobject (List.rev !pairs) + end + and parse_array () = + expect '['; + skip_ws (); + if peek () = ']' then (advance (); Jarray []) + else begin + let items = ref [] in + let cont = ref true in + while !cont do + let v = parse_value () in + items := v :: !items; + skip_ws (); + if peek () = ',' then advance () + else cont := false + done; + expect ']'; + Jarray (List.rev !items) + end + in + parse_value () + +let switch_of_json j = + match json_to_string (json_get "type" j) with + | "short" -> + let c = json_to_string (json_get "char" j) in + Short (if String.length c > 0 then c.[0] else '?') + | "long" -> Long (json_to_string (json_get "name" j)) + | "both" -> + let c = json_to_string (json_get "char" j) in + Both ((if String.length c > 0 then c.[0] else '?'), + json_to_string (json_get "name" j)) + | _ -> Long "?" + +let param_of_json = function + | Jnull -> None + | j -> + let name = json_to_string (json_get "name" j) in + (match json_to_string (json_get "kind" j) with + | "mandatory" -> Some (Mandatory name) + | "optional" -> Some (Optional name) + | _ -> None) + +let entry_of_json j = + { switch = switch_of_json (json_get "switch" j); + param = param_of_json (json_get "param" j); + desc = json_to_string (json_get "desc" j) } + +let subcommand_of_json j = + { name = json_to_string (json_get "name" j); + desc = json_to_string (json_get "desc" j) } + +let positional_of_json j = + { pos_name = json_to_string (json_get "name" j); + optional = json_to_bool (json_get "optional" j); + variadic = json_to_bool (json_get "variadic" j) } + +let help_result_of_json j = + { entries = List.map entry_of_json (json_to_list (json_get "entries" j)); + subcommands = List.map subcommand_of_json (json_to_list (json_get "subcommands" j)); + positionals = List.map positional_of_json (json_to_list (json_get "positionals" j)) } + +(* --- Filesystem operations --- *) + +let write_file path contents = + let oc = open_out path in + output_string oc contents; + close_out oc + +let read_file path = + try + let ic = open_in path in + let n = in_channel_length ic in + let s = Bytes.create n in + really_input ic s 0 n; + close_in ic; + Some (Bytes.to_string s) + with _ -> None + +let write_result ~dir ?(source="help") command result = + let path = Filename.concat dir (filename_of_command command ^ ".json") in + write_file path (json_of_help_result ~source result) + +let write_native ~dir command data = + let path = Filename.concat dir (filename_of_command command ^ ".nu") in + write_file path data + +let find_file dirs command = + let base = filename_of_command command in + let rec go = function + | [] -> None + | dir :: rest -> + let json_path = Filename.concat dir (base ^ ".json") in + if Sys.file_exists json_path then Some json_path + else + let nu_path = Filename.concat dir (base ^ ".nu") in + if Sys.file_exists nu_path then Some nu_path + else go rest in + go dirs + +let lookup dirs command = + let base = filename_of_command command in + let rec go = function + | [] -> None + | dir :: rest -> + let path = Filename.concat dir (base ^ ".json") in + (match read_file path with + | Some data -> + (try Some (help_result_of_json (parse_json data)) + with _ -> None) + | None -> go rest) in + go dirs + +let lookup_raw dirs command = + let base = filename_of_command command in + let rec go = function + | [] -> None + | dir :: rest -> + let json_path = Filename.concat dir (base ^ ".json") in + (match read_file json_path with + | Some _ as r -> r + | None -> + let nu_path = Filename.concat dir (base ^ ".nu") in + match read_file nu_path with + | Some _ as r -> r + | None -> go rest) in + go dirs + +let has_command dirs command = + find_file dirs command <> None + +let all_commands dirs = + let module SSet = Set.Make(String) in + let cmds = ref SSet.empty in + List.iter (fun dir -> + if Sys.file_exists dir && Sys.is_directory dir then + Array.iter (fun f -> + let base = + if Filename.check_suffix f ".json" then + Some (Filename.chop_suffix f ".json") + else if Filename.check_suffix f ".nu" then + Some (Filename.chop_suffix f ".nu") + else None in + match base with + | Some b -> cmds := SSet.add (command_of_filename b) !cmds + | None -> () + ) (Sys.readdir dir) + ) dirs; + SSet.elements !cmds + +let delete ~dir command = + let base = filename_of_command command in + let json_path = Filename.concat dir (base ^ ".json") in + let nu_path = Filename.concat dir (base ^ ".nu") in + (try Sys.remove json_path with Sys_error _ -> ()); + (try Sys.remove nu_path with Sys_error _ -> ()) + +let file_type_of dirs command = + let base = filename_of_command command in + let rec go = function + | [] -> None + | dir :: rest -> + let json_path = Filename.concat dir (base ^ ".json") in + if Sys.file_exists json_path then + (match read_file json_path with + | Some data -> + (try Some (json_to_string (json_get "source" (parse_json data))) + with _ -> Some "json") + | None -> Some "json") + else + let nu_path = Filename.concat dir (base ^ ".nu") in + if Sys.file_exists nu_path then Some "native" + else go rest in + go dirs diff --git a/nix/module.nix b/nix/module.nix index 2778e7c..975263e 100644 --- a/nix/module.nix +++ b/nix/module.nix @@ -1,11 +1,12 @@ -# NixOS module: automatic nushell completion generation +# NixOS module: automatic nushell completion indexing # -# Generates completions using three strategies in priority order: +# Indexes completions using three strategies in priority order: # 1. Native completion generators (e.g. CMD completions nushell) # 2. Manpage parsing # 3. --help output parsing # -# Runs as a single pass during the system profile build. +# Produces a directory of .json/.nu files at build time. +# The `complete` command reads from this directory as a system overlay. # # Usage: # { pkgs, ... }: { @@ -25,53 +26,50 @@ let in { options.programs.inshellah = { - enable = lib.mkEnableOption "nushell completion generation via inshellah"; + enable = lib.mkEnableOption "nushell completion indexing via inshellah"; package = lib.mkOption { type = lib.types.package; - description = "The inshellah package to use for generating completions."; + description = "The inshellah package to use for indexing completions."; }; - generatedCompletionsPath = lib.mkOption { + completionsPath = lib.mkOption { type = lib.types.str; - default = "/share/nushell/vendor/autoload"; + default = "/share/inshellah"; description = '' - Subdirectory within the merged environment where completion files - are placed. The default matches nushell's vendor autoload convention - (discovered via XDG_DATA_DIRS). + Subdirectory within the system profile where completion files + are placed. Used as the system-dir for the completer. ''; }; + + ignoreCommands = lib.mkOption { + type = lib.types.listOf lib.types.str; + default = [ ]; + example = [ "problematic-tool" ]; + description = '' + List of command names to skip during completion indexing. + ''; + }; + + snippet = lib.mkOption { + type = lib.types.str; + readOnly = true; + }; }; config = lib.mkIf cfg.enable { - environment.pathsToLink = [ cfg.generatedCompletionsPath ]; - environment.extraSetup = let inshellah = "${cfg.package}/bin/inshellah"; - destDir = "$out${cfg.generatedCompletionsPath}"; - segments = lib.filter (s: s != "") (lib.splitString "/" cfg.generatedCompletionsPath); - derefPath = lib.concatMapStringsSep "\n " (seg: '' - _cur="$_cur/${seg}" - if [ -L "$_cur" ]; then - _target=$(readlink "$_cur") - rm "$_cur" - mkdir -p "$_cur" - if [ -d "$_target" ]; then - cp -rT "$_target" "$_cur" - chmod -R u+w "$_cur" - fi - fi'') segments; + destDir = "$out${cfg.completionsPath}"; + ignoreFile = pkgs.writeText "inshellah-ignore" (lib.concatStringsSep "\n" cfg.ignoreCommands); + ignoreFlag = lib.optionalString (cfg.ignoreCommands != [ ]) " --ignore ${ignoreFile}"; in '' - _cur="$out" - ${derefPath} mkdir -p ${destDir} - # Generate all completions in one pass: - # native generators > manpages > --help fallback if [ -d "$out/bin" ] && [ -d "$out/share/man" ]; then - ${inshellah} generate "$out/bin" "$out/share/man" -o ${destDir} \ + ${inshellah} index "$out" --dir ${destDir}${ignoreFlag} \ 2>/dev/null || true fi