From 1d0d3465c17a9d39f3cded691f04e1fba3159f2c Mon Sep 17 00:00:00 2001 From: atagen Date: Mon, 23 Mar 2026 02:17:42 +1100 Subject: [PATCH 1/6] SAVE GAME --- bin/main.ml | 236 ++++++++++++++++++++++++++++++++- dune-project | 1 + flake.nix | 2 + lib/dune | 2 +- lib/parser.ml | 53 +++++++- lib/store.ml | 344 +++++++++++++++++++++++++++++++++++++++++++++++++ nix/module.nix | 13 +- 7 files changed, 641 insertions(+), 10 deletions(-) create mode 100644 lib/store.ml diff --git a/bin/main.ml b/bin/main.ml index ce1a179..9ac0556 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -1,14 +1,21 @@ open Inshellah.Parser open Inshellah.Manpage open Inshellah.Nushell +open Inshellah.Store module SSet = Set.Make(String) let usage () = Printf.eprintf - {|inshellah - generate nushell completions + {|inshellah - nushell completions engine Usage: + inshellah index PREFIX... [--db PATH] + Index completions into a SQLite database. + PREFIX is a directory containing bin/ and share/man/. + Default db: $XDG_CACHE_HOME/inshellah/completions.db + inshellah dump [--db PATH] + Show stats and commands in the database. inshellah generate BINDIR MANDIR -o OUTDIR Full generation: native completions, manpages, and --help fallback. One .nu file per command. @@ -250,6 +257,8 @@ let cmd_manpage_dir dir = ) (Sys.readdir subdir) ) command_sections +let max_resolve_results = 500 + let help_resolve ?(timeout=10_000) cmd rest name = let max_jobs = num_cores () in let queue = Queue.create () in @@ -269,7 +278,7 @@ let help_resolve ?(timeout=10_000) cmd rest name = (escape_nu p_name) (escape_nu p_desc) :: !results | Some (code, subs) -> results := code :: !results; - if p_depth < 5 then + if p_depth < 5 && List.length !results < max_resolve_results then List.iter (fun (sc_name, sc_desc) -> Queue.push (p_rest @ [sc_name], p_name ^ " " ^ sc_name, @@ -311,6 +320,17 @@ let help_resolve ?(timeout=10_000) cmd rest name = | Error _ -> None | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] -> None | Ok r -> + (* If the subcommand we just queried appears in its own + subcommand list, the command is echoing the parent help + (e.g. nil ignores subcommands when --help is present). + Abandon this branch to avoid infinite recursion. *) + let self_listed = match q_rest with + | [] -> false + | _ -> + let leaf = List.nth q_rest (List.length q_rest - 1) in + List.exists (fun (sc : subcommand) -> sc.name = leaf) r.subcommands in + if self_listed then None + else let at_limit = q_depth >= 5 in let code = generate_extern q_name (if at_limit then r else { r with subcommands = [] }) in @@ -390,14 +410,14 @@ let manpaged_commands mandir = else acc ) SSet.empty command_sections -let cmd_generate bindir mandir outdir = +let cmd_generate bindir mandir outdir ignorelist = let done_cmds = ref SSet.empty in let bins = Sys.readdir bindir in Array.sort String.compare bins; let manpaged = manpaged_commands mandir in let max_jobs = num_cores () in let classified = Array.map (fun name -> - if SSet.mem name manpaged then (name, Skip) + if SSet.mem name manpaged || SSet.mem name ignorelist then (name, Skip) else (name, classify_binary bindir name) ) bins in let pending = ref [] in @@ -487,6 +507,176 @@ let cmd_generate bindir mandir outdir = end ) command_sections + +(* Sequential help resolver for use inside forked children. + No forking — just iterates through subcommands with run_cmd directly. *) +let help_resolve_seq ?(timeout=200) cmd rest name = + let queue = Queue.create () in + Queue.push (rest, name, 0) queue; + let results = ref [] in + while not (Queue.is_empty queue) do + let (q_rest, q_name, q_depth) = Queue.pop queue in + let text = match run_cmd (cmd :: q_rest @ ["--help"]) timeout with + | Some _ as r -> r + | None -> run_cmd (cmd :: q_rest @ ["-h"]) timeout in + match text with + | None -> () + | Some text -> + (match parse_help text with + | Error _ -> () + | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] -> () + | Ok r -> + let self_listed = match q_rest with + | [] -> false + | _ -> + let leaf = List.nth q_rest (List.length q_rest - 1) in + List.exists (fun (sc : subcommand) -> sc.name = leaf) r.subcommands in + if not self_listed then begin + let at_limit = q_depth >= 5 || List.length !results >= max_resolve_results in + results := (q_name, { r with subcommands = [] }) :: !results; + if not at_limit then + List.iter (fun (sc : subcommand) -> + Queue.push (q_rest @ [sc.name], q_name ^ " " ^ sc.name, q_depth + 1) queue + ) r.subcommands + end) + done; + List.rev !results + +(* Index: mirrors cmd_generate's fork-per-binary pattern. + Each child handles one binary completely (including subcommand resolution) + and marshals all results back in one shot. No nested forking — children + use help_resolve_seq which is purely sequential. *) +let cmd_index bindirs mandirs ignorelist db_path = + let db = init db_path in + begin_transaction db; + let done_cmds = ref SSet.empty in + let n_results = ref 0 in + let index_bindir bindir mandir = + if not (Sys.file_exists bindir && Sys.is_directory bindir) then + Printf.eprintf "skipping %s (not found)\n" bindir + else begin + let bins = Sys.readdir bindir in + Array.sort String.compare bins; + let manpaged = if Sys.file_exists mandir && Sys.is_directory mandir + then manpaged_commands mandir else SSet.empty in + let max_jobs = num_cores () in + let classified = Array.map (fun name -> + if SSet.mem name manpaged || SSet.mem name ignorelist then (name, Skip) + else (name, classify_binary bindir name) + ) bins in + let pending = ref [] in + let reap () = + pending := List.filter (fun (pid, rd, name) -> + match Unix.waitpid [Unix.WNOHANG] pid with + | (0, _) -> true + | _ -> + let ic = Unix.in_channel_of_descr rd in + (let[@warning "-8"] result : [`Native of string | `Parsed of (string * help_result) list | `None] = + try Marshal.from_channel ic with _ -> `None in + match result with + | `Native src -> + upsert_raw db ~source:"native" name src; + incr n_results + | `Parsed pairs -> + List.iter (fun (cmd_name, r) -> + if not (SSet.mem cmd_name !done_cmds) then begin + upsert db ~source:"help" cmd_name r; + done_cmds := SSet.add cmd_name !done_cmds; + incr n_results + end + ) pairs + | `None -> ()); + close_in ic; + done_cmds := SSet.add name !done_cmds; + false + | exception Unix.Unix_error (Unix.ECHILD, _, _) -> + (try Unix.close rd with _ -> ()); false + ) !pending in + let wait_slot () = + while List.length !pending >= max_jobs do + reap (); + if List.length !pending >= max_jobs then + (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); + reap () + done in + Array.iter (fun (name, cls) -> + match cls with + | Skip -> () + | Try_help | Try_native_and_help -> + wait_slot (); + let (rd, wr) = Unix.pipe () in + let pid = Unix.fork () in + if pid = 0 then begin + Unix.close rd; + List.iter (fun (_, prd, _) -> + try Unix.close prd with _ -> ()) !pending; + let result = + try + let path = Filename.concat bindir name in + let native = match cls with + | Try_native_and_help -> + (match try_native_completion path with + | Some src -> Some src | None -> None) + | _ -> None in + match native with + | Some src -> `Native src + | None -> + let pairs = help_resolve_seq ~timeout:200 path [] name in + if pairs <> [] then `Parsed pairs else `None + with _ -> `None in + let oc = Unix.out_channel_of_descr wr in + Marshal.to_channel oc + (result : [`Native of string | `Parsed of (string * help_result) list | `None]) []; + close_out oc; + exit 0 + end else begin + Unix.close wr; + pending := (pid, rd, name) :: !pending + end + ) classified; + while !pending <> [] do + (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); + reap () + done; + (* Phase 2: manpages *) + if Sys.file_exists mandir && Sys.is_directory mandir then + List.iter (fun section -> + let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in + if Sys.file_exists subdir && Sys.is_directory subdir then begin + let files = Sys.readdir subdir in + Array.sort String.compare files; + Array.iter (fun file -> + match process_manpage (Filename.concat subdir file) with + | None -> () + | Some (cmd, result) -> + if not (SSet.mem cmd !done_cmds) then begin + upsert db ~source:"manpage" cmd result; + done_cmds := SSet.add cmd !done_cmds; + incr n_results + end + ) files + end + ) command_sections + end in + List.iter2 index_bindir bindirs mandirs; + commit db; + Printf.printf "indexed %d commands into %s\n" !n_results db_path; + close db + +let cmd_dump db_path = + let db = init db_path in + let (count, sources) = stats db in + Printf.printf "database: %s\n" db_path; + Printf.printf "commands: %d (from %d sources)\n" count sources; + let cmds = all_commands db in + List.iter (fun cmd -> + match lookup db cmd with + | None -> () + | Some (_data, source) -> + Printf.printf " %-40s [%s]\n" cmd source + ) cmds; + close db + let cmd_demo () = Printf.printf "# Generated by: inshellah demo\n\n"; match parse_help @@ -502,9 +692,45 @@ let cmd_demo () = | Ok r -> print_string (generate_extern "ls" r) | Error msg -> Printf.eprintf "parse error: %s\n" msg +let load_ignorelist path = + try + let ic = open_in path in + let lines = ref [] in + (try while true do + let line = String.trim (input_line ic) in + if String.length line > 0 && line.[0] <> '#' then + lines := line :: !lines + done with End_of_file -> ()); + close_in ic; + SSet.of_list !lines + with _ -> SSet.empty + +let parse_index_args args = + let rec go prefixes db ignore = function + | [] -> (List.rev prefixes, db, ignore) + | "--db" :: path :: rest -> go prefixes path ignore rest + | "--ignore" :: path :: rest -> go prefixes db (SSet.union ignore (load_ignorelist path)) rest + | dir :: rest -> go (dir :: prefixes) db ignore rest in + go [] (default_db_path ()) SSet.empty args + let () = match Array.to_list Sys.argv |> List.tl with - | ["generate"; bindir; mandir; "-o"; outdir] -> cmd_generate bindir mandir outdir + | ["generate"; bindir; mandir; "-o"; outdir] -> + cmd_generate bindir mandir outdir SSet.empty + | ["generate"; bindir; mandir; "-o"; outdir; "--ignore"; ignore_file] -> + cmd_generate bindir mandir outdir (load_ignorelist ignore_file) + | "index" :: rest -> + let (prefixes, db_path, ignorelist) = parse_index_args rest in + if prefixes = [] then (Printf.eprintf "error: index requires at least one prefix dir\n"; exit 1); + let bindirs = List.map (fun p -> Filename.concat p "bin") prefixes in + let mandirs = List.map (fun p -> Filename.concat p "share/man") prefixes in + cmd_index bindirs mandirs ignorelist db_path + | "dump" :: rest -> + let db_path = match rest with + | ["--db"; path] -> path + | [] -> default_db_path () + | _ -> Printf.eprintf "error: dump [--db PATH]\n"; exit 1 in + cmd_dump db_path | ["manpage"; file] -> cmd_manpage file | ["manpage-dir"; dir] -> cmd_manpage_dir dir | "help" :: rest -> cmd_help rest diff --git a/dune-project b/dune-project index 514770f..89cddbb 100644 --- a/dune-project +++ b/dune-project @@ -24,6 +24,7 @@ angstrom angstrom-unix camlzip + sqlite3 (ppx_inline_test :with-test)) (tags (shell completions nushell parser angstrom))) diff --git a/flake.nix b/flake.nix index 90a94f9..8636276 100644 --- a/flake.nix +++ b/flake.nix @@ -20,6 +20,7 @@ angstrom angstrom-unix camlzip + ocaml_sqlite3 ppx_inline_test ocaml-lsp ocamlformat @@ -43,6 +44,7 @@ angstrom angstrom-unix camlzip + ocaml_sqlite3 ]; meta.mainProgram = "inshellah"; diff --git a/lib/dune b/lib/dune index 38defe1..338f98a 100644 --- a/lib/dune +++ b/lib/dune @@ -1,3 +1,3 @@ (library (name inshellah) - (libraries angstrom angstrom-unix camlzip str unix)) + (libraries angstrom angstrom-unix camlzip sqlite3 str unix)) diff --git a/lib/parser.ml b/lib/parser.ml index 81bf201..3fcc807 100644 --- a/lib/parser.ml +++ b/lib/parser.ml @@ -234,7 +234,43 @@ let subcommand_entry = else char ' ' *> char ' ' *> inline_ws *> rest_of_line <* eol >>| fun desc -> - { name; desc = String.trim desc } + { name = String.lowercase_ascii name; + desc = let t = String.trim desc in + if String.length t >= 2 && t.[0] = '-' && t.[1] = ' ' then + String.trim (String.sub t 2 (String.length t - 2)) + else t } + +(* --- Section header detection --- *) + +(* Detect lines like "Arguments:", "POSITIONALS:", etc. that introduce + positional-argument sections (where name+desc lines are NOT subcommands) *) +let is_arg_section s = + let lc = String.lowercase_ascii (String.trim s) in + let base = if String.ends_with ~suffix:":" lc + then String.sub lc 0 (String.length lc - 1) |> String.trim + else lc in + base = "arguments" || base = "args" || base = "positionals" + || base = "positional arguments" + +(* A section header: left-aligned (or lightly indented) text ending with ':', + not starting with '-'. Must be consumed BEFORE subcommand_entry in choice. *) +let section_header = + available >>= fun avail -> + if avail = 0 then fail "eof" + else + peek_string (min avail 80) >>= fun preview -> + (* Extract just the first line from the preview *) + let first_line = match String.index_opt preview '\n' with + | Some i -> String.sub preview 0 i + | None -> preview in + let t = String.trim first_line in + let len = String.length t in + let indent = let i = ref 0 in + while !i < String.length first_line && (first_line.[!i] = ' ' || first_line.[!i] = '\t') do incr i done; + !i in + if len >= 2 && t.[len - 1] = ':' && t.[0] <> '-' && indent <= 4 then + rest_of_line <* eol_strict >>| fun line -> is_arg_section line + else fail "not a section header" (* --- Top-level parser --- *) @@ -247,6 +283,10 @@ let help_parser = let try_entry = entry >>| fun e -> `Entry e in + (* Detect section headers to track arg vs command sections *) + let try_section = + section_header >>| fun is_arg -> `Section is_arg + in (* Try to parse a subcommand *) let try_subcommand = subcommand_entry >>| fun sc -> `Subcommand sc @@ -255,10 +295,17 @@ let help_parser = let try_skip = skip_non_option_line >>| fun () -> `Skip in - many (choice [ try_entry; try_subcommand; try_skip ]) >>| fun items -> + many (choice [ try_entry; try_section; try_subcommand; try_skip ]) >>| fun items -> let entries = List.filter_map (function `Entry e -> Some e | _ -> None) items in + (* Only keep subcommands that didn't appear under an Arguments/Positionals section *) let subcommands = - List.filter_map (function `Subcommand sc -> Some sc | _ -> None) items + List.fold_left (fun (in_arg_sec, acc) item -> + match item with + | `Section is_arg -> (is_arg, acc) + | `Subcommand sc when not in_arg_sec -> (in_arg_sec, sc :: acc) + | _ -> (in_arg_sec, acc) + ) (false, []) items + |> snd |> List.rev |> List.fold_left (fun acc sc -> match List.assoc_opt sc.name acc with | Some prev when String.length prev.desc >= String.length sc.desc -> acc diff --git a/lib/store.ml b/lib/store.ml new file mode 100644 index 0000000..b340192 --- /dev/null +++ b/lib/store.ml @@ -0,0 +1,344 @@ +open Parser + +let default_db_path () = + let cache = try Sys.getenv "XDG_CACHE_HOME" + with Not_found -> Filename.concat (Sys.getenv "HOME") ".cache" in + Filename.concat cache "inshellah/completions.db" + +let ensure_parent path = + let dir = Filename.dirname path in + let rec mkdir_p d = + if Sys.file_exists d then () + else begin mkdir_p (Filename.dirname d); Unix.mkdir d 0o755 end in + mkdir_p dir + +let init db_path = + ensure_parent db_path; + let db = Sqlite3.db_open db_path in + let exec sql = + match Sqlite3.exec db sql with + | Sqlite3.Rc.OK -> () + | rc -> failwith (Printf.sprintf "sqlite: %s: %s" (Sqlite3.Rc.to_string rc) sql) in + exec "PRAGMA journal_mode=WAL"; + exec "PRAGMA synchronous=NORMAL"; + exec {|CREATE TABLE IF NOT EXISTS completions ( + command TEXT PRIMARY KEY, + data TEXT NOT NULL, + source TEXT, + updated_at INTEGER NOT NULL + )|}; + db + +let close db = ignore (Sqlite3.db_close db) + +(* --- JSON serialization of help_result --- *) + +let escape_json s = + let buf = Buffer.create (String.length s + 4) in + String.iter (fun c -> match c with + | '"' -> Buffer.add_string buf "\\\"" + | '\\' -> Buffer.add_string buf "\\\\" + | '\n' -> Buffer.add_string buf "\\n" + | '\t' -> Buffer.add_string buf "\\t" + | '\r' -> Buffer.add_string buf "\\r" + | c when Char.code c < 0x20 -> + Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c)) + | c -> Buffer.add_char buf c + ) s; + Buffer.contents buf + +let json_string s = Printf.sprintf "\"%s\"" (escape_json s) +let json_null = "null" + +let json_switch_of = function + | Short c -> Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (json_string (String.make 1 c)) + | Long l -> Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string l) + | Both (c, l) -> + Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}" + (json_string (String.make 1 c)) (json_string l) + +let json_param_of = function + | None -> json_null + | Some (Mandatory p) -> + Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}" (json_string p) + | Some (Optional p) -> + Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}" (json_string p) + +let json_entry_of e = + Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}" + (json_switch_of e.switch) (json_param_of e.param) (json_string e.desc) + +let json_subcommand_of sc = + Printf.sprintf "{\"name\":%s,\"desc\":%s}" (json_string sc.name) (json_string sc.desc) + +let json_positional_of p = + Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}" + (json_string p.pos_name) p.optional p.variadic + +let json_list f items = + "[" ^ String.concat "," (List.map f items) ^ "]" + +let json_of_help_result r = + Printf.sprintf "{\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}" + (json_list json_entry_of r.entries) + (json_list json_subcommand_of r.subcommands) + (json_list json_positional_of r.positionals) + +(* --- JSON deserialization --- *) + +(* Minimal JSON parser — just enough for our own output *) + +type json = + | Jnull + | Jbool of bool + | Jstring of string + | Jarray of json list + | Jobject of (string * json) list + +let json_get key = function + | Jobject pairs -> (try List.assoc key pairs with Not_found -> Jnull) + | _ -> Jnull + +let json_to_string = function Jstring s -> s | _ -> "" +let json_to_bool = function Jbool b -> b | _ -> false +let json_to_list = function Jarray l -> l | _ -> [] + +exception Json_error of string + +let parse_json s = + let len = String.length s in + let pos = ref 0 in + let peek () = if !pos < len then s.[!pos] else '\x00' in + let advance () = incr pos in + let skip_ws () = + while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t' + || s.[!pos] = '\n' || s.[!pos] = '\r') do + advance () + done in + let expect c = + skip_ws (); + if peek () <> c then + raise (Json_error (Printf.sprintf "expected '%c' at %d" c !pos)); + advance () in + let rec parse_value () = + skip_ws (); + match peek () with + | '"' -> Jstring (parse_string ()) + | '{' -> parse_object () + | '[' -> parse_array () + | 'n' -> advance (); advance (); advance (); advance (); Jnull + | 't' -> advance (); advance (); advance (); advance (); Jbool true + | 'f' -> + advance (); advance (); advance (); advance (); advance (); Jbool false + | c -> raise (Json_error (Printf.sprintf "unexpected '%c' at %d" c !pos)) + and parse_string () = + expect '"'; + let buf = Buffer.create 32 in + while peek () <> '"' do + if peek () = '\\' then begin + advance (); + (match peek () with + | '"' -> Buffer.add_char buf '"' + | '\\' -> Buffer.add_char buf '\\' + | 'n' -> Buffer.add_char buf '\n' + | 't' -> Buffer.add_char buf '\t' + | 'r' -> Buffer.add_char buf '\r' + | 'u' -> + advance (); + let hex = String.sub s !pos 4 in + pos := !pos + 3; + let code = int_of_string ("0x" ^ hex) in + if code < 128 then Buffer.add_char buf (Char.chr code) + else begin + (* UTF-8 encode *) + if code < 0x800 then begin + Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6))); + Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f))) + end else begin + Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12))); + Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f))); + Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f))) + end + end + | c -> Buffer.add_char buf c); + advance () + end else begin + Buffer.add_char buf (peek ()); + advance () + end + done; + advance (); (* closing quote *) + Buffer.contents buf + and parse_object () = + expect '{'; + skip_ws (); + if peek () = '}' then (advance (); Jobject []) + else begin + let pairs = ref [] in + let cont = ref true in + while !cont do + skip_ws (); + let key = parse_string () in + expect ':'; + let value = parse_value () in + pairs := (key, value) :: !pairs; + skip_ws (); + if peek () = ',' then advance () + else cont := false + done; + expect '}'; + Jobject (List.rev !pairs) + end + and parse_array () = + expect '['; + skip_ws (); + if peek () = ']' then (advance (); Jarray []) + else begin + let items = ref [] in + let cont = ref true in + while !cont do + let v = parse_value () in + items := v :: !items; + skip_ws (); + if peek () = ',' then advance () + else cont := false + done; + expect ']'; + Jarray (List.rev !items) + end + in + parse_value () + +let switch_of_json j = + match json_to_string (json_get "type" j) with + | "short" -> + let c = json_to_string (json_get "char" j) in + Short (if String.length c > 0 then c.[0] else '?') + | "long" -> Long (json_to_string (json_get "name" j)) + | "both" -> + let c = json_to_string (json_get "char" j) in + Both ((if String.length c > 0 then c.[0] else '?'), + json_to_string (json_get "name" j)) + | _ -> Long "?" + +let param_of_json = function + | Jnull -> None + | j -> + let name = json_to_string (json_get "name" j) in + (match json_to_string (json_get "kind" j) with + | "mandatory" -> Some (Mandatory name) + | "optional" -> Some (Optional name) + | _ -> None) + +let entry_of_json j = + { switch = switch_of_json (json_get "switch" j); + param = param_of_json (json_get "param" j); + desc = json_to_string (json_get "desc" j) } + +let subcommand_of_json j = + { name = json_to_string (json_get "name" j); + desc = json_to_string (json_get "desc" j) } + +let positional_of_json j = + { pos_name = json_to_string (json_get "name" j); + optional = json_to_bool (json_get "optional" j); + variadic = json_to_bool (json_get "variadic" j) } + +let help_result_of_json j = + { entries = List.map entry_of_json (json_to_list (json_get "entries" j)); + subcommands = List.map subcommand_of_json (json_to_list (json_get "subcommands" j)); + positionals = List.map positional_of_json (json_to_list (json_get "positionals" j)) } + +(* --- Database operations --- *) + +let upsert db ?(source="help") command result = + let json = json_of_help_result result in + let now = int_of_float (Unix.gettimeofday ()) in + let stmt = Sqlite3.prepare db + "INSERT INTO completions (command, data, source, updated_at) VALUES (?, ?, ?, ?) + ON CONFLICT(command) DO UPDATE SET data=excluded.data, source=excluded.source, updated_at=excluded.updated_at" in + ignore (Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT command)); + ignore (Sqlite3.bind stmt 2 (Sqlite3.Data.TEXT json)); + ignore (Sqlite3.bind stmt 3 (Sqlite3.Data.TEXT source)); + ignore (Sqlite3.bind stmt 4 (Sqlite3.Data.INT (Int64.of_int now))); + (match Sqlite3.step stmt with + | Sqlite3.Rc.DONE -> () + | rc -> failwith (Printf.sprintf "upsert %s: %s" command (Sqlite3.Rc.to_string rc))); + ignore (Sqlite3.finalize stmt) + +let upsert_raw db ?(source="native") command data = + let now = int_of_float (Unix.gettimeofday ()) in + let stmt = Sqlite3.prepare db + "INSERT INTO completions (command, data, source, updated_at) VALUES (?, ?, ?, ?) + ON CONFLICT(command) DO UPDATE SET data=excluded.data, source=excluded.source, updated_at=excluded.updated_at" in + ignore (Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT command)); + ignore (Sqlite3.bind stmt 2 (Sqlite3.Data.TEXT data)); + ignore (Sqlite3.bind stmt 3 (Sqlite3.Data.TEXT source)); + ignore (Sqlite3.bind stmt 4 (Sqlite3.Data.INT (Int64.of_int now))); + (match Sqlite3.step stmt with + | Sqlite3.Rc.DONE -> () + | rc -> failwith (Printf.sprintf "upsert_raw %s: %s" command (Sqlite3.Rc.to_string rc))); + ignore (Sqlite3.finalize stmt) + +let lookup db command = + let stmt = Sqlite3.prepare db + "SELECT data, source FROM completions WHERE command = ?" in + ignore (Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT command)); + let result = match Sqlite3.step stmt with + | Sqlite3.Rc.ROW -> + let data = Sqlite3.column_text stmt 0 in + let source = Sqlite3.column_text stmt 1 in + Some (data, source) + | _ -> None in + ignore (Sqlite3.finalize stmt); + result + +let lookup_result db command = + match lookup db command with + | None -> None + | Some (data, _source) -> + (try Some (help_result_of_json (parse_json data)) + with _ -> None) + +let has_command db command = + let stmt = Sqlite3.prepare db + "SELECT 1 FROM completions WHERE command = ?" in + ignore (Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT command)); + let found = Sqlite3.step stmt = Sqlite3.Rc.ROW in + ignore (Sqlite3.finalize stmt); + found + +let all_commands db = + let stmt = Sqlite3.prepare db "SELECT command FROM completions ORDER BY command" in + let results = ref [] in + while Sqlite3.step stmt = Sqlite3.Rc.ROW do + results := Sqlite3.column_text stmt 0 :: !results + done; + ignore (Sqlite3.finalize stmt); + List.rev !results + +let delete db command = + let stmt = Sqlite3.prepare db "DELETE FROM completions WHERE command = ?" in + ignore (Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT command)); + ignore (Sqlite3.step stmt); + ignore (Sqlite3.finalize stmt) + +let begin_transaction db = + match Sqlite3.exec db "BEGIN IMMEDIATE" with + | Sqlite3.Rc.OK -> () | _ -> () + +let commit db = + match Sqlite3.exec db "COMMIT" with + | Sqlite3.Rc.OK -> () | _ -> () + +let stats db = + let stmt = Sqlite3.prepare db + "SELECT COUNT(*), COUNT(DISTINCT source) FROM completions" in + let result = match Sqlite3.step stmt with + | Sqlite3.Rc.ROW -> + let count = Sqlite3.column_int stmt 0 in + let sources = Sqlite3.column_int stmt 1 in + (count, sources) + | _ -> (0, 0) in + ignore (Sqlite3.finalize stmt); + result diff --git a/nix/module.nix b/nix/module.nix index 2778e7c..4491dbf 100644 --- a/nix/module.nix +++ b/nix/module.nix @@ -41,6 +41,15 @@ in (discovered via XDG_DATA_DIRS). ''; }; + + ignoreCommands = lib.mkOption { + type = lib.types.listOf lib.types.str; + default = []; + example = [ "meat" "problematic-tool" ]; + description = '' + List of command names to skip during completion generation. + ''; + }; }; config = lib.mkIf cfg.enable { @@ -62,6 +71,8 @@ in chmod -R u+w "$_cur" fi fi'') segments; + ignoreFile = pkgs.writeText "inshellah-ignore" (lib.concatStringsSep "\n" cfg.ignoreCommands); + ignoreFlag = lib.optionalString (cfg.ignoreCommands != []) " --ignore ${ignoreFile}"; in '' _cur="$out" @@ -71,7 +82,7 @@ in # Generate all completions in one pass: # native generators > manpages > --help fallback if [ -d "$out/bin" ] && [ -d "$out/share/man" ]; then - ${inshellah} generate "$out/bin" "$out/share/man" -o ${destDir} \ + ${inshellah} generate "$out/bin" "$out/share/man" -o ${destDir}${ignoreFlag} \ 2>/dev/null || true fi From 3080a5f64dd7481d03937123352928e48f568675 Mon Sep 17 00:00:00 2001 From: atagen Date: Mon, 23 Mar 2026 02:20:27 +1100 Subject: [PATCH 2/6] run subprocesses in /tmp to avoid cwd pollution Commands like ckb-next-dev-detect write side-effect files (reports, tarballs) into the working directory when invoked with --help. Chdir to /tmp around create_process_env so these don't land in the project tree. --- bin/main.ml | 5 +++++ inshellah.opam | 1 + 2 files changed, 6 insertions(+) diff --git a/bin/main.ml b/bin/main.ml index 9ac0556..9c4d4b4 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -70,11 +70,16 @@ let run_cmd args timeout_ms = let (rd, wr) = Unix.pipe () in let devnull = Unix.openfile "/dev/null" [Unix.O_RDONLY] 0 in let argv = Array.of_list args in + (* Run subprocesses in /tmp so commands that write side-effect files + (e.g. ckb-next-dev-detect-report.gz) don't pollute the working dir *) + let saved_cwd = Sys.getcwd () in + Sys.chdir "/tmp"; let pid = try Unix.create_process_env (List.hd args) argv (Lazy.force safe_env) devnull wr wr with Unix.Unix_error _ -> Unix.close rd; Unix.close wr; Unix.close devnull; -1 in + Sys.chdir saved_cwd; Unix.close wr; Unix.close devnull; if pid < 0 then (Unix.close rd; None) else begin diff --git a/inshellah.opam b/inshellah.opam index 8d020f1..cf4885a 100644 --- a/inshellah.opam +++ b/inshellah.opam @@ -15,6 +15,7 @@ depends: [ "angstrom" "angstrom-unix" "camlzip" + "sqlite3" "ppx_inline_test" {with-test} "odoc" {with-doc} ] From 53be599d91eb98d04f88611e68e96741f140555f Mon Sep 17 00:00:00 2001 From: atagen Date: Mon, 23 Mar 2026 11:40:34 +1100 Subject: [PATCH 3/6] fix parallelism, create completer --- bin/main.ml | 582 +++++++++++++++++++++++----------------------------- 1 file changed, 259 insertions(+), 323 deletions(-) diff --git a/bin/main.ml b/bin/main.ml index 9c4d4b4..90f92e7 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -14,19 +14,13 @@ Usage: Index completions into a SQLite database. PREFIX is a directory containing bin/ and share/man/. Default db: $XDG_CACHE_HOME/inshellah/completions.db + inshellah complete CMD [ARGS...] [--db PATH] + Nushell custom completer. Outputs JSON completion candidates. + Falls back to --help resolution if command is not in the database. inshellah dump [--db PATH] Show stats and commands in the database. - inshellah generate BINDIR MANDIR -o OUTDIR - Full generation: native completions, manpages, and --help fallback. - One .nu file per command. inshellah manpage FILE Parse a manpage and emit nushell extern inshellah manpage-dir DIR Batch-process manpages under DIR - inshellah help [--iterative] CMD [ARGS...] - Run CMD ARGS --help, parse and emit extern. - Recursively resolves subcommands unless - --iterative is given. - inshellah parse-help CMD Read --help text from stdin, emit extern - inshellah demo Run built-in demo |}; exit 1 @@ -43,13 +37,6 @@ let is_nushell_source text = || contains_str text "export def" || (contains_str text "module " && contains_str text "export")) -let filename_of_cmd cmd = - String.map (function - | ('a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.') as c -> c | _ -> '-') cmd - -let write_file path contents = - let oc = open_out path in output_string oc contents; close_out oc - let cmd_name_of_manpage path = let base = Filename.basename path in let base = @@ -66,6 +53,23 @@ let safe_env = lazy ( || String.starts_with ~prefix:"XAUTHORITY=" s)) (Array.to_list (Unix.environment ())))) +(* Non-blocking drain of a pipe fd into a buffer. Safe to call repeatedly; + reads whatever is available without blocking. Used by all fork-pipe sites + to keep pipes drained so children never block on write. *) +let drain_fd rd buf = + let chunk = Bytes.create 8192 in + let continue = ref true in + while !continue do + match Unix.select [rd] [] [] 0.0 with + | (_ :: _, _, _) -> + (try + let n = Unix.read rd chunk 0 8192 in + if n = 0 then continue := false + else Buffer.add_subbytes buf chunk 0 n + with Unix.Unix_error _ -> continue := false) + | _ -> continue := false + done + let run_cmd args timeout_ms = let (rd, wr) = Unix.pipe () in let devnull = Unix.openfile "/dev/null" [Unix.O_RDONLY] 0 in @@ -264,136 +268,6 @@ let cmd_manpage_dir dir = let max_resolve_results = 500 -let help_resolve ?(timeout=10_000) cmd rest name = - let max_jobs = num_cores () in - let queue = Queue.create () in - Queue.push (rest, name, 0, "") queue; - let results = ref [] in - (* pending: (pid, rd, rest, name, depth, desc) *) - let pending = ref [] in - let collect rd p_rest p_name p_depth p_desc = - let ic = Unix.in_channel_of_descr rd in - let[@warning "-8"] result : (string * (string * string) list) option = - try Marshal.from_channel ic with _ -> None in - close_in ic; - match result with - | None -> - if p_desc <> "" then - results := Printf.sprintf "export extern \"%s\" [ # %s\n]\n" - (escape_nu p_name) (escape_nu p_desc) :: !results - | Some (code, subs) -> - results := code :: !results; - if p_depth < 5 && List.length !results < max_resolve_results then - List.iter (fun (sc_name, sc_desc) -> - Queue.push - (p_rest @ [sc_name], p_name ^ " " ^ sc_name, - p_depth + 1, sc_desc) queue - ) subs in - let reap () = - pending := List.filter (fun (pid, rd, p_rest, p_name, p_depth, p_desc) -> - match Unix.waitpid [Unix.WNOHANG] pid with - | (0, _) -> true - | _ -> collect rd p_rest p_name p_depth p_desc; false - | exception Unix.Unix_error (Unix.ECHILD, _, _) -> - collect rd p_rest p_name p_depth p_desc; false - ) !pending in - let wait_slot () = - while List.length !pending >= max_jobs do - reap (); - if List.length !pending >= max_jobs then - (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); - reap () - done in - while not (Queue.is_empty queue) || !pending <> [] do - while not (Queue.is_empty queue) do - let (q_rest, q_name, q_depth, q_desc) = Queue.pop queue in - wait_slot (); - let (rd, wr) = Unix.pipe () in - let pid = Unix.fork () in - if pid = 0 then begin - Unix.close rd; - List.iter (fun (_, prd, _, _, _, _) -> - try Unix.close prd with _ -> ()) !pending; - let result = - let text = match run_cmd (cmd :: q_rest @ ["--help"]) timeout with - | Some _ as r -> r - | None -> run_cmd (cmd :: q_rest @ ["-h"]) timeout in - match text with - | None -> None - | Some text -> - (match parse_help text with - | Error _ -> None - | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] -> None - | Ok r -> - (* If the subcommand we just queried appears in its own - subcommand list, the command is echoing the parent help - (e.g. nil ignores subcommands when --help is present). - Abandon this branch to avoid infinite recursion. *) - let self_listed = match q_rest with - | [] -> false - | _ -> - let leaf = List.nth q_rest (List.length q_rest - 1) in - List.exists (fun (sc : subcommand) -> sc.name = leaf) r.subcommands in - if self_listed then None - else - let at_limit = q_depth >= 5 in - let code = generate_extern q_name - (if at_limit then r else { r with subcommands = [] }) in - let subs = if at_limit then [] - else List.map (fun (sc : subcommand) -> (sc.name, sc.desc)) - r.subcommands in - Some (code, subs)) in - let oc = Unix.out_channel_of_descr wr in - Marshal.to_channel oc (result : (string * (string * string) list) option) []; - close_out oc; - exit 0 - end else begin - Unix.close wr; - pending := (pid, rd, q_rest, q_name, q_depth, q_desc) :: !pending - end - done; - if !pending <> [] then begin - reap (); - if !pending <> [] && Queue.is_empty queue then begin - (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); - reap () - end - end - done; - match !results with - | [] -> None - | rs -> Some (String.concat "\n" (List.rev rs)) - -let cmd_help args = - let iterative, cmd_args = match args with - | "--iterative" :: rest -> (true, rest) - | _ -> (false, args) - in - match cmd_args with - | [] -> Printf.eprintf "error: help requires a command name\n"; exit 1 - | cmd :: rest -> - let name = String.concat " " (Filename.basename cmd :: rest) in - if iterative then - (match run_cmd (cmd :: rest @ ["--help"]) 10_000 with - | None -> Printf.eprintf "no output from %s --help\n" name; exit 1 - | Some text -> - (match parse_help text with - | Ok r -> print_string (generate_extern name r) - | Error msg -> Printf.eprintf "parse error for %s: %s\n" name msg; exit 1)) - else - (match help_resolve cmd rest name with - | None -> Printf.eprintf "no output from %s --help\n" name; exit 1 - | Some output -> print_string output) - -let cmd_parse_help cmd = - let buf = Buffer.create 4096 in - (try while true do - Buffer.add_string buf (input_line stdin); Buffer.add_char buf '\n' - done with End_of_file -> ()); - (match parse_help (Buffer.contents buf) with - | Ok r -> print_string (generate_extern cmd r) - | Error msg -> Printf.eprintf "parse error for %s: %s\n" cmd msg; exit 1) - let process_manpage file = try let contents = read_manpage_file file in @@ -415,142 +289,103 @@ let manpaged_commands mandir = else acc ) SSet.empty command_sections -let cmd_generate bindir mandir outdir ignorelist = - let done_cmds = ref SSet.empty in - let bins = Sys.readdir bindir in - Array.sort String.compare bins; - let manpaged = manpaged_commands mandir in +(* Parallel structured help resolver — returns (name, help_result) pairs + like the old sequential version but forks per subcommand for parallelism. *) +let help_resolve_par ?(timeout=200) cmd rest name = let max_jobs = num_cores () in - let classified = Array.map (fun name -> - if SSet.mem name manpaged || SSet.mem name ignorelist then (name, Skip) - else (name, classify_binary bindir name) - ) bins in - let pending = ref [] in - let reap () = - pending := List.filter (fun pid -> - match Unix.waitpid [Unix.WNOHANG] pid with - | (0, _) -> true | _ -> false - | exception Unix.Unix_error (Unix.ECHILD, _, _) -> false - ) !pending in - let wait_slot () = - while List.length !pending >= max_jobs do - reap (); - if List.length !pending >= max_jobs then - (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); - reap () - done in - Array.iter (fun (name, cls) -> - match cls with - | Skip -> () - | Try_help | Try_native_and_help -> - wait_slot (); - let pid = Unix.fork () in - if pid = 0 then begin - (try - let path = Filename.concat bindir name in - let native_ok = match cls with - | Try_native_and_help -> - (match try_native_completion path with - | Some src -> - write_file (Filename.concat outdir (filename_of_cmd name ^ ".nu")) src; - true - | None -> false) - | _ -> false in - if not native_ok then begin - match help_resolve ~timeout:200 path [] name with - | Some content when String.length content > 0 -> - let m = module_name_of name in - let src = Printf.sprintf "module %s {\n%s}\n\nuse %s *\n" m content m in - write_file (Filename.concat outdir (filename_of_cmd name ^ ".nu")) src - | _ -> () - end; - exit 0 - with _ -> exit 1) - end else begin - pending := pid :: !pending; - done_cmds := SSet.add name !done_cmds - end - ) classified; - while !pending <> [] do - (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); - reap () - done; - List.iter (fun section -> - let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in - if Sys.file_exists subdir && Sys.is_directory subdir then begin - let files = Sys.readdir subdir in - Array.sort String.compare files; - Array.iter (fun file -> - match process_manpage (Filename.concat subdir file) with - | None -> () - | Some (cmd, result) -> - let base = List.hd (String.split_on_char ' ' cmd) in - if SSet.mem cmd !done_cmds then () - else begin - done_cmds := SSet.add cmd !done_cmds; - let outpath = Filename.concat outdir (filename_of_cmd base ^ ".nu") in - if Sys.file_exists outpath then begin - let existing = - let ic = open_in outpath in - let n = in_channel_length ic in - let s = Bytes.create n in - really_input ic s 0 n; close_in ic; - Bytes.to_string s in - let mod_name = module_name_of base in - let use_line = Printf.sprintf "\nuse %s *\n" mod_name in - let base_content = - if contains_str existing use_line then - String.sub existing 0 - (Str.search_forward (Str.regexp_string use_line) existing 0) - else existing in - write_file outpath - (String.concat "" [base_content; generate_extern cmd result; use_line]) - end else - write_file outpath (generate_module base result) - end - ) files - end - ) command_sections - - -(* Sequential help resolver for use inside forked children. - No forking — just iterates through subcommands with run_cmd directly. *) -let help_resolve_seq ?(timeout=200) cmd rest name = let queue = Queue.create () in Queue.push (rest, name, 0) queue; let results = ref [] in - while not (Queue.is_empty queue) do - let (q_rest, q_name, q_depth) = Queue.pop queue in - let text = match run_cmd (cmd :: q_rest @ ["--help"]) timeout with - | Some _ as r -> r - | None -> run_cmd (cmd :: q_rest @ ["-h"]) timeout in - match text with + (* pending: (pid, rd, buf, rest, name, depth) *) + let pending = ref [] in + let collect rd buf q_rest q_name q_depth = + drain_fd rd buf; + (try Unix.close rd with _ -> ()); + let data = Buffer.contents buf in + let result : (help_result * subcommand list) option = + if String.length data > 0 then + try Marshal.from_string data 0 with _ -> None + else None in + match result with | None -> () - | Some text -> - (match parse_help text with - | Error _ -> () - | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] -> () - | Ok r -> - let self_listed = match q_rest with - | [] -> false - | _ -> - let leaf = List.nth q_rest (List.length q_rest - 1) in - List.exists (fun (sc : subcommand) -> sc.name = leaf) r.subcommands in - if not self_listed then begin - let at_limit = q_depth >= 5 || List.length !results >= max_resolve_results in - results := (q_name, { r with subcommands = [] }) :: !results; - if not at_limit then - List.iter (fun (sc : subcommand) -> - Queue.push (q_rest @ [sc.name], q_name ^ " " ^ sc.name, q_depth + 1) queue - ) r.subcommands - end) + | Some (r, subs) -> + let at_limit = q_depth >= 5 || List.length !results >= max_resolve_results in + results := (q_name, r) :: !results; + if not at_limit then + List.iter (fun (sc : subcommand) -> + Queue.push (q_rest @ [sc.name], q_name ^ " " ^ sc.name, q_depth + 1) queue + ) subs in + let reap () = + pending := List.filter (fun (pid, rd, buf, q_rest, q_name, q_depth) -> + drain_fd rd buf; + match Unix.waitpid [Unix.WNOHANG] pid with + | (0, _) -> true + | _ -> collect rd buf q_rest q_name q_depth; false + | exception Unix.Unix_error (Unix.ECHILD, _, _) -> + (try Unix.close rd with _ -> ()); false + ) !pending in + let wait_for_slot () = + while List.length !pending >= max_jobs do + reap (); + if List.length !pending >= max_jobs then begin + let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in + ignore (Unix.select fds [] [] 0.05) + end + done in + while not (Queue.is_empty queue) || !pending <> [] do + while not (Queue.is_empty queue) do + let (q_rest, q_name, q_depth) = Queue.pop queue in + wait_for_slot (); + let (rd, wr) = Unix.pipe () in + let pid = Unix.fork () in + if pid = 0 then begin + Unix.close rd; + List.iter (fun (_, prd, _, _, _, _) -> + try Unix.close prd with _ -> ()) !pending; + let result = + let text = match run_cmd (cmd :: q_rest @ ["--help"]) timeout with + | Some _ as r -> r + | None -> run_cmd (cmd :: q_rest @ ["-h"]) timeout in + match text with + | None -> None + | Some text -> + (match parse_help text with + | Error _ -> None + | Ok r when r.entries = [] && r.subcommands = [] && r.positionals = [] -> None + | Ok r -> + let self_listed = match q_rest with + | [] -> false + | _ -> + let leaf = List.nth q_rest (List.length q_rest - 1) in + List.exists (fun (sc : subcommand) -> sc.name = leaf) r.subcommands in + if self_listed then None + else + let at_limit = q_depth >= 5 in + let subs = if at_limit then [] else r.subcommands in + Some ({ r with subcommands = [] }, subs)) in + let oc = Unix.out_channel_of_descr wr in + Marshal.to_channel oc (result : (help_result * subcommand list) option) []; + close_out oc; + exit 0 + end else begin + Unix.close wr; + pending := (pid, rd, Buffer.create 4096, q_rest, q_name, q_depth) :: !pending + end + done; + if !pending <> [] then begin + reap (); + if !pending <> [] && Queue.is_empty queue then begin + let fds = List.map (fun (_, rd, _, _, _, _) -> rd) !pending in + ignore (Unix.select fds [] [] 0.05) + end + end done; List.rev !results -(* Index: mirrors cmd_generate's fork-per-binary pattern. +(* Index: fork-per-binary pattern with pipe-based result marshaling. Each child handles one binary completely (including subcommand resolution) - and marshals all results back in one shot. No nested forking — children - use help_resolve_seq which is purely sequential. *) + and marshals results back via pipe. Children use help_resolve_par + which forks per subcommand for parallelism. *) let cmd_index bindirs mandirs ignorelist db_path = let db = init db_path in begin_transaction db; @@ -570,50 +405,57 @@ let cmd_index bindirs mandirs ignorelist db_path = else (name, classify_binary bindir name) ) bins in let pending = ref [] in + let process_result name rd buf = + drain_fd rd buf; + (try Unix.close rd with _ -> ()); + let data = Buffer.contents buf in + if String.length data > 0 then begin + let result : [`Native of string | `Parsed of (string * help_result) list | `None] = + try Marshal.from_string data 0 with _ -> `None in + (match result with + | `Native src -> + upsert_raw db ~source:"native" name src; + incr n_results + | `Parsed pairs -> + List.iter (fun (cmd_name, r) -> + if not (SSet.mem cmd_name !done_cmds) then begin + upsert db ~source:"help" cmd_name r; + done_cmds := SSet.add cmd_name !done_cmds; + incr n_results + end + ) pairs + | `None -> ()) + end; + done_cmds := SSet.add name !done_cmds in let reap () = - pending := List.filter (fun (pid, rd, name) -> + pending := List.filter (fun (pid, rd, buf, name) -> + drain_fd rd buf; match Unix.waitpid [Unix.WNOHANG] pid with | (0, _) -> true | _ -> - let ic = Unix.in_channel_of_descr rd in - (let[@warning "-8"] result : [`Native of string | `Parsed of (string * help_result) list | `None] = - try Marshal.from_channel ic with _ -> `None in - match result with - | `Native src -> - upsert_raw db ~source:"native" name src; - incr n_results - | `Parsed pairs -> - List.iter (fun (cmd_name, r) -> - if not (SSet.mem cmd_name !done_cmds) then begin - upsert db ~source:"help" cmd_name r; - done_cmds := SSet.add cmd_name !done_cmds; - incr n_results - end - ) pairs - | `None -> ()); - close_in ic; - done_cmds := SSet.add name !done_cmds; + process_result name rd buf; false | exception Unix.Unix_error (Unix.ECHILD, _, _) -> (try Unix.close rd with _ -> ()); false ) !pending in - let wait_slot () = + let wait_for_slot () = while List.length !pending >= max_jobs do reap (); - if List.length !pending >= max_jobs then - (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); - reap () + if List.length !pending >= max_jobs then begin + let fds = List.map (fun (_, rd, _, _) -> rd) !pending in + ignore (Unix.select fds [] [] 0.05) + end done in Array.iter (fun (name, cls) -> match cls with | Skip -> () | Try_help | Try_native_and_help -> - wait_slot (); + wait_for_slot (); let (rd, wr) = Unix.pipe () in let pid = Unix.fork () in if pid = 0 then begin Unix.close rd; - List.iter (fun (_, prd, _) -> + List.iter (fun (_, prd, _, _) -> try Unix.close prd with _ -> ()) !pending; let result = try @@ -626,7 +468,7 @@ let cmd_index bindirs mandirs ignorelist db_path = match native with | Some src -> `Native src | None -> - let pairs = help_resolve_seq ~timeout:200 path [] name in + let pairs = help_resolve_par ~timeout:200 path [] name in if pairs <> [] then `Parsed pairs else `None with _ -> `None in let oc = Unix.out_channel_of_descr wr in @@ -636,12 +478,15 @@ let cmd_index bindirs mandirs ignorelist db_path = exit 0 end else begin Unix.close wr; - pending := (pid, rd, name) :: !pending + pending := (pid, rd, Buffer.create 4096, name) :: !pending end ) classified; while !pending <> [] do - (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); - reap () + reap (); + if !pending <> [] then begin + let fds = List.map (fun (_, rd, _, _) -> rd) !pending in + ignore (Unix.select fds [] [] 0.05) + end done; (* Phase 2: manpages *) if Sys.file_exists mandir && Sys.is_directory mandir then @@ -682,20 +527,111 @@ let cmd_dump db_path = ) cmds; close db -let cmd_demo () = - Printf.printf "# Generated by: inshellah demo\n\n"; - match parse_help - {|Usage: ls [OPTION]... [FILE]... - -a, --all do not ignore entries starting with . - -A, --almost-all do not list implied . and .. - --block-size=SIZE with -l, scale sizes by SIZE when printing - --color[=WHEN] color the output WHEN - -h, --human-readable with -l and -s, print sizes like 1K 234M 2G etc. - --help display this help and exit - --version output version information and exit -|} with - | Ok r -> print_string (generate_extern "ls" r) - | Error msg -> Printf.eprintf "parse error: %s\n" msg +let find_in_path name = + try + let path_var = Sys.getenv "PATH" in + let dirs = String.split_on_char ':' path_var in + let rec go = function + | [] -> None + | dir :: rest -> + let p = Filename.concat dir name in + if is_executable p then Some p else go rest in + go dirs + with Not_found -> None + +let resolve_and_cache db name path = + let pairs = help_resolve_par ~timeout:200 path [] name in + if pairs <> [] then begin + List.iter (fun (cmd_name, r) -> upsert db cmd_name r) pairs; + Some pairs + end else None + +let completion_json value desc = + Printf.sprintf "{\"value\":\"%s\",\"description\":\"%s\"}" + (escape_json value) (escape_json desc) + +let flag_completions prefix entries = + let candidates = ref [] in + List.iter (fun (e : entry) -> + let desc = match e.param with + | Some (Mandatory p) -> if e.desc <> "" then e.desc ^ " <" ^ p ^ ">" else "<" ^ p ^ ">" + | Some (Optional p) -> if e.desc <> "" then e.desc ^ " [" ^ p ^ "]" else "[" ^ p ^ "]" + | None -> e.desc in + (match e.switch with + | Long l -> + let flag = "--" ^ l in + if String.starts_with ~prefix flag then + candidates := completion_json flag desc :: !candidates + | Short c -> + let flag = Printf.sprintf "-%c" c in + if String.starts_with ~prefix flag then + candidates := completion_json flag desc :: !candidates + | Both (c, l) -> + let long = "--" ^ l in + let short = Printf.sprintf "-%c" c in + if String.starts_with ~prefix long then + candidates := completion_json long desc :: !candidates + else if String.starts_with ~prefix short then + candidates := completion_json short desc :: !candidates) + ) entries; + List.rev !candidates + +let cmd_complete spans db_path = + match spans with + | [] -> print_string "[]\n" + | cmd_name :: rest -> + let db = init db_path in + (* Try longest subcommand match first: "git add" before "git" *) + let rec find_result tokens = + match tokens with + | [] -> None + | _ -> + let try_name = String.concat " " tokens in + match lookup_result db try_name with + | Some r -> Some (try_name, r, List.length tokens) + | None -> + find_result (List.rev (List.tl (List.rev tokens))) in + let all_tokens = cmd_name :: (match rest with + | _ :: _ when List.length rest >= 1 -> + (* exclude the partial last token from subcommand lookup *) + List.rev (List.tl (List.rev rest)) + | _ -> []) in + let found = find_result all_tokens in + (* If not found at all, try on-the-fly resolution for the base command *) + let result = match found with + | Some _ -> found + | None -> + (match find_in_path cmd_name with + | Some path -> + (match resolve_and_cache db cmd_name path with + | Some _pairs -> + (* Look up again after caching *) + find_result all_tokens + | None -> None) + | None -> None) in + let partial = match rest with + | [] -> "" + | _ -> List.nth rest (List.length rest - 1) in + (match result with + | None -> print_string "[]\n" + | Some (_matched_name, r, _depth) -> + let candidates = ref [] in + (* Flag completions when partial starts with - *) + if String.starts_with ~prefix:"-" partial then + candidates := flag_completions partial r.entries + else begin + (* Subcommand completions *) + List.iter (fun (sc : subcommand) -> + if partial = "" || String.starts_with ~prefix:partial sc.name then + candidates := completion_json sc.name sc.desc :: !candidates + ) r.subcommands; + candidates := List.rev !candidates; + (* Also offer flags if no subcommand prefix or few subcommand matches *) + if partial = "" || !candidates = [] then + candidates := !candidates @ flag_completions partial r.entries + end; + Printf.printf "[%s]\n" (String.concat "," !candidates)); + close db let load_ignorelist path = try @@ -720,16 +656,19 @@ let parse_index_args args = let () = match Array.to_list Sys.argv |> List.tl with - | ["generate"; bindir; mandir; "-o"; outdir] -> - cmd_generate bindir mandir outdir SSet.empty - | ["generate"; bindir; mandir; "-o"; outdir; "--ignore"; ignore_file] -> - cmd_generate bindir mandir outdir (load_ignorelist ignore_file) | "index" :: rest -> let (prefixes, db_path, ignorelist) = parse_index_args rest in if prefixes = [] then (Printf.eprintf "error: index requires at least one prefix dir\n"; exit 1); let bindirs = List.map (fun p -> Filename.concat p "bin") prefixes in let mandirs = List.map (fun p -> Filename.concat p "share/man") prefixes in cmd_index bindirs mandirs ignorelist db_path + | "complete" :: rest -> + let rec parse_complete_args spans db = function + | [] -> (List.rev spans, db) + | "--db" :: path :: rest -> parse_complete_args spans path rest + | arg :: rest -> parse_complete_args (arg :: spans) db rest in + let (spans, db_path) = parse_complete_args [] (default_db_path ()) rest in + cmd_complete spans db_path | "dump" :: rest -> let db_path = match rest with | ["--db"; path] -> path @@ -738,7 +677,4 @@ let () = cmd_dump db_path | ["manpage"; file] -> cmd_manpage file | ["manpage-dir"; dir] -> cmd_manpage_dir dir - | "help" :: rest -> cmd_help rest - | ["parse-help"; cmd] -> cmd_parse_help cmd - | ["demo"] -> cmd_demo () | _ -> usage () From 17967da43e9fa54e247c238c1f81f8a7e57e749f Mon Sep 17 00:00:00 2001 From: atagen Date: Mon, 23 Mar 2026 11:45:54 +1100 Subject: [PATCH 4/6] add db control commands --- bin/main.ml | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/bin/main.ml b/bin/main.ml index 90f92e7..ed3a049 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -17,6 +17,10 @@ Usage: inshellah complete CMD [ARGS...] [--db PATH] Nushell custom completer. Outputs JSON completion candidates. Falls back to --help resolution if command is not in the database. + inshellah query CMD [--db PATH] + Print stored completion data for CMD as JSON. + inshellah clear [CMD...] [--db PATH] + Clear the database, or remove specific commands. inshellah dump [--db PATH] Show stats and commands in the database. inshellah manpage FILE Parse a manpage and emit nushell extern @@ -633,6 +637,34 @@ let cmd_complete spans db_path = Printf.printf "[%s]\n" (String.concat "," !candidates)); close db +let cmd_query cmd db_path = + let db = init db_path in + (match lookup db cmd with + | None -> + Printf.eprintf "not found: %s\n" cmd; close db; exit 1 + | Some (data, source) -> + Printf.printf "# source: %s\n%s\n" source data); + close db + +let cmd_clear cmds db_path = + let db = init db_path in + (match cmds with + | [] -> + (match Sqlite3.exec db "DELETE FROM completions" with + | Sqlite3.Rc.OK -> + Printf.printf "cleared all commands from %s\n" db_path + | rc -> + Printf.eprintf "error: %s\n" (Sqlite3.Rc.to_string rc); exit 1) + | _ -> + List.iter (fun cmd -> + if has_command db cmd then begin + delete db cmd; + Printf.printf "removed %s\n" cmd + end else + Printf.eprintf "not found: %s\n" cmd + ) cmds); + close db + let load_ignorelist path = try let ic = open_in path in @@ -669,6 +701,19 @@ let () = | arg :: rest -> parse_complete_args (arg :: spans) db rest in let (spans, db_path) = parse_complete_args [] (default_db_path ()) rest in cmd_complete spans db_path + | "query" :: rest -> + let (cmd, db_path) = match rest with + | [cmd] -> (cmd, default_db_path ()) + | [cmd; "--db"; path] -> (cmd, path) + | _ -> Printf.eprintf "error: query CMD [--db PATH]\n"; exit 1 in + cmd_query cmd db_path + | "clear" :: rest -> + let rec parse_clear_args cmds db = function + | [] -> (List.rev cmds, db) + | "--db" :: path :: rest -> parse_clear_args cmds path rest + | cmd :: rest -> parse_clear_args (cmd :: cmds) db rest in + let (cmds, db_path) = parse_clear_args [] (default_db_path ()) rest in + cmd_clear cmds db_path | "dump" :: rest -> let db_path = match rest with | ["--db"; path] -> path From f2d0a42fd75e2ddda287e5b9b15ff6f278413457 Mon Sep 17 00:00:00 2001 From: atagen Date: Mon, 23 Mar 2026 12:17:45 +1100 Subject: [PATCH 5/6] switch from sqlite to json --- bin/main.ml | 162 ++++++++++++++---------------------- flake.nix | 2 - lib/dune | 2 +- lib/store.ml | 221 +++++++++++++++++++++++++------------------------ nix/module.nix | 30 +++---- 5 files changed, 191 insertions(+), 226 deletions(-) diff --git a/bin/main.ml b/bin/main.ml index ed3a049..fd1e665 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -10,19 +10,17 @@ let usage () = {|inshellah - nushell completions engine Usage: - inshellah index PREFIX... [--db PATH] - Index completions into a SQLite database. + inshellah index PREFIX... [--dir PATH] + Index completions into a directory of JSON/nu files. PREFIX is a directory containing bin/ and share/man/. - Default db: $XDG_CACHE_HOME/inshellah/completions.db - inshellah complete CMD [ARGS...] [--db PATH] + Default dir: $XDG_CACHE_HOME/inshellah + inshellah complete CMD [ARGS...] [--dir PATH] [--system-dir PATH] Nushell custom completer. Outputs JSON completion candidates. - Falls back to --help resolution if command is not in the database. - inshellah query CMD [--db PATH] - Print stored completion data for CMD as JSON. - inshellah clear [CMD...] [--db PATH] - Clear the database, or remove specific commands. - inshellah dump [--db PATH] - Show stats and commands in the database. + Falls back to --help resolution if command is not indexed. + inshellah query CMD [--dir PATH] [--system-dir PATH] + Print stored completion data for CMD. + inshellah dump [--dir PATH] [--system-dir PATH] + List indexed commands. inshellah manpage FILE Parse a manpage and emit nushell extern inshellah manpage-dir DIR Batch-process manpages under DIR @@ -390,9 +388,8 @@ let help_resolve_par ?(timeout=200) cmd rest name = Each child handles one binary completely (including subcommand resolution) and marshals results back via pipe. Children use help_resolve_par which forks per subcommand for parallelism. *) -let cmd_index bindirs mandirs ignorelist db_path = - let db = init db_path in - begin_transaction db; +let cmd_index bindirs mandirs ignorelist dir = + ensure_dir dir; let done_cmds = ref SSet.empty in let n_results = ref 0 in let index_bindir bindir mandir = @@ -418,12 +415,12 @@ let cmd_index bindirs mandirs ignorelist db_path = try Marshal.from_string data 0 with _ -> `None in (match result with | `Native src -> - upsert_raw db ~source:"native" name src; + write_native ~dir name src; incr n_results | `Parsed pairs -> List.iter (fun (cmd_name, r) -> if not (SSet.mem cmd_name !done_cmds) then begin - upsert db ~source:"help" cmd_name r; + write_result ~dir ~source:"help" cmd_name r; done_cmds := SSet.add cmd_name !done_cmds; incr n_results end @@ -504,7 +501,7 @@ let cmd_index bindirs mandirs ignorelist db_path = | None -> () | Some (cmd, result) -> if not (SSet.mem cmd !done_cmds) then begin - upsert db ~source:"manpage" cmd result; + write_result ~dir ~source:"manpage" cmd result; done_cmds := SSet.add cmd !done_cmds; incr n_results end @@ -513,23 +510,16 @@ let cmd_index bindirs mandirs ignorelist db_path = ) command_sections end in List.iter2 index_bindir bindirs mandirs; - commit db; - Printf.printf "indexed %d commands into %s\n" !n_results db_path; - close db + Printf.printf "indexed %d commands into %s\n" !n_results dir -let cmd_dump db_path = - let db = init db_path in - let (count, sources) = stats db in - Printf.printf "database: %s\n" db_path; - Printf.printf "commands: %d (from %d sources)\n" count sources; - let cmds = all_commands db in +let cmd_dump dirs = + let cmds = all_commands dirs in + Printf.printf "%d commands\n" (List.length cmds); List.iter (fun cmd -> - match lookup db cmd with - | None -> () - | Some (_data, source) -> - Printf.printf " %-40s [%s]\n" cmd source - ) cmds; - close db + let src = match file_type_of dirs cmd with + | Some s -> s | None -> "?" in + Printf.printf " %-40s [%s]\n" cmd src + ) cmds let find_in_path name = try @@ -543,10 +533,11 @@ let find_in_path name = go dirs with Not_found -> None -let resolve_and_cache db name path = +let resolve_and_cache ~dir name path = let pairs = help_resolve_par ~timeout:200 path [] name in if pairs <> [] then begin - List.iter (fun (cmd_name, r) -> upsert db cmd_name r) pairs; + ensure_dir dir; + List.iter (fun (cmd_name, r) -> write_result ~dir cmd_name r) pairs; Some pairs end else None @@ -580,18 +571,18 @@ let flag_completions prefix entries = ) entries; List.rev !candidates -let cmd_complete spans db_path = +let cmd_complete spans user_dir system_dirs = match spans with | [] -> print_string "[]\n" | cmd_name :: rest -> - let db = init db_path in + let dirs = user_dir :: system_dirs in (* Try longest subcommand match first: "git add" before "git" *) let rec find_result tokens = match tokens with | [] -> None | _ -> let try_name = String.concat " " tokens in - match lookup_result db try_name with + match lookup dirs try_name with | Some r -> Some (try_name, r, List.length tokens) | None -> find_result (List.rev (List.tl (List.rev tokens))) in @@ -607,9 +598,8 @@ let cmd_complete spans db_path = | None -> (match find_in_path cmd_name with | Some path -> - (match resolve_and_cache db cmd_name path with + (match resolve_and_cache ~dir:user_dir cmd_name path with | Some _pairs -> - (* Look up again after caching *) find_result all_tokens | None -> None) | None -> None) in @@ -620,50 +610,25 @@ let cmd_complete spans db_path = | None -> print_string "[]\n" | Some (_matched_name, r, _depth) -> let candidates = ref [] in - (* Flag completions when partial starts with - *) if String.starts_with ~prefix:"-" partial then candidates := flag_completions partial r.entries else begin - (* Subcommand completions *) List.iter (fun (sc : subcommand) -> if partial = "" || String.starts_with ~prefix:partial sc.name then candidates := completion_json sc.name sc.desc :: !candidates ) r.subcommands; candidates := List.rev !candidates; - (* Also offer flags if no subcommand prefix or few subcommand matches *) if partial = "" || !candidates = [] then candidates := !candidates @ flag_completions partial r.entries end; - Printf.printf "[%s]\n" (String.concat "," !candidates)); - close db + Printf.printf "[%s]\n" (String.concat "," !candidates)) -let cmd_query cmd db_path = - let db = init db_path in - (match lookup db cmd with - | None -> - Printf.eprintf "not found: %s\n" cmd; close db; exit 1 - | Some (data, source) -> - Printf.printf "# source: %s\n%s\n" source data); - close db - -let cmd_clear cmds db_path = - let db = init db_path in - (match cmds with - | [] -> - (match Sqlite3.exec db "DELETE FROM completions" with - | Sqlite3.Rc.OK -> - Printf.printf "cleared all commands from %s\n" db_path - | rc -> - Printf.eprintf "error: %s\n" (Sqlite3.Rc.to_string rc); exit 1) - | _ -> - List.iter (fun cmd -> - if has_command db cmd then begin - delete db cmd; - Printf.printf "removed %s\n" cmd - end else - Printf.eprintf "not found: %s\n" cmd - ) cmds); - close db +let cmd_query cmd dirs = + match lookup_raw dirs cmd with + | None -> + Printf.eprintf "not found: %s\n" cmd; exit 1 + | Some data -> + print_string data; print_newline () let load_ignorelist path = try @@ -679,47 +644,40 @@ let load_ignorelist path = with _ -> SSet.empty let parse_index_args args = - let rec go prefixes db ignore = function - | [] -> (List.rev prefixes, db, ignore) - | "--db" :: path :: rest -> go prefixes path ignore rest - | "--ignore" :: path :: rest -> go prefixes db (SSet.union ignore (load_ignorelist path)) rest - | dir :: rest -> go (dir :: prefixes) db ignore rest in - go [] (default_db_path ()) SSet.empty args + let rec go prefixes dir ignore = function + | [] -> (List.rev prefixes, dir, ignore) + | "--dir" :: path :: rest -> go prefixes path ignore rest + | "--ignore" :: path :: rest -> go prefixes dir (SSet.union ignore (load_ignorelist path)) rest + | prefix :: rest -> go (prefix :: prefixes) dir ignore rest in + go [] (default_store_path ()) SSet.empty args + +let parse_dir_args args = + let rec go user_dir system_dirs rest_args = function + | [] -> (user_dir, system_dirs, List.rev rest_args) + | "--dir" :: path :: rest -> go path system_dirs rest_args rest + | "--system-dir" :: path :: rest -> go user_dir (path :: system_dirs) rest_args rest + | arg :: rest -> go user_dir system_dirs (arg :: rest_args) rest in + go (default_store_path ()) [] [] args let () = match Array.to_list Sys.argv |> List.tl with | "index" :: rest -> - let (prefixes, db_path, ignorelist) = parse_index_args rest in + let (prefixes, dir, ignorelist) = parse_index_args rest in if prefixes = [] then (Printf.eprintf "error: index requires at least one prefix dir\n"; exit 1); let bindirs = List.map (fun p -> Filename.concat p "bin") prefixes in let mandirs = List.map (fun p -> Filename.concat p "share/man") prefixes in - cmd_index bindirs mandirs ignorelist db_path + cmd_index bindirs mandirs ignorelist dir | "complete" :: rest -> - let rec parse_complete_args spans db = function - | [] -> (List.rev spans, db) - | "--db" :: path :: rest -> parse_complete_args spans path rest - | arg :: rest -> parse_complete_args (arg :: spans) db rest in - let (spans, db_path) = parse_complete_args [] (default_db_path ()) rest in - cmd_complete spans db_path + let (user_dir, system_dirs, spans) = parse_dir_args rest in + cmd_complete spans user_dir system_dirs | "query" :: rest -> - let (cmd, db_path) = match rest with - | [cmd] -> (cmd, default_db_path ()) - | [cmd; "--db"; path] -> (cmd, path) - | _ -> Printf.eprintf "error: query CMD [--db PATH]\n"; exit 1 in - cmd_query cmd db_path - | "clear" :: rest -> - let rec parse_clear_args cmds db = function - | [] -> (List.rev cmds, db) - | "--db" :: path :: rest -> parse_clear_args cmds path rest - | cmd :: rest -> parse_clear_args (cmd :: cmds) db rest in - let (cmds, db_path) = parse_clear_args [] (default_db_path ()) rest in - cmd_clear cmds db_path + let (user_dir, system_dirs, args) = parse_dir_args rest in + (match args with + | [cmd] -> cmd_query cmd (user_dir :: system_dirs) + | _ -> Printf.eprintf "error: query CMD [--dir PATH] [--system-dir PATH]\n"; exit 1) | "dump" :: rest -> - let db_path = match rest with - | ["--db"; path] -> path - | [] -> default_db_path () - | _ -> Printf.eprintf "error: dump [--db PATH]\n"; exit 1 in - cmd_dump db_path + let (user_dir, system_dirs, _) = parse_dir_args rest in + cmd_dump (user_dir :: system_dirs) | ["manpage"; file] -> cmd_manpage file | ["manpage-dir"; dir] -> cmd_manpage_dir dir | _ -> usage () diff --git a/flake.nix b/flake.nix index 8636276..90a94f9 100644 --- a/flake.nix +++ b/flake.nix @@ -20,7 +20,6 @@ angstrom angstrom-unix camlzip - ocaml_sqlite3 ppx_inline_test ocaml-lsp ocamlformat @@ -44,7 +43,6 @@ angstrom angstrom-unix camlzip - ocaml_sqlite3 ]; meta.mainProgram = "inshellah"; diff --git a/lib/dune b/lib/dune index 338f98a..38defe1 100644 --- a/lib/dune +++ b/lib/dune @@ -1,3 +1,3 @@ (library (name inshellah) - (libraries angstrom angstrom-unix camlzip sqlite3 str unix)) + (libraries angstrom angstrom-unix camlzip str unix)) diff --git a/lib/store.ml b/lib/store.ml index b340192..467b798 100644 --- a/lib/store.ml +++ b/lib/store.ml @@ -1,35 +1,24 @@ open Parser -let default_db_path () = +let default_store_path () = let cache = try Sys.getenv "XDG_CACHE_HOME" with Not_found -> Filename.concat (Sys.getenv "HOME") ".cache" in - Filename.concat cache "inshellah/completions.db" + Filename.concat cache "inshellah" -let ensure_parent path = - let dir = Filename.dirname path in +let ensure_dir dir = let rec mkdir_p d = if Sys.file_exists d then () else begin mkdir_p (Filename.dirname d); Unix.mkdir d 0o755 end in mkdir_p dir -let init db_path = - ensure_parent db_path; - let db = Sqlite3.db_open db_path in - let exec sql = - match Sqlite3.exec db sql with - | Sqlite3.Rc.OK -> () - | rc -> failwith (Printf.sprintf "sqlite: %s: %s" (Sqlite3.Rc.to_string rc) sql) in - exec "PRAGMA journal_mode=WAL"; - exec "PRAGMA synchronous=NORMAL"; - exec {|CREATE TABLE IF NOT EXISTS completions ( - command TEXT PRIMARY KEY, - data TEXT NOT NULL, - source TEXT, - updated_at INTEGER NOT NULL - )|}; - db +let filename_of_command cmd = + String.map (function + | ' ' -> '_' + | ('a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.') as c -> c + | _ -> '-') cmd -let close db = ignore (Sqlite3.db_close db) +let command_of_filename base = + String.map (function '_' -> ' ' | c -> c) base (* --- JSON serialization of help_result --- *) @@ -78,8 +67,9 @@ let json_positional_of p = let json_list f items = "[" ^ String.concat "," (List.map f items) ^ "]" -let json_of_help_result r = - Printf.sprintf "{\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}" +let json_of_help_result ?(source="help") r = + Printf.sprintf "{\"source\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}" + (json_string source) (json_list json_entry_of r.entries) (json_list json_subcommand_of r.subcommands) (json_list json_positional_of r.positionals) @@ -249,96 +239,115 @@ let help_result_of_json j = subcommands = List.map subcommand_of_json (json_to_list (json_get "subcommands" j)); positionals = List.map positional_of_json (json_to_list (json_get "positionals" j)) } -(* --- Database operations --- *) +(* --- Filesystem operations --- *) -let upsert db ?(source="help") command result = - let json = json_of_help_result result in - let now = int_of_float (Unix.gettimeofday ()) in - let stmt = Sqlite3.prepare db - "INSERT INTO completions (command, data, source, updated_at) VALUES (?, ?, ?, ?) - ON CONFLICT(command) DO UPDATE SET data=excluded.data, source=excluded.source, updated_at=excluded.updated_at" in - ignore (Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT command)); - ignore (Sqlite3.bind stmt 2 (Sqlite3.Data.TEXT json)); - ignore (Sqlite3.bind stmt 3 (Sqlite3.Data.TEXT source)); - ignore (Sqlite3.bind stmt 4 (Sqlite3.Data.INT (Int64.of_int now))); - (match Sqlite3.step stmt with - | Sqlite3.Rc.DONE -> () - | rc -> failwith (Printf.sprintf "upsert %s: %s" command (Sqlite3.Rc.to_string rc))); - ignore (Sqlite3.finalize stmt) +let write_file path contents = + let oc = open_out path in + output_string oc contents; + close_out oc -let upsert_raw db ?(source="native") command data = - let now = int_of_float (Unix.gettimeofday ()) in - let stmt = Sqlite3.prepare db - "INSERT INTO completions (command, data, source, updated_at) VALUES (?, ?, ?, ?) - ON CONFLICT(command) DO UPDATE SET data=excluded.data, source=excluded.source, updated_at=excluded.updated_at" in - ignore (Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT command)); - ignore (Sqlite3.bind stmt 2 (Sqlite3.Data.TEXT data)); - ignore (Sqlite3.bind stmt 3 (Sqlite3.Data.TEXT source)); - ignore (Sqlite3.bind stmt 4 (Sqlite3.Data.INT (Int64.of_int now))); - (match Sqlite3.step stmt with - | Sqlite3.Rc.DONE -> () - | rc -> failwith (Printf.sprintf "upsert_raw %s: %s" command (Sqlite3.Rc.to_string rc))); - ignore (Sqlite3.finalize stmt) +let read_file path = + try + let ic = open_in path in + let n = in_channel_length ic in + let s = Bytes.create n in + really_input ic s 0 n; + close_in ic; + Some (Bytes.to_string s) + with _ -> None -let lookup db command = - let stmt = Sqlite3.prepare db - "SELECT data, source FROM completions WHERE command = ?" in - ignore (Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT command)); - let result = match Sqlite3.step stmt with - | Sqlite3.Rc.ROW -> - let data = Sqlite3.column_text stmt 0 in - let source = Sqlite3.column_text stmt 1 in - Some (data, source) - | _ -> None in - ignore (Sqlite3.finalize stmt); - result +let write_result ~dir ?(source="help") command result = + let path = Filename.concat dir (filename_of_command command ^ ".json") in + write_file path (json_of_help_result ~source result) -let lookup_result db command = - match lookup db command with - | None -> None - | Some (data, _source) -> - (try Some (help_result_of_json (parse_json data)) - with _ -> None) +let write_native ~dir command data = + let path = Filename.concat dir (filename_of_command command ^ ".nu") in + write_file path data -let has_command db command = - let stmt = Sqlite3.prepare db - "SELECT 1 FROM completions WHERE command = ?" in - ignore (Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT command)); - let found = Sqlite3.step stmt = Sqlite3.Rc.ROW in - ignore (Sqlite3.finalize stmt); - found +let find_file dirs command = + let base = filename_of_command command in + let rec go = function + | [] -> None + | dir :: rest -> + let json_path = Filename.concat dir (base ^ ".json") in + if Sys.file_exists json_path then Some json_path + else + let nu_path = Filename.concat dir (base ^ ".nu") in + if Sys.file_exists nu_path then Some nu_path + else go rest in + go dirs -let all_commands db = - let stmt = Sqlite3.prepare db "SELECT command FROM completions ORDER BY command" in - let results = ref [] in - while Sqlite3.step stmt = Sqlite3.Rc.ROW do - results := Sqlite3.column_text stmt 0 :: !results - done; - ignore (Sqlite3.finalize stmt); - List.rev !results +let lookup dirs command = + let base = filename_of_command command in + let rec go = function + | [] -> None + | dir :: rest -> + let path = Filename.concat dir (base ^ ".json") in + (match read_file path with + | Some data -> + (try Some (help_result_of_json (parse_json data)) + with _ -> None) + | None -> go rest) in + go dirs -let delete db command = - let stmt = Sqlite3.prepare db "DELETE FROM completions WHERE command = ?" in - ignore (Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT command)); - ignore (Sqlite3.step stmt); - ignore (Sqlite3.finalize stmt) +let lookup_raw dirs command = + let base = filename_of_command command in + let rec go = function + | [] -> None + | dir :: rest -> + let json_path = Filename.concat dir (base ^ ".json") in + (match read_file json_path with + | Some _ as r -> r + | None -> + let nu_path = Filename.concat dir (base ^ ".nu") in + match read_file nu_path with + | Some _ as r -> r + | None -> go rest) in + go dirs -let begin_transaction db = - match Sqlite3.exec db "BEGIN IMMEDIATE" with - | Sqlite3.Rc.OK -> () | _ -> () +let has_command dirs command = + find_file dirs command <> None -let commit db = - match Sqlite3.exec db "COMMIT" with - | Sqlite3.Rc.OK -> () | _ -> () +let all_commands dirs = + let module SSet = Set.Make(String) in + let cmds = ref SSet.empty in + List.iter (fun dir -> + if Sys.file_exists dir && Sys.is_directory dir then + Array.iter (fun f -> + let base = + if Filename.check_suffix f ".json" then + Some (Filename.chop_suffix f ".json") + else if Filename.check_suffix f ".nu" then + Some (Filename.chop_suffix f ".nu") + else None in + match base with + | Some b -> cmds := SSet.add (command_of_filename b) !cmds + | None -> () + ) (Sys.readdir dir) + ) dirs; + SSet.elements !cmds -let stats db = - let stmt = Sqlite3.prepare db - "SELECT COUNT(*), COUNT(DISTINCT source) FROM completions" in - let result = match Sqlite3.step stmt with - | Sqlite3.Rc.ROW -> - let count = Sqlite3.column_int stmt 0 in - let sources = Sqlite3.column_int stmt 1 in - (count, sources) - | _ -> (0, 0) in - ignore (Sqlite3.finalize stmt); - result +let delete ~dir command = + let base = filename_of_command command in + let json_path = Filename.concat dir (base ^ ".json") in + let nu_path = Filename.concat dir (base ^ ".nu") in + (try Sys.remove json_path with Sys_error _ -> ()); + (try Sys.remove nu_path with Sys_error _ -> ()) + +let file_type_of dirs command = + let base = filename_of_command command in + let rec go = function + | [] -> None + | dir :: rest -> + let json_path = Filename.concat dir (base ^ ".json") in + if Sys.file_exists json_path then + (match read_file json_path with + | Some data -> + (try Some (json_to_string (json_get "source" (parse_json data))) + with _ -> Some "json") + | None -> Some "json") + else + let nu_path = Filename.concat dir (base ^ ".nu") in + if Sys.file_exists nu_path then Some "native" + else go rest in + go dirs diff --git a/nix/module.nix b/nix/module.nix index 4491dbf..8072a92 100644 --- a/nix/module.nix +++ b/nix/module.nix @@ -1,11 +1,12 @@ -# NixOS module: automatic nushell completion generation +# NixOS module: automatic nushell completion indexing # -# Generates completions using three strategies in priority order: +# Indexes completions using three strategies in priority order: # 1. Native completion generators (e.g. CMD completions nushell) # 2. Manpage parsing # 3. --help output parsing # -# Runs as a single pass during the system profile build. +# Produces a directory of .json/.nu files at build time. +# The `complete` command reads from this directory as a system overlay. # # Usage: # { pkgs, ... }: { @@ -25,20 +26,19 @@ let in { options.programs.inshellah = { - enable = lib.mkEnableOption "nushell completion generation via inshellah"; + enable = lib.mkEnableOption "nushell completion indexing via inshellah"; package = lib.mkOption { type = lib.types.package; - description = "The inshellah package to use for generating completions."; + description = "The inshellah package to use for indexing completions."; }; - generatedCompletionsPath = lib.mkOption { + completionsPath = lib.mkOption { type = lib.types.str; - default = "/share/nushell/vendor/autoload"; + default = "/share/inshellah"; description = '' Subdirectory within the merged environment where completion files - are placed. The default matches nushell's vendor autoload convention - (discovered via XDG_DATA_DIRS). + are placed. Used as the system-dir for the completer. ''; }; @@ -47,19 +47,19 @@ in default = []; example = [ "meat" "problematic-tool" ]; description = '' - List of command names to skip during completion generation. + List of command names to skip during completion indexing. ''; }; }; config = lib.mkIf cfg.enable { - environment.pathsToLink = [ cfg.generatedCompletionsPath ]; + environment.pathsToLink = [ cfg.completionsPath ]; environment.extraSetup = let inshellah = "${cfg.package}/bin/inshellah"; - destDir = "$out${cfg.generatedCompletionsPath}"; - segments = lib.filter (s: s != "") (lib.splitString "/" cfg.generatedCompletionsPath); + destDir = "$out${cfg.completionsPath}"; + segments = lib.filter (s: s != "") (lib.splitString "/" cfg.completionsPath); derefPath = lib.concatMapStringsSep "\n " (seg: '' _cur="$_cur/${seg}" if [ -L "$_cur" ]; then @@ -79,10 +79,10 @@ in ${derefPath} mkdir -p ${destDir} - # Generate all completions in one pass: + # Index completions in one pass: # native generators > manpages > --help fallback if [ -d "$out/bin" ] && [ -d "$out/share/man" ]; then - ${inshellah} generate "$out/bin" "$out/share/man" -o ${destDir}${ignoreFlag} \ + ${inshellah} index "$out" --dir ${destDir}${ignoreFlag} \ 2>/dev/null || true fi From fa997542f84cd5d21d6cb048fe75e4377caafa83 Mon Sep 17 00:00:00 2001 From: atagen Date: Mon, 23 Mar 2026 13:15:50 +1100 Subject: [PATCH 6/6] fix module --- flake.nix | 123 ++++++++++++++++++++----------------------------- nix/module.nix | 31 ++++--------- 2 files changed, 59 insertions(+), 95 deletions(-) diff --git a/flake.nix b/flake.nix index 90a94f9..64ec763 100644 --- a/flake.nix +++ b/flake.nix @@ -7,89 +7,66 @@ forAllSystems = f: nixpkgs.lib.genAttrs [ "x86_64-linux" "aarch64-linux" ] ( - system: f (import nixpkgs { inherit system; }) system + system: f (import nixpkgs { inherit system; }) ); in { - devShells = forAllSystems ( - pkgs: sys: { - default = pkgs.mkShell { - packages = with pkgs.ocamlPackages; [ - dune_3 - ocaml - angstrom - angstrom-unix - camlzip - ppx_inline_test - ocaml-lsp - ocamlformat - ocamlformat-rpc-lib - utop - ]; - }; - } - ); + devShells = forAllSystems (pkgs: { + default = pkgs.mkShell { + packages = with pkgs.ocamlPackages; [ + dune_3 + ocaml + angstrom + angstrom-unix + camlzip + ppx_inline_test + ocaml-lsp + ocamlformat + ocamlformat-rpc-lib + utop + ]; + }; + }); - packages = forAllSystems ( - pkgs: sys: { - default = pkgs.ocamlPackages.buildDunePackage { - pname = "inshellah"; - version = "0.1"; - src = ./.; - nativeBuildInputs = [ pkgs.git ]; - buildInputs = with pkgs.ocamlPackages; [ - dune_3 - ocaml - angstrom - angstrom-unix - camlzip - ]; + packages = forAllSystems (pkgs: { + default = pkgs.ocamlPackages.buildDunePackage { + pname = "inshellah"; + version = "0.1"; + src = ./.; + nativeBuildInputs = [ pkgs.git ]; + buildInputs = with pkgs.ocamlPackages; [ + dune_3 + ocaml + angstrom + angstrom-unix + camlzip + ]; - meta.mainProgram = "inshellah"; - - }; - } - ); - - checks = forAllSystems ( - pkgs: sys: - let - # Evaluate a minimal NixOS config that enables the module. - # If the module has infinite recursion, this evaluation will fail. - mockSystem = nixpkgs.lib.nixosSystem { - system = sys; - modules = [ - self.nixosModules.default - { - # Minimal config to make NixOS evaluation happy - boot.loader.grub.device = "nodev"; - fileSystems."/" = { device = "/dev/sda1"; fsType = "ext4"; }; - programs.inshellah.enable = true; - environment.systemPackages = [ pkgs.hello ]; - } - ]; - }; - in - { - module-no-infinite-recursion = pkgs.runCommandLocal "inshellah-module-test" { - # Force evaluation of extraSetup and systemPackages at eval time. - # If the module has infinite recursion, this derivation can't even - # be instantiated. - extraSetupLen = builtins.stringLength mockSystem.config.environment.extraSetup; - syspkgCount = builtins.length mockSystem.config.environment.systemPackages; - } '' - echo "environment.extraSetup length: $extraSetupLen" - echo "environment.systemPackages count: $syspkgCount" - touch $out - ''; - } - ); + meta.mainProgram = "inshellah"; + }; + }); nixosModules.default = - { pkgs, ... }: + { + pkgs, + lib, + config, + ... + }: { imports = [ ./nix/module.nix ]; programs.inshellah.package = self.packages.${pkgs.stdenv.hostPlatform.system}.default; + programs.inshellah.snippet = '' + let cache_home = if ('XDG_CACHE_HOME' in $env) { "--dir " + $env.XDG_CACHE_HOME } else if ('HOME' in $env) { "--dir " + $env.HOME + '/.cache/inshellah' } else { "" } + let inshellah_complete = {|spans| + ${lib.getExe config.programs.inshellah.package} complete $spans $cache_home --system-dir /run/current-system/sw/${config.programs.inshellah.completionsPath} + } + $env.config.completions.external = { + enable: true + max_results: 100 + completer: $inshellah_complete + } + ''; }; }; } diff --git a/nix/module.nix b/nix/module.nix index 8072a92..975263e 100644 --- a/nix/module.nix +++ b/nix/module.nix @@ -37,50 +37,37 @@ in type = lib.types.str; default = "/share/inshellah"; description = '' - Subdirectory within the merged environment where completion files + Subdirectory within the system profile where completion files are placed. Used as the system-dir for the completer. ''; }; ignoreCommands = lib.mkOption { type = lib.types.listOf lib.types.str; - default = []; - example = [ "meat" "problematic-tool" ]; + default = [ ]; + example = [ "problematic-tool" ]; description = '' List of command names to skip during completion indexing. ''; }; + + snippet = lib.mkOption { + type = lib.types.str; + readOnly = true; + }; }; config = lib.mkIf cfg.enable { - environment.pathsToLink = [ cfg.completionsPath ]; - environment.extraSetup = let inshellah = "${cfg.package}/bin/inshellah"; destDir = "$out${cfg.completionsPath}"; - segments = lib.filter (s: s != "") (lib.splitString "/" cfg.completionsPath); - derefPath = lib.concatMapStringsSep "\n " (seg: '' - _cur="$_cur/${seg}" - if [ -L "$_cur" ]; then - _target=$(readlink "$_cur") - rm "$_cur" - mkdir -p "$_cur" - if [ -d "$_target" ]; then - cp -rT "$_target" "$_cur" - chmod -R u+w "$_cur" - fi - fi'') segments; ignoreFile = pkgs.writeText "inshellah-ignore" (lib.concatStringsSep "\n" cfg.ignoreCommands); - ignoreFlag = lib.optionalString (cfg.ignoreCommands != []) " --ignore ${ignoreFile}"; + ignoreFlag = lib.optionalString (cfg.ignoreCommands != [ ]) " --ignore ${ignoreFile}"; in '' - _cur="$out" - ${derefPath} mkdir -p ${destDir} - # Index completions in one pass: - # native generators > manpages > --help fallback if [ -d "$out/bin" ] && [ -d "$out/share/man" ]; then ${inshellah} index "$out" --dir ${destDir}${ignoreFlag} \ 2>/dev/null || true