init
This commit is contained in:
commit
7ded0fea1b
22 changed files with 4717 additions and 0 deletions
444
lib/store.ml
Normal file
444
lib/store.ml
Normal file
|
|
@ -0,0 +1,444 @@
|
|||
(* store.ml — filesystem-backed cache of parsed completion data.
|
||||
*
|
||||
* this module handles persistence of completion data to disk. each command's
|
||||
* help_result is serialized to json and stored as a file in a cache directory
|
||||
* (default: $XDG_CACHE_HOME/inshellah). commands with native nushell completions
|
||||
* are stored as .nu files instead.
|
||||
*
|
||||
* the store also provides lookup, listing, and subcommand discovery by
|
||||
* scanning filenames in the cache directory.
|
||||
*
|
||||
* file naming convention:
|
||||
* - spaces in command names become underscores (e.g. "git add" → "git_add.json")
|
||||
* - subcommands of a parent share the prefix (e.g. "git_add.json", "git_commit.json")
|
||||
* - .json files contain serialized help_result
|
||||
* - .nu files contain native nushell extern source code
|
||||
*
|
||||
* the module includes a minimal hand-rolled json parser/serializer because
|
||||
* we only need to handle our own output format (no need for a full json library).
|
||||
*)
|
||||
|
||||
open Parser
|
||||
|
||||
(* get the default store path: $XDG_CACHE_HOME/inshellah, falling back to
 * $HOME/.cache/inshellah.
 *
 * following the xdg base directory spec, XDG_CACHE_HOME is ignored when it
 * is unset, empty, or a relative path; in all three cases $HOME/.cache is
 * used instead.
 *
 * raises Not_found if the fallback is needed and HOME is not set. *)
let default_store_path () =
  let xdg_cache =
    match Sys.getenv_opt "XDG_CACHE_HOME" with
    | Some dir when dir <> "" && not (Filename.is_relative dir) -> Some dir
    | Some _ | None -> None
  in
  let cache =
    match xdg_cache with
    | Some dir -> dir
    | None -> Filename.concat (Sys.getenv "HOME") ".cache"
  in
  Filename.concat cache "inshellah"
|
||||
|
||||
(* recursively create directories (equivalent to mkdir -p).
 *
 * parents are created first, each with mode 0o755. a path that already
 * exists is left untouched.
 *
 * if another process creates the directory between the existence check and
 * the mkdir call, the resulting EEXIST is ignored as long as the path is now
 * a directory. any other Unix.Unix_error is propagated to the caller. *)
let ensure_dir dir =
  let is_directory d = try Sys.is_directory d with Sys_error _ -> false in
  let rec mkdir_p d =
    if not (Sys.file_exists d) then begin
      mkdir_p (Filename.dirname d);
      try Unix.mkdir d 0o755
      with Unix.Unix_error (Unix.EEXIST, _, _) when is_directory d -> ()
    end
  in
  mkdir_p dir
|
||||
|
||||
(* map a command name to a filename-safe string of the same length.
 * a space turns into an underscore; ascii letters, digits, '-', '_' and '.'
 * are kept; every other character is replaced by a hyphen.
 * e.g. "git add" → "git_add", "docker-compose" → "docker-compose" *)
let filename_of_command cmd =
  let sanitize c =
    match c with
    | ' ' -> '_'
    | 'a'..'z' | 'A'..'Z' | '0'..'9' -> c
    | '-' | '_' | '.' -> c
    | _ -> '-'
  in
  String.init (String.length cmd) (fun i -> sanitize cmd.[i])
|
||||
|
||||
(* turn a stored basename back into a command name by replacing every
 * underscore with a space.
 * note: this is lossy. an underscore that was part of the original command
 * name (e.g. "my_tool") also comes back as a space, and characters that
 * filename_of_command replaced with hyphens are not restored. *)
let command_of_filename base =
  base
  |> String.split_on_char '_'
  |> String.concat " "
|
||||
|
||||
(* --- json serialization of help_result ---
|
||||
* hand-rolled json emitters. we don't use a json library because:
|
||||
* 1. the schema is fixed and simple — we only serialize our own types
|
||||
* 2. avoiding dependencies keeps the binary small
|
||||
* 3. printf-style emission is fast and straightforward for our types *)
|
||||
|
||||
(* escape the contents of a string for use inside a json string literal.
 * double quotes and backslashes get a backslash prefix; newline, tab and
 * carriage return use their short escapes; any other byte below 0x20 is
 * written as a four-digit \u escape. all other bytes are copied unchanged.
 * the surrounding quotes are not added here. *)
let escape_json s =
  let out = Buffer.create (String.length s + 4) in
  let emit = Buffer.add_string out in
  for i = 0 to String.length s - 1 do
    match s.[i] with
    | '"' -> emit "\\\""
    | '\\' -> emit "\\\\"
    | '\n' -> emit "\\n"
    | '\t' -> emit "\\t"
    | '\r' -> emit "\\r"
    | '\x00'..'\x1f' as ctrl -> Printf.bprintf out "\\u%04x" (Char.code ctrl)
    | plain -> Buffer.add_char out plain
  done;
  Buffer.contents out
|
||||
|
||||
(* render a string as a quoted json string literal (escaped via escape_json) *)
let json_string s = s |> escape_json |> Printf.sprintf "\"%s\""

(* the json null literal *)
let json_null = "null"
|
||||
|
||||
(* serialize a switch as a json object tagged by "type":
 * short carries "char", long carries "name", both carries the two.
 * the flag character is emitted as a one-character json string. *)
let json_switch_of sw =
  let char_json c = json_string (String.make 1 c) in
  match sw with
  | Short c ->
    Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (char_json c)
  | Long l ->
    Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string l)
  | Both (c, l) ->
    Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}"
      (char_json c) (json_string l)
|
||||
|
||||
(* serialize an optional switch parameter: a missing parameter becomes json
 * null, otherwise an object with "kind" (mandatory/optional) and "name". *)
let json_param_of param =
  match param with
  | Some (Mandatory p) ->
    Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}" (json_string p)
  | Some (Optional p) ->
    Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}" (json_string p)
  | None -> json_null
|
||||
|
||||
(* serialize one entry as an object with "switch", "param" and "desc" *)
let json_entry_of e =
  let switch_json = json_switch_of e.switch in
  let param_json = json_param_of e.param in
  let desc_json = json_string e.desc in
  Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}"
    switch_json param_json desc_json
|
||||
|
||||
(* serialize one subcommand as an object with "name" and "desc" *)
let json_subcommand_of sc =
  let name_json = json_string sc.name in
  let desc_json = json_string sc.desc in
  Printf.sprintf "{\"name\":%s,\"desc\":%s}" name_json desc_json
|
||||
|
||||
(* serialize one positional as an object with a string "name" and the
 * boolean flags "optional" and "variadic" (emitted as bare true/false) *)
let json_positional_of p =
  let name_json = json_string p.pos_name in
  Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}"
    name_json p.optional p.variadic
|
||||
|
||||
(* render a list as a json array: each item is serialized with f and the
 * results are joined with commas inside square brackets *)
let json_list f items =
  let rendered = List.map f items in
  let body = String.concat "," rendered in
  "[" ^ body ^ "]"
|
||||
|
||||
(* serialize a whole help_result as a single json object.
 * source is stored in the "source" field (default "help"); the remaining
 * fields are "description", "entries", "subcommands" and "positionals". *)
let json_of_help_result ?(source="help") r =
  let source_json = json_string source in
  let description_json = json_string r.description in
  let entries_json = json_list json_entry_of r.entries in
  let subcommands_json = json_list json_subcommand_of r.subcommands in
  let positionals_json = json_list json_positional_of r.positionals in
  Printf.sprintf "{\"source\":%s,\"description\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}"
    source_json description_json entries_json subcommands_json positionals_json
|
||||
|
||||
(* --- json deserialization ---
 * minimal hand-rolled recursive-descent json parser. only handles the subset
 * we emit: strings, booleans, nulls, arrays, and objects. no number parsing
 * (we don't emit numbers). this is intentionally minimal: it is meant for
 * reading back our own serialized format.
 *
 * malformed or truncated input (for example a cache file cut short by an
 * interrupted write) is always reported as Json_error; the parser never
 * loops on it and never lets Invalid_argument or Failure escape.
 *
 * peculiarity: the \u escape handler does basic utf-8 encoding for code points
 * up to 0xffff but doesn't handle surrogate pairs. this is fine for our use
 * case since we only escape control characters below 0x20. *)

(* in-memory json value; numbers are deliberately absent *)
type json =
  | Jnull
  | Jbool of bool
  | Jstring of string
  | Jarray of json list
  | Jobject of (string * json) list  (* members in source order *)

(* json accessor helpers: return sensible defaults for missing/wrong types *)

(* first member named key of an object; Jnull if absent or not an object *)
let json_get key = function
  | Jobject pairs -> Option.value (List.assoc_opt key pairs) ~default:Jnull
  | _ -> Jnull

(* string payload, or the empty string for any non-string value *)
let json_to_string = function Jstring s -> s | _ -> ""

(* boolean payload, or false for any non-boolean value *)
let json_to_bool = function Jbool b -> b | _ -> false

(* array elements, or the empty list for any non-array value *)
let json_to_list = function Jarray l -> l | _ -> []

(* raised by parse_json on malformed input; the message includes the byte
 * offset at which parsing stopped *)
exception Json_error of string

(* parse one json value from the start of s (leading whitespace allowed).
 * uses a mutable position ref to walk through the string. anything that
 * follows the first complete value is ignored.
 *
 * raises Json_error on unexpected characters, misspelled literals,
 * unterminated strings and truncated or non-hex \u escapes. *)
let parse_json s =
  let len = String.length s in
  let pos = ref 0 in
  let fail what = raise (Json_error (Printf.sprintf "%s at %d" what !pos)) in
  (* '\x00' doubles as the end-of-input marker for single-char lookahead *)
  let peek () = if !pos < len then s.[!pos] else '\x00' in
  let advance () = incr pos in
  let skip_ws () =
    while !pos < len
          && (match s.[!pos] with
              | ' ' | '\t' | '\n' | '\r' -> true
              | _ -> false)
    do
      advance ()
    done
  in
  let expect c =
    skip_ws ();
    if !pos >= len || s.[!pos] <> c then
      raise (Json_error (Printf.sprintf "expected '%c' at %d" c !pos));
    advance ()
  in
  (* consume exactly the keyword word and yield value, or fail *)
  let literal word value =
    let n = String.length word in
    if !pos + n <= len && String.sub s !pos n = word then begin
      pos := !pos + n;
      value
    end else
      fail ("expected '" ^ word ^ "'")
  in
  let hex_value c =
    match c with
    | '0'..'9' -> Char.code c - Char.code '0'
    | 'a'..'f' -> Char.code c - Char.code 'a' + 10
    | 'A'..'F' -> Char.code c - Char.code 'A' + 10
    | _ -> fail "invalid \\u escape"
  in
  (* read the four hex digits of a \u escape; pos ends up just past them *)
  let read_hex4 () =
    if !pos + 4 > len then fail "truncated \\u escape";
    let code = ref 0 in
    for i = 0 to 3 do
      code := (!code lsl 4) lor hex_value s.[!pos + i]
    done;
    pos := !pos + 4;
    !code
  in
  (* utf-8 encode a code point in the range 0..0xffff *)
  let add_code_point buf code =
    if code < 0x80 then Buffer.add_char buf (Char.chr code)
    else if code < 0x800 then begin
      Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end else begin
      Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12)));
      Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end
  in
  let rec parse_value () =
    skip_ws ();
    if !pos >= len then fail "unexpected end of input"
    else
      match s.[!pos] with
      | '"' -> Jstring (parse_string ())
      | '{' -> parse_object ()
      | '[' -> parse_array ()
      | 'n' -> literal "null" Jnull
      | 't' -> literal "true" (Jbool true)
      | 'f' -> literal "false" (Jbool false)
      | c -> raise (Json_error (Printf.sprintf "unexpected '%c' at %d" c !pos))
  and parse_string () =
    expect '"';
    let buf = Buffer.create 32 in
    let rec loop () =
      (* running off the end must be an error, not an endless loop *)
      if !pos >= len then fail "unterminated string";
      match s.[!pos] with
      | '"' -> advance ()
      | '\\' ->
        advance ();
        if !pos >= len then fail "unterminated string";
        let esc = s.[!pos] in
        advance ();
        (match esc with
         | 'n' -> Buffer.add_char buf '\n'
         | 't' -> Buffer.add_char buf '\t'
         | 'r' -> Buffer.add_char buf '\r'
         | 'b' -> Buffer.add_char buf '\b'
         | 'f' -> Buffer.add_char buf '\012'
         | 'u' -> add_code_point buf (read_hex4 ())
         (* quote, backslash, slash and anything else stand for themselves *)
         | other -> Buffer.add_char buf other);
        loop ()
      | c ->
        Buffer.add_char buf c;
        advance ();
        loop ()
    in
    loop ();
    Buffer.contents buf
  and parse_object () =
    expect '{';
    skip_ws ();
    if peek () = '}' then (advance (); Jobject [])
    else begin
      let rec members acc =
        let key = parse_string () in
        expect ':';
        let value = parse_value () in
        let acc = (key, value) :: acc in
        skip_ws ();
        if peek () = ',' then (advance (); members acc)
        else (expect '}'; List.rev acc)
      in
      Jobject (members [])
    end
  and parse_array () =
    expect '[';
    skip_ws ();
    if peek () = ']' then (advance (); Jarray [])
    else begin
      let rec elements acc =
        let acc = parse_value () :: acc in
        skip_ws ();
        if peek () = ',' then (advance (); elements acc)
        else (expect ']'; List.rev acc)
      in
      Jarray (elements [])
    end
  in
  parse_value ()
|
||||
|
||||
(* --- json → ocaml type converters ---
|
||||
* these reconstruct our parser types from their json representations.
|
||||
* they mirror the json_*_of serializers above. *)
|
||||
|
||||
(* rebuild a switch from its json object, dispatching on the "type" field.
 * a missing or empty "char" falls back to '?', and an unrecognised "type"
 * yields Long "?". *)
let switch_of_json j =
  let text key = json_to_string (json_get key j) in
  let flag_char () =
    match text "char" with
    | "" -> '?'
    | c -> c.[0]
  in
  match text "type" with
  | "short" -> Short (flag_char ())
  | "long" -> Long (text "name")
  | "both" ->
    let c = flag_char () in
    Both (c, text "name")
  | _ -> Long "?"
|
||||
|
||||
(* rebuild an optional switch parameter from json. json null means no
 * parameter; otherwise "kind" selects Mandatory or Optional with the
 * value of "name". any other "kind" is treated as no parameter. *)
let param_of_json j =
  match j with
  | Jnull -> None
  | _ ->
    let name = json_to_string (json_get "name" j) in
    let kind = json_to_string (json_get "kind" j) in
    if kind = "mandatory" then Some (Mandatory name)
    else if kind = "optional" then Some (Optional name)
    else None
|
||||
|
||||
(* rebuild an entry from the "switch", "param" and "desc" fields of j *)
let entry_of_json j =
  let field key = json_get key j in
  let switch = switch_of_json (field "switch") in
  let param = param_of_json (field "param") in
  let desc = json_to_string (field "desc") in
  { switch; param; desc }
|
||||
|
||||
(* rebuild a subcommand from the "name" and "desc" fields of j;
 * a missing field becomes the empty string *)
let subcommand_of_json j =
  let text key = json_to_string (json_get key j) in
  { name = text "name"; desc = text "desc" }
|
||||
|
||||
(* rebuild a positional from j: "name" as a string, "optional" and
 * "variadic" as booleans (false when missing or not a boolean) *)
let positional_of_json j =
  let flag key = json_to_bool (json_get key j) in
  { pos_name = json_to_string (json_get "name" j);
    optional = flag "optional";
    variadic = flag "variadic" }
|
||||
|
||||
(* rebuild a help_result from its json object. each list field is read with
 * json_to_list, so a missing or non-array field yields an empty list; a
 * missing "description" yields the empty string. the "source" field is not
 * part of help_result and is ignored here. *)
let help_result_of_json j =
  let items key convert = List.map convert (json_to_list (json_get key j)) in
  { entries = items "entries" entry_of_json;
    subcommands = items "subcommands" subcommand_of_json;
    positionals = items "positionals" positional_of_json;
    description = json_to_string (json_get "description" j) }
|
||||
|
||||
(* --- filesystem operations --- *)
|
||||
|
||||
(* write contents to path via open_out, replacing any existing file.
 *
 * the channel is closed on every path: if output_string raises, the channel
 * is closed without flushing and the original exception is re-raised, so no
 * file descriptor is leaked. errors from opening, writing, or the final
 * flush in close_out surface as Sys_error. *)
let write_file path contents =
  let oc = open_out path in
  match output_string oc contents with
  | () -> close_out oc
  | exception e ->
    close_out_noerr oc;
    raise e
|
||||
|
||||
(* read the whole file at path and return Some contents, or None if the file
 * cannot be opened or read (Sys_error) or is shorter than its reported
 * length (End_of_file).
 *
 * the file is opened in binary mode so that in_channel_length matches the
 * number of bytes actually read, and the channel is closed on every path,
 * including when the read itself fails. *)
let read_file path =
  match open_in_bin path with
  | exception Sys_error _ -> None
  | ic ->
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () ->
         match really_input_string ic (in_channel_length ic) with
         | contents -> Some contents
         | exception (Sys_error _ | End_of_file) -> None)
|
||||
|
||||
(* serialize result to json and store it in dir as <command>.json, where
 * the basename comes from filename_of_command. source (default "help") is
 * recorded in the json "source" field. *)
let write_result ~dir ?(source="help") command result =
  let file = filename_of_command command ^ ".json" in
  result
  |> json_of_help_result ~source
  |> write_file (Filename.concat dir file)
|
||||
|
||||
(* store data verbatim in dir as <command>.nu, where the basename comes
 * from filename_of_command. used for native nushell completion source. *)
let write_native ~dir command data =
  let file = filename_of_command command ^ ".nu" in
  write_file (Filename.concat dir file) data
|
||||
|
||||
(* true only when path exists and is a directory *)
let is_dir path =
  if Sys.file_exists path then Sys.is_directory path else false
|
||||
|
||||
(* locate the data file for command across several store directories.
 * dirs are tried in the order given; within one directory the .json file
 * is preferred over the .nu file. returns the path of the first file that
 * exists, or None if no directory has one. *)
let find_file dirs command =
  let base = filename_of_command command in
  let existing dir ext =
    let path = Filename.concat dir (base ^ ext) in
    if Sys.file_exists path then Some path else None
  in
  let in_dir dir =
    match existing dir ".json" with
    | Some _ as hit -> hit
    | None -> existing dir ".nu"
  in
  List.find_map in_dir dirs
|
||||
|
||||
(* load the help_result stored for command.
 * only <command>.json is considered (.nu files hold nushell source and
 * cannot be turned back into a help_result). dirs are tried in order; a
 * directory whose file is missing, unreadable, or fails to parse is skipped
 * and the next one is tried. returns None if none of them yields a result. *)
let lookup dirs command =
  let file = filename_of_command command ^ ".json" in
  let load dir =
    Option.bind (read_file (Filename.concat dir file)) (fun data ->
      (* any parse failure simply means "no usable entry here" *)
      try Some (help_result_of_json (parse_json data))
      with _ -> None)
  in
  List.find_map load dirs
|
||||
|
||||
(* return the stored file contents for command exactly as they are on disk,
 * without parsing. in each directory the .json file is read first and the
 * .nu file second; the first readable one across dirs wins.
 * used by the "query" command to dump stored data as-is. *)
let lookup_raw dirs command =
  let base = filename_of_command command in
  let read dir ext = read_file (Filename.concat dir (base ^ ext)) in
  let in_dir dir =
    match read dir ".json" with
    | None -> read dir ".nu"
    | found -> found
  in
  List.find_map in_dir dirs
|
||||
|
||||
(* strip a recognised store extension from a filename.
 * returns Some basename for names ending in .json or .nu (checked in that
 * order) and None for anything else. *)
let chop_extension f =
  let strip ext =
    if Filename.check_suffix f ext then Some (Filename.chop_suffix f ext)
    else None
  in
  List.find_map strip [".json"; ".nu"]
|
||||
|
||||
(* discover subcommands of a command by scanning filenames in the store.
 * looks for files whose names start with the command's filename + "_"
 * (e.g. for "git", finds "git_add.json", "git_commit.json", etc.)
 *
 * only returns immediate subcommands (no nested underscores beyond the prefix).
 * tries to extract description from the json "description" field if available;
 * .nu files and unreadable or unparsable json give an empty description.
 *
 * directories are scanned in the order given and the first occurrence of a
 * subcommand name wins. within a directory the filenames are sorted before
 * scanning, so "x.json" is always seen before "x.nu" and the result does not
 * depend on the order in which the filesystem lists entries. a directory that
 * cannot be listed is treated as empty instead of aborting the whole scan.
 *
 * the result is sorted by subcommand name.
 *
 * peculiarity: this filesystem-based discovery is used as a fallback when the
 * command's own help_result doesn't list subcommands. it enables completion
 * for subcommands that were indexed from separate manpages or help runs. *)
let subcommands_of dirs command =
  let prefix = filename_of_command command ^ "_" in
  let plen = String.length prefix in
  let module SMap = Map.Make(String) in
  let description_of path =
    match read_file path with
    | None -> ""
    | Some data ->
      (* best effort: a corrupt file just means no description *)
      (try json_to_string (json_get "description" (parse_json data))
       with _ -> "")
  in
  let consider dir subs f =
    if not (String.starts_with ~prefix f) then subs
    else
      match chop_extension f with
      | None -> subs
      | Some b ->
        let rest = String.sub b plen (String.length b - plen) in
        if rest = "" || String.contains rest '_' || SMap.mem rest subs then subs
        else
          let desc =
            if Filename.check_suffix f ".json"
            then description_of (Filename.concat dir f)
            else ""
          in
          SMap.add rest { name = rest; desc } subs
  in
  let scan subs dir =
    if not (is_dir dir) then subs
    else begin
      let files = try Sys.readdir dir with Sys_error _ -> [||] in
      (* ".json" sorts before ".nu", so json descriptions take precedence *)
      Array.sort String.compare files;
      Array.fold_left (consider dir) subs files
    end
  in
  List.fold_left scan SMap.empty dirs
  |> SMap.bindings
  |> List.map snd
|
||||
|
||||
(* list all indexed commands across all store directories.
 * every file ending in .json or .nu contributes one command name (its
 * basename passed through command_of_filename); other files are ignored.
 * returns a sorted, deduplicated list of command names.
 *
 * entries of dirs that are not directories are skipped, and a directory that
 * cannot be listed is treated as empty so that one unreadable store does not
 * hide the commands found in the others. *)
let all_commands dirs =
  let module SSet = Set.Make(String) in
  let add_file cmds f =
    match chop_extension f with
    | Some b -> SSet.add (command_of_filename b) cmds
    | None -> cmds
  in
  let add_dir cmds dir =
    if not (is_dir dir) then cmds
    else
      let files = try Sys.readdir dir with Sys_error _ -> [||] in
      Array.fold_left add_file cmds files
  in
  List.fold_left add_dir SSet.empty dirs
  |> SSet.elements
|
||||
|
||||
(* report how a command was indexed, taken from the first directory in dirs
 * that has a file for it.
 * - <command>.json present: the value of its "source" field; "json" if the
 *   file cannot be read or parsed
 * - otherwise <command>.nu present: "native"
 * - neither present in any directory: None
 * used by the "dump" command to show provenance. *)
let file_type_of dirs command =
  let base = filename_of_command command in
  let source_of path =
    match read_file path with
    | None -> "json"
    | Some data ->
      (try json_to_string (json_get "source" (parse_json data))
       with _ -> "json")
  in
  let classify dir =
    let json_path = Filename.concat dir (base ^ ".json") in
    if Sys.file_exists json_path then Some (source_of json_path)
    else if Sys.file_exists (Filename.concat dir (base ^ ".nu")) then Some "native"
    else None
  in
  List.find_map classify dirs
|
||||
Loading…
Add table
Add a link
Reference in a new issue