inshellah/lib/store.ml
2026-03-24 20:40:03 +11:00

444 lines
16 KiB
OCaml

(* store.ml — filesystem-backed cache of parsed completion data.
*
* this module handles persistence of completion data to disk. each command's
* help_result is serialized to json and stored as a file in a cache directory
* (default: $XDG_CACHE_HOME/inshellah). commands with native nushell completions
* are stored as .nu files instead.
*
* the store also provides lookup, listing, and subcommand discovery by
* scanning filenames in the cache directory.
*
* file naming convention:
* - spaces in command names become underscores (e.g. "git add" → "git_add.json")
* - subcommands of a parent share the prefix (e.g. "git_add.json", "git_commit.json")
* - .json files contain serialized help_result
* - .nu files contain native nushell extern source code
*
* the module includes a minimal hand-rolled json parser/serializer because
* we only need to handle our own output format (no need for a full json library).
*)
open Parser
(* get the default store path: $XDG_CACHE_HOME/inshellah, falling back to
 * $HOME/.cache/inshellah.
 *
 * following the xdg base directory spec, XDG_CACHE_HOME is only honoured
 * when it is set, non-empty and absolute; an empty or relative value is
 * treated exactly like an unset one.
 *
 * raises Not_found if the fallback is needed and HOME is not set. *)
let default_store_path () =
  let cache =
    match Sys.getenv_opt "XDG_CACHE_HOME" with
    | Some dir when dir <> "" && not (Filename.is_relative dir) -> dir
    | Some _ | None -> Filename.concat (Sys.getenv "HOME") ".cache" in
  Filename.concat cache "inshellah"
(* recursively create directories (equivalent to mkdir -p).
 *
 * each missing component is created with mode 0o755, parents first.
 * the existence check and the mkdir are not atomic, so another process may
 * create the same directory in between; that EEXIST is treated as success
 * as long as the path really is a directory. any other Unix_error
 * (permissions, read-only filesystem, ...) still propagates. *)
let ensure_dir dir =
  let rec mkdir_p d =
    if not (Sys.file_exists d) then begin
      mkdir_p (Filename.dirname d);
      try Unix.mkdir d 0o755
      with Unix.Unix_error (Unix.EEXIST, _, _) when Sys.is_directory d -> ()
    end in
  mkdir_p dir
(* convert a command name to a safe filename, one character at a time:
 * - a space becomes an underscore
 * - ascii letters, digits, '-', '_' and '.' are kept unchanged
 * - every other character becomes a hyphen
 * e.g. "git add" → "git_add", "docker-compose" → "docker-compose" *)
let filename_of_command cmd =
  let sanitize c =
    match c with
    | ' ' -> '_'
    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.' -> c
    | _ -> '-' in
  String.map sanitize cmd
(* approximate inverse of filename_of_command: every underscore in the
 * file's base name is turned back into a space.
 * peculiarity: the mapping is lossy — an underscore that was part of the
 * original command name (e.g. "my_tool") also comes back as a space. *)
let command_of_filename base =
  let restore c = if c = '_' then ' ' else c in
  String.map restore base
(* --- json serialization of help_result ---
* hand-rolled json emitters. we don't use a json library because:
* 1. the schema is fixed and simple — we only serialize our own types
* 2. avoiding dependencies keeps the binary small
* 3. printf-style emission is fast and straightforward for our types *)
(* escape the contents of a string for embedding in a json string literal.
 * double quotes and backslashes get a backslash prefix; newline, tab and
 * carriage return use their short escapes; any other byte below 0x20 is
 * written as a \u00XX escape. all remaining bytes are copied verbatim.
 * the surrounding quotes are NOT added here. *)
let escape_json s =
  let out = Buffer.create (String.length s + 4) in
  let emit ch =
    match ch with
    | '"' -> Buffer.add_string out "\\\""
    | '\\' -> Buffer.add_string out "\\\\"
    | '\n' -> Buffer.add_string out "\\n"
    | '\t' -> Buffer.add_string out "\\t"
    | '\r' -> Buffer.add_string out "\\r"
    | _ ->
      let code = Char.code ch in
      if code < 0x20 then Printf.bprintf out "\\u%04x" code
      else Buffer.add_char out ch in
  String.iter emit s;
  Buffer.contents out
(* a complete json string literal: the escaped contents wrapped in quotes *)
let json_string s = "\"" ^ escape_json s ^ "\""
(* the json null literal *)
let json_null = "null"
(* serialize a switch as a json object tagged by "type":
 * "short" carries a one-character "char", "long" carries a "name",
 * and "both" carries the two together. *)
let json_switch_of sw =
  let json_char c = json_string (String.make 1 c) in
  match sw with
  | Short c ->
    Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (json_char c)
  | Long l ->
    Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string l)
  | Both (c, l) ->
    let char_json = json_char c in
    let name_json = json_string l in
    Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}"
      char_json name_json
(* serialize an optional switch parameter: json null when there is none,
 * otherwise an object with a "kind" ("mandatory" or "optional") and the
 * parameter's "name". *)
let json_param_of param =
  match param with
  | None -> json_null
  | Some (Mandatory name) ->
    json_string name
    |> Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}"
  | Some (Optional name) ->
    json_string name
    |> Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}"
(* serialize one entry as an object with "switch", "param" and "desc" *)
let json_entry_of e =
  let switch_json = json_switch_of e.switch in
  let param_json = json_param_of e.param in
  let desc_json = json_string e.desc in
  Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}"
    switch_json param_json desc_json
(* serialize one subcommand as an object with "name" and "desc" *)
let json_subcommand_of sc =
  let name_json = json_string sc.name in
  let desc_json = json_string sc.desc in
  Printf.sprintf "{\"name\":%s,\"desc\":%s}" name_json desc_json
(* serialize one positional argument as an object with its "name" and the
 * two boolean flags "optional" and "variadic" (emitted as bare true/false) *)
let json_positional_of p =
  let name_json = json_string p.pos_name in
  Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}"
    name_json p.optional p.variadic
(* serialize a list as a json array: each item is rendered with [f] and the
 * results are comma-separated inside square brackets. *)
let json_list f items =
  items
  |> List.map f
  |> String.concat ","
  |> Printf.sprintf "[%s]"
(* serialize a whole help_result as a single json object.
 * [source] (default "help") is stored in the "source" field; the remaining
 * fields are the description plus the entries, subcommands and positionals
 * arrays. *)
let json_of_help_result ?(source="help") r =
  let description = json_string r.description in
  let entries = json_list json_entry_of r.entries in
  let subcommands = json_list json_subcommand_of r.subcommands in
  let positionals = json_list json_positional_of r.positionals in
  Printf.sprintf
    "{\"source\":%s,\"description\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}"
    (json_string source) description entries subcommands positionals
(* --- json deserialization ---
* minimal hand-rolled recursive-descent json parser. only handles the subset
* we emit: strings, booleans, nulls, arrays, and objects. no number parsing
* (we don't emit numbers). this is intentionally minimal — we only read back
* our own serialized format, so robustness against arbitrary json is not needed.
*
* peculiarity: the \u escape handler does basic utf-8 encoding for code points
* up to 0xffff but doesn't handle surrogate pairs. this is fine for our use
* case since we only escape control characters below 0x20. *)
(* in-memory representation of a parsed json value.
 * there is deliberately no number constructor: the serializers in this
 * file never emit numbers, so the parser never needs to represent one. *)
type json =
(* the literal null *)
| Jnull
(* the literals true / false *)
| Jbool of bool
(* a string, with escapes already decoded *)
| Jstring of string
(* an array, elements in source order *)
| Jarray of json list
(* an object as an association list, pairs in source order *)
| Jobject of (string * json) list
(* json accessor helpers — return sensible defaults for missing/wrong types.
 * json_get looks [key] up in an object and yields Jnull when the key is
 * absent or when the value is not an object at all. *)
let json_get key value =
  match value with
  | Jobject pairs -> Option.value (List.assoc_opt key pairs) ~default:Jnull
  | Jnull | Jbool _ | Jstring _ | Jarray _ -> Jnull
(* unwrap a json string; anything else yields "" *)
let json_to_string j =
  match j with
  | Jstring s -> s
  | Jnull | Jbool _ | Jarray _ | Jobject _ -> ""
(* unwrap a json boolean; anything else yields false *)
let json_to_bool j =
  match j with
  | Jbool b -> b
  | Jnull | Jstring _ | Jarray _ | Jobject _ -> false
(* unwrap a json array; anything else yields the empty list *)
let json_to_list j =
  match j with
  | Jarray items -> items
  | Jnull | Jbool _ | Jstring _ | Jobject _ -> []
(* raised by parse_json on input it cannot parse; the payload is a
 * human-readable message that includes the offending position. *)
exception Json_error of string
(* imperative recursive-descent json parser.
 * uses a mutable position ref to walk through the string and returns the
 * first json value found; anything after that value is ignored.
 *
 * every malformed-input condition is reported as Json_error (message plus
 * byte offset). in particular:
 * - running out of input inside a string raises instead of looping forever
 *   (a truncated cache file must not hang the caller)
 * - null / true / false are checked character by character
 * - \u escapes need exactly four hex digits
 *
 * peculiarities:
 * - numbers are not supported (the serializers never emit them)
 * - \u escapes are utf-8 encoded individually; surrogate pairs are not
 *   combined. fine for our own output, which only escapes bytes below 0x20
 * - an unknown escape such as \q decodes to the escaped character itself *)
let parse_json s =
  let len = String.length s in
  let pos = ref 0 in
  let fail msg = raise (Json_error (Printf.sprintf "%s at %d" msg !pos)) in
  (* '\x00' doubles as the end-of-input marker for single-char lookahead;
   * loops that consume arbitrary bytes must test !pos against len instead *)
  let peek () = if !pos < len then s.[!pos] else '\x00' in
  let advance () = incr pos in
  let is_ws = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false in
  let skip_ws () =
    while !pos < len && is_ws s.[!pos] do advance () done in
  let expect c =
    skip_ws ();
    if peek () <> c then fail (Printf.sprintf "expected '%c'" c);
    advance () in
  (* consume the exact keyword [word] and return [value] *)
  let literal word value =
    let n = String.length word in
    if !pos + n <= len && String.sub s !pos n = word then begin
      pos := !pos + n;
      value
    end else fail (Printf.sprintf "expected '%s'" word) in
  let hex_digit c =
    match c with
    | '0'..'9' -> Char.code c - Char.code '0'
    | 'a'..'f' -> Char.code c - Char.code 'a' + 10
    | 'A'..'F' -> Char.code c - Char.code 'A' + 10
    | _ -> fail "invalid hex digit in \\u escape" in
  (* append code point [code] (at most 0xffff) to [buf] as utf-8 *)
  let add_utf8 buf code =
    if code < 0x80 then Buffer.add_char buf (Char.chr code)
    else if code < 0x800 then begin
      Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end else begin
      Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12)));
      Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end in
  let rec parse_value () =
    skip_ws ();
    match peek () with
    | '"' -> Jstring (parse_string ())
    | '{' -> parse_object ()
    | '[' -> parse_array ()
    | 'n' -> literal "null" Jnull
    | 't' -> literal "true" (Jbool true)
    | 'f' -> literal "false" (Jbool false)
    | _ when !pos >= len -> fail "unexpected end of input"
    | c -> raise (Json_error (Printf.sprintf "unexpected '%c' at %d" c !pos))
  and parse_string () =
    expect '"';
    let buf = Buffer.create 32 in
    let closed = ref false in
    while not !closed do
      if !pos >= len then fail "unterminated string";
      (match s.[!pos] with
       | '"' -> closed := true
       | '\\' ->
         advance ();
         if !pos >= len then fail "unterminated escape sequence";
         (match s.[!pos] with
          | 'n' -> Buffer.add_char buf '\n'
          | 't' -> Buffer.add_char buf '\t'
          | 'r' -> Buffer.add_char buf '\r'
          | 'b' -> Buffer.add_char buf '\b'
          | 'f' -> Buffer.add_char buf '\x0c'
          | 'u' ->
            (* the four hex digits sit at pos+1 .. pos+4 *)
            if !pos + 4 >= len then fail "truncated \\u escape";
            let code = ref 0 in
            for i = 1 to 4 do
              code := (!code lsl 4) lor hex_digit s.[!pos + i]
            done;
            (* leave pos on the last digit; the shared advance below
             * steps past it *)
            pos := !pos + 4;
            add_utf8 buf !code
          | c -> Buffer.add_char buf c)
       | c -> Buffer.add_char buf c);
      (* steps past the char just handled, including the closing quote *)
      advance ()
    done;
    Buffer.contents buf
  and parse_object () =
    expect '{';
    skip_ws ();
    if peek () = '}' then (advance (); Jobject [])
    else begin
      (* accumulates pairs in reverse; loops while a ',' follows *)
      let rec members acc =
        let key = parse_string () in
        expect ':';
        let value = parse_value () in
        let acc = (key, value) :: acc in
        skip_ws ();
        if peek () = ',' then (advance (); members acc) else acc in
      let pairs = members [] in
      expect '}';
      Jobject (List.rev pairs)
    end
  and parse_array () =
    expect '[';
    skip_ws ();
    if peek () = ']' then (advance (); Jarray [])
    else begin
      let rec elements acc =
        let acc = parse_value () :: acc in
        skip_ws ();
        if peek () = ',' then (advance (); elements acc) else acc in
      let items = elements [] in
      expect ']';
      Jarray (List.rev items)
    end
  in
  parse_value ()
(* --- json → ocaml type converters ---
* these reconstruct our parser types from their json representations.
* they mirror the json_*_of serializers above. *)
(* rebuild a switch from its json object, dispatching on the "type" field.
 * a missing or empty "char" falls back to '?', and an unrecognised "type"
 * yields Long "?". *)
let switch_of_json j =
  let field key = json_to_string (json_get key j) in
  let short_char () =
    match field "char" with
    | "" -> '?'
    | c -> c.[0] in
  match field "type" with
  | "short" -> Short (short_char ())
  | "long" -> Long (field "name")
  | "both" -> Both (short_char (), field "name")
  | _ -> Long "?"
(* rebuild an optional parameter from json: null means no parameter, and
 * an object is read via its "kind" and "name" fields. an unrecognised
 * "kind" also yields None. *)
let param_of_json j =
  match j with
  | Jnull -> None
  | _ ->
    let field key = json_to_string (json_get key j) in
    let constructor =
      match field "kind" with
      | "mandatory" -> Some (fun name -> Mandatory name)
      | "optional" -> Some (fun name -> Optional name)
      | _ -> None in
    Option.map (fun make -> make (field "name")) constructor
(* rebuild an entry from the "switch", "param" and "desc" fields of [j] *)
let entry_of_json j =
  let switch = switch_of_json (json_get "switch" j) in
  let param = param_of_json (json_get "param" j) in
  let desc = json_to_string (json_get "desc" j) in
  { switch; param; desc }
(* rebuild a subcommand from the "name" and "desc" fields of [j];
 * missing fields become empty strings *)
let subcommand_of_json j =
  let name = json_to_string (json_get "name" j) in
  let desc = json_to_string (json_get "desc" j) in
  { name; desc }
(* rebuild a positional from the "name", "optional" and "variadic" fields
 * of [j]; missing flags default to false *)
let positional_of_json j =
  let pos_name = json_to_string (json_get "name" j) in
  let optional = json_to_bool (json_get "optional" j) in
  let variadic = json_to_bool (json_get "variadic" j) in
  { pos_name; optional; variadic }
(* rebuild a help_result from its json object. the "source" field is not
 * part of help_result and is ignored here; missing arrays become empty
 * lists and a missing description becomes "". *)
let help_result_of_json j =
  (* read the array stored under [key] and convert each element *)
  let items key convert =
    json_get key j |> json_to_list |> List.map convert in
  let entries = items "entries" entry_of_json in
  let subcommands = items "subcommands" subcommand_of_json in
  let positionals = items "positionals" positional_of_json in
  let description = json_to_string (json_get "description" j) in
  { entries; subcommands; positionals; description }
(* --- filesystem operations --- *)
(* write [contents] to [path], creating or truncating the file.
 * the channel is always released, even when the write fails: the data is
 * flushed explicitly so that write errors surface as Sys_error, and the
 * final close is the non-raising variant so it cannot mask that error.
 * raises Sys_error if the file cannot be opened or written. *)
let write_file path contents =
  let oc = open_out path in
  Fun.protect
    ~finally:(fun () -> close_out_noerr oc)
    (fun () ->
       output_string oc contents;
       flush oc)
(* read the whole file at [path], returning None if it cannot be opened or
 * read.
 * - the file is opened in binary mode so the byte count reported by
 *   in_channel_length matches what is actually read
 * - the channel is closed on every path, including read failures
 * - only i/o failures (Sys_error, End_of_file, Invalid_argument for an
 *   unrepresentable length) are mapped to None; other exceptions propagate *)
let read_file path =
  match open_in_bin path with
  | exception Sys_error _ -> None
  | ic ->
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () ->
         match really_input_string ic (in_channel_length ic) with
         | data -> Some data
         | exception (Sys_error _ | End_of_file | Invalid_argument _) -> None)
(* store a parsed help_result for [command] as <dir>/<filename>.json.
 * [source] (default "help") is recorded in the json "source" field. *)
let write_result ~dir ?(source="help") command result =
  let file = filename_of_command command ^ ".json" in
  let json = json_of_help_result ~source result in
  write_file (Filename.concat dir file) json
(* store native nushell completion source for [command] verbatim as
 * <dir>/<filename>.nu *)
let write_native ~dir command data =
  let file = filename_of_command command ^ ".nu" in
  write_file (Filename.concat dir file) data
(* true only when [path] exists and is a directory. the existence check
 * comes first because Sys.is_directory raises on a missing path. *)
let is_dir path =
  if Sys.file_exists path then Sys.is_directory path else false
(* locate a command's data file across several store directories.
 * directories are tried in the order given; within one directory the
 * .json file is preferred over the .nu file. returns the path of the first
 * file that exists, or None. *)
let find_file dirs command =
  let base = filename_of_command command in
  let candidates dir =
    [ Filename.concat dir (base ^ ".json");
      Filename.concat dir (base ^ ".nu") ] in
  List.find_map
    (fun dir -> List.find_opt Sys.file_exists (candidates dir))
    dirs
(* look up a command and deserialize its help_result from json.
 * only .json files are considered (.nu files cannot be turned back into a
 * help_result). a directory whose file is missing, unreadable or fails to
 * parse is skipped and the search continues with the next directory;
 * returns None when no directory yields a result. *)
let lookup dirs command =
  let file = filename_of_command command ^ ".json" in
  let load dir =
    Option.bind
      (read_file (Filename.concat dir file))
      (fun data ->
         try Some (help_result_of_json (parse_json data))
         with _ -> None) in
  List.find_map load dirs
(* fetch a command's stored data as raw text, without parsing it.
 * per directory the .json file is tried first and the .nu file second;
 * the first readable file wins. *)
let lookup_raw dirs command =
  let base = filename_of_command command in
  let read_with ext dir = read_file (Filename.concat dir (base ^ ext)) in
  List.find_map
    (fun dir ->
       match read_with ".json" dir with
       | None -> read_with ".nu" dir
       | found -> found)
    dirs
(* strip a store extension from a filename: returns the name without
 * ".json" or ".nu" (checked in that order), or None for any other file. *)
let chop_extension f =
  List.find_map
    (fun ext ->
       if Filename.check_suffix f ext then Some (Filename.chop_suffix f ext)
       else None)
    [".json"; ".nu"]
(* discover subcommands of a command by scanning filenames in the store.
 * a file counts when its name is the command's filename, then "_", then a
 * non-empty remainder with no further underscore, then ".json" or ".nu"
 * (e.g. for "git": "git_add.json", "git_commit.nu").
 *
 * only immediate subcommands are returned. the first file seen for a given
 * subcommand name wins; later duplicates (in the same or a later directory)
 * are ignored. the description is read from the json "description" field
 * when the winning file is a .json file, and is "" otherwise or on any
 * read/parse failure.
 *
 * the result is sorted by subcommand name. entries in [dirs] that are not
 * directories are skipped. *)
let subcommands_of dirs command =
  let prefix = filename_of_command command ^ "_" in
  let plen = String.length prefix in
  let module SMap = Map.Make(String) in
  (* best-effort description lookup; only json files carry one *)
  let describe dir f =
    if not (Filename.check_suffix f ".json") then ""
    else
      match read_file (Filename.concat dir f) with
      | None -> ""
      | Some data ->
        (try json_to_string (json_get "description" (parse_json data))
         with _ -> "") in
  (* fold step over the files of one directory *)
  let consider dir found f =
    if not (String.starts_with ~prefix f) then found
    else
      match chop_extension f with
      | None -> found
      | Some b ->
        let rest = String.sub b plen (String.length b - plen) in
        if rest = "" || String.contains rest '_' || SMap.mem rest found
        then found
        else SMap.add rest { name = rest; desc = describe dir f } found in
  let scan found dir =
    if is_dir dir then Array.fold_left (consider dir) found (Sys.readdir dir)
    else found in
  List.fold_left scan SMap.empty dirs
  |> SMap.bindings
  |> List.map snd
(* list every indexed command across all store directories.
 * each .json / .nu filename is mapped back to a command name; the result
 * is sorted and free of duplicates. entries in [dirs] that are not
 * directories are skipped. *)
let all_commands dirs =
  let commands_in dir =
    if not (is_dir dir) then []
    else
      Sys.readdir dir
      |> Array.to_list
      |> List.filter_map chop_extension
      |> List.map command_of_filename in
  dirs
  |> List.concat_map commands_in
  |> List.sort_uniq String.compare
(* report how a command was indexed, using the first directory that has a
 * file for it.
 * - a .json file yields the value of its "source" field, or "json" when
 *   the file cannot be read or parsed
 * - a .nu file yields "native"
 * returns None when no directory contains either file. *)
let file_type_of dirs command =
  let base = filename_of_command command in
  let source_of_json path =
    match read_file path with
    | None -> "json"
    | Some data ->
      (try json_to_string (json_get "source" (parse_json data))
       with _ -> "json") in
  let classify dir =
    let json_path = Filename.concat dir (base ^ ".json") in
    if Sys.file_exists json_path then Some (source_of_json json_path)
    else if Sys.file_exists (Filename.concat dir (base ^ ".nu"))
    then Some "native"
    else None in
  List.find_map classify dirs