(* store.ml — filesystem-backed cache of parsed completion data.
 *
 * this module handles persistence of completion data to disk. each command's
 * help_result is serialized to JSON and stored as a file in a cache directory
 * (default: $XDG_CACHE_HOME/inshellah). commands with native nushell completions
 * are stored as .nu files instead.
 *
 * the store also provides lookup, listing, and subcommand discovery by
 * scanning filenames in the cache directory.
 *
 * file naming convention:
 * - spaces in command names become underscores (e.g. "git add" -> "git_add.json")
 * - subcommands of a parent share the prefix (e.g. "git_add.json", "git_commit.json")
 * - .json files contain serialized help_result
 * - .nu files contain native nushell extern source code
 *
 * the module includes a minimal hand-rolled JSON parser/serializer because
 * we only need to handle our own output format (no need for a full JSON library). *)

open Parser

(* get the default store path: $XDG_CACHE_HOME/inshellah, falling back to
 * ~/.cache/inshellah if XDG_CACHE_HOME is unset, empty, or not an absolute
 * path (the XDG Base Directory spec says such values must be ignored; using
 * them verbatim would place the store relative to the current directory).
 * raises Not_found if the fallback is needed and HOME is not set. *)
let default_store_path () =
  let cache =
    match Sys.getenv_opt "XDG_CACHE_HOME" with
    | Some path when path <> "" && not (Filename.is_relative path) -> path
    | Some _ | None -> Filename.concat (Sys.getenv "HOME") ".cache"
  in
  Filename.concat cache "inshellah"

(* recursively create directories along a path (equivalent to mkdir -p).
 * splits the path into components and folds over them, accumulating
 * the current directory prefix and creating each level if missing.
*)
let ensure_dir dir =
  let sep = Filename.dir_sep in
  (* create a single level. the existence check followed by mkdir is racy
   * (another process may create the directory in between), so EEXIST from
   * mkdir is treated as success, matching mkdir -p. any other failure
   * propagates as Unix.Unix_error. *)
  let create_level path =
    if not (Sys.file_exists path) then begin
      try Unix.mkdir path 0o755
      with Unix.Unix_error (Unix.EEXIST, _, _) -> ()
    end
  in
  (* determine the starting prefix: absolute paths begin with the separator *)
  let start = if String.length dir > 0 && dir.[0] = sep.[0] then sep else "" in
  let descend current part =
    (* empty components come from leading, trailing or repeated separators *)
    if part = "" then current
    else begin
      let next =
        if current = sep then sep ^ part
        else if current = "" then part
        else current ^ sep ^ part
      in
      create_level next;
      next
    end
  in
  let (_ : string) =
    List.fold_left descend start (String.split_on_char sep.[0] dir)
  in
  ()

(* convert command name to safe filename: spaces become underscores, and any
 * character other than ASCII letters, digits, '-', '_' and '.' becomes a
 * hyphen.
 * e.g. "git add" -> "git_add", "docker-compose" -> "docker-compose" *)
let filename_of_command cmd =
  String.map
    (function
      | ' ' -> '_'
      | ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' | '.') as char_val ->
        char_val
      | _ -> '-')
    cmd

(* inverse of filename_of_command: underscores back to spaces.
 * note: this is lossy — original underscores in command names
 * (e.g. "my_tool") would be converted to spaces. in practice this
 * doesn't matter because tools with underscores in names are rare,
 * and subcommands use space-separated naming. *)
let command_of_filename base_name =
  String.map (function '_' -> ' ' | char_val -> char_val) base_name

(* --- JSON serialization of help_result ---
 * hand-rolled JSON emitters. we don't use a JSON library because:
 * 1. the schema is fixed and simple — we only serialize our own types
 * 2. avoiding dependencies keeps the binary small
 * 3. printf-style emission is fast and straightforward for our types *)

(* escape a string for JSON: quotes, backslashes, and control characters.
 * control chars below 0x20 are emitted as \uXXXX unicode escapes.
*)
let escape_json contents =
  let out = Buffer.create (String.length contents + 4) in
  let emit = function
    | '"' -> Buffer.add_string out "\\\""
    | '\\' -> Buffer.add_string out "\\\\"
    | '\n' -> Buffer.add_string out "\\n"
    | '\t' -> Buffer.add_string out "\\t"
    | '\r' -> Buffer.add_string out "\\r"
    | '\000' .. '\031' as ctrl ->
      (* every other control character becomes a four-digit \u escape *)
      Buffer.add_string out (Printf.sprintf "\\u%04x" (Char.code ctrl))
    | plain -> Buffer.add_char out plain
  in
  String.iter emit contents;
  Buffer.contents out

(* escape a string and surround it with double quotes *)
let json_string text = "\"" ^ escape_json text ^ "\""

(* the JSON null literal, as emitted text *)
let json_null = "null"

(* render a switch (short flag, long flag, or both) as a JSON object *)
let json_switch_of switch =
  let flag_char c = json_string (String.make 1 c) in
  match switch with
  | Short c ->
    Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (flag_char c)
  | Long name ->
    Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string name)
  | Both (c, name) ->
    Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}"
      (flag_char c) (json_string name)

(* render a parameter spec as JSON; an absent parameter becomes null *)
let json_param_of param =
  match param with
  | None -> json_null
  | Some (Mandatory name) ->
    Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}" (json_string name)
  | Some (Optional name) ->
    Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}" (json_string name)

(* render one flag entry (switch, parameter, description) as JSON *)
let json_entry_of entry =
  let switch = json_switch_of entry.switch in
  let param = json_param_of entry.param in
  let desc = json_string entry.desc in
  Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}" switch param desc

(* render a subcommand (name and description) as JSON *)
let json_subcommand_of sc =
  let name = json_string sc.name in
  let desc = json_string sc.desc in
  Printf.sprintf "{\"name\":%s,\"desc\":%s}" name desc

(* render a positional argument as JSON; the two flags are JSON booleans *)
let json_positional_of p =
  let name = json_string p.pos_name in
  Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}"
    name p.optional p.variadic

(* render a list as a JSON array, formatting each item with [formatter] *)
let json_list formatter items =
  let body = items |> List.map formatter |> String.concat "," in
  "[" ^ body ^ "]"

(* render a whole help_result as one JSON object; [source] records how the
 * data was obtained and defaults to "help" *)
let json_of_help_result ?(source = "help") result =
  let description = json_string result.description in
  let entries = json_list json_entry_of result.entries in
  let subcommands = json_list json_subcommand_of result.subcommands in
  let positionals = json_list json_positional_of result.positionals in
  Printf.sprintf
    "{\"source\":%s,\"description\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}"
    (json_string source) description entries subcommands positionals

(* --- JSON deserialization ---
 * minimal hand-rolled recursive-descent JSON parser. only handles the subset
 * we emit: strings, booleans, nulls, arrays, and objects. no number parsing
 * (we don't emit numbers). this is intentionally minimal — we only read back
 * our own serialized format, so robustness against arbitrary JSON is not needed.
 *
 * note: the \u escape handler does basic UTF-8 encoding for code points
 * up to 0xFFFF but doesn't handle surrogate pairs. this is fine for our use
 * case since we only escape control characters below 0x20. *)
type json =
  | Jnull
  | Jbool of bool
  | Jstring of string
  | Jarray of json list
  | Jobject of (string * json) list

(* JSON accessor helpers — return sensible defaults for missing/wrong types.
 * json_get yields Jnull for a missing key or a non-object value. *)
let json_get key = function
  | Jobject pairs -> Option.value (List.assoc_opt key pairs) ~default:Jnull
  | Jnull | Jbool _ | Jstring _ | Jarray _ -> Jnull

(* the string payload of a JSON value, or the empty string for other kinds *)
let json_to_string = function
  | Jstring text -> text
  | Jnull | Jbool _ | Jarray _ | Jobject _ -> ""

(* the boolean payload of a JSON value, or false for other kinds *)
let json_to_bool = function
  | Jbool value -> value
  | Jnull | Jstring _ | Jarray _ | Jobject _ -> false

(* the items of a JSON array, or the empty list for other kinds *)
let json_to_list = function
  | Jarray items -> items
  | Jnull | Jbool _ | Jstring _ | Jobject _ -> []

exception Json_error of string

(* imperative recursive-descent JSON parser.
 * uses a mutable position ref to walk through the string.
* note: this remains a minimal parser for our own output (no numbers, no
 * surrogate pairs, and input after the first value is ignored), but
 * truncated or malformed input is rejected with Json_error rather than
 * being silently accepted or looping forever. this matters because a cache
 * file can be cut short by an interrupted write. *)
let parse_json contents =
  let len = String.length contents in
  let pos = ref 0 in
  (* raise Json_error with the current position appended to the message *)
  let fail msg = raise (Json_error (Printf.sprintf "%s at %d" msg !pos)) in
  (* peek at the current character without consuming it; yields '\x00' at
   * end of input *)
  let peek () = if !pos < len then contents.[!pos] else '\x00' in
  (* advance the position by one character *)
  let advance () = incr pos in
  (* skip over any whitespace characters at current position *)
  let skip_ws () =
    let is_ws = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false in
    while !pos < len && is_ws contents.[!pos] do
      advance ()
    done
  in
  (* skip whitespace then consume the expected character, or raise *)
  let expect char_val =
    skip_ws ();
    if peek () <> char_val then
      raise (Json_error (Printf.sprintf "expected '%c' at %d" char_val !pos));
    advance ()
  in
  (* consume the keyword [word] (null, true or false) or raise. the
   * characters are compared, not merely skipped, so input that only shares
   * the first letter of a keyword is rejected. *)
  let expect_literal word =
    let n = String.length word in
    if !pos + n <= len && String.sub contents !pos n = word then
      pos := !pos + n
    else fail (Printf.sprintf "expected '%s'" word)
  in
  (* decode the four hex digits starting at index [start]; raises Json_error
   * if fewer than four characters remain or one is not a hex digit *)
  let hex4 start =
    if start + 4 > len then fail "truncated \\u escape";
    let digit offset =
      match contents.[start + offset] with
      | '0' .. '9' as c -> Char.code c - Char.code '0'
      | 'a' .. 'f' as c -> Char.code c - Char.code 'a' + 10
      | 'A' .. 'F' as c -> Char.code c - Char.code 'A' + 10
      | _ -> fail "invalid \\u escape"
    in
    (digit 0 lsl 12) lor (digit 1 lsl 8) lor (digit 2 lsl 4) lor digit 3
  in
  (* append the UTF-8 encoding of a code point up to 0xFFFF. surrogate
   * halves are encoded individually (no pairing). *)
  let add_utf8 buf code =
    if code < 0x80 then Buffer.add_char buf (Char.chr code)
    else if code < 0x800 then begin
      Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end
    else begin
      Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12)));
      Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end
  in
  (* decode one escape sequence into [buf]. on entry the position is just
   * past the backslash; on exit it is just past the whole sequence. an
   * unrecognised escape letter stands for itself, which covers the quote,
   * backslash and slash escapes. *)
  let parse_escape buf =
    if !pos >= len then fail "unterminated escape";
    (match contents.[!pos] with
     | 'n' -> Buffer.add_char buf '\n'
     | 't' -> Buffer.add_char buf '\t'
     | 'r' -> Buffer.add_char buf '\r'
     | 'b' -> Buffer.add_char buf '\b'
     | 'f' -> Buffer.add_char buf '\012'
     | 'u' ->
       add_utf8 buf (hex4 (!pos + 1));
       (* step onto the last hex digit; the shared advance below moves
        * past it *)
       pos := !pos + 4
     | other -> Buffer.add_char buf other);
    advance ()
  in
  (* mutually recursive parsers for each JSON value type *)
  let rec parse_value () =
    skip_ws ();
    match peek () with
    | '"' -> Jstring (parse_string ())
    | '{' -> parse_object ()
    | '[' -> parse_array ()
    | 'n' -> expect_literal "null"; Jnull
    | 't' -> expect_literal "true"; Jbool true
    | 'f' -> expect_literal "false"; Jbool false
    | char_val ->
      raise (Json_error (Printf.sprintf "unexpected '%c' at %d" char_val !pos))

  (* parse a quoted string value, handling escape sequences *)
  and parse_string () =
    expect '"';
    let buf = Buffer.create 32 in
    let rec scan () =
      (* check the bounds explicitly: peek cannot tell end of input from a
       * NUL byte, so relying on it would never terminate when the closing
       * quote is missing *)
      if !pos >= len then fail "unterminated string";
      match contents.[!pos] with
      | '"' -> advance () (* consume closing quote *)
      | '\\' ->
        advance ();
        parse_escape buf;
        scan ()
      | c ->
        Buffer.add_char buf c;
        advance ();
        scan ()
    in
    scan ();
    Buffer.contents buf

  (* parse a JSON object: { "key": value, ... } *)
  and parse_object () =
    expect '{';
    skip_ws ();
    if peek () = '}' then (advance (); Jobject [])
    else begin
      (* accumulate members in reverse; a comma means another one follows *)
      let rec members acc =
        let key = parse_string () in
        expect ':';
        let value = parse_value () in
        let acc = (key, value) :: acc in
        skip_ws ();
        if peek () = ',' then (advance (); members acc) else acc
      in
      let pairs = members [] in
      expect '}';
      Jobject (List.rev pairs)
    end

  (* parse a JSON array: [ value, value, ... ] *)
  and parse_array () =
    expect '[';
    skip_ws ();
    if peek () = ']' then (advance (); Jarray [])
    else begin
      let rec elements acc =
        let acc = parse_value () :: acc in
        skip_ws ();
        if peek () = ',' then (advance (); elements acc) else acc
      in
      let items = elements [] in
      expect ']';
      Jarray (List.rev items)
    end
  in
  parse_value ()

(* --- JSON to OCaml type converters ---
 * these reconstruct our parser types from their JSON representations.
 * they mirror the json_*_of serializers above.
*)
(* reconstruct a switch value from its JSON representation.
 * a missing or empty char falls back to '?', an unknown type to Long "?". *)
let switch_of_json json_node =
  let field key = json_to_string (json_get key json_node) in
  let flag_char () =
    let char_str = field "char" in
    if String.length char_str > 0 then char_str.[0] else '?'
  in
  match field "type" with
  | "short" -> Short (flag_char ())
  | "long" -> Long (field "name")
  | "both" -> Both (flag_char (), field "name")
  | _ -> Long "?"

(* reconstruct a parameter spec from its JSON representation.
 * null or an unknown kind yields None. *)
let param_of_json = function
  | Jnull -> None
  | json_node ->
    let name = json_to_string (json_get "name" json_node) in
    (match json_to_string (json_get "kind" json_node) with
     | "mandatory" -> Some (Mandatory name)
     | "optional" -> Some (Optional name)
     | _ -> None)

(* reconstruct a flag entry from its JSON representation *)
let entry_of_json json_node =
  { switch = switch_of_json (json_get "switch" json_node);
    param = param_of_json (json_get "param" json_node);
    desc = json_to_string (json_get "desc" json_node) }

(* reconstruct a subcommand from its JSON representation *)
let subcommand_of_json json_node =
  { name = json_to_string (json_get "name" json_node);
    desc = json_to_string (json_get "desc" json_node) }

(* reconstruct a positional argument from its JSON representation *)
let positional_of_json json_node =
  { pos_name = json_to_string (json_get "name" json_node);
    optional = json_to_bool (json_get "optional" json_node);
    variadic = json_to_bool (json_get "variadic" json_node) }

(* reconstruct a full help_result from its JSON representation.
 * missing fields become empty lists / the empty string. *)
let help_result_of_json json_node =
  let items key = json_to_list (json_get key json_node) in
  { entries = List.map entry_of_json (items "entries");
    subcommands = List.map subcommand_of_json (items "subcommands");
    positionals = List.map positional_of_json (items "positionals");
    description = json_to_string (json_get "description" json_node) }

(* --- filesystem operations --- *)

(* write a string to a file, overwriting any existing content.
 * the channel is closed even if writing fails. binary mode is used so the
 * bytes on disk are exactly [contents] and match what read_file returns.
 * raises Sys_error if the file cannot be opened, written or flushed. *)
let write_file path contents =
  let oc = open_out_bin path in
  Fun.protect
    ~finally:(fun () -> close_out_noerr oc)
    (fun () ->
      output_string oc contents;
      (* close explicitly so a failed flush (e.g. disk full) is reported;
       * the close in the finaliser is then a no-op *)
      close_out oc)

(* read an entire file into a string, returning None if it cannot be opened
 * or read. the channel is always closed, including on a failed read.
 * binary mode keeps in_channel_length consistent with the number of bytes
 * actually read. *)
let read_file path =
  match open_in_bin path with
  | exception Sys_error _ -> None
  | ic ->
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () ->
        match really_input_string ic (in_channel_length ic) with
        | contents -> Some contents
        | exception (Sys_error _ | End_of_file | Invalid_argument _) -> None)

(* write a parsed help_result to the store as JSON, in
 * dir/<filename_of_command command>.json *)
let write_result ~dir ?(source = "help") command result =
  let path = Filename.concat dir (filename_of_command command ^ ".json") in
  write_file path (json_of_help_result ~source result)

(* write native nushell completion source to the store as a .nu file *)
let write_native ~dir command data =
  let path = Filename.concat dir (filename_of_command command ^ ".nu") in
  write_file path data

(* check whether a path exists and is a directory *)
let is_dir path = Sys.file_exists path && Sys.is_directory path

(* look for a command's data file across multiple store directories.
 * checks JSON first, then .nu. returns the first match found.
 * directories are searched in order (user dir before system dirs). *)
let find_file dirs command =
  let base_name = filename_of_command command in
  let existing directory ext =
    let path = Filename.concat directory (base_name ^ ext) in
    if Sys.file_exists path then Some path else None
  in
  List.find_map
    (fun directory ->
      match existing directory ".json" with
      | Some _ as found -> found
      | None -> existing directory ".nu")
    dirs

(* look up a command and deserialize its help_result from JSON.
 * only searches for .json files (not .nu, since those can't be deserialized
 * back into help_result). returns None if not found or parse fails.
*)
let lookup dirs command =
  let file_name = filename_of_command command ^ ".json" in
  (* a directory whose file is missing or does not parse is skipped, so the
   * search continues with the next directory *)
  let load directory =
    match read_file (Filename.concat directory file_name) with
    | None -> None
    | Some data ->
      (match help_result_of_json (parse_json data) with
       | result -> Some result
       | exception _ -> None)
  in
  List.find_map load dirs

(* fetch a command's stored data verbatim (JSON text or .nu source), with no
 * parsing. within each directory the .json file is tried before the .nu
 * file. used by the "query" command to dump stored data as-is. *)
let lookup_raw dirs command =
  let stem = filename_of_command command in
  let read_with ext directory =
    read_file (Filename.concat directory (stem ^ ext))
  in
  dirs
  |> List.find_map (fun directory ->
         match read_with ".json" directory with
         | Some _ as found -> found
         | None -> read_with ".nu" directory)

(* remove a known extension (.json, then .nu) from a filename; None when the
 * filename ends in neither *)
let chop_extension filename =
  [ ".json"; ".nu" ]
  |> List.find_map (fun ext ->
         if Filename.check_suffix filename ext then
           Some (Filename.chop_suffix filename ext)
         else None)

(* discover subcommands of a command by scanning filenames in the store.
 * looks for files whose names start with the command's filename + "_"
 * (e.g. for "git", finds "git_add.json", "git_commit.json", etc.)
 *
 * only returns immediate subcommands (no nested underscores beyond the prefix).
 * tries to extract description from the JSON "description" field if available.
 *
 * note: this filesystem-based discovery is used as a fallback when the
 * command's own help_result doesn't list subcommands. it enables completion
 * for subcommands that were indexed from separate manpages or help runs.
*)
let subcommands_of dirs command =
  let prefix = filename_of_command command ^ "_" in
  let prefix_len = String.length prefix in
  let module SMap = Map.Make (String) in
  (* directory entries in sorted order. Sys.readdir returns names in an
   * unspecified order; sorting makes "x.json" come before "x.nu", so the
   * JSON description is used when both exist (the same preference as
   * find_file and lookup_raw). a directory that cannot be read is treated
   * as empty instead of aborting the whole scan. *)
  let sorted_files directory =
    match Sys.readdir directory with
    | files ->
      Array.sort String.compare files;
      files
    | exception Sys_error _ -> [||]
  in
  (* best-effort: an unreadable or unparsable file gives an empty description *)
  let description_of directory filename =
    match read_file (Filename.concat directory filename) with
    | Some data ->
      (try json_to_string (json_get "description" (parse_json data))
       with _ -> "")
    | None -> ""
  in
  let add_file directory subs filename =
    if not (String.starts_with ~prefix filename) then subs
    else
      match chop_extension filename with
      | None -> subs
      | Some base_name ->
        let rest =
          String.sub base_name prefix_len (String.length base_name - prefix_len)
        in
        (* skip empty names, nested subcommands, and names already found
         * (earlier directories take precedence) *)
        if rest = "" || String.contains rest '_' || SMap.mem rest subs then subs
        else
          let desc =
            if Filename.check_suffix filename ".json" then
              description_of directory filename
            else ""
          in
          SMap.add rest { name = rest; desc } subs
  in
  let subs =
    List.fold_left
      (fun subs directory ->
        if is_dir directory then
          Array.fold_left (add_file directory) subs (sorted_files directory)
        else subs)
      SMap.empty dirs
  in
  (* bindings are in increasing key order, i.e. sorted by subcommand name *)
  SMap.bindings subs |> List.map snd

(* list all indexed commands across all store directories.
 * returns a sorted, deduplicated list of command names.
 * directories that are missing or cannot be read are skipped. *)
let all_commands dirs =
  let module SSet = Set.Make (String) in
  let add_file cmds filename =
    match chop_extension filename with
    | Some base_name -> SSet.add (command_of_filename base_name) cmds
    | None -> cmds
  in
  let add_dir cmds directory =
    if not (is_dir directory) then cmds
    else
      match Sys.readdir directory with
      | files -> Array.fold_left add_file cmds files
      | exception Sys_error _ -> cmds
  in
  List.fold_left add_dir SSet.empty dirs |> SSet.elements

(* determine how a command was indexed: "help", "manpage", "native", etc.
 * for JSON files, reads the "source" field. for .nu files, returns "native".
 * used by the "dump" command to show provenance.
*)
let file_type_of dirs command =
  let stem = filename_of_command command in
  (* classify one directory: Some kind if it holds the command, else None.
   * an existing .json file that cannot be read or parsed reports "json". *)
  let classify directory =
    let json_path = Filename.concat directory (stem ^ ".json") in
    if Sys.file_exists json_path then begin
      match read_file json_path with
      | None -> Some "json"
      | Some data ->
        (match json_to_string (json_get "source" (parse_json data)) with
         | source -> Some source
         | exception _ -> Some "json")
    end
    else if Sys.file_exists (Filename.concat directory (stem ^ ".nu")) then
      Some "native"
    else None
  in
  List.find_map classify dirs