(* store.ml — filesystem-backed cache of parsed completion data. * * this module handles persistence of completion data to disk. each command's * help_result is serialized to json and stored as a file in a cache directory * (default: $XDG_CACHE_HOME/inshellah). commands with native nushell completions * are stored as .nu files instead. * * the store also provides lookup, listing, and subcommand discovery by * scanning filenames in the cache directory. * * file naming convention: * - spaces in command names become underscores (e.g. "git add" → "git_add.json") * - subcommands of a parent share the prefix (e.g. "git_add.json", "git_commit.json") * - .json files contain serialized help_result * - .nu files contain native nushell extern source code * * the module includes a minimal hand-rolled json parser/serializer because * we only need to handle our own output format (no need for a full json library). *) open Parser (* get the default store path: $XDG_CACHE_HOME/inshellah, falling back to * ~/.cache/inshellah if XDG_CACHE_HOME is not set. *) let default_store_path () = let cache = try Sys.getenv "XDG_CACHE_HOME" with Not_found -> Filename.concat (Sys.getenv "HOME") ".cache" in Filename.concat cache "inshellah" (* recursively create directories (equivalent to mkdir -p) *) let ensure_dir dir = let rec mkdir_p d = if Sys.file_exists d then () else begin mkdir_p (Filename.dirname d); Unix.mkdir d 0o755 end in mkdir_p dir (* convert command name to safe filename: spaces become underscores, * non-alphanumeric chars become hyphens. * e.g. "git add" → "git_add", "docker-compose" → "docker-compose" *) let filename_of_command cmd = String.map (function | ' ' -> '_' | ('a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.') as c -> c | _ -> '-') cmd (* inverse of filename_of_command: underscores back to spaces. * peculiarity: this is lossy — original underscores in command names * (e.g. "my_tool") would be converted to spaces. in practice this * doesn't matter because tools with underscores in names are rare, * and subcommands use space-separated naming. *) let command_of_filename base = String.map (function '_' -> ' ' | c -> c) base (* --- json serialization of help_result --- * hand-rolled json emitters. we don't use a json library because: * 1. the schema is fixed and simple — we only serialize our own types * 2. avoiding dependencies keeps the binary small * 3. printf-style emission is fast and straightforward for our types *) (* escape a string for json: quotes, backslashes, and control characters. * control chars below 0x20 are emitted as \u00XX unicode escapes. *) let escape_json s = let buf = Buffer.create (String.length s + 4) in String.iter (fun c -> match c with | '"' -> Buffer.add_string buf "\\\"" | '\\' -> Buffer.add_string buf "\\\\" | '\n' -> Buffer.add_string buf "\\n" | '\t' -> Buffer.add_string buf "\\t" | '\r' -> Buffer.add_string buf "\\r" | c when Char.code c < 0x20 -> Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c)) | c -> Buffer.add_char buf c ) s; Buffer.contents buf let json_string s = Printf.sprintf "\"%s\"" (escape_json s) let json_null = "null" let json_switch_of = function | Short c -> Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (json_string (String.make 1 c)) | Long l -> Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string l) | Both (c, l) -> Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}" (json_string (String.make 1 c)) (json_string l) let json_param_of = function | None -> json_null | Some (Mandatory p) -> Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}" (json_string p) | Some (Optional p) -> Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}" (json_string p) let json_entry_of e = Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}" (json_switch_of e.switch) (json_param_of e.param) (json_string e.desc) let json_subcommand_of sc = Printf.sprintf "{\"name\":%s,\"desc\":%s}" (json_string sc.name) (json_string sc.desc) let json_positional_of p = Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}" (json_string p.pos_name) p.optional p.variadic let json_list f items = "[" ^ String.concat "," (List.map f items) ^ "]" let json_of_help_result ?(source="help") r = Printf.sprintf "{\"source\":%s,\"description\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}" (json_string source) (json_string r.description) (json_list json_entry_of r.entries) (json_list json_subcommand_of r.subcommands) (json_list json_positional_of r.positionals) (* --- json deserialization --- * minimal hand-rolled recursive-descent json parser. only handles the subset * we emit: strings, booleans, nulls, arrays, and objects. no number parsing * (we don't emit numbers). this is intentionally minimal — we only read back * our own serialized format, so robustness against arbitrary json is not needed. * * peculiarity: the \u escape handler does basic utf-8 encoding for code points * up to 0xffff but doesn't handle surrogate pairs. this is fine for our use * case since we only escape control characters below 0x20. *) type json = | Jnull | Jbool of bool | Jstring of string | Jarray of json list | Jobject of (string * json) list (* json accessor helpers — return sensible defaults for missing/wrong types *) let json_get key = function | Jobject pairs -> (try List.assoc key pairs with Not_found -> Jnull) | _ -> Jnull let json_to_string = function Jstring s -> s | _ -> "" let json_to_bool = function Jbool b -> b | _ -> false let json_to_list = function Jarray l -> l | _ -> [] exception Json_error of string (* imperative recursive-descent json parser. * uses a mutable position ref to walk through the string. * peculiarity: boolean/null parsing just advances a fixed number of chars * without validating the actual characters — safe because we only read * our own output, but would be incorrect for arbitrary json. *) let parse_json s = let len = String.length s in let pos = ref 0 in let peek () = if !pos < len then s.[!pos] else '\x00' in let advance () = incr pos in let skip_ws () = while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t' || s.[!pos] = '\n' || s.[!pos] = '\r') do advance () done in let expect c = skip_ws (); if peek () <> c then raise (Json_error (Printf.sprintf "expected '%c' at %d" c !pos)); advance () in let rec parse_value () = skip_ws (); match peek () with | '"' -> Jstring (parse_string ()) | '{' -> parse_object () | '[' -> parse_array () | 'n' -> advance (); advance (); advance (); advance (); Jnull | 't' -> advance (); advance (); advance (); advance (); Jbool true | 'f' -> advance (); advance (); advance (); advance (); advance (); Jbool false | c -> raise (Json_error (Printf.sprintf "unexpected '%c' at %d" c !pos)) and parse_string () = expect '"'; let buf = Buffer.create 32 in while peek () <> '"' do if peek () = '\\' then begin advance (); (match peek () with | '"' -> Buffer.add_char buf '"' | '\\' -> Buffer.add_char buf '\\' | 'n' -> Buffer.add_char buf '\n' | 't' -> Buffer.add_char buf '\t' | 'r' -> Buffer.add_char buf '\r' | 'u' -> advance (); let hex = String.sub s !pos 4 in pos := !pos + 3; let code = int_of_string ("0x" ^ hex) in if code < 128 then Buffer.add_char buf (Char.chr code) else begin (* UTF-8 encode *) if code < 0x800 then begin Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6))); Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f))) end else begin Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12))); Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f))); Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f))) end end | c -> Buffer.add_char buf c); advance () end else begin Buffer.add_char buf (peek ()); advance () end done; advance (); (* closing quote *) Buffer.contents buf and parse_object () = expect '{'; skip_ws (); if peek () = '}' then (advance (); Jobject []) else begin let pairs = ref [] in let cont = ref true in while !cont do skip_ws (); let key = parse_string () in expect ':'; let value = parse_value () in pairs := (key, value) :: !pairs; skip_ws (); if peek () = ',' then advance () else cont := false done; expect '}'; Jobject (List.rev !pairs) end and parse_array () = expect '['; skip_ws (); if peek () = ']' then (advance (); Jarray []) else begin let items = ref [] in let cont = ref true in while !cont do let v = parse_value () in items := v :: !items; skip_ws (); if peek () = ',' then advance () else cont := false done; expect ']'; Jarray (List.rev !items) end in parse_value () (* --- json → ocaml type converters --- * these reconstruct our parser types from their json representations. * they mirror the json_*_of serializers above. *) let switch_of_json j = match json_to_string (json_get "type" j) with | "short" -> let c = json_to_string (json_get "char" j) in Short (if String.length c > 0 then c.[0] else '?') | "long" -> Long (json_to_string (json_get "name" j)) | "both" -> let c = json_to_string (json_get "char" j) in Both ((if String.length c > 0 then c.[0] else '?'), json_to_string (json_get "name" j)) | _ -> Long "?" let param_of_json = function | Jnull -> None | j -> let name = json_to_string (json_get "name" j) in (match json_to_string (json_get "kind" j) with | "mandatory" -> Some (Mandatory name) | "optional" -> Some (Optional name) | _ -> None) let entry_of_json j = { switch = switch_of_json (json_get "switch" j); param = param_of_json (json_get "param" j); desc = json_to_string (json_get "desc" j) } let subcommand_of_json j = { name = json_to_string (json_get "name" j); desc = json_to_string (json_get "desc" j) } let positional_of_json j = { pos_name = json_to_string (json_get "name" j); optional = json_to_bool (json_get "optional" j); variadic = json_to_bool (json_get "variadic" j) } let help_result_of_json j = { entries = List.map entry_of_json (json_to_list (json_get "entries" j)); subcommands = List.map subcommand_of_json (json_to_list (json_get "subcommands" j)); positionals = List.map positional_of_json (json_to_list (json_get "positionals" j)); description = json_to_string (json_get "description" j) } (* --- filesystem operations --- *) let write_file path contents = let oc = open_out path in output_string oc contents; close_out oc let read_file path = try let ic = open_in path in let n = in_channel_length ic in let s = Bytes.create n in really_input ic s 0 n; close_in ic; Some (Bytes.to_string s) with _ -> None (* write a parsed help_result to the store as json *) let write_result ~dir ?(source="help") command result = let path = Filename.concat dir (filename_of_command command ^ ".json") in write_file path (json_of_help_result ~source result) (* write native nushell completion source to the store as a .nu file *) let write_native ~dir command data = let path = Filename.concat dir (filename_of_command command ^ ".nu") in write_file path data let is_dir path = Sys.file_exists path && Sys.is_directory path (* look for a command's data file across multiple store directories. * checks json first, then .nu. returns the first match found. * directories are searched in order (user dir before system dirs). *) let find_file dirs command = let base = filename_of_command command in List.find_map (fun dir -> let json_path = Filename.concat dir (base ^ ".json") in if Sys.file_exists json_path then Some json_path else let nu_path = Filename.concat dir (base ^ ".nu") in if Sys.file_exists nu_path then Some nu_path else None ) dirs (* look up a command and deserialize its help_result from json. * only searches for .json files (not .nu, since those can't be deserialized * back into help_result). returns none if not found or parse fails. *) let lookup dirs command = let base = filename_of_command command in List.find_map (fun dir -> let path = Filename.concat dir (base ^ ".json") in match read_file path with | Some data -> (try Some (help_result_of_json (parse_json data)) with _ -> None) | None -> None ) dirs (* look up a command's raw data (json or .nu source) without parsing. * used by the "query" command to dump stored data as-is. *) let lookup_raw dirs command = let base = filename_of_command command in List.find_map (fun dir -> let json_path = Filename.concat dir (base ^ ".json") in match read_file json_path with | Some _ as r -> r | None -> let nu_path = Filename.concat dir (base ^ ".nu") in read_file nu_path ) dirs let chop_extension f = if Filename.check_suffix f ".json" then Some (Filename.chop_suffix f ".json") else if Filename.check_suffix f ".nu" then Some (Filename.chop_suffix f ".nu") else None (* discover subcommands of a command by scanning filenames in the store. * looks for files whose names start with the command's filename + "_" * (e.g. for "git", finds "git_add.json", "git_commit.json", etc.) * * only returns immediate subcommands (no nested underscores beyond the prefix). * tries to extract description from the json "description" field if available. * * peculiarity: this filesystem-based discovery is used as a fallback when the * command's own help_result doesn't list subcommands. it enables completion * for subcommands that were indexed from separate manpages or help runs. *) let subcommands_of dirs command = let prefix = filename_of_command command ^ "_" in let plen = String.length prefix in let module SMap = Map.Make(String) in let subs = List.fold_left (fun subs dir -> if is_dir dir then Array.fold_left (fun subs f -> if not (String.starts_with ~prefix f) then subs else let is_json = Filename.check_suffix f ".json" in match chop_extension f with | None -> subs | Some b -> let rest = String.sub b plen (String.length b - plen) in if String.contains rest '_' || String.length rest = 0 then subs else if SMap.mem rest subs then subs else let desc = if is_json then match read_file (Filename.concat dir f) with | Some data -> (try json_to_string (json_get "description" (parse_json data)) with _ -> "") | None -> "" else "" in SMap.add rest { name = rest; desc } subs ) subs (Sys.readdir dir) else subs ) SMap.empty dirs in SMap.fold (fun _ sc acc -> sc :: acc) subs [] |> List.rev (* list all indexed commands across all store directories. * returns a sorted, deduplicated list of command names. *) let all_commands dirs = let module SSet = Set.Make(String) in List.fold_left (fun cmds dir -> if is_dir dir then Array.fold_left (fun cmds f -> match chop_extension f with | Some b -> SSet.add (command_of_filename b) cmds | None -> cmds ) cmds (Sys.readdir dir) else cmds ) SSet.empty dirs |> SSet.elements (* determine how a command was indexed: "help", "manpage", "native", etc. * for json files, reads the "source" field. for .nu files, returns "native". * used by the "dump" command to show provenance. *) let file_type_of dirs command = let base = filename_of_command command in List.find_map (fun dir -> let json_path = Filename.concat dir (base ^ ".json") in if Sys.file_exists json_path then (match read_file json_path with | Some data -> (try Some (json_to_string (json_get "source" (parse_json data))) with _ -> Some "json") | None -> Some "json") else let nu_path = Filename.concat dir (base ^ ".nu") in if Sys.file_exists nu_path then Some "native" else None ) dirs