init
This commit is contained in:
commit
55e74c6ed7
22 changed files with 4821 additions and 0 deletions
0
lib/.ocamlformat
Normal file
0
lib/.ocamlformat
Normal file
3
lib/dune
Normal file
3
lib/dune
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
(library
|
||||
(name inshellah)
|
||||
(libraries angstrom angstrom-unix camlzip str unix))
|
||||
1088
lib/manpage.ml
Normal file
1088
lib/manpage.ml
Normal file
File diff suppressed because it is too large
Load diff
242
lib/nushell.ml
Normal file
242
lib/nushell.ml
Normal file
|
|
@ -0,0 +1,242 @@
|
|||
(* nushell.ml — generate nushell extern definitions from parsed help data.
|
||||
*
|
||||
* this module is the code generation backend. it takes a help_result (from
|
||||
* the parser or manpage modules) and produces nushell source code that
|
||||
* defines "extern" declarations — nushell's mechanism for teaching the shell
|
||||
* about external commands' flags and subcommands so it can offer completions.
|
||||
*
|
||||
* it also maintains a list of nushell's built-in commands to avoid generating
|
||||
* extern definitions that would shadow them.
|
||||
*
|
||||
* key responsibilities:
|
||||
* - deduplicating flag entries (same flag from multiple help sources)
|
||||
* - mapping parameter names to nushell types (path, int, string)
|
||||
* - formatting flags in nushell syntax: --flag(-f): type # description
|
||||
* - handling positional arguments with nushell's ordering constraints
|
||||
* - escaping special characters for nushell string literals
|
||||
*)
|
||||
|
||||
open Parser
|
||||
|
||||
module SSet = Set.Make(String)
|
||||
module SMap = Map.Make(String)
|
||||
module CSet = Set.Make(Char)
|
||||
|
||||
(* names of nushell built-in commands and keywords, grouped by initial letter.
 * externs must never be generated for these: doing so would shadow nushell's
 * own implementation. the list is maintained by hand. *)
let nushell_builtins = [
  (* a *) "alias"; "all"; "ansi"; "any"; "append"; "ast"; "attr";
  (* b *) "bits"; "break"; "bytes";
  (* c *) "cal"; "cd"; "char"; "chunk-by"; "chunks"; "clear"; "collect";
          "columns"; "commandline"; "compact"; "complete"; "config"; "const";
          "continue"; "cp";
  (* d *) "date"; "debug"; "decode"; "def"; "default"; "describe"; "detect";
          "do"; "drop"; "du";
  (* e *) "each"; "echo"; "encode"; "enumerate"; "error"; "every"; "exec";
          "exit"; "explain"; "explore"; "export"; "export-env"; "extern";
  (* f *) "fill"; "filter"; "find"; "first"; "flatten"; "for"; "format"; "from";
  (* g *) "generate"; "get"; "glob"; "grid"; "group-by";
  (* h *) "hash"; "headers"; "help"; "hide"; "hide-env"; "histogram";
          "history"; "http";
  (* i *) "if"; "ignore"; "input"; "insert"; "inspect"; "interleave"; "into";
          "is-admin"; "is-empty"; "is-not-empty"; "is-terminal"; "items";
  (* j *) "job"; "join";
  (* k *) "keybindings"; "kill";
  (* l *) "last"; "length"; "let"; "let-env"; "lines"; "load-env"; "loop"; "ls";
  (* m *) "match"; "math"; "merge"; "metadata"; "mkdir"; "mktemp"; "module";
          "move"; "mut"; "mv";
  (* n *) "nu-check"; "nu-highlight";
  (* o *) "open"; "overlay";
  (* p *) "panic"; "par-each"; "parse"; "path"; "plugin"; "port"; "prepend";
          "print"; "ps";
  (* q *) "query";
  (* r *) "random"; "reduce"; "reject"; "rename"; "return"; "reverse"; "rm";
          "roll"; "rotate"; "run-external";
  (* s *) "save"; "schema"; "scope"; "select"; "seq"; "shuffle"; "skip"; "sleep";
          "slice"; "sort"; "sort-by"; "source"; "source-env"; "split"; "start";
          "stor"; "str"; "sys";
  (* t *) "table"; "take"; "tee"; "term"; "timeit"; "to"; "touch"; "transpose";
          "try"; "tutor";
  (* u *) "ulimit"; "umask"; "uname"; "uniq"; "uniq-by"; "unlet"; "update";
          "upsert"; "url"; "use";
  (* v *) "values"; "version"; "view";
  (* w *) "watch"; "where"; "which"; "while"; "whoami"; "window"; "with-env";
          "wrap";
  (* z *) "zip";
]
|
||||
|
||||
(* set view of nushell_builtins, built the first time it is forced *)
let builtin_set = lazy (SSet.of_seq (List.to_seq nushell_builtins))

(* true when [cmd] is one of the names in nushell_builtins *)
let is_nushell_builtin cmd =
  let (lazy known) = builtin_set in
  SSet.mem cmd known
|
||||
|
||||
(* collapse repeated flag entries into one entry per flag.
 *
 * entries are keyed by long name when they have one (Long and Both share a
 * key), otherwise by short letter. among entries with the same key the one
 * with the highest score wins; on a tie the earliest one is kept:
 *   - Both form:          +10
 *   - has a parameter:    +5
 *   - description length: +1 per 10 chars, capped at +5
 *
 * afterwards, Short entries whose letter already appears in a winning Both
 * entry are dropped, so "-v" is not emitted next to "--verbose(-v)".
 * the result follows the order of first appearance in [entries]. *)
let dedup_entries entries =
  let key_of (e : entry) =
    match e.switch with
    | Short c -> Printf.sprintf "-%c" c
    | Long name | Both (_, name) -> Printf.sprintf "--%s" name
  in
  let score (e : entry) =
    let form_bonus = match e.switch with Both _ -> 10 | Short _ | Long _ -> 0 in
    let param_bonus = if Option.is_some e.param then 5 else 0 in
    form_bonus + param_bonus + min 5 (String.length e.desc / 10)
  in
  (* best-scoring entry per key; an existing winner survives ties *)
  let best =
    List.fold_left
      (fun winners e ->
        SMap.update (key_of e)
          (function
            | Some prev when score prev >= score e -> Some prev
            | Some _ | None -> Some e)
          winners)
      SMap.empty entries
  in
  (* short letters that some winning Both entry already carries *)
  let covered =
    SMap.fold
      (fun _ (e : entry) shorts ->
        match e.switch with
        | Both (c, _) -> CSet.add c shorts
        | Short _ | Long _ -> shorts)
      best CSet.empty
  in
  let emit (seen, out) (e : entry) =
    let k = key_of e in
    let shadowed =
      match e.switch with
      | Short c -> CSet.mem c covered
      | Long _ | Both _ -> false
    in
    if SSet.mem k seen || shadowed then (seen, out)
    else (SSet.add k seen, SMap.find k best :: out)
  in
  let _, kept_rev = List.fold_left emit (SSet.empty, []) entries in
  List.rev kept_rev
|
||||
|
||||
(* pick the nushell type for a parameter placeholder name.
 * the match is exact and case-sensitive: names in the first list become
 * "path", names in the second become "int", anything else is "string". *)
let nushell_type_of_param name =
  let path_names =
    [ "FILE"; "file"; "PATH"; "path"; "DIR"; "dir"; "DIRECTORY";
      "FILENAME"; "PATTERNFILE" ]
  in
  let int_names =
    [ "NUM"; "N"; "COUNT"; "NUMBER"; "int"; "INT"; "COLS"; "WIDTH";
      "LINES"; "DEPTH"; "depth" ]
  in
  if List.mem name path_names then "path"
  else if List.mem name int_names then "int"
  else "string"
|
||||
|
||||
(* prepare [s] for use inside a nushell double-quoted string literal by
 * putting a backslash in front of every double quote and backslash.
 * strings containing neither are returned as-is without copying. *)
let escape_nu s =
  let needs_escape c = c = '"' || c = '\\' in
  if not (String.exists needs_escape s) then s
  else begin
    let buf = Buffer.create (String.length s + 4) in
    String.iter
      (fun c ->
        if needs_escape c then Buffer.add_char buf '\\';
        Buffer.add_char buf c)
      s;
    Buffer.contents buf
  end
|
||||
|
||||
(* render one flag entry as a line of a nushell extern signature.
 *
 * the flag is written as "--long(-s)", "--long" or "-s" depending on the
 * switch form; when the entry has a parameter (mandatory or optional alike)
 * a ": type" annotation from nushell_type_of_param follows.
 *
 * a non-empty description is appended as a "# " comment, padded with spaces
 * so the comment starts at column 40 when the flag text is shorter than
 * that, and with a single space otherwise. *)
let format_flag entry =
  let name =
    match entry.switch with
    | Short c -> Printf.sprintf "-%c" c
    | Long l -> Printf.sprintf "--%s" l
    | Both (c, l) -> Printf.sprintf "--%s(-%c)" l c
  in
  let typed =
    match entry.param with
    | None -> ""
    | Some (Mandatory p | Optional p) -> ": " ^ nushell_type_of_param p
  in
  let flag = " " ^ name ^ typed in
  match entry.desc with
  | "" -> flag
  | desc ->
    let pad = String.make (max 1 (40 - String.length flag)) ' ' in
    String.concat "" [ flag; pad; "# "; desc ]
|
||||
|
||||
(* render one positional argument as a line of a nushell extern signature.
 * variadic arguments get a "..." prefix, optional non-variadic ones a "?"
 * suffix. hyphens in the name become underscores, and the type is looked up
 * from the upper-cased original name. *)
let format_positional p =
  let name = String.concat "_" (String.split_on_char '-' p.pos_name) in
  let prefix, suffix =
    match p.variadic, p.optional with
    | true, _ -> ("...", "")
    | false, true -> ("", "?")
    | false, false -> ("", "")
  in
  let typ = p.pos_name |> String.uppercase_ascii |> nushell_type_of_param in
  Printf.sprintf " %s%s%s: %s" prefix name suffix typ
|
||||
|
||||
(* make a positional list acceptable to nushell's ordering rules:
 *   - once an optional (or variadic) positional has been seen, every later
 *     non-variadic positional is forced to optional;
 *   - only the first variadic positional is kept, later ones are dropped.
 * relative order of the surviving positionals is unchanged. *)
let fixup_positionals positionals =
  let rec walk ~saw_opt ~saw_rest kept = function
    | [] -> List.rev kept
    | p :: rest when p.variadic ->
      if saw_rest then walk ~saw_opt ~saw_rest kept rest
      else walk ~saw_opt:true ~saw_rest:true (p :: kept) rest
    | p :: rest when saw_opt ->
      walk ~saw_opt:true ~saw_rest ({ p with optional = true } :: kept) rest
    | p :: rest ->
      walk ~saw_opt:p.optional ~saw_rest (p :: kept) rest
  in
  walk ~saw_opt:false ~saw_rest:false [] positionals
|
||||
|
||||
(* generate the nushell extern block for a command, followed by one stub
 * extern per subcommand listed in [result].
 *
 * main block, e.g.:
 *   export extern "git add" [
 *     ...pathspec: path
 *     --verbose(-v)    # be verbose
 *     --dry-run(-n)    # dry run
 *   ]
 * positionals go through fixup_positionals and come first; flags go through
 * dedup_entries.
 *
 * each subcommand gets a stub that carries only its description as a comment:
 *   export extern "git stash" [ # stash changes
 *   ]
 *
 * command and subcommand names sit inside double-quoted string literals and
 * are escaped with escape_nu. the subcommand description sits inside a
 * comment, not a string literal, so it is emitted without quote/backslash
 * escaping (matching how format_flag emits descriptions); line breaks in it
 * are replaced by spaces so the text cannot spill out of the comment. *)
let extern_of cmd_name result =
  let cmd = escape_nu cmd_name in
  let comment_text s = String.map (function '\n' | '\r' -> ' ' | c -> c) s in
  let pos_lines =
    fixup_positionals result.positionals
    |> List.map (fun p -> format_positional p ^ "\n")
  in
  let flag_lines =
    dedup_entries result.entries
    |> List.map (fun e -> format_flag e ^ "\n")
  in
  let main =
    Printf.sprintf "export extern \"%s\" [\n%s%s]\n" cmd
      (String.concat "" pos_lines) (String.concat "" flag_lines)
  in
  let subs =
    List.map
      (fun (sc : subcommand) ->
        Printf.sprintf "\nexport extern \"%s %s\" [ # %s\n]\n"
          cmd (escape_nu sc.name) (comment_text sc.desc))
      result.subcommands
  in
  String.concat "" (main :: subs)
|
||||
|
||||
(* public entry point; forwards to extern_of unchanged *)
let generate_extern cmd_name result = extern_of cmd_name result
|
||||
|
||||
(* derive a nushell module name from a command name: ascii letters, digits,
 * '-' and '_' are kept, every other character becomes '-', and the suffix
 * "-completions" is appended.
 * e.g. "git" gives "git-completions". *)
let module_name_of cmd_name =
  let sanitize c =
    match c with
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' -> c
    | _ -> '-'
  in
  String.map sanitize cmd_name ^ "-completions"
|
||||
|
||||
(* wrap the output of extern_of in a nushell module named by module_name_of,
 * followed by a "use <module> *" line that imports everything from it. *)
let generate_module cmd_name result =
  let modname = module_name_of cmd_name in
  let body = extern_of cmd_name result in
  Printf.sprintf "module %s {\n%s}\n\nuse %s *\n" modname body modname
|
||||
|
||||
(* generate an extern when only flag entries are known: the help_result is
 * filled in with no subcommands, no positionals and an empty description. *)
let generate_extern_from_entries cmd_name entries =
  let flags_only =
    { entries; subcommands = []; positionals = []; description = "" }
  in
  generate_extern cmd_name flags_only
|
||||
802
lib/parser.ml
Normal file
802
lib/parser.ml
Normal file
|
|
@ -0,0 +1,802 @@
|
|||
(* parser.ml — parse --help output into structured flag/subcommand/positional data.
|
||||
*
|
||||
* this module is the core of inshellah's help-text understanding. it takes the
|
||||
* raw text that a cli tool prints when you run `cmd --help` and extracts:
|
||||
* - flag entries (short/long switches with optional parameters and descriptions)
|
||||
* - subcommand listings (name + description pairs)
|
||||
* - positional arguments (from usage lines)
|
||||
*
|
||||
* the parser is built on angstrom (a monadic parser combinator library) for the
|
||||
* structured flag/subcommand extraction, with hand-rolled imperative parsers for
|
||||
* usage-line positional extraction (where the format is too varied for clean
|
||||
* combinator composition).
|
||||
*
|
||||
* key design decisions:
|
||||
* - the angstrom parser runs in prefix-consume mode — it doesn't need to parse
|
||||
* the entire input, just extract what it can recognize. unrecognized lines are
|
||||
* skipped via skip_non_option_line.
|
||||
* - multi-line descriptions are handled via indentation-based continuation:
|
||||
* lines indented 8+ spaces that don't start with '-' are folded into the
|
||||
* previous entry's description.
|
||||
* - subcommand detection uses a heuristic: lines with a name followed by 2+
|
||||
* spaces then a description, where the name is at least 2 chars. section
|
||||
* headers (like "arguments:") toggle whether name-description pairs are
|
||||
* treated as subcommands or positionals.
|
||||
* - positional extraction has two paths: usage-line parsing (the common case)
|
||||
* and cli11's explicit "positionals:" section format.
|
||||
*)
|
||||
|
||||
open Angstrom
|
||||
|
||||
(* strip terminal escape sequences from --help output so that only the
 * printable text reaches the parsers. handled forms:
 *   - CSI: ESC [ ... up to and including a final byte in '@'..'~'
 *   - OSC: ESC ] ... up to and including BEL or ESC \ (or end of input)
 *   - escapes with intermediate bytes: ESC, one or more bytes in ' '..'/',
 *     then a final byte in '0'..'~' (e.g. the charset selection ESC ( B);
 *     all of it is removed, not just the first two bytes
 *   - any other two-byte sequence: ESC plus the byte after it
 * an unterminated sequence is dropped up to the end of the input. an ESC
 * that is the very last byte of [s] is kept as-is. *)
let strip_ansi s =
  let len = String.length s in
  let buf = Buffer.create len in
  (* first index >= [i] whose byte satisfies [stop], or [len] if none does *)
  let rec scan_until stop i =
    if i < len && not (stop s.[i]) then scan_until stop (i + 1) else i
  in
  let is_csi_final c = c >= '@' && c <= '~' in
  let is_intermediate c = c >= ' ' && c <= '/' in
  let is_escape_final c = c >= '0' && c <= '~' in
  (* index just past the OSC terminator, searching from [i] *)
  let rec skip_osc i =
    if i >= len then len
    else if s.[i] = '\x07' then i + 1
    else if s.[i] = '\x1b' && i + 1 < len && s.[i + 1] = '\\' then i + 2
    else skip_osc (i + 1)
  in
  let rec go i =
    if i >= len then ()
    else if s.[i] = '\x1b' && i + 1 < len then begin
      match s.[i + 1] with
      | '[' ->
        let j = scan_until is_csi_final (i + 2) in
        go (if j < len then j + 1 else j)
      | ']' -> go (skip_osc (i + 2))
      | _ ->
        let j = scan_until (fun c -> not (is_intermediate c)) (i + 1) in
        if j = i + 1 then
          (* no intermediate bytes: plain two-byte escape *)
          go (i + 2)
        else if j < len && is_escape_final s.[j] then go (j + 1)
        else go j
    end
    else begin
      Buffer.add_char buf s.[i];
      go (i + 1)
    end
  in
  go 0;
  Buffer.contents buf
|
||||
|
||||
(* --- character class predicates --- *)
|
||||
(* these are used throughout the angstrom parsers to classify characters.
|
||||
* they're separated out for readability and reuse. *)
|
||||
|
||||
(* horizontal whitespace only: space or tab *)
let is_whitespace c = Char.equal c ' ' || Char.equal c '\t'
|
||||
|
||||
(* ascii letter or digit *)
let is_alphanumeric c =
  (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
|
||||
|
||||
(* characters accepted inside a parameter name such as FILE or output-dir:
 * ascii letters, digits, underscore and hyphen *)
let is_param_char c =
  (c >= 'A' && c <= 'Z')
  || (c >= 'a' && c <= 'z')
  || (c >= '0' && c <= '9')
  || c = '_'
  || c = '-'
|
||||
|
||||
(* uppercase ascii letter or underscore; the building block for recognising
 * all-caps parameter names like FILE or TIME_STYLE *)
let is_upper_or_underscore c = (c >= 'A' && c <= 'Z') || c = '_'
|
||||
|
||||
(* characters accepted in a long flag name (--foo-bar, --enable-feature2):
 * ascii letters, digits and hyphen *)
let is_long_char c =
  (c >= 'A' && c <= 'Z')
  || (c >= 'a' && c <= 'z')
  || (c >= '0' && c <= '9')
  || c = '-'
|
||||
|
||||
(* --- core types ---
|
||||
* these types represent the structured output of parsing a help text.
|
||||
* they are shared across the entire codebase (nushell codegen, store, manpage parser).
|
||||
*
|
||||
* switch: a flag can be short-only (-v), long-only (--verbose), or both (-v, --verbose).
|
||||
* the both variant keeps the pair together so nushell can emit "--verbose(-v)".
|
||||
*
|
||||
* param: flags can take mandatory (--output FILE) or optional (--color[=WHEN]) values.
|
||||
*
|
||||
* entry: one complete flag definition — its switch form, optional parameter, and
|
||||
* the description text (potentially multi-line, already joined).
|
||||
*
|
||||
* help_result: the complete parsed output for a single command. *)
|
||||
(* how a flag is spelled: short only, long only, or both forms together *)
type switch =
  | Short of char
  | Long of string
  | Both of char * string

(* value placeholder attached to a flag, tagged mandatory or optional *)
type param =
  | Mandatory of string
  | Optional of string

(* one flag definition *)
type entry = {
  switch : switch;
  param : param option;
  desc : string;
}

(* one subcommand listing line *)
type subcommand = {
  name : string;
  desc : string;
}

(* one positional argument *)
type positional = {
  pos_name : string;
  optional : bool;
  variadic : bool;
}

(* everything extracted for a single command *)
type help_result = {
  entries : entry list;
  subcommands : subcommand list;
  positionals : positional list;
  description : string;
}
|
||||
|
||||
(* --- low-level angstrom combinators --- *)
|
||||
(* these are the building blocks for all the parsers below. *)
|
||||
|
||||
(* skip spaces and tabs; never crosses a line break and never fails *)
let inline_ws = skip_while is_whitespace

(* permissive end of line: a line break, or the end of the input *)
let eol = end_of_line <|> end_of_input

(* strict end of line: an actual line break must be present, so this does
 * not succeed at the end of the input *)
let eol_strict = end_of_line
|
||||
|
||||
(* --- switch and parameter parsers --- *)
|
||||
(* these parse the flag name portion of an option line, e.g. "-v", "--verbose" *)
|
||||
|
||||
(* "-x": a dash followed by one letter or digit; yields that character *)
let short_switch = char '-' >>= fun _ -> satisfy is_alphanumeric

(* "--name": two dashes followed by one or more long-flag characters;
 * yields the name without the dashes *)
let long_switch = string "--" >>= fun _ -> take_while1 is_long_char

(* a comma and any horizontal whitespace after it *)
let comma = char ',' >>= fun _ -> inline_ws
|
||||
|
||||
(* parameter parsers — these handle the various syntaxes tools use to indicate
|
||||
* that a flag takes a value. the formats are surprisingly diverse:
|
||||
* --output=FILE (eq_man_param — mandatory, common in gnu tools)
|
||||
* --color[=WHEN] (eq_opt_param — optional with = syntax)
|
||||
* --depth DEPTH (space_upper_param — space-separated ALL_CAPS)
|
||||
* --file <path> (space_angle_param — angle brackets)
|
||||
* --file [<path>] (space_opt_angle_param — optional angle brackets)
|
||||
* --format string (space_type_param — go/cobra lowercase type word)
|
||||
*)
|
||||
(* "[=NAME]" directly after the flag: optional parameter *)
let eq_opt_param =
  lift
    (fun name -> Optional name)
    (string "[=" *> take_while1 is_param_char <* char ']')

(* "=NAME" directly after the flag: mandatory parameter *)
let eq_man_param =
  lift (fun name -> Mandatory name) (char '=' *> take_while1 is_param_char)
|
||||
|
||||
(* a single space followed by an ALL_CAPS word, e.g. " FILE" or " TIME_STYLE":
 * mandatory parameter.
 * the first character must be an uppercase letter or underscore; the whole
 * word (read with is_param_char) must then consist only of uppercase
 * letters, underscores and digits, so "SHA256" is accepted while a
 * capitalised description word such as "Set" is rejected. *)
let space_upper_param =
  let is_caps_word =
    String.for_all (fun c ->
        is_upper_or_underscore c || (c >= '0' && c <= '9'))
  in
  char ' ' *> peek_char_fail >>= fun first ->
  if not (is_upper_or_underscore first) then fail "not an uppercase param"
  else
    take_while1 is_param_char >>= fun name ->
    if is_caps_word name then return (Mandatory name)
    else fail "not an all-caps param"
|
||||
|
||||
(* "<name>": mandatory parameter; the name is everything between the angle
 * brackets and must be non-empty *)
let angle_param =
  let inner = take_while1 (fun c -> c <> '>') in
  lift (fun name -> Mandatory name) (char '<' *> inner <* char '>')

(* the same, preceded by exactly one space *)
let space_angle_param = char ' ' *> angle_param
|
||||
|
||||
(* "[<name>]": optional parameter; the name is everything between the angle
 * brackets and must be non-empty *)
let opt_angle_param =
  let bracketed_name = char '<' *> take_while1 (fun c -> c <> '>') <* char '>' in
  char '[' *> bracketed_name <* char ']' >>| fun name -> Optional name

(* the same, preceded by exactly one space *)
let space_opt_angle_param = char ' ' *> opt_angle_param
|
||||
|
||||
(* go/cobra style type word: a single space followed by a run of lowercase
 * ascii letters, as in "--name string" or "--timeout duration". yields a
 * mandatory parameter.
 * runs longer than 10 letters are rejected: they are treated as the start
 * of a description rather than a type annotation. *)
let space_type_param =
  let is_lower c = c >= 'a' && c <= 'z' in
  char ' ' *> peek_char_fail >>= fun first ->
  if not (is_lower first) then fail "not a lowercase type param"
  else
    take_while1 is_lower >>= fun word ->
    if String.length word > 10 then fail "too long for type param"
    else return (Mandatory word)
|
||||
|
||||
(* parse the parameter that may follow a switch, yielding None when no
 * known parameter syntax is present.
 * alternatives are tried in this order: the "=" forms first, then the
 * angle-bracket forms (bracketed optional before plain), then the
 * space-separated ALL_CAPS form, and the lowercase type word last. *)
let param_parser =
  let any_param =
    choice
      [ eq_opt_param;
        eq_man_param;
        space_opt_angle_param;
        space_angle_param;
        space_upper_param;
        space_type_param ]
  in
  option None (any_param >>| Option.some)
|
||||
|
||||
(* parse the flag-name portion of an option line. forms, in the order tried:
 *   -a, --all     short, comma, long
 *   -a --all      short, single space, long
 *   --all / -a    long, slash, short
 *   -a            short only
 *   --all         long only
 *
 * the order matters: the first alternative that succeeds wins, and the
 * short-only parser would succeed on just the "-a" of "-a, --all". the
 * combined forms therefore have to be tried before the single forms. *)
let switch_parser =
  let both s l = Both (s, l) in
  let short_comma_long = lift2 both short_switch (comma *> long_switch) in
  let short_space_long = lift2 both short_switch (char ' ' *> long_switch) in
  let long_slash_short =
    lift2
      (fun l s -> Both (s, l))
      long_switch
      (inline_ws *> char '/' *> inline_ws *> short_switch)
  in
  choice
    [ short_comma_long;
      short_space_long;
      long_slash_short;
      (short_switch >>| fun s -> Short s);
      (long_switch >>| fun l -> Long l) ]
|
||||
|
||||
(* --- description parsing with multi-line continuation ---
|
||||
* descriptions in help text often wrap across multiple lines. the convention
|
||||
* is that continuation lines are deeply indented (8+ spaces) and don't start
|
||||
* with '-' (which would indicate a new flag entry). we peek ahead to check
|
||||
* indentation without consuming, then decide whether to fold the line in. *)
|
||||
|
||||
(* everything up to, but not including, the next '\n' or '\r'; may be empty *)
let rest_of_line = take_while (fun c -> c <> '\n' && c <> '\r')
|
||||
|
||||
(* parse one continuation line of a wrapped description and return its text
 * without the leading indentation.
 *
 * a line qualifies when its leading indentation is at least 8 columns
 * (a space counts 1, a tab counts 8) and the text does not begin with '-'.
 * the decision is made on a preview of at most 80 bytes without consuming
 * anything; only a qualifying line is consumed, including its line end.
 *
 * NOTE(review): the '-' test trims the whole preview, line breaks included,
 * so for a whitespace-only line it looks at the first character of a
 * following line. confirm this is intended. *)
let continuation_line =
  let indent_width s =
    let rec go i width =
      if i >= String.length s then width
      else
        match s.[i] with
        | ' ' -> go (i + 1) (width + 1)
        | '\t' -> go (i + 1) (width + 8)
        | _ -> width
    in
    go 0 0
  in
  peek_string 1 >>= fun _ ->
  available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 80) >>= fun preview ->
    let text = String.trim preview in
    let is_flag_line = String.length text > 0 && text.[0] = '-' in
    if indent_width preview < 8 || is_flag_line then
      fail "not a continuation line"
    else inline_ws *> rest_of_line <* eol
|
||||
|
||||
(* parse a description: the remainder of the current line plus any number of
 * continuation lines. every line is trimmed, blank ones are discarded, and
 * the rest are joined with single spaces. *)
let description =
  let join_nonblank lines =
    lines
    |> List.map String.trim
    |> List.filter (fun line -> String.length line > 0)
    |> String.concat " "
  in
  lift2
    (fun first_line cont_lines -> join_nonblank (first_line :: cont_lines))
    (inline_ws *> rest_of_line <* eol)
    (many continuation_line)
|
||||
|
||||
(* parse a description that sits entirely on the lines below the flag, as in
 * clap's long help layout:
 *   --verbose
 *           increase verbosity
 * requires at least one continuation line. lines are trimmed, blank ones
 * are discarded, and the rest are joined with single spaces. *)
let description_below =
  many1 continuation_line >>| fun lines ->
  lines
  |> List.filter_map (fun line ->
         match String.trim line with "" -> None | text -> Some text)
  |> String.concat " "
|
||||
|
||||
(* --- line classification for skipping ---
|
||||
* the parser needs to skip lines it doesn't understand (section headers,
|
||||
* blank lines, description paragraphs not attached to a flag, etc.)
|
||||
* without consuming lines that ARE flag entries. *)
|
||||
|
||||
(* succeed, without consuming input, when the upcoming text looks like a flag
 * entry: the first non-blank character of a preview of at most 40 bytes is
 * a '-'. fails at the end of the input.
 *
 * NOTE(review): the preview is trimmed as a whole, line breaks included, so
 * a blank line directly above an option line also passes. confirm this is
 * intended. *)
let at_option_line =
  peek_string 1 *> available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 40) >>= fun preview ->
    if String.starts_with ~prefix:"-" (String.trim preview) then return ()
    else fail "not an option line"
|
||||
|
||||
(* consume one whole line, including its line break, and discard it.
 * eol_strict is used so this fails on a final line that has no line break,
 * which stops repeated application at the end of the input.
 *
 * NOTE(review): the first alternative is meant to refuse lines that look
 * like options so the entry parser can have them. it always ends in a
 * failure, though, and the alternation then runs the second alternative,
 * so option-looking lines appear to be skipped as well. confirm against the
 * caller before relying on the guard, and before changing it: lines such as
 * "-----" currently get skipped thanks to this behaviour. *)
let skip_non_option_line =
  let refuse_option_line = at_option_line *> fail "this is an option line" in
  let consume_line = rest_of_line *> eol_strict *> return () in
  refuse_option_line <|> consume_line
|
||||
|
||||
(* --- entry parsing --- *)
|
||||
|
||||
(* parse one flag entry: optional leading whitespace, the switch, an optional
 * parameter, then the description.
 * the description is first tried inline (rest of the line plus continuation
 * lines); failing that, the line end is consumed and the description is
 * taken from the lines below, or left empty when there are none. *)
let entry =
  let head = lift2 (fun sw param -> (sw, param)) switch_parser param_parser in
  let body = description <|> (eol *> (description_below <|> return "")) in
  inline_ws *> head >>= fun (sw, param) ->
  body >>| fun desc -> { switch = sw; param; desc }
|
||||
|
||||
(* --- subcommand parsing ---
|
||||
* subcommand lines in help text follow the pattern:
|
||||
* " name description"
|
||||
* where the name and description are separated by 2+ spaces.
|
||||
* some tools also include argument placeholders between name and description:
|
||||
* " start UNIT... start one or more units"
|
||||
* " list [PATTERN] list matching units"
|
||||
*)
|
||||
|
||||
(* characters accepted in a subcommand name: ascii letters, digits, hyphen
 * and underscore *)
let is_subcommand_char c =
  (c >= 'a' && c <= 'z')
  || (c >= 'A' && c <= 'Z')
  || (c >= '0' && c <= '9')
  || c = '-'
  || c = '_'
|
||||
|
||||
(* skip argument placeholders such as UNIT..., [PATTERN] or <file> that sit
 * between a subcommand name and its description. always succeeds.
 *
 * each round looks at the next two bytes: a placeholder must be introduced
 * by exactly one space followed by '[', '<' or an uppercase letter. a
 * double space is left alone, since it is the separator in front of the
 * description. the token after the space (read from a preview of at most
 * 80 bytes, up to the next space or line break) is skipped when it starts
 * with '[' or '<', or when it consists only of uppercase letters, digits
 * and the characters _ - . | , . the parser then repeats; anything else
 * ends the skipping. *)
let skip_arg_placeholders =
  let starts_placeholder c = c = '[' || c = '<' || (c >= 'A' && c <= 'Z') in
  let is_caps_token_char c =
    (c >= 'A' && c <= 'Z') || c = '_' || c = '-'
    || c = '.' || c = '|' || c = ',' || (c >= '0' && c <= '9')
  in
  (* the token that follows the single leading space of [preview] *)
  let token_after_space preview =
    let n = String.length preview in
    let rec find_end i =
      if i < n && preview.[i] <> ' ' && preview.[i] <> '\n'
         && preview.[i] <> '\r'
      then find_end (i + 1)
      else i
    in
    String.sub preview 1 (find_end 1 - 1)
  in
  fix (fun self ->
      available >>= fun avail ->
      if avail < 2 then return ()
      else
        (* avail >= 2 here, so exactly two bytes are previewed *)
        peek_string 2 >>= fun lead ->
        if lead.[0] <> ' ' || not (starts_placeholder lead.[1]) then return ()
        else
          peek_string (min avail 80) >>= fun preview ->
          let tok = token_after_space preview in
          let is_placeholder =
            tok.[0] = '[' || tok.[0] = '<'
            || String.for_all is_caps_token_char tok
          in
          if is_placeholder then advance (1 + String.length tok) *> self
          else return ())
|
||||
|
||||
(* parse one subcommand listing line: optional indentation, a name of at
 * least two characters, optional argument placeholders, at least two spaces,
 * then the description up to the end of the line.
 * the name is lowercased. the description is trimmed, and a leading "- "
 * (as in "add  - add a new item") is removed from it. *)
let subcommand_entry =
  let strip_dash_prefix desc =
    let t = String.trim desc in
    if String.starts_with ~prefix:"- " t then
      String.trim (String.sub t 2 (String.length t - 2))
    else t
  in
  inline_ws *> take_while1 is_subcommand_char >>= fun name ->
  if String.length name < 2 then fail "subcommand name too short"
  else
    skip_arg_placeholders *> char ' ' *> char ' ' *> inline_ws
    *> rest_of_line <* eol
    >>| fun desc ->
    { name = String.lowercase_ascii name; desc = strip_dash_prefix desc }
|
||||
|
||||
(* --- section header detection ---
|
||||
* section headers are critical for disambiguating subcommands from positional
|
||||
* arguments. lines like "commands:" introduce subcommand sections, while
|
||||
* "arguments:" or "positionals:" introduce argument sections where the same
|
||||
* name+description format should NOT be treated as subcommands. *)
|
||||
|
||||
(* true when [s] names a section that lists positional arguments.
 * the text is trimmed and lowercased, one trailing colon (and any
 * whitespace in front of it) is removed, and the result is compared with
 * the known section names. *)
let is_arg_section s =
  let lc = String.lowercase_ascii (String.trim s) in
  let base =
    match String.length lc with
    | n when n > 0 && lc.[n - 1] = ':' -> String.trim (String.sub lc 0 (n - 1))
    | _ -> lc
  in
  List.mem base [ "arguments"; "args"; "positionals"; "positional arguments" ]
|
||||
|
||||
(* recognise a section header line and classify it.
 *
 * a header is a line whose trimmed text is at least two characters long,
 * ends with ':', does not begin with '-', and is indented by at most four
 * spaces/tabs. the decision is taken on a lookahead of up to 80 bytes, so
 * nothing is consumed unless the line qualifies.
 *
 * this has to be tried BEFORE subcommand_entry in a choice, otherwise
 * "commands:" would be read as a subcommand named "commands".
 *
 * yields true for argument sections (see is_arg_section) and false for any
 * other section; fails on non-header lines and when no input is available. *)
let section_header =
  let leading_blanks line =
    let n = String.length line in
    let rec count k =
      if k < n && (line.[k] = ' ' || line.[k] = '\t') then count (k + 1)
      else k
    in
    count 0
  in
  let looks_like_header line =
    let text = String.trim line in
    let n = String.length text in
    n >= 2 && text.[n - 1] = ':' && text.[0] <> '-'
    && leading_blanks line <= 4
  in
  available >>= function
  | 0 -> fail "eof"
  | avail ->
    peek_string (min avail 80) >>= fun preview ->
    (* only the first line of the lookahead matters *)
    let first_line =
      match String.index_opt preview '\n' with
      | Some nl -> String.sub preview 0 nl
      | None -> preview
    in
    if looks_like_header first_line then
      rest_of_line <* eol_strict >>| is_arg_section
    else fail "not a section header"
|
||||
|
||||
(* --- top-level parser ---
 * walks the help text line by line, classifying each line as the first of:
 *   1. a flag entry
 *   2. a section header
 *   3. a subcommand line
 *   4. anything else, which is skipped
 *
 * the order is significant: section headers must be tried before
 * subcommand lines so that "commands:" is not read as a subcommand.
 *
 * once every line is classified:
 *   - subcommand lines that follow an argument-section header are dropped
 *     (the current section is tracked while folding over the items)
 *   - subcommands sharing a name are merged, keeping the entry with the
 *     longer description (on a tie the earlier one stays)
 *
 * positionals and the description are left empty here; positionals are
 * filled in afterwards by parse_help. *)
let help_parser =
  let open Angstrom in
  (* keep only subcommands that are not listed under an argument section *)
  let outside_arg_sections items =
    let _, kept =
      List.fold_left
        (fun (in_args, kept) item ->
          match item with
          | `Section is_arg -> (is_arg, kept)
          | `Subcommand sc when not in_args -> (in_args, sc :: kept)
          | _ -> (in_args, kept))
        (false, []) items
    in
    List.rev kept
  in
  (* collapse duplicate names, preferring the longer description *)
  let keep_longest_desc subs =
    List.fold_left
      (fun kept sc ->
        match List.assoc_opt sc.name kept with
        | Some earlier
          when String.length earlier.desc >= String.length sc.desc -> kept
        | _ -> (sc.name, sc) :: List.remove_assoc sc.name kept)
      [] subs
    |> List.rev_map snd
  in
  fix (fun _self ->
    let line =
      choice
        [ (entry >>| fun e -> `Entry e);
          (section_header >>| fun is_arg -> `Section is_arg);
          (subcommand_entry >>| fun sc -> `Subcommand sc);
          (skip_non_option_line >>| fun () -> `Skip) ]
    in
    many line >>| fun items ->
    let entries =
      List.filter_map (function `Entry e -> Some e | _ -> None) items in
    let subcommands = items |> outside_arg_sections |> keep_longest_desc in
    { entries; subcommands; positionals = []; description = "" })
|
||||
|
||||
(* --- usage line parsing ---
 * a usage line looks like "git add [OPTIONS] [--] [<pathspec>...]". before
 * positionals can be extracted, the leading command words ("git add") have
 * to be skipped.
 *
 * skip_command_prefix returns the index at which the argument portion
 * starts. it consumes blank-separated words for as long as each word
 *   - consists of word characters (letters, digits, '-', '_', '/', '.'),
 *   - contains at least one lowercase letter (ALL_CAPS words such as FILE
 *     are positionals, not command names), and
 *   - does not begin with '[', '<', '(', '{' or '-'.
 * scanning stops at the first token that fails any of these, and the
 * returned index points at the start of that token (or at the end of the
 * string). *)
let skip_command_prefix s =
  let n = String.length s in
  let is_blank c = c = ' ' || c = '\t' in
  let is_word_char = function
    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '/' | '.' -> true
    | _ -> false
  in
  let opens_argument = function
    | '[' | '<' | '(' | '{' | '-' -> true
    | _ -> false
  in
  (* first index >= k whose character does not satisfy pred *)
  let rec scan_while pred k =
    if k < n && pred s.[k] then scan_while pred (k + 1) else k
  in
  let rec go pos =
    let start = scan_while is_blank pos in
    if start >= n || opens_argument s.[start] || not (is_word_char s.[start])
    then start
    else
      let stop = scan_while is_word_char start in
      let word = String.sub s start (stop - start) in
      if String.exists (function 'a'..'z' -> true | _ -> false) word
      then go stop
      else start
  in
  go 0
|
||||
|
||||
(* parse the argument portion of a usage line into positional definitions.
 * recognised forms:
 *   <file>       - mandatory positional
 *   [file]       - optional positional
 *   FILE         - mandatory positional (ALL_CAPS convention)
 *   <file>...    - variadic ("..." or the utf-8 ellipsis, also inside [ ])
 *   [file...]    - optional variadic
 *   {a|b|c}      - alternatives, skipped
 *   -flag        - flags, skipped
 *
 * the names OPTIONS, OPTION, FLAGS and FLAG (any case) are never reported:
 * they are labels that appear in usage lines for readability.
 *
 * an unterminated '[' (for instance when the usage line was wrapped) is
 * treated as running to the end of the string.
 *
 * the result keeps the first occurrence of each name, in source order. *)
let parse_usage_args s =
  let len = String.length s in
  let i = ref 0 in
  let results = ref [] in
  let utf8_ellipsis = "\xe2\x80\xa6" in
  let skip_ws () =
    while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done in
  let is_pos_char c =
    (c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9') in
  (* consume "..." or a utf-8 ellipsis after optional blanks. the blanks
   * stay consumed even when no ellipsis follows. *)
  let read_dots () =
    skip_ws ();
    if !i + 2 < len
       && (String.sub s !i 3 = "..." || String.sub s !i 3 = utf8_ellipsis)
    then (i := !i + 3; true)
    else false
  in
  let is_skip name =
    let u = String.uppercase_ascii name in
    u = "OPTIONS" || u = "OPTION" || u = "FLAGS" || u = "FLAG"
  in
  let is_clean_name name =
    String.length name >= 2
    && String.for_all (fun c ->
         (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
         || (c >= '0' && c <= '9') || c = '_' || c = '-') name
  in
  let is_letter c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in
  (* record a positional unless its name is one of the label words *)
  let add ~optional ~variadic name =
    if not (is_skip name) then
      results :=
        { pos_name = String.lowercase_ascii name; optional; variadic }
        :: !results
  in
  (* split a trailing ellipsis off bracket content: "file..." -> "file" *)
  let split_inner_dots inner =
    let without suffix =
      String.sub inner 0 (String.length inner - String.length suffix)
      |> String.trim
    in
    if String.ends_with ~suffix:"..." inner then (without "...", true)
    else if String.ends_with ~suffix:utf8_ellipsis inner then
      (without utf8_ellipsis, true)
    else (inner, false)
  in
  (* skip a {a|b|...} alternative block, including a trailing ellipsis *)
  let skip_braces () =
    if !i < len && s.[!i] = '{' then begin
      let depth = ref 1 in
      incr i;
      while !i < len && !depth > 0 do
        if s.[!i] = '{' then incr depth
        else if s.[!i] = '}' then decr depth;
        incr i
      done;
      ignore (read_dots ());
      true
    end else false
  in
  while !i < len do
    skip_ws ();
    if !i >= len then ()
    else if skip_braces () then ()
    else match s.[!i] with
      | '[' ->
        incr i;
        let start = !i in
        let depth = ref 1 in
        while !i < len && !depth > 0 do
          if s.[!i] = '[' then incr depth
          else if s.[!i] = ']' then decr depth;
          incr i
        done;
        (* when the bracket was closed, !i is just past ']' and the content
         * ends one byte earlier; when it was never closed the content runs
         * to the end of the string and no byte must be dropped *)
        let inner_end = if !depth = 0 then !i - 1 else !i in
        let inner = String.sub s start (inner_end - start) |> String.trim in
        let inner, has_inner_dots = split_inner_dots inner in
        let variadic = has_inner_dots || read_dots () in
        if String.length inner > 0
           && (is_letter inner.[0] || inner.[0] = '<') then begin
          let name =
            if inner.[0] = '<' then
              (* take the text between '<' and '>' (or to the end) *)
              let close =
                Option.value (String.index_opt inner '>')
                  ~default:(String.length inner) in
              String.sub inner 1 (close - 1)
            else inner
          in
          if is_clean_name name then add ~optional:true ~variadic name
        end
      | '<' ->
        incr i;
        let start = !i in
        while !i < len && s.[!i] <> '>' do incr i done;
        let name = String.sub s start (!i - start) in
        if !i < len then incr i;
        let variadic = read_dots () in
        if is_clean_name name then add ~optional:false ~variadic name
      | '-' ->
        (* a flag: skip to the next blank or closing bracket *)
        while !i < len && s.[!i] <> ' ' && s.[!i] <> '\t' && s.[!i] <> ']' do
          incr i
        done
      | 'A'..'Z' ->
        let start = !i in
        while !i < len && is_pos_char s.[!i] do incr i done;
        let name = String.sub s start (!i - start) in
        let variadic = read_dots () in
        (* name only contains is_pos_char characters by construction *)
        if String.length name >= 2 then add ~optional:false ~variadic name
      | _ ->
        incr i
  done;
  (* keep the first occurrence of every name, preserving source order *)
  let _, unique =
    List.fold_left
      (fun (seen, kept) p ->
        if List.mem p.pos_name seen then (seen, kept)
        else (p.pos_name :: seen, p :: kept))
      ([], []) (List.rev !results)
  in
  List.rev unique
|
||||
|
||||
(* locate the usage text in a help message and extract positionals from it.
 * the first line that (trimmed, case-insensitively) starts with "usage:" or
 * equals "usage" decides the outcome:
 *   - "usage: cmd [OPTIONS] FILE"  -> the text after the colon is used
 *   - "USAGE:" or "usage" alone    -> the following line is used (clap style)
 * if that following line is missing or blank, no positionals are returned;
 * later usage lines are not searched. *)
let extract_usage_positionals text =
  let rec find_usage = function
    | [] -> None
    | line :: rest ->
      let t = String.trim line in
      let lc = String.lowercase_ascii t in
      (* the trimmed line after the header, if it has any content *)
      let next_line () =
        match rest with
        | [] -> None
        | following :: _ ->
          (match String.trim following with
           | "" -> None
           | usage -> Some usage)
      in
      if String.starts_with ~prefix:"usage:" lc then
        (match String.trim (String.sub t 6 (String.length t - 6)) with
         | "" -> next_line ()
         | inline -> Some inline)
      else if lc = "usage" then next_line ()
      else find_usage rest
  in
  match find_usage (String.split_on_char '\n' text) with
  | None -> []
  | Some usage ->
    let cmd_end = skip_command_prefix usage in
    parse_usage_args (String.sub usage cmd_end (String.length usage - cmd_end))
|
||||
|
||||
(* extract positionals from a cli11-style "Positionals:" section:
 *   Positionals:
 *     name TEXT           description here
 *     count INT           another description
 *
 * the first line whose trimmed text is "POSITIONALS:" or "Positionals:"
 * starts the section. the indented lines after it are read until an empty,
 * blank or unindented line. on each line the name is the leading run of
 * letters, digits, '_' and '-' (lines whose name is shorter than two
 * characters are ignored), an uppercase type word after it is skipped, and
 * a "..." right after that marks the positional as variadic. every
 * positional is reported as mandatory. *)
let extract_cli11_positionals text =
  let is_blank c = c = ' ' || c = '\t' in
  let is_name_char = function
    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' | '-' -> true
    | _ -> false
  in
  let is_upper = function 'A'..'Z' -> true | _ -> false in
  (* parse one already-trimmed section line *)
  let parse_one s =
    let len = String.length s in
    let rec skip_while pred k =
      if k < len && pred s.[k] then skip_while pred (k + 1) else k
    in
    let name_end = skip_while is_name_char 0 in
    if name_end < 2 then None
    else
      let after_type =
        skip_while is_blank name_end
        |> skip_while is_upper
        |> skip_while is_blank
      in
      let variadic =
        after_type + 2 < len && String.sub s after_type 3 = "..." in
      Some { pos_name = String.lowercase_ascii (String.sub s 0 name_end);
             optional = false; variadic }
  in
  (* read section lines until the section ends *)
  let rec collect acc = function
    | [] -> List.rev acc
    | line :: rest ->
      if line = "" || not (is_blank line.[0]) then List.rev acc
      else
        match String.trim line with
        | "" -> List.rev acc
        | content ->
          (match parse_one content with
           | Some p -> collect (p :: acc) rest
           | None -> collect acc rest)
  in
  let rec find_section = function
    | [] -> []
    | line :: rest ->
      (match String.trim line with
       | "POSITIONALS:" | "Positionals:" -> collect [] rest
       | _ -> find_section rest)
  in
  find_section (String.split_on_char '\n' text)
|
||||
|
||||
(* top-level entry point: turn raw --help output into a help_result.
 *   1. ansi escapes are stripped
 *   2. help_parser collects flags and subcommands; it runs in prefix mode,
 *      so it does not have to consume every byte
 *   3. positionals come from the cli11 "Positionals:" section when that
 *      yields any, otherwise from the usage line
 * a parser failure is returned unchanged as Error. *)
let parse_help txt =
  let clean = strip_ansi txt in
  Angstrom.parse_string ~consume:Consume.Prefix help_parser clean
  |> Result.map (fun parsed ->
       let positionals =
         match extract_cli11_positionals clean with
         | [] -> extract_usage_positionals clean
         | from_cli11 -> from_cli11
       in
       { parsed with positionals })
|
||||
444
lib/store.ml
Normal file
444
lib/store.ml
Normal file
|
|
@ -0,0 +1,444 @@
|
|||
(* store.ml — filesystem-backed cache of parsed completion data.
|
||||
*
|
||||
* this module handles persistence of completion data to disk. each command's
|
||||
* help_result is serialized to json and stored as a file in a cache directory
|
||||
* (default: $XDG_CACHE_HOME/inshellah). commands with native nushell completions
|
||||
* are stored as .nu files instead.
|
||||
*
|
||||
* the store also provides lookup, listing, and subcommand discovery by
|
||||
* scanning filenames in the cache directory.
|
||||
*
|
||||
* file naming convention:
|
||||
* - spaces in command names become underscores (e.g. "git add" → "git_add.json")
|
||||
* - subcommands of a parent share the prefix (e.g. "git_add.json", "git_commit.json")
|
||||
* - .json files contain serialized help_result
|
||||
* - .nu files contain native nushell extern source code
|
||||
*
|
||||
* the module includes a minimal hand-rolled json parser/serializer because
|
||||
* we only need to handle our own output format (no need for a full json library).
|
||||
*)
|
||||
|
||||
open Parser
|
||||
|
||||
(* default store location: $XDG_CACHE_HOME/inshellah, falling back to
 * ~/.cache/inshellah. following the xdg base directory specification, an
 * XDG_CACHE_HOME that is unset, empty or not an absolute path is ignored.
 * raises Not_found when the fallback is needed and HOME is not set. *)
let default_store_path () =
  let cache =
    match Sys.getenv_opt "XDG_CACHE_HOME" with
    (* Filename.is_relative "" is true, so this also rejects empty values *)
    | Some dir when not (Filename.is_relative dir) -> dir
    | Some _ | None -> Filename.concat (Sys.getenv "HOME") ".cache"
  in
  Filename.concat cache "inshellah"
|
||||
|
||||
(* recursively create directories (equivalent to mkdir -p).
 * a directory that appears between the existence check and the mkdir call
 * (another process indexing at the same time) is not an error. other
 * Unix_error failures, such as EACCES, still propagate. *)
let ensure_dir dir =
  let rec mkdir_p d =
    if not (Sys.file_exists d) then begin
      let parent = Filename.dirname d in
      (* a path that is its own parent cannot be created piecewise; stop
       * recursing and let mkdir report the problem *)
      if parent <> d then mkdir_p parent;
      try Unix.mkdir d 0o755
      with Unix.Unix_error (Unix.EEXIST, _, _) -> ()
    end
  in
  mkdir_p dir
|
||||
|
||||
(* convert a command name to a safe filename stem: spaces become
 * underscores; letters, digits, '-', '_' and '.' are kept; every other
 * character becomes a hyphen.
 * e.g. "git add" → "git_add", "docker-compose" → "docker-compose" *)
let filename_of_command cmd =
  let safe c =
    match c with
    | ' ' -> '_'
    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.' -> c
    | _ -> '-'
  in
  String.init (String.length cmd) (fun k -> safe cmd.[k])
|
||||
|
||||
(* approximate inverse of filename_of_command: every underscore becomes a
 * space. this is lossy, because an underscore that was part of the original
 * command name (e.g. "my_tool") also turns into a space. *)
let command_of_filename base =
  String.concat " " (String.split_on_char '_' base)
|
||||
|
||||
(* --- json serialization of help_result ---
|
||||
* hand-rolled json emitters. we don't use a json library because:
|
||||
* 1. the schema is fixed and simple — we only serialize our own types
|
||||
* 2. avoiding dependencies keeps the binary small
|
||||
* 3. printf-style emission is fast and straightforward for our types *)
|
||||
|
||||
(* escape a string for use inside a json string literal. quotes and
 * backslashes get a backslash prefix, newline/tab/carriage return use
 * their short escapes, and any other byte below 0x20 is written as \u00XX.
 * all remaining bytes are copied unchanged. *)
let escape_json s =
  let out = Buffer.create (String.length s + 4) in
  let emit c =
    match c with
    | '"' | '\\' -> Buffer.add_char out '\\'; Buffer.add_char out c
    | '\n' -> Buffer.add_string out "\\n"
    | '\t' -> Buffer.add_string out "\\t"
    | '\r' -> Buffer.add_string out "\\r"
    | '\000' .. '\031' -> Printf.bprintf out "\\u%04x" (Char.code c)
    | _ -> Buffer.add_char out c
  in
  String.iter emit s;
  Buffer.contents out
|
||||
|
||||
(* a json string literal: the escaped text wrapped in double quotes *)
let json_string s = "\"" ^ escape_json s ^ "\""

(* the json null literal *)
let json_null = "null"
|
||||
|
||||
(* serialize a switch as an object tagged by "type" ("short", "long" or
 * "both"); the short letter is stored as a one-character string. *)
let json_switch_of switch =
  let one_char c = json_string (String.make 1 c) in
  match switch with
  | Short c ->
    Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (one_char c)
  | Long name ->
    Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string name)
  | Both (c, name) ->
    Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}"
      (one_char c) (json_string name)
|
||||
|
||||
(* serialize an optional flag parameter: null when absent, otherwise an
 * object carrying its "kind" ("mandatory" or "optional") and "name". *)
let json_param_of param =
  let render = function
    | Mandatory name ->
      Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}" (json_string name)
    | Optional name ->
      Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}" (json_string name)
  in
  Option.fold ~none:json_null ~some:render param
|
||||
|
||||
(* serialize one flag entry with its switch, parameter and description *)
let json_entry_of { switch; param; desc } =
  Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}"
    (json_switch_of switch) (json_param_of param) (json_string desc)
|
||||
|
||||
(* serialize one subcommand as {"name":…,"desc":…} *)
let json_subcommand_of { name; desc } =
  Printf.sprintf "{\"name\":%s,\"desc\":%s}" (json_string name) (json_string desc)
|
||||
|
||||
(* serialize one positional; the two flags are written as json booleans *)
let json_positional_of { pos_name; optional; variadic } =
  Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}"
    (json_string pos_name) optional variadic
|
||||
|
||||
(* render a list as a json array; f serializes a single element *)
let json_list f items =
  let rendered = items |> List.map f |> String.concat "," in
  String.concat "" [ "["; rendered; "]" ]
|
||||
|
||||
(* serialize a whole help_result as one json object. source records how the
 * data was obtained and defaults to "help". *)
let json_of_help_result ?(source = "help")
    { description; entries; subcommands; positionals } =
  Printf.sprintf "{\"source\":%s,\"description\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}"
    (json_string source)
    (json_string description)
    (json_list json_entry_of entries)
    (json_list json_subcommand_of subcommands)
    (json_list json_positional_of positionals)
|
||||
|
||||
(* --- json deserialization ---
|
||||
* minimal hand-rolled recursive-descent json parser. only handles the subset
|
||||
* we emit: strings, booleans, nulls, arrays, and objects. no number parsing
|
||||
* (we don't emit numbers). this is intentionally minimal — we only read back
|
||||
* our own serialized format, so robustness against arbitrary json is not needed.
|
||||
*
|
||||
* peculiarity: the \u escape handler does basic utf-8 encoding for code points
|
||||
* up to 0xffff but doesn't handle surrogate pairs. this is fine for our use
|
||||
* case since we only escape control characters below 0x20. *)
|
||||
|
||||
(* in-memory json value. there is no number case: the serializers in this
 * file never emit numbers. *)
type json =
  | Jnull
  | Jbool of bool
  | Jstring of string
  | Jarray of json list
  | Jobject of (string * json) list

(* json accessor helpers — return sensible defaults for missing/wrong types *)
let json_get key = function
  | Jobject pairs -> (try List.assoc key pairs with Not_found -> Jnull)
  | _ -> Jnull

let json_to_string = function Jstring s -> s | _ -> ""
let json_to_bool = function Jbool b -> b | _ -> false
let json_to_list = function Jarray l -> l | _ -> []

(* raised by parse_json on malformed input; the payload describes the
 * problem and the byte offset where it was detected *)
exception Json_error of string

(* imperative recursive-descent json parser over a mutable position.
 *
 * parses the first json value in s and returns it; anything after that
 * value is ignored. supports strings, booleans, null, arrays and objects.
 *
 * malformed or truncated input (for example a cache file that was cut off
 * mid-write) raises Json_error instead of looping: an unterminated string,
 * a dangling backslash, a truncated or non-hex \u escape and a misspelt
 * null/true/false literal are all reported.
 *
 * \u escapes are utf-8 encoded for code points up to 0xffff; surrogate
 * pairs are not combined. *)
let parse_json s =
  let len = String.length s in
  let pos = ref 0 in
  let peek () = if !pos < len then s.[!pos] else '\x00' in
  let advance () = incr pos in
  let error fmt = Printf.ksprintf (fun msg -> raise (Json_error msg)) fmt in
  let skip_ws () =
    while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t'
                         || s.[!pos] = '\n' || s.[!pos] = '\r') do
      advance ()
    done in
  let expect c =
    skip_ws ();
    if peek () <> c then
      raise (Json_error (Printf.sprintf "expected '%c' at %d" c !pos));
    advance () in
  (* consume exactly word at the current position and yield value *)
  let literal word value =
    let n = String.length word in
    if !pos + n <= len && String.sub s !pos n = word then begin
      pos := !pos + n;
      value
    end else error "invalid literal at %d" !pos in
  let is_hex = function
    | '0'..'9' | 'a'..'f' | 'A'..'F' -> true
    | _ -> false in
  (* decode \uXXXX. on entry !pos is on the 'u'; on exit it is on the last
   * hex digit, so the caller's usual single advance steps past the escape *)
  let add_unicode_escape buf =
    if !pos + 4 >= len then error "truncated \\u escape at %d" !pos;
    let hex = String.sub s (!pos + 1) 4 in
    if not (String.for_all is_hex hex) then
      error "invalid \\u escape at %d" !pos;
    let code = int_of_string ("0x" ^ hex) in
    pos := !pos + 4;
    if code < 0x80 then Buffer.add_char buf (Char.chr code)
    else if code < 0x800 then begin
      Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end else begin
      Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12)));
      Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end in
  let rec parse_value () =
    skip_ws ();
    match peek () with
    | '"' -> Jstring (parse_string ())
    | '{' -> parse_object ()
    | '[' -> parse_array ()
    | 'n' -> literal "null" Jnull
    | 't' -> literal "true" (Jbool true)
    | 'f' -> literal "false" (Jbool false)
    | c -> raise (Json_error (Printf.sprintf "unexpected '%c' at %d" c !pos))
  and parse_string () =
    expect '"';
    let buf = Buffer.create 32 in
    (* the explicit bound is what stops the loop on truncated input *)
    while !pos < len && s.[!pos] <> '"' do
      if s.[!pos] = '\\' then begin
        advance ();
        if !pos >= len then error "unterminated escape at %d" !pos;
        (match s.[!pos] with
         | '"' -> Buffer.add_char buf '"'
         | '\\' -> Buffer.add_char buf '\\'
         | 'n' -> Buffer.add_char buf '\n'
         | 't' -> Buffer.add_char buf '\t'
         | 'r' -> Buffer.add_char buf '\r'
         | 'b' -> Buffer.add_char buf '\b'
         | 'f' -> Buffer.add_char buf '\012'
         | 'u' -> add_unicode_escape buf
         | c -> Buffer.add_char buf c);
        advance ()
      end else begin
        Buffer.add_char buf s.[!pos];
        advance ()
      end
    done;
    if !pos >= len then error "unterminated string at %d" !pos;
    advance (); (* closing quote *)
    Buffer.contents buf
  and parse_object () =
    expect '{';
    skip_ws ();
    if peek () = '}' then (advance (); Jobject [])
    else begin
      let pairs = ref [] in
      let cont = ref true in
      while !cont do
        skip_ws ();
        let key = parse_string () in
        expect ':';
        let value = parse_value () in
        pairs := (key, value) :: !pairs;
        skip_ws ();
        if peek () = ',' then advance ()
        else cont := false
      done;
      expect '}';
      Jobject (List.rev !pairs)
    end
  and parse_array () =
    expect '[';
    skip_ws ();
    if peek () = ']' then (advance (); Jarray [])
    else begin
      let items = ref [] in
      let cont = ref true in
      while !cont do
        let v = parse_value () in
        items := v :: !items;
        skip_ws ();
        if peek () = ',' then advance ()
        else cont := false
      done;
      expect ']';
      Jarray (List.rev !items)
    end
  in
  parse_value ()
|
||||
|
||||
(* --- json → ocaml type converters ---
|
||||
* these reconstruct our parser types from their json representations.
|
||||
* they mirror the json_*_of serializers above. *)
|
||||
|
||||
(* rebuild a switch from its json object. a missing short letter becomes
 * '?', and an unrecognised "type" yields Long "?". *)
let switch_of_json j =
  let field key = json_to_string (json_get key j) in
  let short_char () =
    match field "char" with
    | "" -> '?'
    | text -> text.[0]
  in
  match field "type" with
  | "short" -> Short (short_char ())
  | "long" -> Long (field "name")
  | "both" -> Both (short_char (), field "name")
  | _ -> Long "?"
|
||||
|
||||
(* rebuild an optional flag parameter from json: null, or an object whose
 * "kind" is neither "mandatory" nor "optional", gives None. *)
let param_of_json = function
  | Jnull -> None
  | obj ->
    let field key = json_to_string (json_get key obj) in
    let name = field "name" in
    (match field "kind" with
     | "mandatory" -> Some (Mandatory name)
     | "optional" -> Some (Optional name)
     | _ -> None)
|
||||
|
||||
(* rebuild a flag entry from the object written by json_entry_of *)
let entry_of_json j =
  let switch = switch_of_json (json_get "switch" j) in
  let param = param_of_json (json_get "param" j) in
  let desc = json_to_string (json_get "desc" j) in
  { switch; param; desc }
|
||||
|
||||
(* rebuild a subcommand from json; missing fields become empty strings *)
let subcommand_of_json j =
  let text key = json_to_string (json_get key j) in
  { name = text "name"; desc = text "desc" }
|
||||
|
||||
(* rebuild a positional from json; missing booleans default to false *)
let positional_of_json j =
  let flag key = json_to_bool (json_get key j) in
  { pos_name = json_to_string (json_get "name" j);
    optional = flag "optional";
    variadic = flag "variadic" }
|
||||
|
||||
(* rebuild a help_result from the object written by json_of_help_result.
 * missing arrays become empty lists and a missing description becomes "". *)
let help_result_of_json j =
  let items key convert = List.map convert (json_to_list (json_get key j)) in
  { entries = items "entries" entry_of_json;
    subcommands = items "subcommands" subcommand_of_json;
    positionals = items "positionals" positional_of_json;
    description = json_to_string (json_get "description" j) }
|
||||
|
||||
(* --- filesystem operations --- *)
|
||||
|
||||
(* write contents to path, creating or truncating the file.
 * the channel is always closed, even when writing fails part-way. raises
 * Sys_error if the file cannot be opened, written or flushed. *)
let write_file path contents =
  let oc = open_out path in
  Fun.protect
    ~finally:(fun () -> close_out_noerr oc)
    (fun () ->
      output_string oc contents;
      (* close explicitly so a failing flush is reported; the finally
       * handler then finds the channel already closed *)
      close_out oc)
|
||||
|
||||
(* read the whole file at path, or None when it cannot be opened or read.
 * the file is opened in binary mode so that the reported length matches
 * the bytes actually read on every platform, and the channel is closed on
 * all paths. only i/o failures map to None; other exceptions propagate. *)
let read_file path =
  match open_in_bin path with
  | exception Sys_error _ -> None
  | ic ->
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () ->
        match really_input_string ic (in_channel_length ic) with
        | contents -> Some contents
        | exception (Sys_error _ | End_of_file) -> None)
|
||||
|
||||
(* store a parsed help_result for command as <dir>/<filename>.json.
 * source is recorded in the json and defaults to "help". *)
let write_result ~dir ?(source = "help") command result =
  let file = filename_of_command command ^ ".json" in
  json_of_help_result ~source result |> write_file (Filename.concat dir file)
|
||||
|
||||
(* store native nushell completion source for command as
 * <dir>/<filename>.nu; data is written as given. *)
let write_native ~dir command data =
  let file = filename_of_command command ^ ".nu" in
  write_file (Filename.concat dir file) data
|
||||
|
||||
(* true when path exists and is a directory *)
let is_dir path =
  if Sys.file_exists path then Sys.is_directory path else false
|
||||
|
||||
(* find the data file for command in the given store directories.
 * directories are tried in order; within one directory the .json file is
 * preferred over the .nu file. returns the path of the first file that
 * exists, or None. *)
let find_file dirs command =
  let base = filename_of_command command in
  let candidates dir =
    List.map (fun ext -> Filename.concat dir (base ^ ext)) [ ".json"; ".nu" ]
  in
  List.find_map (fun dir -> List.find_opt Sys.file_exists (candidates dir)) dirs
|
||||
|
||||
(* load the stored help_result for command.
 * only .json files are considered, since .nu files cannot be turned back
 * into a help_result. directories are tried in order; one whose file is
 * missing, unreadable or fails to parse is skipped. *)
let lookup dirs command =
  let file = filename_of_command command ^ ".json" in
  let load dir =
    Option.bind (read_file (Filename.concat dir file)) (fun data ->
      try Some (help_result_of_json (parse_json data)) with _ -> None)
  in
  List.find_map load dirs
|
||||
|
||||
(* fetch the stored text for command without interpreting it: the contents
 * of the .json file or, failing that, of the .nu file, from the first
 * directory that has a readable one. *)
let lookup_raw dirs command =
  let base = filename_of_command command in
  let read_in dir ext = read_file (Filename.concat dir (base ^ ext)) in
  List.find_map
    (fun dir ->
      match read_in dir ".json" with
      | None -> read_in dir ".nu"
      | found -> found)
    dirs
|
||||
|
||||
(* strip a store extension: Some stem for names ending in ".json" or ".nu"
 * (checked in that order), None for anything else. *)
let chop_extension f =
  List.find_map
    (fun suffix ->
      if Filename.check_suffix f suffix then Some (Filename.chop_suffix f suffix)
      else None)
    [ ".json"; ".nu" ]
|
||||
|
||||
(* discover subcommands of command from the filenames in the store.
 * a file counts when its name starts with the command's filename plus "_"
 * and ends in .json or .nu (for "git": "git_add.json", "git_commit.nu", …).
 *
 * only immediate subcommands are returned: a remainder that is empty or
 * contains another underscore is ignored. the first file seen for a name
 * wins. the description is the "description" field of a .json file, or ""
 * for .nu files and for json that cannot be read or parsed.
 *
 * the result is sorted by subcommand name. *)
let subcommands_of dirs command =
  let prefix = filename_of_command command ^ "_" in
  let plen = String.length prefix in
  let module SMap = Map.Make(String) in
  (* description stored in the json file at path, "" on any failure *)
  let description_in path =
    match read_file path with
    | None -> ""
    | Some data ->
      (try json_to_string (json_get "description" (parse_json data))
       with _ -> "")
  in
  (* fold step for a single directory entry *)
  let consider dir found file =
    match chop_extension file with
    | Some stem when String.starts_with ~prefix file ->
      let sub = String.sub stem plen (String.length stem - plen) in
      if sub = "" || String.contains sub '_' || SMap.mem sub found then found
      else
        let desc =
          if Filename.check_suffix file ".json" then
            description_in (Filename.concat dir file)
          else ""
        in
        SMap.add sub { name = sub; desc } found
    | _ -> found
  in
  let scan found dir =
    if is_dir dir then Array.fold_left (consider dir) found (Sys.readdir dir)
    else found
  in
  List.fold_left scan SMap.empty dirs
  |> SMap.bindings
  |> List.map snd
|
||||
|
||||
(* enumerate every indexed command found in the given store directories.
 * each file with an extension recognized by chop_extension contributes the
 * command name that command_of_filename derives from its base name; entries
 * in dirs that are not directories are skipped.
 * returns a sorted, deduplicated list of command names. *)
let all_commands dirs =
  let module Names = Set.Make(String) in
  let collect names dir =
    if not (is_dir dir) then names
    else
      Sys.readdir dir
      |> Array.fold_left (fun names file ->
           match chop_extension file with
           | None -> names
           | Some stem -> Names.add (command_of_filename stem) names)
           names
  in
  Names.elements (List.fold_left collect Names.empty dirs)
|
||||
|
||||
(* report how a command was indexed: "help", "manpage", "native", etc.
 * used by the "dump" command to show provenance.
 *
 * directories are probed in order. in each one, an existing ".json" file
 * decides the answer: its "source" field is returned, or "json" when the
 * file cannot be read or the field cannot be extracted. otherwise an
 * existing ".nu" file yields "native". None means no directory holds
 * either file. *)
let file_type_of dirs command =
  let stem = filename_of_command command in
  (* provenance recorded inside a json entry, defaulting to "json" *)
  let source_of_json path =
    match read_file path with
    | None -> "json"
    | Some data ->
      (try json_to_string (json_get "source" (parse_json data))
       with _ -> "json")
  in
  let probe dir =
    let in_dir ext = Filename.concat dir (stem ^ ext) in
    let json_path = in_dir ".json" in
    if Sys.file_exists json_path then Some (source_of_json json_path)
    else if Sys.file_exists (in_dir ".nu") then Some "native"
    else None
  in
  List.find_map probe dirs
|
||||
Loading…
Add table
Add a link
Reference in a new issue