253 lines
12 KiB
OCaml
253 lines
12 KiB
OCaml
(* nushell.ml — generate nushell extern definitions from parsed help data.
 *
 * this module is the code generation backend. it takes a help_result (from
 * the parser or manpage modules) and produces nushell source code that
 * defines `extern` declarations — nushell's mechanism for teaching the shell
 * about external commands' flags and subcommands so it can offer completions.
 *
 * it also maintains a list of nushell's built-in commands to avoid generating
 * extern definitions that would shadow them.
 *
 * key responsibilities:
 * - deduplicating flag entries (same flag from multiple help sources)
 * - mapping parameter names to nushell types (path, int, string)
 * - formatting flags in nushell syntax: --flag(-f): type # description
 * - handling positional arguments with nushell's ordering constraints
 * - escaping special characters for nushell string literals
 *)
|
|
|
|
open Parser
|
|
|
|
(* string set: backs the builtin lookup table and the "already emitted" key
 * tracking in dedup_entries *)
module SSet = Set.Make (String)

(* string-keyed map: holds the best entry per flag key in dedup_entries *)
module SMap = Map.Make (String)

(* character set: holds the short-flag letters already covered by a
 * short+long entry in dedup_entries *)
module CSet = Set.Make (Char)
|
|
|
|
(* names of nushell's own commands and keywords.
 * no `extern` should be generated under any of these names, because the
 * definition would shadow the shell's own implementation.
 * the list is curated by hand (grouped below by first letter) and has to be
 * revisited whenever a nushell release adds or removes commands. *)
let nushell_builtins =
  [ (* a *) "alias"; "all"; "ansi"; "any"; "append"; "ast"; "attr";
    (* b *) "bits"; "break"; "bytes";
    (* c *) "cal"; "cd"; "char"; "chunk-by"; "chunks"; "clear"; "collect";
            "columns"; "commandline"; "compact"; "complete"; "config"; "const";
            "continue"; "cp";
    (* d *) "date"; "debug"; "decode"; "def"; "default"; "describe"; "detect";
            "do"; "drop"; "du";
    (* e *) "each"; "echo"; "encode"; "enumerate"; "error"; "every"; "exec";
            "exit"; "explain"; "explore"; "export"; "export-env"; "extern";
    (* f *) "fill"; "filter"; "find"; "first"; "flatten"; "for"; "format"; "from";
    (* g *) "generate"; "get"; "glob"; "grid"; "group-by";
    (* h *) "hash"; "headers"; "help"; "hide"; "hide-env"; "histogram";
            "history"; "http";
    (* i *) "if"; "ignore"; "input"; "insert"; "inspect"; "interleave"; "into";
            "is-admin"; "is-empty"; "is-not-empty"; "is-terminal"; "items";
    (* j *) "job"; "join";
    (* k *) "keybindings"; "kill";
    (* l *) "last"; "length"; "let"; "let-env"; "lines"; "load-env"; "loop"; "ls";
    (* m *) "match"; "math"; "merge"; "metadata"; "mkdir"; "mktemp"; "module";
            "move"; "mut"; "mv";
    (* n *) "nu-check"; "nu-highlight";
    (* o *) "open"; "overlay";
    (* p *) "panic"; "par-each"; "parse"; "path"; "plugin"; "port"; "prepend";
            "print"; "ps";
    (* q *) "query";
    (* r *) "random"; "reduce"; "reject"; "rename"; "return"; "reverse"; "rm";
            "roll"; "rotate"; "run-external";
    (* s *) "save"; "schema"; "scope"; "select"; "seq"; "shuffle"; "skip"; "sleep";
            "slice"; "sort"; "sort-by"; "source"; "source-env"; "split"; "start";
            "stor"; "str"; "sys";
    (* t *) "table"; "take"; "tee"; "term"; "timeit"; "to"; "touch"; "transpose";
            "try"; "tutor";
    (* u *) "ulimit"; "umask"; "uname"; "uniq"; "uniq-by"; "unlet"; "update";
            "upsert"; "url"; "use";
    (* v *) "values"; "version"; "view";
    (* w *) "watch"; "where"; "which"; "while"; "whoami"; "window"; "with-env";
            "wrap";
    (* z *) "zip";
  ]
|
|
|
|
(* set view of nushell_builtins for membership tests.
 * wrapped in `lazy`, so the set is only built the first time it is forced
 * and that result is reused on every later force. *)
let builtin_set =
  lazy
    (List.fold_left
       (fun names name -> SSet.add name names)
       SSet.empty nushell_builtins)
|
|
|
|
(* [is_nushell_builtin cmd] is true exactly when [cmd] is one of the names in
 * nushell_builtins. the comparison is an exact string match. *)
let is_nushell_builtin cmd =
  let (lazy builtins) = builtin_set in
  SSet.mem cmd builtins
|
|
|
|
(* collapse flag entries that describe the same flag into a single entry.
 *
 * two entries count as the same flag when they share a key: "-c" for a
 * short-only flag, "--name" for a long flag with or without a short alias.
 * among entries sharing a key the highest-scoring one is kept (on a tie, the
 * one that appears first in the input):
 *   - short and long form both present: +10
 *   - takes a parameter:                 +5
 *   - description: +1 per 10 characters, capped at +5
 *
 * afterwards a short-only entry is dropped when its letter is already the
 * short alias of a surviving short+long entry, so that "-v" is not emitted
 * next to "--verbose(-v)".
 *
 * the result lists one entry per surviving key, in the order in which each
 * key first appears in [entries]. *)
let dedup_entries entries =
  (* canonical identity of an entry, derived from its switch only *)
  let key_of entry =
    match entry.switch with
    | Short c -> "-" ^ String.make 1 c
    | Long name | Both (_, name) -> "--" ^ name
  in
  (* ranking used to choose between entries with the same key *)
  let score entry =
    let for_switch = match entry.switch with Both _ -> 10 | _ -> 0 in
    let for_param = match entry.param with None -> 0 | Some _ -> 5 in
    let for_desc = min 5 (String.length entry.desc / 10) in
    for_switch + for_param + for_desc
  in
  (* pass 1: remember the winner for every key; an incumbent is only replaced
   * by a strictly better-scoring entry *)
  let keep_better winners entry =
    let key = key_of entry in
    match SMap.find_opt key winners with
    | Some incumbent when score incumbent >= score entry -> winners
    | _ -> SMap.add key entry winners
  in
  let winners = List.fold_left keep_better SMap.empty entries in
  (* short letters that some winning short+long entry already exposes *)
  let aliased =
    SMap.fold
      (fun _key entry letters ->
        match entry.switch with
        | Both (letter, _) -> CSet.add letter letters
        | _ -> letters)
      winners CSet.empty
  in
  (* pass 2: walk the input in order, emitting the winner the first time each
   * key is met; short-only entries whose letter is aliased are skipped
   * without being recorded as seen *)
  let rec emit seen rev_out = function
    | [] -> List.rev rev_out
    | entry :: rest ->
      let key = key_of entry in
      let shadowed =
        match entry.switch with
        | Short letter -> CSet.mem letter aliased
        | _ -> false
      in
      if shadowed || SSet.mem key seen then emit seen rev_out rest
      else emit (SSet.add key seen) (SMap.find key winners :: rev_out) rest
  in
  emit SSet.empty [] entries
|
|
|
|
(* infer a nushell parameter type from a parameter placeholder name.
 *
 * returns "path" for file/directory-like names, "int" for numeric-looking
 * names and "string" for everything else (including the empty string).
 *
 * the name is compared case-insensitively (ASCII): help texts spell
 * placeholders as FILE, file or File, and all of them should yield the same
 * type. previously only a hand-picked subset of lowercase spellings was
 * recognised, so e.g. "directory" or "count" fell through to "string". *)
let nushell_type_of_param name =
  match String.uppercase_ascii name with
  | "FILE" | "PATH" | "DIR" | "DIRECTORY" | "FILENAME" | "PATTERNFILE" ->
    "path"
  | "NUM" | "N" | "COUNT" | "NUMBER" | "INT" | "COLS" | "WIDTH" | "LINES"
  | "DEPTH" ->
    "int"
  | _ -> "string"
|
|
|
|
(* escape [s] for embedding between double quotes in generated nushell code.
 * every double-quote and every backslash character is prefixed with a
 * backslash; all other characters are copied through unchanged.
 * a string containing neither character is returned as-is, without copying. *)
let escape_nu s =
  let is_special c = c = '"' || c = '\\' in
  let rec has_special_from i =
    i < String.length s && (is_special s.[i] || has_special_from (i + 1))
  in
  if not (has_special_from 0) then s
  else begin
    let out = Buffer.create (String.length s + 4) in
    String.iter
      (fun c ->
        if is_special c then Buffer.add_char out '\\';
        Buffer.add_char out c)
      s;
    Buffer.contents out
  end
|
|
|
|
(* render one flag entry as a parameter line of a nushell `extern` block
 * (without the trailing newline).
 *
 * the line is built from:
 *   - a leading indent
 *   - the flag name: "--long(-s)" when both forms exist, otherwise "--long"
 *     or "-s"
 *   - ": <type>" when the flag takes a parameter; mandatory and optional
 *     parameters are rendered the same way, the type coming from
 *     nushell_type_of_param
 *   - when the description is non-empty, padding followed by "# <desc>".
 *     the padding brings the text up to 40 characters, and is never less
 *     than one space. *)
let format_flag entry =
  let name =
    match entry.switch with
    | Short c -> "-" ^ String.make 1 c
    | Long long -> "--" ^ long
    | Both (c, long) ->
      String.concat "" [ "--"; long; "(-"; String.make 1 c; ")" ]
  in
  let type_suffix =
    match entry.param with
    | None -> ""
    | Some (Mandatory placeholder | Optional placeholder) ->
      ": " ^ nushell_type_of_param placeholder
  in
  let flag = Printf.sprintf " %s%s" name type_suffix in
  match entry.desc with
  | "" -> flag
  | desc ->
    let gap = max 1 (40 - String.length flag) in
    Printf.sprintf "%s%s# %s" flag (String.make gap ' ') desc
|
|
|
|
(* render one positional argument as a parameter line of a nushell `extern`
 * block (without the trailing newline).
 *
 *   variadic               -> "...name: type"
 *   optional, not variadic -> "name?: type"
 *   otherwise              -> "name: type"
 *
 * hyphens in the name are replaced by underscores in the emitted identifier.
 * the type is looked up from the upper-cased original name via
 * nushell_type_of_param. *)
let format_positional positional =
  let ident =
    String.concat "_" (String.split_on_char '-' positional.pos_name)
  in
  let typ =
    nushell_type_of_param (String.uppercase_ascii positional.pos_name)
  in
  let decorated =
    if positional.variadic then "..." ^ ident
    else if positional.optional then ident ^ "?"
    else ident
  in
  " " ^ decorated ^ ": " ^ typ
|
|
|
|
(* rewrite a positional list so that it respects two ordering rules:
 *   1. a required positional never follows an optional one
 *   2. there is at most one variadic ("rest") positional
 *
 * the list is walked once, left to right:
 *   - the first variadic is kept, every later variadic is dropped
 *   - once an optional or a variadic has been kept, every following
 *     non-variadic positional is forced to optional
 *   - everything else is kept unchanged
 * relative order of the kept positionals is preserved. *)
let fixup_positionals positionals =
  let rec walk ~opt_seen ~rest_seen rev_out = function
    | [] -> List.rev rev_out
    | p :: tl when p.variadic ->
      if rest_seen then
        (* a rest parameter was already emitted: drop this one *)
        walk ~opt_seen ~rest_seen rev_out tl
      else walk ~opt_seen:true ~rest_seen:true (p :: rev_out) tl
    | p :: tl when opt_seen ->
      (* anything after an optional/rest parameter is made optional *)
      walk ~opt_seen:true ~rest_seen ({ p with optional = true } :: rev_out) tl
    | p :: tl -> walk ~opt_seen:p.optional ~rest_seen (p :: rev_out) tl
  in
  walk ~opt_seen:false ~rest_seen:false [] positionals
|
|
|
|
(* build the nushell source for a command's `extern` declaration.
 *
 * the main block has this shape (positionals first, then flags, one per
 * line, as produced by format_positional and format_flag):
 *   export extern "git add" [
 *     ...pathspec: path
 *     --verbose(-v)   # be verbose
 *     --dry-run(-n)   # dry run
 *   ]
 *
 * positionals are first passed through fixup_positionals and flags through
 * dedup_entries. the command name is escaped with escape_nu.
 *
 * every entry of result.subcommands then gets a stub block of its own,
 * separated by a blank line, carrying only its description as a comment:
 *   export extern "git stash" [ # stash changes
 *   ]
 *)
let extern_of cmd_name result =
  let flags = dedup_entries result.entries in
  let positionals = fixup_positionals result.positionals in
  let quoted_cmd = escape_nu cmd_name in
  let out = Buffer.create 256 in
  let add_line text =
    Buffer.add_string out text;
    Buffer.add_char out '\n'
  in
  (* main declaration: header, positionals, flags, closing bracket *)
  add_line ("export extern \"" ^ quoted_cmd ^ "\" [");
  List.iter (fun positional -> add_line (format_positional positional)) positionals;
  List.iter (fun entry -> add_line (format_flag entry)) flags;
  add_line "]";
  (* one stub declaration per listed subcommand *)
  List.iter
    (fun (sub : subcommand) ->
      Buffer.add_string out
        (Printf.sprintf "\nexport extern \"%s %s\" [ # %s\n]\n"
           quoted_cmd (escape_nu sub.name) (escape_nu sub.desc)))
    result.subcommands;
  Buffer.contents out
|
|
|
|
(* main entry point for callers: [generate_extern cmd_name result] returns
 * exactly what [extern_of cmd_name result] returns. *)
let generate_extern cmd_name result = extern_of cmd_name result
|
|
|
|
(* derive the name of the generated nushell `module` from a command name.
 * ASCII letters, digits, hyphens and underscores are kept; every other
 * character becomes a hyphen; "-completions" is appended.
 * e.g. "git" gives "git-completions" and "docker-compose" gives
 * "docker-compose-completions". *)
let module_name_of cmd_name =
  let keep_or_dash c =
    match c with
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' -> c
    | _ -> '-'
  in
  Printf.sprintf "%s-completions" (String.map keep_or_dash cmd_name)
|
|
|
|
(* wrap the output of extern_of in a nushell `module` and import it.
 * for a command "git" the result has the shape
 *   module git-completions {
 *   <extern blocks>}
 *
 *   use git-completions *
 * where the module name comes from module_name_of. the trailing `use` line
 * imports everything the module exports. *)
let generate_module cmd_name result =
  let mod_name = module_name_of cmd_name in
  let body = extern_of cmd_name result in
  String.concat ""
    [ "module "; mod_name; " {\n"; body; "}\n\nuse "; mod_name; " *\n" ]
|
|
|
|
(* shortcut for callers that only have flag entries: calls generate_extern
 * with a result whose subcommands and positionals are empty and whose
 * description is the empty string. *)
let generate_extern_from_entries cmd_name entries =
  generate_extern cmd_name
    { description = ""; positionals = []; subcommands = []; entries }
|