This commit is contained in:
atagen 2026-03-18 15:40:47 +11:00
commit 762223e19a
23 changed files with 5277 additions and 0 deletions

253
lib/nushell.ml Normal file
View file

@ -0,0 +1,253 @@
(* nushell.ml — generate nushell extern definitions from parsed help data.
*
* this module is the code generation backend. it takes a help_result (from
* the parser or manpage modules) and produces nushell source code that
* defines `extern` declarations — nushell's mechanism for teaching the shell
* about external commands' flags and subcommands so that it can offer completions.
*
* it also maintains a list of nushell's built-in commands to avoid generating
* extern definitions that would shadow them.
*
* key responsibilities:
* - deduplicating flag entries (same flag from multiple help sources)
* - mapping parameter names to nushell types (path, int, string)
* - formatting flags in nushell syntax: --flag(-f): type # description
* - handling positional arguments with nushell's ordering constraints
* - escaping special characters for nushell string literals
*)
open Parser
module SSet = Set.Make(String)
module SMap = Map.Make(String)
module CSet = Set.Make(Char)
(* names of nushell built-in commands and keywords, as a plain string list
 * grouped alphabetically by first letter.
 *
 * we must never generate `extern` definitions for these names because doing
 * so would shadow nushell's own implementations. the list is turned into a
 * set (see builtin_set) for membership checks.
 *
 * this list is maintained manually and should be updated with new nushell
 * releases. *)
let nushell_builtins = [
"alias"; "all"; "ansi"; "any"; "append"; "ast"; "attr";
"bits"; "break"; "bytes";
"cal"; "cd"; "char"; "chunk-by"; "chunks"; "clear"; "collect";
"columns"; "commandline"; "compact"; "complete"; "config"; "const";
"continue"; "cp";
"date"; "debug"; "decode"; "def"; "default"; "describe"; "detect";
"do"; "drop"; "du";
"each"; "echo"; "encode"; "enumerate"; "error"; "every"; "exec";
"exit"; "explain"; "explore"; "export"; "export-env"; "extern";
"fill"; "filter"; "find"; "first"; "flatten"; "for"; "format"; "from";
"generate"; "get"; "glob"; "grid"; "group-by";
"hash"; "headers"; "help"; "hide"; "hide-env"; "histogram";
"history"; "http";
"if"; "ignore"; "input"; "insert"; "inspect"; "interleave"; "into";
"is-admin"; "is-empty"; "is-not-empty"; "is-terminal"; "items";
"job"; "join";
"keybindings"; "kill";
"last"; "length"; "let"; "let-env"; "lines"; "load-env"; "loop"; "ls";
"match"; "math"; "merge"; "metadata"; "mkdir"; "mktemp"; "module";
"move"; "mut"; "mv";
"nu-check"; "nu-highlight";
"open"; "overlay";
"panic"; "par-each"; "parse"; "path"; "plugin"; "port"; "prepend"; "print"; "ps";
"query";
"random"; "reduce"; "reject"; "rename"; "return"; "reverse"; "rm";
"roll"; "rotate"; "run-external";
"save"; "schema"; "scope"; "select"; "seq"; "shuffle"; "skip"; "sleep";
"slice"; "sort"; "sort-by"; "source"; "source-env"; "split"; "start";
"stor"; "str"; "sys";
"table"; "take"; "tee"; "term"; "timeit"; "to"; "touch"; "transpose";
"try"; "tutor";
"ulimit"; "umask"; "uname"; "uniq"; "uniq-by"; "unlet"; "update";
"upsert"; "url"; "use";
"values"; "version"; "view";
"watch"; "where"; "which"; "while"; "whoami"; "window"; "with-env"; "wrap";
"zip";
]
(* set view of nushell_builtins, built on first use (the value is lazy) so
 * that membership checks do not have to scan the list. *)
let builtin_set =
  lazy
    (List.fold_left
       (fun names builtin -> SSet.add builtin names)
       SSet.empty nushell_builtins)
(* [is_nushell_builtin cmd] is true when [cmd] is exactly one of the names in
 * nushell_builtins. forces the lazy builtin_set on first call. *)
let is_nushell_builtin cmd =
  let builtins = Lazy.force builtin_set in
  SSet.mem cmd builtins
(* collapse flag entries that describe the same flag, keeping input order.
 *
 * two entries are "the same flag" when they share a key: "-c" for a
 * short-only flag, "--name" for a long flag (with or without a short alias).
 * among entries with the same key the highest-scoring one wins; on a tie the
 * earliest one is kept. the score is:
 *   - short and long form both present: +10
 *   - takes a parameter:                +5
 *   - description length / 10, capped:  up to +5
 *
 * afterwards, a standalone short flag is dropped when its letter is already
 * the short alias of a winning short+long entry, so we never emit both "-v"
 * and "--verbose(-v)".
 *
 * the result lists each surviving key once, at the position of its first
 * occurrence in the input, but carrying the winning entry's data.
 *
 * NOTE(review): keys for short+long entries use only the long name, so two
 * such entries with different long names but the same short letter are both
 * kept. confirm whether that can occur in practice. *)
let dedup_entries entries =
  (* canonical identity of an entry *)
  let key_of entry =
    match entry.switch with
    | Short c -> "-" ^ String.make 1 c
    | Long name | Both (_, name) -> "--" ^ name
  in
  (* how informative an entry is; higher is better *)
  let score entry =
    let form_points = match entry.switch with Both _ -> 10 | _ -> 0 in
    let param_points = match entry.param with None -> 0 | Some _ -> 5 in
    let desc_points = min 5 (String.length entry.desc / 10) in
    form_points + param_points + desc_points
  in
  (* pass 1: winner per key; an incumbent survives ties *)
  let best =
    List.fold_left
      (fun table candidate ->
        SMap.update (key_of candidate)
          (function
            | Some incumbent when score incumbent >= score candidate ->
              Some incumbent
            | Some _ | None -> Some candidate)
          table)
      SMap.empty entries
  in
  (* short letters that already appear as the alias of a winning entry *)
  let short_alias entry =
    match entry.switch with
    | Both (c, _) -> Some c
    | _ -> None
  in
  let covered =
    SMap.fold
      (fun _ winner letters ->
        match short_alias winner with
        | Some c -> CSet.add c letters
        | None -> letters)
      best CSet.empty
  in
  (* pass 2: walk the input in order, emitting each key's winner once *)
  let rec emit seen out = function
    | [] -> List.rev out
    | entry :: rest ->
      let key = key_of entry in
      let shadowed =
        match entry.switch with
        | Short c -> CSet.mem c covered
        | _ -> false
      in
      if SSet.mem key seen || shadowed then emit seen out rest
      else emit (SSet.add key seen) (SMap.find key best :: out) rest
  in
  emit SSet.empty [] entries
(* map a parameter placeholder name to a nushell type name.
 *
 * nushell's `extern` declarations use typed parameters, so we infer the type
 * from the placeholder used in the help text:
 *   - file/path-like names -> "path" (enables path completion)
 *   - numeric names        -> "int"
 *   - anything else        -> "string"
 *
 * matching is case-insensitive (ASCII): the name is upper-cased once and
 * compared against the upper-case spellings below. this keeps flag
 * parameters such as "filename" or "num" consistent with positionals, which
 * are upper-cased by the caller before lookup. *)
let nushell_type_of_param name =
  match String.uppercase_ascii name with
  | "FILE" | "PATH" | "DIR" | "DIRECTORY" | "FILENAME" | "PATTERNFILE" ->
    "path"
  | "NUM" | "N" | "COUNT" | "NUMBER" | "INT" | "COLS" | "WIDTH" | "LINES"
  | "DEPTH" ->
    "int"
  | _ -> "string"
(* make a string safe to embed in a nushell double-quoted string literal by
 * putting a backslash in front of every double quote and every backslash.
 * all other characters are copied unchanged. when the input contains
 * neither character it is returned as-is without building a new string. *)
let escape_nu s =
  let needs_escaping = String.contains s '"' || String.contains s '\\' in
  if not needs_escaping then s
  else
    let out = Buffer.create (String.length s + 4) in
    let emit ch =
      match ch with
      | '"' | '\\' ->
        Buffer.add_char out '\\';
        Buffer.add_char out ch
      | _ -> Buffer.add_char out ch
    in
    String.iter emit s;
    Buffer.contents out
(* render one flag entry as a parameter line of a nushell `extern` block.
 *
 * the line is built from three parts:
 *   - the flag name: "-c", "--long", or "--long(-c)" when both forms exist
 *     (nushell's syntax for a long flag with a short alias)
 *   - an optional ": type" suffix when the flag takes a parameter, whether
 *     mandatory or optional; the type comes from nushell_type_of_param
 *   - an optional "# description" comment, preceded by enough spaces to
 *     start at byte column 40, or by a single space when the flag text is
 *     already that wide
 *
 * output examples:
 * " --verbose(-v) # increase verbosity"
 * " --output(-o): path # write output to file"
 * " -n: int # number of results"
 *
 * entries with an empty description produce no comment at all. *)
let format_flag entry =
  let name =
    match entry.switch with
    | Short c -> Printf.sprintf "-%c" c
    | Long long -> Printf.sprintf "--%s" long
    | Both (c, long) -> Printf.sprintf "--%s(-%c)" long c
  in
  let type_suffix =
    match entry.param with
    | None -> ""
    | Some (Mandatory p | Optional p) -> ": " ^ nushell_type_of_param p
  in
  let flag = String.concat "" [ " "; name; type_suffix ] in
  match entry.desc with
  | "" -> flag
  | desc ->
    (* always leave at least one space between the flag and the comment *)
    let gap = max 1 (40 - String.length flag) in
    String.concat "" [ flag; String.make gap ' '; "# "; desc ]
(* render one positional argument as a parameter line of a nushell `extern`
 * block.
 *
 * nushell syntax is "...name: type" for a variadic (rest) parameter and
 * "name?: type" for an optional one; a variadic parameter never gets the
 * "?" marker. hyphens in the name become underscores because nushell
 * identifiers cannot contain hyphens. the type is looked up from the
 * upper-cased original name via nushell_type_of_param. *)
let format_positional positional =
  let underscore_hyphen = function
    | '-' -> '_'
    | other -> other
  in
  let name = String.map underscore_hyphen positional.pos_name in
  let typ =
    nushell_type_of_param (String.uppercase_ascii positional.pos_name)
  in
  let prefix, suffix =
    match positional.variadic, positional.optional with
    | true, _ -> ("...", "")
    | false, true -> ("", "?")
    | false, false -> ("", "")
  in
  Printf.sprintf " %s%s%s: %s" prefix name suffix typ
(* rewrite a positional list so it satisfies nushell's ordering rules:
 *   1. no required positional may follow an optional one
 *   2. at most one variadic ("rest") parameter is allowed
 *
 * walking the list left to right:
 *   - the first variadic parameter is kept; any later variadic is dropped
 *   - a variadic parameter counts as optional for what follows it
 *   - a non-variadic parameter that comes after an optional (or variadic)
 *     one is silently marked optional
 *   - otherwise the parameter is kept unchanged
 * relative order of the surviving parameters is preserved. *)
let fixup_positionals positionals =
  let rec walk after_optional have_rest kept = function
    | [] -> List.rev kept
    | p :: rest when p.variadic ->
      (* only the first rest parameter survives *)
      if have_rest then walk after_optional have_rest kept rest
      else walk true true (p :: kept) rest
    | p :: rest when after_optional ->
      (* everything after an optional must itself be optional *)
      walk true have_rest ({ p with optional = true } :: kept) rest
    | p :: rest -> walk p.optional have_rest (p :: kept) rest
  in
  walk false false [] positionals
(* generate the full nushell `extern` source for a command.
 *
 * [cmd_name] is the command (possibly several words, e.g. "git add");
 * [result] carries its flag entries, positionals and subcommands.
 *
 * the main block lists the positionals (after fixup_positionals) followed by
 * the flags (after dedup_entries), one per line:
 * export extern "git add" [
 * ...pathspec: path
 * --verbose(-v) # be verbose
 * --dry-run(-n) # dry run
 * ]
 *
 * every subcommand in [result.subcommands] then gets a stub block whose only
 * content is a comment holding its description:
 * export extern "git stash" [ # stash changes
 * ]
 *
 * command and subcommand names sit inside a double-quoted string and are
 * therefore passed through escape_nu. descriptions sit inside a `#` comment,
 * where string escapes have no meaning, so they are emitted verbatim (as
 * format_flag does); a subcommand with an empty description gets no comment
 * at all rather than a dangling "# ". *)
let extern_of cmd_name result =
  let escaped_name = escape_nu cmd_name in
  let flag_lines =
    dedup_entries result.entries
    |> List.map (fun entry -> format_flag entry ^ "\n")
  in
  let pos_lines =
    fixup_positionals result.positionals
    |> List.map (fun positional -> format_positional positional ^ "\n")
  in
  let main =
    Printf.sprintf "export extern \"%s\" [\n%s%s]\n" escaped_name
      (String.concat "" pos_lines)
      (String.concat "" flag_lines)
  in
  (* stub block for a subcommand that has no full definition of its own *)
  let stub (subcommand : subcommand) =
    let comment =
      if String.length subcommand.desc = 0 then ""
      else " # " ^ subcommand.desc
    in
    Printf.sprintf "\nexport extern \"%s %s\" [%s\n]\n" escaped_name
      (escape_nu subcommand.name)
      comment
  in
  String.concat "" (main :: List.map stub result.subcommands)
(* main entry point for callers: [generate_extern cmd_name result] returns
 * exactly what extern_of returns for the same arguments. *)
let generate_extern cmd_name result = extern_of cmd_name result
(* derive a nushell `module` name from a command name.
 * ASCII letters, digits, hyphens and underscores are kept; every other
 * character is replaced by a hyphen, and "-completions" is appended.
 * e.g. "git" becomes "git-completions" and "docker compose" becomes
 * "docker-compose-completions". *)
let module_name_of cmd_name =
  let is_name_char = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' -> true
    | _ -> false
  in
  let sanitized =
    String.map (fun ch -> if is_name_char ch then ch else '-') cmd_name
  in
  sanitized ^ "-completions"
(* wrap the generated `extern` source for a command in a nushell `module`
 * and import it straight away.
 * the module is named by module_name_of, its body is the output of
 * extern_of, and a trailing `use <module> *` line brings the definitions
 * into scope. *)
let generate_module cmd_name result =
  let mod_name = module_name_of cmd_name in
  let body = extern_of cmd_name result in
  Printf.sprintf "module %s {\n%s}\n\nuse %s *\n" mod_name body mod_name
(* convenience wrapper for callers that only have flag data: builds a result
 * with the given entries, no subcommands, no positionals and an empty
 * description, then hands it to generate_extern. *)
let generate_extern_from_entries cmd_name entries =
  let flags_only =
    { entries; subcommands = []; positionals = []; description = "" }
  in
  generate_extern cmd_name flags_only