cli11, bsd manual

This commit is contained in:
atagen 2026-03-21 21:22:41 +11:00
parent 9c7c528a0c
commit 18c97eacd0
7 changed files with 534 additions and 117 deletions

View file

@ -435,33 +435,25 @@ let count_macro name lines =
(* Auto-detect and try strategies, return the one with most entries *)
let extract_entries lines =
let results = ref [] in
(* Try TP if .TP macros present *)
if count_macro "TP" lines > 0 then
results := ("TP", strategy_tp lines) :: !results;
(* Try IP if .IP macros present *)
if count_macro "IP" lines > 0 then
results := ("IP", strategy_ip lines) :: !results;
(* Try PP+RS if both present *)
if count_macro "PP" lines > 0 && count_macro "RS" lines > 0 then
results := ("PP+RS", strategy_pp_rs lines) :: !results;
(* Try nix3 style if UR macros present *)
if count_macro "UR" lines > 0 && count_macro "IP" lines > 0 then
results := ("nix", strategy_nix lines) :: !results;
(* Always try deroff as fallback *)
results := ("deroff", strategy_deroff_lines lines) :: !results;
(* Prefer specialized strategies over deroff fallback *)
let specialized =
List.filter (fun (name, entries) -> name <> "deroff" && entries <> []) !results
let tp = count_macro "TP" lines
and ip = count_macro "IP" lines
and pp = count_macro "PP" lines
and rs = count_macro "RS" lines
and ur = count_macro "UR" lines in
let specialized = List.filter_map Fun.id [
(if tp > 0 then Some ("TP", strategy_tp lines) else None);
(if ip > 0 then Some ("IP", strategy_ip lines) else None);
(if pp > 0 && rs > 0 then Some ("PP+RS", strategy_pp_rs lines) else None);
(if ur > 0 && ip > 0 then Some ("nix", strategy_nix lines) else None);
] in
let candidates = match List.filter (fun (_, e) -> e <> []) specialized with
| [] -> [("deroff", strategy_deroff_lines lines)]
| filtered -> filtered
in
let candidates = if specialized <> [] then specialized else !results in
let best =
List.fold_left (fun (best_name, best_entries) (name, entries) ->
if List.length entries >= List.length best_entries then (name, entries)
else (best_name, best_entries)
) ("none", []) candidates
in
snd best
List.fold_left (fun (_, best) (name, entries) ->
if List.length entries >= List.length best then (name, entries)
else (name, best)
) ("none", []) candidates |> snd
(* --- SYNOPSIS command name extraction --- *)
@ -514,12 +506,171 @@ let extract_synopsis_command contents =
let lines = String.split_on_char '\n' contents in
extract_synopsis_command_lines lines
(* --- SYNOPSIS positional extraction --- *)
(* Find the SYNOPSIS section (.SH SYNOPSIS, case-insensitive), join the text
   of its first form into a single line, drop the leading command words and
   hand the remainder to [parse_usage_args].

   Collection stops at the next .SH, .SS or .br macro, or at end of input.
   Only plain text lines and font macros (B, BI, BR, I, IR, IB, RB, RI)
   contribute text; every other macro is ignored.  Returns [] when there is
   no SYNOPSIS section or it contains no text. *)
let extract_synopsis_positionals_lines lines =
  let is_synopsis title =
    String.uppercase_ascii (String.trim title) = "SYNOPSIS"
  in
  (* Fragments are accumulated in reverse; empty ones are dropped. *)
  let keep_nonempty text fragments =
    if String.length text > 0 then text :: fragments else fragments
  in
  let positionals_of fragments =
    match String.concat " " (List.rev fragments) |> String.trim with
    | "" -> []
    | synopsis ->
      let offset = skip_command_prefix synopsis in
      String.sub synopsis offset (String.length synopsis - offset)
      |> parse_usage_args
  in
  let rec gather fragments = function
    | [] | Macro (("SH" | "SS" | "br"), _) :: _ -> positionals_of fragments
    | Text raw :: tl ->
      let text = strip_groff_escapes raw |> String.trim in
      gather (keep_nonempty text fragments) tl
    | Macro (("B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI"), args) :: tl ->
      let text =
        strip_inline_macro_args args |> strip_groff_escapes |> String.trim
      in
      gather (keep_nonempty text fragments) tl
    | _ :: tl -> gather fragments tl
  in
  let rec seek = function
    | [] -> []
    | Macro ("SH", title) :: tl when is_synopsis title -> gather [] tl
    | _ :: tl -> seek tl
  in
  lines |> List.map classify_line |> seek
(* --- mdoc (BSD) format support --- *)
(* A page is treated as mdoc (BSD) when at least one line classifies as an
   .Sh macro; man(7) pages use the upper-case .SH instead. *)
let is_mdoc lines =
  let is_sh_macro raw =
    match classify_line raw with
    | Macro ("Sh", _) -> true
    | _ -> false
  in
  List.exists is_sh_macro lines
(* Text contributed by one classified mdoc line to a description, if any.
   Plain text is returned with groff escapes stripped.  The structural
   macros listed below contribute nothing; any other macro contributes its
   escape-stripped, trimmed arguments when they are non-empty. *)
let mdoc_text_of = function
  | Text s -> Some (strip_groff_escapes s)
  | Macro (("Pp" | "Bl" | "El" | "Sh" | "Ss" | "Os" | "Dd" | "Dt"
           | "Oo" | "Oc" | "Op"), _) -> None
  | Macro (_, args) ->
    (match strip_groff_escapes args |> String.trim with
     | "" -> None
     | text -> Some text)
  | _ -> None
(* Parse an mdoc (BSD) manual page into a help_result.

   - Option entries come from .Bl/.El lists whose first item (the .It
     directly after .Bl) starts with "Fl"; lists that start otherwise are
     skipped up to their .El.
   - Positionals come from .Ar and .Op Ar lines in the SYNOPSIS section and
     are de-duplicated by name, keeping the first occurrence.
   - subcommands is always empty. *)
let parse_mdoc_lines lines =
  let found_entries = ref [] in
  let found_positionals = ref [] in
  let nonblank_words text =
    String.split_on_char ' ' text |> List.filter (fun w -> w <> "")
  in
  let record_positional ~optional words name =
    let variadic = List.mem "..." words in
    found_positionals :=
      { pos_name = String.lowercase_ascii name; optional; variadic }
      :: !found_positionals
  in
  (* Drop everything up to and including the next .El. *)
  let rec after_el = function
    | [] -> []
    | Macro ("El", _) :: tl -> tl
    | _ :: tl -> after_el tl
  in
  (* Gather description text until the next .It/.El/.Sh/.Ss; returns the
     joined description and the unconsumed lines (terminator included). *)
  let description_of items =
    let rec gather acc = function
      | [] -> (acc, [])
      | Macro (("It" | "El" | "Sh" | "Ss"), _) :: _ as remaining ->
        (acc, remaining)
      | item :: tl ->
        let acc =
          match mdoc_text_of item with
          | Some text -> text :: acc
          | None -> acc
        in
        gather acc tl
    in
    let rev_parts, remaining = gather [] items in
    (String.concat " " (List.rev rev_parts) |> String.trim, remaining)
  in
  (* Turn the arguments of an .It line into an entry with an empty desc.
     "Ns" tokens are ignored; an "Ar" in third position names a mandatory
     parameter. *)
  let entry_of_it args =
    let words = nonblank_words args |> List.filter (fun w -> w <> "Ns") in
    let param =
      match words with
      | _ :: _ :: "Ar" :: p :: _ -> Some (Mandatory p)
      | _ -> None
    in
    match words with
    | "Fl" :: c :: _ when String.length c = 1 && is_alphanumeric c.[0] ->
      Some { switch = Short c.[0]; param; desc = "" }
    | "Fl" :: name :: _ when String.length name > 1 && name.[0] = '-' ->
      let long = String.sub name 1 (String.length name - 1) in
      Some { switch = Long long; param; desc = "" }
    | _ -> None
  in
  (* Consume .It items up to the closing .El, recording each parsed flag. *)
  let rec consume_options = function
    | [] -> []
    | Macro ("El", _) :: tl -> tl
    | Macro ("It", args) :: tl ->
      let desc, remaining = description_of tl in
      (match entry_of_it args with
       | Some e -> found_entries := { e with desc } :: !found_entries
       | None -> ());
      consume_options remaining
    | _ :: tl -> consume_options tl
  in
  let rec scan = function
    | [] -> ()
    | Macro ("Bl", _) :: (Macro ("It", first) :: tl as items) ->
      (* Only the first item decides whether this is an option list. *)
      (match nonblank_words first with
       | "Fl" :: _ -> scan (consume_options items)
       | _ -> scan (after_el tl))
    | Macro ("Bl", _) :: tl -> scan (after_el tl)
    | Macro ("Sh", title) :: tl
      when String.uppercase_ascii (String.trim title) = "SYNOPSIS" ->
      scan (synopsis tl)
    | _ :: tl -> scan tl
  and synopsis = function
    | [] -> []
    | Macro ("Sh", _) :: _ as remaining -> remaining
    | Macro ("Ar", args) :: tl ->
      let words = nonblank_words args in
      (match words with
       | name :: _ when String.length name >= 2 ->
         record_positional ~optional:false words name
       | _ -> ());
      synopsis tl
    | Macro ("Op", args) :: tl ->
      let words = nonblank_words args in
      (match words with
       | "Ar" :: name :: _ when String.length name >= 2 ->
         record_positional ~optional:true words name
       | _ -> ());
      synopsis tl
    | _ :: tl -> synopsis tl
  in
  scan (List.map classify_line lines);
  let seen = Hashtbl.create 8 in
  let first_occurrence p =
    if Hashtbl.mem seen p.pos_name then false
    else begin
      Hashtbl.replace seen p.pos_name true;
      true
    end
  in
  let positionals = List.rev !found_positionals |> List.filter first_occurrence in
  { entries = List.rev !found_entries; subcommands = []; positionals }
(* --- Top-level API --- *)
let parse_manpage_lines lines =
let options_section = extract_options_section lines in
let entries = extract_entries options_section in
{ entries; subcommands = [] }
if is_mdoc lines then
parse_mdoc_lines lines
else begin
let options_section = extract_options_section lines in
let entries = extract_entries options_section in
let positionals = extract_synopsis_positionals_lines lines in
{ entries; subcommands = []; positionals }
end
let parse_manpage_string contents =
let lines = String.split_on_char '\n' contents in

View file

@ -1,5 +1,9 @@
open Parser
module SSet = Set.Make(String)
module SMap = Map.Make(String)
module CSet = Set.Make(Char)
(* Nushell built-in commands and keywords *)
let nushell_builtins = [
"alias"; "all"; "ansi"; "any"; "append"; "ast"; "attr";
@ -40,13 +44,10 @@ let nushell_builtins = [
"zip";
]
let builtin_set = lazy (
let tbl = Hashtbl.create (List.length nushell_builtins) in
List.iter (fun s -> Hashtbl.replace tbl s true) nushell_builtins;
tbl)
let builtin_set = lazy (SSet.of_list nushell_builtins)
let is_nushell_builtin cmd =
Hashtbl.mem (Lazy.force builtin_set) cmd
SSet.mem cmd (Lazy.force builtin_set)
let dedup_entries entries =
let key_of entry =
@ -60,30 +61,24 @@ let dedup_entries entries =
let d = min 5 (String.length entry.desc / 10) in
sw + p + d
in
let best = Hashtbl.create 64 in
List.iter (fun e ->
let best = List.fold_left (fun acc e ->
let k = key_of e in
match Hashtbl.find_opt best k with
| Some prev when score prev >= score e -> ()
| _ -> Hashtbl.replace best k e
) entries;
let covered_shorts = Hashtbl.create 16 in
Hashtbl.iter (fun _ e ->
match SMap.find_opt k acc with
| Some prev when score prev >= score e -> acc
| _ -> SMap.add k e acc
) SMap.empty entries in
let covered = SMap.fold (fun _ e acc ->
match e.switch with
| Both (c, _) -> Hashtbl.replace covered_shorts c true
| _ -> ()
) best;
let seen = Hashtbl.create 64 in
List.filter_map (fun e ->
| Both (c, _) -> CSet.add c acc
| _ -> acc
) best CSet.empty in
List.fold_left (fun (seen, acc) e ->
let k = key_of e in
if Hashtbl.mem seen k then None
else
match e.switch with
| Short c when Hashtbl.mem covered_shorts c -> None
| _ ->
Hashtbl.add seen k true;
Hashtbl.find_opt best k
) entries
if SSet.mem k seen then (seen, acc)
else match e.switch with
| Short c when CSet.mem c covered -> (seen, acc)
| _ -> (SSet.add k seen, SMap.find k best :: acc)
) (SSet.empty, []) entries |> snd |> List.rev
let nushell_type_of_param = function
| "FILE" | "file" | "PATH" | "path" | "DIR" | "dir" | "DIRECTORY"
@ -120,22 +115,26 @@ let format_flag entry =
let pad_len = max 1 (40 - String.length flag) in
flag ^ String.make pad_len ' ' ^ "# " ^ entry.desc
let write_extern buf cmd_name result =
let entries = dedup_entries result.entries in
Printf.bprintf buf "export extern \"%s\" [\n" (escape_nu cmd_name);
List.iter (fun e ->
Buffer.add_string buf (format_flag e); Buffer.add_char buf '\n'
) entries;
Buffer.add_string buf "]\n";
List.iter (fun (sc : subcommand) ->
Printf.bprintf buf "\nexport extern \"%s %s\" [ # %s\n]\n"
(escape_nu cmd_name) (escape_nu sc.name) (escape_nu sc.desc)
) result.subcommands
(* Render one positional as a line of a Nushell extern signature.
   Dashes in the name become underscores, variadic positionals get a "..."
   prefix, optional non-variadic ones get a "?" suffix, and the type is
   looked up from the upper-cased name via nushell_type_of_param. *)
let format_positional p =
  let ident = String.map (fun c -> if c = '-' then '_' else c) p.pos_name in
  let rest_marker, optional_marker =
    match p.variadic, p.optional with
    | true, _ -> ("...", "")
    | false, true -> ("", "?")
    | false, false -> ("", "")
  in
  let typ = nushell_type_of_param (String.uppercase_ascii p.pos_name) in
  Printf.sprintf " %s%s%s: %s" rest_marker ident optional_marker typ
let generate_extern cmd_name result =
let buf = Buffer.create 1024 in
write_extern buf cmd_name result;
Buffer.contents buf
(* Build the Nushell source for a command: one `export extern` block that
   lists positionals followed by de-duplicated flags, then one empty extern
   block per subcommand with its description as a trailing comment. *)
let extern_of cmd_name result =
  let cmd = escape_nu cmd_name in
  (* Render each item on its own line and join the lines. *)
  let block render items =
    items |> List.map (fun item -> render item ^ "\n") |> String.concat ""
  in
  let positional_block = block format_positional result.positionals in
  let flag_block = block format_flag (dedup_entries result.entries) in
  let header =
    Printf.sprintf "export extern \"%s\" [\n%s%s]\n" cmd positional_block flag_block
  in
  let sub_block (sc : subcommand) =
    Printf.sprintf "\nexport extern \"%s %s\" [ # %s\n]\n"
      cmd (escape_nu sc.name) (escape_nu sc.desc)
  in
  String.concat "" (header :: List.map sub_block result.subcommands)
(* Public name kept for existing callers. *)
let generate_extern = extern_of
let module_name_of cmd_name =
let s = String.map (function
@ -144,11 +143,7 @@ let module_name_of cmd_name =
let generate_module cmd_name result =
let m = module_name_of cmd_name in
let buf = Buffer.create 1024 in
Printf.bprintf buf "module %s {\n" m;
write_extern buf cmd_name result;
Printf.bprintf buf "}\n\nuse %s *\n" m;
Buffer.contents buf
Printf.sprintf "module %s {\n%s}\n\nuse %s *\n" m (extern_of cmd_name result) m
let generate_extern_from_entries cmd_name entries =
generate_extern cmd_name { entries; subcommands = [] }
generate_extern cmd_name { entries; subcommands = []; positionals = [] }

View file

@ -58,7 +58,8 @@ type switch = Short of char | Long of string | Both of char * string
type param = Mandatory of string | Optional of string
type entry = { switch : switch; param : param option; desc : string }
type subcommand = { name : string; desc : string }
type help_result = { entries : entry list; subcommands : subcommand list }
type positional = { pos_name : string; optional : bool; variadic : bool }
type help_result = { entries : entry list; subcommands : subcommand list; positionals : positional list }
(* --- Low-level combinators --- *)
@ -257,12 +258,225 @@ let help_parser =
many (choice [ try_entry; try_subcommand; try_skip ]) >>| fun items ->
let entries = List.filter_map (function `Entry e -> Some e | _ -> None) items in
let subcommands = List.filter_map (function `Subcommand sc -> Some sc | _ -> None) items in
{ entries; subcommands })
{ entries; subcommands; positionals = [] })
(* Return the index in [s] where the argument part of a usage line starts.

   Leading blank-separated words are treated as the command (and any
   subcommands) as long as they contain at least one lower-case ASCII
   letter.  Scanning stops, after skipping blanks, at the first word with
   no lower-case letter, at a token starting with one of [ < ( { -, at any
   other non-word character, or at the end of the string. *)
let skip_command_prefix s =
  let len = String.length s in
  let is_blank c = c = ' ' || c = '\t' in
  let is_word_char = function
    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '/' | '.' -> true
    | _ -> false
  in
  let is_lower c = c >= 'a' && c <= 'z' in
  (* First index at or after [pos] whose character fails [pred]. *)
  let rec advance_while pred pos =
    if pos < len && pred s.[pos] then advance_while pred (pos + 1) else pos
  in
  let rec skip pos =
    let start = advance_while is_blank pos in
    if start >= len then start
    else
      match s.[start] with
      | '[' | '<' | '(' | '{' | '-' -> start
      | c when is_word_char c ->
        let stop = advance_while is_word_char start in
        let word = String.sub s start (stop - start) in
        (* A word without lower-case letters is taken to be a placeholder. *)
        if String.exists is_lower word then skip stop else start
      | _ -> start
  in
  skip 0
(* Extract positional arguments from the argument part of a usage line
   (the text after the command name).

   Recognised forms:
   - [NAME] or [<name>]  -> optional positional
   - <name>              -> required positional
   - NAME (upper case)   -> required positional
   A trailing "..." or UTF-8 ellipsis, inside or directly after the token,
   marks the positional as variadic.  {a|b|...} groups and tokens starting
   with '-' are skipped, as are the placeholder names OPTION(S) and FLAG(S).
   Names are lower-cased and de-duplicated, keeping the first occurrence. *)
let parse_usage_args s =
  let len = String.length s in
  let i = ref 0 in
  let results = ref [] in
  let skip_ws () =
    while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done in
  let is_pos_char c =
    (c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9') in
  (* Consume an ellipsis at the cursor (after blanks); true when found. *)
  let read_dots () =
    skip_ws ();
    if !i + 2 < len && s.[!i] = '.' && s.[!i+1] = '.' && s.[!i+2] = '.' then
      (i := !i + 3; true)
    else if !i + 2 < len && s.[!i] = '\xe2' && s.[!i+1] = '\x80' && s.[!i+2] = '\xa6' then
      (i := !i + 3; true) (* UTF-8 ellipsis *)
    else false
  in
  (* Split a trailing ellipsis off bracket contents.  Both the ASCII and the
     UTF-8 form are accepted so that brackets agree with read_dots. *)
  let strip_trailing_dots inner =
    let without suffix =
      String.sub inner 0 (String.length inner - String.length suffix)
      |> String.trim
    in
    if String.ends_with ~suffix:"..." inner then (without "...", true)
    else if String.ends_with ~suffix:"\xe2\x80\xa6" inner then
      (without "\xe2\x80\xa6", true)
    else (inner, false)
  in
  let is_skip name =
    let u = String.uppercase_ascii name in
    u = "OPTIONS" || u = "OPTION" || u = "FLAGS" || u = "FLAG"
  in
  let is_clean_name name =
    String.length name >= 2
    && String.for_all (fun c ->
        (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
        || (c >= '0' && c <= '9') || c = '_' || c = '-') name
  in
  let is_letter c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in
  let add name ~optional ~variadic =
    results := { pos_name = String.lowercase_ascii name; optional; variadic }
               :: !results
  in
  let skip_braces () =
    (* Skip {A|c|d|...} alternative blocks *)
    if !i < len && s.[!i] = '{' then begin
      let depth = ref 1 in
      incr i;
      while !i < len && !depth > 0 do
        if s.[!i] = '{' then incr depth
        else if s.[!i] = '}' then decr depth;
        incr i
      done;
      ignore (read_dots ());
      true
    end else false
  in
  while !i < len do
    skip_ws ();
    if !i >= len then ()
    else if skip_braces () then ()
    else match s.[!i] with
      | '[' ->
        incr i;
        let start = !i in
        let depth = ref 1 in
        while !i < len && !depth > 0 do
          if s.[!i] = '[' then incr depth
          else if s.[!i] = ']' then decr depth;
          incr i
        done;
        (* When the bracket was closed the cursor sits just past ']'.  When
           the input ended first (e.g. a wrapped usage line) no ']' was
           consumed, so the contents run to the end of the string. *)
        let bracket_end = if !depth = 0 then !i - 1 else !i in
        let inner = String.sub s start (bracket_end - start) |> String.trim in
        let inner, has_inner_dots = strip_trailing_dots inner in
        let variadic = has_inner_dots || read_dots () in
        if String.length inner > 0
        && inner.[0] <> '-'
        && (is_letter inner.[0] || inner.[0] = '<') then begin
          let name =
            if inner.[0] = '<' then
              let e = try String.index inner '>' with Not_found -> String.length inner in
              String.sub inner 1 (e - 1)
            else inner
          in
          if is_clean_name name && not (is_skip name) then
            add name ~optional:true ~variadic
        end
      | '<' ->
        incr i;
        let start = !i in
        while !i < len && s.[!i] <> '>' do incr i done;
        let name = String.sub s start (!i - start) in
        if !i < len then incr i;
        let variadic = read_dots () in
        if is_clean_name name && not (is_skip name) then
          add name ~optional:false ~variadic
      | '-' ->
        (* A flag token: skip to the next blank or closing bracket. *)
        while !i < len && s.[!i] <> ' ' && s.[!i] <> '\t' && s.[!i] <> ']' do incr i done
      | c when c >= 'A' && c <= 'Z' ->
        let start = !i in
        while !i < len && is_pos_char s.[!i] do incr i done;
        (* Every character of name satisfies is_pos_char by construction. *)
        let name = String.sub s start (!i - start) in
        let variadic = read_dots () in
        if String.length name >= 2 && not (is_skip name) then
          add name ~optional:false ~variadic
      | _ ->
        incr i
  done;
  let seen = Hashtbl.create 8 in
  List.rev !results |> List.filter (fun p ->
      if Hashtbl.mem seen p.pos_name then false
      else (Hashtbl.replace seen p.pos_name true; true))
(* Locate the first line that starts with "usage:" (case-insensitive, after
   trimming) and parse the positionals it declares.

   The usage text is whatever follows the prefix on that line; when that is
   empty (Clap style "USAGE:" on its own line) the next line is used if it
   is non-blank.  Returns [] when no usable usage text is found. *)
let extract_usage_positionals text =
  let nonempty str = if String.length str > 0 then Some str else None in
  let rec usage_of = function
    | [] -> None
    | raw :: following ->
      let line = String.trim raw in
      let n = String.length line in
      if n >= 6 && String.lowercase_ascii (String.sub line 0 6) = "usage:" then
        (match nonempty (String.trim (String.sub line 6 (n - 6))), following with
         | (Some _ as inline), _ -> inline
         | None, next :: _ -> nonempty (String.trim next)
         | None, [] -> None)
      else usage_of following
  in
  match usage_of (String.split_on_char '\n' text) with
  | None -> []
  | Some usage ->
    let offset = skip_command_prefix usage in
    String.sub usage offset (String.length usage - offset) |> parse_usage_args
(* Read the positionals listed under a "POSITIONALS:" / "Positionals:"
   heading.

   Rows are the indented lines that follow the heading; the first
   unindented or blank line ends the section.  A row must begin with a name
   of at least two characters (letters, digits, '_' or '-'); an optional
   upper-case type word after it is skipped, and a "..." that follows marks
   the positional as variadic.  Every positional is reported as required. *)
let extract_cli11_positionals text =
  let is_blank c = c = ' ' || c = '\t' in
  let is_upper c = c >= 'A' && c <= 'Z' in
  let is_name_char = function
    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' | '-' -> true
    | _ -> false
  in
  let entry_of row =
    let n = String.length row in
    (* First index at or after [pos] whose character fails [pred]. *)
    let rec span pred pos =
      if pos < n && pred row.[pos] then span pred (pos + 1) else pos
    in
    let name_end = span is_name_char 0 in
    if name_end < 2 then None
    else
      (* skip blanks, the type word (TEXT, INT, FLOAT, ...), then blanks *)
      let dots_at = name_end |> span is_blank |> span is_upper |> span is_blank in
      let variadic = dots_at + 2 < n && String.sub row dots_at 3 = "..." in
      Some { pos_name = String.lowercase_ascii (String.sub row 0 name_end);
             optional = false; variadic }
  in
  let rec rows acc = function
    | [] -> List.rev acc
    | line :: tl ->
      if String.length line = 0 || not (is_blank line.[0]) then List.rev acc
      else
        (match String.trim line with
         | "" -> List.rev acc
         | row ->
           let acc =
             match entry_of row with
             | Some p -> p :: acc
             | None -> acc
           in
           rows acc tl)
  in
  let rec section = function
    | [] -> []
    | line :: tl ->
      (match String.trim line with
       | "POSITIONALS:" | "Positionals:" -> rows [] tl
       | _ -> section tl)
  in
  section (String.split_on_char '\n' text)
let parse_help txt =
let clean = strip_ansi txt in
match Angstrom.parse_string ~consume:Consume.Prefix help_parser clean with
| Ok result -> Ok result
| Ok result ->
let cli11 = extract_cli11_positionals clean in
let usage = extract_usage_positionals clean in
let positionals = if cli11 <> [] then cli11 else usage in
Ok { result with positionals }
| Error msg -> Error msg
(* --- Pretty printers --- *)
@ -286,6 +500,11 @@ let print_subcommand sc =
Printf.printf "\n\t** SUBCOMMAND **\n\tName: %s\n\tDescription: %s\n"
sc.name sc.desc
(* Print one positional to stdout in the same layout as the other
   pretty printers. *)
let print_positional { pos_name; optional; variadic } =
  Printf.printf
    "\n\t** POSITIONAL **\n\tName: %s\n\tOptional: %b\n\tVariadic: %b\n"
    pos_name optional variadic
let print_help_result r =
List.iter print_entry r.entries;
List.iter print_subcommand r.subcommands
List.iter print_subcommand r.subcommands;
List.iter print_positional r.positionals