510 lines
17 KiB
OCaml
510 lines
17 KiB
OCaml
open Angstrom
|
|
|
|
(* Strip ANSI escape sequences and OSC hyperlinks from --help output *)
|
|
let strip_ansi s =
|
|
let buf = Buffer.create (String.length s) in
|
|
let len = String.length s in
|
|
let i = ref 0 in
|
|
while !i < len do
|
|
if !i + 1 < len && Char.code s.[!i] = 0x1b then begin
|
|
let next = s.[!i + 1] in
|
|
if next = '[' then begin
|
|
(* CSI sequence: ESC [ ... final_byte *)
|
|
i := !i + 2;
|
|
while !i < len && not (s.[!i] >= '@' && s.[!i] <= '~') do incr i done;
|
|
if !i < len then incr i
|
|
end else if next = ']' then begin
|
|
(* OSC sequence: ESC ] ... (terminated by BEL or ESC \) *)
|
|
i := !i + 2;
|
|
let found = ref false in
|
|
while !i < len && not !found do
|
|
if s.[!i] = '\x07' then
|
|
(incr i; found := true)
|
|
else if !i + 1 < len && Char.code s.[!i] = 0x1b && s.[!i + 1] = '\\' then
|
|
(i := !i + 2; found := true)
|
|
else
|
|
incr i
|
|
done
|
|
end else begin
|
|
(* Other ESC sequence, skip ESC + one char *)
|
|
i := !i + 2
|
|
end
|
|
end else begin
|
|
Buffer.add_char buf s.[!i];
|
|
incr i
|
|
end
|
|
done;
|
|
Buffer.contents buf
|
|
|
|
let is_whitespace = function ' ' | '\t' -> true | _ -> false
|
|
|
|
let is_alphanumeric = function
|
|
| 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' -> true
|
|
| _ -> false
|
|
|
|
let is_param_char = function
|
|
| 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '-' -> true
|
|
| _ -> false
|
|
|
|
let is_upper_or_underscore = function
|
|
| 'A' .. 'Z' | '_' -> true
|
|
| _ -> false
|
|
|
|
let is_long_char = function
|
|
| 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '-' -> true
|
|
| _ -> false
|
|
|
|
type switch = Short of char | Long of string | Both of char * string
|
|
type param = Mandatory of string | Optional of string
|
|
type entry = { switch : switch; param : param option; desc : string }
|
|
type subcommand = { name : string; desc : string }
|
|
type positional = { pos_name : string; optional : bool; variadic : bool }
|
|
type help_result = { entries : entry list; subcommands : subcommand list; positionals : positional list }
|
|
|
|
(* --- Low-level combinators --- *)
|
|
|
|
let inline_ws = skip_while (function ' ' | '\t' -> true | _ -> false)
|
|
let eol = end_of_line <|> end_of_input
|
|
let eol_strict = end_of_line (* Must consume a newline, no EOF match *)
|
|
|
|
let short_switch = char '-' *> satisfy is_alphanumeric
|
|
let long_switch = string "--" *> take_while1 is_long_char
|
|
let comma = char ',' *> inline_ws
|
|
|
|
(* Parameter parsers *)
|
|
let eq_opt_param =
|
|
string "[=" *> take_while1 is_param_char <* char ']' >>| fun a -> Optional a
|
|
|
|
let eq_man_param =
|
|
char '=' *> take_while1 is_param_char >>| fun a -> Mandatory a
|
|
|
|
(* Space-separated ALL_CAPS param: e.g. " FILE", " TIME_STYLE" *)
|
|
let space_upper_param =
|
|
char ' ' *> peek_char_fail >>= fun c ->
|
|
if is_upper_or_underscore c then
|
|
take_while1 is_param_char >>= fun name ->
|
|
(* Ensure it's truly all-uppercase (not a description word like "Do") *)
|
|
if String.length name >= 1 && String.for_all (fun c -> is_upper_or_underscore c || c >= '0' && c <= '9') name then
|
|
return (Mandatory name)
|
|
else
|
|
fail "not an all-caps param"
|
|
else
|
|
fail "not an uppercase param"
|
|
|
|
(* Angle-bracket param: e.g. "<file>", "<notation>" *)
|
|
let angle_param =
|
|
char '<' *> take_while1 (fun c -> c <> '>') <* char '>' >>| fun name ->
|
|
Mandatory name
|
|
|
|
(* Space + angle bracket param *)
|
|
let space_angle_param =
|
|
char ' ' *> angle_param
|
|
|
|
(* Optional angle bracket param: [<file>] *)
|
|
let opt_angle_param =
|
|
char '[' *> char '<' *> take_while1 (fun c -> c <> '>') <* char '>' <* char ']'
|
|
>>| fun name -> Optional name
|
|
|
|
let space_opt_angle_param =
|
|
char ' ' *> opt_angle_param
|
|
|
|
(* Go/Cobra style: space + lowercase type word like "string", "list", "int" *)
|
|
let space_type_param =
|
|
char ' ' *> peek_char_fail >>= fun c ->
|
|
if c >= 'a' && c <= 'z' then
|
|
take_while1 (fun c -> c >= 'a' && c <= 'z') >>= fun name ->
|
|
(* Only short type-like words *)
|
|
if String.length name <= 10 then
|
|
return (Mandatory name)
|
|
else
|
|
fail "too long for type param"
|
|
else
|
|
fail "not a lowercase type param"
|
|
|
|
let param_parser =
|
|
option None
|
|
(choice
|
|
[ eq_opt_param; eq_man_param;
|
|
space_opt_angle_param; space_angle_param;
|
|
space_upper_param; space_type_param ]
|
|
>>| fun a -> Some a)
|
|
|
|
(* Switch parser: -a, --all | --all / -a | -a | --all *)
|
|
let switch_parser =
|
|
choice
|
|
[
|
|
(short_switch >>= fun s ->
|
|
comma *> long_switch >>| fun l -> Both (s, l));
|
|
(long_switch >>= fun l ->
|
|
inline_ws *> char '/' *> inline_ws *>
|
|
short_switch >>| fun s -> Both (s, l));
|
|
(short_switch >>| fun s -> Short s);
|
|
(long_switch >>| fun l -> Long l);
|
|
]
|
|
|
|
(* --- Description parsing with multi-line continuation --- *)
|
|
|
|
(* Take the rest of the line as text (does not consume newline) *)
|
|
let rest_of_line = take_till (fun c -> c = '\n' || c = '\r')
|
|
|
|
(* Check if a line is a continuation line: deeply indented, doesn't start with '-' *)
|
|
let continuation_line =
|
|
peek_string 1 >>= fun _ ->
|
|
(* Must start with significant whitespace (8+ spaces or tab) *)
|
|
let count_indent s =
|
|
let n = ref 0 in
|
|
let i = ref 0 in
|
|
while !i < String.length s do
|
|
(match s.[!i] with
|
|
| ' ' -> incr n
|
|
| '\t' -> n := !n + 8
|
|
| _ -> i := String.length s);
|
|
incr i
|
|
done;
|
|
!n
|
|
in
|
|
available >>= fun avail ->
|
|
if avail = 0 then fail "eof"
|
|
else
|
|
(* Peek ahead to see indentation level *)
|
|
peek_string (min avail 80) >>= fun preview ->
|
|
let indent = count_indent preview in
|
|
let trimmed = String.trim preview in
|
|
let starts_with_dash =
|
|
String.length trimmed > 0 && trimmed.[0] = '-'
|
|
in
|
|
if indent >= 8 && not starts_with_dash then
|
|
(* This is a continuation line — consume whitespace + text *)
|
|
inline_ws *> rest_of_line <* eol
|
|
else
|
|
fail "not a continuation line"
|
|
|
|
let description =
|
|
inline_ws *> rest_of_line <* eol >>= fun first_line ->
|
|
many continuation_line >>| fun cont_lines ->
|
|
let all = first_line :: cont_lines in
|
|
let all = List.filter (fun s -> String.length (String.trim s) > 0) all in
|
|
String.concat " " (List.map String.trim all)
|
|
|
|
(* Description that appears on a separate line below the flag (Clap long style) *)
|
|
let description_below =
|
|
many1 continuation_line >>| fun lines ->
|
|
let lines = List.filter (fun s -> String.length (String.trim s) > 0) lines in
|
|
String.concat " " (List.map String.trim lines)
|
|
|
|
(* --- Line classification for skipping --- *)
|
|
|
|
(* An option line starts with whitespace then '-' *)
|
|
let at_option_line =
|
|
peek_string 1 >>= fun _ ->
|
|
available >>= fun avail ->
|
|
if avail = 0 then fail "eof"
|
|
else
|
|
peek_string (min avail 40) >>= fun preview ->
|
|
let s = String.trim preview in
|
|
if String.length s > 0 && s.[0] = '-' then return ()
|
|
else fail "not an option line"
|
|
|
|
(* Skip a non-option line (section header, blank, description-only, etc.) *)
|
|
let skip_non_option_line =
|
|
(* Don't skip if this looks like an option line *)
|
|
(at_option_line *> fail "this is an option line")
|
|
<|> (rest_of_line *> eol_strict *> return ())
|
|
|
|
(* --- Entry parsing --- *)
|
|
|
|
(* Parse a single flag entry *)
|
|
let entry =
|
|
inline_ws *>
|
|
lift2 (fun (sw, param) desc -> { switch = sw; param; desc })
|
|
(lift2 (fun a b -> (a, b)) switch_parser param_parser)
|
|
(description <|> (eol *> (description_below <|> return "")))
|
|
|
|
(* --- Subcommand parsing --- *)
|
|
|
|
(* A subcommand line: " name description" *)
|
|
let is_subcommand_char = function
|
|
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' -> true
|
|
| _ -> false
|
|
|
|
let subcommand_entry =
|
|
inline_ws *>
|
|
take_while1 is_subcommand_char >>= fun name ->
|
|
if String.length name < 2 then fail "subcommand name too short"
|
|
else
|
|
char ' ' *> char ' ' *> inline_ws *>
|
|
rest_of_line <* eol >>| fun desc ->
|
|
{ name; desc = String.trim desc }
|
|
|
|
(* --- Top-level parser --- *)
|
|
|
|
(* The main help parser: walks through lines, skipping non-option content,
|
|
collecting entries and subcommands *)
|
|
let help_parser =
|
|
let open Angstrom in
|
|
fix (fun _self ->
|
|
(* Try to parse an entry *)
|
|
let try_entry =
|
|
entry >>| fun e -> `Entry e
|
|
in
|
|
(* Try to parse a subcommand *)
|
|
let try_subcommand =
|
|
subcommand_entry >>| fun sc -> `Subcommand sc
|
|
in
|
|
(* Skip one non-option line *)
|
|
let try_skip =
|
|
skip_non_option_line >>| fun () -> `Skip
|
|
in
|
|
many (choice [ try_entry; try_subcommand; try_skip ]) >>| fun items ->
|
|
let entries = List.filter_map (function `Entry e -> Some e | _ -> None) items in
|
|
let subcommands = List.filter_map (function `Subcommand sc -> Some sc | _ -> None) items in
|
|
{ entries; subcommands; positionals = [] })
|
|
|
|
let skip_command_prefix s =
|
|
let len = String.length s in
|
|
let i = ref 0 in
|
|
let skip_ws () = while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done in
|
|
let is_word_char = function
|
|
| 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '/' | '.' -> true
|
|
| _ -> false
|
|
in
|
|
let rec loop () =
|
|
skip_ws ();
|
|
if !i >= len then ()
|
|
else if s.[!i] = '[' || s.[!i] = '<' || s.[!i] = '(' || s.[!i] = '{' || s.[!i] = '-' then ()
|
|
else if is_word_char s.[!i] then begin
|
|
let start = !i in
|
|
while !i < len && is_word_char s.[!i] do incr i done;
|
|
let word = String.sub s start (!i - start) in
|
|
let has_lower = ref false in
|
|
String.iter (fun c -> if c >= 'a' && c <= 'z' then has_lower := true) word;
|
|
if not !has_lower then
|
|
i := start
|
|
else
|
|
loop ()
|
|
end
|
|
in
|
|
loop ();
|
|
!i
|
|
|
|
let parse_usage_args s =
|
|
let len = String.length s in
|
|
let i = ref 0 in
|
|
let results = ref [] in
|
|
let skip_ws () =
|
|
while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done in
|
|
let is_pos_char c =
|
|
(c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9') in
|
|
let read_dots () =
|
|
skip_ws ();
|
|
if !i + 2 < len && s.[!i] = '.' && s.[!i+1] = '.' && s.[!i+2] = '.' then
|
|
(i := !i + 3; true)
|
|
else if !i + 2 < len && s.[!i] = '\xe2' && s.[!i+1] = '\x80' && s.[!i+2] = '\xa6' then
|
|
(i := !i + 3; true) (* UTF-8 ellipsis … *)
|
|
else false
|
|
in
|
|
let is_skip name =
|
|
let u = String.uppercase_ascii name in
|
|
u = "OPTIONS" || u = "OPTION" || u = "FLAGS" || u = "FLAG"
|
|
in
|
|
let is_clean_name name =
|
|
String.length name >= 2
|
|
&& String.for_all (fun c ->
|
|
(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
|
|
|| (c >= '0' && c <= '9') || c = '_' || c = '-') name
|
|
in
|
|
let is_letter c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in
|
|
let skip_braces () =
|
|
(* Skip {A|c|d|...} alternative blocks *)
|
|
if !i < len && s.[!i] = '{' then begin
|
|
let depth = ref 1 in
|
|
incr i;
|
|
while !i < len && !depth > 0 do
|
|
if s.[!i] = '{' then incr depth
|
|
else if s.[!i] = '}' then decr depth;
|
|
incr i
|
|
done;
|
|
ignore (read_dots ());
|
|
true
|
|
end else false
|
|
in
|
|
while !i < len do
|
|
skip_ws ();
|
|
if !i >= len then ()
|
|
else if skip_braces () then ()
|
|
else match s.[!i] with
|
|
| '[' ->
|
|
incr i;
|
|
let start = !i in
|
|
let depth = ref 1 in
|
|
while !i < len && !depth > 0 do
|
|
if s.[!i] = '[' then incr depth
|
|
else if s.[!i] = ']' then decr depth;
|
|
incr i
|
|
done;
|
|
let bracket_end = !i - 1 in
|
|
let inner = String.sub s start (max 0 (bracket_end - start)) |> String.trim in
|
|
let inner, has_inner_dots =
|
|
if String.ends_with ~suffix:"..." inner then
|
|
(String.sub inner 0 (String.length inner - 3) |> String.trim, true)
|
|
else (inner, false)
|
|
in
|
|
let variadic = has_inner_dots || read_dots () in
|
|
if String.length inner > 0
|
|
&& inner.[0] <> '-'
|
|
&& (is_letter inner.[0] || inner.[0] = '<') then begin
|
|
let name =
|
|
if inner.[0] = '<' then
|
|
let e = try String.index inner '>' with Not_found -> String.length inner in
|
|
String.sub inner 1 (e - 1)
|
|
else inner
|
|
in
|
|
if is_clean_name name && not (is_skip name) then
|
|
results := { pos_name = String.lowercase_ascii name;
|
|
optional = true; variadic } :: !results
|
|
end
|
|
| '<' ->
|
|
incr i;
|
|
let start = !i in
|
|
while !i < len && s.[!i] <> '>' do incr i done;
|
|
let name = String.sub s start (!i - start) in
|
|
if !i < len then incr i;
|
|
let variadic = read_dots () in
|
|
if is_clean_name name && not (is_skip name) then
|
|
results := { pos_name = String.lowercase_ascii name;
|
|
optional = false; variadic } :: !results
|
|
| '-' ->
|
|
while !i < len && s.[!i] <> ' ' && s.[!i] <> '\t' && s.[!i] <> ']' do incr i done
|
|
| c when c >= 'A' && c <= 'Z' ->
|
|
let start = !i in
|
|
while !i < len && is_pos_char s.[!i] do incr i done;
|
|
let name = String.sub s start (!i - start) in
|
|
let variadic = read_dots () in
|
|
if String.length name >= 2
|
|
&& String.for_all (fun c ->
|
|
(c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9')
|
|
) name
|
|
&& not (is_skip name) then
|
|
results := { pos_name = String.lowercase_ascii name;
|
|
optional = false; variadic } :: !results
|
|
| _ ->
|
|
incr i
|
|
done;
|
|
let seen = Hashtbl.create 8 in
|
|
List.rev !results |> List.filter (fun p ->
|
|
if Hashtbl.mem seen p.pos_name then false
|
|
else (Hashtbl.replace seen p.pos_name true; true))
|
|
|
|
let extract_usage_positionals text =
|
|
let lines = String.split_on_char '\n' text in
|
|
let lines_arr = Array.of_list lines in
|
|
let len = Array.length lines_arr in
|
|
let find_usage_line () =
|
|
let rec go i =
|
|
if i >= len then None
|
|
else
|
|
let t = String.trim lines_arr.(i) in
|
|
let tlen = String.length t in
|
|
if tlen >= 6 then
|
|
let prefix = String.lowercase_ascii (String.sub t 0 6) in
|
|
if prefix = "usage:" then begin
|
|
let after = String.sub t 6 (tlen - 6) |> String.trim in
|
|
if String.length after > 0 then Some after
|
|
else if i + 1 < len then
|
|
(* Clap style: USAGE:\n cmd [OPTIONS] PATTERN *)
|
|
let next = String.trim lines_arr.(i + 1) in
|
|
if String.length next > 0 then Some next else None
|
|
else None
|
|
end else go (i + 1)
|
|
else go (i + 1)
|
|
in
|
|
go 0
|
|
in
|
|
match find_usage_line () with
|
|
| None -> []
|
|
| Some usage ->
|
|
let cmd_end = skip_command_prefix usage in
|
|
let args = String.sub usage cmd_end (String.length usage - cmd_end) in
|
|
parse_usage_args args
|
|
|
|
let extract_cli11_positionals text =
|
|
let lines = String.split_on_char '\n' text in
|
|
let rec find_section = function
|
|
| [] -> []
|
|
| line :: rest ->
|
|
let t = String.trim line in
|
|
if t = "POSITIONALS:" || t = "Positionals:" then
|
|
parse_lines rest []
|
|
else
|
|
find_section rest
|
|
and parse_lines lines acc =
|
|
match lines with
|
|
| [] -> List.rev acc
|
|
| line :: rest ->
|
|
let len = String.length line in
|
|
if len = 0 || (line.[0] <> ' ' && line.[0] <> '\t') then
|
|
List.rev acc
|
|
else
|
|
let t = String.trim line in
|
|
if String.length t = 0 then List.rev acc
|
|
else match parse_one t with
|
|
| Some p -> parse_lines rest (p :: acc)
|
|
| None -> parse_lines rest acc
|
|
and parse_one s =
|
|
let len = String.length s in
|
|
let i = ref 0 in
|
|
let is_name_char c =
|
|
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
|
|
|| (c >= '0' && c <= '9') || c = '_' || c = '-' in
|
|
while !i < len && is_name_char s.[!i] do incr i done;
|
|
if !i < 2 then None
|
|
else
|
|
let name = String.sub s 0 !i in
|
|
while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done;
|
|
(* skip type word: TEXT, INT, FLOAT, ENUM, BOOLEAN, etc. *)
|
|
while !i < len && s.[!i] >= 'A' && s.[!i] <= 'Z' do incr i done;
|
|
while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done;
|
|
let variadic = !i + 2 < len && s.[!i] = '.' && s.[!i+1] = '.' && s.[!i+2] = '.' in
|
|
Some { pos_name = String.lowercase_ascii name; optional = false; variadic }
|
|
in
|
|
find_section lines
|
|
|
|
let parse_help txt =
|
|
let clean = strip_ansi txt in
|
|
match Angstrom.parse_string ~consume:Consume.Prefix help_parser clean with
|
|
| Ok result ->
|
|
let cli11 = extract_cli11_positionals clean in
|
|
let usage = extract_usage_positionals clean in
|
|
let positionals = if cli11 <> [] then cli11 else usage in
|
|
Ok { result with positionals }
|
|
| Error msg -> Error msg
|
|
|
|
(* --- Pretty printers --- *)
|
|
|
|
let print_switch = function
|
|
| Short o -> Printf.sprintf "Short: %c" o
|
|
| Long o -> Printf.sprintf "Long: %s" o
|
|
| Both (s, l) -> Printf.sprintf "Both, short: %c long: %s" s l
|
|
|
|
let print_opt = function
|
|
| Some (Mandatory o) -> Printf.sprintf "Mandatory: %s" o
|
|
| Some (Optional o) -> Printf.sprintf "Optional: %s" o
|
|
| None -> "None"
|
|
|
|
let print_entry e =
|
|
Printf.printf
|
|
"\n\t** ENTRY **\n\tSwitch: %s\n\tParam: %s\n\tDescription: %s\n"
|
|
(print_switch e.switch) (print_opt e.param) e.desc
|
|
|
|
let print_subcommand sc =
|
|
Printf.printf "\n\t** SUBCOMMAND **\n\tName: %s\n\tDescription: %s\n"
|
|
sc.name sc.desc
|
|
|
|
let print_positional p =
|
|
Printf.printf "\n\t** POSITIONAL **\n\tName: %s\n\tOptional: %b\n\tVariadic: %b\n"
|
|
p.pos_name p.optional p.variadic
|
|
|
|
let print_help_result r =
|
|
List.iter print_entry r.entries;
|
|
List.iter print_subcommand r.subcommands;
|
|
List.iter print_positional r.positionals
|