first draft

This commit is contained in:
atagen 2026-03-18 18:19:37 +11:00
parent ab009ec9af
commit 01ccf64efc
13 changed files with 1311 additions and 239 deletions

View file

@ -1,16 +1,55 @@
(* open Angstrom_unix *)
(* also look for "subcommands" for clapslop *)
(* and other common help patterns *)
open Angstrom
let ( <| ) = ( @@ )
let ( <&> ) p1 p2 = lift2 (fun a b -> (a, b)) p1 p2
let is_whitespace = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false
(* Strip ANSI escape sequences and OSC hyperlinks from --help output *)
let strip_ansi s =
let buf = Buffer.create (String.length s) in
let len = String.length s in
let i = ref 0 in
while !i < len do
if !i + 1 < len && Char.code s.[!i] = 0x1b then begin
let next = s.[!i + 1] in
if next = '[' then begin
(* CSI sequence: ESC [ ... final_byte *)
i := !i + 2;
while !i < len && not (s.[!i] >= '@' && s.[!i] <= '~') do incr i done;
if !i < len then incr i
end else if next = ']' then begin
(* OSC sequence: ESC ] ... (terminated by BEL or ESC \) *)
i := !i + 2;
let found = ref false in
while !i < len && not !found do
if s.[!i] = '\x07' then
(incr i; found := true)
else if !i + 1 < len && Char.code s.[!i] = 0x1b && s.[!i + 1] = '\\' then
(i := !i + 2; found := true)
else
incr i
done
end else begin
(* Other ESC sequence, skip ESC + one char *)
i := !i + 2
end
end else begin
Buffer.add_char buf s.[!i];
incr i
end
done;
Buffer.contents buf
let is_whitespace = function ' ' | '\t' -> true | _ -> false
let is_alphanumeric = function
| 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' -> true
| _ -> false
let is_param_char = function
| 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '-' -> true
| _ -> false
let is_upper_or_underscore = function
| 'A' .. 'Z' | '_' -> true
| _ -> false
let is_long_char = function
| 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '-' -> true
| _ -> false
@ -18,48 +57,207 @@ let is_long_char = function
type switch = Short of char | Long of string | Both of char * string
type param = Mandatory of string | Optional of string
type entry = { switch : switch; param : param option; desc : string }
type subcommand = { name : string; desc : string }
type help_result = { entries : entry list; subcommands : subcommand list }
(* --- Low-level combinators --- *)
let inline_ws = skip_while (function ' ' | '\t' -> true | _ -> false)
let eol = end_of_line <|> end_of_input
let eol_strict = end_of_line (* Must consume a newline, no EOF match *)
let whitespace = skip_while is_whitespace
let comma = char ',' *> whitespace
let short_switch = char '-' *> satisfy is_alphanumeric
let long_switch = string "--" *> take_while1 is_long_char
let comma = char ',' *> inline_ws
let opt_param =
print_endline "opt param is running";
string "[=" *> take_while is_alphanumeric <* char ']' >>| fun a -> Optional a
(* Parameter parsers *)
let eq_opt_param =
string "[=" *> take_while1 is_param_char <* char ']' >>| fun a -> Optional a
let man_param =
print_endline "man param is running";
char '=' *> take_while is_alphanumeric >>| fun a -> Mandatory a
let eq_man_param =
char '=' *> take_while1 is_param_char >>| fun a -> Mandatory a
(* Space-separated ALL_CAPS param: e.g. " FILE", " TIME_STYLE" *)
let space_upper_param =
char ' ' *> peek_char_fail >>= fun c ->
if is_upper_or_underscore c then
take_while1 is_param_char >>= fun name ->
(* Ensure it's truly all-uppercase (not a description word like "Do") *)
if String.length name >= 1 && String.for_all (fun c -> is_upper_or_underscore c || c >= '0' && c <= '9') name then
return (Mandatory name)
else
fail "not an all-caps param"
else
fail "not an uppercase param"
(* Angle-bracket param: e.g. "<file>", "<notation>" *)
let angle_param =
char '<' *> take_while1 (fun c -> c <> '>') <* char '>' >>| fun name ->
Mandatory name
(* Space + angle bracket param *)
let space_angle_param =
char ' ' *> angle_param
(* Optional angle bracket param: [<file>] *)
let opt_angle_param =
char '[' *> char '<' *> take_while1 (fun c -> c <> '>') <* char '>' <* char ']'
>>| fun name -> Optional name
let space_opt_angle_param =
char ' ' *> opt_angle_param
(* Go/Cobra style: space + lowercase type word like "string", "list", "int" *)
let space_type_param =
char ' ' *> peek_char_fail >>= fun c ->
if c >= 'a' && c <= 'z' then
take_while1 (fun c -> c >= 'a' && c <= 'z') >>= fun name ->
(* Only short type-like words *)
if String.length name <= 10 then
return (Mandatory name)
else
fail "too long for type param"
else
fail "not a lowercase type param"
let param_parser =
option None (choice [ opt_param; man_param ] >>| fun a -> Some a)
option None
(choice
[ eq_opt_param; eq_man_param;
space_opt_angle_param; space_angle_param;
space_upper_param; space_type_param ]
>>| fun a -> Some a)
(* Switch parser: -a, --all | -a | --all *)
let switch_parser =
choice
[
(* -a, --all *)
( short_switch >>= fun s ->
comma *> long_switch >>| fun l -> Both (s, l) );
(* -a *)
(short_switch >>= fun s ->
comma *> long_switch >>| fun l -> Both (s, l));
(short_switch >>| fun s -> Short s);
(* --all *)
(long_switch >>| fun l -> Long l);
]
let description = whitespace *> take_till (fun c -> c = '\n') <* end_of_line
(* --- Description parsing with multi-line continuation --- *)
(* Take the rest of the line as text (does not consume newline) *)
let rest_of_line = take_till (fun c -> c = '\n' || c = '\r')
(* Check if a line is a continuation line: deeply indented, doesn't start with '-' *)
let continuation_line =
peek_string 1 >>= fun _ ->
(* Must start with significant whitespace (8+ spaces or tab) *)
let count_indent s =
let n = ref 0 in
let i = ref 0 in
while !i < String.length s do
(match s.[!i] with
| ' ' -> incr n
| '\t' -> n := !n + 8
| _ -> i := String.length s);
incr i
done;
!n
in
available >>= fun avail ->
if avail = 0 then fail "eof"
else
(* Peek ahead to see indentation level *)
peek_string (min avail 80) >>= fun preview ->
let indent = count_indent preview in
let trimmed = String.trim preview in
let starts_with_dash =
String.length trimmed > 0 && trimmed.[0] = '-'
in
if indent >= 8 && not starts_with_dash then
(* This is a continuation line — consume whitespace + text *)
inline_ws *> rest_of_line <* eol
else
fail "not a continuation line"
let description =
inline_ws *> rest_of_line <* eol >>= fun first_line ->
many continuation_line >>| fun cont_lines ->
let all = first_line :: cont_lines in
let all = List.filter (fun s -> String.length (String.trim s) > 0) all in
String.concat " " (List.map String.trim all)
(* Description that appears on a separate line below the flag (Clap long style) *)
let description_below =
many1 continuation_line >>| fun lines ->
let lines = List.filter (fun s -> String.length (String.trim s) > 0) lines in
String.concat " " (List.map String.trim lines)
(* --- Line classification for skipping --- *)
(* An option line starts with whitespace then '-' *)
let at_option_line =
peek_string 1 >>= fun _ ->
available >>= fun avail ->
if avail = 0 then fail "eof"
else
peek_string (min avail 40) >>= fun preview ->
let s = String.trim preview in
if String.length s > 0 && s.[0] = '-' then return ()
else fail "not an option line"
(* Skip a non-option line (section header, blank, description-only, etc.) *)
let skip_non_option_line =
(* Don't skip if this looks like an option line *)
(at_option_line *> fail "this is an option line")
<|> (rest_of_line *> eol_strict *> return ())
(* --- Entry parsing --- *)
(* Parse a single flag entry *)
let entry =
skip_while (fun c -> c <> '-')
*> lift3 (fun a b c -> (a, b, c)) switch_parser param_parser description
>>| fun (switch, param, desc) -> { switch; param; desc }
inline_ws *>
lift2 (fun (sw, param) desc -> { switch = sw; param; desc })
(lift2 (fun a b -> (a, b)) switch_parser param_parser)
(description <|> (eol *> (description_below <|> return "")))
let endline = option () (char '\n' *> return ())
let entry_line = entry <* endline
let help_parser = many entry_line
(* --- Subcommand parsing --- *)
(* A subcommand line: " name description" *)
let subcommand_entry =
inline_ws *>
take_while1 (fun c -> c <> ' ' && c <> '\t' && c <> '\n') >>= fun name ->
(* Must have at least 2 spaces before description *)
char ' ' *> char ' ' *> inline_ws *>
rest_of_line <* eol >>| fun desc ->
{ name; desc = String.trim desc }
(* --- Top-level parser --- *)
(* The main help parser: walks through lines, skipping non-option content,
collecting entries and subcommands *)
let help_parser =
let open Angstrom in
fix (fun _self ->
(* Try to parse an entry *)
let try_entry =
entry >>| fun e -> `Entry e
in
(* Try to parse a subcommand *)
let try_subcommand =
subcommand_entry >>| fun sc -> `Subcommand sc
in
(* Skip one non-option line *)
let try_skip =
skip_non_option_line >>| fun () -> `Skip
in
many (choice [ try_entry; try_subcommand; try_skip ]) >>| fun items ->
let entries = List.filter_map (function `Entry e -> Some e | _ -> None) items in
let subcommands = List.filter_map (function `Subcommand sc -> Some sc | _ -> None) items in
{ entries; subcommands })
let parse_help txt =
Angstrom.parse_string ~consume:Consume.Prefix help_parser txt
let clean = strip_ansi txt in
match Angstrom.parse_string ~consume:Consume.Prefix help_parser clean with
| Ok result -> Ok result
| Error msg -> Error msg
(* --- Pretty printers --- *)
let print_switch = function
| Short o -> Printf.sprintf "Short: %c" o
@ -75,3 +273,11 @@ let print_entry e =
Printf.printf
"\n\t** ENTRY **\n\tSwitch: %s\n\tParam: %s\n\tDescription: %s\n"
(print_switch e.switch) (print_opt e.param) e.desc
let print_subcommand sc =
Printf.printf "\n\t** SUBCOMMAND **\n\tName: %s\n\tDescription: %s\n"
sc.name sc.desc
let print_help_result r =
List.iter print_entry r.entries;
List.iter print_subcommand r.subcommands