open Angstrom (* Strip ANSI escape sequences and OSC hyperlinks from --help output *) let strip_ansi s = let buf = Buffer.create (String.length s) in let len = String.length s in let i = ref 0 in while !i < len do if !i + 1 < len && Char.code s.[!i] = 0x1b then begin let next = s.[!i + 1] in if next = '[' then begin (* CSI sequence: ESC [ ... final_byte *) i := !i + 2; while !i < len && not (s.[!i] >= '@' && s.[!i] <= '~') do incr i done; if !i < len then incr i end else if next = ']' then begin (* OSC sequence: ESC ] ... (terminated by BEL or ESC \) *) i := !i + 2; let found = ref false in while !i < len && not !found do if s.[!i] = '\x07' then (incr i; found := true) else if !i + 1 < len && Char.code s.[!i] = 0x1b && s.[!i + 1] = '\\' then (i := !i + 2; found := true) else incr i done end else begin (* Other ESC sequence, skip ESC + one char *) i := !i + 2 end end else begin Buffer.add_char buf s.[!i]; incr i end done; Buffer.contents buf let is_whitespace = function ' ' | '\t' -> true | _ -> false let is_alphanumeric = function | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' -> true | _ -> false let is_param_char = function | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '-' -> true | _ -> false let is_upper_or_underscore = function | 'A' .. 'Z' | '_' -> true | _ -> false let is_long_char = function | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '-' -> true | _ -> false type switch = Short of char | Long of string | Both of char * string type param = Mandatory of string | Optional of string type entry = { switch : switch; param : param option; desc : string } type subcommand = { name : string; desc : string } type help_result = { entries : entry list; subcommands : subcommand list } (* --- Low-level combinators --- *) let inline_ws = skip_while (function ' ' | '\t' -> true | _ -> false) let eol = end_of_line <|> end_of_input let eol_strict = end_of_line (* Must consume a newline, no EOF match *) let short_switch = char '-' *> satisfy is_alphanumeric let long_switch = string "--" *> take_while1 is_long_char let comma = char ',' *> inline_ws (* Parameter parsers *) let eq_opt_param = string "[=" *> take_while1 is_param_char <* char ']' >>| fun a -> Optional a let eq_man_param = char '=' *> take_while1 is_param_char >>| fun a -> Mandatory a (* Space-separated ALL_CAPS param: e.g. " FILE", " TIME_STYLE" *) let space_upper_param = char ' ' *> peek_char_fail >>= fun c -> if is_upper_or_underscore c then take_while1 is_param_char >>= fun name -> (* Ensure it's truly all-uppercase (not a description word like "Do") *) if String.length name >= 1 && String.for_all (fun c -> is_upper_or_underscore c || c >= '0' && c <= '9') name then return (Mandatory name) else fail "not an all-caps param" else fail "not an uppercase param" (* Angle-bracket param: e.g. "", "" *) let angle_param = char '<' *> take_while1 (fun c -> c <> '>') <* char '>' >>| fun name -> Mandatory name (* Space + angle bracket param *) let space_angle_param = char ' ' *> angle_param (* Optional angle bracket param: [] *) let opt_angle_param = char '[' *> char '<' *> take_while1 (fun c -> c <> '>') <* char '>' <* char ']' >>| fun name -> Optional name let space_opt_angle_param = char ' ' *> opt_angle_param (* Go/Cobra style: space + lowercase type word like "string", "list", "int" *) let space_type_param = char ' ' *> peek_char_fail >>= fun c -> if c >= 'a' && c <= 'z' then take_while1 (fun c -> c >= 'a' && c <= 'z') >>= fun name -> (* Only short type-like words *) if String.length name <= 10 then return (Mandatory name) else fail "too long for type param" else fail "not a lowercase type param" let param_parser = option None (choice [ eq_opt_param; eq_man_param; space_opt_angle_param; space_angle_param; space_upper_param; space_type_param ] >>| fun a -> Some a) (* Switch parser: -a, --all | -a | --all *) let switch_parser = choice [ (short_switch >>= fun s -> comma *> long_switch >>| fun l -> Both (s, l)); (short_switch >>| fun s -> Short s); (long_switch >>| fun l -> Long l); ] (* --- Description parsing with multi-line continuation --- *) (* Take the rest of the line as text (does not consume newline) *) let rest_of_line = take_till (fun c -> c = '\n' || c = '\r') (* Check if a line is a continuation line: deeply indented, doesn't start with '-' *) let continuation_line = peek_string 1 >>= fun _ -> (* Must start with significant whitespace (8+ spaces or tab) *) let count_indent s = let n = ref 0 in let i = ref 0 in while !i < String.length s do (match s.[!i] with | ' ' -> incr n | '\t' -> n := !n + 8 | _ -> i := String.length s); incr i done; !n in available >>= fun avail -> if avail = 0 then fail "eof" else (* Peek ahead to see indentation level *) peek_string (min avail 80) >>= fun preview -> let indent = count_indent preview in let trimmed = String.trim preview in let starts_with_dash = String.length trimmed > 0 && trimmed.[0] = '-' in if indent >= 8 && not starts_with_dash then (* This is a continuation line — consume whitespace + text *) inline_ws *> rest_of_line <* eol else fail "not a continuation line" let description = inline_ws *> rest_of_line <* eol >>= fun first_line -> many continuation_line >>| fun cont_lines -> let all = first_line :: cont_lines in let all = List.filter (fun s -> String.length (String.trim s) > 0) all in String.concat " " (List.map String.trim all) (* Description that appears on a separate line below the flag (Clap long style) *) let description_below = many1 continuation_line >>| fun lines -> let lines = List.filter (fun s -> String.length (String.trim s) > 0) lines in String.concat " " (List.map String.trim lines) (* --- Line classification for skipping --- *) (* An option line starts with whitespace then '-' *) let at_option_line = peek_string 1 >>= fun _ -> available >>= fun avail -> if avail = 0 then fail "eof" else peek_string (min avail 40) >>= fun preview -> let s = String.trim preview in if String.length s > 0 && s.[0] = '-' then return () else fail "not an option line" (* Skip a non-option line (section header, blank, description-only, etc.) *) let skip_non_option_line = (* Don't skip if this looks like an option line *) (at_option_line *> fail "this is an option line") <|> (rest_of_line *> eol_strict *> return ()) (* --- Entry parsing --- *) (* Parse a single flag entry *) let entry = inline_ws *> lift2 (fun (sw, param) desc -> { switch = sw; param; desc }) (lift2 (fun a b -> (a, b)) switch_parser param_parser) (description <|> (eol *> (description_below <|> return ""))) (* --- Subcommand parsing --- *) (* A subcommand line: " name description" *) let subcommand_entry = inline_ws *> take_while1 (fun c -> c <> ' ' && c <> '\t' && c <> '\n') >>= fun name -> (* Must have at least 2 spaces before description *) char ' ' *> char ' ' *> inline_ws *> rest_of_line <* eol >>| fun desc -> { name; desc = String.trim desc } (* --- Top-level parser --- *) (* The main help parser: walks through lines, skipping non-option content, collecting entries and subcommands *) let help_parser = let open Angstrom in fix (fun _self -> (* Try to parse an entry *) let try_entry = entry >>| fun e -> `Entry e in (* Try to parse a subcommand *) let try_subcommand = subcommand_entry >>| fun sc -> `Subcommand sc in (* Skip one non-option line *) let try_skip = skip_non_option_line >>| fun () -> `Skip in many (choice [ try_entry; try_subcommand; try_skip ]) >>| fun items -> let entries = List.filter_map (function `Entry e -> Some e | _ -> None) items in let subcommands = List.filter_map (function `Subcommand sc -> Some sc | _ -> None) items in { entries; subcommands }) let parse_help txt = let clean = strip_ansi txt in match Angstrom.parse_string ~consume:Consume.Prefix help_parser clean with | Ok result -> Ok result | Error msg -> Error msg (* --- Pretty printers --- *) let print_switch = function | Short o -> Printf.sprintf "Short: %c" o | Long o -> Printf.sprintf "Long: %s" o | Both (s, l) -> Printf.sprintf "Both, short: %c long: %s" s l let print_opt = function | Some (Mandatory o) -> Printf.sprintf "Mandatory: %s" o | Some (Optional o) -> Printf.sprintf "Optional: %s" o | None -> "None" let print_entry e = Printf.printf "\n\t** ENTRY **\n\tSwitch: %s\n\tParam: %s\n\tDescription: %s\n" (print_switch e.switch) (print_opt e.param) e.desc let print_subcommand sc = Printf.printf "\n\t** SUBCOMMAND **\n\tName: %s\n\tDescription: %s\n" sc.name sc.desc let print_help_result r = List.iter print_entry r.entries; List.iter print_subcommand r.subcommands