open Angstrom (* Strip ANSI escape sequences and OSC hyperlinks from --help output *) let strip_ansi s = let buf = Buffer.create (String.length s) in let len = String.length s in let i = ref 0 in while !i < len do if !i + 1 < len && Char.code s.[!i] = 0x1b then begin let next = s.[!i + 1] in if next = '[' then begin (* CSI sequence: ESC [ ... final_byte *) i := !i + 2; while !i < len && not (s.[!i] >= '@' && s.[!i] <= '~') do incr i done; if !i < len then incr i end else if next = ']' then begin (* OSC sequence: ESC ] ... (terminated by BEL or ESC \) *) i := !i + 2; let found = ref false in while !i < len && not !found do if s.[!i] = '\x07' then (incr i; found := true) else if !i + 1 < len && Char.code s.[!i] = 0x1b && s.[!i + 1] = '\\' then (i := !i + 2; found := true) else incr i done end else begin (* Other ESC sequence, skip ESC + one char *) i := !i + 2 end end else begin Buffer.add_char buf s.[!i]; incr i end done; Buffer.contents buf let is_whitespace = function ' ' | '\t' -> true | _ -> false let is_alphanumeric = function | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' -> true | _ -> false let is_param_char = function | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '-' -> true | _ -> false let is_upper_or_underscore = function | 'A' .. 'Z' | '_' -> true | _ -> false let is_long_char = function | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '-' -> true | _ -> false type switch = Short of char | Long of string | Both of char * string type param = Mandatory of string | Optional of string type entry = { switch : switch; param : param option; desc : string } type subcommand = { name : string; desc : string } type positional = { pos_name : string; optional : bool; variadic : bool } type help_result = { entries : entry list; subcommands : subcommand list; positionals : positional list; description : string } (* --- Low-level combinators --- *) let inline_ws = skip_while (function ' ' | '\t' -> true | _ -> false) let eol = end_of_line <|> end_of_input let eol_strict = end_of_line (* Must consume a newline, no EOF match *) let short_switch = char '-' *> satisfy is_alphanumeric let long_switch = string "--" *> take_while1 is_long_char let comma = char ',' *> inline_ws (* Parameter parsers *) let eq_opt_param = string "[=" *> take_while1 is_param_char <* char ']' >>| fun a -> Optional a let eq_man_param = char '=' *> take_while1 is_param_char >>| fun a -> Mandatory a (* Space-separated ALL_CAPS param: e.g. " FILE", " TIME_STYLE" *) let space_upper_param = char ' ' *> peek_char_fail >>= fun c -> if is_upper_or_underscore c then take_while1 is_param_char >>= fun name -> (* Ensure it's truly all-uppercase (not a description word like "Do") *) if String.length name >= 1 && String.for_all (fun c -> is_upper_or_underscore c || c >= '0' && c <= '9') name then return (Mandatory name) else fail "not an all-caps param" else fail "not an uppercase param" (* Angle-bracket param: e.g. "", "" *) let angle_param = char '<' *> take_while1 (fun c -> c <> '>') <* char '>' >>| fun name -> Mandatory name (* Space + angle bracket param *) let space_angle_param = char ' ' *> angle_param (* Optional angle bracket param: [] *) let opt_angle_param = char '[' *> char '<' *> take_while1 (fun c -> c <> '>') <* char '>' <* char ']' >>| fun name -> Optional name let space_opt_angle_param = char ' ' *> opt_angle_param (* Go/Cobra style: space + lowercase type word like "string", "list", "int" *) let space_type_param = char ' ' *> peek_char_fail >>= fun c -> if c >= 'a' && c <= 'z' then take_while1 (fun c -> c >= 'a' && c <= 'z') >>= fun name -> (* Only short type-like words *) if String.length name <= 10 then return (Mandatory name) else fail "too long for type param" else fail "not a lowercase type param" let param_parser = option None (choice [ eq_opt_param; eq_man_param; space_opt_angle_param; space_angle_param; space_upper_param; space_type_param ] >>| fun a -> Some a) (* Switch parser: -a, --all | --all / -a | -a | --all *) let switch_parser = choice [ (short_switch >>= fun s -> comma *> long_switch >>| fun l -> Both (s, l)); (long_switch >>= fun l -> inline_ws *> char '/' *> inline_ws *> short_switch >>| fun s -> Both (s, l)); (short_switch >>| fun s -> Short s); (long_switch >>| fun l -> Long l); ] (* --- Description parsing with multi-line continuation --- *) (* Take the rest of the line as text (does not consume newline) *) let rest_of_line = take_till (fun c -> c = '\n' || c = '\r') (* Check if a line is a continuation line: deeply indented, doesn't start with '-' *) let continuation_line = peek_string 1 >>= fun _ -> (* Must start with significant whitespace (8+ spaces or tab) *) let count_indent s = let n = ref 0 in let i = ref 0 in while !i < String.length s do (match s.[!i] with | ' ' -> incr n | '\t' -> n := !n + 8 | _ -> i := String.length s); incr i done; !n in available >>= fun avail -> if avail = 0 then fail "eof" else (* Peek ahead to see indentation level *) peek_string (min avail 80) >>= fun preview -> let indent = count_indent preview in let trimmed = String.trim preview in let starts_with_dash = String.length trimmed > 0 && trimmed.[0] = '-' in if indent >= 8 && not starts_with_dash then (* This is a continuation line — consume whitespace + text *) inline_ws *> rest_of_line <* eol else fail "not a continuation line" let description = inline_ws *> rest_of_line <* eol >>= fun first_line -> many continuation_line >>| fun cont_lines -> let all = first_line :: cont_lines in let all = List.filter (fun s -> String.length (String.trim s) > 0) all in String.concat " " (List.map String.trim all) (* Description that appears on a separate line below the flag (Clap long style) *) let description_below = many1 continuation_line >>| fun lines -> let lines = List.filter (fun s -> String.length (String.trim s) > 0) lines in String.concat " " (List.map String.trim lines) (* --- Line classification for skipping --- *) (* An option line starts with whitespace then '-' *) let at_option_line = peek_string 1 >>= fun _ -> available >>= fun avail -> if avail = 0 then fail "eof" else peek_string (min avail 40) >>= fun preview -> let s = String.trim preview in if String.length s > 0 && s.[0] = '-' then return () else fail "not an option line" (* Skip a non-option line (section header, blank, description-only, etc.) *) let skip_non_option_line = (* Don't skip if this looks like an option line *) (at_option_line *> fail "this is an option line") <|> (rest_of_line *> eol_strict *> return ()) (* --- Entry parsing --- *) (* Parse a single flag entry *) let entry = inline_ws *> lift2 (fun (sw, param) desc -> { switch = sw; param; desc }) (lift2 (fun a b -> (a, b)) switch_parser param_parser) (description <|> (eol *> (description_below <|> return ""))) (* --- Subcommand parsing --- *) (* A subcommand line: " name description" *) let is_subcommand_char = function | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' -> true | _ -> false let subcommand_entry = inline_ws *> take_while1 is_subcommand_char >>= fun name -> if String.length name < 2 then fail "subcommand name too short" else char ' ' *> char ' ' *> inline_ws *> rest_of_line <* eol >>| fun desc -> { name = String.lowercase_ascii name; desc = let t = String.trim desc in if String.length t >= 2 && t.[0] = '-' && t.[1] = ' ' then String.trim (String.sub t 2 (String.length t - 2)) else t } (* --- Section header detection --- *) (* Detect lines like "Arguments:", "POSITIONALS:", etc. that introduce positional-argument sections (where name+desc lines are NOT subcommands) *) let is_arg_section s = let lc = String.lowercase_ascii (String.trim s) in let base = if String.ends_with ~suffix:":" lc then String.sub lc 0 (String.length lc - 1) |> String.trim else lc in base = "arguments" || base = "args" || base = "positionals" || base = "positional arguments" (* A section header: left-aligned (or lightly indented) text ending with ':', not starting with '-'. Must be consumed BEFORE subcommand_entry in choice. *) let section_header = available >>= fun avail -> if avail = 0 then fail "eof" else peek_string (min avail 80) >>= fun preview -> (* Extract just the first line from the preview *) let first_line = match String.index_opt preview '\n' with | Some i -> String.sub preview 0 i | None -> preview in let t = String.trim first_line in let len = String.length t in let indent = let i = ref 0 in while !i < String.length first_line && (first_line.[!i] = ' ' || first_line.[!i] = '\t') do incr i done; !i in if len >= 2 && t.[len - 1] = ':' && t.[0] <> '-' && indent <= 4 then rest_of_line <* eol_strict >>| fun line -> is_arg_section line else fail "not a section header" (* --- Top-level parser --- *) (* The main help parser: walks through lines, skipping non-option content, collecting entries and subcommands *) let help_parser = let open Angstrom in fix (fun _self -> (* Try to parse an entry *) let try_entry = entry >>| fun e -> `Entry e in (* Detect section headers to track arg vs command sections *) let try_section = section_header >>| fun is_arg -> `Section is_arg in (* Try to parse a subcommand *) let try_subcommand = subcommand_entry >>| fun sc -> `Subcommand sc in (* Skip one non-option line *) let try_skip = skip_non_option_line >>| fun () -> `Skip in many (choice [ try_entry; try_section; try_subcommand; try_skip ]) >>| fun items -> let entries = List.filter_map (function `Entry e -> Some e | _ -> None) items in (* Only keep subcommands that didn't appear under an Arguments/Positionals section *) let subcommands = List.fold_left (fun (in_arg_sec, acc) item -> match item with | `Section is_arg -> (is_arg, acc) | `Subcommand sc when not in_arg_sec -> (in_arg_sec, sc :: acc) | _ -> (in_arg_sec, acc) ) (false, []) items |> snd |> List.rev |> List.fold_left (fun acc sc -> match List.assoc_opt sc.name acc with | Some prev when String.length prev.desc >= String.length sc.desc -> acc | _ -> (sc.name, sc) :: List.remove_assoc sc.name acc ) [] |> List.rev_map snd in { entries; subcommands; positionals = []; description = "" }) let skip_command_prefix s = let len = String.length s in let i = ref 0 in let skip_ws () = while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done in let is_word_char = function | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '/' | '.' -> true | _ -> false in let rec loop () = skip_ws (); if !i >= len then () else if s.[!i] = '[' || s.[!i] = '<' || s.[!i] = '(' || s.[!i] = '{' || s.[!i] = '-' then () else if is_word_char s.[!i] then begin let start = !i in while !i < len && is_word_char s.[!i] do incr i done; let word = String.sub s start (!i - start) in let has_lower = ref false in String.iter (fun c -> if c >= 'a' && c <= 'z' then has_lower := true) word; if not !has_lower then i := start else loop () end in loop (); !i let parse_usage_args s = let len = String.length s in let i = ref 0 in let results = ref [] in let skip_ws () = while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done in let is_pos_char c = (c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9') in let read_dots () = skip_ws (); if !i + 2 < len && s.[!i] = '.' && s.[!i+1] = '.' && s.[!i+2] = '.' then (i := !i + 3; true) else if !i + 2 < len && s.[!i] = '\xe2' && s.[!i+1] = '\x80' && s.[!i+2] = '\xa6' then (i := !i + 3; true) (* UTF-8 ellipsis … *) else false in let is_skip name = let u = String.uppercase_ascii name in u = "OPTIONS" || u = "OPTION" || u = "FLAGS" || u = "FLAG" in let is_clean_name name = String.length name >= 2 && String.for_all (fun c -> (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c = '_' || c = '-') name in let is_letter c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in let skip_braces () = (* Skip {A|c|d|...} alternative blocks *) if !i < len && s.[!i] = '{' then begin let depth = ref 1 in incr i; while !i < len && !depth > 0 do if s.[!i] = '{' then incr depth else if s.[!i] = '}' then decr depth; incr i done; ignore (read_dots ()); true end else false in while !i < len do skip_ws (); if !i >= len then () else if skip_braces () then () else match s.[!i] with | '[' -> incr i; let start = !i in let depth = ref 1 in while !i < len && !depth > 0 do if s.[!i] = '[' then incr depth else if s.[!i] = ']' then decr depth; incr i done; let bracket_end = !i - 1 in let inner = String.sub s start (max 0 (bracket_end - start)) |> String.trim in let inner, has_inner_dots = if String.ends_with ~suffix:"..." inner then (String.sub inner 0 (String.length inner - 3) |> String.trim, true) else (inner, false) in let variadic = has_inner_dots || read_dots () in if String.length inner > 0 && inner.[0] <> '-' && (is_letter inner.[0] || inner.[0] = '<') then begin let name = if inner.[0] = '<' then let e = try String.index inner '>' with Not_found -> String.length inner in String.sub inner 1 (e - 1) else inner in if is_clean_name name && not (is_skip name) then results := { pos_name = String.lowercase_ascii name; optional = true; variadic } :: !results end | '<' -> incr i; let start = !i in while !i < len && s.[!i] <> '>' do incr i done; let name = String.sub s start (!i - start) in if !i < len then incr i; let variadic = read_dots () in if is_clean_name name && not (is_skip name) then results := { pos_name = String.lowercase_ascii name; optional = false; variadic } :: !results | '-' -> while !i < len && s.[!i] <> ' ' && s.[!i] <> '\t' && s.[!i] <> ']' do incr i done | c when c >= 'A' && c <= 'Z' -> let start = !i in while !i < len && is_pos_char s.[!i] do incr i done; let name = String.sub s start (!i - start) in let variadic = read_dots () in if String.length name >= 2 && String.for_all (fun c -> (c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9') ) name && not (is_skip name) then results := { pos_name = String.lowercase_ascii name; optional = false; variadic } :: !results | _ -> incr i done; List.rev !results |> List.fold_left (fun (seen, acc) p -> if List.mem p.pos_name seen then (seen, acc) else (p.pos_name :: seen, p :: acc) ) ([], []) |> snd |> List.rev let extract_usage_positionals text = let lines = String.split_on_char '\n' text in let lines_arr = Array.of_list lines in let len = Array.length lines_arr in let find_usage_line () = let rec go i = if i >= len then None else let t = String.trim lines_arr.(i) in let tlen = String.length t in let lc = String.lowercase_ascii t in if tlen >= 6 && String.sub lc 0 6 = "usage:" then begin let after = String.sub t 6 (tlen - 6) |> String.trim in if String.length after > 0 then Some after else if i + 1 < len then (* Clap style: USAGE:\n cmd [OPTIONS] PATTERN *) let next = String.trim lines_arr.(i + 1) in if String.length next > 0 then Some next else None else None end else if lc = "usage" then begin if i + 1 < len then let next = String.trim lines_arr.(i + 1) in if String.length next > 0 then Some next else None else None end else go (i + 1) in go 0 in match find_usage_line () with | None -> [] | Some usage -> let cmd_end = skip_command_prefix usage in let args = String.sub usage cmd_end (String.length usage - cmd_end) in parse_usage_args args let extract_cli11_positionals text = let lines = String.split_on_char '\n' text in let rec find_section = function | [] -> [] | line :: rest -> let t = String.trim line in if t = "POSITIONALS:" || t = "Positionals:" then parse_lines rest [] else find_section rest and parse_lines lines acc = match lines with | [] -> List.rev acc | line :: rest -> let len = String.length line in if len = 0 || (line.[0] <> ' ' && line.[0] <> '\t') then List.rev acc else let t = String.trim line in if String.length t = 0 then List.rev acc else match parse_one t with | Some p -> parse_lines rest (p :: acc) | None -> parse_lines rest acc and parse_one s = let len = String.length s in let i = ref 0 in let is_name_char c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c = '_' || c = '-' in while !i < len && is_name_char s.[!i] do incr i done; if !i < 2 then None else let name = String.sub s 0 !i in while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done; (* skip type word: TEXT, INT, FLOAT, ENUM, BOOLEAN, etc. *) while !i < len && s.[!i] >= 'A' && s.[!i] <= 'Z' do incr i done; while !i < len && (s.[!i] = ' ' || s.[!i] = '\t') do incr i done; let variadic = !i + 2 < len && s.[!i] = '.' && s.[!i+1] = '.' && s.[!i+2] = '.' in Some { pos_name = String.lowercase_ascii name; optional = false; variadic } in find_section lines let parse_help txt = let clean = strip_ansi txt in match Angstrom.parse_string ~consume:Consume.Prefix help_parser clean with | Ok result -> let cli11 = extract_cli11_positionals clean in let usage = extract_usage_positionals clean in let positionals = if cli11 <> [] then cli11 else usage in Ok { result with positionals } | Error msg -> Error msg (* --- Pretty printers --- *) let print_switch = function | Short o -> Printf.sprintf "Short: %c" o | Long o -> Printf.sprintf "Long: %s" o | Both (s, l) -> Printf.sprintf "Both, short: %c long: %s" s l let print_opt = function | Some (Mandatory o) -> Printf.sprintf "Mandatory: %s" o | Some (Optional o) -> Printf.sprintf "Optional: %s" o | None -> "None" let print_entry e = Printf.printf "\n\t** ENTRY **\n\tSwitch: %s\n\tParam: %s\n\tDescription: %s\n" (print_switch e.switch) (print_opt e.param) e.desc let print_subcommand sc = Printf.printf "\n\t** SUBCOMMAND **\n\tName: %s\n\tDescription: %s\n" sc.name sc.desc let print_positional p = Printf.printf "\n\t** POSITIONAL **\n\tName: %s\n\tOptional: %b\n\tVariadic: %b\n" p.pos_name p.optional p.variadic let print_help_result r = List.iter print_entry r.entries; List.iter print_subcommand r.subcommands; List.iter print_positional r.positionals