first draft
This commit is contained in:
parent
ab009ec9af
commit
01ccf64efc
13 changed files with 1311 additions and 239 deletions
4
lib/dune
4
lib/dune
|
|
@ -1,3 +1,3 @@
|
|||
(library
|
||||
(name inshellah_parser)
|
||||
(libraries angstrom angstrom-unix))
|
||||
(name inshellah)
|
||||
(libraries angstrom angstrom-unix str unix))
|
||||
|
|
|
|||
415
lib/manpage.ml
Normal file
415
lib/manpage.ml
Normal file
|
|
@ -0,0 +1,415 @@
|
|||
open Parser
|
||||
|
||||
(* --- Groff escape/formatting stripper --- *)
|
||||
|
||||
(** [strip_groff_escapes s] returns [s] with groff inline escapes removed or
    replaced by a plain-ASCII equivalent.

    Handled forms:
    - font, string and number-register references [\fX], [\f(XX], [\f[..]],
      [\*..], [\n..] are dropped entirely;
    - [\-] becomes ['-']; [\e] and [\\] become a backslash;
    - [\ ], [\~] and [\0] become a single space;
    - named glyphs [\(xx] and [\[name]] are mapped through one shared table
      (quotes, dashes); unknown glyphs are dropped;
    - size changes [\sN], [\s+N], [\s-N], [\s'N'], [\s[N]] are dropped;
    - [\m[..]] and [\X'..'] are dropped;
    - any other [\c] pair is dropped.

    A lone trailing backslash is copied through unchanged. *)
let strip_groff_escapes s =
  let len = String.length s in
  let buf = Buffer.create len in
  (* Index just past the first [close] at or after [k]; [len] if none. *)
  let skip_past close k =
    if k >= len then len
    else
      match String.index_from_opt s k close with
      | Some j -> j + 1
      | None -> len
  in
  (* Skip a groff name starting at [k]: [X] (1 char), [(XX] (3 chars) or
     [[...]].  The two-letter form must consume the paren AND both letters. *)
  let skip_name k =
    if k >= len then k
    else
      match s.[k] with
      | '(' -> min len (k + 3)
      | '[' -> skip_past ']' (k + 1)
      | _ -> k + 1
  in
  (* Shared glyph table for both [\(xx] and [\[name]]. *)
  let add_glyph = function
    | "aq" -> Buffer.add_char buf '\''
    | "lq" | "Lq" | "rq" | "Rq" | "dq" -> Buffer.add_char buf '"'
    | "em" | "en" | "hy" | "mi" -> Buffer.add_char buf '-'
    | _ -> ()
  in
  let is_digit k = k < len && s.[k] >= '0' && s.[k] <= '9' in
  let rec go i =
    if i >= len then ()
    else if s.[i] <> '\\' || i + 1 >= len then begin
      Buffer.add_char buf s.[i];
      go (i + 1)
    end else begin
      (* [arg] is the first index after the escape letter. *)
      let arg = i + 2 in
      let next =
        match s.[i + 1] with
        | 'f' | '*' | 'n' -> skip_name arg
        | '-' -> Buffer.add_char buf '-'; arg
        | 'e' | '\\' -> Buffer.add_char buf '\\'; arg
        | ' ' | '~' | '0' -> Buffer.add_char buf ' '; arg
        | '&' | '/' | ',' -> arg (* zero-width escapes *)
        | '(' ->
          if i + 3 < len then begin
            add_glyph (String.sub s arg 2);
            i + 4
          end else arg
        | '[' ->
          (match String.index_from_opt s arg ']' with
           | Some close ->
             add_glyph (String.sub s arg (close - arg));
             close + 1
           | None -> len)
        | 's' ->
          let k =
            if arg < len && (s.[arg] = '+' || s.[arg] = '-') then arg + 1
            else arg
          in
          if k < len && s.[k] = '\'' then skip_past '\'' (k + 1)
          else if k < len && s.[k] = '[' then skip_past ']' (k + 1)
          else if is_digit k then (if is_digit (k + 1) then k + 2 else k + 1)
          else k
        | 'm' ->
          if arg < len && s.[arg] = '[' then skip_past ']' (arg + 1) else arg
        | 'X' ->
          if arg < len && s.[arg] = '\'' then skip_past '\'' (arg + 1) else arg
        | _ -> arg (* unknown escape: drop the two-character pair *)
      in
      go next
    end
  in
  go 0;
  Buffer.contents buf
|
||||
|
||||
(** [strip_inline_macro_args s] flattens the argument list of a
    font-alternating macro ([.BI], [.BR], [.IR], ...) into one string.

    Double quotes are removed and everything between a pair of quotes is
    kept verbatim, including blanks.  Outside quotes, spaces and tabs are
    dropped, so the arguments end up concatenated.  An unterminated quote
    extends to the end of [s]. *)
let strip_inline_macro_args s =
  let out = Buffer.create (String.length s) in
  let in_quotes = ref false in
  String.iter
    (fun c ->
      match c, !in_quotes with
      | '"', inside -> in_quotes := not inside
      | (' ' | '\t'), false -> ()
      | other, _ -> Buffer.add_char out other)
    s;
  Buffer.contents out
|
||||
|
||||
(** [strip_groff line] removes groff escapes from [line] and then trims
    surrounding whitespace. *)
let strip_groff line = line |> strip_groff_escapes |> String.trim
|
||||
|
||||
(* --- Line classification --- *)
|
||||
|
||||
(** Classification of a single line of roff source. *)
type groff_line =
  | Macro of string * string (* macro name and argument text, e.g. ("SH", "OPTIONS") or ("TP", "") *)
  | Text of string (* plain text after groff escape stripping *)
  | Blank (* empty line, or a text line that is empty once stripped *)
  | Comment (* roff comment line *)
|
||||
|
||||
(** [classify_line line] sorts one raw roff line into a {!groff_line}.

    - the empty string is [Blank];
    - lines starting with [.\"] (or a bare [.\]) and lines starting with
      [\"] are [Comment];
    - lines starting with ['.'] or ['\''] are [Macro (name, args)]: the
      name runs up to the first space or tab, [args] is the trimmed
      remainder, and one pair of enclosing double quotes around the whole
      argument text is removed;
    - anything else is [Text] after escape stripping, or [Blank] if nothing
      is left. *)
let classify_line line =
  let len = String.length line in
  if len = 0 then Blank
  else if
    len >= 2 && line.[0] = '.' && line.[1] = '\\'
    && (len < 3 || line.[2] = '"')
  then Comment
  else if len >= 3 && line.[0] = '\\' && line.[1] = '"' then Comment
  else if line.[0] = '.' || line.[0] = '\'' then begin
    let rest = String.trim (String.sub line 1 (len - 1)) in
    let rlen = String.length rest in
    (* The name ends at the first blank of either kind; looking for a tab
       only when no space exists would leave the tab inside the name. *)
    let rec first_blank k =
      if k >= rlen then None
      else
        match rest.[k] with
        | ' ' | '\t' -> Some k
        | _ -> first_blank (k + 1)
    in
    match first_blank 0 with
    | None -> Macro (rest, "")
    | Some pos ->
      let name = String.sub rest 0 pos in
      let args = String.trim (String.sub rest (pos + 1) (rlen - pos - 1)) in
      let alen = String.length args in
      let args =
        if alen >= 2 && args.[0] = '"' && args.[alen - 1] = '"' then
          String.sub args 1 (alen - 2)
        else args
      in
      Macro (name, args)
  end else begin
    let stripped = strip_groff line in
    if String.length stripped = 0 then Blank else Text stripped
  end
|
||||
|
||||
(** [is_comment_line line] is [true] when [line] begins with one of the two
    roff comment introducers: dot-backslash-quote or backslash-quote. *)
let is_comment_line line =
  let begins_with prefix =
    let n = String.length prefix in
    String.length line >= n && String.sub line 0 n = prefix
  in
  begins_with ".\\\"" || begins_with "\\\""
|
||||
|
||||
(** Shadows the earlier [classify_line]: comment lines are recognised first
    via [is_comment_line]; every other line goes to the previous
    definition. *)
let classify_line line =
  match is_comment_line line with
  | true -> Comment
  | false -> classify_line line
|
||||
|
||||
(* --- Section extraction --- *)
|
||||
|
||||
(** [extract_options_section lines] classifies the raw [lines] and returns
    the body of the first [.SH] section whose upper-cased, trimmed title
    contains ["OPTION"].  If there is none, it returns the body of the first
    section titled ["DESCRIPTION"], or [[]] if that is missing too.

    A section body runs up to, but not including, the next [.SH] macro. *)
let extract_options_section lines =
  let classified = List.map classify_line lines in
  let heading title = String.uppercase_ascii (String.trim title) in
  let mentions_option title =
    match Str.search_forward (Str.regexp_string "OPTION") (heading title) 0 with
    | _ -> true
    | exception Not_found -> false
  in
  let section_body following =
    let rec take acc = function
      | [] | Macro ("SH", _) :: _ -> List.rev acc
      | line :: tl -> take (line :: acc) tl
    in
    take [] following
  in
  let rec first_section wanted = function
    | [] -> None
    | Macro ("SH", title) :: tl when wanted title -> Some (section_body tl)
    | _ :: tl -> first_section wanted tl
  in
  match first_section mentions_option classified with
  | Some body -> body
  | None ->
    (match first_section (fun t -> heading t = "DESCRIPTION") classified with
     | Some body -> body
     | None -> [])
|
||||
|
||||
(* --- Strategy-based entry extraction --- *)
|
||||
|
||||
(** [collect_text_lines lines acc] consumes the leading run of [Text] lines
    of [lines].  It returns those texts, appended after the already
    collected and reversed [acc], joined with single spaces, together with
    the unconsumed remainder, which starts at the first non-[Text] line. *)
let collect_text_lines lines acc =
  let rec gather seen = function
    | Text t :: tl -> gather (t :: seen) tl
    | remaining -> (String.concat " " (List.rev seen), remaining)
  in
  gather acc lines
|
||||
|
||||
(** [parse_tag_to_entry tag desc] strips groff escapes from [tag], trims it,
    and parses its prefix with [switch_parser] followed by [param_parser].
    Returns [Some entry] carrying [desc] on success and [None] when the tag
    does not start with a recognisable switch. *)
let parse_tag_to_entry tag desc =
  let cleaned = String.trim (strip_groff_escapes tag) in
  let tag_parser =
    Angstrom.lift2 (fun sw p -> (sw, p)) switch_parser param_parser
  in
  match
    Angstrom.parse_string ~consume:Angstrom.Consume.Prefix tag_parser cleaned
  with
  | Error _ -> None
  | Ok (switch, param) -> Some { switch; param; desc }
|
||||
|
||||
(** Strategy A: [.TP] style (most common: GNU coreutils, help2man).

    For each [.TP] macro immediately followed by a [Text] line, that line is
    the tag and the run of [Text] lines after it is the description.  Tags
    that do not parse as a switch are dropped.  A [.TP] not followed by a
    [Text] line is ignored. *)
let strategy_tp lines =
  let rec walk acc = function
    | [] -> List.rev acc
    | Macro ("TP", _) :: Text tag :: after_tag ->
      let desc, remaining = collect_text_lines after_tag [] in
      let acc =
        match parse_tag_to_entry tag desc with
        | Some e -> e :: acc
        | None -> acc
      in
      walk acc remaining
    | _ :: tl -> walk acc tl
  in
  walk [] lines
|
||||
|
||||
(** Strategy B: [.IP] style (curl, hand-written pages, pod2man output).

    The argument text of each [.IP] macro is the tag; the run of [Text]
    lines that follows is the description.  Tags that do not parse as a
    switch are dropped.

    [.IP] accepts an optional indent after the tag, as in
    [.IP "\fB\-\-help\fR" 4].  When the argument text starts with a double
    quote, only the first quoted argument is used as the tag, so the quotes
    and trailing indent do not reach the switch parser.  Unquoted argument
    text is passed through unchanged.  Escape stripping is left to
    [parse_tag_to_entry]; doing it here as well would strip twice. *)
let strategy_ip lines =
  let tag_of_args args =
    let n = String.length args in
    if n > 0 && args.[0] = '"' then
      match String.index_from_opt args 1 '"' with
      | Some close -> String.sub args 1 (close - 1)
      | None -> String.sub args 1 (n - 1)
    else args
  in
  let rec walk lines acc =
    match lines with
    | [] -> List.rev acc
    | Macro ("IP", args) :: rest ->
      let desc, rest2 = collect_text_lines rest [] in
      let acc =
        match parse_tag_to_entry (tag_of_args args) desc with
        | Some e -> e :: acc
        | None -> acc
      in
      walk rest2 acc
    | _ :: rest -> walk rest acc
  in
  walk lines []
|
||||
|
||||
(** Strategy C: [.PP] + [.RS]/[.RE] style (git, DocBook).

    A [.PP] immediately followed by a [Text] line starts an entry whose tag
    is that line.  The description is gathered in two phases:
    - before [.RS]: [Text] lines are accumulated; any other line that is not
      [.RS] ends the description at that line;
    - inside [.RS]: [Text] lines are accumulated and other lines are
      skipped, until [.RE] (consumed), or a [.PP]/[.SH] (left in place), or
      the end of input.

    Tags that do not parse as a switch are dropped. *)
let strategy_pp_rs lines =
  let finish parts remaining =
    (String.concat " " (List.rev parts), remaining)
  in
  let rec inside_rs parts = function
    | [] -> finish parts []
    | Macro ("RE", _) :: tl -> finish parts tl
    | Text t :: tl -> inside_rs (t :: parts) tl
    | Macro (("PP" | "SH"), _) :: _ as stop -> finish parts stop
    | _ :: tl -> inside_rs parts tl
  in
  let rec before_rs parts = function
    | Macro ("RS", _) :: tl -> inside_rs parts tl
    | Text t :: tl -> before_rs (t :: parts) tl
    | stop -> finish parts stop
  in
  let rec walk acc = function
    | [] -> List.rev acc
    | Macro ("PP", _) :: Text tag :: after_tag ->
      let desc, remaining = before_rs [] after_tag in
      let acc =
        match parse_tag_to_entry tag desc with
        | Some e -> e :: acc
        | None -> acc
      in
      walk acc remaining
    | _ :: tl -> walk acc tl
  in
  walk [] lines
|
||||
|
||||
(** Strategy D: deroff fallback.

    Renders the section as plain text, one output line per [Text] line, per
    [.B]/[.I]/[.BI]/[.BR]/[.IR] macro (arguments flattened and
    escape-stripped) and per [Blank] line.  All other lines are omitted.
    The result is handed to [parse_help]; its entries are returned, or [[]]
    if parsing fails. *)
let strategy_deroff_lines lines =
  let render = function
    | Text s -> Some s
    | Macro (("BI" | "BR" | "IR" | "B" | "I"), args) ->
      Some (strip_groff_escapes (strip_inline_macro_args args))
    | Blank -> Some ""
    | Macro _ | Comment -> None
  in
  let text =
    lines
    |> List.filter_map render
    |> List.map (fun l -> l ^ "\n")
    |> String.concat ""
  in
  match parse_help text with
  | Error _ -> []
  | Ok result -> result.entries
|
||||
|
||||
(** [count_macro name lines] is the number of [Macro] lines in [lines] whose
    macro name equals [name]. *)
let count_macro name lines =
  lines
  |> List.filter (function
       | Macro (m, _) -> m = name
       | Text _ | Blank | Comment -> false)
  |> List.length
|
||||
|
||||
(** [extract_entries lines] runs every applicable extraction strategy and
    returns the result with the most entries.

    The deroff strategy always runs.  [.TP], [.IP] and [.PP]+[.RS]
    strategies run only when the corresponding macros occur in [lines].  On
    a tie the preference is TP, then IP, then PP+RS, then deroff: the
    candidates are listed in reverse preference order and a later candidate
    replaces the current best when it is at least as long. *)
let extract_entries lines =
  let has name = count_macro name lines > 0 in
  let when_ applicable strategy =
    if applicable then [ strategy lines ] else []
  in
  let candidates =
    [ strategy_deroff_lines lines ]
    @ when_ (has "PP" && has "RS") strategy_pp_rs
    @ when_ (has "IP") strategy_ip
    @ when_ (has "TP") strategy_tp
  in
  List.fold_left
    (fun best candidate ->
      if List.length candidate >= List.length best then candidate else best)
    [] candidates
|
||||
|
||||
(* --- Top-level API --- *)
|
||||
|
||||
(** [parse_manpage_lines lines] extracts the options section from the raw
    roff [lines] and returns the entries found in it. *)
let parse_manpage_lines lines =
  lines |> extract_options_section |> extract_entries
|
||||
|
||||
(** [parse_manpage_string contents] splits [contents] on ['\n'] and parses
    the resulting lines with [parse_manpage_lines]. *)
let parse_manpage_string contents =
  contents |> String.split_on_char '\n' |> parse_manpage_lines
|
||||
|
||||
(** [parse_manpage_gzipped_file path] decompresses [path] by running
    [gzip -dc] in a child process and parses the decompressed text.

    The path is shell-quoted and preceded by [--] so that a file name
    starting with ['-'] is not read as a gzip option.  The child process is
    always reaped, even if reading raises.  Its exit status is deliberately
    ignored: a failed decompression simply yields whatever output was
    produced, possibly none. *)
let parse_manpage_gzipped_file path =
  let cmd = Printf.sprintf "gzip -dc -- %s" (Filename.quote path) in
  let ic = Unix.open_process_in cmd in
  let contents =
    Fun.protect
      ~finally:(fun () ->
        ignore (Unix.close_process_in ic : Unix.process_status))
      (fun () ->
        let buf = Buffer.create 4096 in
        (try
           while true do
             Buffer.add_string buf (input_line ic);
             Buffer.add_char buf '\n'
           done
         with End_of_file -> ());
        Buffer.contents buf)
  in
  parse_manpage_string contents
|
||||
|
||||
(** [parse_manpage_file path] parses the manpage stored at [path].  Files
    whose name ends in [.gz] go through [parse_manpage_gzipped_file]; other
    files are read whole.

    The file is opened in binary mode so the length reported by
    [in_channel_length] matches the number of bytes read, and the channel is
    closed even if reading raises.

    @raise Sys_error if the file cannot be opened or read. *)
let parse_manpage_file path =
  if Filename.check_suffix path ".gz" then parse_manpage_gzipped_file path
  else begin
    let ic = open_in_bin path in
    let contents =
      Fun.protect
        ~finally:(fun () -> close_in_noerr ic)
        (fun () -> really_input_string ic (in_channel_length ic))
    in
    parse_manpage_string contents
  end
|
||||
82
lib/nushell.ml
Normal file
82
lib/nushell.ml
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
open Parser
|
||||
|
||||
(** [nushell_type_of_param name] maps a parameter placeholder taken from
    help text (e.g. ["FILE"], ["num"]) to a nushell type name: ["path"],
    ["int"], or ["string"] for anything unrecognised.

    Matching is case-insensitive, so ["file"], ["File"] and ["FILE"] all
    map to ["path"]. *)
let nushell_type_of_param name =
  match String.uppercase_ascii name with
  | "FILE" | "PATH" | "DIR" | "DIRECTORY" | "FILENAME" | "PATTERNFILE" ->
    "path"
  | "NUM" | "N" | "COUNT" | "NUMBER" | "INT" | "COLS" | "WIDTH" | "LINES"
  | "DEPTH" ->
    "int"
  | _ -> "string"
|
||||
|
||||
(** [escape_nu s] escapes [s] for use inside a double-quoted nushell string:
    every double quote and backslash is prefixed with a backslash.  The
    surrounding quotes are NOT added; callers supply them. *)
let escape_nu s =
  let escape_char = function
    | '"' -> "\\\""
    | '\\' -> "\\\\"
    | c -> String.make 1 c
  in
  s |> String.to_seq |> Seq.map escape_char |> List.of_seq |> String.concat ""
|
||||
|
||||
(** [format_flag entry] renders one flag line of a nushell [extern]
    signature: the leading indent, the flag as [--long(-s)], [--long] or
    [-s], then [": <type>"] when the flag takes a parameter.  If the entry
    has a description it is appended as a [# ...] comment, padded so the
    [#] sits at column 40, or one space after the flag when the flag is
    already that wide. *)
let format_flag entry =
  let flag =
    match entry.switch with
    | Both (s, l) -> Printf.sprintf "--%s(-%c)" l s
    | Long l -> Printf.sprintf "--%s" l
    | Short s -> Printf.sprintf "-%c" s
  in
  let annotation =
    match entry.param with
    | Some (Mandatory name) | Some (Optional name) ->
      ": " ^ nushell_type_of_param name
    | None -> ""
  in
  let head = " " ^ flag ^ annotation in
  if String.length entry.desc = 0 then head
  else
    let width = String.length head in
    let target = max (width + 1) 40 in
    head ^ String.make (target - width) ' ' ^ "# " ^ entry.desc
|
||||
|
||||
(** [generate_extern cmd_name result] renders nushell [export extern]
    definitions: one for [cmd_name], listing every flag in
    [result.entries], followed by an empty extern per subcommand in
    [result.subcommands], each annotated with its description.

    Command and subcommand names go inside double-quoted strings and are
    escaped with [escape_nu].  Descriptions go into [#] comments, where
    nushell does no escape processing, so they are written verbatim, as
    [format_flag] does for flag descriptions. *)
let generate_extern cmd_name result =
  let buf = Buffer.create 1024 in
  Printf.bprintf buf "export extern \"%s\" [\n" (escape_nu cmd_name);
  List.iter
    (fun entry ->
      Buffer.add_string buf (format_flag entry);
      Buffer.add_char buf '\n')
    result.entries;
  Buffer.add_string buf "]\n";
  List.iter
    (fun (sc : subcommand) ->
      Printf.bprintf buf "\nexport extern \"%s %s\" [ # %s\n]\n"
        (escape_nu cmd_name) (escape_nu sc.name) sc.desc)
    result.subcommands;
  Buffer.contents buf
|
||||
|
||||
(** [generate_module cmd_name result] wraps the output of [generate_extern]
    in a nushell module named [<cmd_name>-completions]. *)
let generate_module cmd_name result =
  let body = generate_extern cmd_name result in
  String.concat "" [ "module "; cmd_name; "-completions {\n"; body; "}\n" ]
|
||||
|
||||
(** [generate_extern_from_entries cmd_name entries] is [generate_extern] for
    a bare entry list, as produced by the manpage parser: no subcommands. *)
let generate_extern_from_entries cmd_name entries =
  generate_extern cmd_name { entries; subcommands = [] }
|
||||
262
lib/parser.ml
262
lib/parser.ml
|
|
@ -1,16 +1,55 @@
|
|||
(* open Angstrom_unix *)
|
||||
(* also look for "subcommands" for clapslop *)
|
||||
(* and other common help patterns *)
|
||||
open Angstrom
|
||||
|
||||
let ( <| ) = ( @@ )
|
||||
let ( <&> ) p1 p2 = lift2 (fun a b -> (a, b)) p1 p2
|
||||
let is_whitespace = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false
|
||||
(** [strip_ansi s] removes terminal escape sequences from [s]:
    - CSI: [ESC \[] up to and including the first byte in ['@'..'~'];
    - OSC: [ESC \]] up to and including a BEL or an [ESC \\] terminator;
    - any other [ESC c] pair.

    Unterminated sequences swallow the rest of the string.  An ESC that is
    the very last byte is copied through unchanged. *)
let strip_ansi s =
  let n = String.length s in
  let out = Buffer.create n in
  let is_esc k = Char.code s.[k] = 0x1b in
  (* Index just past the CSI final byte, or [n] if there is none. *)
  let rec after_csi k =
    if k >= n then k
    else if s.[k] >= '@' && s.[k] <= '~' then k + 1
    else after_csi (k + 1)
  in
  (* Index just past the OSC terminator, or [n] if there is none. *)
  let rec after_osc k =
    if k >= n then k
    else if s.[k] = '\x07' then k + 1
    else if k + 1 < n && is_esc k && s.[k + 1] = '\\' then k + 2
    else after_osc (k + 1)
  in
  let rec go k =
    if k < n then begin
      if k + 1 < n && is_esc k then
        go
          (match s.[k + 1] with
           | '[' -> after_csi (k + 2)
           | ']' -> after_osc (k + 2)
           | _ -> k + 2)
      else begin
        Buffer.add_char out s.[k];
        go (k + 1)
      end
    end
  in
  go 0;
  Buffer.contents out
|
||||
|
||||
(** Inline whitespace only: space or tab (newlines are excluded). *)
let is_whitespace c = c = ' ' || c = '\t'
|
||||
|
||||
(** ASCII letter or digit. *)
let is_alphanumeric c =
  (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
|
||||
|
||||
(** Character allowed in a parameter placeholder: ASCII letter, digit,
    ['_'] or ['-']. *)
let is_param_char c =
  (c >= 'A' && c <= 'Z')
  || (c >= 'a' && c <= 'z')
  || (c >= '0' && c <= '9')
  || c = '_' || c = '-'
|
||||
|
||||
(** ASCII upper-case letter or ['_']. *)
let is_upper_or_underscore c = (c >= 'A' && c <= 'Z') || c = '_'
|
||||
|
||||
(** Character allowed in a long option name: ASCII letter, digit or
    ['-']. *)
let is_long_char c =
  (c >= 'A' && c <= 'Z')
  || (c >= 'a' && c <= 'z')
  || (c >= '0' && c <= '9')
  || c = '-'
|
||||
|
|
@ -18,48 +57,207 @@ let is_long_char = function
|
|||
(** A command-line flag: short form only, long form only, or both. *)
type switch = Short of char | Long of string | Both of char * string

(** The value placeholder of a flag, tagged by whether the help text marks
    it as optional (e.g. the bracketed [[=NAME]] form). *)
type param = Mandatory of string | Optional of string

(** One documented flag with its optional parameter and description. *)
type entry = { switch : switch; param : param option; desc : string }

(** A subcommand name with its one-line description. *)
type subcommand = { name : string; desc : string }

(** Everything extracted from one help text. *)
type help_result = { entries : entry list; subcommands : subcommand list }
|
||||
|
||||
(* --- Low-level combinators --- *)
|
||||
|
||||
let inline_ws = skip_while (function ' ' | '\t' -> true | _ -> false)
|
||||
let eol = end_of_line <|> end_of_input
|
||||
let eol_strict = end_of_line (* Must consume a newline, no EOF match *)
|
||||
|
||||
let whitespace = skip_while is_whitespace
|
||||
let comma = char ',' *> whitespace
|
||||
let short_switch = char '-' *> satisfy is_alphanumeric
|
||||
let long_switch = string "--" *> take_while1 is_long_char
|
||||
let comma = char ',' *> inline_ws
|
||||
|
||||
let opt_param =
|
||||
print_endline "opt param is running";
|
||||
string "[=" *> take_while is_alphanumeric <* char ']' >>| fun a -> Optional a
|
||||
(* Parameter parsers *)
|
||||
let eq_opt_param =
|
||||
string "[=" *> take_while1 is_param_char <* char ']' >>| fun a -> Optional a
|
||||
|
||||
let man_param =
|
||||
print_endline "man param is running";
|
||||
char '=' *> take_while is_alphanumeric >>| fun a -> Mandatory a
|
||||
let eq_man_param =
|
||||
char '=' *> take_while1 is_param_char >>| fun a -> Mandatory a
|
||||
|
||||
(** Space-separated ALL_CAPS parameter, e.g. [" FILE"], [" TIME_STYLE"].

    Consumes one space, then requires a word of parameter characters that
    starts with an upper-case letter or ['_'] and consists only of
    upper-case letters, ['_'] and digits.  Words such as ["Do"] are rejected
    so that the start of a description is not taken for a parameter. *)
let space_upper_param =
  let is_digit c = c >= '0' && c <= '9' in
  char ' ' *> peek_char_fail >>= fun first ->
  if not (is_upper_or_underscore first) then fail "not an uppercase param"
  else
    take_while1 is_param_char >>= fun name ->
    if String.for_all (fun c -> is_upper_or_underscore c || is_digit c) name
    then return (Mandatory name)
    else fail "not an all-caps param"
|
||||
|
||||
(** Angle-bracket parameter, e.g. ["<file>"], ["<notation>"].  The
    non-empty text between the brackets becomes a [Mandatory] name. *)
let angle_param =
  let inner = take_while1 (function '>' -> false | _ -> true) in
  (fun name -> Mandatory name) <$> (char '<' *> inner <* char '>')
|
||||
|
||||
(** One space followed by an angle-bracket parameter, e.g. [" <file>"]. *)
let space_angle_param = char ' ' >>= fun _ -> angle_param
|
||||
|
||||
(** Optional angle-bracket parameter, e.g. ["[<file>]"].  The non-empty
    text between the angle brackets becomes an [Optional] name. *)
let opt_angle_param =
  let inner = take_while1 (function '>' -> false | _ -> true) in
  let bracketed = char '[' *> char '<' *> inner <* char '>' <* char ']' in
  (fun name -> Optional name) <$> bracketed
|
||||
|
||||
(** One space followed by an optional angle-bracket parameter, e.g.
    [" [<file>]"]. *)
let space_opt_angle_param = char ' ' >>= fun _ -> opt_angle_param
|
||||
|
||||
(** Go/Cobra style parameter: one space followed by a lower-case type word
    such as ["string"], ["list"] or ["int"].

    The word is the maximal run of [a-z] and must be at most 10 characters
    long; longer runs are rejected. *)
let space_type_param =
  let is_lower c = c >= 'a' && c <= 'z' in
  char ' ' *> peek_char_fail >>= fun first ->
  if not (is_lower first) then fail "not a lowercase type param"
  else
    take_while1 is_lower >>= fun name ->
    if String.length name > 10 then fail "too long for type param"
    else return (Mandatory name)
|
||||
|
||||
let param_parser =
|
||||
option None (choice [ opt_param; man_param ] >>| fun a -> Some a)
|
||||
option None
|
||||
(choice
|
||||
[ eq_opt_param; eq_man_param;
|
||||
space_opt_angle_param; space_angle_param;
|
||||
space_upper_param; space_type_param ]
|
||||
>>| fun a -> Some a)
|
||||
|
||||
(* Switch parser: -a, --all | -a | --all *)
|
||||
let switch_parser =
|
||||
choice
|
||||
[
|
||||
(* -a, --all *)
|
||||
( short_switch >>= fun s ->
|
||||
comma *> long_switch >>| fun l -> Both (s, l) );
|
||||
(* -a *)
|
||||
(short_switch >>= fun s ->
|
||||
comma *> long_switch >>| fun l -> Both (s, l));
|
||||
(short_switch >>| fun s -> Short s);
|
||||
(* --all *)
|
||||
(long_switch >>| fun l -> Long l);
|
||||
]
|
||||
|
||||
let description = whitespace *> take_till (fun c -> c = '\n') <* end_of_line
|
||||
(* --- Description parsing with multi-line continuation --- *)
|
||||
|
||||
(** Everything up to, but not including, the next ['\n'] or ['\r'].  The
    line terminator is left unconsumed. *)
let rest_of_line = take_till (function '\n' | '\r' -> true | _ -> false)
|
||||
|
||||
(** Parses one continuation line of a description and yields its text
    without leading whitespace.

    A line qualifies when its leading whitespace is at least 8 columns wide
    (a tab counts as 8) and the first non-blank character of the up-to-80
    character look-ahead is not ['-'].  Fails without consuming input
    otherwise, including at end of input. *)
let continuation_line =
  let indent_of s =
    let rec go k width =
      if k >= String.length s then width
      else
        match s.[k] with
        | ' ' -> go (k + 1) (width + 1)
        | '\t' -> go (k + 1) (width + 8)
        | _ -> width
    in
    go 0 0
  in
  peek_string 1 *> available >>= function
  | 0 -> fail "eof"
  | avail ->
    peek_string (min avail 80) >>= fun preview ->
    let body = String.trim preview in
    let looks_like_flag = String.length body > 0 && body.[0] = '-' in
    if indent_of preview >= 8 && not looks_like_flag then
      inline_ws *> rest_of_line <* eol
    else fail "not a continuation line"
|
||||
|
||||
(** Parses a flag description: the rest of the current line plus any
    following continuation lines.  Each piece is trimmed, empty pieces are
    dropped, and the rest are joined with single spaces. *)
let description =
  let first_line = inline_ws *> rest_of_line <* eol in
  lift2
    (fun head tail ->
      (head :: tail)
      |> List.map String.trim
      |> List.filter (fun piece -> String.length piece > 0)
      |> String.concat " ")
    first_line
    (many continuation_line)
|
||||
|
||||
(** Description placed on the line(s) below the flag (Clap long style).
    Requires at least one continuation line.  Each line is trimmed, empty
    lines are dropped, and the rest are joined with single spaces. *)
let description_below =
  let join pieces =
    pieces
    |> List.map String.trim
    |> List.filter (fun piece -> String.length piece > 0)
    |> String.concat " "
  in
  join <$> many1 continuation_line
|
||||
|
||||
(* --- Line classification for skipping --- *)
|
||||
|
||||
(** Look-ahead test that consumes nothing: succeeds when the first
    non-blank character of the next up-to-40 characters is ['-'].  Fails
    otherwise, and at end of input. *)
let at_option_line =
  peek_string 1 *> available >>= function
  | 0 -> fail "eof"
  | avail ->
    peek_string (min avail 40) >>= fun preview ->
    let body = String.trim preview in
    if String.length body = 0 || body.[0] <> '-' then
      fail "not an option line"
    else return ()
|
||||
|
||||
(** Skips one line (section header, blank, description-only, ...).  The
    line must end with a real newline; a final line without one is not
    skipped.

    NOTE(review): the first alternative is meant to refuse option lines,
    but its failure presumably just hands control to the second alternative
    of [<|>], which then skips the line anyway.  Confirm whether that is
    intended before changing it. *)
let skip_non_option_line =
  let refuse_option_line = at_option_line *> fail "this is an option line" in
  let drop_line = rest_of_line *> eol_strict *> return () in
  refuse_option_line <|> drop_line
|
||||
|
||||
(* --- Entry parsing --- *)
|
||||
|
||||
(* Parse a single flag entry *)
|
||||
let entry =
|
||||
skip_while (fun c -> c <> '-')
|
||||
*> lift3 (fun a b c -> (a, b, c)) switch_parser param_parser description
|
||||
>>| fun (switch, param, desc) -> { switch; param; desc }
|
||||
inline_ws *>
|
||||
lift2 (fun (sw, param) desc -> { switch = sw; param; desc })
|
||||
(lift2 (fun a b -> (a, b)) switch_parser param_parser)
|
||||
(description <|> (eol *> (description_below <|> return "")))
|
||||
|
||||
let endline = option () (char '\n' *> return ())
|
||||
let entry_line = entry <* endline
|
||||
let help_parser = many entry_line
|
||||
(* --- Subcommand parsing --- *)
|
||||
|
||||
(** Parses a subcommand line: optional indent, a name (a run of characters
    other than space, tab and newline), at least two spaces, then the
    description up to the end of the line.  The description is trimmed. *)
let subcommand_entry =
  let word =
    take_while1 (function ' ' | '\t' | '\n' -> false | _ -> true)
  in
  (* Two literal spaces are required, so a single space inside ordinary
     prose does not split a line into name and description. *)
  let gap = char ' ' *> char ' ' *> inline_ws in
  lift2
    (fun name text -> { name; desc = String.trim text })
    (inline_ws *> word)
    (gap *> rest_of_line <* eol)
|
||||
|
||||
(* --- Top-level parser --- *)
|
||||
|
||||
(** The main help parser.  It repeatedly tries, in this order, to parse a
    flag entry, a subcommand line, or to skip one line, and gathers the
    entries and subcommands in the order they appear.  Parsing stops at the
    first position where none of the three applies. *)
let help_parser =
  let item =
    choice
      [
        (entry >>| fun e -> `Entry e);
        (subcommand_entry >>| fun sc -> `Subcommand sc);
        (skip_non_option_line >>| fun () -> `Skip);
      ]
  in
  many item >>| fun items ->
  let rev_entries, rev_subcommands =
    List.fold_left
      (fun (es, scs) found ->
        match found with
        | `Entry e -> (e :: es, scs)
        | `Subcommand sc -> (es, sc :: scs)
        | `Skip -> (es, scs))
      ([], []) items
  in
  { entries = List.rev rev_entries; subcommands = List.rev rev_subcommands }
|
||||
|
||||
let parse_help txt =
|
||||
Angstrom.parse_string ~consume:Consume.Prefix help_parser txt
|
||||
let clean = strip_ansi txt in
|
||||
match Angstrom.parse_string ~consume:Consume.Prefix help_parser clean with
|
||||
| Ok result -> Ok result
|
||||
| Error msg -> Error msg
|
||||
|
||||
(* --- Pretty printers --- *)
|
||||
|
||||
let print_switch = function
|
||||
| Short o -> Printf.sprintf "Short: %c" o
|
||||
|
|
@ -75,3 +273,11 @@ let print_entry e =
|
|||
Printf.printf
|
||||
"\n\t** ENTRY **\n\tSwitch: %s\n\tParam: %s\n\tDescription: %s\n"
|
||||
(print_switch e.switch) (print_opt e.param) e.desc
|
||||
|
||||
(** Prints one subcommand (name and description) to standard output. *)
let print_subcommand ({ name; desc } : subcommand) =
  Printf.printf "\n\t** SUBCOMMAND **\n\tName: %s\n\tDescription: %s\n"
    name desc
|
||||
|
||||
(** Prints every entry, then every subcommand, of a help result to standard
    output. *)
let print_help_result { entries; subcommands } =
  List.iter print_entry entries;
  List.iter print_subcommand subcommands
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue