init
This commit is contained in:
commit
6ddbd4185e
22 changed files with 3985 additions and 0 deletions
911
lib/manpage.ml
Normal file
911
lib/manpage.ml
Normal file
|
|
@ -0,0 +1,911 @@
|
|||
open Parser
|
||||
|
||||
(* --- Groff escape/formatting stripper --- *)
|
||||
|
||||
let strip_groff_escapes s =
|
||||
let buf = Buffer.create (String.length s) in
|
||||
let len = String.length s in
|
||||
let i = ref 0 in
|
||||
let last = ref '\000' in
|
||||
let put c = Buffer.add_char buf c; last := c in
|
||||
let is_alnum c =
|
||||
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
|
||||
in
|
||||
while !i < len do
|
||||
if s.[!i] = '\\' && !i + 1 < len then begin
|
||||
let next = s.[!i + 1] in
|
||||
match next with
|
||||
| 'f' ->
|
||||
(* Font escape: \fB, \fI, \fP, \fR, \f(XX, \f[...] *)
|
||||
if !i + 2 < len then begin
|
||||
let fc = s.[!i + 2] in
|
||||
(* Insert space before italic font to preserve word boundaries
|
||||
e.g. \fB--max-results\fR\fIcount\fR → "--max-results count" *)
|
||||
if fc = 'I' && is_alnum !last then put ' ';
|
||||
if fc = '(' then
|
||||
i := !i + 5 (* \f(XX *)
|
||||
else if fc = '[' then begin
|
||||
i := !i + 3;
|
||||
while !i < len && s.[!i] <> ']' do incr i done;
|
||||
if !i < len then incr i
|
||||
end else
|
||||
i := !i + 3 (* \fX *)
|
||||
end else
|
||||
i := !i + 2
|
||||
| '-' ->
|
||||
put '-';
|
||||
i := !i + 2
|
||||
| '&' | '/' | ',' ->
|
||||
(* Zero-width characters *)
|
||||
i := !i + 2
|
||||
| '(' ->
|
||||
(* Two-char named character: \(aq, \(lq, \(rq, etc. *)
|
||||
if !i + 3 < len then begin
|
||||
let name = String.sub s (!i + 2) 2 in
|
||||
(match name with
|
||||
| "aq" -> put '\''
|
||||
| "lq" | "Lq" -> put '"'
|
||||
| "rq" | "Rq" -> put '"'
|
||||
| "em" | "en" -> put '-'
|
||||
| _ -> ());
|
||||
i := !i + 4
|
||||
end else
|
||||
i := !i + 2
|
||||
| '[' ->
|
||||
(* Named character: \[...] *)
|
||||
i := !i + 2;
|
||||
let start = !i in
|
||||
while !i < len && s.[!i] <> ']' do incr i done;
|
||||
if !i < len then begin
|
||||
let name = String.sub s start (!i - start) in
|
||||
(match name with
|
||||
| "aq" -> put '\''
|
||||
| "lq" | "Lq" -> put '"'
|
||||
| "rq" | "Rq" -> put '"'
|
||||
| _ -> ());
|
||||
incr i
|
||||
end
|
||||
| 's' ->
|
||||
(* Size escape: \sN, \s+N, \s-N, \s'N' *)
|
||||
i := !i + 2;
|
||||
if !i < len && (s.[!i] = '+' || s.[!i] = '-') then incr i;
|
||||
if !i < len && s.[!i] >= '0' && s.[!i] <= '9' then incr i;
|
||||
if !i < len && s.[!i] >= '0' && s.[!i] <= '9' then incr i
|
||||
| 'm' ->
|
||||
(* Color escape: \m[...] *)
|
||||
i := !i + 2;
|
||||
if !i < len && s.[!i] = '[' then begin
|
||||
incr i;
|
||||
while !i < len && s.[!i] <> ']' do incr i done;
|
||||
if !i < len then incr i
|
||||
end
|
||||
| 'X' ->
|
||||
(* Device control: \X'...' *)
|
||||
i := !i + 2;
|
||||
if !i < len && s.[!i] = '\'' then begin
|
||||
incr i;
|
||||
while !i < len && s.[!i] <> '\'' do incr i done;
|
||||
if !i < len then incr i
|
||||
end
|
||||
| '*' ->
|
||||
(* String variable: \*X or \*(XX or \*[...] *)
|
||||
i := !i + 2;
|
||||
if !i < len then begin
|
||||
if s.[!i] = '(' then
|
||||
i := !i + 2
|
||||
else if s.[!i] = '[' then begin
|
||||
incr i;
|
||||
while !i < len && s.[!i] <> ']' do incr i done;
|
||||
if !i < len then incr i
|
||||
end else
|
||||
incr i
|
||||
end
|
||||
| 'n' ->
|
||||
(* Number register: \nX or \n(XX or \n[...] *)
|
||||
i := !i + 2;
|
||||
if !i < len then begin
|
||||
if s.[!i] = '(' then
|
||||
i := !i + 2
|
||||
else if s.[!i] = '[' then begin
|
||||
incr i;
|
||||
while !i < len && s.[!i] <> ']' do incr i done;
|
||||
if !i < len then incr i
|
||||
end else
|
||||
incr i
|
||||
end
|
||||
| 'e' ->
|
||||
put '\\';
|
||||
i := !i + 2
|
||||
| '\\' ->
|
||||
put '\\';
|
||||
i := !i + 2
|
||||
| ' ' ->
|
||||
put ' ';
|
||||
i := !i + 2
|
||||
| _ ->
|
||||
(* Unknown escape, skip *)
|
||||
i := !i + 2
|
||||
end else begin
|
||||
put s.[!i];
|
||||
incr i
|
||||
end
|
||||
done;
|
||||
Buffer.contents buf
|
||||
|
||||
(* Strip inline macro formatting: .BI, .BR, .IR, etc.
|
||||
These macros alternate between fonts for their arguments.
|
||||
We just concatenate the arguments. *)
|
||||
let strip_inline_macro_args s =
|
||||
(* Arguments are separated by spaces, quoted strings are kept together *)
|
||||
let buf = Buffer.create (String.length s) in
|
||||
let len = String.length s in
|
||||
let i = ref 0 in
|
||||
while !i < len do
|
||||
if s.[!i] = '"' then begin
|
||||
incr i;
|
||||
while !i < len && s.[!i] <> '"' do
|
||||
Buffer.add_char buf s.[!i];
|
||||
incr i
|
||||
done;
|
||||
if !i < len then incr i
|
||||
end else if s.[!i] = ' ' || s.[!i] = '\t' then begin
|
||||
incr i
|
||||
end else begin
|
||||
Buffer.add_char buf s.[!i];
|
||||
incr i
|
||||
end
|
||||
done;
|
||||
Buffer.contents buf
|
||||
|
||||
let strip_groff line =
|
||||
let s = strip_groff_escapes line in
|
||||
String.trim s
|
||||
|
||||
(* --- Line classification --- *)
|
||||
|
||||
type groff_line =
|
||||
| Macro of string * string (* e.g. ("SH", "OPTIONS") or ("TP", "") *)
|
||||
| Text of string (* plain text after stroff stripping *)
|
||||
| Blank
|
||||
| Comment
|
||||
|
||||
let classify_line line =
|
||||
let len = String.length line in
|
||||
if len = 0 then Blank
|
||||
else if len >= 2 && line.[0] = '.' && line.[1] = '\\' && (len < 3 || line.[2] = '"') then
|
||||
Comment
|
||||
else if len >= 3 && line.[0] = '\\' && line.[1] = '"' then
|
||||
Comment
|
||||
else if line.[0] = '.' || line.[0] = '\'' then begin
|
||||
(* Macro line *)
|
||||
let rest = String.sub line 1 (len - 1) in
|
||||
let rest = String.trim rest in
|
||||
(* Split into macro name and arguments *)
|
||||
let space_pos =
|
||||
try Some (String.index rest ' ')
|
||||
with Not_found ->
|
||||
try Some (String.index rest '\t')
|
||||
with Not_found -> None
|
||||
in
|
||||
match space_pos with
|
||||
| Some pos ->
|
||||
let name = String.sub rest 0 pos in
|
||||
let args = String.trim (String.sub rest (pos + 1) (String.length rest - pos - 1)) in
|
||||
(* Strip quotes from args *)
|
||||
let args =
|
||||
let alen = String.length args in
|
||||
if alen >= 2 && args.[0] = '"' && args.[alen - 1] = '"' then
|
||||
String.sub args 1 (alen - 2)
|
||||
else args
|
||||
in
|
||||
Macro (name, args)
|
||||
| None ->
|
||||
Macro (rest, "")
|
||||
end else begin
|
||||
let stripped = strip_groff line in
|
||||
if String.length stripped = 0 then Blank
|
||||
else Text stripped
|
||||
end
|
||||
|
||||
(* Check for dot-backslash-quote style comments more carefully *)
|
||||
let is_comment_line line =
|
||||
let len = String.length line in
|
||||
(len >= 3 && line.[0] = '.' && line.[1] = '\\' && line.[2] = '"')
|
||||
|| (len >= 2 && line.[0] = '\\' && line.[1] = '"')
|
||||
|
||||
let classify_line line =
|
||||
if is_comment_line line then Comment
|
||||
else classify_line line
|
||||
|
||||
(* --- Section extraction --- *)
|
||||
|
||||
let extract_options_section lines =
|
||||
let classified = List.map classify_line lines in
|
||||
let rec collect_until_next_sh lines acc =
|
||||
match lines with
|
||||
| [] -> List.rev acc
|
||||
| Macro ("SH", _) :: _ -> List.rev acc
|
||||
| line :: rest -> collect_until_next_sh rest (line :: acc)
|
||||
in
|
||||
let is_options_section name =
|
||||
let s = String.uppercase_ascii (String.trim name) in
|
||||
s = "OPTIONS"
|
||||
|| (String.length s > 0 &&
|
||||
try let _ = Str.search_forward (Str.regexp_string "OPTION") s 0 in true
|
||||
with Not_found -> false)
|
||||
in
|
||||
(* First pass: look for OPTIONS section *)
|
||||
let rec find_options = function
|
||||
| [] -> None
|
||||
| Macro ("SH", args) :: rest when is_options_section args ->
|
||||
Some (collect_until_next_sh rest [])
|
||||
| _ :: rest -> find_options rest
|
||||
in
|
||||
(* Fallback: DESCRIPTION section *)
|
||||
let rec find_description = function
|
||||
| [] -> []
|
||||
| Macro ("SH", args) :: rest
|
||||
when String.uppercase_ascii (String.trim args) = "DESCRIPTION" ->
|
||||
collect_until_next_sh rest []
|
||||
| _ :: rest -> find_description rest
|
||||
in
|
||||
match find_options classified with
|
||||
| Some section -> section
|
||||
| None -> find_description classified
|
||||
|
||||
(* --- Strategy-based entry extraction --- *)
|
||||
|
||||
(* Collect text lines until next macro or blank *)
|
||||
let rec collect_text_lines lines acc =
|
||||
match lines with
|
||||
| Text s :: rest -> collect_text_lines rest (s :: acc)
|
||||
| _ -> (String.concat " " (List.rev acc), lines)
|
||||
|
||||
(* Parse a tag line to extract entry using the Angstrom switch_parser *)
|
||||
let parse_tag_to_entry tag desc =
|
||||
let tag = strip_groff_escapes tag in
|
||||
let tag = String.trim tag in
|
||||
match Angstrom.parse_string ~consume:Angstrom.Consume.Prefix
|
||||
(Angstrom.lift2 (fun sw p -> (sw, p)) switch_parser param_parser) tag with
|
||||
| Ok (switch, param) -> Some { switch; param; desc }
|
||||
| Error _ -> None
|
||||
|
||||
(* Extract tag text from a macro line (.B, .I preserve spaces; .BI/.BR/.IR alternate) *)
|
||||
let tag_of_macro name args =
|
||||
match name with
|
||||
| "B" | "I" -> strip_groff_escapes args |> String.trim
|
||||
| _ -> strip_inline_macro_args args |> strip_groff_escapes |> String.trim
|
||||
|
||||
(* Strategy A: .TP style (most common — GNU coreutils, help2man) *)
|
||||
let strategy_tp lines =
|
||||
let rec walk lines acc =
|
||||
match lines with
|
||||
| [] -> List.rev acc
|
||||
| Macro ("TP", _) :: rest ->
|
||||
(* Next line is the tag — could be Text or a formatting macro *)
|
||||
begin match rest with
|
||||
| Text tag :: rest2 ->
|
||||
let (desc, rest3) = collect_text_lines rest2 [] in
|
||||
let entry = parse_tag_to_entry tag desc in
|
||||
walk rest3 (match entry with Some e -> e :: acc | None -> acc)
|
||||
| Macro (("B" | "I" | "BI" | "BR" | "IR") as m, args) :: rest2 ->
|
||||
let tag = tag_of_macro m args in
|
||||
let (desc, rest3) = collect_text_lines rest2 [] in
|
||||
let entry = parse_tag_to_entry tag desc in
|
||||
walk rest3 (match entry with Some e -> e :: acc | None -> acc)
|
||||
| _ -> walk rest acc
|
||||
end
|
||||
| _ :: rest -> walk rest acc
|
||||
in
|
||||
walk lines []
|
||||
|
||||
(* Strategy B: .IP style (curl, hand-written) *)
|
||||
let strategy_ip lines =
|
||||
let rec walk lines acc =
|
||||
match lines with
|
||||
| [] -> List.rev acc
|
||||
| Macro ("IP", tag) :: rest ->
|
||||
let tag = strip_groff_escapes tag in
|
||||
let (desc, rest2) = collect_text_lines rest [] in
|
||||
let entry = parse_tag_to_entry tag desc in
|
||||
walk rest2 (match entry with Some e -> e :: acc | None -> acc)
|
||||
| _ :: rest -> walk rest acc
|
||||
in
|
||||
walk lines []
|
||||
|
||||
(* Strategy C: .PP + .RS/.RE style (git, DocBook) *)
|
||||
let strategy_pp_rs lines =
|
||||
let rec walk lines acc =
|
||||
match lines with
|
||||
| [] -> List.rev acc
|
||||
| Macro ("PP", _) :: rest ->
|
||||
begin match rest with
|
||||
| Text tag :: rest2 ->
|
||||
(* Look for .RS ... text ... .RE *)
|
||||
let rec collect_rs lines desc_acc =
|
||||
match lines with
|
||||
| Macro ("RS", _) :: rest3 ->
|
||||
collect_in_rs rest3 desc_acc
|
||||
| Text s :: rest3 ->
|
||||
(* Sometimes description follows directly *)
|
||||
collect_rs rest3 (s :: desc_acc)
|
||||
| _ -> (String.concat " " (List.rev desc_acc), lines)
|
||||
and collect_in_rs lines desc_acc =
|
||||
match lines with
|
||||
| Macro ("RE", _) :: rest3 ->
|
||||
(String.concat " " (List.rev desc_acc), rest3)
|
||||
| Text s :: rest3 ->
|
||||
collect_in_rs rest3 (s :: desc_acc)
|
||||
| Macro ("PP", _) :: _ | Macro ("SH", _) :: _ ->
|
||||
(String.concat " " (List.rev desc_acc), lines)
|
||||
| _ :: rest3 -> collect_in_rs rest3 desc_acc
|
||||
| [] -> (String.concat " " (List.rev desc_acc), [])
|
||||
in
|
||||
let (desc, rest3) = collect_rs rest2 [] in
|
||||
let entry = parse_tag_to_entry tag desc in
|
||||
walk rest3 (match entry with Some e -> e :: acc | None -> acc)
|
||||
| _ -> walk rest acc
|
||||
end
|
||||
| _ :: rest -> walk rest acc
|
||||
in
|
||||
walk lines []
|
||||
|
||||
(* Strategy D: Deroff fallback — strip all groff, use help text parser *)
|
||||
let strategy_deroff_lines lines =
|
||||
let buf = Buffer.create 256 in
|
||||
List.iter (fun line ->
|
||||
match line with
|
||||
| Text s ->
|
||||
Buffer.add_string buf s;
|
||||
Buffer.add_char buf '\n'
|
||||
| Macro (("BI" | "BR" | "IR" | "B" | "I"), args) ->
|
||||
let text = strip_inline_macro_args args in
|
||||
let text = strip_groff_escapes text in
|
||||
Buffer.add_string buf text;
|
||||
Buffer.add_char buf '\n'
|
||||
| Blank -> Buffer.add_char buf '\n'
|
||||
| _ -> ()
|
||||
) lines;
|
||||
let text = Buffer.contents buf in
|
||||
match parse_help text with
|
||||
| Ok result -> result.entries
|
||||
| Error _ -> []
|
||||
|
||||
(* Strategy E: Nix3-style bullet .IP with .UR/.UE hyperlinks *)
|
||||
let strategy_nix lines =
|
||||
let is_bullet_ip args =
|
||||
String.length (String.trim args) > 0
|
||||
in
|
||||
let rec walk lines acc =
|
||||
match lines with
|
||||
| [] -> List.rev acc
|
||||
| Macro ("IP", args) :: rest when is_bullet_ip args ->
|
||||
(* Collect tag: skip UR/UE macros, collect Text lines *)
|
||||
let rec collect_tag lines parts =
|
||||
match lines with
|
||||
| Macro ("UR", _) :: rest2 -> collect_tag rest2 parts
|
||||
| Macro ("UE", _) :: rest2 -> collect_tag rest2 parts
|
||||
| Text s :: rest2 -> collect_tag rest2 (s :: parts)
|
||||
| _ -> (String.concat " " (List.rev parts), lines)
|
||||
in
|
||||
let (tag, rest2) = collect_tag rest [] in
|
||||
(* Collect description after the description .IP marker *)
|
||||
let rec collect_desc lines parts =
|
||||
match lines with
|
||||
| Macro ("IP", dargs) :: rest3 when not (is_bullet_ip dargs) ->
|
||||
collect_desc_text rest3 parts
|
||||
| _ -> (String.concat " " (List.rev parts), lines)
|
||||
and collect_desc_text lines parts =
|
||||
match lines with
|
||||
| Text s :: rest3 -> collect_desc_text rest3 (s :: parts)
|
||||
| Macro ("IP", args) :: _ when is_bullet_ip args ->
|
||||
(String.concat " " (List.rev parts), lines)
|
||||
| Macro (("SS" | "SH"), _) :: _ ->
|
||||
(String.concat " " (List.rev parts), lines)
|
||||
| Macro ("RS", _) :: rest3 ->
|
||||
skip_rs rest3 parts 1
|
||||
| Macro ("IP", _) :: rest3 ->
|
||||
(* Non-bullet IP = continuation paragraph *)
|
||||
collect_desc_text rest3 parts
|
||||
| Macro _ :: rest3 -> collect_desc_text rest3 parts
|
||||
| Blank :: rest3 -> collect_desc_text rest3 parts
|
||||
| Comment :: rest3 -> collect_desc_text rest3 parts
|
||||
| [] -> (String.concat " " (List.rev parts), [])
|
||||
and skip_rs lines parts depth =
|
||||
match lines with
|
||||
| Macro ("RE", _) :: rest3 ->
|
||||
if depth <= 1 then collect_desc_text rest3 parts
|
||||
else skip_rs rest3 parts (depth - 1)
|
||||
| Macro ("RS", _) :: rest3 -> skip_rs rest3 parts (depth + 1)
|
||||
| _ :: rest3 -> skip_rs rest3 parts depth
|
||||
| [] -> (String.concat " " (List.rev parts), [])
|
||||
in
|
||||
let (desc, rest3) = collect_desc rest2 [] in
|
||||
let entry = parse_tag_to_entry tag desc in
|
||||
walk rest3 (match entry with Some e -> e :: acc | None -> acc)
|
||||
| _ :: rest -> walk rest acc
|
||||
in
|
||||
walk lines []
|
||||
|
||||
(* Count macros of a given type *)
|
||||
let count_macro name lines =
|
||||
List.fold_left (fun n line ->
|
||||
match line with Macro (m, _) when m = name -> n + 1 | _ -> n
|
||||
) 0 lines
|
||||
|
||||
(* Auto-detect and try strategies, return the one with most entries *)
|
||||
let extract_entries lines =
|
||||
let tp = count_macro "TP" lines
|
||||
and ip = count_macro "IP" lines
|
||||
and pp = count_macro "PP" lines
|
||||
and rs = count_macro "RS" lines
|
||||
and ur = count_macro "UR" lines in
|
||||
let specialized = List.filter_map Fun.id [
|
||||
(if tp > 0 then Some ("TP", strategy_tp lines) else None);
|
||||
(if ip > 0 then Some ("IP", strategy_ip lines) else None);
|
||||
(if pp > 0 && rs > 0 then Some ("PP+RS", strategy_pp_rs lines) else None);
|
||||
(if ur > 0 && ip > 0 then Some ("nix", strategy_nix lines) else None);
|
||||
] in
|
||||
let candidates = match List.filter (fun (_, e) -> e <> []) specialized with
|
||||
| [] -> [("deroff", strategy_deroff_lines lines)]
|
||||
| filtered -> filtered
|
||||
in
|
||||
List.fold_left (fun (_, best) (name, entries) ->
|
||||
if List.length entries >= List.length best then (name, entries)
|
||||
else (name, best)
|
||||
) ("none", []) candidates |> snd
|
||||
|
||||
(* --- NAME section description extraction --- *)
|
||||
|
||||
let extract_name_description contents =
|
||||
let lines = String.split_on_char '\n' contents in
|
||||
let classified = List.map classify_line lines in
|
||||
let rec find = function
|
||||
| [] -> None
|
||||
| Macro ("SH", args) :: rest
|
||||
when String.uppercase_ascii (String.trim args) = "NAME" ->
|
||||
collect rest []
|
||||
| _ :: rest -> find rest
|
||||
and collect lines acc =
|
||||
match lines with
|
||||
| Macro ("SH", _) :: _ | [] -> finish acc
|
||||
| Text s :: rest -> collect rest (s :: acc)
|
||||
| Macro (("B" | "BI" | "BR" | "I" | "IR"), args) :: rest ->
|
||||
let s = strip_inline_macro_args args |> strip_groff_escapes |> String.trim in
|
||||
collect rest (if String.length s > 0 then s :: acc else acc)
|
||||
| Macro ("Nm", args) :: rest ->
|
||||
let s = strip_groff_escapes args |> String.trim in
|
||||
collect rest (if String.length s > 0 then s :: acc else acc)
|
||||
| Macro ("Nd", args) :: rest ->
|
||||
let s = strip_groff_escapes args |> String.trim in
|
||||
collect rest (if String.length s > 0 then ("\\- " ^ s) :: acc else acc)
|
||||
| _ :: rest -> collect rest acc
|
||||
and finish acc =
|
||||
let full = String.concat " " (List.rev acc) |> String.trim in
|
||||
(* NAME lines look like: "git-add \- Add file contents to the index" *)
|
||||
let sep = Str.regexp {| *\\- *\| +- +|} in
|
||||
match Str.bounded_split sep full 2 with
|
||||
| [_; desc] -> Some (String.trim desc)
|
||||
| _ -> None
|
||||
in
|
||||
find classified
|
||||
|
||||
(* --- SYNOPSIS command name extraction --- *)
|
||||
|
||||
let extract_synopsis_command_lines lines =
|
||||
let classified = List.map classify_line lines in
|
||||
let is_synopsis name =
|
||||
let s = String.uppercase_ascii (String.trim name) in
|
||||
s = "SYNOPSIS"
|
||||
in
|
||||
let extract_cmd line =
|
||||
let words = String.split_on_char ' ' (String.trim line) in
|
||||
let words = List.filter (fun w -> String.length w > 0) words in
|
||||
let is_cmd_char = function
|
||||
| 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.' -> true
|
||||
| _ -> false
|
||||
in
|
||||
let rec take = function
|
||||
| [] -> []
|
||||
| w :: rest ->
|
||||
if String.length w > 0
|
||||
&& (w.[0] = '[' || w.[0] = '-' || w.[0] = '<'
|
||||
|| w.[0] = '(' || w.[0] = '{')
|
||||
then []
|
||||
else if String.for_all is_cmd_char w then
|
||||
w :: take rest
|
||||
else []
|
||||
in
|
||||
match take words with
|
||||
| [] -> None
|
||||
| cmd -> Some (String.concat " " cmd)
|
||||
in
|
||||
let rec find = function
|
||||
| [] -> None
|
||||
| Macro ("SH", args) :: rest when is_synopsis args -> collect rest
|
||||
| _ :: rest -> find rest
|
||||
and collect = function
|
||||
| [] -> None
|
||||
| Macro ("SH", _) :: _ -> None
|
||||
| Text s :: _ ->
|
||||
let s = String.trim s in
|
||||
if String.length s > 0 then extract_cmd s else None
|
||||
| Macro (("B" | "BI" | "BR"), args) :: _ ->
|
||||
let s = strip_inline_macro_args args |> strip_groff_escapes |> String.trim in
|
||||
if String.length s > 0 then extract_cmd s else None
|
||||
| _ :: rest -> collect rest
|
||||
in
|
||||
find classified
|
||||
|
||||
let extract_synopsis_command contents =
|
||||
let lines = String.split_on_char '\n' contents in
|
||||
extract_synopsis_command_lines lines
|
||||
|
||||
(* --- SYNOPSIS positional extraction --- *)
|
||||
|
||||
let extract_synopsis_positionals_lines lines =
|
||||
let classified = List.map classify_line lines in
|
||||
let is_synopsis name =
|
||||
String.uppercase_ascii (String.trim name) = "SYNOPSIS"
|
||||
in
|
||||
let rec find = function
|
||||
| [] -> []
|
||||
| Macro ("SH", args) :: rest when is_synopsis args -> collect rest []
|
||||
| _ :: rest -> find rest
|
||||
and collect lines acc =
|
||||
match lines with
|
||||
| [] -> finish acc
|
||||
| Macro ("SH", _) :: _ -> finish acc
|
||||
| Macro ("SS", _) :: _ -> finish acc
|
||||
| Macro ("br", _) :: _ -> finish acc
|
||||
| Text s :: rest ->
|
||||
let s = strip_groff_escapes s |> String.trim in
|
||||
collect rest (if String.length s > 0 then s :: acc else acc)
|
||||
| Macro (("B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI"), args) :: rest ->
|
||||
let s = strip_inline_macro_args args |> strip_groff_escapes |> String.trim in
|
||||
collect rest (if String.length s > 0 then s :: acc else acc)
|
||||
| _ :: rest -> collect rest acc
|
||||
and finish acc =
|
||||
let parts = List.rev acc in
|
||||
let full = String.concat " " parts |> String.trim in
|
||||
if String.length full = 0 then []
|
||||
else
|
||||
let cmd_end = skip_command_prefix full in
|
||||
let args = String.sub full cmd_end (String.length full - cmd_end) in
|
||||
parse_usage_args args
|
||||
in
|
||||
find classified
|
||||
|
||||
(* --- mdoc (BSD) format support --- *)
|
||||
|
||||
let is_mdoc lines =
|
||||
List.exists (fun l ->
|
||||
match classify_line l with Macro ("Sh", _) -> true | _ -> false
|
||||
) lines
|
||||
|
||||
let mdoc_text_of line =
|
||||
match line with
|
||||
| Text s -> Some (strip_groff_escapes s)
|
||||
| Macro (m, args) ->
|
||||
(match m with
|
||||
| "Pp" | "Bl" | "El" | "Sh" | "Ss" | "Os" | "Dd" | "Dt"
|
||||
| "Oo" | "Oc" | "Op" -> None
|
||||
| _ ->
|
||||
let s = strip_groff_escapes args |> String.trim in
|
||||
if s = "" then None else Some s)
|
||||
| _ -> None
|
||||
|
||||
let parse_mdoc_it args =
|
||||
let words = String.split_on_char ' ' args
|
||||
|> List.filter (fun w -> w <> "" && w <> "Ns") in
|
||||
let param = match words with
|
||||
| _ :: _ :: "Ar" :: p :: _ -> Some (Mandatory p)
|
||||
| _ -> None
|
||||
in
|
||||
match words with
|
||||
| "Fl" :: c :: _ when String.length c = 1 && is_alphanumeric c.[0] ->
|
||||
Some { switch = Short c.[0]; param; desc = "" }
|
||||
| "Fl" :: name :: _ when String.length name > 1 && name.[0] = '-' ->
|
||||
Some { switch = Long (String.sub name 1 (String.length name - 1)); param; desc = "" }
|
||||
| _ -> None
|
||||
|
||||
let positional_of_mdoc_line optional args =
|
||||
let words = String.split_on_char ' ' args
|
||||
|> List.filter (fun w -> w <> "") in
|
||||
match words with
|
||||
| name :: _ when String.length name >= 2 ->
|
||||
Some { pos_name = String.lowercase_ascii name;
|
||||
optional; variadic = List.mem "..." words }
|
||||
| _ -> None
|
||||
|
||||
let parse_mdoc_lines lines =
|
||||
let classified = List.map classify_line lines in
|
||||
let rec skip_to_el = function
|
||||
| [] -> []
|
||||
| Macro ("El", _) :: rest -> rest
|
||||
| _ :: rest -> skip_to_el rest
|
||||
in
|
||||
let rec collect_desc acc = function
|
||||
| [] -> (acc, [])
|
||||
| (Macro ("It", _) | Macro ("El", _)
|
||||
| Macro ("Sh", _) | Macro ("Ss", _)) :: _ as rest -> (acc, rest)
|
||||
| line :: rest ->
|
||||
collect_desc (match mdoc_text_of line with Some s -> s :: acc | None -> acc) rest
|
||||
in
|
||||
let desc_of rest =
|
||||
let parts, rest = collect_desc [] rest in
|
||||
(String.concat " " (List.rev parts) |> String.trim, rest)
|
||||
in
|
||||
let parse_it args rest entries =
|
||||
let desc, rest = desc_of rest in
|
||||
let entries = match parse_mdoc_it args with
|
||||
| Some e -> { e with desc } :: entries
|
||||
| None -> entries
|
||||
in
|
||||
(entries, rest)
|
||||
in
|
||||
let rec parse_option_list entries = function
|
||||
| [] -> (entries, [])
|
||||
| Macro ("El", _) :: rest -> (entries, rest)
|
||||
| Macro ("It", args) :: rest ->
|
||||
let entries, rest = parse_it args rest entries in
|
||||
parse_option_list entries rest
|
||||
| _ :: rest -> parse_option_list entries rest
|
||||
in
|
||||
let rec scan entries positionals = function
|
||||
| [] -> (entries, positionals)
|
||||
| Macro ("Bl", _) :: Macro ("It", it_args) :: rest ->
|
||||
let words = String.split_on_char ' ' it_args
|
||||
|> List.filter (fun w -> w <> "") in
|
||||
if (match words with "Fl" :: _ -> true | _ -> false) then
|
||||
let entries, rest = parse_it it_args rest entries in
|
||||
let entries, rest = parse_option_list entries rest in
|
||||
scan entries positionals rest
|
||||
else
|
||||
scan entries positionals (skip_to_el rest)
|
||||
| Macro ("Bl", _) :: rest -> scan entries positionals (skip_to_el rest)
|
||||
| Macro ("Sh", args) :: rest
|
||||
when String.uppercase_ascii (String.trim args) = "SYNOPSIS" ->
|
||||
let positionals, rest = parse_synopsis positionals rest in
|
||||
scan entries positionals rest
|
||||
| _ :: rest -> scan entries positionals rest
|
||||
and parse_synopsis positionals = function
|
||||
| [] -> (positionals, [])
|
||||
| Macro ("Sh", _) :: _ as rest -> (positionals, rest)
|
||||
| Macro ("Ar", args) :: rest ->
|
||||
let positionals = match positional_of_mdoc_line false args with
|
||||
| Some p -> p :: positionals | None -> positionals in
|
||||
parse_synopsis positionals rest
|
||||
| Macro ("Op", args) :: rest ->
|
||||
let words = String.split_on_char ' ' args
|
||||
|> List.filter (fun w -> w <> "") in
|
||||
let positionals = match words with
|
||||
| "Ar" :: _ ->
|
||||
(match positional_of_mdoc_line true args with
|
||||
| Some p -> p :: positionals | None -> positionals)
|
||||
| _ -> positionals in
|
||||
parse_synopsis positionals rest
|
||||
| _ :: rest -> parse_synopsis positionals rest
|
||||
in
|
||||
let entries, positionals = scan [] [] classified in
|
||||
let positionals =
|
||||
List.rev positionals
|
||||
|> List.fold_left (fun (seen, acc) p ->
|
||||
if List.mem p.pos_name seen then (seen, acc)
|
||||
else (p.pos_name :: seen, p :: acc)
|
||||
) ([], [])
|
||||
|> snd |> List.rev
|
||||
in
|
||||
{ entries = List.rev entries; subcommands = []; positionals; description = "" }
|
||||
|
||||
(* --- COMMANDS section subcommand extraction --- *)
|
||||
|
||||
(* Extract subcommands from COMMANDS/COMMAND sections.
|
||||
These use .PP + bold name + .RS/.RE blocks, e.g.:
|
||||
.PP
|
||||
\fBstart\fR \fIUNIT\fR...
|
||||
.RS 4
|
||||
Start (activate) one or more units.
|
||||
.RE *)
|
||||
let extract_commands_section lines =
|
||||
let classified = List.map classify_line lines in
|
||||
let rec collect_until_next_sh lines acc =
|
||||
match lines with
|
||||
| [] -> List.rev acc
|
||||
| Macro ("SH", _) :: _ -> List.rev acc
|
||||
| line :: rest -> collect_until_next_sh rest (line :: acc)
|
||||
in
|
||||
let is_commands_section name =
|
||||
let s = String.uppercase_ascii (String.trim name) in
|
||||
s = "COMMANDS" || s = "COMMAND"
|
||||
in
|
||||
let rec find_commands acc = function
|
||||
| [] -> List.rev acc
|
||||
| Macro ("SH", args) :: rest when is_commands_section args ->
|
||||
find_commands (collect_until_next_sh rest [] :: acc) rest
|
||||
| _ :: rest -> find_commands acc rest
|
||||
in
|
||||
let sections = find_commands [] classified in
|
||||
List.concat sections
|
||||
|
||||
(* Extract subcommand name from a bold groff text like
|
||||
"\fBlist\-units\fR [\fIPATTERN\fR...]" → "list-units" *)
|
||||
let extract_bold_command_name text =
|
||||
let s = String.trim text in
|
||||
(* Look for \fB...\fR at the start *)
|
||||
if String.length s >= 4
|
||||
&& s.[0] = '\\' && s.[1] = 'f' && s.[2] = 'B' then
|
||||
let start = 3 in
|
||||
let end_marker = "\\fR" in
|
||||
match String.split_on_char '\\' (String.sub s start (String.length s - start)) with
|
||||
| name_part :: _ ->
|
||||
let name = strip_groff_escapes ("\\fB" ^ name_part ^ end_marker) |> String.trim in
|
||||
(* Must look like a subcommand: lowercase, hyphens, no leading dash *)
|
||||
if String.length name >= 2
|
||||
&& name.[0] <> '-'
|
||||
&& String.for_all (fun c ->
|
||||
(c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c = '-' || c = '_'
|
||||
) name
|
||||
then Some name
|
||||
else None
|
||||
| [] -> None
|
||||
else
|
||||
(* Try already-stripped text *)
|
||||
let stripped = strip_groff_escapes s in
|
||||
let first_word = match String.split_on_char ' ' stripped with
|
||||
| w :: _ -> w | [] -> "" in
|
||||
if String.length first_word >= 2
|
||||
&& first_word.[0] <> '-'
|
||||
&& String.for_all (fun c ->
|
||||
(c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c = '-' || c = '_'
|
||||
) first_word
|
||||
then Some first_word
|
||||
else None
|
||||
|
||||
let extract_subcommands_from_commands lines =
|
||||
let rec walk lines acc =
|
||||
match lines with
|
||||
| [] -> List.rev acc
|
||||
| Macro ("PP", _) :: rest ->
|
||||
begin match rest with
|
||||
| Text tag :: rest2 ->
|
||||
(* Check if this is a subcommand (bold name, not a flag) *)
|
||||
begin match extract_bold_command_name tag with
|
||||
| Some name ->
|
||||
(* Collect description from .RS/.RE block *)
|
||||
let rec collect_desc lines desc_acc =
|
||||
match lines with
|
||||
| Macro ("RS", _) :: rest3 ->
|
||||
collect_in_rs rest3 desc_acc
|
||||
| Text s :: rest3 ->
|
||||
collect_desc rest3 (s :: desc_acc)
|
||||
| _ -> (String.concat " " (List.rev desc_acc), lines)
|
||||
and collect_in_rs lines desc_acc =
|
||||
match lines with
|
||||
| Macro ("RE", _) :: rest3 ->
|
||||
(String.concat " " (List.rev desc_acc), rest3)
|
||||
| Text s :: rest3 ->
|
||||
collect_in_rs rest3 (s :: desc_acc)
|
||||
| Macro ("PP", _) :: _ | Macro ("SH", _) :: _ | Macro ("SS", _) :: _ ->
|
||||
(String.concat " " (List.rev desc_acc), lines)
|
||||
| _ :: rest3 -> collect_in_rs rest3 desc_acc
|
||||
| [] -> (String.concat " " (List.rev desc_acc), [])
|
||||
in
|
||||
let (desc, rest3) = collect_desc rest2 [] in
|
||||
let desc = String.trim desc in
|
||||
(* Take first sentence as description *)
|
||||
let short_desc = match String.split_on_char '.' desc with
|
||||
| first :: _ when String.length first > 0 -> String.trim first
|
||||
| _ -> desc in
|
||||
let sc : subcommand = { name; desc = short_desc } in
|
||||
walk rest3 (sc :: acc)
|
||||
| None -> walk rest2 acc
|
||||
end
|
||||
| _ -> walk rest acc
|
||||
end
|
||||
| _ :: rest -> walk rest acc
|
||||
in
|
||||
walk lines []
|
||||
|
||||
(* --- Top-level API --- *)
|
||||
|
||||
let parse_manpage_lines lines =
|
||||
if is_mdoc lines then
|
||||
parse_mdoc_lines lines
|
||||
else begin
|
||||
let options_section = extract_options_section lines in
|
||||
let entries = extract_entries options_section in
|
||||
let positionals = extract_synopsis_positionals_lines lines in
|
||||
let commands_section = extract_commands_section lines in
|
||||
let subcommands = extract_subcommands_from_commands commands_section in
|
||||
{ entries; subcommands; positionals; description = "" }
|
||||
end
|
||||
|
||||
let parse_manpage_string contents =
|
||||
let lines = String.split_on_char '\n' contents in
|
||||
let result = parse_manpage_lines lines in
|
||||
let description = match extract_name_description contents with
|
||||
| Some d -> d | None -> "" in
|
||||
{ result with description }
|
||||
|
||||
(* --- Clap-style SUBCOMMAND section extraction --- *)
|
||||
(* Manpages generated by clap (Rust) put each subcommand under its own
|
||||
.SH SUBCOMMAND header with a Usage: line giving the name. *)
|
||||
|
||||
let extract_subcommand_sections contents =
|
||||
let lines = String.split_on_char '\n' contents in
|
||||
let classified = List.map classify_line lines in
|
||||
(* Split into sections at .SH boundaries *)
|
||||
let rec collect_sections acc current_name current_lines = function
|
||||
| [] ->
|
||||
let acc = match current_name with
|
||||
| Some n -> (n, List.rev current_lines) :: acc
|
||||
| None -> acc in
|
||||
List.rev acc
|
||||
| Macro ("SH", args) :: rest ->
|
||||
let acc = match current_name with
|
||||
| Some n -> (n, List.rev current_lines) :: acc
|
||||
| None -> acc in
|
||||
let name = String.uppercase_ascii (String.trim args) in
|
||||
if name = "SUBCOMMAND" || name = "SUBCOMMANDS" then
|
||||
collect_sections acc (Some name) [] rest
|
||||
else
|
||||
collect_sections acc None [] rest
|
||||
| line :: rest ->
|
||||
collect_sections acc current_name (line :: current_lines) rest
|
||||
in
|
||||
let sections = collect_sections [] None [] classified in
|
||||
(* For each SUBCOMMAND section, extract name from Usage: line and parse entries *)
|
||||
let usage_re = Str.regexp {|Usage: \([a-zA-Z0-9_-]+\)|} in
|
||||
let matches_usage s =
|
||||
try ignore (Str.search_forward usage_re s 0); Some (Str.matched_group 1 s)
|
||||
with Not_found -> None in
|
||||
List.filter_map (fun (_header, section_lines) ->
|
||||
let name, desc_lines =
|
||||
List.fold_left (fun (name, desc_lines) line ->
|
||||
match name with
|
||||
| Some _ -> (name, desc_lines)
|
||||
| None ->
|
||||
match line with
|
||||
| Text s ->
|
||||
(match matches_usage s with
|
||||
| Some _ as found -> (found, desc_lines)
|
||||
| None -> (None, s :: desc_lines))
|
||||
| Macro (("TP" | "B" | "BI" | "BR"), args) ->
|
||||
let s = strip_inline_macro_args args |> strip_groff_escapes |> String.trim in
|
||||
(matches_usage s, desc_lines)
|
||||
| _ -> (None, desc_lines)
|
||||
) (None, []) section_lines in
|
||||
match name with
|
||||
| None -> None
|
||||
| Some subcmd_name ->
|
||||
let entries = extract_entries section_lines in
|
||||
let desc = String.concat " " (List.rev desc_lines)
|
||||
|> strip_groff_escapes |> String.trim in
|
||||
let desc = Str.global_replace (Str.regexp "`\\([^`]*\\)`") "\\1" desc in
|
||||
Some (subcmd_name, desc, { entries; subcommands = []; positionals = []; description = desc })
|
||||
) sections
|
||||
|
||||
let read_manpage_file path =
|
||||
if Filename.check_suffix path ".gz" then begin
|
||||
let ic = Gzip.open_in path in
|
||||
let buf = Buffer.create 8192 in
|
||||
let chunk = Bytes.create 8192 in
|
||||
(try while true do
|
||||
let n = Gzip.input ic chunk 0 8192 in
|
||||
if n = 0 then raise Exit
|
||||
else Buffer.add_subbytes buf chunk 0 n
|
||||
done with Exit | End_of_file -> ());
|
||||
Gzip.close_in ic;
|
||||
Buffer.contents buf
|
||||
end else begin
|
||||
let ic = open_in path in
|
||||
let n = in_channel_length ic in
|
||||
let s = Bytes.create n in
|
||||
really_input ic s 0 n;
|
||||
close_in ic;
|
||||
Bytes.to_string s
|
||||
end
|
||||
|
||||
let parse_manpage_file path =
|
||||
read_manpage_file path |> parse_manpage_string
|
||||
Loading…
Add table
Add a link
Reference in a new issue