comprehensive completion generation: native, manpage, --help
Three-strategy pipeline with priority: native completion generators (e.g. CMD completions nushell) > manpage parsing > --help fallback. Single `generate` command produces one module-wrapped .nu file per command. Parallel execution scaled to cores, 200ms timeouts, ELF string scanning to skip binaries without -h support, native gzip decompression via camlzip, SYNOPSIS-based subcommand detection, nix3 manpage strategy, deduplication, nushell builtin exclusion.
This commit is contained in:
parent
01ccf64efc
commit
7f0ec8ab4d
9 changed files with 937 additions and 265 deletions
207
lib/manpage.ml
207
lib/manpage.ml
|
|
@ -6,6 +6,11 @@ let strip_groff_escapes s =
|
|||
let buf = Buffer.create (String.length s) in
|
||||
let len = String.length s in
|
||||
let i = ref 0 in
|
||||
let last = ref '\000' in
|
||||
let put c = Buffer.add_char buf c; last := c in
|
||||
let is_alnum c =
|
||||
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
|
||||
in
|
||||
while !i < len do
|
||||
if s.[!i] = '\\' && !i + 1 < len then begin
|
||||
let next = s.[!i + 1] in
|
||||
|
|
@ -13,10 +18,13 @@ let strip_groff_escapes s =
|
|||
| 'f' ->
|
||||
(* Font escape: \fB, \fI, \fP, \fR, \f(XX, \f[...] *)
|
||||
if !i + 2 < len then begin
|
||||
if s.[!i + 2] = '(' then
|
||||
i := !i + 4 (* \f(XX *)
|
||||
else if s.[!i + 2] = '[' then begin
|
||||
(* \f[...] - skip to ] *)
|
||||
let fc = s.[!i + 2] in
|
||||
(* Insert space before italic font to preserve word boundaries
|
||||
e.g. \fB--max-results\fR\fIcount\fR → "--max-results count" *)
|
||||
if fc = 'I' && is_alnum !last then put ' ';
|
||||
if fc = '(' then
|
||||
i := !i + 5 (* \f(XX *)
|
||||
else if fc = '[' then begin
|
||||
i := !i + 3;
|
||||
while !i < len && s.[!i] <> ']' do incr i done;
|
||||
if !i < len then incr i
|
||||
|
|
@ -25,7 +33,7 @@ let strip_groff_escapes s =
|
|||
end else
|
||||
i := !i + 2
|
||||
| '-' ->
|
||||
Buffer.add_char buf '-';
|
||||
put '-';
|
||||
i := !i + 2
|
||||
| '&' | '/' | ',' ->
|
||||
(* Zero-width characters *)
|
||||
|
|
@ -35,10 +43,10 @@ let strip_groff_escapes s =
|
|||
if !i + 3 < len then begin
|
||||
let name = String.sub s (!i + 2) 2 in
|
||||
(match name with
|
||||
| "aq" -> Buffer.add_char buf '\''
|
||||
| "lq" | "Lq" -> Buffer.add_char buf '"'
|
||||
| "rq" | "Rq" -> Buffer.add_char buf '"'
|
||||
| "em" | "en" -> Buffer.add_char buf '-'
|
||||
| "aq" -> put '\''
|
||||
| "lq" | "Lq" -> put '"'
|
||||
| "rq" | "Rq" -> put '"'
|
||||
| "em" | "en" -> put '-'
|
||||
| _ -> ());
|
||||
i := !i + 4
|
||||
end else
|
||||
|
|
@ -51,9 +59,9 @@ let strip_groff_escapes s =
|
|||
if !i < len then begin
|
||||
let name = String.sub s start (!i - start) in
|
||||
(match name with
|
||||
| "aq" -> Buffer.add_char buf '\''
|
||||
| "lq" | "Lq" -> Buffer.add_char buf '"'
|
||||
| "rq" | "Rq" -> Buffer.add_char buf '"'
|
||||
| "aq" -> put '\''
|
||||
| "lq" | "Lq" -> put '"'
|
||||
| "rq" | "Rq" -> put '"'
|
||||
| _ -> ());
|
||||
incr i
|
||||
end
|
||||
|
|
@ -106,19 +114,19 @@ let strip_groff_escapes s =
|
|||
incr i
|
||||
end
|
||||
| 'e' ->
|
||||
Buffer.add_char buf '\\';
|
||||
put '\\';
|
||||
i := !i + 2
|
||||
| '\\' ->
|
||||
Buffer.add_char buf '\\';
|
||||
put '\\';
|
||||
i := !i + 2
|
||||
| ' ' ->
|
||||
Buffer.add_char buf ' ';
|
||||
put ' ';
|
||||
i := !i + 2
|
||||
| _ ->
|
||||
(* Unknown escape, skip *)
|
||||
i := !i + 2
|
||||
end else begin
|
||||
Buffer.add_char buf s.[!i];
|
||||
put s.[!i];
|
||||
incr i
|
||||
end
|
||||
done;
|
||||
|
|
@ -262,18 +270,29 @@ let parse_tag_to_entry tag desc =
|
|||
| Ok (switch, param) -> Some { switch; param; desc }
|
||||
| Error _ -> None
|
||||
|
||||
(* Turn the argument text of a font macro into plain tag text.
   For .B and .I the arguments are taken as written; for any other macro
   name they first go through strip_inline_macro_args. In both cases groff
   escapes are then stripped and surrounding whitespace is trimmed. *)
let tag_of_macro name args =
  let raw =
    if name = "B" || name = "I" then args
    else strip_inline_macro_args args
  in
  String.trim (strip_groff_escapes raw)
|
||||
|
||||
(* Strategy A: .TP style (most common — GNU coreutils, help2man) *)
|
||||
let strategy_tp lines =
|
||||
let rec walk lines acc =
|
||||
match lines with
|
||||
| [] -> List.rev acc
|
||||
| Macro ("TP", _) :: rest ->
|
||||
(* Next text line is the tag *)
|
||||
(* Next line is the tag — could be Text or a formatting macro *)
|
||||
begin match rest with
|
||||
| Text tag :: rest2 ->
|
||||
let (desc, rest3) = collect_text_lines rest2 [] in
|
||||
let entry = parse_tag_to_entry tag desc in
|
||||
walk rest3 (match entry with Some e -> e :: acc | None -> acc)
|
||||
| Macro (("B" | "I" | "BI" | "BR" | "IR") as m, args) :: rest2 ->
|
||||
let tag = tag_of_macro m args in
|
||||
let (desc, rest3) = collect_text_lines rest2 [] in
|
||||
let entry = parse_tag_to_entry tag desc in
|
||||
walk rest3 (match entry with Some e -> e :: acc | None -> acc)
|
||||
| _ -> walk rest acc
|
||||
end
|
||||
| _ :: rest -> walk rest acc
|
||||
|
|
@ -352,6 +371,62 @@ let strategy_deroff_lines lines =
|
|||
| Ok result -> result.entries
|
||||
| Error _ -> []
|
||||
|
||||
(* Strategy E: Nix3-style bullet .IP with .UR/.UE hyperlinks.
   Walks the classified lines looking for an .IP macro with a non-empty
   argument. The Text lines that follow it (ignoring .UR/.UE macros) form the
   tag; if the tag is immediately followed by an .IP with an empty argument,
   the text after that .IP forms the description. Tag and description are
   handed to parse_tag_to_entry, and entries it accepts are returned in
   document order. *)
let strategy_nix lines =
  (* An .IP with a non-blank argument opens a new entry; an .IP with a blank
     argument introduces (or continues) a description. *)
  let starts_entry args = String.trim args <> "" in
  (* Pieces are accumulated in reverse; join them back in order. *)
  let join rev_parts = String.concat " " (List.rev rev_parts) in
  (* Tag: consecutive Text lines, with .UR/.UE skipped. Stops at the first
     line of any other kind and returns the unconsumed remainder. *)
  let rec gather_tag acc = function
    | Macro (("UR" | "UE"), _) :: tl -> gather_tag acc tl
    | Text s :: tl -> gather_tag (s :: acc) tl
    | remaining -> (join acc, remaining)
  in
  (* Description text: runs until the next entry-opening .IP, an .SS/.SH
     heading, or end of input. .RS/.RE regions are dropped entirely; every
     other macro, blank line and comment is skipped over. *)
  let rec gather_desc acc = function
    | Text s :: tl -> gather_desc (s :: acc) tl
    | (Macro ("IP", args) :: _) as remaining when starts_entry args ->
      (join acc, remaining)
    | (Macro (("SS" | "SH"), _) :: _) as remaining -> (join acc, remaining)
    | Macro ("RS", _) :: tl -> skip_indented acc 1 tl
    | (Macro _ | Blank | Comment) :: tl -> gather_desc acc tl
    | [] -> (join acc, [])
  (* Discard everything up to the .RE that closes the current .RS nesting. *)
  and skip_indented acc depth = function
    | Macro ("RE", _) :: tl ->
      if depth <= 1 then gather_desc acc tl
      else skip_indented acc (depth - 1) tl
    | Macro ("RS", _) :: tl -> skip_indented acc (depth + 1) tl
    | _ :: tl -> skip_indented acc depth tl
    | [] -> (join acc, [])
  in
  (* A description exists only when the tag is directly followed by an .IP
     with a blank argument; otherwise it is the empty string. *)
  let description_of = function
    | Macro ("IP", args) :: tl when not (starts_entry args) -> gather_desc [] tl
    | remaining -> ("", remaining)
  in
  let rec walk acc = function
    | [] -> List.rev acc
    | Macro ("IP", args) :: tl when starts_entry args ->
      let tag, after_tag = gather_tag [] tl in
      let desc, after_desc = description_of after_tag in
      let acc =
        match parse_tag_to_entry tag desc with
        | Some entry -> entry :: acc
        | None -> acc
      in
      walk acc after_desc
    | _ :: tl -> walk acc tl
  in
  walk [] lines
|
||||
|
||||
(* Count macros of a given type *)
|
||||
let count_macro name lines =
|
||||
List.fold_left (fun n line ->
|
||||
|
|
@ -370,46 +445,106 @@ let extract_entries lines =
|
|||
(* Try PP+RS if both present *)
|
||||
if count_macro "PP" lines > 0 && count_macro "RS" lines > 0 then
|
||||
results := ("PP+RS", strategy_pp_rs lines) :: !results;
|
||||
(* Try nix3 style if UR macros present *)
|
||||
if count_macro "UR" lines > 0 && count_macro "IP" lines > 0 then
|
||||
results := ("nix", strategy_nix lines) :: !results;
|
||||
(* Always try deroff as fallback *)
|
||||
results := ("deroff", strategy_deroff_lines lines) :: !results;
|
||||
(* Pick the result with the most entries *)
|
||||
(* Prefer specialized strategies over deroff fallback *)
|
||||
let specialized =
|
||||
List.filter (fun (name, entries) -> name <> "deroff" && entries <> []) !results
|
||||
in
|
||||
let candidates = if specialized <> [] then specialized else !results in
|
||||
let best =
|
||||
List.fold_left (fun (best_name, best_entries) (name, entries) ->
|
||||
if List.length entries >= List.length best_entries then (name, entries)
|
||||
else (best_name, best_entries)
|
||||
) ("none", []) !results
|
||||
) ("none", []) candidates
|
||||
in
|
||||
snd best
|
||||
|
||||
(* --- SYNOPSIS command name extraction --- *)
|
||||
|
||||
(* Find the command name shown in the SYNOPSIS section.
   [lines] are raw manpage lines; they are classified with classify_line.
   After the first `.SH` whose (trimmed, case-insensitive) argument is
   SYNOPSIS, the first non-empty candidate line is examined: a Text line, or
   the arguments of a .B/.BI/.BR macro after strip_inline_macro_args and
   strip_groff_escapes. From that line the leading run of words made only of
   letters, digits, '-', '_' and '.' is taken, stopping at the first word that
   starts with '[', '-', '<', '(' or '{' or contains any other character.
   Returns [Some "word1 word2 ..."] or [None] when there is no SYNOPSIS
   section, when the next .SH is reached first, or when the candidate line
   does not begin with a command-like word.

   Empty candidates (whitespace-only text, or a bare `.B` whose text sits on
   the following input line) are skipped rather than ending the search.

   NOTE(review): Text lines are used as produced by classify_line; whether
   they may still carry groff font escapes is not visible here - confirm. *)
let extract_synopsis_command_lines lines =
  let classified = List.map classify_line lines in
  let is_synopsis name =
    String.uppercase_ascii (String.trim name) = "SYNOPSIS"
  in
  let is_cmd_char = function
    | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.' -> true
    | _ -> false
  in
  (* Words opening with one of these characters are options or placeholders,
     not part of the command name. Only called on non-empty words. *)
  let starts_argument w =
    match w.[0] with
    | '[' | '-' | '<' | '(' | '{' -> true
    | _ -> false
  in
  let extract_cmd line =
    let words =
      String.trim line
      |> String.split_on_char ' '
      |> List.filter (fun w -> w <> "")
    in
    (* Tail-recursive: collect command words until the first non-command one. *)
    let rec take acc = function
      | w :: rest when not (starts_argument w) && String.for_all is_cmd_char w ->
        take (w :: acc) rest
      | _ -> List.rev acc
    in
    match take [] words with
    | [] -> None
    | cmd -> Some (String.concat " " cmd)
  in
  let rec find = function
    | [] -> None
    | Macro ("SH", args) :: rest when is_synopsis args -> collect rest
    | _ :: rest -> find rest
  and collect = function
    | [] -> None
    | Macro ("SH", _) :: _ -> None
    | Text s :: rest -> first_candidate (String.trim s) rest
    | Macro (("B" | "BI" | "BR"), args) :: rest ->
      let s =
        strip_inline_macro_args args |> strip_groff_escapes |> String.trim
      in
      first_candidate s rest
    | _ :: rest -> collect rest
  (* An empty candidate carries no information; keep scanning the section. *)
  and first_candidate s rest =
    if s = "" then collect rest else extract_cmd s
  in
  find classified
|
||||
|
||||
(* Convenience wrapper: split raw manpage text on newlines and pass the
   resulting lines to extract_synopsis_command_lines. *)
let extract_synopsis_command contents =
  contents |> String.split_on_char '\n' |> extract_synopsis_command_lines
|
||||
|
||||
(* --- Top-level API --- *)
|
||||
|
||||
(* Parse a manpage given as a list of lines: the options section is isolated
   with extract_options_section and turned into entries by extract_entries.
   The result record always carries an empty subcommands list. *)
let parse_manpage_lines lines =
  let entries = lines |> extract_options_section |> extract_entries in
  { entries; subcommands = [] }
|
||||
|
||||
(* Parse a manpage held in memory: split the text on newlines and delegate to
   parse_manpage_lines. *)
let parse_manpage_string contents =
  contents |> String.split_on_char '\n' |> parse_manpage_lines
|
||||
|
||||
(* Parse a gzip-compressed manpage by piping it through `gzip -dc`.
   [path] is shell-quoted with Filename.quote before being placed on the
   command line. The process output is read line by line (each line is
   re-terminated with '\n') and the resulting text is handed to
   parse_manpage_string. The exit status of gzip is deliberately ignored, as
   before. The process channel is now closed even when reading raises, so a
   failed read no longer leaks the pipe and the child process. *)
let parse_manpage_gzipped_file path =
  let ic =
    Unix.open_process_in (Printf.sprintf "gzip -dc %s" (Filename.quote path))
  in
  let contents =
    Fun.protect
      ~finally:(fun () ->
        ignore (Unix.close_process_in ic : Unix.process_status))
      (fun () ->
        let buf = Buffer.create 4096 in
        (try
           while true do
             Buffer.add_string buf (input_line ic);
             Buffer.add_char buf '\n'
           done
         with End_of_file -> ());
        Buffer.contents buf)
  in
  parse_manpage_string contents
|
||||
|
||||
(* Read the complete contents of the manpage at [path] into a string.
   Paths ending in ".gz" are decompressed in-process through the Gzip module
   in 8192-byte chunks; any other path is read verbatim. Both channels are
   closed through Fun.protect, so an exception raised while reading (for
   example from Gzip.input, or Sys_error) no longer leaks the channel.
   The plain file is opened in binary mode so that the byte count reported by
   in_channel_length matches what is actually read. *)
let read_manpage_file path =
  if Filename.check_suffix path ".gz" then begin
    let ic = Gzip.open_in path in
    Fun.protect
      ~finally:(fun () -> Gzip.close_in ic)
      (fun () ->
        let buf = Buffer.create 8192 in
        let chunk = Bytes.create 8192 in
        (* A read of 0 bytes means end of stream; End_of_file is treated the
           same way, as the previous loop did. *)
        let rec drain () =
          match Gzip.input ic chunk 0 (Bytes.length chunk) with
          | 0 -> ()
          | n ->
            Buffer.add_subbytes buf chunk 0 n;
            drain ()
          | exception End_of_file -> ()
        in
        drain ();
        Buffer.contents buf)
  end else begin
    let ic = open_in_bin path in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () -> really_input_string ic (in_channel_length ic))
  end

(* Parse the manpage stored at [path] (optionally gzip-compressed): load it
   with read_manpage_file and parse the text with parse_manpage_string. *)
let parse_manpage_file path =
  read_manpage_file path |> parse_manpage_string
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue