diff --git a/bin/main.ml b/bin/main.ml index 66d753a..2221cf4 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -279,7 +279,15 @@ let process_manpage file = if is_nushell_builtin cmd then None else let result = parse_manpage_string contents in - if result.entries <> [] then Some (cmd, result) else None + let sub_sections = extract_subcommand_sections contents in + let result = if sub_sections <> [] then + { result with subcommands = List.map (fun (name, desc, _) -> + { name; desc }) sub_sections } + else result in + (* Pair each subcommand result with its qualified name: the parent command and the subcommand name joined by a single space *) let subs = List.map (fun (name, _desc, r) -> + (cmd ^ " " ^ name, r)) sub_sections in + (* A page is kept when it has entries of its own or at least one extracted subcommand *) if result.entries <> [] || subs <> [] then Some (cmd, result, subs) + else None with _ -> None let manpaged_commands mandir = @@ -499,12 +507,19 @@ let cmd_index bindirs mandirs ignorelist dir = Array.iter (fun file -> match process_manpage (Filename.concat subdir file) with | None -> () - | Some (cmd, result) -> + | Some (cmd, result, subs) -> if not (SSet.mem cmd !done_cmds) then begin write_result ~dir ~source:"manpage" cmd result; done_cmds := SSet.add cmd !done_cmds; incr n_results - end + end; + (* Subcommands are written independently of the parent command; each one is deduplicated through done_cmds under its qualified name *) List.iter (fun (sub_cmd, sub_result) -> + if not (SSet.mem sub_cmd !done_cmds) then begin + write_result ~dir ~source:"manpage" sub_cmd sub_result; + done_cmds := SSet.add sub_cmd !done_cmds; + incr n_results + end + ) subs ) files end ) command_sections diff --git a/lib/manpage.ml b/lib/manpage.ml index ff48b68..436ca21 100644 --- a/lib/manpage.ml +++ b/lib/manpage.ml @@ -717,6 +717,65 @@ let parse_manpage_string contents = | Some d -> d | None -> "" in { result with description } +(* --- Clap-style SUBCOMMAND section extraction --- *) +(* Manpages generated by clap (Rust) put each subcommand under its own + .SH SUBCOMMAND header with a Usage: line giving the name. extract_subcommand_sections returns one (name, description, result) triple per SUBCOMMAND or SUBCOMMANDS section (header text is trimmed and uppercased before comparison) in which a Usage: line is found; sections without one are omitted. The result record carries the entries parsed from the section, empty subcommands and positionals, and the same description. 
*) + +let extract_subcommand_sections contents = + let lines = String.split_on_char '\n' contents in + let classified = List.map classify_line lines in + (* Split into sections at .SH boundaries. Only bodies under a SUBCOMMAND or SUBCOMMANDS header are kept; lines under any other header, or before the first .SH, are discarded. Sections come back in document order. *) + let rec collect_sections acc current_name current_lines = function + | [] -> + let acc = match current_name with + | Some n -> (n, List.rev current_lines) :: acc + | None -> acc in + List.rev acc + | Macro ("SH", args) :: rest -> + let acc = match current_name with + | Some n -> (n, List.rev current_lines) :: acc + | None -> acc in + let name = String.uppercase_ascii (String.trim args) in + if name = "SUBCOMMAND" || name = "SUBCOMMANDS" then + collect_sections acc (Some name) [] rest + else + collect_sections acc None [] rest + | line :: rest -> + collect_sections acc current_name (line :: current_lines) rest + in + let sections = collect_sections [] None [] classified in + (* For each SUBCOMMAND section, extract name from Usage: line and parse entries. usage_re captures, as group 1, the run of letters, digits, underscores and hyphens that directly follows Usage: and one space. *) + let usage_re = Str.regexp {|Usage: \([a-zA-Z0-9_-]+\)|} in + List.filter_map (fun (_header, section_lines) -> + (* Find subcommand name from Usage: line. The scan only acts while no name has been set; Text lines seen before the match are accumulated in reverse order as the description. Usage: is looked for in Text lines as-is and in TP, B, BI and BR macro arguments after stripping. *) + let name = ref None in + let desc_lines = ref [] in + List.iter (fun line -> + if !name = None then + match line with + | Text s -> + if try ignore (Str.search_forward usage_re s 0); true + with Not_found -> false + then name := Some (Str.matched_group 1 s) + else desc_lines := s :: !desc_lines + | Macro (("TP" | "B" | "BI" | "BR"), args) -> + let s = strip_inline_macro_args args |> strip_groff_escapes |> String.trim in + if try ignore (Str.search_forward usage_re s 0); true + with Not_found -> false + then name := Some (Str.matched_group 1 s) + | _ -> () + ) section_lines; + match !name with + | None -> None + | Some subcmd_name -> + (* Entries are parsed from the whole section body, not only from the lines after Usage: *) let entries = extract_entries section_lines in + let desc = String.concat " " (List.rev !desc_lines) + |> strip_groff_escapes |> String.trim in + (* Remove backtick quoting common in clap output: each backtick-delimited span is replaced by its inner text *) + let desc = Str.global_replace 
(Str.regexp "`\\([^`]*\\)`") "\\1" desc in + Some (subcmd_name, desc, { entries; subcommands = []; positionals = []; description = desc }) + ) sections + let read_manpage_file path = if Filename.check_suffix path ".gz" then begin let ic = Gzip.open_in path in