fix partial, systemctl manpage

This commit is contained in:
atagen 2026-03-23 22:01:46 +11:00
parent 71de2e7b4b
commit 76eb2c2aef
4 changed files with 176 additions and 12 deletions

View file

@ -698,6 +698,115 @@ let parse_mdoc_lines lines =
in
{ entries = List.rev entries; subcommands = []; positionals; description = "" }
(* --- COMMANDS section subcommand extraction --- *)
(* Extract subcommands from COMMANDS/COMMAND sections.
These use .PP + bold name + .RS/.RE blocks, e.g.:
.PP
\fBstart\fR \fIUNIT\fR...
.RS 4
Start (activate) one or more units.
.RE *)
let extract_commands_section lines =
let classified = List.map classify_line lines in
let rec collect_until_next_sh lines acc =
match lines with
| [] -> List.rev acc
| Macro ("SH", _) :: _ -> List.rev acc
| line :: rest -> collect_until_next_sh rest (line :: acc)
in
let is_commands_section name =
let s = String.uppercase_ascii (String.trim name) in
s = "COMMANDS" || s = "COMMAND"
in
let rec find_commands acc = function
| [] -> List.rev acc
| Macro ("SH", args) :: rest when is_commands_section args ->
find_commands (collect_until_next_sh rest [] :: acc) rest
| _ :: rest -> find_commands acc rest
in
let sections = find_commands [] classified in
List.concat sections
(* Extract subcommand name from a bold groff text like
"\fBlist\-units\fR [\fIPATTERN\fR...]" "list-units" *)
let extract_bold_command_name text =
let s = String.trim text in
(* Look for \fB...\fR at the start *)
if String.length s >= 4
&& s.[0] = '\\' && s.[1] = 'f' && s.[2] = 'B' then
let start = 3 in
let end_marker = "\\fR" in
match String.split_on_char '\\' (String.sub s start (String.length s - start)) with
| name_part :: _ ->
let name = strip_groff_escapes ("\\fB" ^ name_part ^ end_marker) |> String.trim in
(* Must look like a subcommand: lowercase, hyphens, no leading dash *)
if String.length name >= 2
&& name.[0] <> '-'
&& String.for_all (fun c ->
(c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c = '-' || c = '_'
) name
then Some name
else None
| [] -> None
else
(* Try already-stripped text *)
let stripped = strip_groff_escapes s in
let first_word = match String.split_on_char ' ' stripped with
| w :: _ -> w | [] -> "" in
if String.length first_word >= 2
&& first_word.[0] <> '-'
&& String.for_all (fun c ->
(c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c = '-' || c = '_'
) first_word
then Some first_word
else None
let extract_subcommands_from_commands lines =
let rec walk lines acc =
match lines with
| [] -> List.rev acc
| Macro ("PP", _) :: rest ->
begin match rest with
| Text tag :: rest2 ->
(* Check if this is a subcommand (bold name, not a flag) *)
begin match extract_bold_command_name tag with
| Some name ->
(* Collect description from .RS/.RE block *)
let rec collect_desc lines desc_acc =
match lines with
| Macro ("RS", _) :: rest3 ->
collect_in_rs rest3 desc_acc
| Text s :: rest3 ->
collect_desc rest3 (s :: desc_acc)
| _ -> (String.concat " " (List.rev desc_acc), lines)
and collect_in_rs lines desc_acc =
match lines with
| Macro ("RE", _) :: rest3 ->
(String.concat " " (List.rev desc_acc), rest3)
| Text s :: rest3 ->
collect_in_rs rest3 (s :: desc_acc)
| Macro ("PP", _) :: _ | Macro ("SH", _) :: _ | Macro ("SS", _) :: _ ->
(String.concat " " (List.rev desc_acc), lines)
| _ :: rest3 -> collect_in_rs rest3 desc_acc
| [] -> (String.concat " " (List.rev desc_acc), [])
in
let (desc, rest3) = collect_desc rest2 [] in
let desc = String.trim desc in
(* Take first sentence as description *)
let short_desc = match String.split_on_char '.' desc with
| first :: _ when String.length first > 0 -> String.trim first
| _ -> desc in
let sc : subcommand = { name; desc = short_desc } in
walk rest3 (sc :: acc)
| None -> walk rest2 acc
end
| _ -> walk rest acc
end
| _ :: rest -> walk rest acc
in
walk lines []
(* --- Top-level API --- *)
let parse_manpage_lines lines =
@ -707,7 +816,9 @@ let parse_manpage_lines lines =
let options_section = extract_options_section lines in
let entries = extract_entries options_section in
let positionals = extract_synopsis_positionals_lines lines in
{ entries; subcommands = []; positionals; description = "" }
let commands_section = extract_commands_section lines in
let subcommands = extract_subcommands_from_commands commands_section in
{ entries; subcommands; positionals; description = "" }
end
let parse_manpage_string contents =

View file

@ -129,12 +129,14 @@ let param_parser =
space_upper_param; space_type_param ]
>>| fun a -> Some a)
(* Switch parser: -a, --all | --all / -a | -a | --all *)
(* Switch parser: -a, --all | -a --all | --all / -a | -a | --all *)
let switch_parser =
choice
[
(short_switch >>= fun s ->
comma *> long_switch >>| fun l -> Both (s, l));
(short_switch >>= fun s ->
char ' ' *> long_switch >>| fun l -> Both (s, l));
(long_switch >>= fun l ->
inline_ws *> char '/' *> inline_ws *>
short_switch >>| fun s -> Both (s, l));
@ -222,16 +224,61 @@ let entry =
(* --- Subcommand parsing --- *)
(* A subcommand line: " name description" *)
(* A subcommand line: " name description"
Also handles argument placeholders: " name UNIT... description" *)
let is_subcommand_char = function
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' -> true
| _ -> false
(* Skip argument placeholders like UNIT..., [PATTERN...|PID...], <file>
that appear between the subcommand name and the description.
Only consumes single-space gaps the two-space gap before the
description is left for the main parser. *)
let skip_arg_placeholders =
fix (fun self ->
(* Peek ahead: single space followed by arg-like token *)
available >>= fun avail ->
if avail < 2 then return ()
else
peek_string (min avail 2) >>= fun s2 ->
if String.length s2 >= 2 && s2.[0] = ' ' && s2.[1] <> ' ' then
(* Single space — could be an arg placeholder *)
let next = s2.[1] in
if next = '[' || next = '<'
|| (next >= 'A' && next <= 'Z') then
(* Peek the full token to check if it's ALL_CAPS/brackets *)
peek_string (min avail 80) >>= fun preview ->
(* Extract the token after the single space *)
let tok_start = 1 in
let tok_end = ref tok_start in
while !tok_end < String.length preview
&& preview.[!tok_end] <> ' '
&& preview.[!tok_end] <> '\n'
&& preview.[!tok_end] <> '\r' do
incr tok_end
done;
let tok = String.sub preview tok_start (!tok_end - tok_start) in
(* Accept as placeholder if it starts with [ or < or is ALL_CAPS
(possibly with dots, pipes, dashes) *)
let is_placeholder =
tok.[0] = '[' || tok.[0] = '<'
|| String.for_all (fun c ->
(c >= 'A' && c <= 'Z') || c = '_' || c = '-'
|| c = '.' || c = '|' || c = ',' || (c >= '0' && c <= '9')
) tok
in
if is_placeholder then
advance (1 + String.length tok) *> self
else return ()
else return ()
else return ())
let subcommand_entry =
inline_ws *>
take_while1 is_subcommand_char >>= fun name ->
if String.length name < 2 then fail "subcommand name too short"
else
skip_arg_placeholders *>
char ' ' *> char ' ' *> inline_ws *>
rest_of_line <* eol >>| fun desc ->
{ name = String.lowercase_ascii name;