riir
This commit is contained in:
parent
da4bc139eb
commit
9745ef9c56
49 changed files with 9039 additions and 5483 deletions
4
src/lib.rs
Normal file
4
src/lib.rs
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
pub mod parsers;
|
||||
pub mod pool;
|
||||
pub mod store;
|
||||
pub mod types;
|
||||
1830
src/main.rs
Normal file
1830
src/main.rs
Normal file
File diff suppressed because it is too large
Load diff
187
src/parsers/help.rs
Normal file
187
src/parsers/help.rs
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
mod description;
|
||||
mod helpers;
|
||||
mod options;
|
||||
mod positionals;
|
||||
mod subcommands;
|
||||
|
||||
pub use options::{param_parser, parse_usage_flags, switch_parser};
|
||||
pub use positionals::{
|
||||
extract_cli11_positionals, extract_usage_positionals, parse_usage_args, skip_command_name,
|
||||
};
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::{
|
||||
parsers::help::{description::description, helpers::get_indent, subcommands::subcommand_entry},
|
||||
types::*,
|
||||
};
|
||||
use nom::{IResult, Parser, character::complete::space0, combinator::opt};
|
||||
|
||||
use crate::make_parser;
|
||||
|
||||
type EntryParts<'a> = (
|
||||
&'a str,
|
||||
(Switch<'a>, Option<Param<'a>>),
|
||||
(&'a str, Vec<&'a str>),
|
||||
);
|
||||
|
||||
// parse a single flag entry: indent + switch + optional param + description.
|
||||
make_parser!(entry -> OptionEntry<'a>,
|
||||
(
|
||||
space0,
|
||||
(switch_parser, opt(param_parser)),
|
||||
description,
|
||||
)
|
||||
=> |(_, (switch, param), (first, cont))
|
||||
: EntryParts<'a>|
|
||||
{
|
||||
let mut desc: Vec<&str> = Vec::with_capacity(1 + cont.len());
|
||||
if !first.trim().is_empty() { desc.push(first); }
|
||||
desc.extend(cont.into_iter().filter(|l| !l.trim().is_empty()));
|
||||
OptionEntry { switch, param, desc }
|
||||
}
|
||||
);
|
||||
|
||||
/// dedup raw subcommands by case-insensitive name, keeping the entry with
|
||||
/// the longest description. preserves first-seen ordering.
|
||||
fn dedup_subcommands<'a>(raw: Vec<Subcommand<'a>>) -> Vec<Subcommand<'a>> {
|
||||
let mut by_name: HashMap<String, Subcommand<'a>> = HashMap::new();
|
||||
let mut order: Vec<String> = Vec::new();
|
||||
for sc in raw {
|
||||
let key = sc.name.to_ascii_lowercase();
|
||||
match by_name.get(&key) {
|
||||
Some(prev) if prev.desc.len() >= sc.desc.len() => {}
|
||||
_ => {
|
||||
if !by_name.contains_key(&key) {
|
||||
order.push(key.clone());
|
||||
}
|
||||
by_name.insert(key, sc);
|
||||
}
|
||||
}
|
||||
}
|
||||
order
|
||||
.into_iter()
|
||||
.map(|k| by_name.remove(&k).unwrap())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// which kind of help-text section the line scanner is currently inside.
/// drives what `build_help_result` is willing to extract from a line.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum HelpSection {
    /// no section header seen yet, or the last header was a usage line.
    /// option entries are still accepted here.
    Unknown,
    /// an option/flag section; option entries are accepted.
    Options,
    /// a command/subcommand listing; subcommand entries are accepted.
    Commands,
    /// any other recognised header (arguments, examples, ...); nothing is
    /// extracted while in this section.
    Other,
}
|
||||
|
||||
fn classify_section_line(line: &str) -> Option<HelpSection> {
|
||||
let (idx, indent) = get_indent(line);
|
||||
if indent > 4 {
|
||||
return None;
|
||||
}
|
||||
let trimmed = line[idx..].trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let without_colon = trimmed.trim_end_matches(':').trim();
|
||||
let lower = without_colon.to_ascii_lowercase();
|
||||
|
||||
if lower.starts_with("usage") {
|
||||
return Some(HelpSection::Unknown);
|
||||
}
|
||||
if lower.starts_with("valid arguments")
|
||||
|| lower.contains(" is one of the following")
|
||||
|| lower.contains(" defaults to")
|
||||
|| lower == "examples"
|
||||
|| lower == "example"
|
||||
{
|
||||
return Some(HelpSection::Other);
|
||||
}
|
||||
let command_header = matches!(lower.as_str(), "command" | "commands" | "subcommands")
|
||||
|| lower.ends_with(" commands")
|
||||
|| lower.ends_with(" subcommands");
|
||||
if command_header && !lower.contains("option") && !lower.contains("flag") {
|
||||
return Some(HelpSection::Commands);
|
||||
}
|
||||
if lower.contains("argument")
|
||||
|| lower == "args"
|
||||
|| lower == "positionals"
|
||||
|| lower == "positional arguments"
|
||||
{
|
||||
return Some(HelpSection::Other);
|
||||
}
|
||||
if lower.contains("option") || lower.contains("flag") || trimmed.ends_with(':') {
|
||||
return Some(HelpSection::Options);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// drop everything up to and including the first '\n' of `s`.
/// returns "" when `s` contains no newline.
fn consume_line(s: &str) -> &str {
    s.split_once('\n').map_or("", |(_, tail)| tail)
}
|
||||
|
||||
/// true when `rem` is strictly shorter (in bytes) than `original`, i.e. the
/// parser that produced `rem` consumed at least one byte.
fn parser_made_progress(original: &str, rem: &str) -> bool {
    original.len() > rem.len()
}
|
||||
|
||||
/// build the final HelpResult by scanning help text with lightweight section
|
||||
/// awareness. options are accepted in option-like sections and before a
|
||||
/// section is known; subcommands are accepted only in command-like sections.
|
||||
fn build_help_result<'a>(original: &'a str) -> HelpResult<'a> {
|
||||
let mut entries = Vec::new();
|
||||
let mut raw_subcommands: Vec<Subcommand<'a>> = Vec::new();
|
||||
let mut section = HelpSection::Unknown;
|
||||
let mut rem = original;
|
||||
|
||||
while !rem.is_empty() {
|
||||
let line = rem.split_once('\n').map(|(line, _)| line).unwrap_or(rem);
|
||||
if let Some(next_section) = classify_section_line(line) {
|
||||
section = next_section;
|
||||
rem = consume_line(rem);
|
||||
continue;
|
||||
}
|
||||
|
||||
if matches!(section, HelpSection::Unknown | HelpSection::Options)
|
||||
&& let Ok((next, parsed)) = entry(rem)
|
||||
&& parser_made_progress(rem, next)
|
||||
{
|
||||
entries.push(parsed);
|
||||
rem = next;
|
||||
continue;
|
||||
}
|
||||
|
||||
if section == HelpSection::Commands
|
||||
&& let Ok((next, parsed)) = subcommand_entry(rem)
|
||||
&& parser_made_progress(rem, next)
|
||||
{
|
||||
raw_subcommands.push(parsed);
|
||||
rem = next;
|
||||
continue;
|
||||
}
|
||||
|
||||
rem = consume_line(rem);
|
||||
}
|
||||
|
||||
let subcommands = dedup_subcommands(raw_subcommands);
|
||||
// cli11 positional section takes priority over the usage-line scan
|
||||
// when both are present — cli11 carries types and optionality.
|
||||
let positionals = match extract_cli11_positionals(original) {
|
||||
Ok((_, p)) if !p.is_empty() => p,
|
||||
_ => extract_usage_positionals(original)
|
||||
.map(|(_, p)| p)
|
||||
.unwrap_or_default(),
|
||||
};
|
||||
HelpResult {
|
||||
entries,
|
||||
subcommands,
|
||||
positionals,
|
||||
desc: "",
|
||||
}
|
||||
}
|
||||
|
||||
/// top-level help parser.
|
||||
pub fn help_parser(s: &str) -> IResult<&str, HelpResult<'_>> {
|
||||
Ok(("", build_help_result(s)))
|
||||
}
|
||||
37
src/parsers/help/description.rs
Normal file
37
src/parsers/help/description.rs
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
use nom::{
|
||||
IResult, Parser,
|
||||
character::complete::space0,
|
||||
combinator::verify,
|
||||
multi::many0,
|
||||
sequence::{preceded, terminated},
|
||||
};
|
||||
|
||||
use crate::make_parser;
|
||||
use crate::parsers::help::helpers::{at_least_indent, eol, rest_of_line};
|
||||
|
||||
// continuation line: an indented (≥8 visual cols), non-flag-shaped line
|
||||
// belonging to the previous flag's description. blank-but-indented lines
|
||||
// are accepted (content = ""), filtered out by the caller's join.
|
||||
make_parser!(continuation_line -> &'a str,
|
||||
verify(
|
||||
preceded(
|
||||
// assert ≥8 visual cols of leading horizontal whitespace
|
||||
// without consuming — space0 inside `rest_of_line`'s preceded
|
||||
// will eat them next.
|
||||
at_least_indent(8),
|
||||
terminated(preceded(space0, rest_of_line), eol)
|
||||
),
|
||||
// reject lines whose first non-space char is '-' — that's a new
|
||||
// flag entry, not a continuation of the previous one.
|
||||
|content: &&str| !content.starts_with('-')
|
||||
)
|
||||
);
|
||||
|
||||
// description: the line of text after the switch+param, plus any
|
||||
// continuation lines. always succeeds — first line may be empty (when
|
||||
// the switch is followed immediately by a newline, "clap long" style).
|
||||
make_parser!(pub description -> (&'a str, Vec<&'a str>),
|
||||
(
|
||||
terminated(preceded(space0, rest_of_line), eol),
|
||||
many0(continuation_line),
|
||||
));
|
||||
105
src/parsers/help/helpers.rs
Normal file
105
src/parsers/help/helpers.rs
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
use nom::{
|
||||
AsChar, IResult, Parser, branch::alt, bytes::complete::take_till,
|
||||
character::complete::line_ending, combinator::eof,
|
||||
};
|
||||
#[allow(unused_imports)]
|
||||
use nom::{bytes::complete::take_while, combinator::peek, combinator::verify};
|
||||
|
||||
/// define a named parser function
/// `fn name<'a>(s: &'a str) -> IResult<&'a str, Out>`.
///
/// forms:
/// - `make_parser!([vis] name -> Out, parser)` — the function body is
///   `parser.parse(s)`.
/// - `make_parser!([vis] name -> Out, parser => wrap)` — runs the parser,
///   propagates its error with `?`, and maps the parsed value through
///   `wrap` (a closure, function or tuple-variant constructor).
///
/// `vis` is any visibility qualifier (`pub`, `pub(crate)`, ...) or nothing.
/// `Out` may mention the lifetime `'a` of the input. `IResult` and a
/// `parse` method for the parser expression must be in scope at the call
/// site.
#[macro_export]
macro_rules! make_parser {
    ($vis:vis $name:ident -> $out:ty, $parser:expr => $wrap:expr) => {
        #[allow(clippy::needless_lifetimes)]
        #[allow(mismatched_lifetime_syntaxes)]
        $vis fn $name<'a>(s: &'a str) -> IResult<&'a str, $out> {
            let (rem, val) = $parser.parse(s)?;
            Ok((rem, $wrap(val)))
        }
    };
    ($vis:vis $name:ident -> $out:ty, $parser:expr) => {
        #[allow(clippy::needless_lifetimes)]
        #[allow(mismatched_lifetime_syntaxes)]
        $vis fn $name<'a>(s: &'a str) -> IResult<&'a str, $out> {
            $parser.parse(s)
        }
    };
}
|
||||
|
||||
/// define a named character predicate `fn name(c: char) -> bool`.
///
/// usage: `make_predicate!([vis] name, |c| <body>)` where `vis` is any
/// visibility qualifier (`pub`, `pub(crate)`, ...) or nothing, `c` is the
/// name the body uses for the character, and `<body>` becomes the function
/// body verbatim.
#[macro_export]
macro_rules! make_predicate {
    ($vis:vis $name:ident, |$c:ident| $($body:tt)*) => {
        $vis fn $name($c: char) -> bool { $($body)* }
    };
}
|
||||
|
||||
make_predicate!(pub is_option_char, |c| c.is_alphanumeric() || matches!(c, '-' | '_'));
|
||||
|
||||
make_parser!(pub rest_of_line -> &'a str,
|
||||
take_till(|c: char| c.is_newline())
|
||||
);
|
||||
|
||||
// end of line — matches either a newline or end of input.
|
||||
// permissive version used in most line-consuming parsers.
|
||||
make_parser!(pub eol -> &'a str, alt((line_ending, eof)));
|
||||
|
||||
/// visual width of a whitespace run: each space counts 1 column, each tab
/// counts 8 (typical terminal default). the total saturates at `u8::MAX`.
///
/// every character of `s` is visited and anything other than a space or
/// tab contributes 0 without stopping the scan, so pass only the leading
/// whitespace run.
pub fn visual_indent(s: &str) -> u8 {
    let mut columns: u8 = 0;
    for ch in s.chars() {
        let width = match ch {
            ' ' => 1,
            '\t' => 8,
            _ => 0,
        };
        columns = columns.saturating_add(width);
    }
    columns
}
|
||||
|
||||
/// nom-shaped check that the input begins with at least `min` visual
|
||||
/// columns of horizontal whitespace (spaces or tabs). doesn't consume —
|
||||
/// pair with `space0`/`take_while` to actually eat the indent.
|
||||
pub fn at_least_indent<'a>(
|
||||
min: u8,
|
||||
) -> impl Parser<&'a str, Output = &'a str, Error = nom::error::Error<&'a str>> {
|
||||
verify(
|
||||
peek(take_while(|c: char| c == ' ' || c == '\t')),
|
||||
move |s: &str| visual_indent(s) >= min,
|
||||
)
|
||||
}
|
||||
|
||||
/// legacy helper: returns `(byte index of the first character that is not
/// a space or tab, visual indent of the leading run)`.
/// used by callers that still need the byte index.
///
/// spaces count 1 column and tabs count 8. the indent saturates at
/// `u8::MAX` (matching `visual_indent`) instead of overflowing on very long
/// runs. when `s` consists only of spaces/tabs (or is empty) the index is
/// `s.len()`, so `&s[idx..]` is always the text after the indent.
pub fn get_indent(s: &str) -> (usize, u8) {
    let mut indent: u8 = 0;
    for (idx, ch) in s.char_indices() {
        let width = match ch {
            ' ' => 1,
            '\t' => 8,
            // first non-indent character: the run ends here.
            _ => return (idx, indent),
        };
        indent = indent.saturating_add(width);
    }
    // no non-indent character at all: the run covers the whole string.
    (s.len(), indent)
}
|
||||
192
src/parsers/help/options.rs
Normal file
192
src/parsers/help/options.rs
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
use crate::make_parser;
|
||||
use crate::parsers::help::helpers::is_option_char;
|
||||
use crate::types::*;
|
||||
|
||||
use nom::bytes::complete::{take_till, take_till1};
|
||||
use nom::character::complete::{space0, space1};
|
||||
use nom::combinator::{map, opt};
|
||||
use nom::multi::many0;
|
||||
use nom::sequence::separated_pair;
|
||||
use nom::{
|
||||
IResult, Parser,
|
||||
branch::alt,
|
||||
bytes::complete::{tag, take_while1},
|
||||
character::complete::{char, satisfy},
|
||||
combinator::{value, verify},
|
||||
sequence::{delimited, preceded},
|
||||
};
|
||||
|
||||
make_parser!(short_switch -> char,
|
||||
preceded(char('-'), satisfy(|c| c.is_alphanumeric())));
|
||||
|
||||
make_parser!(long_switch -> &'a str,
|
||||
preceded(tag("--"), take_while1(is_option_char)));
|
||||
|
||||
make_parser!(negatable_long_switch -> &'a str,
|
||||
preceded(tag("--[no-]"), take_while1(is_option_char)));
|
||||
|
||||
make_parser!(comma -> (),
|
||||
value((), preceded(char(','), space0)));
|
||||
|
||||
make_parser!(eq_optional_param -> Param<'a>,
|
||||
delimited(tag("[="), take_while1(is_option_char), char(']')) => Param::Optional);
|
||||
|
||||
make_parser!(eq_optional_angle_param -> Param<'a>,
|
||||
delimited(tag("[=<"), take_till1(|c| c == '>'), tag(">]")) => Param::Optional);
|
||||
|
||||
make_parser!(eq_mandatory_param -> Param<'a>,
|
||||
preceded(char('='), take_while1(is_option_char)) => Param::Mandatory);
|
||||
|
||||
// take a wide alphanumeric/_/- token then verify the WHOLE thing looks
|
||||
// like an ALL_CAPS-style param name. taking only uppercase chars would
|
||||
// match just "N" of " Needs: ..." and leave "eeds:..." as desc, so we
|
||||
// widen, then reject anything that doesn't pass the all-caps check.
|
||||
make_parser!(spaced_uppercase_param -> Param<'a>,
|
||||
preceded(
|
||||
char(' '),
|
||||
verify(
|
||||
take_while1(|c: char|
|
||||
c.is_ascii_alphabetic() || c.is_ascii_digit() || c == '_' || c == '-'
|
||||
),
|
||||
|s: &str| {
|
||||
let first = match s.chars().next() { Some(c) => c, None => return false };
|
||||
if !(first.is_ascii_uppercase() || first == '_') { return false; }
|
||||
s.chars().all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
|
||||
}
|
||||
)
|
||||
) => Param::Mandatory);
|
||||
|
||||
make_parser!(spaced_angle_param -> Param<'a>,
|
||||
preceded(char(' '), delimited(char('<'), take_till1(|c| c == '>'), char('>'))) => Param::Mandatory);
|
||||
|
||||
make_parser!(spaced_opt_angle_param -> Param<'a>,
|
||||
preceded(char(' '), delimited(char('<'),
|
||||
delimited(char('['), take_while1(|c| c != ']'), char(']')),
|
||||
char('>'))) => Param::Optional);
|
||||
|
||||
make_parser!(spaced_angle_param_after_space -> Param<'a>,
|
||||
preceded(space1, delimited(char('<'), take_till1(|c| c == '>'), char('>'))) => Param::Mandatory);
|
||||
|
||||
// take the full lowercase token then verify it's <=10 chars. a
|
||||
// take_while_m_n with a 10-char cap would leave a partial match — e.g.
|
||||
// "--foo nanoseconds" would extract param "nanosecond" and leave "s" as
|
||||
// the description. a word longer than 10 chars is almost certainly the
|
||||
// start of the description, not a type annotation.
|
||||
make_parser!(spaced_type_param -> Param<'a>,
|
||||
preceded(
|
||||
char(' '),
|
||||
verify(
|
||||
take_while1(|c: char| !c.is_whitespace()),
|
||||
|s: &str| s.len() <= 10 && s.chars().all(|c| c.is_ascii_lowercase())
|
||||
)
|
||||
) => Param::Mandatory
|
||||
);
|
||||
|
||||
make_parser!(pub param_parser -> Param<'a>, alt((
|
||||
eq_optional_angle_param,
|
||||
eq_optional_param,
|
||||
eq_mandatory_param,
|
||||
spaced_opt_angle_param,
|
||||
spaced_angle_param_after_space,
|
||||
spaced_angle_param,
|
||||
spaced_uppercase_param,
|
||||
spaced_type_param,
|
||||
)));
|
||||
|
||||
macro_rules! switch_pair {
|
||||
($name:ident, $left:expr, $sep:expr, $right:expr => |$a:ident, $b:ident| $body:expr) => {
|
||||
fn $name<'a>(s: &'a str) -> IResult<&'a str, Switch<'a>> {
|
||||
use nom::sequence::separated_pair;
|
||||
let (rem, ($a, $b)) = separated_pair($left, $sep, $right).parse(s)?;
|
||||
Ok((rem, $body))
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
switch_pair!(short_comma_long,
|
||||
short_switch, comma, long_switch => |s, l| Switch::Both(s, l));
|
||||
|
||||
switch_pair!(short_comma_negatable_long,
|
||||
short_switch, comma, negatable_long_switch => |s, l| Switch::Both(s, l));
|
||||
|
||||
switch_pair!(short_space_long,
|
||||
short_switch, char(' '), long_switch => |s, l| Switch::Both(s, l));
|
||||
|
||||
switch_pair!(short_space_negatable_long,
|
||||
short_switch, char(' '), negatable_long_switch => |s, l| Switch::Both(s, l));
|
||||
|
||||
make_parser!(slash_sep -> (),
|
||||
value((), delimited(space0, char('/'), space0)));
|
||||
|
||||
switch_pair!(long_slash_short,
|
||||
long_switch, slash_sep, short_switch => |l, s| Switch::Both(s, l));
|
||||
|
||||
make_parser!(short_as_switch -> Switch<'a>, short_switch => Switch::Short);
|
||||
make_parser!(negatable_long_as_switch -> Switch<'a>, negatable_long_switch => Switch::Long);
|
||||
make_parser!(long_as_switch -> Switch<'a>, long_switch => Switch::Long);
|
||||
|
||||
make_parser!(pub switch_parser -> Switch<'a>,
|
||||
alt((
|
||||
short_comma_negatable_long,
|
||||
short_space_negatable_long,
|
||||
short_comma_long,
|
||||
short_space_long,
|
||||
long_slash_short,
|
||||
short_as_switch,
|
||||
negatable_long_as_switch,
|
||||
long_as_switch,
|
||||
))
|
||||
);
|
||||
|
||||
// `{--long | -s}` — manpage SYNOPSIS-line switch pair. nix-env's
|
||||
// synopsis is the canonical case: `[{--file | -f} path] [{--profile |
|
||||
// -p} path]`. emits Switch::Both with the long name.
|
||||
make_parser!(brace_pipe_long_short -> Switch<'a>,
|
||||
separated_pair(long_switch, (space0, char('|'), space0), short_switch)
|
||||
=> |(l, s): (&'a str, char)| Switch::Both(s, l)
|
||||
);
|
||||
|
||||
make_parser!(brace_pipe_short_long -> Switch<'a>,
|
||||
separated_pair(short_switch, (space0, char('|'), space0), long_switch)
|
||||
=> |(s, l): (char, &'a str)| Switch::Both(s, l)
|
||||
);
|
||||
|
||||
make_parser!(brace_pipe_switch -> Switch<'a>,
|
||||
delimited(
|
||||
(char('{'), space0),
|
||||
alt((brace_pipe_long_short, brace_pipe_short_long)),
|
||||
(space0, char('}'))
|
||||
)
|
||||
);
|
||||
|
||||
make_parser!(usage_switch_parser -> Switch<'a>,
|
||||
alt((brace_pipe_switch, switch_parser))
|
||||
);
|
||||
|
||||
// consume any chars except `]`. used to swallow trailing tokens inside a
|
||||
// flag bracket — e.g. `[--option name value]` keeps switch=Long("option")
|
||||
// and param=Mandatory("name"), discarding ` value` before the closing `]`.
|
||||
make_parser!(take_till_bracket -> &'a str, take_till(|c: char| c == ']'));
|
||||
|
||||
// `[<switch> [param] <junk>]` inside the SYNOPSIS line.
|
||||
make_parser!(flag_in_bracket -> (Switch<'a>, Option<Param<'a>>),
|
||||
delimited(
|
||||
(char('['), space0),
|
||||
(usage_switch_parser, opt(param_parser)),
|
||||
(take_till_bracket, char(']'))
|
||||
)
|
||||
);
|
||||
|
||||
// walk the joined SYNOPSIS-line text, collecting every flag-bracketed
|
||||
// switch + its first param. non-flag tokens (positional brackets,
|
||||
// command name, ellipses) are skipped one char at a time.
|
||||
make_parser!(pub parse_usage_flags -> Vec<(Switch<'a>, Option<Param<'a>>)>,
|
||||
many0(alt((
|
||||
map(flag_in_bracket, Some),
|
||||
// `value(None, ...)` requires `None: Clone` which forces Clone
|
||||
// on Switch/Param; `map(..., |_| None)` doesn't.
|
||||
map(satisfy(|c| c != '\n' && c != '\r'), |_| None),
|
||||
)))
|
||||
=> |v: Vec<Option<(Switch<'a>, Option<Param<'a>>)>>|
|
||||
v.into_iter().flatten().collect()
|
||||
);
|
||||
373
src/parsers/help/positionals.rs
Normal file
373
src/parsers/help/positionals.rs
Normal file
|
|
@ -0,0 +1,373 @@
|
|||
use crate::parsers::help::helpers::rest_of_line;
|
||||
use crate::types::Positional;
|
||||
use crate::{make_parser, make_predicate};
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{tag, tag_no_case, take_till, take_till1, take_while, take_while1};
|
||||
use nom::character::complete::{char, line_ending, satisfy, space0, space1};
|
||||
use nom::combinator::{map, not, opt, peek, recognize, value, verify};
|
||||
use nom::multi::many0;
|
||||
use nom::sequence::{delimited, preceded, terminated};
|
||||
use nom::{AsChar, IResult, Parser};
|
||||
|
||||
/// intermediate classification of one token on a usage line, produced by
/// the per-token parsers and folded into the final positional list by
/// `parse_usage_args`. only the four name-carrying variants end up as
/// positionals; `Curly`, `Flag` and `Skip` are discarded.
#[derive(Clone, Debug)]
enum PositionalParse<'a> {
    /// a `{...}` group.
    Curly,
    /// a token starting with '-'.
    Flag,
    /// a single character that matched nothing else.
    Skip,
    /// a required positional with the given name.
    Mandatory(&'a str),
    /// an optional (bracketed) positional with the given name.
    Optional(&'a str),
    /// a required positional followed by an ellipsis.
    ManVariadic(&'a str),
    /// an optional positional carrying an ellipsis.
    OptVariadic(&'a str),
}
|
||||
|
||||
make_predicate!(is_word_char, |c| c.is_alphanumeric()
|
||||
|| matches!(c, '-' | '_' | '/' | '.'));
|
||||
|
||||
make_predicate!(is_pos_char, |c| c.is_ascii_uppercase()
|
||||
|| c.is_numeric()
|
||||
|| matches!(c, '_' | '-'));
|
||||
|
||||
make_parser!(section_label -> (),
|
||||
value((), alt((
|
||||
tag_no_case("options"),
|
||||
tag_no_case("option"),
|
||||
tag_no_case("flags"),
|
||||
tag_no_case("flag")
|
||||
)))
|
||||
);
|
||||
|
||||
make_parser!(ellipses -> (),
|
||||
value((),
|
||||
alt((tag("..."), tag("\u{2026}")))
|
||||
)
|
||||
);
|
||||
|
||||
make_parser!(braces -> PositionalParse<'a>,
|
||||
value(PositionalParse::Curly, delimited(char('{'), take_till1(|c| c == '}'), char('}')))
|
||||
);
|
||||
|
||||
// FIXME: should this be a take_while1(is_option_char) instead?
// it is unclear why ']' is a stop condition here — confirm before changing.
|
||||
make_parser!(flag -> PositionalParse<'a>,
|
||||
value(PositionalParse::Flag, preceded(char('-'), take_till1(|c: char| c.is_space() || c == ']')))
|
||||
);
|
||||
|
||||
fn check_positional(s: &str) -> bool {
|
||||
let s = s.trim();
|
||||
if s.is_empty() {
|
||||
return false;
|
||||
}
|
||||
// reject names starting with '-' — these are flag tokens accidentally
|
||||
// captured by the bracket parser, e.g. "[--at-operation]" in jj's
|
||||
// synopsis. without this guard every `[--flag]` token would be
|
||||
// recorded as a positional named "--flag".
|
||||
if s.starts_with('-') {
|
||||
return false;
|
||||
}
|
||||
if section_label.parse(s).is_ok() {
|
||||
return false;
|
||||
}
|
||||
let upper = s.to_ascii_uppercase();
|
||||
if matches!(upper.as_str(), "OPTIONS" | "OPTION" | "FLAGS" | "FLAG") {
|
||||
return false;
|
||||
}
|
||||
s.chars()
|
||||
.all(|c| c.is_alphanumeric() || matches!(c, '-' | '_' | '/' | '.'))
|
||||
}
|
||||
|
||||
// recognize a balanced `[...]` block, tolerating ONE level of nested
|
||||
// brackets inside. expressed entirely via nom combinators:
|
||||
//
|
||||
// `[` + many0(alt((nested_bracket_block, non_bracket_char))) + `]`
|
||||
//
|
||||
// nested_bracket_block is `[ chars_until_] ]`, which means we accept a
|
||||
// single inner `[...]` correctly but not arbitrarily-deep nesting —
|
||||
// manpages don't go deeper than two levels.
|
||||
// returns the inner content (everything between the outer brackets).
|
||||
make_parser!(balanced_bracket_inner -> &'a str,
|
||||
recognize(delimited(
|
||||
char('['),
|
||||
many0(alt((
|
||||
recognize((char('['), take_till(|c: char| c == ']'), char(']'))),
|
||||
recognize(satisfy(|c: char| c != ']' && c != '[')),
|
||||
))),
|
||||
char(']'),
|
||||
))
|
||||
=> |whole: &'a str| &whole[1..whole.len() - 1]
|
||||
);
|
||||
|
||||
/// pull the positional name out of the text found between a pair of
/// brackets. returns the name slice plus a flag telling whether the text
/// ended in an ellipsis ("..." or '…'), i.e. an in-bracket variadic marker.
///
/// a name written as `<name>` yields the text between the angle brackets
/// (`None` if the '>' is missing); otherwise the leading run up to the
/// first whitespace, '[' or ']' is taken (`None` if that run is empty).
fn parse_bracket_inner_name(inner: &str) -> Option<(&str, bool)> {
    let inner = inner.trim();

    // peel off a trailing ellipsis, remembering whether one was present.
    let without_dots = inner
        .strip_suffix("...")
        .or_else(|| inner.strip_suffix('\u{2026}'));
    let has_dots = without_dots.is_some();
    let rest = without_dots.map_or(inner, str::trim_end);

    let name = match rest.strip_prefix('<') {
        // angle-bracket name: everything up to the first '>'.
        Some(after_lt) => &after_lt[..after_lt.find('>')?],
        // bare name: the leading word, which must not be empty.
        None => rest
            .split(|c: char| c.is_whitespace() || matches!(c, '[' | ']'))
            .next()
            .filter(|word| !word.is_empty())?,
    };

    Some((name, has_dots))
}
|
||||
|
||||
// extract a balanced `[...]` block and decompose its inner content into
|
||||
// (name, has-inner-`...` flag). `map_opt` turns a `None` from
|
||||
// `parse_bracket_inner_name` into a nom parse error.
|
||||
make_parser!(opt_bracket_name -> (&'a str, bool),
|
||||
nom::combinator::map_opt(balanced_bracket_inner, parse_bracket_inner_name)
|
||||
);
|
||||
|
||||
make_parser!(
|
||||
opt_positional -> PositionalParse<'a>,
|
||||
verify(
|
||||
// tuple parser: (name + in-bracket variadic, post-bracket ellipsis).
|
||||
// matches "[name]", "[name...]", "[name ...]", "[name] ...",
|
||||
// "[<name>]", and one-level nests like "[<program> [<arg>...]]".
|
||||
(opt_bracket_name, opt(ellipses)),
|
||||
|((name, _), _): &((&'a str, bool), Option<()>)| check_positional(name)
|
||||
) => |((name, has_inner_dots), post_dots): ((&'a str, bool), Option<()>)| {
|
||||
if has_inner_dots || post_dots.is_some() {
|
||||
PositionalParse::OptVariadic(name)
|
||||
} else {
|
||||
PositionalParse::Optional(name)
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
make_parser!(man_positional -> PositionalParse<'a>,
|
||||
verify(
|
||||
(
|
||||
delimited(
|
||||
char('<'),
|
||||
(
|
||||
take_till1(|c| c == '.' || c == '\u{2026}' || c == '>'),
|
||||
opt(ellipses)
|
||||
),
|
||||
char('>')
|
||||
),
|
||||
opt(ellipses)
|
||||
),
|
||||
|((ss, _), _)| check_positional(ss)
|
||||
) => |((p, v), v1): ((&'a str, Option<()>), Option<()>)|
|
||||
if v.is_some() || v1.is_some() { PositionalParse::ManVariadic(p) }
|
||||
else { PositionalParse::Mandatory(p) }
|
||||
);
|
||||
|
||||
make_parser!(allcaps_positional -> PositionalParse<'a>,
|
||||
verify(
|
||||
(
|
||||
preceded(
|
||||
peek(
|
||||
satisfy(|c: char| c.is_ascii_uppercase())
|
||||
),
|
||||
take_while1(is_pos_char)
|
||||
),
|
||||
opt(
|
||||
alt((
|
||||
tag("..."),
|
||||
tag("\u{2026}"))
|
||||
)
|
||||
)
|
||||
),
|
||||
|(ss, _): &(&str, _)| check_positional(ss)
|
||||
) => |(p, v): (&'a str, Option<&'a str>)|
|
||||
if v.is_some() { PositionalParse::ManVariadic(p) } else { PositionalParse::Mandatory(p) }
|
||||
);
|
||||
|
||||
/// append `(k, v)` to `acc` unless a key equal to `k` ignoring ASCII case
/// is already present; the first occurrence of a name wins.
fn caseless_push<'a>(k: &'a str, v: Positional, acc: &mut Vec<(&'a str, Positional)>) {
    if acc.iter().all(|(seen, _)| !seen.eq_ignore_ascii_case(k)) {
        acc.push((k, v));
    }
}
|
||||
|
||||
// parse_usage_args runs on a single logical usage line. SKIP refuses to
|
||||
// cross a newline boundary so many0 stops at end-of-line — without this
|
||||
// the parser would happily wander into the OPTIONS section and treat
|
||||
// every `--flag <name>` angle-bracket parameter as a positional.
|
||||
//
|
||||
// the inner positional terminator uses peek(line_ending) instead of
|
||||
// consuming the newline, so the trailing `opt(line_ending)` in the
|
||||
// outer delimited eats it cleanly and we never advance past the usage
|
||||
// line.
|
||||
make_parser!(pub parse_usage_args -> Vec<(&'a str, Positional)>,
|
||||
(delimited(
|
||||
space0,
|
||||
many0(
|
||||
alt((
|
||||
map(
|
||||
(
|
||||
terminated(
|
||||
alt((
|
||||
braces,
|
||||
opt_positional,
|
||||
man_positional,
|
||||
flag,
|
||||
allcaps_positional,
|
||||
)),
|
||||
alt((
|
||||
space1,
|
||||
value("", peek(line_ending)),
|
||||
value("", peek(nom::combinator::eof)),
|
||||
))
|
||||
),
|
||||
// catch "[section] ..." patterns where the ellipsis is
|
||||
// on the *next* token, separated by whitespace.
|
||||
opt(terminated(
|
||||
alt((tag("..."), tag("\u{2026}"))),
|
||||
alt((
|
||||
space1,
|
||||
value("", peek(line_ending)),
|
||||
value("", peek(nom::combinator::eof)),
|
||||
))
|
||||
))
|
||||
),
|
||||
|(positional, trailing): (PositionalParse<'a>, Option<_>)| {
|
||||
if trailing.is_none() { positional }
|
||||
else {
|
||||
match positional {
|
||||
PositionalParse::Optional(n) => PositionalParse::OptVariadic(n),
|
||||
PositionalParse::Mandatory(n) => PositionalParse::ManVariadic(n),
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
}
|
||||
),
|
||||
// SKIP must NOT consume a newline. without this, many0 keeps
|
||||
// iterating past the usage line into OPTIONS-section flag
|
||||
// syntax and over-extracts positionals.
|
||||
value(PositionalParse::Skip, satisfy(|c: char| c != '\n' && c != '\r')),
|
||||
))
|
||||
),
|
||||
opt((space0, line_ending))
|
||||
)) => |p: Vec<PositionalParse<'a>>|
|
||||
p.into_iter().fold(Vec::new(), |mut acc, parse|
|
||||
{
|
||||
match parse {
|
||||
PositionalParse::Curly => (),
|
||||
PositionalParse::Flag => (),
|
||||
PositionalParse::Skip => (),
|
||||
PositionalParse::OptVariadic(arg) => caseless_push(arg, Positional {
|
||||
optional: true,
|
||||
variadic: true
|
||||
}, &mut acc),
|
||||
PositionalParse::ManVariadic(arg) => caseless_push(arg, Positional {
|
||||
optional: false,
|
||||
variadic: true
|
||||
}, &mut acc),
|
||||
PositionalParse::Optional(arg) => caseless_push(arg, Positional {
|
||||
optional: true,
|
||||
variadic: false,
|
||||
}, &mut acc),
|
||||
PositionalParse::Mandatory(arg) => caseless_push(arg, Positional {
|
||||
optional: false,
|
||||
variadic: false
|
||||
}, &mut acc),
|
||||
}
|
||||
acc
|
||||
})
|
||||
);
|
||||
|
||||
make_parser!(pub skip_command_name -> (),
|
||||
value((), preceded(space0,
|
||||
many0(
|
||||
(
|
||||
verify(
|
||||
preceded(not(char('-')), take_while1(is_word_char)),
|
||||
|ss: &str| ss.chars().any(|c: char| c.is_ascii_lowercase())
|
||||
),
|
||||
space1
|
||||
)
|
||||
)
|
||||
))
|
||||
);
|
||||
|
||||
make_parser!(find_usage_line -> (),
|
||||
value((), preceded(
|
||||
space0,
|
||||
terminated(
|
||||
tag_no_case("usage"),
|
||||
// accept any of:
|
||||
// "Usage:" — inline form with colon
|
||||
// "Usage args" — inline form, space follows the word
|
||||
// "USAGE\n cmd args" — clap-style header on its own line
|
||||
alt(
|
||||
(
|
||||
value((), char(':')),
|
||||
value((), peek(line_ending)),
|
||||
value((), peek(satisfy(|c: char| c == ' ' || c == '\t'))),
|
||||
)
|
||||
)
|
||||
)
|
||||
))
|
||||
);
|
||||
|
||||
make_parser!(pub extract_usage_positionals -> Vec<(&'a str, Positional)>,
|
||||
preceded(
|
||||
many0(preceded(not(find_usage_line), (rest_of_line, line_ending))),
|
||||
preceded(
|
||||
(find_usage_line, space0, opt(line_ending), space0, skip_command_name),
|
||||
parse_usage_args
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
make_predicate!(is_cli11_name_char, |c| c.is_alphanumeric()
|
||||
|| matches!(c, '_' | '-'));
|
||||
|
||||
make_parser!(cli11_section_header -> (),
|
||||
value((),
|
||||
delimited(
|
||||
space0,
|
||||
alt((tag("POSITIONALS:"), tag("Positionals:"))),
|
||||
(rest_of_line, opt(line_ending))
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
make_parser!(cli11_pos_line -> (&'a str, bool),
|
||||
preceded(
|
||||
verify(space0, |ss: &str| !ss.is_empty()),
|
||||
terminated(
|
||||
(
|
||||
verify(take_while1(is_cli11_name_char), |s: &str| s.len() >= 2),
|
||||
preceded(
|
||||
(space0, take_while(|c: char| c.is_ascii_uppercase()), space0),
|
||||
opt(tag("..."))
|
||||
)
|
||||
),
|
||||
(rest_of_line, opt(line_ending))
|
||||
)
|
||||
) => |(name, variadic): (&'a str, Option<_>)| (name, variadic.is_some())
|
||||
);
|
||||
|
||||
make_parser!(parse_cli11_body -> Vec<(&'a str, Positional)>,
|
||||
many0(cli11_pos_line) => |entries: Vec<(&'a str, bool)>|
|
||||
entries.into_iter().fold(Vec::new(), |mut acc, (name, variadic)| {
|
||||
caseless_push(name, Positional { optional: false, variadic }, &mut acc);
|
||||
acc
|
||||
})
|
||||
);
|
||||
|
||||
make_parser!(pub extract_cli11_positionals -> Vec<(&'a str, Positional)>,
|
||||
preceded(
|
||||
many0(preceded(not(cli11_section_header), (rest_of_line, line_ending))),
|
||||
preceded(cli11_section_header, parse_cli11_body)
|
||||
)
|
||||
);
|
||||
83
src/parsers/help/subcommands.rs
Normal file
83
src/parsers/help/subcommands.rs
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
use nom::{
|
||||
AsChar, IResult, Parser,
|
||||
branch::alt,
|
||||
bytes::complete::{tag, take_till, take_while1},
|
||||
character::complete::{char, space0},
|
||||
combinator::{not, value, verify},
|
||||
multi::many0,
|
||||
sequence::{delimited, preceded, terminated},
|
||||
};
|
||||
|
||||
use crate::make_parser;
|
||||
use crate::parsers::help::helpers::{eol, is_option_char};
|
||||
use crate::types::Subcommand;
|
||||
|
||||
/// chars allowed inside a bracketed placeholder (`<...>` / `[...]`):
/// any alphanumeric plus `_ - . | ,`.
fn is_placeholder(c: char) -> bool {
    c.is_alphanumeric() || matches!(c, '_' | '-' | '.' | '|' | ',')
}
|
||||
|
||||
/// chars allowed inside a bare (unbracketed) placeholder token such as
/// "FILE", "PATTERN..." or "A|B": ascii uppercase letters, ascii digits and
/// `_ - . | ,`. lowercase letters are deliberately excluded so mixed-case
/// description words ("NixOS", "Home-manager") are not taken for placeholders.
fn is_bare_placeholder_char(c: char) -> bool {
    c.is_ascii_uppercase() || c.is_ascii_digit() || matches!(c, '_' | '-' | '.' | '|' | ',')
}
|
||||
|
||||
// consume zero or more " PLACEHOLDER" groups that follow a subcommand name.
// each group is a single space plus one placeholder token; when the token
// does not match, many0 stops and the space of that failed attempt is left
// unconsumed.
make_parser!(
    skip_arg_placeholders -> (),
    value(
        (),
        many0((
            char(' '),
            alt((
                // <...> bracketed placeholder
                delimited(char('<'), take_while1(is_placeholder), char('>')),
                // [...] optional bracketed placeholder
                delimited(char('['), take_while1(is_placeholder), char(']')),
                // bare placeholder: a run of uppercase-friendly chars whose
                // first char is an ascii uppercase letter or digit ("N", "M2",
                // "FILE"). the run ends at the first char outside the set.
                verify(
                    take_while1(is_bare_placeholder_char),
                    |token: &str| {
                        token.starts_with(|c: char| c.is_ascii_uppercase() || c.is_ascii_digit())
                    }
                ),
            )),
        )),
    )
);
|
||||
|
||||
// parse a subcommand entry: leading whitespace, then a name (2+ option
|
||||
// chars, not starting with '-'), optional argument placeholders, exactly
|
||||
// two spaces, optional padding, then the description text and eol.
|
||||
make_parser!(pub subcommand_entry -> Subcommand<'a>,
|
||||
(
|
||||
preceded(
|
||||
space0,
|
||||
verify(
|
||||
preceded(not(char('-')), take_while1(is_option_char)),
|
||||
|n: &str| n.len() >= 2,
|
||||
),
|
||||
),
|
||||
skip_arg_placeholders,
|
||||
tag(" "),
|
||||
space0,
|
||||
terminated(take_till(|c: char| c.is_newline()), eol),
|
||||
) => |(name, _, _, _, desc): (&'a str, _, _, _, &'a str)| {
|
||||
// some help formats prefix desc with "- " (manpage-style); strip it.
|
||||
let d = desc.trim_start();
|
||||
let desc = d.strip_prefix("- ").map(|s| s.trim_start()).unwrap_or(d);
|
||||
Subcommand { name, desc }
|
||||
}
|
||||
);
|
||||
327
src/parsers/manpage.rs
Normal file
327
src/parsers/manpage.rs
Normal file
|
|
@ -0,0 +1,327 @@
|
|||
//! parse unix manpages (groff/mdoc format) into a structured result.
|
||||
//!
|
||||
//! manpages are written in roff/groff markup — a decades-old typesetting language
|
||||
//! used by man(1). this module strips the formatting and extracts structured data
|
||||
//! (flags, subcommands, positionals) from the raw groff source.
|
||||
//!
|
||||
//! there are two major manpage macro packages:
|
||||
//! - man (groff) — used by gnu/linux tools. uses macros like .SH, .TP, .IP, .PP
|
||||
//! - mdoc (bsd) — used by bsd tools. uses .Sh, .Fl, .Ar, .Op, .It, .Bl/.El
|
||||
//!
|
||||
//! this module handles both, auto-detecting the format by checking for .Sh macros.
|
||||
//!
|
||||
//! for groff manpages, flag extraction uses multiple "strategies" that target
|
||||
//! different common formatting patterns:
|
||||
//! - strategy_tp: .TP tagged paragraphs (gnu coreutils, help2man)
|
||||
//! - strategy_ip: .IP indented paragraphs (curl, hand-written)
|
||||
//! - strategy_pp_rs: .PP + .RS/.RE blocks (git, docbook)
|
||||
//! - strategy_nix: nix3-style bullet .IP with .UR/.UE hyperlinks
|
||||
//! - strategy_deroff: fallback — strip all groff, feed to help text parser
|
||||
//!
|
||||
//! the module tries all applicable strategies and picks the one that extracts
|
||||
//! the most flag entries, on the theory that more results = better match.
|
||||
|
||||
mod commands;
|
||||
mod groff;
|
||||
mod mdoc;
|
||||
mod sections;
|
||||
mod strategies;
|
||||
|
||||
use std::io::{self, Read};
|
||||
use std::path::Path;
|
||||
|
||||
use crate::types::{HelpResult, OptionEntry, Param, Positional, Subcommand, Switch};
|
||||
|
||||
pub use self::groff::{GroffLine, classify_line, strip_groff_escapes};
|
||||
pub use self::sections::{extract_subcommand_sections, extract_synopsis_command};
|
||||
|
||||
/// owned form of a flag's switch: the short char, the long name, or both.
/// the long name is stored without its leading dashes (the tests below
/// expect `Both('v', "verbose")` for `-v, --verbose`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OwnedSwitch {
    /// short form only
    Short(char),
    /// long form only
    Long(String),
    /// short and long form of the same flag
    Both(char, String),
}
|
||||
|
||||
/// owned form of a flag's parameter; the payload is the parameter name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OwnedParam {
    /// the flag requires this parameter
    Mandatory(String),
    /// the flag accepts this parameter but does not require it
    Optional(String),
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ManpageEntry {
|
||||
pub switch: OwnedSwitch,
|
||||
pub param: Option<OwnedParam>,
|
||||
pub desc: String,
|
||||
}
|
||||
|
||||
/// one extracted subcommand: its name and a description.
#[derive(Debug, Clone)]
pub struct ManpageSubcommand {
    /// subcommand name
    pub name: String,
    /// description text
    pub desc: String,
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct ManpageResult {
|
||||
pub entries: Vec<ManpageEntry>,
|
||||
pub subcommands: Vec<ManpageSubcommand>,
|
||||
pub positionals: Vec<(String, Positional)>,
|
||||
pub description: String,
|
||||
}
|
||||
|
||||
impl From<&Switch<'_>> for OwnedSwitch {
|
||||
fn from(s: &Switch<'_>) -> Self {
|
||||
match s {
|
||||
Switch::Short(c) => OwnedSwitch::Short(*c),
|
||||
Switch::Long(l) => OwnedSwitch::Long((*l).to_string()),
|
||||
Switch::Both(c, l) => OwnedSwitch::Both(*c, (*l).to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&Param<'_>> for OwnedParam {
|
||||
fn from(p: &Param<'_>) -> Self {
|
||||
match p {
|
||||
Param::Mandatory(s) => OwnedParam::Mandatory((*s).to_string()),
|
||||
Param::Optional(s) => OwnedParam::Optional((*s).to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&OptionEntry<'_>> for ManpageEntry {
|
||||
fn from(e: &OptionEntry<'_>) -> Self {
|
||||
let desc: String = e
|
||||
.desc
|
||||
.iter()
|
||||
.map(|s| s.trim())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
ManpageEntry {
|
||||
switch: (&e.switch).into(),
|
||||
param: e.param.as_ref().map(Into::into),
|
||||
desc,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&Subcommand<'_>> for ManpageSubcommand {
    /// the name is ascii-lowercased on conversion, for two reasons:
    /// (a) output file names stay consistent (meat_yum.json rather than
    ///     meat_YUM.json), and
    /// (b) recursive --help probes use the lowercase form, which is what
    ///     most real CLIs accept — even tools like meat that DISPLAY
    ///     uppercase names dispatch on the lowercased argument.
    fn from(sc: &Subcommand<'_>) -> Self {
        let name = sc.name.to_ascii_lowercase();
        let desc = sc.desc.to_string();
        Self { name, desc }
    }
}
|
||||
|
||||
impl From<&HelpResult<'_>> for ManpageResult {
|
||||
fn from(r: &HelpResult<'_>) -> Self {
|
||||
ManpageResult {
|
||||
entries: r.entries.iter().map(Into::into).collect(),
|
||||
subcommands: r.subcommands.iter().map(Into::into).collect(),
|
||||
// positional names are stored lowercased so output is
|
||||
// stable across the various places we extract them from
|
||||
// (synopsis, usage, cli11 sections).
|
||||
positionals: r
|
||||
.positionals
|
||||
.iter()
|
||||
.map(|(k, v)| (k.to_ascii_lowercase(), v.clone()))
|
||||
.collect(),
|
||||
description: r.desc.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// parse a manpage from its classified lines.
|
||||
/// auto-detects mdoc vs groff format. for groff, runs the multi-strategy
|
||||
/// extraction pipeline.
|
||||
pub fn parse_manpage_lines(lines: &[GroffLine]) -> ManpageResult {
|
||||
if mdoc::is_mdoc(lines) {
|
||||
mdoc::parse_mdoc_lines(lines)
|
||||
} else {
|
||||
let options_section = sections::extract_options_section(lines);
|
||||
let mut entries = strategies::extract_entries(&options_section);
|
||||
// merge SYNOPSIS-only flags (nix-env's `[{--profile | -p} path]`
|
||||
// pattern, where the flag is declared in the synopsis but never
|
||||
// listed as an entry in the OPTIONS body). body entries take
|
||||
// precedence on duplicate names — they carry the descriptions.
|
||||
let synopsis_flags = sections::extract_synopsis_flags(lines);
|
||||
if !synopsis_flags.is_empty() {
|
||||
let have_long: std::collections::HashSet<String> = entries
|
||||
.iter()
|
||||
.filter_map(|e| match &e.switch {
|
||||
OwnedSwitch::Long(l) | OwnedSwitch::Both(_, l) => Some(l.to_ascii_lowercase()),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
let have_short: std::collections::HashSet<char> = entries
|
||||
.iter()
|
||||
.filter_map(|e| match &e.switch {
|
||||
OwnedSwitch::Short(c) | OwnedSwitch::Both(c, _) => Some(*c),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
for e in synopsis_flags {
|
||||
let dup = match &e.switch {
|
||||
OwnedSwitch::Long(l) => have_long.contains(&l.to_ascii_lowercase()),
|
||||
OwnedSwitch::Short(c) => have_short.contains(c),
|
||||
OwnedSwitch::Both(c, l) => {
|
||||
have_short.contains(c) || have_long.contains(&l.to_ascii_lowercase())
|
||||
}
|
||||
};
|
||||
if !dup {
|
||||
entries.push(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
let positionals = sections::extract_synopsis_positionals(lines);
|
||||
let commands_section = sections::extract_commands_section(lines);
|
||||
let subcommands = commands::extract_subcommands_from_commands(&commands_section);
|
||||
ManpageResult {
|
||||
entries,
|
||||
subcommands,
|
||||
positionals,
|
||||
description: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// parse a manpage from its raw string contents.
|
||||
/// splits into lines, parses, then extracts the NAME section description.
|
||||
pub fn parse_manpage_string(contents: &str) -> ManpageResult {
|
||||
let lines: Vec<GroffLine> = contents.split('\n').map(classify_line).collect();
|
||||
let mut result = parse_manpage_lines(&lines);
|
||||
if let Some(desc) = sections::extract_name_description(&lines) {
|
||||
result.description = desc;
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// parse a manpage and also pull out clap-style `.SH SUBCOMMAND` sections
|
||||
/// as separate per-subcommand results. each subcommand section in a
|
||||
/// clap-generated manpage is its own command with its own flags; the
|
||||
/// parent's subcommand list is populated from their names.
|
||||
///
|
||||
/// returns (main_result, sub_results) where each sub_result has
|
||||
/// name=full_command ("nh os"), desc, and its own ManpageResult.
|
||||
pub fn parse_manpage_with_subs(contents: &str) -> (ManpageResult, Vec<(String, ManpageResult)>) {
|
||||
let lines: Vec<GroffLine> = contents.split('\n').map(classify_line).collect();
|
||||
let mut result = parse_manpage_lines(&lines);
|
||||
if let Some(desc) = sections::extract_name_description(&lines) {
|
||||
result.description = desc;
|
||||
}
|
||||
let sub_sections = sections::extract_subcommand_sections(&lines);
|
||||
if !sub_sections.is_empty() {
|
||||
// overwrite subcommands with the SUBCOMMAND-section names —
|
||||
// these are the authoritative list for clap-generated manpages.
|
||||
result.subcommands = sub_sections
|
||||
.iter()
|
||||
.map(|(name, desc, _)| ManpageSubcommand {
|
||||
name: name.to_ascii_lowercase(),
|
||||
desc: desc.clone(),
|
||||
})
|
||||
.collect();
|
||||
}
|
||||
// each SUBCOMMAND section body is parsed via the same strategy-picker
|
||||
// as the top-level OPTIONS section — clap puts flag definitions
|
||||
// directly under the .SH SUBCOMMAND header with no inner .SH wrapping,
|
||||
// so parse_manpage_lines (which looks for a child OPTIONS section)
|
||||
// would come back empty.
|
||||
let subs: Vec<(String, ManpageResult)> = sub_sections
|
||||
.into_iter()
|
||||
.map(|(name, desc, lines)| {
|
||||
let entries = strategies::extract_entries(&lines);
|
||||
let sub_result = ManpageResult {
|
||||
entries,
|
||||
subcommands: Vec::new(),
|
||||
positionals: Default::default(),
|
||||
description: desc,
|
||||
};
|
||||
(name, sub_result)
|
||||
})
|
||||
.collect();
|
||||
(result, subs)
|
||||
}
|
||||
|
||||
/// read a manpage file from disk. handles .gz compressed files (the common
|
||||
/// case — most installed manpages are gzipped). plain text files are read directly.
|
||||
pub fn read_manpage_file<P: AsRef<Path>>(path: P) -> io::Result<String> {
|
||||
let path = path.as_ref();
|
||||
let bytes = std::fs::read(path)?;
|
||||
if path.extension().and_then(|e| e.to_str()) == Some("gz") {
|
||||
let mut decoder = flate2::read::GzDecoder::new(&bytes[..]);
|
||||
let mut out = String::new();
|
||||
decoder.read_to_string(&mut out)?;
|
||||
Ok(out)
|
||||
} else {
|
||||
String::from_utf8(bytes).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
|
||||
}
|
||||
}
|
||||
|
||||
/// read + parse a manpage file in one step.
|
||||
pub fn parse_manpage_file<P: AsRef<Path>>(path: P) -> io::Result<ManpageResult> {
|
||||
let contents = read_manpage_file(path)?;
|
||||
Ok(parse_manpage_string(&contents))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // synthetic groff manpage with three .TP flag entries.
    const TP_MANPAGE: &str = r#".TH FOO 1 "2024" "1.0" "User Commands"
.SH NAME
foo \- a synthetic test command
.SH SYNOPSIS
.B foo
[\fIOPTIONS\fR] <input> [output]
.SH OPTIONS
.TP
\fB\-v\fR, \fB\-\-verbose\fR
increase output verbosity
.TP
\fB\-o\fR \fIFILE\fR, \fB\-\-output\fR=\fIFILE\fR
write to FILE
.TP
\fB\-h\fR, \fB\-\-help\fR
show this help and exit
"#;

    #[test]
    fn tp_strategy_extracts_flags() {
        let parsed = parse_manpage_string(TP_MANPAGE);
        assert_eq!(
            parsed.entries.len(),
            3,
            "expected 3 entries, got {:?}",
            parsed.entries
        );
        assert_eq!(parsed.description, "a synthetic test command");

        let (first, last) = (&parsed.entries[0], &parsed.entries[2]);
        assert_eq!(first.switch, OwnedSwitch::Both('v', "verbose".to_string()));
        assert_eq!(last.switch, OwnedSwitch::Both('h', "help".to_string()));
        assert!(first.desc.contains("verbosity"));
    }

    #[test]
    fn mdoc_format_detected() {
        let src = ".Sh NAME\n.Nm test\n.Nd a test\n.Sh DESCRIPTION\nstuff\n";
        let classified: Vec<GroffLine> = src.split('\n').map(classify_line).collect();
        assert!(mdoc::is_mdoc(&classified));
    }

    #[test]
    fn groff_escapes_stripped() {
        let plain = groff::strip_groff_escapes("\\fB\\-v\\fR \\fIfile\\fR");
        assert_eq!(plain.trim(), "-v file");
    }
}
|
||||
157
src/parsers/manpage/commands.rs
Normal file
157
src/parsers/manpage/commands.rs
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
//! COMMANDS section subcommand extraction.
|
||||
//!
|
||||
//! some manpages (notably systemctl) have a dedicated COMMANDS section
|
||||
//! listing subcommands with descriptions. these use .PP + bold name +
|
||||
//! .RS/.RE blocks:
|
||||
//! .PP
|
||||
//! \fBstart\fR \fIUNIT\fR...
|
||||
//! .RS 4
|
||||
//! Start (activate) one or more units.
|
||||
//! .RE
|
||||
|
||||
use crate::parsers::manpage::ManpageSubcommand;
|
||||
use crate::parsers::manpage::groff::{GroffLine, strip_groff_escapes, strip_inline_macro_args};
|
||||
|
||||
/// does `name` look like a subcommand? it must be at least 2 bytes long,
/// must not start with '-', and may only contain ascii lowercase letters,
/// ascii digits, '-' and '_'.
fn is_valid_subcmd(name: &str) -> bool {
    if name.len() < 2 || name.starts_with('-') {
        return false;
    }
    // every permitted char is ascii, so a byte-level check is sufficient
    name.bytes()
        .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_'))
}
|
||||
|
||||
/// extract subcommand name from a bold groff text like
|
||||
/// "\fBlist\-units\fR [\fIPATTERN\fR...]" -> "list-units"
|
||||
fn extract_bold_command_name(text: &str) -> Option<String> {
|
||||
let trimmed = text.trim();
|
||||
if trimmed.len() >= 4 && trimmed.starts_with("\\fB") {
|
||||
// look for \fB...\fR at the start: find the next '\\' and take
|
||||
// the segment between \fB and there.
|
||||
let after = &trimmed[3..];
|
||||
let segment_end = after.find('\\').unwrap_or(after.len());
|
||||
let name_part = &after[..segment_end];
|
||||
let reconstructed = format!("\\fB{name_part}\\fR");
|
||||
let name = normalize_command_token(strip_groff_escapes(&reconstructed).trim());
|
||||
if is_valid_subcmd(&name) {
|
||||
return Some(name);
|
||||
}
|
||||
return None;
|
||||
}
|
||||
// fallback: take the first whitespace-delimited word of the stripped text
|
||||
let stripped = strip_groff_escapes(trimmed);
|
||||
let first_word = stripped.split_whitespace().next().unwrap_or("");
|
||||
let name = normalize_command_token(first_word);
|
||||
if is_valid_subcmd(&name) {
|
||||
Some(name)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// clean up a raw command token: trim surrounding whitespace, drop
/// everything from the first '(' onwards, then drop trailing commas.
fn normalize_command_token(token: &str) -> String {
    let before_paren = token.trim().split('(').next().unwrap_or("");
    before_paren.trim_end_matches(',').to_string()
}
|
||||
|
||||
fn extract_command_name_from_line(line: &GroffLine) -> Option<String> {
|
||||
match line {
|
||||
GroffLine::Text(tag) => extract_bold_command_name(tag),
|
||||
GroffLine::Macro { name, args }
|
||||
if matches!(
|
||||
name.as_str(),
|
||||
"B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI"
|
||||
) =>
|
||||
{
|
||||
let rendered = strip_groff_escapes(&strip_inline_macro_args(args));
|
||||
extract_bold_command_name(&rendered)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// walk through commands section lines, extracting subcommand name+description
|
||||
/// pairs from .PP + Text + .RS/.RE blocks.
|
||||
pub fn extract_subcommands_from_commands(lines: &[GroffLine]) -> Vec<ManpageSubcommand> {
|
||||
let mut out = Vec::new();
|
||||
let mut i = 0;
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, .. } = &lines[i]
|
||||
&& name == "PP"
|
||||
{
|
||||
i += 1;
|
||||
if i >= lines.len() {
|
||||
continue;
|
||||
}
|
||||
if let Some(name) = extract_command_name_from_line(&lines[i]) {
|
||||
let (desc, new_i) = collect_subcmd_desc(lines, i + 1);
|
||||
let short_desc = first_sentence(&desc);
|
||||
out.push(ManpageSubcommand {
|
||||
name: name.to_ascii_lowercase(),
|
||||
desc: short_desc,
|
||||
});
|
||||
i = new_i;
|
||||
continue;
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// collect the description for a subcommand entry. handles .RS/.RE blocks
|
||||
/// and stops at the next .PP/.SH/.SS boundary.
|
||||
fn collect_subcmd_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
|
||||
let mut acc: Vec<String> = Vec::new();
|
||||
let mut i = start;
|
||||
while i < lines.len() {
|
||||
match &lines[i] {
|
||||
GroffLine::Macro { name, .. } if name == "RS" => {
|
||||
i += 1;
|
||||
// inside .RS — collect until .RE or boundary
|
||||
while i < lines.len() {
|
||||
match &lines[i] {
|
||||
GroffLine::Macro { name, .. } if name == "RE" => {
|
||||
return (acc.join(" "), i + 1);
|
||||
}
|
||||
GroffLine::Text(t) => {
|
||||
acc.push(t.clone());
|
||||
i += 1;
|
||||
}
|
||||
GroffLine::Macro { name, .. }
|
||||
if name == "PP" || name == "SH" || name == "SS" =>
|
||||
{
|
||||
return (acc.join(" "), i);
|
||||
}
|
||||
_ => i += 1,
|
||||
}
|
||||
}
|
||||
return (acc.join(" "), i);
|
||||
}
|
||||
GroffLine::Text(t) => {
|
||||
acc.push(t.clone());
|
||||
i += 1;
|
||||
}
|
||||
_ => return (acc.join(" "), i),
|
||||
}
|
||||
}
|
||||
(acc.join(" "), i)
|
||||
}
|
||||
|
||||
/// take the first sentence of `s` as the description.
///
/// a sentence ends at the first '.' that is followed by whitespace or the
/// end of the text, so dots inside words ("foo.service", "1.0") do not cut
/// the description short. the terminating dot(s) are dropped. a '.' at the
/// very start is never treated as a terminator, and when no terminator is
/// found (or the cut would leave nothing) the whole trimmed text is
/// returned.
fn first_sentence(s: &str) -> String {
    let s = s.trim();
    let end = s.char_indices().find_map(|(idx, ch)| {
        let terminates = ch == '.'
            && idx > 0
            && s[idx + 1..].chars().next().map_or(true, char::is_whitespace);
        terminates.then_some(idx)
    });
    let sentence = end.map(|idx| s[..idx].trim_end_matches('.').trim());
    match sentence {
        Some(text) if !text.is_empty() => text.to_string(),
        _ => s.to_string(),
    }
}
|
||||
372
src/parsers/manpage/groff.rs
Normal file
372
src/parsers/manpage/groff.rs
Normal file
|
|
@ -0,0 +1,372 @@
|
|||
//! groff escape/formatting stripping and line classification.
|
||||
//!
|
||||
//! groff escapes start with backslash and use various continuation syntaxes.
|
||||
//! we strip them, replacing named characters (like \(aq for apostrophe) with
|
||||
//! their text equivalents and discarding formatting directives.
|
||||
//!
|
||||
//! also exports `make_macro_walker!`, the manpage-side analogue of the
|
||||
//! help parser's `make_parser!`. all of our strategy_* functions are
|
||||
//! "scan lines, on each .MACRO_NAME run a handler, advance, accumulate"
|
||||
//! — this macro factors out the loop scaffolding so each strategy reduces
|
||||
//! to its specific extraction logic.
|
||||
|
||||
/// generate a `pub fn $name(&[GroffLine]) -> Vec<T>` that scans the slice
/// and runs a handler on every macro line whose name equals `$mname`.
///
/// the handler sees three bindings:
///   - `lines`: the full slice (for slicing further bodies)
///   - `i`: the index of the matched macro line
///   - `args`: the macro's argument string (by reference)
///
/// and evaluates to `Option<(T, usize)>`. `Some((value, new_i))` pushes
/// `value` and moves the cursor to `new_i`; `None` advances by one line
/// and keeps scanning. lines that are not the wanted macro are stepped
/// over one at a time.
///
/// this mirrors the help-parser's `make_parser!(name -> T, parser => wrap)`:
/// the macro owns the loop scaffolding, the handler holds the extraction
/// logic.
#[macro_export]
macro_rules! make_macro_walker {
    (pub $name:ident -> Vec<$t:ty>, on macro $mname:expr =>
     |$lines:ident, $i:ident, $args:ident| $body:expr) => {
        pub fn $name(input: &[$crate::parsers::manpage::GroffLine]) -> Vec<$t> {
            let $lines: &[$crate::parsers::manpage::GroffLine] = input;
            let mut collected = Vec::new();
            let mut pos = 0;
            while pos < $lines.len() {
                let outcome: Option<($t, usize)> = match &$lines[pos] {
                    $crate::parsers::manpage::GroffLine::Macro {
                        name: found,
                        args: $args,
                    } if found == $mname => {
                        let $i = pos;
                        // the handler runs inside an immediately-invoked
                        // closure so an early `return None` in it leaves
                        // the closure, not the generated function.
                        #[allow(clippy::redundant_closure_call)]
                        let handled: Option<($t, usize)> = (|| $body)();
                        handled
                    }
                    _ => None,
                };
                match outcome {
                    Some((value, resume_at)) => {
                        collected.push(value);
                        pos = resume_at;
                    }
                    None => pos += 1,
                }
            }
            collected
        }
    };
}
|
||||
|
||||
/// classification of one line of manpage source. every line falls into
/// exactly one of these four kinds, and all later parsing pattern-matches
/// on sequences of them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GroffLine {
    /// a macro request: its name and argument string,
    /// e.g. ("SH", "OPTIONS") or ("TP", "")
    Macro { name: String, args: String },
    /// plain text, with groff escapes already stripped
    Text(String),
    /// empty line
    Blank,
    /// groff comment line (starts with dot-backslash-quote or backslash-quote)
    Comment,
}
|
||||
|
||||
/// map a groff named-character code to plain text: "aq" is an apostrophe,
/// "lq"/"Lq"/"rq"/"Rq" become a double quote, "em"/"en" become a hyphen.
/// unknown names yield `None`.
fn named_char_of(name: &str) -> Option<char> {
    let replacement = match name {
        "aq" => '\'',
        "lq" | "Lq" | "rq" | "Rq" => '"',
        "em" | "en" => '-',
        _ => return None,
    };
    Some(replacement)
}
|
||||
|
||||
/// true for ascii letters and ascii digits.
fn is_alnum(c: u8) -> bool {
    matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
}
|
||||
|
||||
/// strip groff escape sequences, replacing named characters with text
|
||||
/// equivalents and discarding formatting directives.
|
||||
pub fn strip_groff_escapes(source: &str) -> String {
|
||||
let bytes = source.as_bytes();
|
||||
let len = bytes.len();
|
||||
let mut buffer = String::with_capacity(len);
|
||||
let mut pos = 0;
|
||||
let mut prev_char: u8 = 0;
|
||||
|
||||
while pos < len {
|
||||
if bytes[pos] == b'\\' && pos + 1 < len {
|
||||
let next = bytes[pos + 1];
|
||||
match next {
|
||||
b'f' => {
|
||||
// font escape: \fB, \fI, \fP, \fR, \f(XX, \f[...]
|
||||
if pos + 2 < len {
|
||||
let font_char = bytes[pos + 2];
|
||||
// insert space before italic font to preserve word boundaries
|
||||
// e.g. \fB--max-results\fR\fIcount\fR -> "--max-results count"
|
||||
if font_char == b'I' && is_alnum(prev_char) {
|
||||
buffer.push(' ');
|
||||
prev_char = b' ';
|
||||
}
|
||||
if font_char == b'(' {
|
||||
pos += 5; // \f(XX — two-character font name
|
||||
} else if font_char == b'[' {
|
||||
pos += 3;
|
||||
skip_to_byte(bytes, len, &mut pos, b']');
|
||||
if pos < len {
|
||||
pos += 1;
|
||||
}
|
||||
} else {
|
||||
pos += 3; // \fX — single-character font selector
|
||||
}
|
||||
} else {
|
||||
pos += 2;
|
||||
}
|
||||
}
|
||||
b'-' => {
|
||||
// escaped hyphen-minus — emit a plain hyphen
|
||||
buffer.push('-');
|
||||
prev_char = b'-';
|
||||
pos += 2;
|
||||
}
|
||||
b'&' | b'/' | b',' => {
|
||||
// zero-width characters — discard without output
|
||||
pos += 2;
|
||||
}
|
||||
b'(' => {
|
||||
// two-char named character: \(aq, \(lq, \(rq, etc.
|
||||
if pos + 3 < len {
|
||||
let name = &source[pos + 2..pos + 4];
|
||||
if let Some(c) = named_char_of(name) {
|
||||
buffer.push(c);
|
||||
prev_char = c as u8;
|
||||
}
|
||||
pos += 4;
|
||||
} else {
|
||||
pos += 2;
|
||||
}
|
||||
}
|
||||
b'[' => {
|
||||
// bracketed named character: \[aq], \[lq], etc.
|
||||
pos += 2;
|
||||
let start = pos;
|
||||
skip_to_byte(bytes, len, &mut pos, b']');
|
||||
if pos < len {
|
||||
let name = &source[start..pos];
|
||||
if let Some(c) = named_char_of(name) {
|
||||
buffer.push(c);
|
||||
prev_char = c as u8;
|
||||
}
|
||||
pos += 1;
|
||||
}
|
||||
}
|
||||
b's' => {
|
||||
// size escape: \sN, \s+N, \s-N — skip the numeric argument
|
||||
pos += 2;
|
||||
if pos < len && (bytes[pos] == b'+' || bytes[pos] == b'-') {
|
||||
pos += 1;
|
||||
}
|
||||
if pos < len && bytes[pos].is_ascii_digit() {
|
||||
pos += 1;
|
||||
}
|
||||
if pos < len && bytes[pos].is_ascii_digit() {
|
||||
pos += 1;
|
||||
}
|
||||
}
|
||||
b'm' => {
|
||||
// color escape: \m[...] — skip the bracketed color name
|
||||
pos += 2;
|
||||
if pos < len && bytes[pos] == b'[' {
|
||||
pos += 1;
|
||||
skip_to_byte(bytes, len, &mut pos, b']');
|
||||
if pos < len {
|
||||
pos += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
b'X' => {
|
||||
// device control: \X'...' — skip the single-quoted payload
|
||||
pos += 2;
|
||||
if pos < len && bytes[pos] == b'\'' {
|
||||
pos += 1;
|
||||
skip_to_byte(bytes, len, &mut pos, b'\'');
|
||||
if pos < len {
|
||||
pos += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
b'*' => {
|
||||
// string variable: \*X or \*(XX or \*[...] — skip the reference
|
||||
pos += 2;
|
||||
skip_groff_reference(bytes, len, &mut pos);
|
||||
}
|
||||
b'n' => {
|
||||
// number register: \nX or \n(XX or \n[...] — skip the reference
|
||||
pos += 2;
|
||||
skip_groff_reference(bytes, len, &mut pos);
|
||||
}
|
||||
b'e' => {
|
||||
// escaped backslash literal
|
||||
buffer.push('\\');
|
||||
prev_char = b'\\';
|
||||
pos += 2;
|
||||
}
|
||||
b'\\' => {
|
||||
// double backslash — emit one
|
||||
buffer.push('\\');
|
||||
prev_char = b'\\';
|
||||
pos += 2;
|
||||
}
|
||||
b' ' => {
|
||||
// escaped space — emit a regular space
|
||||
buffer.push(' ');
|
||||
prev_char = b' ';
|
||||
pos += 2;
|
||||
}
|
||||
_ => {
|
||||
// unknown escape — skip the two-character sequence
|
||||
pos += 2;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// copy a full utf-8 char from source to buffer
|
||||
let c = source[pos..].chars().next().unwrap();
|
||||
buffer.push(c);
|
||||
prev_char = if c.is_ascii() { c as u8 } else { 0 };
|
||||
pos += c.len_utf8();
|
||||
}
|
||||
}
|
||||
buffer
|
||||
}
|
||||
|
||||
/// advance `*pos` to the next occurrence of `delim` in `bytes[*pos..len]`,
/// or to `len` when there is none. `*pos` is left untouched when it is
/// already at or past `len`.
fn skip_to_byte(bytes: &[u8], len: usize, pos: &mut usize, delim: u8) {
    let start = *pos;
    if start < len {
        *pos = bytes[start..len]
            .iter()
            .position(|&b| b == delim)
            .map_or(len, |offset| start + offset);
    }
}
|
||||
|
||||
/// skip a groff reference that uses one of three sub-forms:
///   single char  — e.g. \*X or \nX
///   ( + 2 chars  — e.g. \*(XX or \n(XX
///   [ to ]       — e.g. \*[name] or \n[name]
///
/// `*pos` must point at the first byte after `\*` / `\n`. names are skipped
/// as whole utf-8 characters (a lead byte plus its continuation bytes), so
/// `*pos` never ends up inside a multi-byte character, and it is never
/// moved past the end of the input.
fn skip_groff_reference(bytes: &[u8], len: usize, pos: &mut usize) {
    let end = len.min(bytes.len());
    if *pos >= end {
        return;
    }

    // step over one character: its first byte plus any utf-8 continuation
    // bytes (those of the form 0b10xx_xxxx)
    let skip_char = |p: &mut usize| {
        if *p < end {
            *p += 1;
            while *p < end && bytes[*p] & 0xC0 == 0x80 {
                *p += 1;
            }
        }
    };

    match bytes[*pos] {
        b'(' => {
            // '(' followed by a two-character name
            *pos += 1;
            skip_char(pos);
            skip_char(pos);
        }
        b'[' => {
            // everything up to and including the closing ']'
            *pos += 1;
            while *pos < end && bytes[*pos] != b']' {
                *pos += 1;
            }
            if *pos < end {
                *pos += 1;
            }
        }
        _ => skip_char(pos),
    }
}
|
||||
|
||||
/// join the arguments of an inline font macro (.BI, .BR, .IR, ...).
///
/// these macros alternate fonts between their arguments and print them
/// with nothing in between. so unquoted blanks (space / tab) are dropped,
/// while double-quoted arguments are copied verbatim without the quotes:
///   .BI "--output " "FILE"   ->  "--output FILE"
///   .BR foo bar              ->  "foobar"
/// an unterminated quote runs to the end of the text.
pub fn strip_inline_macro_args(text: &str) -> String {
    let mut rendered = String::with_capacity(text.len());
    let mut in_quotes = false;
    for ch in text.chars() {
        match ch {
            // quotes only toggle the mode, they are never emitted
            '"' => in_quotes = !in_quotes,
            // blanks separate arguments only outside of quotes
            ' ' | '\t' if !in_quotes => {}
            other => rendered.push(other),
        }
    }
    rendered
}
|
||||
|
||||
/// strip escapes and trim whitespace.
|
||||
pub fn strip_groff(line: &str) -> String {
|
||||
strip_groff_escapes(line).trim().to_string()
|
||||
}
|
||||
|
||||
/// is this line a groff comment? true when it begins with
/// dot-backslash-quote or with backslash-quote.
fn is_comment_line(line: &str) -> bool {
    line.starts_with(".\\\"") || line.starts_with("\\\"")
}
|
||||
|
||||
/// classify a single line of manpage source.
|
||||
/// macro lines start with '.' or '\'' (groff alternate control char).
|
||||
/// the macro name is split from its arguments at the first space/tab.
|
||||
/// arguments wrapped in double quotes are unquoted.
|
||||
pub fn classify_line(line: &str) -> GroffLine {
|
||||
if is_comment_line(line) {
|
||||
return GroffLine::Comment;
|
||||
}
|
||||
let len = line.len();
|
||||
if len == 0 {
|
||||
return GroffLine::Blank;
|
||||
}
|
||||
let bytes = line.as_bytes();
|
||||
// base classify also flags dot-backslash forms as comments
|
||||
if len >= 2 && bytes[0] == b'.' && bytes[1] == b'\\' && (len < 3 || bytes[2] == b'"') {
|
||||
return GroffLine::Comment;
|
||||
}
|
||||
if len >= 3 && bytes[0] == b'\\' && bytes[1] == b'"' {
|
||||
return GroffLine::Comment;
|
||||
}
|
||||
if bytes[0] == b'.' || bytes[0] == b'\'' {
|
||||
// macro line — extract macro name and arguments
|
||||
let rest = line[1..].trim();
|
||||
let split_at = rest.find([' ', '\t']);
|
||||
match split_at {
|
||||
Some(idx) => {
|
||||
let name = rest[..idx].to_string();
|
||||
let args = rest[idx + 1..].trim();
|
||||
// strip surrounding quotes from arguments
|
||||
let args = if args.len() >= 2 && args.starts_with('"') && args.ends_with('"') {
|
||||
args[1..args.len() - 1].to_string()
|
||||
} else {
|
||||
args.to_string()
|
||||
};
|
||||
GroffLine::Macro { name, args }
|
||||
}
|
||||
None => GroffLine::Macro {
|
||||
name: rest.to_string(),
|
||||
args: String::new(),
|
||||
},
|
||||
}
|
||||
} else {
|
||||
let stripped = strip_groff(line);
|
||||
if stripped.is_empty() {
|
||||
GroffLine::Blank
|
||||
} else {
|
||||
GroffLine::Text(stripped)
|
||||
}
|
||||
}
|
||||
}
|
||||
237
src/parsers/manpage/mdoc.rs
Normal file
237
src/parsers/manpage/mdoc.rs
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
//! BSD mdoc format support.
|
||||
//!
|
||||
//! mdoc is the bsd manpage macro package. it uses semantic macros rather than
|
||||
//! presentation macros:
|
||||
//! .Fl v -> flag: -v
|
||||
//! .Ar file -> argument: file
|
||||
//! .Op ... -> optional: [...]
|
||||
//! .Bl/.It/.El -> list begin/item/end
|
||||
//! .Sh -> section header (note lowercase 'h', vs groff's .SH)
|
||||
|
||||
use crate::parsers::manpage::groff::{GroffLine, strip_groff_escapes};
|
||||
use crate::parsers::manpage::{ManpageEntry, ManpageResult, OwnedParam, OwnedSwitch};
|
||||
use crate::types::Positional;
|
||||
|
||||
/// detect mdoc format by looking for any .Sh macro.
|
||||
pub fn is_mdoc(lines: &[GroffLine]) -> bool {
|
||||
lines
|
||||
.iter()
|
||||
.any(|l| matches!(l, GroffLine::Macro { name, .. } if name == "Sh"))
|
||||
}
|
||||
|
||||
/// extract renderable text from an mdoc line, skipping structural macros.
|
||||
fn mdoc_text_of(line: &GroffLine) -> Option<String> {
|
||||
match line {
|
||||
GroffLine::Text(t) => Some(strip_groff_escapes(t)),
|
||||
GroffLine::Macro { name, args } => match name.as_str() {
|
||||
"Pp" | "Bl" | "El" | "Sh" | "Ss" | "Os" | "Dd" | "Dt" | "Oo" | "Oc" | "Op" => None,
|
||||
_ => {
|
||||
let text = strip_groff_escapes(args);
|
||||
let text = text.trim();
|
||||
if text.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(text.to_string())
|
||||
}
|
||||
}
|
||||
},
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// parse an mdoc .It (list item) line that contains flag definitions.
|
||||
/// mdoc .It lines look like: ".It Fl v Ar file"
|
||||
/// where Fl = flag, Ar = argument.
|
||||
fn parse_mdoc_it(args: &str) -> Option<ManpageEntry> {
|
||||
let words: Vec<&str> = args
|
||||
.split(' ')
|
||||
.filter(|w| !w.is_empty() && *w != "Ns")
|
||||
.collect();
|
||||
let param = match words.as_slice() {
|
||||
[_, _, "Ar", name, ..] => Some(OwnedParam::Mandatory(name.to_string())),
|
||||
_ => None,
|
||||
};
|
||||
match words.as_slice() {
|
||||
["Fl", ch, ..] if ch.len() == 1 && ch.chars().next().unwrap().is_ascii_alphanumeric() => {
|
||||
Some(ManpageEntry {
|
||||
switch: OwnedSwitch::Short(ch.chars().next().unwrap()),
|
||||
param,
|
||||
desc: String::new(),
|
||||
})
|
||||
}
|
||||
["Fl", name, ..] if name.len() > 1 && name.starts_with('-') => Some(ManpageEntry {
|
||||
switch: OwnedSwitch::Long(name[1..].to_string()),
|
||||
param,
|
||||
desc: String::new(),
|
||||
}),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// read a positional argument out of the arguments of an mdoc `.Ar` line
/// (or the part of an `.Op Ar` line after `Ar`).
/// the first space-separated token is the name (lowercased); it must be at
/// least two bytes long. the bool is true when a standalone "..." token
/// appears anywhere in `args`.
fn positional_of_mdoc_line(args: &str) -> Option<(String, bool)> {
    let first = args.split(' ').find(|tok| !tok.is_empty())?;
    if first.len() < 2 {
        return None;
    }
    let variadic = args.split(' ').any(|tok| tok == "...");
    Some((first.to_ascii_lowercase(), variadic))
}
|
||||
|
||||
/// parse an entire mdoc-format manpage.
|
||||
/// walks through all classified lines looking for:
|
||||
/// 1. .Bl/.It/.El list blocks containing flag definitions
|
||||
/// 2. .Sh SYNOPSIS sections containing positional arguments (.Ar, .Op Ar)
|
||||
pub fn parse_mdoc_lines(lines: &[GroffLine]) -> ManpageResult {
|
||||
// collect description for an entry — until next structural macro
|
||||
fn desc_of(lines: &[GroffLine], start: usize) -> (String, usize) {
|
||||
let mut acc: Vec<String> = Vec::new();
|
||||
let mut i = start;
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, .. } = &lines[i]
|
||||
&& matches!(name.as_str(), "It" | "El" | "Sh" | "Ss")
|
||||
{
|
||||
break;
|
||||
}
|
||||
if let Some(t) = mdoc_text_of(&lines[i]) {
|
||||
acc.push(t);
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
(acc.join(" ").trim().to_string(), i)
|
||||
}
|
||||
|
||||
fn skip_to_el(lines: &[GroffLine], start: usize) -> usize {
|
||||
let mut i = start;
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, .. } = &lines[i]
|
||||
&& name == "El"
|
||||
{
|
||||
return i + 1;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
i
|
||||
}
|
||||
|
||||
/// parse a single .It entry: extract flag, collect description.
|
||||
fn parse_it(
|
||||
args: &str,
|
||||
lines: &[GroffLine],
|
||||
start: usize,
|
||||
entries: &mut Vec<ManpageEntry>,
|
||||
) -> usize {
|
||||
let (desc, new_start) = desc_of(lines, start);
|
||||
if let Some(mut entry) = parse_mdoc_it(args) {
|
||||
entry.desc = desc;
|
||||
entries.push(entry);
|
||||
}
|
||||
new_start
|
||||
}
|
||||
|
||||
/// parse all .It entries within a .Bl/.El option list.
|
||||
fn parse_option_list(
|
||||
entries: &mut Vec<ManpageEntry>,
|
||||
lines: &[GroffLine],
|
||||
start: usize,
|
||||
) -> usize {
|
||||
let mut i = start;
|
||||
while i < lines.len() {
|
||||
match &lines[i] {
|
||||
GroffLine::Macro { name, .. } if name == "El" => return i + 1,
|
||||
GroffLine::Macro { name, args } if name == "It" => {
|
||||
i = parse_it(args, lines, i + 1, entries);
|
||||
}
|
||||
_ => i += 1,
|
||||
}
|
||||
}
|
||||
i
|
||||
}
|
||||
|
||||
fn parse_synopsis(
|
||||
positionals: &mut Vec<(String, bool, bool)>,
|
||||
lines: &[GroffLine],
|
||||
start: usize,
|
||||
) -> usize {
|
||||
let mut i = start;
|
||||
while i < lines.len() {
|
||||
match &lines[i] {
|
||||
GroffLine::Macro { name, .. } if name == "Sh" => return i,
|
||||
GroffLine::Macro { name, args } if name == "Ar" => {
|
||||
if let Some((n, v)) = positional_of_mdoc_line(args) {
|
||||
positionals.push((n, false, v));
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
GroffLine::Macro { name, args } if name == "Op" => {
|
||||
let words: Vec<&str> = args.split(' ').filter(|w| !w.is_empty()).collect();
|
||||
if matches!(words.first(), Some(&"Ar")) {
|
||||
let rest = if args.len() > 3 { &args[3..] } else { "" };
|
||||
if let Some((n, v)) = positional_of_mdoc_line(rest) {
|
||||
positionals.push((n, true, v));
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
_ => i += 1,
|
||||
}
|
||||
}
|
||||
i
|
||||
}
|
||||
|
||||
let mut entries: Vec<ManpageEntry> = Vec::new();
|
||||
let mut positionals: Vec<(String, bool, bool)> = Vec::new();
|
||||
let mut i = 0;
|
||||
while i < lines.len() {
|
||||
// .Bl + .It header sequence — peek at first .It to decide if this is a flag list
|
||||
if let GroffLine::Macro { name: n1, .. } = &lines[i]
|
||||
&& n1 == "Bl"
|
||||
{
|
||||
let j = i + 1;
|
||||
if j < lines.len()
|
||||
&& let GroffLine::Macro {
|
||||
name: n2,
|
||||
args: it_args,
|
||||
} = &lines[j]
|
||||
&& n2 == "It"
|
||||
{
|
||||
let words: Vec<&str> = it_args.split(' ').filter(|w| !w.is_empty()).collect();
|
||||
if matches!(words.first(), Some(&"Fl")) {
|
||||
let k = parse_it(it_args, lines, j + 1, &mut entries);
|
||||
i = parse_option_list(&mut entries, lines, k);
|
||||
continue;
|
||||
} else {
|
||||
i = skip_to_el(lines, j + 1);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
i = skip_to_el(lines, j);
|
||||
continue;
|
||||
}
|
||||
if let GroffLine::Macro { name, args } = &lines[i]
|
||||
&& name == "Sh"
|
||||
&& args.trim().eq_ignore_ascii_case("SYNOPSIS")
|
||||
{
|
||||
i = parse_synopsis(&mut positionals, lines, i + 1);
|
||||
continue;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// deduplicate positionals by name, preserving first-seen order
|
||||
let mut seen: Vec<String> = Vec::new();
|
||||
let mut deduped: Vec<(String, Positional)> = Vec::new();
|
||||
for (name, optional, variadic) in positionals {
|
||||
if !seen.contains(&name) {
|
||||
seen.push(name.clone());
|
||||
deduped.push((name, Positional { optional, variadic }));
|
||||
}
|
||||
}
|
||||
|
||||
ManpageResult {
|
||||
entries,
|
||||
subcommands: Vec::new(),
|
||||
positionals: deduped,
|
||||
description: String::new(),
|
||||
}
|
||||
}
|
||||
575
src/parsers/manpage/sections.rs
Normal file
575
src/parsers/manpage/sections.rs
Normal file
|
|
@ -0,0 +1,575 @@
|
|||
//! section extraction from manpages.
|
||||
//!
|
||||
//! manpages are divided into sections by .SH macros. we extract OPTIONS,
|
||||
//! NAME, SYNOPSIS, and COMMANDS sections for their specific content.
|
||||
|
||||
use nom::{Parser, sequence::preceded};
|
||||
|
||||
use crate::parsers::help::{parse_usage_args, parse_usage_flags, skip_command_name};
|
||||
use crate::parsers::manpage::groff::{GroffLine, strip_groff_escapes, strip_inline_macro_args};
|
||||
use crate::parsers::manpage::{ManpageEntry, OwnedParam, OwnedSwitch};
|
||||
use crate::types::{Param, Positional, Switch};
|
||||
|
||||
/// does a section heading name an options section? true for any heading
/// that contains "OPTION", compared case-insensitively (ASCII).
fn is_options_section(name: &str) -> bool {
    // an exact "OPTIONS" heading is covered by the substring test as well
    name.trim().to_ascii_uppercase().contains("OPTION")
}
|
||||
|
||||
/// extract the lines from the OPTIONS section(s). collects from all
|
||||
/// option-like .SH sections and concatenates them (handles the nix pattern
|
||||
/// of "Options" and "Common Options" being separate sections).
|
||||
/// falls back to DESCRIPTION if no OPTIONS section exists.
|
||||
pub fn extract_options_section(lines: &[GroffLine]) -> Vec<GroffLine> {
|
||||
let mut acc: Vec<GroffLine> = Vec::new();
|
||||
let mut i = 0;
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, args } = &lines[i]
|
||||
&& name == "SH"
|
||||
&& is_options_section(args)
|
||||
{
|
||||
i += 1;
|
||||
// synthetic separator between concatenated sections so that
|
||||
// collect_desc_text (which stops on SH/SS) does not let descriptions
|
||||
// bleed between sections.
|
||||
if !acc.is_empty() {
|
||||
acc.push(GroffLine::Macro {
|
||||
name: "SH".to_string(),
|
||||
args: String::new(),
|
||||
});
|
||||
}
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, .. } = &lines[i]
|
||||
&& name == "SH"
|
||||
{
|
||||
break;
|
||||
}
|
||||
acc.push(lines[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
if !acc.is_empty() {
|
||||
return acc;
|
||||
}
|
||||
// fallback: DESCRIPTION section
|
||||
let mut i = 0;
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, args } = &lines[i]
|
||||
&& name == "SH"
|
||||
&& args.trim().eq_ignore_ascii_case("DESCRIPTION")
|
||||
{
|
||||
i += 1;
|
||||
let mut desc_acc: Vec<GroffLine> = Vec::new();
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, .. } = &lines[i]
|
||||
&& name == "SH"
|
||||
{
|
||||
break;
|
||||
}
|
||||
desc_acc.push(lines[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
return desc_acc;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
/// the NAME section follows the convention "command \- short description".
|
||||
/// extract the part after "\-" as the command's description.
|
||||
/// handles both "\-" (groff) and " - " (plain text) separators.
|
||||
pub fn extract_name_description(lines: &[GroffLine]) -> Option<String> {
|
||||
let mut i = 0;
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, args } = &lines[i]
|
||||
&& name == "SH"
|
||||
&& args.trim().eq_ignore_ascii_case("NAME")
|
||||
{
|
||||
i += 1;
|
||||
let mut acc: Vec<String> = Vec::new();
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, .. } = &lines[i]
|
||||
&& name == "SH"
|
||||
{
|
||||
break;
|
||||
}
|
||||
match &lines[i] {
|
||||
GroffLine::Text(t) => acc.push(t.clone()),
|
||||
GroffLine::Macro { name, args }
|
||||
if matches!(name.as_str(), "B" | "BI" | "BR" | "I" | "IR") =>
|
||||
{
|
||||
let text = strip_groff_escapes(&strip_inline_macro_args(args));
|
||||
let text = text.trim();
|
||||
if !text.is_empty() {
|
||||
acc.push(text.to_string());
|
||||
}
|
||||
}
|
||||
GroffLine::Macro { name, args } if name == "Nm" => {
|
||||
let text = strip_groff_escapes(args);
|
||||
let text = text.trim();
|
||||
if !text.is_empty() {
|
||||
acc.push(text.to_string());
|
||||
}
|
||||
}
|
||||
GroffLine::Macro { name, args } if name == "Nd" => {
|
||||
let text = strip_groff_escapes(args);
|
||||
let text = text.trim();
|
||||
if !text.is_empty() {
|
||||
acc.push(format!("\\- {text}"));
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
let full = acc.join(" ").trim().to_string();
|
||||
return split_name_separator(&full);
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// split a NAME line on either "\-" (groff) or " - " (plain).
|
||||
/// returns the part after the separator, trimmed.
|
||||
fn split_name_separator(full: &str) -> Option<String> {
|
||||
// search for either marker
|
||||
let groff_idx = find_padded(full, "\\-");
|
||||
let dash_idx = find_padded(full, " - ");
|
||||
let idx = match (groff_idx, dash_idx) {
|
||||
(Some(a), Some(b)) => Some(a.min(b)),
|
||||
(Some(a), None) => Some(a),
|
||||
(None, Some(b)) => Some(b),
|
||||
(None, None) => None,
|
||||
}?;
|
||||
// skip past the matched separator
|
||||
let after = if full[idx..].starts_with("\\-") {
|
||||
&full[idx + 2..]
|
||||
} else {
|
||||
&full[idx + 3..]
|
||||
};
|
||||
let desc = after.trim().to_string();
|
||||
if desc.is_empty() { None } else { Some(desc) }
|
||||
}
|
||||
|
||||
/// locate `needle` inside `s` and return the byte offset of its first
/// occurrence, or None when it does not occur.
/// this is a plain substring search: any padding the caller cares about
/// (for example the spaces in " - ") has to be part of `needle` itself,
/// nothing is added or skipped here.
fn find_padded(s: &str, needle: &str) -> Option<usize> {
    s.find(needle)
}
|
||||
|
||||
/// extract the command name from the SYNOPSIS section.
|
||||
///
|
||||
/// the SYNOPSIS section shows how to invoke the command:
|
||||
/// .SH SYNOPSIS
|
||||
/// .B git add
|
||||
/// [\fIOPTIONS\fR] [\fB\-\-\fR] [\fI<pathspec>\fR...]
|
||||
///
|
||||
/// we extract the command name by taking consecutive "word" tokens until
|
||||
/// we hit something that looks like an argument (starts with [, <, -, etc.).
|
||||
pub fn extract_synopsis_command(contents: &str) -> Option<String> {
|
||||
// pre-replace italic text (\fI...\fR) with angle-bracketed placeholders
|
||||
// before classification strips the font info. italic in groff indicates
|
||||
// a parameter/placeholder (e.g. \fIoperation\fR), not a command word.
|
||||
// the angle brackets cause extract_cmd to stop at these tokens since
|
||||
// '<' is in its stop set.
|
||||
let preprocessed: Vec<String> = contents
|
||||
.split('\n')
|
||||
.map(replace_italic_with_angles)
|
||||
.collect();
|
||||
let classified: Vec<GroffLine> = preprocessed
|
||||
.iter()
|
||||
.map(|line| crate::parsers::manpage::groff::classify_line(line))
|
||||
.collect();
|
||||
let mut i = 0;
|
||||
while i < classified.len() {
|
||||
if let GroffLine::Macro { name, args } = &classified[i]
|
||||
&& name == "SH"
|
||||
&& args.trim().eq_ignore_ascii_case("SYNOPSIS")
|
||||
{
|
||||
i += 1;
|
||||
while i < classified.len() {
|
||||
match &classified[i] {
|
||||
GroffLine::Macro { name, .. } if name == "SH" => return None,
|
||||
GroffLine::Text(text) => {
|
||||
let trimmed = text.trim();
|
||||
return if trimmed.is_empty() {
|
||||
None
|
||||
} else {
|
||||
extract_cmd(trimmed)
|
||||
};
|
||||
}
|
||||
GroffLine::Macro { name, args }
|
||||
if matches!(name.as_str(), "B" | "BI" | "BR") =>
|
||||
{
|
||||
let text = strip_groff_escapes(&strip_inline_macro_args(args));
|
||||
let trimmed = text.trim();
|
||||
if !trimmed.is_empty() {
|
||||
return extract_cmd(trimmed);
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
_ => i += 1,
|
||||
}
|
||||
}
|
||||
return None;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// replace \fI...\f[RP] sequences with <...> so italic params are seen as
|
||||
/// non-word tokens by extract_cmd.
|
||||
///
|
||||
/// exception: some manpages put the command name itself in italics (e.g.
|
||||
/// git-am.1's synopsis reads `\fIgit am\fR ...`). when the first italic
|
||||
/// block on the line appears at the very start (preceded only by
|
||||
/// whitespace) and its content looks like a command word, we strip the
|
||||
/// font markers but leave the content bare so extract_cmd treats it as
|
||||
/// the command name rather than a placeholder.
|
||||
fn replace_italic_with_angles(line: &str) -> String {
|
||||
let bytes = line.as_bytes();
|
||||
let len = bytes.len();
|
||||
let mut out = String::with_capacity(len);
|
||||
let mut i = 0;
|
||||
let mut command_consumed = false;
|
||||
while i < len {
|
||||
// byte-compare to avoid panicking on non-ASCII char boundaries
|
||||
if i + 3 <= len && &bytes[i..i + 3] == b"\\fI" {
|
||||
// find closing \fR or \fP — scan to next '\\'
|
||||
let inner_start = i + 3;
|
||||
let mut j = inner_start;
|
||||
while j < len && bytes[j] != b'\\' {
|
||||
j += 1;
|
||||
}
|
||||
if j + 3 <= len
|
||||
&& bytes[j] == b'\\'
|
||||
&& bytes[j + 1] == b'f'
|
||||
&& (bytes[j + 2] == b'R' || bytes[j + 2] == b'P')
|
||||
{
|
||||
let inner = &line[inner_start..j];
|
||||
let at_line_start = !command_consumed && line[..i].chars().all(char::is_whitespace);
|
||||
if at_line_start && italic_looks_like_command(inner) {
|
||||
out.push_str(inner);
|
||||
command_consumed = true;
|
||||
} else {
|
||||
out.push('<');
|
||||
out.push_str(inner);
|
||||
out.push('>');
|
||||
}
|
||||
i = j + 3;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let c = line[i..].chars().next().unwrap();
|
||||
out.push(c);
|
||||
i += c.len_utf8();
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// is the italic content something that looks like a command name (rather
|
||||
/// than a placeholder)? lowercase letters, digits, hyphens, underscores,
|
||||
/// dots, and spaces only, after groff escapes (like `\-`) are resolved.
|
||||
fn italic_looks_like_command(inner: &str) -> bool {
|
||||
let stripped = strip_groff_escapes(inner);
|
||||
let trimmed = stripped.trim();
|
||||
!trimmed.is_empty()
|
||||
&& trimmed.chars().all(|c| {
|
||||
c.is_ascii_lowercase() || c.is_ascii_digit() || matches!(c, '-' | '_' | '.' | ' ')
|
||||
})
|
||||
}
|
||||
|
||||
/// take the leading command words of a synopsis line.
/// space-separated tokens are accepted while they consist only of ASCII
/// alphanumerics, '-', '_' and '.', and do not begin with one of
/// '[', '-', '<', '(', '{'. the accepted tokens are joined with single
/// spaces; None is returned when the very first token is rejected.
fn extract_cmd(line: &str) -> Option<String> {
    let command: Vec<&str> = line
        .split(' ')
        .filter(|tok| !tok.is_empty())
        .take_while(|tok| {
            let opens_argument =
                matches!(tok.chars().next(), Some('[' | '-' | '<' | '(' | '{'));
            !opens_argument
                && tok
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.'))
        })
        .collect();

    if command.is_empty() {
        None
    } else {
        Some(command.join(" "))
    }
}
|
||||
|
||||
/// extract the lines that form the SYNOPSIS section.
|
||||
fn extract_synopsis_section(lines: &[GroffLine]) -> Vec<GroffLine> {
|
||||
let mut i = 0;
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, args } = &lines[i]
|
||||
&& name == "SH"
|
||||
&& args.trim().eq_ignore_ascii_case("SYNOPSIS")
|
||||
{
|
||||
i += 1;
|
||||
let mut acc = Vec::new();
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, .. } = &lines[i]
|
||||
&& name == "SH"
|
||||
{
|
||||
break;
|
||||
}
|
||||
acc.push(lines[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
return acc;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
/// extract positional arguments from the SYNOPSIS section.
|
||||
/// joins all text/formatting macro lines via `join_synopsis_text`, then
|
||||
/// skips the command name prefix and runs `parse_usage_args` on the rest.
|
||||
pub fn extract_synopsis_positionals(lines: &[GroffLine]) -> Vec<(String, Positional)> {
|
||||
let full = join_synopsis_text(lines);
|
||||
if full.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
let result: nom::IResult<&str, Vec<(&str, Positional)>> =
|
||||
preceded(skip_command_name, parse_usage_args).parse(&full);
|
||||
match result {
|
||||
Ok((_, map)) => map
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_ascii_lowercase(), v))
|
||||
.collect(),
|
||||
Err(_) => Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// join the SYNOPSIS section into a single line of plain text, stripping
|
||||
/// groff escapes and inline font macros. shared by both the positional
|
||||
/// and flag extractors so they see identical input.
|
||||
fn join_synopsis_text(lines: &[GroffLine]) -> String {
|
||||
let section = extract_synopsis_section(lines);
|
||||
let mut acc: Vec<String> = Vec::new();
|
||||
for line in section {
|
||||
match line {
|
||||
GroffLine::Macro { name, .. } if name == "SS" || name == "br" => break,
|
||||
GroffLine::Text(t) => {
|
||||
let text = strip_groff_escapes(&t).trim().to_string();
|
||||
if !text.is_empty() {
|
||||
acc.push(text);
|
||||
}
|
||||
}
|
||||
GroffLine::Macro { name, args }
|
||||
if matches!(
|
||||
name.as_str(),
|
||||
"B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI"
|
||||
) =>
|
||||
{
|
||||
let text = strip_groff_escapes(&strip_inline_macro_args(&args));
|
||||
let text = text.trim();
|
||||
if !text.is_empty() {
|
||||
acc.push(text.to_string());
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
acc.join(" ").trim().to_string()
|
||||
}
|
||||
|
||||
fn to_owned_switch(s: Switch<'_>) -> OwnedSwitch {
|
||||
match s {
|
||||
Switch::Short(c) => OwnedSwitch::Short(c),
|
||||
Switch::Long(l) => OwnedSwitch::Long(l.to_string()),
|
||||
Switch::Both(c, l) => OwnedSwitch::Both(c, l.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_owned_param(p: Param<'_>) -> OwnedParam {
|
||||
match p {
|
||||
Param::Mandatory(s) => OwnedParam::Mandatory(s.to_string()),
|
||||
Param::Optional(s) => OwnedParam::Optional(s.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
/// extract flag-tagged entries from the SYNOPSIS line. some manpages
|
||||
/// (notably nix-env, sed) declare flags only in the synopsis and never
|
||||
/// repeat them as entries in the OPTIONS body, so the body-only pass
|
||||
/// misses them. we join the synopsis text the same way the positional
|
||||
/// extractor does, then run `parse_usage_flags` over every bracketed
|
||||
/// switch+param. callers merge with body entries; body wins on duplicate
|
||||
/// flag names since body descriptions are richer.
|
||||
pub fn extract_synopsis_flags(lines: &[GroffLine]) -> Vec<ManpageEntry> {
|
||||
let full = join_synopsis_text(lines);
|
||||
if full.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
let result: nom::IResult<&str, Vec<(Switch<'_>, Option<Param<'_>>)>> =
|
||||
preceded(skip_command_name, parse_usage_flags).parse(&full);
|
||||
match result {
|
||||
Ok((_, pairs)) => pairs
|
||||
.into_iter()
|
||||
.map(|(switch, param)| ManpageEntry {
|
||||
switch: to_owned_switch(switch),
|
||||
param: param.map(to_owned_param),
|
||||
desc: String::new(),
|
||||
})
|
||||
.collect(),
|
||||
Err(_) => Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// does a section heading name a commands section?
/// a trailing parenthetical is ignored, so "HIGH-LEVEL COMMANDS (PORCELAIN)"
/// (git.1's pattern) is judged as "HIGH-LEVEL COMMANDS". accepted, case-
/// insensitively: "COMMAND", "COMMANDS", and anything ending in " COMMANDS".
fn is_commands_section(name: &str) -> bool {
    let heading = name.trim();
    let core = if heading.ends_with(')') {
        // cut at the last '(' when the heading closes with ')'
        heading
            .rfind('(')
            .map_or(heading, |open| heading[..open].trim())
    } else {
        heading
    };
    let core = core.to_ascii_uppercase();

    // the leading space in " COMMANDS" catches "GIT COMMANDS", "MAIN
    // COMMANDS" and the like while rejecting "COMMAND LINE OPTIONS" and
    // headings such as "SUBCOMMANDS".
    matches!(core.as_str(), "COMMAND" | "COMMANDS") || core.ends_with(" COMMANDS")
}
|
||||
|
||||
/// find all COMMANDS/.COMMAND sections and collect their lines.
|
||||
pub fn extract_commands_section(lines: &[GroffLine]) -> Vec<GroffLine> {
|
||||
let mut acc: Vec<GroffLine> = Vec::new();
|
||||
let mut i = 0;
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, args } = &lines[i]
|
||||
&& name == "SH"
|
||||
&& is_commands_section(args)
|
||||
{
|
||||
i += 1;
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, .. } = &lines[i]
|
||||
&& name == "SH"
|
||||
{
|
||||
break;
|
||||
}
|
||||
acc.push(lines[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
acc
|
||||
}
|
||||
|
||||
/// extract SUBCOMMAND-style sections (clap-generated manpages put each
|
||||
/// subcommand under its own .SH SUBCOMMAND header with a Usage: line).
|
||||
/// returns triples of (name, description, lines) so the caller can re-parse
|
||||
/// each section as its own help_result.
|
||||
pub fn extract_subcommand_sections(lines: &[GroffLine]) -> Vec<(String, String, Vec<GroffLine>)> {
|
||||
// split into sections at .SH boundaries, keeping only SUBCOMMAND(S) ones
|
||||
let mut sections: Vec<Vec<GroffLine>> = Vec::new();
|
||||
let mut current_name: Option<String> = None;
|
||||
let mut current: Vec<GroffLine> = Vec::new();
|
||||
for line in lines {
|
||||
if let GroffLine::Macro { name, args } = line
|
||||
&& name == "SH"
|
||||
{
|
||||
if current_name.is_some() {
|
||||
sections.push(std::mem::take(&mut current));
|
||||
}
|
||||
let n = args.trim().to_ascii_uppercase();
|
||||
if n == "SUBCOMMAND" || n == "SUBCOMMANDS" {
|
||||
current_name = Some(n);
|
||||
} else {
|
||||
current_name = None;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if current_name.is_some() {
|
||||
current.push(line.clone());
|
||||
}
|
||||
}
|
||||
if current_name.is_some() {
|
||||
sections.push(current);
|
||||
}
|
||||
|
||||
let mut out = Vec::new();
|
||||
for section in sections {
|
||||
// scan section lines for the Usage: line to get the subcommand name
|
||||
let mut subcmd_name: Option<String> = None;
|
||||
let mut desc_lines: Vec<String> = Vec::new();
|
||||
for line in §ion {
|
||||
if subcmd_name.is_some() {
|
||||
break;
|
||||
}
|
||||
match line {
|
||||
GroffLine::Text(t) => match find_usage_name(t) {
|
||||
Some(name) => subcmd_name = Some(name),
|
||||
None => desc_lines.push(t.clone()),
|
||||
},
|
||||
GroffLine::Macro { name, args }
|
||||
if matches!(name.as_str(), "TP" | "B" | "BI" | "BR") =>
|
||||
{
|
||||
let text = strip_groff_escapes(&strip_inline_macro_args(args));
|
||||
let text = text.trim();
|
||||
subcmd_name = find_usage_name(text);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
if let Some(name) = subcmd_name {
|
||||
let desc_raw = desc_lines.join(" ");
|
||||
let desc = strip_groff_escapes(&desc_raw).trim().to_string();
|
||||
let desc = strip_backtick_words(&desc);
|
||||
out.push((name, desc, section));
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// find the first "Usage: " marker in `text` and return the name that
/// follows it directly: the longest run of ASCII alphanumerics, '_' and
/// '-'. None when the marker is absent or no such character follows it.
fn find_usage_name(text: &str) -> Option<String> {
    let (_, after) = text.split_once("Usage: ")?;
    let name: String = after
        .chars()
        .take_while(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-'))
        .collect();
    if name.is_empty() { None } else { Some(name) }
}
|
||||
|
||||
/// remove backtick quoting: `word` -> word.
/// backticks are removed in pairs from left to right; a backtick with no
/// partner after it is left in place.
fn strip_backtick_words(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // unprocessed tail of the input
    let mut rest = s;
    while let Some(open) = rest.find('`') {
        let inside = &rest[open + 1..];
        match inside.find('`') {
            Some(close) => {
                // keep the text before the pair and the text between the two marks
                out.push_str(&rest[..open]);
                out.push_str(&inside[..close]);
                rest = &inside[close + 1..];
            }
            // unmatched: no further backtick exists, the tail is copied as-is
            None => break,
        }
    }
    out.push_str(rest);
    out
}
|
||||
344
src/parsers/manpage/strategies.rs
Normal file
344
src/parsers/manpage/strategies.rs
Normal file
|
|
@ -0,0 +1,344 @@
|
|||
//! strategy-based entry extraction.
|
||||
//!
|
||||
//! rather than a single monolithic parser, we use multiple "strategies" that
|
||||
//! each target a specific groff formatting pattern. this is necessary because
|
||||
//! manpage authors use very different macro combinations for the same purpose.
|
||||
|
||||
use nom::{Parser, combinator::opt};
|
||||
|
||||
use crate::make_macro_walker;
|
||||
use crate::parsers::help::{help_parser, param_parser, switch_parser};
|
||||
use crate::parsers::manpage::groff::{GroffLine, strip_groff_escapes, strip_inline_macro_args};
|
||||
use crate::parsers::manpage::{ManpageEntry, OwnedParam, OwnedSwitch};
|
||||
use crate::types::{Param, Switch};
|
||||
|
||||
/// collect consecutive text lines, joining them with spaces.
|
||||
/// returns (collected, remaining).
|
||||
fn collect_text_lines(lines: &[GroffLine]) -> (String, &[GroffLine]) {
|
||||
let mut acc: Vec<&str> = Vec::new();
|
||||
let mut i = 0;
|
||||
while i < lines.len() {
|
||||
match &lines[i] {
|
||||
GroffLine::Text(t) => acc.push(t),
|
||||
_ => break,
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
(acc.join(" "), &lines[i..])
|
||||
}
|
||||
|
||||
fn to_owned_switch(s: Switch<'_>) -> OwnedSwitch {
|
||||
match s {
|
||||
Switch::Short(c) => OwnedSwitch::Short(c),
|
||||
Switch::Long(l) => OwnedSwitch::Long(l.to_string()),
|
||||
Switch::Both(c, l) => OwnedSwitch::Both(c, l.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_owned_param(p: Param<'_>) -> OwnedParam {
|
||||
match p {
|
||||
Param::Mandatory(s) => OwnedParam::Mandatory(s.to_string()),
|
||||
Param::Optional(s) => OwnedParam::Optional(s.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
/// attempt to parse a tag string (e.g. "-v, --verbose FILE") into an entry.
|
||||
/// uses the nom switch_parser + param_parser from the help module.
|
||||
/// returns None if the tag doesn't look like a flag definition.
|
||||
pub fn parse_tag_to_entry(tag: &str, desc: String) -> Option<ManpageEntry> {
|
||||
let tag = strip_groff_escapes(tag);
|
||||
let tag = tag.trim();
|
||||
let result: nom::IResult<&str, (Switch<'_>, Option<Param<'_>>)> =
|
||||
(switch_parser, opt(param_parser)).parse(tag);
|
||||
match result {
|
||||
Ok((_, (switch, param))) => Some(ManpageEntry {
|
||||
switch: to_owned_switch(switch),
|
||||
param: param.map(to_owned_param),
|
||||
desc,
|
||||
}),
|
||||
Err(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// extract tag text from a macro line.
|
||||
/// .B and .I preserve spaces (single argument); .BI, .BR, .IR alternate
|
||||
/// fonts and concatenate arguments.
|
||||
pub fn tag_of_macro(name: &str, args: &str) -> String {
|
||||
match name {
|
||||
"B" | "I" => strip_groff_escapes(args).trim().to_string(),
|
||||
_ => strip_groff_escapes(&strip_inline_macro_args(args))
|
||||
.trim()
|
||||
.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
// strategy a: .TP style (most common — gnu coreutils, help2man).
|
||||
// .TP introduces a tagged paragraph: the next line is the "tag" (flag name)
|
||||
// and subsequent text lines are the description. the tag can be plain text
|
||||
// or wrapped in a formatting macro (.B, .BI, etc.).
|
||||
make_macro_walker!(pub strategy_tp -> Vec<ManpageEntry>, on macro "TP" =>
|
||||
|lines, i, _args| {
|
||||
if i + 1 >= lines.len() { None }
|
||||
else {
|
||||
let (tag, body_start) = match &lines[i + 1] {
|
||||
GroffLine::Text(tag) => (tag.clone(), i + 2),
|
||||
GroffLine::Macro { name, args }
|
||||
if matches!(name.as_str(), "B" | "I" | "BI" | "BR" | "IR") =>
|
||||
{
|
||||
(tag_of_macro(name, args), i + 2)
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
let (desc, rest) = collect_text_lines(&lines[body_start..]);
|
||||
let new_i = lines.len() - rest.len();
|
||||
parse_tag_to_entry(&tag, desc).map(|e| (e, new_i))
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// strategy b: .IP style (curl, hand-written manpages).
|
||||
// .IP takes an inline tag argument: .IP "-v, --verbose"
|
||||
// the description follows as text lines.
|
||||
make_macro_walker!(pub strategy_ip -> Vec<ManpageEntry>, on macro "IP" =>
|
||||
|lines, i, args| {
|
||||
let tag = strip_groff_escapes(args);
|
||||
let (desc, rest) = collect_text_lines(&lines[i + 1..]);
|
||||
let new_i = lines.len() - rest.len();
|
||||
parse_tag_to_entry(&tag, desc).map(|e| (e, new_i))
|
||||
}
|
||||
);
|
||||
|
||||
// strategy c: .PP + .RS/.RE style (git, docbook-generated manpages).
|
||||
// flag entries are introduced by .PP (paragraph), with the flag name as
|
||||
// plain text, followed by a .RS (indent) block containing the description,
|
||||
// closed by .RE (de-indent).
|
||||
make_macro_walker!(pub strategy_pp_rs -> Vec<ManpageEntry>, on macro "PP" =>
|
||||
|lines, i, _args| {
|
||||
if i + 1 >= lines.len() { return None; }
|
||||
if let GroffLine::Text(tag) = &lines[i + 1] {
|
||||
let (desc, new_i) = collect_pp_rs_desc(lines, i + 2);
|
||||
parse_tag_to_entry(tag, desc).map(|e| (e, new_i))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
fn collect_pp_rs_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
|
||||
let mut acc: Vec<String> = Vec::new();
|
||||
let mut i = start;
|
||||
// outer: look for .RS marker or text
|
||||
while i < lines.len() {
|
||||
match &lines[i] {
|
||||
GroffLine::Macro { name, .. } if name == "RS" => {
|
||||
i += 1;
|
||||
// inside .RS — collect until .RE or boundary macro
|
||||
while i < lines.len() {
|
||||
match &lines[i] {
|
||||
GroffLine::Macro { name, .. } if name == "RE" => {
|
||||
return (acc.join(" "), i + 1);
|
||||
}
|
||||
GroffLine::Text(t) => {
|
||||
acc.push(t.clone());
|
||||
i += 1;
|
||||
}
|
||||
GroffLine::Macro { name, .. } if name == "PP" || name == "SH" => {
|
||||
return (acc.join(" "), i);
|
||||
}
|
||||
_ => i += 1,
|
||||
}
|
||||
}
|
||||
return (acc.join(" "), i);
|
||||
}
|
||||
GroffLine::Text(t) => {
|
||||
acc.push(t.clone());
|
||||
i += 1;
|
||||
}
|
||||
_ => return (acc.join(" "), i),
|
||||
}
|
||||
}
|
||||
(acc.join(" "), i)
|
||||
}
|
||||
|
||||
/// strategy d: deroff fallback — strip all groff markup, then feed the
|
||||
/// resulting plain text through the help parser.
|
||||
pub fn strategy_deroff(lines: &[GroffLine]) -> Vec<ManpageEntry> {
|
||||
let mut buffer = String::with_capacity(256);
|
||||
for line in lines {
|
||||
match line {
|
||||
GroffLine::Text(text) => {
|
||||
buffer.push_str(text);
|
||||
buffer.push('\n');
|
||||
}
|
||||
GroffLine::Macro { name, args }
|
||||
if matches!(name.as_str(), "BI" | "BR" | "IR" | "B" | "I") =>
|
||||
{
|
||||
let text = strip_groff_escapes(&strip_inline_macro_args(args));
|
||||
buffer.push_str(&text);
|
||||
buffer.push('\n');
|
||||
}
|
||||
GroffLine::Blank => buffer.push('\n'),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
match help_parser(&buffer) {
|
||||
Ok((_, result)) => result
|
||||
.entries
|
||||
.into_iter()
|
||||
.map(|e| ManpageEntry {
|
||||
switch: to_owned_switch(e.switch),
|
||||
param: e.param.map(to_owned_param),
|
||||
desc: e.desc.join(" "),
|
||||
})
|
||||
.collect(),
|
||||
Err(_) => Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// true when an `.IP` macro's argument string contains anything other
/// than whitespace, i.e. the macro carries a marker/tag argument.
fn is_bullet_ip(args: &str) -> bool {
    args.chars().any(|c| !c.is_whitespace())
}
|
||||
|
||||
// strategy e: nix3-style bullet .IP with .UR/.UE hyperlinks.
|
||||
// nix's manpages use .IP with bullet markers for flag entries, interleaved
|
||||
// with .UR/.UE hyperlink macros. the flag tag is in text lines after the
|
||||
// bullet .IP, and the description follows a non-bullet .IP marker.
|
||||
make_macro_walker!(pub strategy_nix -> Vec<ManpageEntry>, on macro "IP" =>
|
||||
|lines, i, args| {
|
||||
if !is_bullet_ip(args) { return None; }
|
||||
// collect tag: skip .UR/.UE macros, gather Text lines
|
||||
let mut tag_idx = i + 1;
|
||||
let mut tag_parts: Vec<String> = Vec::new();
|
||||
while tag_idx < lines.len() {
|
||||
match &lines[tag_idx] {
|
||||
GroffLine::Macro { name, .. } if name == "UR" || name == "UE" => {
|
||||
tag_idx += 1;
|
||||
}
|
||||
GroffLine::Text(t) => {
|
||||
tag_parts.push(t.clone());
|
||||
tag_idx += 1;
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
let tag = tag_parts.join(" ");
|
||||
let (desc, new_i) = collect_nix_desc(lines, tag_idx);
|
||||
parse_tag_to_entry(&tag, desc).map(|e| (e, new_i))
|
||||
}
|
||||
);
|
||||
|
||||
fn collect_nix_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
|
||||
if start >= lines.len() {
|
||||
return (String::new(), start);
|
||||
}
|
||||
let mut i = start;
|
||||
// require non-bullet .IP marker for description
|
||||
if let GroffLine::Macro { name, args } = &lines[i]
|
||||
&& name == "IP"
|
||||
&& args.trim().is_empty()
|
||||
{
|
||||
i += 1;
|
||||
} else {
|
||||
return (String::new(), start);
|
||||
}
|
||||
let mut parts: Vec<String> = Vec::new();
|
||||
while i < lines.len() {
|
||||
match &lines[i] {
|
||||
GroffLine::Text(t) => {
|
||||
parts.push(t.clone());
|
||||
i += 1;
|
||||
}
|
||||
GroffLine::Macro { name, args } if name == "IP" => {
|
||||
if !args.trim().is_empty() {
|
||||
// next bullet entry — stop
|
||||
return (parts.join(" "), i);
|
||||
}
|
||||
// non-bullet .IP = continuation paragraph
|
||||
i += 1;
|
||||
}
|
||||
GroffLine::Macro { name, .. } if name == "SS" || name == "SH" => {
|
||||
return (parts.join(" "), i);
|
||||
}
|
||||
GroffLine::Macro { name, .. } if name == "RS" => {
|
||||
i = skip_rs(lines, i + 1, 1);
|
||||
}
|
||||
GroffLine::Macro { .. } => {
|
||||
i += 1;
|
||||
}
|
||||
GroffLine::Blank | GroffLine::Comment => {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
(parts.join(" "), i)
|
||||
}
|
||||
|
||||
fn skip_rs(lines: &[GroffLine], start: usize, mut depth: usize) -> usize {
|
||||
let mut i = start;
|
||||
while i < lines.len() {
|
||||
if let GroffLine::Macro { name, .. } = &lines[i] {
|
||||
if name == "RE" {
|
||||
depth -= 1;
|
||||
if depth == 0 {
|
||||
return i + 1;
|
||||
}
|
||||
} else if name == "RS" {
|
||||
depth += 1;
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
i
|
||||
}
|
||||
|
||||
/// count occurrences of a specific macro in the section.
|
||||
fn count_macro(name: &str, lines: &[GroffLine]) -> usize {
|
||||
lines
|
||||
.iter()
|
||||
.filter(|line| matches!(line, GroffLine::Macro { name: n, .. } if n == name))
|
||||
.count()
|
||||
}
|
||||
|
||||
/// auto-detect and try strategies, return the one with most entries.
|
||||
/// first counts macros to determine which strategies are applicable,
|
||||
/// then runs all applicable ones and picks the winner by entry count.
|
||||
/// if no specialized strategy produces results, falls back to deroff.
|
||||
pub fn extract_entries(lines: &[GroffLine]) -> Vec<ManpageEntry> {
|
||||
let tp = count_macro("TP", lines);
|
||||
let ip = count_macro("IP", lines);
|
||||
let pp = count_macro("PP", lines);
|
||||
let rs = count_macro("RS", lines);
|
||||
let ur = count_macro("UR", lines);
|
||||
|
||||
let mut specialized: Vec<(&str, Vec<ManpageEntry>)> = Vec::new();
|
||||
if tp > 0 {
|
||||
specialized.push(("TP", strategy_tp(lines)));
|
||||
}
|
||||
if ip > 0 {
|
||||
specialized.push(("IP", strategy_ip(lines)));
|
||||
}
|
||||
if pp > 0 && rs > 0 {
|
||||
specialized.push(("PP+RS", strategy_pp_rs(lines)));
|
||||
}
|
||||
if ur > 0 && ip > 0 {
|
||||
specialized.push(("nix", strategy_nix(lines)));
|
||||
}
|
||||
let candidates: Vec<(&str, Vec<ManpageEntry>)> = {
|
||||
let filtered: Vec<_> = specialized
|
||||
.into_iter()
|
||||
.filter(|(_, e)| !e.is_empty())
|
||||
.collect();
|
||||
if filtered.is_empty() {
|
||||
vec![("deroff", strategy_deroff(lines))]
|
||||
} else {
|
||||
filtered
|
||||
}
|
||||
};
|
||||
let mut best: Vec<ManpageEntry> = Vec::new();
|
||||
for (_, entries) in candidates {
|
||||
if entries.len() >= best.len() {
|
||||
best = entries;
|
||||
}
|
||||
}
|
||||
best
|
||||
}
|
||||
3
src/parsers/mod.rs
Normal file
3
src/parsers/mod.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
pub mod help;
|
||||
pub mod manpage;
|
||||
pub mod nushell;
|
||||
475
src/parsers/nushell.rs
Normal file
475
src/parsers/nushell.rs
Normal file
|
|
@ -0,0 +1,475 @@
|
|||
//! generate nushell `extern` definitions from parsed help data.
|
||||
//!
|
||||
//! this module is the code generation backend. it takes a [`ManpageResult`]
|
||||
//! (from the help or manpage parsers) and produces nushell source that defines
|
||||
//! `extern` declarations — nushell's mechanism for teaching the shell about
|
||||
//! external commands' flags and subcommands so it can offer completions.
|
||||
//!
|
||||
//! key responsibilities:
|
||||
//! - deduplicating flag entries (same flag from multiple help sources)
|
||||
//! - mapping parameter names to nushell types (path, int, string)
|
||||
//! - formatting flags in nushell syntax: --flag(-f): type # description
|
||||
//! - handling positional arguments with nushell's ordering constraints
|
||||
//! - escaping special characters for nushell string literals
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use crate::parsers::manpage::{
|
||||
ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch,
|
||||
};
|
||||
use crate::types::Positional;
|
||||
|
||||
/// names of nushell's built-in commands and keywords.
///
/// an `extern` must never be generated for any of these, because it would
/// shadow nushell's own implementation. the list is maintained by hand
/// and needs updating when nushell adds or removes built-ins.
#[rustfmt::skip]
pub const NUSHELL_BUILTINS: &[&str] = &[
    "alias", "all", "ansi", "any", "append", "ast", "attr",
    "bits", "break", "bytes",
    "cal", "cd", "char", "chunk-by", "chunks", "clear", "collect", "columns",
    "commandline", "compact", "complete", "config", "const", "continue", "cp",
    "date", "debug", "decode", "def", "default", "describe", "detect", "do", "drop", "du",
    "each", "echo", "encode", "enumerate", "error", "every", "exec", "exit",
    "explain", "explore", "export", "export-env", "extern",
    "fill", "filter", "find", "first", "flatten", "for", "format", "from",
    "generate", "get", "glob", "grid", "group-by",
    "hash", "headers", "help", "hide", "hide-env", "histogram", "history", "http",
    "if", "ignore", "input", "insert", "inspect", "interleave", "into",
    "is-admin", "is-empty", "is-not-empty", "is-terminal", "items",
    "job", "join",
    "keybindings", "kill",
    "last", "length", "let", "let-env", "lines", "load-env", "loop", "ls",
    "match", "math", "merge", "metadata", "mkdir", "mktemp", "module", "move", "mut", "mv",
    "nu-check", "nu-highlight",
    "open", "overlay",
    "panic", "par-each", "parse", "path", "plugin", "port", "prepend", "print", "ps",
    "query",
    "random", "reduce", "reject", "rename", "return", "reverse", "rm", "roll", "rotate",
    "run-external",
    "save", "schema", "scope", "select", "seq", "shuffle", "skip", "sleep", "slice",
    "sort", "sort-by", "source", "source-env", "split", "start", "stor", "str", "sys",
    "table", "take", "tee", "term", "timeit", "to", "touch", "transpose", "try", "tutor",
    "ulimit", "umask", "uname", "uniq", "uniq-by", "unlet", "update", "upsert", "url", "use",
    "values", "version", "view",
    "watch", "where", "which", "while", "whoami", "window", "with-env", "wrap",
    "zip",
];
|
||||
|
||||
fn builtin_set() -> &'static HashSet<&'static str> {
|
||||
static SET: OnceLock<HashSet<&'static str>> = OnceLock::new();
|
||||
SET.get_or_init(|| NUSHELL_BUILTINS.iter().copied().collect())
|
||||
}
|
||||
|
||||
/// returns true if the given command name collides with a nushell built-in.
|
||||
pub fn is_nushell_builtin(cmd: &str) -> bool {
|
||||
builtin_set().contains(cmd)
|
||||
}
|
||||
|
||||
/// map a parameter name to a nushell type.
///
/// nushell's `extern` declarations use typed parameters, so the type is
/// inferred from the parameter's name:
/// - file/path-like names → "path" (enables path completion)
/// - numeric names → "int"
/// - anything else → "string"
///
/// matching is ASCII case-insensitive, so `FILE`, `file` and `File` all
/// map to "path". every spelling that was recognised before still maps to
/// the same type; other casings of the same names (`directory`, `count`,
/// …) no longer fall through to "string".
pub fn nushell_type_of_param(name: &str) -> &'static str {
    const PATH_NAMES: &[&str] = &["FILE", "PATH", "DIR", "DIRECTORY", "FILENAME", "PATTERNFILE"];
    const INT_NAMES: &[&str] = &[
        "NUM", "N", "COUNT", "NUMBER", "INT", "COLS", "WIDTH", "LINES", "DEPTH",
    ];

    let is_one_of =
        |names: &[&str]| names.iter().any(|known| known.eq_ignore_ascii_case(name));

    if is_one_of(PATH_NAMES) {
        "path"
    } else if is_one_of(INT_NAMES) {
        "int"
    } else {
        "string"
    }
}
|
||||
|
||||
/// escape `s` for embedding in a nushell double-quoted string literal.
///
/// double quotes and backslashes are each prefixed with a backslash;
/// every other character is copied through. when nothing needs escaping
/// the input is returned borrowed, without allocating.
pub fn escape_nu(s: &str) -> Cow<'_, str> {
    let needs_escaping = s.chars().any(|ch| matches!(ch, '"' | '\\'));
    if !needs_escaping {
        return Cow::Borrowed(s);
    }
    let mut escaped = String::with_capacity(s.len() + 4);
    for ch in s.chars() {
        if ch == '"' || ch == '\\' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    Cow::Owned(escaped)
}
|
||||
|
||||
fn entry_key(e: &ManpageEntry) -> String {
|
||||
match &e.switch {
|
||||
OwnedSwitch::Short(c) => format!("-{c}"),
|
||||
OwnedSwitch::Long(l) | OwnedSwitch::Both(_, l) => format!("--{l}"),
|
||||
}
|
||||
}
|
||||
|
||||
fn entry_score(e: &ManpageEntry) -> i32 {
|
||||
let switch_bonus = if matches!(e.switch, OwnedSwitch::Both(_, _)) {
|
||||
10
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let param_bonus = if e.param.is_some() { 5 } else { 0 };
|
||||
let desc_bonus = (e.desc.len() / 10).min(5) as i32;
|
||||
switch_bonus + param_bonus + desc_bonus
|
||||
}
|
||||
|
||||
/// deduplicate flag entries that refer to the same flag.
|
||||
///
|
||||
/// when the same flag appears multiple times (e.g. from overlapping manpage
|
||||
/// sections or repeated help text), we keep the "best" version using a score:
|
||||
/// - both short+long form present: +10 (most informative)
|
||||
/// - has a parameter: +5
|
||||
/// - description length bonus: up to +5
|
||||
///
|
||||
/// after deduplication by long name, we also remove standalone short flags
|
||||
/// whose letter is already covered by a Both(short, long) entry. this prevents
|
||||
/// emitting both "-v" and "--verbose(-v)" which nushell would reject as a
|
||||
/// duplicate. the filtering preserves original ordering from the help text.
|
||||
pub fn dedup_entries(entries: &[ManpageEntry]) -> Vec<ManpageEntry> {
|
||||
let mut best: HashMap<String, &ManpageEntry> = HashMap::new();
|
||||
for e in entries {
|
||||
let key = entry_key(e);
|
||||
match best.get(&key) {
|
||||
Some(prev) if entry_score(prev) >= entry_score(e) => {}
|
||||
_ => {
|
||||
best.insert(key, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut covered: HashSet<char> = HashSet::new();
|
||||
for e in best.values() {
|
||||
if let OwnedSwitch::Both(c, _) = &e.switch {
|
||||
covered.insert(*c);
|
||||
}
|
||||
}
|
||||
let mut seen: HashSet<String> = HashSet::new();
|
||||
let mut out: Vec<ManpageEntry> = Vec::new();
|
||||
for e in entries {
|
||||
let key = entry_key(e);
|
||||
if seen.contains(&key) {
|
||||
continue;
|
||||
}
|
||||
if let OwnedSwitch::Short(c) = &e.switch
|
||||
&& covered.contains(c)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
seen.insert(key.clone());
|
||||
out.push((*best.get(&key).unwrap()).clone());
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// format a single flag entry as a nushell `extern` parameter line.
|
||||
/// output examples:
|
||||
/// " --verbose(-v) # increase verbosity"
|
||||
/// " --output(-o): path # write output to file"
|
||||
/// " -n: int # number of results"
|
||||
///
|
||||
/// the description is right-padded to column 40 with a "# " comment prefix.
|
||||
pub fn format_flag(entry: &ManpageEntry) -> String {
|
||||
let name = match &entry.switch {
|
||||
OwnedSwitch::Both(c, l) => format!("--{l}(-{c})"),
|
||||
OwnedSwitch::Long(l) => format!("--{l}"),
|
||||
OwnedSwitch::Short(c) => format!("-{c}"),
|
||||
};
|
||||
let typed = match &entry.param {
|
||||
Some(OwnedParam::Mandatory(p)) | Some(OwnedParam::Optional(p)) => {
|
||||
format!(": {}", nushell_type_of_param(p))
|
||||
}
|
||||
None => String::new(),
|
||||
};
|
||||
let flag = format!(" {name}{typed}");
|
||||
if entry.desc.is_empty() {
|
||||
flag
|
||||
} else {
|
||||
let pad_len = 40usize.saturating_sub(flag.len()).max(1);
|
||||
format!("{flag}{}# {}", " ".repeat(pad_len), entry.desc)
|
||||
}
|
||||
}
|
||||
|
||||
/// format a positional argument as a nushell `extern` parameter line.
|
||||
/// nushell syntax: "...name: type" for variadic, "name?: type" for optional.
|
||||
/// hyphens in names are converted to underscores since nushell identifiers
|
||||
/// cannot contain hyphens.
|
||||
pub fn format_positional(name: &str, p: &Positional) -> String {
|
||||
let name_underscored: String = name
|
||||
.chars()
|
||||
.map(|c| if c == '-' { '_' } else { c })
|
||||
.collect();
|
||||
let prefix = if p.variadic { "..." } else { "" };
|
||||
let suffix = if p.optional && !p.variadic { "?" } else { "" };
|
||||
let typ = nushell_type_of_param(&name.to_ascii_uppercase());
|
||||
format!(" {prefix}{name_underscored}{suffix}: {typ}")
|
||||
}
|
||||
|
||||
/// enforce nushell's positional argument ordering rules:
|
||||
/// 1. no required positional may follow an optional one
|
||||
/// 2. at most one variadic ("rest") parameter is allowed
|
||||
///
|
||||
/// if a required positional appears after an optional one, it is silently
|
||||
/// promoted to optional. duplicate variadic params are dropped.
|
||||
pub fn fixup_positionals(positionals: Vec<(String, Positional)>) -> Vec<(String, Positional)> {
|
||||
let mut seen_optional = false;
|
||||
let mut seen_variadic = false;
|
||||
let mut out = Vec::with_capacity(positionals.len());
|
||||
for (name, mut p) in positionals {
|
||||
if p.variadic {
|
||||
if seen_variadic {
|
||||
continue;
|
||||
}
|
||||
seen_variadic = true;
|
||||
seen_optional = true;
|
||||
out.push((name, p));
|
||||
} else if seen_optional {
|
||||
p.optional = true;
|
||||
out.push((name, p));
|
||||
} else {
|
||||
seen_optional = p.optional;
|
||||
out.push((name, p));
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// derive a nushell `module` name from a command name.
///
/// ASCII letters, digits, `-` and `_` are kept; every other character is
/// replaced by `-`. the suffix "-completions" is then appended.
pub fn module_name_of(cmd_name: &str) -> String {
    const SUFFIX: &str = "-completions";
    let mut module = String::with_capacity(cmd_name.len() + SUFFIX.len());
    for ch in cmd_name.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
        module.push(if keep { ch } else { '-' });
    }
    module.push_str(SUFFIX);
    module
}
|
||||
|
||||
/// generate the full nushell `extern` block for a command.
|
||||
///
|
||||
/// produces output like:
|
||||
/// export extern "git add" [
|
||||
/// ...pathspec?: path
|
||||
/// --verbose(-v) # be verbose
|
||||
/// --dry-run(-n) # dry run
|
||||
/// ]
|
||||
///
|
||||
/// subcommands that weren't resolved into their own full definitions get
|
||||
/// stub `extern` blocks with just a comment containing their description:
|
||||
/// export extern "git stash" [ # stash changes
|
||||
/// ]
|
||||
pub fn generate_extern(cmd_name: &str, result: &ManpageResult) -> String {
|
||||
let entries = dedup_entries(&result.entries);
|
||||
let escaped_name = escape_nu(cmd_name);
|
||||
let positionals = fixup_positionals(result.positionals.clone());
|
||||
|
||||
let mut out = String::new();
|
||||
out.push_str(&format!("export extern \"{escaped_name}\" [\n"));
|
||||
for (name, p) in &positionals {
|
||||
out.push_str(&format_positional(name, p));
|
||||
out.push('\n');
|
||||
}
|
||||
for entry in &entries {
|
||||
out.push_str(&format_flag(entry));
|
||||
out.push('\n');
|
||||
}
|
||||
out.push_str("]\n");
|
||||
|
||||
for sc in &result.subcommands {
|
||||
out.push_str(&format!(
|
||||
"\nexport extern \"{} {}\" [ # {}\n]\n",
|
||||
escaped_name,
|
||||
escape_nu(&sc.name),
|
||||
escape_nu(&sc.desc)
|
||||
));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// generate a complete nushell `module` wrapping the `extern`.
|
||||
/// output: "module git-completions { ... }\n\nuse git-completions *\n"
|
||||
/// the `use` at the end makes the `extern` immediately available in scope.
|
||||
pub fn generate_module(cmd_name: &str, result: &ManpageResult) -> String {
|
||||
let mod_name = module_name_of(cmd_name);
|
||||
format!(
|
||||
"module {mod_name} {{\n{}}}\n\nuse {mod_name} *\n",
|
||||
generate_extern(cmd_name, result)
|
||||
)
|
||||
}
|
||||
|
||||
/// convenience wrapper: generate an `extern` from just a list of entries.
|
||||
pub fn generate_extern_from_entries(cmd_name: &str, entries: Vec<ManpageEntry>) -> String {
|
||||
generate_extern(
|
||||
cmd_name,
|
||||
&ManpageResult {
|
||||
entries,
|
||||
subcommands: Vec::new(),
|
||||
positionals: Vec::new(),
|
||||
description: String::new(),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// stub subcommand entry used when extracting subcommands from a parsed
|
||||
/// help result for nushell output.
|
||||
pub fn manpage_subcommand_from(name: &str, desc: &str) -> ManpageSubcommand {
|
||||
ManpageSubcommand {
|
||||
name: name.to_string(),
|
||||
desc: desc.to_string(),
|
||||
}
|
||||
}
|
||||
233
src/pool.rs
Normal file
233
src/pool.rs
Normal file
|
|
@ -0,0 +1,233 @@
|
|||
//! BFS-queue worker pool for parallel subprocess scraping.
|
||||
//!
|
||||
//! workers pull jobs from a shared queue and call a user-supplied
|
||||
//! handler; the handler gets a `Submitter` to push newly-discovered
|
||||
//! child jobs back onto the same queue. when the in-flight count
|
||||
//! reaches zero the pool shuts down and `wait` returns.
|
||||
//!
|
||||
//! the queue-back design is deliberate: command-help trees are uneven
|
||||
//! (one binary has 30 subs, another has 1). queue-back keeps every
|
||||
//! worker fed; spawn-in-place would leave cores idle on lopsided trees.
|
||||
//!
|
||||
//! synchronization: `parking_lot::Condvar` parks workers when the queue is
|
||||
//! empty. the queue, in-flight count, and close state live under one mutex so
|
||||
//! the condvar predicate cannot miss a wakeup.
|
||||
//! parking_lot gives no-poison locks (no `Result` noise on every
//! `lock()`) and an uncontended fast path that is a single atomic
//! operation with no syscall.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Arc;
|
||||
use std::thread::{self, JoinHandle};
|
||||
|
||||
use parking_lot::{Condvar, Mutex};
|
||||
|
||||
/// mutable pool state. all three fields are guarded by the one mutex in
/// `Inner`, so they are always observed together.
struct State<J> {
    /// jobs waiting to be picked up, in submission order.
    queue: VecDeque<J>,
    /// number of jobs submitted but not yet completed — queued jobs plus
    /// jobs a worker is currently running.
    in_flight: usize,
    /// set by `wait()`, which marks the end of top-level submission.
    /// workers may only exit once this is set and `in_flight` is 0; an
    /// empty queue before that point is just a transient lull.
    closed: bool,
}
|
||||
|
||||
/// shared state held behind an `Arc` by every worker and by the
|
||||
/// submitter handles handed to the per-job handler.
|
||||
struct Inner<J> {
|
||||
state: Mutex<State<J>>,
|
||||
notify: Condvar,
|
||||
}
|
||||
|
||||
impl<J> Inner<J> {
|
||||
fn submit(&self, job: J) {
|
||||
let mut state = self.state.lock();
|
||||
state.in_flight += 1;
|
||||
state.queue.push_back(job);
|
||||
self.notify.notify_one();
|
||||
}
|
||||
|
||||
fn next(&self) -> Option<J> {
|
||||
let mut state = self.state.lock();
|
||||
loop {
|
||||
if let Some(job) = state.queue.pop_front() {
|
||||
return Some(job);
|
||||
}
|
||||
if state.closed && state.in_flight == 0 {
|
||||
return None;
|
||||
}
|
||||
self.notify.wait(&mut state);
|
||||
}
|
||||
}
|
||||
|
||||
fn complete(&self) {
|
||||
let mut state = self.state.lock();
|
||||
state.in_flight -= 1;
|
||||
if state.closed && state.in_flight == 0 {
|
||||
// we were the last in-flight job after wait() closed top-level
|
||||
// submission, so parked workers can wake and exit.
|
||||
self.notify.notify_all();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// cheap-to-clone handle that lets a job handler enqueue further jobs.
|
||||
/// passed by reference to the handler closure.
|
||||
pub struct Submitter<J> {
|
||||
inner: Arc<Inner<J>>,
|
||||
}
|
||||
|
||||
impl<J> Clone for Submitter<J> {
|
||||
fn clone(&self) -> Self {
|
||||
Submitter {
|
||||
inner: self.inner.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<J> Submitter<J> {
|
||||
pub fn submit(&self, job: J) {
|
||||
self.inner.submit(job);
|
||||
}
|
||||
}
|
||||
|
||||
/// BFS-queue worker pool. each worker pulls a job, calls the handler
|
||||
/// (which may submit further jobs via the passed `Submitter`), then marks
|
||||
/// the job complete. when in-flight reaches zero the pool shuts down and
|
||||
/// `wait` returns.
|
||||
pub struct ScrapePool<J> {
|
||||
inner: Arc<Inner<J>>,
|
||||
workers: Vec<JoinHandle<()>>,
|
||||
}
|
||||
|
||||
impl<J: Send + 'static> ScrapePool<J> {
|
||||
/// spawn `num_workers` threads that run `handler` on each job pulled
|
||||
/// from the queue. the handler receives the job by value and a
|
||||
/// `&Submitter` for enqueuing children.
|
||||
pub fn new<F>(num_workers: usize, handler: F) -> Self
|
||||
where
|
||||
F: Fn(J, &Submitter<J>) + Send + Sync + 'static,
|
||||
{
|
||||
let inner = Arc::new(Inner {
|
||||
state: Mutex::new(State {
|
||||
queue: VecDeque::new(),
|
||||
in_flight: 0,
|
||||
closed: false,
|
||||
}),
|
||||
notify: Condvar::new(),
|
||||
});
|
||||
let handler = Arc::new(handler);
|
||||
let workers = (0..num_workers.max(1))
|
||||
.map(|_| {
|
||||
let inner = inner.clone();
|
||||
let handler = handler.clone();
|
||||
thread::spawn(move || {
|
||||
let submitter = Submitter {
|
||||
inner: inner.clone(),
|
||||
};
|
||||
while let Some(job) = inner.next() {
|
||||
handler(job, &submitter);
|
||||
inner.complete();
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
ScrapePool { inner, workers }
|
||||
}
|
||||
|
||||
/// submit a top-level job. typically called by the orchestrating
|
||||
/// thread before `wait`; handlers should use `Submitter::submit`.
|
||||
pub fn submit(&self, job: J) {
|
||||
self.inner.submit(job);
|
||||
}
|
||||
|
||||
/// block until all jobs (initial + transitively discovered) have
|
||||
/// completed, then join every worker thread.
|
||||
pub fn wait(self) {
|
||||
{
|
||||
let mut state = self.inner.state.lock();
|
||||
state.closed = true;
|
||||
// Wake workers so they can either drain queued work or exit if
|
||||
// the pool was empty. The close flag is guarded by this same lock,
|
||||
// so this cannot race with a worker entering the condvar wait.
|
||||
self.inner.notify.notify_all();
|
||||
}
|
||||
for w in self.workers {
|
||||
let _ = w.join();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::Duration;

    /// sorted copy of the job values a handler recorded.
    fn sorted(seen: &Mutex<Vec<u32>>) -> Vec<u32> {
        let mut values = seen.lock().clone();
        values.sort();
        values
    }

    /// 100 independent jobs on 4 workers: each is handled exactly once.
    #[test]
    fn flat_jobs_processed_once_each() {
        let seen = Arc::new(Mutex::new(Vec::<u32>::new()));
        let sink = Arc::clone(&seen);
        let pool = ScrapePool::new(4, move |n: u32, _: &Submitter<u32>| {
            sink.lock().push(n);
        });
        (0..100u32).for_each(|i| pool.submit(i));
        pool.wait();
        assert_eq!(sorted(&seen), (0..100).collect::<Vec<_>>());
    }

    /// jobs submitted from inside the handler are processed before
    /// `wait` returns.
    #[test]
    fn discovered_children_processed_to_completion() {
        // BFS expansion: every odd number under 10 spawns its successor.
        let seen = Arc::new(Mutex::new(Vec::<u32>::new()));
        let sink = Arc::clone(&seen);
        let pool = ScrapePool::new(2, move |n: u32, sub: &Submitter<u32>| {
            sink.lock().push(n);
            let is_odd = n % 2 == 1;
            if n < 10 && is_odd {
                sub.submit(n + 1);
            }
        });
        for seed in [1u32, 3, 5, 7, 9] {
            pool.submit(seed);
        }
        pool.wait();
        assert_eq!(sorted(&seen), vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    }

    /// the queue running dry before `wait` must not make the worker exit:
    /// a job submitted after the lull still has to run.
    #[test]
    fn transient_empty_queue_before_wait_does_not_stop_workers() {
        let processed = Arc::new(AtomicUsize::new(0));
        let counter = Arc::clone(&processed);
        let pool = ScrapePool::new(1, move |_: u32, _: &Submitter<u32>| {
            counter.fetch_add(1, Ordering::SeqCst);
        });

        pool.submit(1);
        // spin until the first job has been handled, then give the worker
        // time to park on the empty queue.
        while processed.load(Ordering::SeqCst) == 0 {
            thread::yield_now();
        }
        thread::sleep(Duration::from_millis(10));
        pool.submit(2);
        pool.wait();

        assert_eq!(processed.load(Ordering::SeqCst), 2);
    }

    /// closing a pool that never received a job must not block.
    #[test]
    fn wait_with_no_jobs_returns_immediately() {
        let pool = ScrapePool::<()>::new(2, |_, _| {});
        pool.wait();
    }
}
|
||||
657
src/store.rs
Normal file
657
src/store.rs
Normal file
|
|
@ -0,0 +1,657 @@
|
|||
//! filesystem store for parsed completion data.
|
||||
//!
|
||||
//! write side: serialize ManpageResult to JSON, derive sanitised
|
||||
//! filenames from command names ("git add" → git_add.json).
|
||||
//!
|
||||
//! read side: look up a command by name across the user cache + system
|
||||
//! dirs, deserialize JSON or parse a .nu extern blob back into a result.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::parsers::manpage::{
|
||||
ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch,
|
||||
};
|
||||
use crate::types::Positional;
|
||||
|
||||
/// default cache directory.
///
/// resolution order:
/// 1. `$XDG_CACHE_HOME/inshellah` when the variable is set and non-empty,
/// 2. `$HOME/.cache/inshellah` when `HOME` is set,
/// 3. the relative path `.cache/inshellah` otherwise.
pub fn default_store_path() -> PathBuf {
    let from_env = |key: &str| std::env::var(key).ok();
    match from_env("XDG_CACHE_HOME").filter(|value| !value.is_empty()) {
        Some(xdg) => PathBuf::from(xdg).join("inshellah"),
        // HOME is only consulted once the XDG variable has been ruled out.
        None => match from_env("HOME") {
            Some(home) => PathBuf::from(home).join(".cache/inshellah"),
            None => PathBuf::from(".cache/inshellah"),
        },
    }
}
|
||||
|
||||
/// make sure `dir` exists, creating it and any missing parents.
/// succeeds when the directory is already there.
pub fn ensure_dir(dir: &Path) -> io::Result<()> {
    fs::DirBuilder::new().recursive(true).create(dir)
}
|
||||
|
||||
/// turn a command name into a filesystem-safe file stem.
/// ASCII letters, digits, `-`, `_` and `.` are kept; every other
/// character — including the space in "git add" — becomes `_`
/// ("git add" → "git_add").
pub fn filename_of_command(cmd: &str) -> String {
    let mut name = String::with_capacity(cmd.len());
    for ch in cmd.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.');
        name.push(if keep { ch } else { '_' });
    }
    name
}
|
||||
|
||||
/// inverse of the filename mapping: "git_add" → "git add".
/// every underscore turns into a space, so a name that really contained
/// an underscore comes back with a space instead; that is tolerated
/// because the result is only used for display.
pub fn command_of_filename(base: &str) -> String {
    base.split('_').collect::<Vec<&str>>().join(" ")
}
|
||||
|
||||
/// escape `s` for use inside a JSON string literal (without the
/// surrounding quotes). quote, backslash and the common control
/// characters get their two-character escapes; any other character below
/// U+0020 is written as `\u00XX`; everything else passes through.
fn escape_json(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len() + 2);
    for ch in s.chars() {
        let short_form = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            '\u{8}' => Some("\\b"),
            '\u{c}' => Some("\\f"),
            _ => None,
        };
        match short_form {
            Some(seq) => escaped.push_str(seq),
            None if u32::from(ch) < 0x20 => {
                escaped.push_str(&format!("\\u{:04x}", u32::from(ch)));
            }
            None => escaped.push(ch),
        }
    }
    escaped
}
|
||||
|
||||
fn json_string(s: &str) -> String {
|
||||
format!("\"{}\"", escape_json(s))
|
||||
}
|
||||
|
||||
fn json_switch(s: &OwnedSwitch) -> String {
|
||||
match s {
|
||||
OwnedSwitch::Short(c) => {
|
||||
format!(
|
||||
r#"{{"type":"short","char":{}}}"#,
|
||||
json_string(&c.to_string())
|
||||
)
|
||||
}
|
||||
OwnedSwitch::Long(l) => {
|
||||
format!(r#"{{"type":"long","name":{}}}"#, json_string(l))
|
||||
}
|
||||
OwnedSwitch::Both(c, l) => format!(
|
||||
r#"{{"type":"both","char":{},"name":{}}}"#,
|
||||
json_string(&c.to_string()),
|
||||
json_string(l)
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
fn json_param(p: &Option<OwnedParam>) -> String {
|
||||
match p {
|
||||
None => "null".to_string(),
|
||||
Some(OwnedParam::Mandatory(n)) => {
|
||||
format!(r#"{{"kind":"mandatory","name":{}}}"#, json_string(n))
|
||||
}
|
||||
Some(OwnedParam::Optional(n)) => {
|
||||
format!(r#"{{"kind":"optional","name":{}}}"#, json_string(n))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn json_entry(e: &ManpageEntry) -> String {
|
||||
format!(
|
||||
r#"{{"switch":{},"param":{},"desc":{}}}"#,
|
||||
json_switch(&e.switch),
|
||||
json_param(&e.param),
|
||||
json_string(&e.desc)
|
||||
)
|
||||
}
|
||||
|
||||
fn json_subcommand(sc: &ManpageSubcommand) -> String {
|
||||
format!(
|
||||
r#"{{"name":{},"desc":{}}}"#,
|
||||
json_string(&sc.name),
|
||||
json_string(&sc.desc)
|
||||
)
|
||||
}
|
||||
|
||||
fn json_positional(name: &str, p: &Positional) -> String {
|
||||
format!(
|
||||
r#"{{"name":{},"optional":{},"variadic":{}}}"#,
|
||||
json_string(name),
|
||||
p.optional,
|
||||
p.variadic
|
||||
)
|
||||
}
|
||||
|
||||
/// render `items` as a JSON array, using `f` to serialize each element.
/// elements are separated by a bare comma; an empty slice gives `[]`.
fn json_list<T, F: Fn(&T) -> String>(items: &[T], f: F) -> String {
    let mut out = String::from("[");
    for (index, item) in items.iter().enumerate() {
        if index > 0 {
            out.push(',');
        }
        out.push_str(&f(item));
    }
    out.push(']');
    out
}
|
||||
|
||||
/// serialize a ManpageResult to JSON:
|
||||
/// {"source":..., "description":..., "entries":[...],
|
||||
/// "subcommands":[...], "positionals":[...]}
|
||||
pub fn json_of_result(source: &str, result: &ManpageResult) -> String {
|
||||
let entries = json_list(&result.entries, json_entry);
|
||||
let subcommands = json_list(&result.subcommands, json_subcommand);
|
||||
let positionals_parts: Vec<String> = result
|
||||
.positionals
|
||||
.iter()
|
||||
.map(|(name, p)| json_positional(name, p))
|
||||
.collect();
|
||||
let positionals = format!("[{}]", positionals_parts.join(","));
|
||||
format!(
|
||||
r#"{{"source":{},"description":{},"entries":{},"subcommands":{},"positionals":{}}}"#,
|
||||
json_string(source),
|
||||
json_string(&result.description),
|
||||
entries,
|
||||
subcommands,
|
||||
positionals,
|
||||
)
|
||||
}
|
||||
|
||||
/// write `contents` to `path`, first creating the parent directory (and
/// its ancestors) when the path has one. an existing file is replaced.
pub fn write_file(path: &Path, contents: &str) -> io::Result<()> {
    path.parent()
        .map_or(Ok(()), |parent| fs::create_dir_all(parent))?;
    fs::write(path, contents)
}
|
||||
|
||||
/// write the parsed result for `command` into `dir` as JSON.
|
||||
pub fn write_result(
|
||||
dir: &Path,
|
||||
command: &str,
|
||||
source: &str,
|
||||
result: &ManpageResult,
|
||||
) -> io::Result<()> {
|
||||
let path = dir.join(format!("{}.json", filename_of_command(command)));
|
||||
write_file(&path, &json_of_result(source, result))
|
||||
}
|
||||
|
||||
/// write a native-nushell completion blob (the binary supplied its own).
|
||||
pub fn write_native(dir: &Path, command: &str, data: &str) -> io::Result<()> {
|
||||
let path = dir.join(format!("{}.nu", filename_of_command(command)));
|
||||
write_file(&path, data)
|
||||
}
|
||||
|
||||
// --- read side ---
|
||||
|
||||
/// read a whole file as UTF-8 text; any failure (missing file, bad
/// encoding, permissions, ...) is reported as `None`.
fn read_file(path: &Path) -> Option<String> {
    match fs::read_to_string(path) {
        Ok(text) => Some(text),
        Err(_) => None,
    }
}
|
||||
|
||||
fn read_json_result(path: &Path) -> Option<(String, ManpageResult)> {
|
||||
let data = read_file(path)?;
|
||||
let v = serde_json::from_str::<Value>(&data).ok()?;
|
||||
let source = v
|
||||
.get("source")
|
||||
.and_then(|x| x.as_str())
|
||||
.unwrap_or("json")
|
||||
.to_string();
|
||||
Some((source, result_from_json(&v)))
|
||||
}
|
||||
|
||||
fn switch_from_json(v: &Value) -> Option<OwnedSwitch> {
|
||||
let t = v.get("type")?.as_str()?;
|
||||
match t {
|
||||
"short" => {
|
||||
let c = v.get("char")?.as_str()?.chars().next()?;
|
||||
Some(OwnedSwitch::Short(c))
|
||||
}
|
||||
"long" => Some(OwnedSwitch::Long(v.get("name")?.as_str()?.to_string())),
|
||||
"both" => {
|
||||
let c = v.get("char")?.as_str()?.chars().next()?;
|
||||
let n = v.get("name")?.as_str()?.to_string();
|
||||
Some(OwnedSwitch::Both(c, n))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn param_from_json(v: &Value) -> Option<OwnedParam> {
|
||||
if v.is_null() {
|
||||
return None;
|
||||
}
|
||||
let kind = v.get("kind")?.as_str()?;
|
||||
let name = v.get("name")?.as_str()?.to_string();
|
||||
Some(match kind {
|
||||
"mandatory" => OwnedParam::Mandatory(name),
|
||||
"optional" => OwnedParam::Optional(name),
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
fn entry_from_json(v: &Value) -> Option<ManpageEntry> {
|
||||
let switch = switch_from_json(v.get("switch")?)?;
|
||||
let param = v.get("param").and_then(param_from_json);
|
||||
let desc = v
|
||||
.get("desc")
|
||||
.and_then(|d| d.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
Some(ManpageEntry {
|
||||
switch,
|
||||
param,
|
||||
desc,
|
||||
})
|
||||
}
|
||||
|
||||
fn subcommand_from_json(v: &Value) -> Option<ManpageSubcommand> {
|
||||
let name = v.get("name")?.as_str()?.to_string();
|
||||
let desc = v
|
||||
.get("desc")
|
||||
.and_then(|d| d.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
Some(ManpageSubcommand { name, desc })
|
||||
}
|
||||
|
||||
fn positional_from_json(v: &Value) -> Option<(String, Positional)> {
|
||||
let name = v.get("name")?.as_str()?.to_string();
|
||||
let optional = v.get("optional").and_then(|x| x.as_bool()).unwrap_or(false);
|
||||
let variadic = v.get("variadic").and_then(|x| x.as_bool()).unwrap_or(false);
|
||||
Some((name, Positional { optional, variadic }))
|
||||
}
|
||||
|
||||
/// deserialize a JSON cache entry into ManpageResult.
|
||||
pub fn result_from_json(v: &Value) -> ManpageResult {
|
||||
let description = v
|
||||
.get("description")
|
||||
.and_then(|d| d.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let entries = v
|
||||
.get("entries")
|
||||
.and_then(|x| x.as_array())
|
||||
.map(|arr| arr.iter().filter_map(entry_from_json).collect())
|
||||
.unwrap_or_default();
|
||||
let subcommands = v
|
||||
.get("subcommands")
|
||||
.and_then(|x| x.as_array())
|
||||
.map(|arr| arr.iter().filter_map(subcommand_from_json).collect())
|
||||
.unwrap_or_default();
|
||||
let positionals = v
|
||||
.get("positionals")
|
||||
.and_then(|x| x.as_array())
|
||||
.map(|arr| arr.iter().filter_map(positional_from_json).collect())
|
||||
.unwrap_or_default();
|
||||
ManpageResult {
|
||||
entries,
|
||||
subcommands,
|
||||
positionals,
|
||||
description,
|
||||
}
|
||||
}
|
||||
|
||||
/// parse nushell `export extern` blocks out of a .nu source file.
|
||||
///
|
||||
/// returns the help_result that matches `target_cmd` — its entries,
|
||||
/// positionals, and any other extern blocks under it (`cmd sub`) are
|
||||
/// folded into the subcommands list.
|
||||
pub fn parse_nu_completions(target_cmd: &str, contents: &str) -> ManpageResult {
|
||||
let mut blocks: Vec<NuBlock> = Vec::new();
|
||||
let mut current_desc = String::new();
|
||||
let mut in_block = false;
|
||||
let mut block = NuBlock::default();
|
||||
|
||||
for line in contents.split('\n') {
|
||||
let trimmed = line.trim();
|
||||
if !in_block {
|
||||
if let Some(stripped) = trimmed.strip_prefix("# ") {
|
||||
current_desc = stripped.trim().to_string();
|
||||
} else if trimmed.contains("export extern")
|
||||
&& let Some(cmd) = extract_extern_name(trimmed)
|
||||
{
|
||||
in_block = true;
|
||||
block = NuBlock {
|
||||
cmd,
|
||||
description: std::mem::take(&mut current_desc),
|
||||
..Default::default()
|
||||
};
|
||||
} else {
|
||||
current_desc.clear();
|
||||
}
|
||||
} else if trimmed.starts_with(']') {
|
||||
blocks.push(std::mem::take(&mut block));
|
||||
in_block = false;
|
||||
} else {
|
||||
let (param_part, desc) = match trimmed.find('#') {
|
||||
Some(idx) => (trimmed[..idx].trim(), trimmed[idx + 1..].trim()),
|
||||
None => (trimmed, ""),
|
||||
};
|
||||
parse_nu_param_line_into(param_part, desc, &mut block);
|
||||
}
|
||||
}
|
||||
if in_block {
|
||||
blocks.push(block);
|
||||
}
|
||||
|
||||
// find the block matching target_cmd
|
||||
let Some(matched) = blocks.iter().find(|b| b.cmd == target_cmd) else {
|
||||
return ManpageResult::default();
|
||||
};
|
||||
|
||||
// collect immediate subcommands from other blocks ("target sub" pattern)
|
||||
let prefix = format!("{target_cmd} ");
|
||||
let mut subcommands: Vec<ManpageSubcommand> = Vec::new();
|
||||
for b in &blocks {
|
||||
if let Some(suffix) = b.cmd.strip_prefix(&prefix)
|
||||
&& !suffix.contains(' ')
|
||||
&& !suffix.is_empty()
|
||||
{
|
||||
subcommands.push(ManpageSubcommand {
|
||||
name: suffix.to_string(),
|
||||
desc: b.description.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
ManpageResult {
|
||||
entries: matched.entries.clone(),
|
||||
subcommands,
|
||||
positionals: matched.positionals.clone(),
|
||||
description: matched.description.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// pull the command name out of an `export extern ...` line.
/// a quoted name (`export extern "git add" [`) is returned without its
/// quotes and may contain spaces; `None` if the closing quote is missing.
/// a bare name is the leading run of ASCII letters, digits, `_` and `-`;
/// `None` if that run is empty or the keyword is absent.
fn extract_extern_name(line: &str) -> Option<String> {
    const KEYWORD: &str = "export extern";
    let start = line.find(KEYWORD)? + KEYWORD.len();
    let tail = line[start..].trim_start();
    match tail.strip_prefix('"') {
        Some(quoted) => quoted
            .split_once('"')
            .map(|(name, _)| name.to_string()),
        None => {
            let bare = tail
                .split(|c: char| !(c.is_ascii_alphanumeric() || c == '_' || c == '-'))
                .next()
                .unwrap_or("");
            if bare.is_empty() {
                None
            } else {
                Some(bare.to_string())
            }
        }
    }
}
|
||||
|
||||
fn parse_nu_param_line_into(param_part: &str, desc: &str, block: &mut NuBlock) {
|
||||
if param_part.len() < 2 {
|
||||
return;
|
||||
}
|
||||
if let Some(after) = param_part.strip_prefix("--") {
|
||||
// long flag: --name(-c): type or --name: type or --name
|
||||
let (name, rest) = split_at_non_name_char(after);
|
||||
if name.is_empty() {
|
||||
return;
|
||||
}
|
||||
let mut short: Option<char> = None;
|
||||
let mut rest = rest;
|
||||
if let Some(after_open) = rest.strip_prefix("(-")
|
||||
&& let Some(c) = after_open.chars().next()
|
||||
&& after_open[c.len_utf8()..].starts_with(')')
|
||||
{
|
||||
short = Some(c);
|
||||
rest = &after_open[c.len_utf8() + 1..];
|
||||
}
|
||||
let param = parse_type_suffix(rest);
|
||||
let switch = match short {
|
||||
Some(c) => OwnedSwitch::Both(c, name.to_string()),
|
||||
None => OwnedSwitch::Long(name.to_string()),
|
||||
};
|
||||
block.entries.push(ManpageEntry {
|
||||
switch,
|
||||
param,
|
||||
desc: desc.to_string(),
|
||||
});
|
||||
} else if param_part.starts_with('-') {
|
||||
// short flag: -c
|
||||
if let Some(c) = param_part.chars().nth(1)
|
||||
&& c.is_ascii_alphanumeric()
|
||||
{
|
||||
block.entries.push(ManpageEntry {
|
||||
switch: OwnedSwitch::Short(c),
|
||||
param: None,
|
||||
desc: desc.to_string(),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// positional: name: type or name?: type or ...name: type
|
||||
let variadic = param_part.starts_with("...");
|
||||
let after_prefix = if variadic {
|
||||
¶m_part[3..]
|
||||
} else {
|
||||
param_part
|
||||
};
|
||||
let optional = after_prefix.contains('?');
|
||||
let name_end = after_prefix.find([':', '?']).unwrap_or(after_prefix.len());
|
||||
let name = after_prefix[..name_end].trim();
|
||||
let name: String = name
|
||||
.chars()
|
||||
.map(|c| if c == '-' { '_' } else { c })
|
||||
.collect();
|
||||
if !name.is_empty() && !name.starts_with('-') {
|
||||
let duplicate = block
|
||||
.positionals
|
||||
.iter()
|
||||
.any(|(existing, _)| existing.eq_ignore_ascii_case(&name));
|
||||
if !duplicate {
|
||||
block.positionals.push((
|
||||
name,
|
||||
Positional {
|
||||
optional: optional || variadic,
|
||||
variadic,
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// split `s` into its leading flag-name run and the remainder.
/// name characters are ASCII letters, digits, `-` and `_`; underscores
/// are included so that flags such as `--log_dir` are not cut short at
/// the underscore. either half may be empty.
fn split_at_non_name_char(s: &str) -> (&str, &str) {
    let end = s
        .find(|c: char| !(c.is_ascii_alphanumeric() || c == '-' || c == '_'))
        .unwrap_or(s.len());
    s.split_at(end)
}
|
||||
|
||||
/// parse a `: type` suffix into an OwnedParam (always Mandatory since the
|
||||
/// nushell extern syntax doesn't distinguish optional-with-default).
|
||||
fn parse_type_suffix(s: &str) -> Option<OwnedParam> {
|
||||
let s = s.trim_start();
|
||||
let s = s.strip_prefix(':')?;
|
||||
let s = s.trim_start();
|
||||
let end = s
|
||||
.find(|c: char| !c.is_ascii_alphabetic())
|
||||
.unwrap_or(s.len());
|
||||
if end == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(OwnedParam::Mandatory(s[..end].to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct NuBlock {
|
||||
cmd: String,
|
||||
entries: Vec<ManpageEntry>,
|
||||
positionals: Vec<(String, Positional)>,
|
||||
description: String,
|
||||
}
|
||||
|
||||
/// look up a command's parsed result. source priority is native nushell,
|
||||
/// then manpage JSON, then help JSON. parent .nu files are searched for
|
||||
/// subcommand lookups because clap-generated .nu files contain all extern
|
||||
/// blocks in a single file.
|
||||
pub fn lookup(dirs: &[PathBuf], command: &str) -> Option<ManpageResult> {
|
||||
let base_name = filename_of_command(command);
|
||||
let parent_base = command
|
||||
.find(' ')
|
||||
.map(|i| filename_of_command(&command[..i]));
|
||||
|
||||
for directory in dirs {
|
||||
let nu_path = directory.join(format!("{base_name}.nu"));
|
||||
if let Some(data) = read_file(&nu_path) {
|
||||
return Some(parse_nu_completions(command, &data));
|
||||
}
|
||||
if let Some(pb) = &parent_base {
|
||||
let parent_nu = directory.join(format!("{pb}.nu"));
|
||||
if let Some(data) = read_file(&parent_nu) {
|
||||
let r = parse_nu_completions(command, &data);
|
||||
if !r.entries.is_empty() || !r.subcommands.is_empty() || !r.positionals.is_empty() {
|
||||
return Some(r);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for directory in dirs {
|
||||
let json_path = directory.join(format!("{base_name}.json"));
|
||||
if let Some((source, result)) = read_json_result(&json_path)
|
||||
&& source != "help"
|
||||
{
|
||||
return Some(result);
|
||||
}
|
||||
}
|
||||
|
||||
for directory in dirs {
|
||||
let json_path = directory.join(format!("{base_name}.json"));
|
||||
if let Some((_, result)) = read_json_result(&json_path) {
|
||||
return Some(result);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// look up a command's raw stored data (JSON or .nu source).
|
||||
pub fn lookup_raw(dirs: &[PathBuf], command: &str) -> Option<String> {
|
||||
let base_name = filename_of_command(command);
|
||||
for directory in dirs {
|
||||
let nu_path = directory.join(format!("{base_name}.nu"));
|
||||
if let Some(data) = read_file(&nu_path) {
|
||||
return Some(data);
|
||||
}
|
||||
}
|
||||
for directory in dirs {
|
||||
let json_path = directory.join(format!("{base_name}.json"));
|
||||
if let Some(data) = read_file(&json_path) {
|
||||
return Some(data);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// strip a trailing `.json` or `.nu` from a filename; `None` for any
/// other name.
fn chop_extension(filename: &str) -> Option<&str> {
    [".json", ".nu"]
        .iter()
        .find_map(|ext| filename.strip_suffix(*ext))
}
|
||||
|
||||
/// list all indexed commands across all store directories.
|
||||
/// returns a sorted, deduplicated list of command names.
|
||||
pub fn all_commands(dirs: &[PathBuf]) -> Vec<String> {
|
||||
let mut out: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
|
||||
for directory in dirs {
|
||||
let Ok(entries) = fs::read_dir(directory) else {
|
||||
continue;
|
||||
};
|
||||
for entry in entries.flatten() {
|
||||
if let Some(name) = entry.file_name().to_str()
|
||||
&& let Some(base) = chop_extension(name)
|
||||
{
|
||||
out.insert(command_of_filename(base));
|
||||
}
|
||||
}
|
||||
}
|
||||
out.into_iter().collect()
|
||||
}
|
||||
|
||||
/// discover subcommands of a command by scanning filenames in the store
|
||||
/// (e.g. for "git", finds "git_add.json", "git_log.json").
|
||||
pub fn subcommands_of(dirs: &[PathBuf], command: &str) -> Vec<ManpageSubcommand> {
|
||||
let prefix = format!("{}_", filename_of_command(command));
|
||||
let mut seen: HashMap<String, ManpageSubcommand> = HashMap::new();
|
||||
for directory in dirs {
|
||||
let Ok(entries) = fs::read_dir(directory) else {
|
||||
continue;
|
||||
};
|
||||
for entry in entries.flatten() {
|
||||
let Some(filename) = entry.file_name().to_str().map(|s| s.to_string()) else {
|
||||
continue;
|
||||
};
|
||||
if !filename.starts_with(&prefix) {
|
||||
continue;
|
||||
}
|
||||
let is_json = filename.ends_with(".json");
|
||||
let Some(base) = chop_extension(&filename) else {
|
||||
continue;
|
||||
};
|
||||
let rest = &base[prefix.len()..];
|
||||
if rest.is_empty() || rest.contains('_') {
|
||||
continue;
|
||||
}
|
||||
if seen.contains_key(rest) {
|
||||
continue;
|
||||
}
|
||||
let desc = if is_json {
|
||||
read_file(&entry.path())
|
||||
.and_then(|d| serde_json::from_str::<Value>(&d).ok())
|
||||
.and_then(|v| {
|
||||
v.get("description")
|
||||
.and_then(|x| x.as_str())
|
||||
.map(|s| s.to_string())
|
||||
})
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
seen.insert(
|
||||
rest.to_string(),
|
||||
ManpageSubcommand {
|
||||
name: rest.to_string(),
|
||||
desc,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
let mut out: Vec<ManpageSubcommand> = seen.into_values().collect();
|
||||
out.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
out
|
||||
}
|
||||
|
||||
/// determine how a command was indexed: "help", "manpage", "native", etc.
|
||||
/// for JSON files, returns the "source" field. for .nu files, returns "native".
|
||||
pub fn file_type_of(dirs: &[PathBuf], command: &str) -> Option<String> {
|
||||
let base = filename_of_command(command);
|
||||
for directory in dirs {
|
||||
let nu_path = directory.join(format!("{base}.nu"));
|
||||
if nu_path.exists() {
|
||||
return Some("native".to_string());
|
||||
}
|
||||
}
|
||||
for directory in dirs {
|
||||
let json_path = directory.join(format!("{base}.json"));
|
||||
if json_path.exists() {
|
||||
return Some(
|
||||
read_file(&json_path)
|
||||
.and_then(|d| serde_json::from_str::<Value>(&d).ok())
|
||||
.and_then(|v| v.get("source").and_then(|x| x.as_str()).map(String::from))
|
||||
.unwrap_or_else(|| "json".to_string()),
|
||||
);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
34
src/types.rs
Normal file
34
src/types.rs
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
/// the spelling(s) of a flag: a short character, a long name, or both.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Switch<'a> {
    Short(char),
    Long(&'a str),
    Both(char, &'a str),
}

/// the argument a flag takes, tagged as mandatory or optional and
/// carrying the placeholder name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Param<'a> {
    Mandatory(&'a str),
    Optional(&'a str),
}

/// one parsed flag: its switch, its optional parameter and its
/// description lines.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OptionEntry<'a> {
    pub switch: Switch<'a>,
    pub param: Option<Param<'a>>,
    pub desc: Vec<&'a str>,
}

/// a subcommand name with its description.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Subcommand<'a> {
    pub name: &'a str,
    pub desc: &'a str,
}

/// properties of a positional argument.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Positional {
    /// the argument may be omitted.
    pub optional: bool,
    /// the argument may be repeated.
    pub variadic: bool,
}

/// everything parsed for one command: flags, subcommands, named
/// positionals and a description.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HelpResult<'a> {
    pub entries: Vec<OptionEntry<'a>>,
    pub subcommands: Vec<Subcommand<'a>>,
    pub positionals: Vec<(&'a str, Positional)>,
    pub desc: &'a str,
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue