tidy up extractions and dynamic completion placement

This commit is contained in:
atagen 2026-05-21 21:02:59 +10:00
parent 2dab68fe03
commit 04b418dc18
7 changed files with 782 additions and 117 deletions

View file

@ -1863,7 +1863,6 @@ fn cmd_complete(
.filter(|t| !t.is_empty())
.cloned()
.collect();
let lookup_depth = lookup_tokens.len();
let resolve_depth = resolve_tokens.len();
let need_resolve = match &found {
Some((_, _, depth)) => *depth < resolve_depth,
@ -1912,14 +1911,28 @@ fn cmd_complete(
None => Vec::new(),
Some((matched_name, r, depth)) => {
let mut scored: Vec<(i32, String)> = Vec::new();
// subcommand candidates (skip if match is too shallow)
if *depth >= lookup_depth.saturating_sub(1) {
// subcommand candidates (skip if match is too shallow). when
// `systemctl status` isn't in the cache, `find_result` falls
// back to `systemctl` at depth 1; we must NOT then offer
// `systemctl`'s subs (`poweroff`, `preset`, ...) — the user has
// already typed past that point. requiring depth >= resolve_depth
// (the count of complete, non-partial tokens) keeps subs
// exclusive to a full-prefix match and lets the dynamic completer
// — systemctl unit names, etc. — take over otherwise.
//
// also: when the typed token *exactly* equals a candidate we
// drop it. the user has already written the full word; echoing
// it back masks any downstream dynamic completer.
if *depth >= resolve_depth {
let subs: Vec<ManpageSubcommand> = if !r.subcommands.is_empty() {
r.subcommands.clone()
} else {
subcommands_of(&dirs, matched_name)
};
for sc in &subs {
if !last_token.is_empty() && last_token == sc.name {
continue;
}
let s = fuzzy_score(&last_token, &sc.name);
if s > 0 {
scored.push((s, completion_json(&sc.name, &sc.desc)));
@ -1961,6 +1974,9 @@ fn cmd_complete(
}
}
};
if !last_token.is_empty() && last_token == flag {
continue;
}
let s = fuzzy_score(&last_token, &flag);
if s > 0 {
scored.push((s, completion_json(&flag, &desc)));

View file

@ -19,8 +19,15 @@ use nom::{
make_parser!(short_switch -> char,
preceded(char('-'), satisfy(|c| c.is_alphanumeric())));
// long flag name: first char must be alphanumeric to reject the
// `------------` style separator lines that show up in `less --help`
// (and therefore in zstdless/bzless help output, which wrap less).
// without this guard, the parser happily eats the whole run of dashes
// after `--` and produces a flag named `------…`.
make_parser!(long_switch -> &'a str,
preceded(tag("--"), take_while1(is_option_char)));
preceded(tag("--"), verify(take_while1(is_option_char), |s: &str| {
s.chars().next().is_some_and(|c| c.is_ascii_alphanumeric())
})));
make_parser!(negatable_long_switch -> &'a str,
preceded(tag("--[no-]"), take_while1(is_option_char)));
@ -121,6 +128,35 @@ make_parser!(slash_sep -> (),
switch_pair!(long_slash_short,
long_switch, slash_sep, short_switch => |l, s| Switch::Both(s, l));
// `-s PARAM, --long` form — ripgrep and other clap-via-roff manpages emit
// the placeholder between the short and long forms:
// `\-e PATTERN, \-\-regexp=PATTERN`
// `\-f PATTERNFILE, \-\-file=PATTERNFILE`
// the placeholder is matched conservatively (uppercase identifier or
// `<angle>` form) so we don't eat normal description words. the trailing
// `=PATTERN` / ` PATTERN` is left for `opt(param_parser)` to consume.
make_parser!(placeholder_token -> &'a str,
alt((
delimited(char('<'), take_till1(|c: char| c == '>'), char('>')),
verify(
take_while1(|c: char|
c.is_ascii_alphabetic() || c.is_ascii_digit() || c == '_' || c == '-'
),
|s: &str| {
let first = match s.chars().next() { Some(c) => c, None => return false };
if !(first.is_ascii_uppercase() || first == '_') { return false; }
s.chars().all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
}
),
))
);
fn short_param_comma_long(s: &str) -> IResult<&str, Switch<'_>> {
let (rem, (short, _, _ph, _, long)) =
(short_switch, space1, placeholder_token, comma, long_switch).parse(s)?;
Ok((rem, Switch::Both(short, long)))
}
make_parser!(short_as_switch -> Switch<'a>, short_switch => Switch::Short);
make_parser!(negatable_long_as_switch -> Switch<'a>, negatable_long_switch => Switch::Long);
make_parser!(long_as_switch -> Switch<'a>, long_switch => Switch::Long);
@ -131,6 +167,7 @@ make_parser!(pub switch_parser -> Switch<'a>,
short_space_negatable_long,
short_comma_long,
short_space_long,
short_param_comma_long,
long_slash_short,
short_as_switch,
negatable_long_as_switch,

View file

@ -69,6 +69,18 @@ pub struct ManpageResult {
pub description: String,
}
impl ManpageResult {
/// canonicalize the entry list before persistence: fold non-adjacent
/// Short/Long pairs into `Both`, then dedup by key. always called once
/// at the end of a parse path so the JSON cache holds the canonical
/// shape and downstream consumers (runtime completer, static `extern`
/// generation) don't have to repeat the work.
pub fn normalize(&mut self) {
let entries = std::mem::take(&mut self.entries);
self.entries = dedup_entries(merge_short_long_pairs(entries));
}
}
impl From<&Switch<'_>> for OwnedSwitch {
fn from(s: &Switch<'_>) -> Self {
match s {
@ -121,8 +133,8 @@ impl From<&Subcommand<'_>> for ManpageSubcommand {
impl From<&HelpResult<'_>> for ManpageResult {
fn from(r: &HelpResult<'_>) -> Self {
ManpageResult {
entries: merge_short_long_pairs(r.entries.iter().map(Into::into).collect()),
let mut result = ManpageResult {
entries: r.entries.iter().map(Into::into).collect(),
subcommands: r.subcommands.iter().map(Into::into).collect(),
// positional names are stored lowercased so output is
// stable across the various places we extract them from
@ -133,8 +145,91 @@ impl From<&HelpResult<'_>> for ManpageResult {
.map(|(k, v)| (k.to_ascii_lowercase(), v.clone()))
.collect(),
description: r.desc.to_string(),
};
result.normalize();
result
}
}
fn entry_key(e: &ManpageEntry) -> String {
match &e.switch {
OwnedSwitch::Short(c) => format!("-{c}"),
OwnedSwitch::Long(l) | OwnedSwitch::Both(_, l) => format!("--{l}"),
}
}
fn entry_score(e: &ManpageEntry) -> i32 {
let switch_bonus = if matches!(e.switch, OwnedSwitch::Both(_, _)) {
10
} else {
0
};
let param_bonus = if e.param.is_some() { 5 } else { 0 };
let desc_bonus = (e.desc.len() / 10).min(5) as i32;
switch_bonus + param_bonus + desc_bonus
}
/// collapse duplicate flag entries that refer to the same flag.
///
/// real-world manpages emit duplicates for several reasons:
/// - clap subcommand pages list inherited global flags alongside the
/// subcommand's own (e.g. every `homectl <verb>.1` mentions `--json`)
/// - tools like btrfs document a flag once as a "deprecated alias" and
/// once under the "Global options" group
/// - help text occasionally restates a flag in an example block that the
/// parser also picks up
///
/// for each key (the canonical `-c` / `--long` form), we keep the highest
/// scoring entry: `Both` outranks bare Short/Long, param-bearing outranks
/// param-less, longer description outranks shorter. after that we strip
/// standalone Short entries whose char is already covered by a `Both`,
/// since they're now redundant with the merged form.
///
/// preserves original ordering: the surviving entry sits where the first
/// occurrence of its key appeared.
pub fn dedup_entries(entries: Vec<ManpageEntry>) -> Vec<ManpageEntry> {
use std::collections::HashMap;
use std::collections::HashSet;
if entries.len() < 2 {
return entries;
}
let mut best: HashMap<String, usize> = HashMap::new();
for (i, e) in entries.iter().enumerate() {
let key = entry_key(e);
match best.get(&key) {
Some(&prev) if entry_score(&entries[prev]) >= entry_score(e) => {}
_ => {
best.insert(key, i);
}
}
}
let mut covered: HashSet<char> = HashSet::new();
for &idx in best.values() {
if let OwnedSwitch::Both(c, _) = &entries[idx].switch {
covered.insert(*c);
}
}
let mut seen: HashSet<String> = HashSet::new();
let mut out: Vec<ManpageEntry> = Vec::with_capacity(entries.len());
for e in entries.iter() {
let key = entry_key(e);
if seen.contains(&key) {
continue;
}
if let OwnedSwitch::Short(c) = &e.switch
&& covered.contains(c)
{
continue;
}
seen.insert(key.clone());
let best_idx = *best.get(&key).unwrap();
out.push(entries[best_idx].clone());
}
out
}
/// merge non-adjacent Short/Long entries that share an identical, non-empty
@ -224,7 +319,7 @@ pub fn merge_short_long_pairs(entries: Vec<ManpageEntry>) -> Vec<ManpageEntry> {
/// extraction pipeline.
pub fn parse_manpage_lines(lines: &[GroffLine]) -> ManpageResult {
let mut result = parse_manpage_lines_raw(lines);
result.entries = merge_short_long_pairs(result.entries);
result.normalize();
result
}
@ -331,13 +426,13 @@ pub fn parse_manpage_with_subs(contents: &str) -> (ManpageResult, Vec<(String, M
let subs: Vec<(String, ManpageResult)> = sub_sections
.into_iter()
.map(|(name, desc, lines)| {
let entries = merge_short_long_pairs(strategies::extract_entries(&lines));
let sub_result = ManpageResult {
entries,
let mut sub_result = ManpageResult {
entries: strategies::extract_entries(&lines),
subcommands: Vec::new(),
positionals: Default::default(),
description: desc,
};
sub_result.normalize();
(name, sub_result)
})
.collect();
@ -387,6 +482,214 @@ write to FILE
show this help and exit
"#;
const HP_MANPAGE: &str = r#".TH BAT "1"
.SH NAME
bat \- demo
.SH "OPTIONS"
.HP
\fB\-A\fR, \fB\-\-show\-all\fR
.IP
Show non-printable characters.
.HP
\fB\-\-nonprintable\-notation\fR <notation>
.IP
Specify how to display non-printable characters.
Possible values:
.RS
.IP "caret"
Use character sequences like ^G ...
.IP "unicode"
Use special Unicode code points ...
.RE
.HP
\fB\-l\fR, \fB\-\-language\fR <language>
.IP
Set the language.
"#;
#[test]
fn hp_strategy_extracts_flags_and_skips_rs_example_values() {
// bat's manpage uses .HP for flag tags and nests example values in
// .RS/.RE — the parser should pick up the three real flags and
// ignore the inner .IP "caret" / .IP "unicode" example tags.
let r = parse_manpage_string(HP_MANPAGE);
let names: Vec<String> = r
.entries
.iter()
.map(|e| match &e.switch {
OwnedSwitch::Long(l) | OwnedSwitch::Both(_, l) => l.clone(),
OwnedSwitch::Short(c) => c.to_string(),
})
.collect();
assert_eq!(
names,
vec!["show-all", "nonprintable-notation", "language"],
"expected 3 flags, got {names:?}"
);
assert!(
!r.entries.iter().any(|e| matches!(
&e.switch,
OwnedSwitch::Long(l) if l == "caret" || l == "unicode"
)),
"inner .RS .IP example values must not be picked up as flags: {:?}",
r.entries
);
assert!(matches!(
r.entries[0].switch,
OwnedSwitch::Both('A', ref l) if l == "show-all"
));
assert!(matches!(
r.entries[2].switch,
OwnedSwitch::Both('l', ref l) if l == "language"
));
}
const TEXT_RS_NESTED_MANPAGE: &str = r#".TH TOOL "1"
.SH NAME
tool \- demo
.SH "OPTIONS"
.SS INPUT
\fB\-x\fR, \fB\-\-foo\fR
.RS 4
First flag desc. Possible values:
.RS
some value
.RE
After the inner block.
.RE
.sp
\fB\-y\fR, \fB\-\-bar\fR
.RS 4
Second flag desc.
.RE
"#;
#[test]
fn text_rs_strategy_handles_nested_rs_in_description() {
// when a flag's `.RS` body contains a nested `.RS/.RE` (sub-value
// listing), depth tracking in `collect_pp_rs_desc` must keep the
// outer block intact instead of ending early at the inner `.RE`.
// without it, the second flag's tag (`-y, --bar`) would be
// mis-recognized as new top-level text and parsed twice / out of
// order, or the first desc would be truncated.
let r = parse_manpage_string(TEXT_RS_NESTED_MANPAGE);
assert_eq!(r.entries.len(), 2, "expected exactly 2 flags, got {}", r.entries.len());
assert!(matches!(
r.entries[0].switch,
OwnedSwitch::Both('x', ref l) if l == "foo"
));
assert!(
r.entries[0].desc.contains("First flag desc"),
"outer .RS body should be captured, got: {:?}",
r.entries[0].desc
);
assert!(
r.entries[0].desc.contains("After the inner block"),
"text after the nested .RE must still belong to the outer block, got: {:?}",
r.entries[0].desc
);
assert!(
!r.entries[0].desc.contains("some value"),
"inner .RS sub-value text should be skipped, got: {:?}",
r.entries[0].desc
);
assert!(matches!(
r.entries[1].switch,
OwnedSwitch::Both('y', ref l) if l == "bar"
));
assert!(r.entries[1].desc.contains("Second flag desc"));
}
const TEXT_RS_MANPAGE: &str = r#".TH RG "1"
.SH NAME
rg \- demo
.SH "OPTIONS"
.SS INPUT OPTIONS
\fB\-e\fR \fIPATTERN\fR, \fB\-\-regexp\fR=\fIPATTERN\fR
.RS 4
A pattern to search for. This option can be provided multiple times.
.RE
.sp
\fB\-f\fR \fIPATTERNFILE\fR, \fB\-\-file\fR=\fIPATTERNFILE\fR
.RS 4
Search for patterns from the given file.
.RE
.sp
\fB\-x\fR, \fB\-\-line\-regexp\fR
.RS 4
Only show matches surrounded by line boundaries.
.RE
"#;
#[test]
fn text_rs_strategy_extracts_ripgrep_style_flags() {
// rg's manpage layout: bare Text tag immediately followed by `.RS/.RE`,
// separated by `.sp` — no `.PP` to anchor on. covers both the
// `-s PARAM, --long=PARAM` form and the simpler `-s, --long` form.
let r = parse_manpage_string(TEXT_RS_MANPAGE);
assert_eq!(r.entries.len(), 3, "expected 3 entries, got {}", r.entries.len());
// -e, --regexp with PARAM between short and comma
assert!(matches!(
r.entries[0].switch,
OwnedSwitch::Both('e', ref l) if l == "regexp"
));
assert!(matches!(
r.entries[0].param,
Some(OwnedParam::Mandatory(ref p)) if p == "PATTERN"
));
assert!(r.entries[0].desc.starts_with("A pattern to search for"));
// -f, --file with PARAM
assert!(matches!(
r.entries[1].switch,
OwnedSwitch::Both('f', ref l) if l == "file"
));
// -x, --line-regexp without PARAM (plain comma form)
assert!(matches!(
r.entries[2].switch,
OwnedSwitch::Both('x', ref l) if l == "line-regexp"
));
}
const TP_CLAP_DUAL_PARAGRAPH: &str = r#".TH JJ "1"
.SH NAME
jj \- demo
.SH OPTIONS
.TP
\fB\-\-at\-operation\fR <OP>
Operation to load the repo at
Operation to load the repo at. By default, Jujutsu loads the repo at the most recent operation, and lots of additional sentences that go on for paragraphs.
.TP
\fB\-h\fR, \fB\-\-help\fR
Print help
"#;
#[test]
fn tp_strategy_stops_description_at_blank_line() {
// clap-generated manpages emit "summary\n\nexpanded body" — we want
// just the summary for completion tooltips, not the multi-paragraph
// wall of text. leading blanks (between tag and first body line)
// are still skipped, blanks only terminate once we've collected text.
let r = parse_manpage_string(TP_CLAP_DUAL_PARAGRAPH);
let at_op = r
.entries
.iter()
.find(|e| matches!(&e.switch, OwnedSwitch::Long(l) if l == "at-operation"))
.expect("--at-operation entry");
assert_eq!(
at_op.desc, "Operation to load the repo at",
"expected only the summary line, got: {:?}",
at_op.desc
);
// sanity: the second .TP block still parses (we didn't accidentally
// swallow the next entry).
assert!(r.entries.iter().any(|e| matches!(
&e.switch,
OwnedSwitch::Both('h', l) if l == "help"
)));
}
#[test]
fn tp_strategy_extracts_flags() {
let r = parse_manpage_string(TP_MANPAGE);

View file

@ -55,7 +55,20 @@ fn collect_description_lines(lines: &[GroffLine], start: usize) -> (String, usiz
}
i += 1;
}
GroffLine::Blank | GroffLine::Comment => {
GroffLine::Blank => {
// a blank line ends the description, but only after we've
// collected some text — leading blanks between the tag and
// the first description line are still skipped. this caps
// clap-style "summary\n\nexpanded body" entries (jj, etc.)
// at the summary, which is what completion tooltips want.
// `.IP`-style help already stops at blanks (collect_text_lines
// breaks on any non-Text line); this brings `.TP` in line.
if !acc.is_empty() {
break;
}
i += 1;
}
GroffLine::Comment => {
i += 1;
}
GroffLine::Macro { .. } => {
@ -211,14 +224,210 @@ fn combine_short_long_alternates(
// strategy b: .IP style (curl, hand-written manpages).
// .IP takes an inline tag argument: .IP "-v, --verbose"
// the description follows as text lines.
make_macro_walker!(pub strategy_ip -> Vec<ManpageEntry>, on macro "IP" =>
|lines, i, args| {
let tag = strip_groff_escapes(args);
let (desc, rest) = collect_text_lines(&lines[i + 1..]);
let new_i = lines.len() - rest.len();
parse_tag_to_entry(&tag, desc).map(|e| (e, new_i))
//
// .RS/.RE depth-aware: man pages frequently nest .IP inside .RS blocks to
// list example values (e.g. bat's `.IP "caret"` under `--nonprintable-notation`).
// those nested tags look like flag definitions and confuse the parser, so we
// only treat `.IP` at outer scope as a flag entry.
pub fn strategy_ip(lines: &[GroffLine]) -> Vec<ManpageEntry> {
let mut out = Vec::new();
let mut i = 0;
let mut rs_depth: u32 = 0;
while i < lines.len() {
if let GroffLine::Macro { name, args } = &lines[i] {
match name.as_str() {
"RS" => {
rs_depth += 1;
i += 1;
continue;
}
"RE" => {
rs_depth = rs_depth.saturating_sub(1);
i += 1;
continue;
}
"IP" if rs_depth == 0 => {
let tag = strip_groff_escapes(args);
let (desc, rest) = collect_text_lines(&lines[i + 1..]);
let new_i = lines.len() - rest.len();
if let Some(entry) = parse_tag_to_entry(&tag, desc) {
out.push(entry);
i = new_i;
continue;
}
}
_ => {}
}
}
i += 1;
}
);
out
}
// strategy b': .HP style (bat, help2man with hanging paragraphs).
// .HP introduces a hanging-indent paragraph: the next text line is the tag,
// followed by an empty `.IP` macro that starts the description body. example
// value listings are wrapped in `.RS/.RE` and skipped during description
// collection.
//
// .HP
// \fB\-A\fR, \fB\-\-show\-all\fR
// .IP
// Show non-printable characters ...
pub fn strategy_hp(lines: &[GroffLine]) -> Vec<ManpageEntry> {
let mut out = Vec::new();
let mut i = 0;
while i < lines.len() {
let GroffLine::Macro { name, .. } = &lines[i] else {
i += 1;
continue;
};
if name != "HP" {
i += 1;
continue;
}
let mut j = i + 1;
while j < lines.len() && matches!(&lines[j], GroffLine::Blank | GroffLine::Comment) {
j += 1;
}
let Some(tag) = lines.get(j).and_then(tag_from_line) else {
i += 1;
continue;
};
let mut body_start = j + 1;
if let Some(GroffLine::Macro { name, .. }) = lines.get(body_start)
&& name == "IP"
{
body_start += 1;
}
let (desc, new_i) = collect_hp_description(lines, body_start);
if let Some(entry) = parse_tag_to_entry(&tag, desc) {
out.push(entry);
}
i = if new_i > i { new_i } else { i + 1 };
}
out
}
/// description collector for `.HP` entries. stops at the next flag-boundary
/// macro (`.HP`, `.TP`, `.PP`, `.SH`, `.SS`) and skips entire `.RS/.RE`
/// example blocks — those are sub-value listings, not part of the flag's
/// own description text.
fn collect_hp_description(lines: &[GroffLine], start: usize) -> (String, usize) {
let mut acc: Vec<String> = Vec::new();
let mut i = start;
while i < lines.len() {
match &lines[i] {
GroffLine::Macro { name, .. }
if matches!(name.as_str(), "HP" | "TP" | "TQ" | "PP" | "SH" | "SS") =>
{
break;
}
GroffLine::Macro { name, .. } if name == "RS" => {
i += 1;
let mut depth: u32 = 1;
while i < lines.len() && depth > 0 {
if let GroffLine::Macro { name, .. } = &lines[i] {
if name == "RS" {
depth += 1;
} else if name == "RE" {
depth -= 1;
}
}
i += 1;
}
}
GroffLine::Text(t) => {
acc.push(t.clone());
i += 1;
}
GroffLine::Macro { name, args }
if matches!(
name.as_str(),
"B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI"
) =>
{
let text = tag_of_macro(name, args);
if !text.is_empty() {
acc.push(text);
}
i += 1;
}
GroffLine::Blank => {
if !acc.is_empty() {
break;
}
i += 1;
}
GroffLine::Comment => {
i += 1;
}
GroffLine::Macro { .. } => {
i += 1;
}
}
}
(acc.join(" "), i)
}
// strategy b'': bare Text tag immediately followed by `.RS/.RE` (ripgrep,
// some help2man variants). like the `.PP+.RS` shape, but with no `.PP`
// anchor between flag entries — flags sit directly under `.SS` headers
// separated only by `.sp`:
//
// .SS INPUT OPTIONS
// \fB\-e\fP \fIPATTERN\fP, \fB\-\-regexp\fP=\fIPATTERN\fP
// .RS 4
// A pattern to search for ...
// .RE
// .sp
// \fB\-f\fP \fIPATTERNFILE\fP, \fB\-\-file\fP=\fIPATTERNFILE\fP
// .RS 4
// ...
//
// we only treat a top-level Text line as a tag when an `.RS` immediately
// follows (skipping blanks/comments) and the text starts with `-`. nested
// Text lines inside an existing `.RS` block are skipped via depth tracking
// so description paragraphs that happen to begin with a flag reference
// don't get mis-recognized.
pub fn strategy_text_rs(lines: &[GroffLine]) -> Vec<ManpageEntry> {
let mut out = Vec::new();
let mut rs_depth: u32 = 0;
let mut i = 0;
while i < lines.len() {
match &lines[i] {
GroffLine::Macro { name, .. } if name == "RS" => {
rs_depth += 1;
i += 1;
}
GroffLine::Macro { name, .. } if name == "RE" => {
rs_depth = rs_depth.saturating_sub(1);
i += 1;
}
GroffLine::Text(tag) if rs_depth == 0 && tag.trim_start().starts_with('-') => {
let mut j = i + 1;
while j < lines.len()
&& matches!(&lines[j], GroffLine::Blank | GroffLine::Comment)
{
j += 1;
}
if let Some(GroffLine::Macro { name, .. }) = lines.get(j)
&& name == "RS"
{
let (desc, new_i) = collect_pp_rs_desc(lines, j);
if let Some(entry) = parse_tag_to_entry(tag, desc) {
out.push(entry);
i = if new_i > i { new_i } else { i + 1 };
continue;
}
}
i += 1;
}
_ => i += 1,
}
}
out
}
// strategy c: .PP + .RS/.RE style (git, docbook-generated manpages).
// flag entries are introduced by .PP (paragraph), with the flag name as
@ -243,18 +452,36 @@ fn collect_pp_rs_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
while i < lines.len() {
match &lines[i] {
GroffLine::Macro { name, .. } if name == "RS" => {
// depth-tracked .RS walk. some manpages nest a sub-value
// .RS/.RE inside the flag's main .RS block — without
// tracking depth here, the inner `.RE` would end the
// description early and leave the outer block half-parsed.
let mut depth: u32 = 1;
i += 1;
// inside .RS — collect until .RE or boundary macro
while i < lines.len() {
while i < lines.len() && depth > 0 {
match &lines[i] {
GroffLine::Macro { name, .. } if name == "RE" => {
return (acc.join(" "), i + 1);
}
GroffLine::Text(t) => {
acc.push(t.clone());
GroffLine::Macro { name, .. } if name == "RS" => {
depth += 1;
i += 1;
}
GroffLine::Macro { name, .. } if name == "PP" || name == "SH" => {
GroffLine::Macro { name, .. } if name == "RE" => {
depth -= 1;
i += 1;
}
GroffLine::Text(t) => {
// skip Text inside nested .RS blocks (sub-value
// listings, not part of the flag's own desc).
if depth == 1 {
acc.push(t.clone());
}
i += 1;
}
GroffLine::Macro { name, .. }
if name == "PP" || name == "SH" || name == "SS" =>
{
// section/paragraph boundary — abort even with
// an unclosed .RS (malformed manpage) so we
// don't run off to EOF.
return (acc.join(" "), i);
}
_ => i += 1,
@ -421,6 +648,7 @@ pub fn extract_entries(lines: &[GroffLine]) -> Vec<ManpageEntry> {
let pp = count_macro("PP", lines);
let rs = count_macro("RS", lines);
let ur = count_macro("UR", lines);
let hp = count_macro("HP", lines);
let mut specialized: Vec<(&str, Vec<ManpageEntry>)> = Vec::new();
if tp > 0 {
@ -435,6 +663,12 @@ pub fn extract_entries(lines: &[GroffLine]) -> Vec<ManpageEntry> {
if ur > 0 && ip > 0 {
specialized.push(("nix", strategy_nix(lines)));
}
if hp > 0 {
specialized.push(("HP", strategy_hp(lines)));
}
if rs > 0 {
specialized.push(("Text+RS", strategy_text_rs(lines)));
}
let candidates: Vec<(&str, Vec<ManpageEntry>)> = {
let filtered: Vec<_> = specialized
.into_iter()

View file

@ -13,12 +13,10 @@
//! - escaping special characters for nushell string literals
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::sync::OnceLock;
use crate::parsers::manpage::{
ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch,
};
use crate::parsers::manpage::{ManpageEntry, ManpageResult, OwnedParam, OwnedSwitch};
use crate::types::Positional;
/// nushell built-in commands and keywords — we must never generate `extern`
@ -246,70 +244,6 @@ pub fn escape_nu(s: &str) -> Cow<'_, str> {
}
}
fn entry_key(e: &ManpageEntry) -> String {
match &e.switch {
OwnedSwitch::Short(c) => format!("-{c}"),
OwnedSwitch::Long(l) | OwnedSwitch::Both(_, l) => format!("--{l}"),
}
}
fn entry_score(e: &ManpageEntry) -> i32 {
let switch_bonus = if matches!(e.switch, OwnedSwitch::Both(_, _)) {
10
} else {
0
};
let param_bonus = if e.param.is_some() { 5 } else { 0 };
let desc_bonus = (e.desc.len() / 10).min(5) as i32;
switch_bonus + param_bonus + desc_bonus
}
/// deduplicate flag entries that refer to the same flag.
///
/// when the same flag appears multiple times (e.g. from overlapping manpage
/// sections or repeated help text), we keep the "best" version using a score:
/// - both short+long form present: +10 (most informative)
/// - has a parameter: +5
/// - description length bonus: up to +5
///
/// after deduplication by long name, we also remove standalone short flags
/// whose letter is already covered by a Both(short, long) entry. this prevents
/// emitting both "-v" and "--verbose(-v)" which nushell would reject as a
/// duplicate. the filtering preserves original ordering from the help text.
pub fn dedup_entries(entries: &[ManpageEntry]) -> Vec<ManpageEntry> {
let mut best: HashMap<String, &ManpageEntry> = HashMap::new();
for e in entries {
let key = entry_key(e);
match best.get(&key) {
Some(prev) if entry_score(prev) >= entry_score(e) => {}
_ => {
best.insert(key, e);
}
}
}
let mut covered: HashSet<char> = HashSet::new();
for e in best.values() {
if let OwnedSwitch::Both(c, _) = &e.switch {
covered.insert(*c);
}
}
let mut seen: HashSet<String> = HashSet::new();
let mut out: Vec<ManpageEntry> = Vec::new();
for e in entries {
let key = entry_key(e);
if seen.contains(&key) {
continue;
}
if let OwnedSwitch::Short(c) = &e.switch
&& covered.contains(c)
{
continue;
}
seen.insert(key.clone());
out.push((*best.get(&key).unwrap()).clone());
}
out
}
/// format a single flag entry as a nushell `extern` parameter line.
/// output examples:
@ -414,7 +348,9 @@ pub fn module_name_of(cmd_name: &str) -> String {
/// export extern "git stash" [ # stash changes
/// ]
pub fn generate_extern(cmd_name: &str, result: &ManpageResult) -> String {
let entries = dedup_entries(&result.entries);
// entries arrive deduped from the parser pipeline (`parse_manpage_lines`
// and `From<&HelpResult>` both run `manpage::dedup_entries`), so we can
// emit them directly here.
let escaped_name = escape_nu(cmd_name);
let positionals = fixup_positionals(result.positionals.clone());
@ -424,7 +360,7 @@ pub fn generate_extern(cmd_name: &str, result: &ManpageResult) -> String {
out.push_str(&format_positional(name, p));
out.push('\n');
}
for entry in &entries {
for entry in &result.entries {
out.push_str(&format_flag(entry));
out.push('\n');
}
@ -452,24 +388,3 @@ pub fn generate_module(cmd_name: &str, result: &ManpageResult) -> String {
)
}
/// convenience wrapper: generate an `extern` from just a list of entries.
pub fn generate_extern_from_entries(cmd_name: &str, entries: Vec<ManpageEntry>) -> String {
generate_extern(
cmd_name,
&ManpageResult {
entries,
subcommands: Vec::new(),
positionals: Vec::new(),
description: String::new(),
},
)
}
/// stub subcommand entry used when extracting subcommands from a parsed
/// help result for nushell output.
pub fn manpage_subcommand_from(name: &str, desc: &str) -> ManpageSubcommand {
ManpageSubcommand {
name: name.to_string(),
desc: desc.to_string(),
}
}

View file

@ -814,6 +814,23 @@ fn nushell_module() {
assert!(nu.contains("--verbose(-v)"), "nu = {nu}");
}
#[test]
fn long_switch_rejects_dash_only_separator_lines() {
// less's `--help` (and therefore zstdless/bzless wrappers) is full of
// separator lines like `---------------------------------------------------`.
// the first two chars look like a `--` prefix; the rest is just dashes.
// without an alphanumeric-first guard, the parser would treat the whole
// separator as a flag named `------…`.
let r = parse(" ---------------------------------------------------\n -a, --all show all\n");
assert_eq!(
r.entries.len(),
1,
"separator should not parse as a flag, got {} entries",
r.entries.len()
);
assert!(matches!(&r.entries[0].switch, Switch::Both('a', l) if *l == "all"));
}
#[test]
fn dedup_entries_help() {
let txt = " -v, --verbose verbose output\n --verbose verbose mode\n -v be verbose\n";

View file

@ -241,6 +241,149 @@ fn complete_returns_flags_only_after_hyphen() {
let _ = fs::remove_dir_all(root);
}
#[test]
fn complete_does_not_leak_parent_subs_past_uncached_keyword() {
// `systemctl --user status p` — `systemctl status` isn't cached as its
// own file (the real systemctl manpage describes all verbs in one
// place), so `find_result` falls back to the parent `systemctl`. the
// completer must NOT then offer systemctl's top-level subs filtered by
// "p" (poweroff, preset, ...) — the user has already typed `status`.
// it must return null so the downstream dynamic completer (unit names)
// can take over.
let root = unique_temp_dir("inshellah-shallow-fallback");
let cache_dir = root.join("cache");
fs::create_dir_all(&cache_dir).expect("cache dir");
let parent = ManpageResult {
entries: Vec::new(),
subcommands: vec![
ManpageSubcommand {
name: "status".to_string(),
desc: "show status".to_string(),
},
ManpageSubcommand {
name: "poweroff".to_string(),
desc: "power off".to_string(),
},
ManpageSubcommand {
name: "preset".to_string(),
desc: "set preset".to_string(),
},
],
positionals: Vec::new(),
description: String::new(),
};
write_result(&cache_dir, "fakectl", "manpage", &parent).expect("cache");
// intermediate flag `--user` plus an uncached deep keyword `status`.
let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
.arg("complete")
.arg("--dir")
.arg(&cache_dir)
.arg("fakectl")
.arg("--user")
.arg("status")
.arg("p")
.env("PATH", "")
.output()
.expect("run inshellah complete");
assert!(
output.status.success(),
"stderr = {}",
String::from_utf8_lossy(&output.stderr)
);
let stdout = String::from_utf8(output.stdout).expect("stdout");
assert_eq!(
stdout.trim(),
"null",
"should not surface parent subs past an uncached keyword; stdout = {stdout}"
);
// sanity: at the right depth, parent subs are still offered.
let top_partial = Command::new(env!("CARGO_BIN_EXE_inshellah"))
.arg("complete")
.arg("--dir")
.arg(&cache_dir)
.arg("fakectl")
.arg("p")
.env("PATH", "")
.output()
.expect("run inshellah complete");
let top_stdout = String::from_utf8(top_partial.stdout).expect("stdout");
assert!(
top_stdout.contains(r#""value":"poweroff""#)
&& top_stdout.contains(r#""value":"preset""#),
"partial at the right depth should still match parent subs; stdout = {top_stdout}"
);
let _ = fs::remove_dir_all(root);
}
#[test]
fn complete_drops_exact_subcommand_match() {
// when the typed token exactly equals a cached subcommand, the binary
// returns null so a downstream dynamic completer (systemctl unit names,
// git remote names, etc.) can take over instead of echoing the
// already-typed word back.
let root = unique_temp_dir("inshellah-exact-subcommand-drop");
let cache_dir = root.join("cache");
fs::create_dir_all(&cache_dir).expect("cache dir");
let result = ManpageResult {
entries: Vec::new(),
subcommands: vec![
ManpageSubcommand {
name: "status".to_string(),
desc: "show status".to_string(),
},
ManpageSubcommand {
name: "start".to_string(),
desc: "start unit".to_string(),
},
],
positionals: Vec::new(),
description: String::new(),
};
write_result(&cache_dir, "demo", "manpage", &result).expect("cache");
let exact = Command::new(env!("CARGO_BIN_EXE_inshellah"))
.arg("complete")
.arg("--dir")
.arg(&cache_dir)
.arg("demo")
.arg("status")
.output()
.expect("run inshellah complete");
assert!(
exact.status.success(),
"stderr = {}",
String::from_utf8_lossy(&exact.stderr)
);
let exact_stdout = String::from_utf8(exact.stdout).expect("stdout");
assert_eq!(
exact_stdout.trim(),
"null",
"exact match should hand off; stdout = {exact_stdout}"
);
let partial = Command::new(env!("CARGO_BIN_EXE_inshellah"))
.arg("complete")
.arg("--dir")
.arg(&cache_dir)
.arg("demo")
.arg("sta")
.output()
.expect("run inshellah complete");
let partial_stdout = String::from_utf8(partial.stdout).expect("stdout");
assert!(
partial_stdout.contains(r#""value":"status""#)
&& partial_stdout.contains(r#""value":"start""#),
"partial should still match both; stdout = {partial_stdout}"
);
let _ = fs::remove_dir_all(root);
}
#[test]
fn complete_resolves_absolute_path_after_elevation_wrapper() {
let root = unique_temp_dir("inshellah-absolute-elevation-complete");