From aed5e5c8bcd926804ddb7e2aa79417b494b2b4b9 Mon Sep 17 00:00:00 2001 From: atagen Date: Thu, 21 May 2026 21:02:59 +1000 Subject: [PATCH] tidy up extractions and dynamic completion placement --- src/main.rs | 12 +- src/parsers/help/options.rs | 39 ++++- src/parsers/manpage.rs | 242 +++++++++++++++++++++++++++++- src/parsers/manpage/strategies.rs | 232 +++++++++++++++++++++++++++- src/parsers/nushell.rs | 97 +----------- tests/ports.rs | 17 +++ tests/runtime_complete.rs | 65 ++++++++ 7 files changed, 600 insertions(+), 104 deletions(-) diff --git a/src/main.rs b/src/main.rs index df1f49b..532d86e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1912,7 +1912,11 @@ fn cmd_complete( None => Vec::new(), Some((matched_name, r, depth)) => { let mut scored: Vec<(i32, String)> = Vec::new(); - // subcommand candidates (skip if match is too shallow) + // subcommand candidates (skip if match is too shallow). when the + // typed token *exactly* equals a candidate we drop it — the user + // has already written the full word; echoing it back masks any + // dynamic completer downstream (e.g. systemctl unit names after + // `systemctl status`). if *depth >= lookup_depth.saturating_sub(1) { let subs: Vec = if !r.subcommands.is_empty() { r.subcommands.clone() @@ -1920,6 +1924,9 @@ fn cmd_complete( subcommands_of(&dirs, matched_name) }; for sc in &subs { + if !last_token.is_empty() && last_token == sc.name { + continue; + } let s = fuzzy_score(&last_token, &sc.name); if s > 0 { scored.push((s, completion_json(&sc.name, &sc.desc))); @@ -1961,6 +1968,9 @@ fn cmd_complete( } } }; + if !last_token.is_empty() && last_token == flag { + continue; + } let s = fuzzy_score(&last_token, &flag); if s > 0 { scored.push((s, completion_json(&flag, &desc))); diff --git a/src/parsers/help/options.rs b/src/parsers/help/options.rs index 73b2170..4e18e2c 100644 --- a/src/parsers/help/options.rs +++ b/src/parsers/help/options.rs @@ -19,8 +19,15 @@ use nom::{ make_parser!(short_switch -> char, preceded(char('-'), satisfy(|c| c.is_alphanumeric()))); +// long flag name: first char must be alphanumeric to reject the +// `------------` style separator lines that show up in `less --help` +// (and therefore in zstdless/bzless help output, which wrap less). +// without this guard, the parser happily eats the whole run of dashes +// after `--` and produces a flag named `------…`. make_parser!(long_switch -> &'a str, - preceded(tag("--"), take_while1(is_option_char))); + preceded(tag("--"), verify(take_while1(is_option_char), |s: &str| { + s.chars().next().is_some_and(|c| c.is_ascii_alphanumeric()) + }))); make_parser!(negatable_long_switch -> &'a str, preceded(tag("--[no-]"), take_while1(is_option_char))); @@ -121,6 +128,35 @@ make_parser!(slash_sep -> (), switch_pair!(long_slash_short, long_switch, slash_sep, short_switch => |l, s| Switch::Both(s, l)); +// `-s PARAM, --long` form — ripgrep and other clap-via-roff manpages emit +// the placeholder between the short and long forms: +// `\-e PATTERN, \-\-regexp=PATTERN` +// `\-f PATTERNFILE, \-\-file=PATTERNFILE` +// the placeholder is matched conservatively (uppercase identifier or +// `` form) so we don't eat normal description words. the trailing +// `=PATTERN` / ` PATTERN` is left for `opt(param_parser)` to consume. +make_parser!(placeholder_token -> &'a str, + alt(( + delimited(char('<'), take_till1(|c: char| c == '>'), char('>')), + verify( + take_while1(|c: char| + c.is_ascii_alphabetic() || c.is_ascii_digit() || c == '_' || c == '-' + ), + |s: &str| { + let first = match s.chars().next() { Some(c) => c, None => return false }; + if !(first.is_ascii_uppercase() || first == '_') { return false; } + s.chars().all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_') + } + ), + )) +); + +fn short_param_comma_long(s: &str) -> IResult<&str, Switch<'_>> { + let (rem, (short, _, _ph, _, long)) = + (short_switch, space1, placeholder_token, comma, long_switch).parse(s)?; + Ok((rem, Switch::Both(short, long))) +} + make_parser!(short_as_switch -> Switch<'a>, short_switch => Switch::Short); make_parser!(negatable_long_as_switch -> Switch<'a>, negatable_long_switch => Switch::Long); make_parser!(long_as_switch -> Switch<'a>, long_switch => Switch::Long); @@ -131,6 +167,7 @@ make_parser!(pub switch_parser -> Switch<'a>, short_space_negatable_long, short_comma_long, short_space_long, + short_param_comma_long, long_slash_short, short_as_switch, negatable_long_as_switch, diff --git a/src/parsers/manpage.rs b/src/parsers/manpage.rs index 9fc7b5f..8924544 100644 --- a/src/parsers/manpage.rs +++ b/src/parsers/manpage.rs @@ -122,7 +122,9 @@ impl From<&Subcommand<'_>> for ManpageSubcommand { impl From<&HelpResult<'_>> for ManpageResult { fn from(r: &HelpResult<'_>) -> Self { ManpageResult { - entries: merge_short_long_pairs(r.entries.iter().map(Into::into).collect()), + entries: dedup_entries(merge_short_long_pairs( + r.entries.iter().map(Into::into).collect(), + )), subcommands: r.subcommands.iter().map(Into::into).collect(), // positional names are stored lowercased so output is // stable across the various places we extract them from @@ -137,6 +139,87 @@ impl From<&HelpResult<'_>> for ManpageResult { } } +fn entry_key(e: &ManpageEntry) -> String { + match &e.switch { + OwnedSwitch::Short(c) => format!("-{c}"), + OwnedSwitch::Long(l) | OwnedSwitch::Both(_, l) => format!("--{l}"), + } +} + +fn entry_score(e: &ManpageEntry) -> i32 { + let switch_bonus = if matches!(e.switch, OwnedSwitch::Both(_, _)) { + 10 + } else { + 0 + }; + let param_bonus = if e.param.is_some() { 5 } else { 0 }; + let desc_bonus = (e.desc.len() / 10).min(5) as i32; + switch_bonus + param_bonus + desc_bonus +} + +/// collapse duplicate flag entries that refer to the same flag. +/// +/// real-world manpages emit duplicates for several reasons: +/// - clap subcommand pages list inherited global flags alongside the +/// subcommand's own (e.g. every `homectl .1` mentions `--json`) +/// - tools like btrfs document a flag once as a "deprecated alias" and +/// once under the "Global options" group +/// - help text occasionally restates a flag in an example block that the +/// parser also picks up +/// +/// for each key (the canonical `-c` / `--long` form), we keep the highest +/// scoring entry: `Both` outranks bare Short/Long, param-bearing outranks +/// param-less, longer description outranks shorter. after that we strip +/// standalone Short entries whose char is already covered by a `Both`, +/// since they're now redundant with the merged form. +/// +/// preserves original ordering: the surviving entry sits where the first +/// occurrence of its key appeared. +pub fn dedup_entries(entries: Vec) -> Vec { + use std::collections::HashMap; + use std::collections::HashSet; + + if entries.len() < 2 { + return entries; + } + + let mut best: HashMap = HashMap::new(); + for (i, e) in entries.iter().enumerate() { + let key = entry_key(e); + match best.get(&key) { + Some(&prev) if entry_score(&entries[prev]) >= entry_score(e) => {} + _ => { + best.insert(key, i); + } + } + } + + let mut covered: HashSet = HashSet::new(); + for &idx in best.values() { + if let OwnedSwitch::Both(c, _) = &entries[idx].switch { + covered.insert(*c); + } + } + + let mut seen: HashSet = HashSet::new(); + let mut out: Vec = Vec::with_capacity(entries.len()); + for e in entries.iter() { + let key = entry_key(e); + if seen.contains(&key) { + continue; + } + if let OwnedSwitch::Short(c) = &e.switch + && covered.contains(c) + { + continue; + } + seen.insert(key.clone()); + let best_idx = *best.get(&key).unwrap(); + out.push(entries[best_idx].clone()); + } + out +} + /// merge non-adjacent Short/Long entries that share an identical, non-empty /// description into a single `Both` entry. some manpage styles emit `-h` and /// `--help` as independent .TP / .IP blocks rather than as the comma-joined @@ -224,7 +307,7 @@ pub fn merge_short_long_pairs(entries: Vec) -> Vec { /// extraction pipeline. pub fn parse_manpage_lines(lines: &[GroffLine]) -> ManpageResult { let mut result = parse_manpage_lines_raw(lines); - result.entries = merge_short_long_pairs(result.entries); + result.entries = dedup_entries(merge_short_long_pairs(result.entries)); result } @@ -331,7 +414,8 @@ pub fn parse_manpage_with_subs(contents: &str) -> (ManpageResult, Vec<(String, M let subs: Vec<(String, ManpageResult)> = sub_sections .into_iter() .map(|(name, desc, lines)| { - let entries = merge_short_long_pairs(strategies::extract_entries(&lines)); + let entries = + dedup_entries(merge_short_long_pairs(strategies::extract_entries(&lines))); let sub_result = ManpageResult { entries, subcommands: Vec::new(), @@ -387,6 +471,158 @@ write to FILE show this help and exit "#; + const HP_MANPAGE: &str = r#".TH BAT "1" +.SH NAME +bat \- demo +.SH "OPTIONS" +.HP +\fB\-A\fR, \fB\-\-show\-all\fR +.IP +Show non-printable characters. +.HP +\fB\-\-nonprintable\-notation\fR +.IP +Specify how to display non-printable characters. + +Possible values: +.RS +.IP "caret" +Use character sequences like ^G ... +.IP "unicode" +Use special Unicode code points ... +.RE +.HP +\fB\-l\fR, \fB\-\-language\fR +.IP +Set the language. +"#; + + #[test] + fn hp_strategy_extracts_flags_and_skips_rs_example_values() { + // bat's manpage uses .HP for flag tags and nests example values in + // .RS/.RE — the parser should pick up the three real flags and + // ignore the inner .IP "caret" / .IP "unicode" example tags. + let r = parse_manpage_string(HP_MANPAGE); + let names: Vec = r + .entries + .iter() + .map(|e| match &e.switch { + OwnedSwitch::Long(l) | OwnedSwitch::Both(_, l) => l.clone(), + OwnedSwitch::Short(c) => c.to_string(), + }) + .collect(); + assert_eq!( + names, + vec!["show-all", "nonprintable-notation", "language"], + "expected 3 flags, got {names:?}" + ); + assert!( + !r.entries.iter().any(|e| matches!( + &e.switch, + OwnedSwitch::Long(l) if l == "caret" || l == "unicode" + )), + "inner .RS .IP example values must not be picked up as flags: {:?}", + r.entries + ); + assert!(matches!( + r.entries[0].switch, + OwnedSwitch::Both('A', ref l) if l == "show-all" + )); + assert!(matches!( + r.entries[2].switch, + OwnedSwitch::Both('l', ref l) if l == "language" + )); + } + + const TEXT_RS_MANPAGE: &str = r#".TH RG "1" +.SH NAME +rg \- demo +.SH "OPTIONS" +.SS INPUT OPTIONS +\fB\-e\fR \fIPATTERN\fR, \fB\-\-regexp\fR=\fIPATTERN\fR +.RS 4 +A pattern to search for. This option can be provided multiple times. +.RE +.sp +\fB\-f\fR \fIPATTERNFILE\fR, \fB\-\-file\fR=\fIPATTERNFILE\fR +.RS 4 +Search for patterns from the given file. +.RE +.sp +\fB\-x\fR, \fB\-\-line\-regexp\fR +.RS 4 +Only show matches surrounded by line boundaries. +.RE +"#; + + #[test] + fn text_rs_strategy_extracts_ripgrep_style_flags() { + // rg's manpage layout: bare Text tag immediately followed by `.RS/.RE`, + // separated by `.sp` — no `.PP` to anchor on. covers both the + // `-s PARAM, --long=PARAM` form and the simpler `-s, --long` form. + let r = parse_manpage_string(TEXT_RS_MANPAGE); + assert_eq!(r.entries.len(), 3, "expected 3 entries, got {}", r.entries.len()); + // -e, --regexp with PARAM between short and comma + assert!(matches!( + r.entries[0].switch, + OwnedSwitch::Both('e', ref l) if l == "regexp" + )); + assert!(matches!( + r.entries[0].param, + Some(OwnedParam::Mandatory(ref p)) if p == "PATTERN" + )); + assert!(r.entries[0].desc.starts_with("A pattern to search for")); + // -f, --file with PARAM + assert!(matches!( + r.entries[1].switch, + OwnedSwitch::Both('f', ref l) if l == "file" + )); + // -x, --line-regexp without PARAM (plain comma form) + assert!(matches!( + r.entries[2].switch, + OwnedSwitch::Both('x', ref l) if l == "line-regexp" + )); + } + + const TP_CLAP_DUAL_PARAGRAPH: &str = r#".TH JJ "1" +.SH NAME +jj \- demo +.SH OPTIONS +.TP +\fB\-\-at\-operation\fR +Operation to load the repo at + +Operation to load the repo at. By default, Jujutsu loads the repo at the most recent operation, and lots of additional sentences that go on for paragraphs. +.TP +\fB\-h\fR, \fB\-\-help\fR +Print help +"#; + + #[test] + fn tp_strategy_stops_description_at_blank_line() { + // clap-generated manpages emit "summary\n\nexpanded body" — we want + // just the summary for completion tooltips, not the multi-paragraph + // wall of text. leading blanks (between tag and first body line) + // are still skipped, blanks only terminate once we've collected text. + let r = parse_manpage_string(TP_CLAP_DUAL_PARAGRAPH); + let at_op = r + .entries + .iter() + .find(|e| matches!(&e.switch, OwnedSwitch::Long(l) if l == "at-operation")) + .expect("--at-operation entry"); + assert_eq!( + at_op.desc, "Operation to load the repo at", + "expected only the summary line, got: {:?}", + at_op.desc + ); + // sanity: the second .TP block still parses (we didn't accidentally + // swallow the next entry). + assert!(r.entries.iter().any(|e| matches!( + &e.switch, + OwnedSwitch::Both('h', l) if l == "help" + ))); + } + #[test] fn tp_strategy_extracts_flags() { let r = parse_manpage_string(TP_MANPAGE); diff --git a/src/parsers/manpage/strategies.rs b/src/parsers/manpage/strategies.rs index 855c468..e9d9971 100644 --- a/src/parsers/manpage/strategies.rs +++ b/src/parsers/manpage/strategies.rs @@ -55,7 +55,20 @@ fn collect_description_lines(lines: &[GroffLine], start: usize) -> (String, usiz } i += 1; } - GroffLine::Blank | GroffLine::Comment => { + GroffLine::Blank => { + // a blank line ends the description, but only after we've + // collected some text — leading blanks between the tag and + // the first description line are still skipped. this caps + // clap-style "summary\n\nexpanded body" entries (jj, etc.) + // at the summary, which is what completion tooltips want. + // `.IP`-style help already stops at blanks (collect_text_lines + // breaks on any non-Text line); this brings `.TP` in line. + if !acc.is_empty() { + break; + } + i += 1; + } + GroffLine::Comment => { i += 1; } GroffLine::Macro { .. } => { @@ -211,14 +224,210 @@ fn combine_short_long_alternates( // strategy b: .IP style (curl, hand-written manpages). // .IP takes an inline tag argument: .IP "-v, --verbose" // the description follows as text lines. -make_macro_walker!(pub strategy_ip -> Vec, on macro "IP" => - |lines, i, args| { - let tag = strip_groff_escapes(args); - let (desc, rest) = collect_text_lines(&lines[i + 1..]); - let new_i = lines.len() - rest.len(); - parse_tag_to_entry(&tag, desc).map(|e| (e, new_i)) +// +// .RS/.RE depth-aware: man pages frequently nest .IP inside .RS blocks to +// list example values (e.g. bat's `.IP "caret"` under `--nonprintable-notation`). +// those nested tags look like flag definitions and confuse the parser, so we +// only treat `.IP` at outer scope as a flag entry. +pub fn strategy_ip(lines: &[GroffLine]) -> Vec { + let mut out = Vec::new(); + let mut i = 0; + let mut rs_depth: u32 = 0; + while i < lines.len() { + if let GroffLine::Macro { name, args } = &lines[i] { + match name.as_str() { + "RS" => { + rs_depth += 1; + i += 1; + continue; + } + "RE" => { + rs_depth = rs_depth.saturating_sub(1); + i += 1; + continue; + } + "IP" if rs_depth == 0 => { + let tag = strip_groff_escapes(args); + let (desc, rest) = collect_text_lines(&lines[i + 1..]); + let new_i = lines.len() - rest.len(); + if let Some(entry) = parse_tag_to_entry(&tag, desc) { + out.push(entry); + i = new_i; + continue; + } + } + _ => {} + } + } + i += 1; } -); + out +} + +// strategy b': .HP style (bat, help2man with hanging paragraphs). +// .HP introduces a hanging-indent paragraph: the next text line is the tag, +// followed by an empty `.IP` macro that starts the description body. example +// value listings are wrapped in `.RS/.RE` and skipped during description +// collection. +// +// .HP +// \fB\-A\fR, \fB\-\-show\-all\fR +// .IP +// Show non-printable characters ... +pub fn strategy_hp(lines: &[GroffLine]) -> Vec { + let mut out = Vec::new(); + let mut i = 0; + while i < lines.len() { + let GroffLine::Macro { name, .. } = &lines[i] else { + i += 1; + continue; + }; + if name != "HP" { + i += 1; + continue; + } + let mut j = i + 1; + while j < lines.len() && matches!(&lines[j], GroffLine::Blank | GroffLine::Comment) { + j += 1; + } + let Some(tag) = lines.get(j).and_then(tag_from_line) else { + i += 1; + continue; + }; + let mut body_start = j + 1; + if let Some(GroffLine::Macro { name, .. }) = lines.get(body_start) + && name == "IP" + { + body_start += 1; + } + let (desc, new_i) = collect_hp_description(lines, body_start); + if let Some(entry) = parse_tag_to_entry(&tag, desc) { + out.push(entry); + } + i = if new_i > i { new_i } else { i + 1 }; + } + out +} + +/// description collector for `.HP` entries. stops at the next flag-boundary +/// macro (`.HP`, `.TP`, `.PP`, `.SH`, `.SS`) and skips entire `.RS/.RE` +/// example blocks — those are sub-value listings, not part of the flag's +/// own description text. +fn collect_hp_description(lines: &[GroffLine], start: usize) -> (String, usize) { + let mut acc: Vec = Vec::new(); + let mut i = start; + while i < lines.len() { + match &lines[i] { + GroffLine::Macro { name, .. } + if matches!(name.as_str(), "HP" | "TP" | "TQ" | "PP" | "SH" | "SS") => + { + break; + } + GroffLine::Macro { name, .. } if name == "RS" => { + i += 1; + let mut depth: u32 = 1; + while i < lines.len() && depth > 0 { + if let GroffLine::Macro { name, .. } = &lines[i] { + if name == "RS" { + depth += 1; + } else if name == "RE" { + depth -= 1; + } + } + i += 1; + } + } + GroffLine::Text(t) => { + acc.push(t.clone()); + i += 1; + } + GroffLine::Macro { name, args } + if matches!( + name.as_str(), + "B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI" + ) => + { + let text = tag_of_macro(name, args); + if !text.is_empty() { + acc.push(text); + } + i += 1; + } + GroffLine::Blank => { + if !acc.is_empty() { + break; + } + i += 1; + } + GroffLine::Comment => { + i += 1; + } + GroffLine::Macro { .. } => { + i += 1; + } + } + } + (acc.join(" "), i) +} + +// strategy b'': bare Text tag immediately followed by `.RS/.RE` (ripgrep, +// some help2man variants). like the `.PP+.RS` shape, but with no `.PP` +// anchor between flag entries — flags sit directly under `.SS` headers +// separated only by `.sp`: +// +// .SS INPUT OPTIONS +// \fB\-e\fP \fIPATTERN\fP, \fB\-\-regexp\fP=\fIPATTERN\fP +// .RS 4 +// A pattern to search for ... +// .RE +// .sp +// \fB\-f\fP \fIPATTERNFILE\fP, \fB\-\-file\fP=\fIPATTERNFILE\fP +// .RS 4 +// ... +// +// we only treat a top-level Text line as a tag when an `.RS` immediately +// follows (skipping blanks/comments) and the text starts with `-`. nested +// Text lines inside an existing `.RS` block are skipped via depth tracking +// so description paragraphs that happen to begin with a flag reference +// don't get mis-recognized. +pub fn strategy_text_rs(lines: &[GroffLine]) -> Vec { + let mut out = Vec::new(); + let mut rs_depth: u32 = 0; + let mut i = 0; + while i < lines.len() { + match &lines[i] { + GroffLine::Macro { name, .. } if name == "RS" => { + rs_depth += 1; + i += 1; + } + GroffLine::Macro { name, .. } if name == "RE" => { + rs_depth = rs_depth.saturating_sub(1); + i += 1; + } + GroffLine::Text(tag) if rs_depth == 0 && tag.trim_start().starts_with('-') => { + let mut j = i + 1; + while j < lines.len() + && matches!(&lines[j], GroffLine::Blank | GroffLine::Comment) + { + j += 1; + } + if let Some(GroffLine::Macro { name, .. }) = lines.get(j) + && name == "RS" + { + let (desc, new_i) = collect_pp_rs_desc(lines, j); + if let Some(entry) = parse_tag_to_entry(tag, desc) { + out.push(entry); + i = if new_i > i { new_i } else { i + 1 }; + continue; + } + } + i += 1; + } + _ => i += 1, + } + } + out +} // strategy c: .PP + .RS/.RE style (git, docbook-generated manpages). // flag entries are introduced by .PP (paragraph), with the flag name as @@ -421,6 +630,7 @@ pub fn extract_entries(lines: &[GroffLine]) -> Vec { let pp = count_macro("PP", lines); let rs = count_macro("RS", lines); let ur = count_macro("UR", lines); + let hp = count_macro("HP", lines); let mut specialized: Vec<(&str, Vec)> = Vec::new(); if tp > 0 { @@ -435,6 +645,12 @@ pub fn extract_entries(lines: &[GroffLine]) -> Vec { if ur > 0 && ip > 0 { specialized.push(("nix", strategy_nix(lines))); } + if hp > 0 { + specialized.push(("HP", strategy_hp(lines))); + } + if rs > 0 { + specialized.push(("Text+RS", strategy_text_rs(lines))); + } let candidates: Vec<(&str, Vec)> = { let filtered: Vec<_> = specialized .into_iter() diff --git a/src/parsers/nushell.rs b/src/parsers/nushell.rs index eaf4bcc..c00694f 100644 --- a/src/parsers/nushell.rs +++ b/src/parsers/nushell.rs @@ -13,12 +13,10 @@ //! - escaping special characters for nushell string literals use std::borrow::Cow; -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; use std::sync::OnceLock; -use crate::parsers::manpage::{ - ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch, -}; +use crate::parsers::manpage::{ManpageEntry, ManpageResult, OwnedParam, OwnedSwitch}; use crate::types::Positional; /// nushell built-in commands and keywords — we must never generate `extern` @@ -246,70 +244,6 @@ pub fn escape_nu(s: &str) -> Cow<'_, str> { } } -fn entry_key(e: &ManpageEntry) -> String { - match &e.switch { - OwnedSwitch::Short(c) => format!("-{c}"), - OwnedSwitch::Long(l) | OwnedSwitch::Both(_, l) => format!("--{l}"), - } -} - -fn entry_score(e: &ManpageEntry) -> i32 { - let switch_bonus = if matches!(e.switch, OwnedSwitch::Both(_, _)) { - 10 - } else { - 0 - }; - let param_bonus = if e.param.is_some() { 5 } else { 0 }; - let desc_bonus = (e.desc.len() / 10).min(5) as i32; - switch_bonus + param_bonus + desc_bonus -} - -/// deduplicate flag entries that refer to the same flag. -/// -/// when the same flag appears multiple times (e.g. from overlapping manpage -/// sections or repeated help text), we keep the "best" version using a score: -/// - both short+long form present: +10 (most informative) -/// - has a parameter: +5 -/// - description length bonus: up to +5 -/// -/// after deduplication by long name, we also remove standalone short flags -/// whose letter is already covered by a Both(short, long) entry. this prevents -/// emitting both "-v" and "--verbose(-v)" which nushell would reject as a -/// duplicate. the filtering preserves original ordering from the help text. -pub fn dedup_entries(entries: &[ManpageEntry]) -> Vec { - let mut best: HashMap = HashMap::new(); - for e in entries { - let key = entry_key(e); - match best.get(&key) { - Some(prev) if entry_score(prev) >= entry_score(e) => {} - _ => { - best.insert(key, e); - } - } - } - let mut covered: HashSet = HashSet::new(); - for e in best.values() { - if let OwnedSwitch::Both(c, _) = &e.switch { - covered.insert(*c); - } - } - let mut seen: HashSet = HashSet::new(); - let mut out: Vec = Vec::new(); - for e in entries { - let key = entry_key(e); - if seen.contains(&key) { - continue; - } - if let OwnedSwitch::Short(c) = &e.switch - && covered.contains(c) - { - continue; - } - seen.insert(key.clone()); - out.push((*best.get(&key).unwrap()).clone()); - } - out -} /// format a single flag entry as a nushell `extern` parameter line. /// output examples: @@ -414,7 +348,9 @@ pub fn module_name_of(cmd_name: &str) -> String { /// export extern "git stash" [ # stash changes /// ] pub fn generate_extern(cmd_name: &str, result: &ManpageResult) -> String { - let entries = dedup_entries(&result.entries); + // entries arrive deduped from the parser pipeline (`parse_manpage_lines` + // and `From<&HelpResult>` both run `manpage::dedup_entries`), so we can + // emit them directly here. let escaped_name = escape_nu(cmd_name); let positionals = fixup_positionals(result.positionals.clone()); @@ -424,7 +360,7 @@ pub fn generate_extern(cmd_name: &str, result: &ManpageResult) -> String { out.push_str(&format_positional(name, p)); out.push('\n'); } - for entry in &entries { + for entry in &result.entries { out.push_str(&format_flag(entry)); out.push('\n'); } @@ -452,24 +388,3 @@ pub fn generate_module(cmd_name: &str, result: &ManpageResult) -> String { ) } -/// convenience wrapper: generate an `extern` from just a list of entries. -pub fn generate_extern_from_entries(cmd_name: &str, entries: Vec) -> String { - generate_extern( - cmd_name, - &ManpageResult { - entries, - subcommands: Vec::new(), - positionals: Vec::new(), - description: String::new(), - }, - ) -} - -/// stub subcommand entry used when extracting subcommands from a parsed -/// help result for nushell output. -pub fn manpage_subcommand_from(name: &str, desc: &str) -> ManpageSubcommand { - ManpageSubcommand { - name: name.to_string(), - desc: desc.to_string(), - } -} diff --git a/tests/ports.rs b/tests/ports.rs index 3a3fe4e..f38b793 100644 --- a/tests/ports.rs +++ b/tests/ports.rs @@ -814,6 +814,23 @@ fn nushell_module() { assert!(nu.contains("--verbose(-v)"), "nu = {nu}"); } +#[test] +fn long_switch_rejects_dash_only_separator_lines() { + // less's `--help` (and therefore zstdless/bzless wrappers) is full of + // separator lines like `---------------------------------------------------`. + // the first two chars look like a `--` prefix; the rest is just dashes. + // without an alphanumeric-first guard, the parser would treat the whole + // separator as a flag named `------…`. + let r = parse(" ---------------------------------------------------\n -a, --all show all\n"); + assert_eq!( + r.entries.len(), + 1, + "separator should not parse as a flag, got {} entries", + r.entries.len() + ); + assert!(matches!(&r.entries[0].switch, Switch::Both('a', l) if *l == "all")); +} + #[test] fn dedup_entries_help() { let txt = " -v, --verbose verbose output\n --verbose verbose mode\n -v be verbose\n"; diff --git a/tests/runtime_complete.rs b/tests/runtime_complete.rs index 9d209b7..9e86667 100644 --- a/tests/runtime_complete.rs +++ b/tests/runtime_complete.rs @@ -241,6 +241,71 @@ fn complete_returns_flags_only_after_hyphen() { let _ = fs::remove_dir_all(root); } +#[test] +fn complete_drops_exact_subcommand_match() { + // when the typed token exactly equals a cached subcommand, the binary + // returns null so a downstream dynamic completer (systemctl unit names, + // git remote names, etc.) can take over instead of echoing the + // already-typed word back. + let root = unique_temp_dir("inshellah-exact-subcommand-drop"); + let cache_dir = root.join("cache"); + fs::create_dir_all(&cache_dir).expect("cache dir"); + + let result = ManpageResult { + entries: Vec::new(), + subcommands: vec![ + ManpageSubcommand { + name: "status".to_string(), + desc: "show status".to_string(), + }, + ManpageSubcommand { + name: "start".to_string(), + desc: "start unit".to_string(), + }, + ], + positionals: Vec::new(), + description: String::new(), + }; + write_result(&cache_dir, "demo", "manpage", &result).expect("cache"); + + let exact = Command::new(env!("CARGO_BIN_EXE_inshellah")) + .arg("complete") + .arg("--dir") + .arg(&cache_dir) + .arg("demo") + .arg("status") + .output() + .expect("run inshellah complete"); + assert!( + exact.status.success(), + "stderr = {}", + String::from_utf8_lossy(&exact.stderr) + ); + let exact_stdout = String::from_utf8(exact.stdout).expect("stdout"); + assert_eq!( + exact_stdout.trim(), + "null", + "exact match should hand off; stdout = {exact_stdout}" + ); + + let partial = Command::new(env!("CARGO_BIN_EXE_inshellah")) + .arg("complete") + .arg("--dir") + .arg(&cache_dir) + .arg("demo") + .arg("sta") + .output() + .expect("run inshellah complete"); + let partial_stdout = String::from_utf8(partial.stdout).expect("stdout"); + assert!( + partial_stdout.contains(r#""value":"status""#) + && partial_stdout.contains(r#""value":"start""#), + "partial should still match both; stdout = {partial_stdout}" + ); + + let _ = fs::remove_dir_all(root); +} + #[test] fn complete_resolves_absolute_path_after_elevation_wrapper() { let root = unique_temp_dir("inshellah-absolute-elevation-complete");