From 4a7febee6c6437c1f580b7b0ed3b021cec11ab64 Mon Sep 17 00:00:00 2001 From: atagen Date: Sun, 24 May 2026 18:10:30 +1000 Subject: [PATCH] add darwin support --- README.md | 2 +- doc/nixos.md | 73 +++++++++++- doc/runtime-completions.md | 49 ++++++++ flake.nix | 12 ++ nix/inshellah-completer.nu | 14 ++- nix/module.nix | 90 +++++++++++++- src/config.rs | 233 +++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/main.rs | 224 +++++++++++++++++++++++++++++------ tests/runtime_complete.rs | 123 ++++++++++++++++++++ 10 files changed, 774 insertions(+), 47 deletions(-) create mode 100644 src/config.rs diff --git a/README.md b/README.md index 1f3e779..f431a30 100644 --- a/README.md +++ b/README.md @@ -8,5 +8,5 @@ see `doc/` for details: - [building and installing](doc/building.md) — cargo, nix, post-install setup - [nushell integration](doc/nushell-integration.md) — setup, the pipeline, the completer -- [nixos module](doc/nixos.md) — automatic build-time indexing + module options +- [nixos / nix-darwin module](doc/nixos.md) — automatic build-time indexing + module options - [runtime completions](doc/runtime-completions.md) — on-the-fly caching via the completer diff --git a/doc/nixos.md b/doc/nixos.md index 6b0a46e..029bd6b 100644 --- a/doc/nixos.md +++ b/doc/nixos.md @@ -1,10 +1,12 @@ -# nixos integration +# nixos / nix-darwin integration -inshellah provides a nixos module that indexes nushell completions for -every installed package at system build time, and a wrapped binary -that knows where to find the result. +inshellah provides a module that indexes nushell completions for every +installed package at system build time, and a wrapped binary that knows +where to find the result. the same module body backs both NixOS and +nix-darwin — on Linux it scrapes ELF binaries, on macOS Mach-O ones, +selected automatically by the inshellah build's target platform. 
-## enabling +## enabling (NixOS) ```nix # flake.nix outputs: @@ -28,6 +30,34 @@ or importing directly: } ``` +## enabling (nix-darwin) + +```nix +# flake.nix outputs: +{ + darwinConfigurations.mymac = nix-darwin.lib.darwinSystem { + modules = [ + inshellah.darwinModules.default + { programs.inshellah.enable = true; } + ]; + }; +} +``` + +the options and behaviour are identical to the NixOS module — it reads +the same `programs.inshellah.*` settings and writes the same completion +index and nushell shim under the system profile (`/run/current-system/sw`, +which nix-darwin also uses). + +on macOS, the tools you reach through `/usr/bin` (`git`, `clang`, …) are +`xcrun` shims whose real binaries and manpages live under the active +developer dir, outside the nix system profile — so they aren't indexed by +default. rather than probe the host toolchain impurely, list the nix +equivalents you want completed in `extraScrapePackages`; the module rolls +their store paths into the build-time scrape (`inshellah index … --prefix +…`). this applies on NixOS too, for any package whose completions you want +indexed without putting it on the system path. + after rebuilding, completions are immediately available through the autoloaded nushell shim. @@ -69,6 +99,11 @@ programs.inshellah = { # commands to skip manpage parsing for (uses --help instead) helpOnlyCommands = [ "nix" ]; + # extra packages to scrape alongside the system profile; each store path + # is passed to `inshellah index --prefix`. handy on macOS for the nix + # equivalents of /usr/bin shim tools (git, clang, …) + extraScrapePackages = [ pkgs.git pkgs.clang ]; + # per-subprocess timeout in ms during indexing (null = built-in # default of 200ms) timeoutMs = null; @@ -81,11 +116,39 @@ programs.inshellah = { # set to 0 to omit native result-limit flags dynamicLimit = 200; + # characters that trigger flag completions when a partial token begins + # with one of them. default "-"; e.g. 
"-+" also triggers on "+" + flagTriggers = "-"; + + # also surface flags on an empty token (right after a space), mixed in + # with subcommands. default false + flagOnEmpty = false; + + # cap on candidates returned and nushell's max_results. 0 = no cap + # (the shim's default max_results of 200 still applies) + maxCompletions = 0; + + # per-subprocess timeout (ms) for the completer's on-the-fly --help + # resolution of uncached commands. null = built-in default of 200ms. + # distinct from timeoutMs (indexing) and dynamicTimeoutMs (live shim) + completeTimeoutMs = null; + # worker-thread count for the parallel scrape workers = null; }; ``` +### flag-triggering behaviour + +`flagTriggers` and `flagOnEmpty` control when option/flag completions are +offered. By default flags appear only after a leading `-`. Add characters +to `flagTriggers` (e.g. `"-+"`) to trigger on them as well — for a +non-dash trigger the text after it is matched against the bare flag name, +so `+ver` completes to `--verbose`. Set `flagOnEmpty = true` to list flags +immediately after a space, alongside subcommands. These map to the +`INSHELLAH_FLAG_TRIGGERS` / `INSHELLAH_FLAG_ON_EMPTY` environment variables +(see [runtime-completions.md](runtime-completions.md)). + ## using the completer the module installs the completer under nushell's vendor autoload path, diff --git a/doc/runtime-completions.md b/doc/runtime-completions.md index 3e0ee84..56585e4 100644 --- a/doc/runtime-completions.md +++ b/doc/runtime-completions.md @@ -54,6 +54,35 @@ $env.config.completions.external = { paths after the first in `--dir` are read-only system dirs. +## configuration + +the `complete` path reads a few behavioural knobs from the environment. +each has a compiled-in default that reproduces the original behaviour, so +an unconfigured install is unchanged. on nixos and nix-darwin these are set for you by +the module options (see [nixos.md](nixos.md)); elsewhere, export them in +your shell before nushell starts. 
+ +| variable | default | effect | +|---|---|---| +| `INSHELLAH_FLAG_TRIGGERS` | `-` | characters that surface flag completions when a partial token begins with one of them. set to `-+` to also trigger on `+`; whitespace is ignored. an empty value disables prefix-triggered flags (leaving only `INSHELLAH_FLAG_ON_EMPTY`). | +| `INSHELLAH_FLAG_ON_EMPTY` | `0` | when truthy (`1`/`true`/`yes`/`on`), also surface flags on an empty token — i.e. right after a space — alongside subcommands. otherwise an empty token hands off to file/dynamic completion. | +| `INSHELLAH_MAX_COMPLETIONS` | `0` | cap on the number of candidates returned (and nushell's `max_results` when sourcing the bundled snippet). `0` imposes no inshellah cap; the bundled snippet's default `max_results` of 200 still applies. | +| `INSHELLAH_TIMEOUT_MS` | `200` | per-subprocess timeout for the on-the-fly `--help` resolution. an explicit `--timeout-ms` flag overrides it. | + +### flag triggering + +by default flags are offered only once a token begins with `-` +(`git commit --`). two overrides are available: + +- **other trigger characters** — `INSHELLAH_FLAG_TRIGGERS="-+"` makes a + leading `+` surface flags too. for non-dash triggers the typed text + after the trigger is matched against the bare flag name, so `+ver` + completes to `--verbose`. the emitted value keeps the tool's real + dashed flag. +- **flags after a space** — `INSHELLAH_FLAG_ON_EMPTY=1` lists flags + immediately after a space, mixed in with subcommands, before any + character is typed. + ## cache management ```sh @@ -84,3 +113,23 @@ for upfront indexing on non-nixos systems: ```sh inshellah index /usr /usr/local ``` + +## macOS developer toolchain + +`/usr/bin/git`, `/usr/bin/clang`, and friends are `xcrun` shims whose real +binaries and manpages live under the active developer dir (`xcode-select +-p` — Command Line Tools or full Xcode), outside the usual prefixes. 
to +index those, point `index` at the real prefix explicitly — either the +developer dir or, preferably, the nix equivalents: + +```sh +# the active developer toolchain +inshellah index --prefix "$(xcode-select -p)/usr" + +# or nix-provided tools, kept reproducible +inshellah index /run/current-system/sw --prefix /nix/store/…-git:/nix/store/…-clang +``` + +`--prefix` takes a colon-separated list of extra prefixes, scraped +alongside the positional ones. the nix module exposes this as +`programs.inshellah.extraScrapePackages` (see [nixos.md](nixos.md)). diff --git a/flake.nix b/flake.nix index b5f9b19..8cfd42e 100644 --- a/flake.nix +++ b/flake.nix @@ -268,11 +268,23 @@ } ); + # the module body in ./nix/module.nix only touches options common to + # both NixOS and nix-darwin (environment.{variables,systemPackages, + # pathsToLink,extraSetup} + a programs.inshellah namespace), so the two + # platform outputs share it verbatim and differ only in which package + # the host system resolves to. nixosModules.default = { pkgs, ... }: { imports = [ ./nix/module.nix ]; programs.inshellah.package = self.packages.${pkgs.stdenv.hostPlatform.system}.default; }; + + darwinModules.default = + { pkgs, ... }: + { + imports = [ ./nix/module.nix ]; + programs.inshellah.package = self.packages.${pkgs.stdenv.hostPlatform.system}.default; + }; }; } diff --git a/nix/inshellah-completer.nu b/nix/inshellah-completer.nu index 432a64e..d70bad2 100644 --- a/nix/inshellah-completer.nu +++ b/nix/inshellah-completer.nu @@ -37,6 +37,18 @@ let inshellah_limit_args = { |flag| if $inshellah_dynamic_limit == 0 { [] } else { [$flag $inshellah_dynamic_limit] } } +# nushell's own cap on how many external completions it will display. +# mirrors the Rust completer's INSHELLAH_MAX_COMPLETIONS cap so both ends +# agree. 0 (or unset) keeps the historical default of 200. +let inshellah_default_max_results = 200 + +let inshellah_max_results = do { + let raw = (try { + $env.INSHELLAH_MAX_COMPLETIONS? 
| default 0 | into int + } catch { 0 }) + if $raw > 0 { $raw } else { $inshellah_default_max_results } +} + let inshellah_with_timeout = { |body| if $inshellah_dynamic_timeout_ms == 0 { try { do $body } catch { null } @@ -853,4 +865,4 @@ let inshellah_complete = { |spans| } } -$env.config.completions.external = {enable: true, max_results: 200, completer: $inshellah_complete} +$env.config.completions.external = {enable: true, max_results: $inshellah_max_results, completer: $inshellah_complete} diff --git a/nix/module.nix b/nix/module.nix index 940e816..f28de8a 100644 --- a/nix/module.nix +++ b/nix/module.nix @@ -1,4 +1,4 @@ -# NixOS module: automatic nushell completion indexing +# NixOS / nix-darwin module: automatic nushell completion indexing # # Indexes completions using three strategies in priority order: # 1. Native completion generators (e.g. CMD completions nushell) @@ -8,11 +8,18 @@ # Produces a directory of .json/.nu files at build time. # The `complete` command reads from this directory as a system overlay. # -# Usage: +# This module body only uses options shared by NixOS and nix-darwin +# (environment.{variables,systemPackages,pathsToLink,extraSetup}), so the +# same file backs both flake outputs. On macOS the indexer scrapes Mach-O +# binaries; on Linux, ELF — selected by the inshellah build's target os. +# +# Usage (NixOS): # { pkgs, ... }: { # imports = [ ./path/to/inshellah-rs/nix/module.nix ]; # programs.inshellah.enable = true; # } +# Usage (nix-darwin): identical — import the same file (or the flake's +# darwinModules.default) and set programs.inshellah.enable = true. { config, @@ -100,6 +107,22 @@ in ''; }; + extraScrapePackages = lib.mkOption { + type = lib.types.listOf lib.types.package; + default = [ ]; + example = lib.literalExpression "[ pkgs.git pkgs.clang ]"; + description = '' + additional packages to scrape for completions alongside the system + profile. 
each package's store path is passed to `inshellah index` + via `--prefix`, so it must contain bin/ and/or share/man/. + + useful on macOS, where the active developer toolchain (git, clang, + …) lives outside the nix system profile behind /usr/bin shims: + install the nix equivalents and list them here so their completions + get indexed reproducibly, rather than probing the host toolchain. + ''; + }; + timeoutMs = lib.mkOption { type = lib.types.nullOr lib.types.int; default = null; @@ -132,6 +155,56 @@ in ''; }; + flagTriggers = lib.mkOption { + type = lib.types.str; + default = "-"; + example = "-+"; + description = '' + characters that trigger flag (option) completions when a partial + token begins with one of them. the default "-" reproduces the + original behaviour where only a leading dash surfaces flags. each + character is taken literally; whitespace is ignored. exported as + INSHELLAH_FLAG_TRIGGERS. + ''; + }; + + flagOnEmpty = lib.mkOption { + type = lib.types.bool; + default = false; + example = true; + description = '' + also surface flag completions when nothing has been typed yet — + i.e. right after a space/tab — alongside subcommands. when false + (the default) an empty token hands off to file/dynamic completion. + exported as INSHELLAH_FLAG_ON_EMPTY. + ''; + }; + + maxCompletions = lib.mkOption { + type = lib.types.int; + default = 0; + example = 100; + description = '' + upper bound on the number of static completion candidates returned, + and the nushell `max_results` shown. 0 means no inshellah-imposed + cap (nushell's built-in default of 200 still applies). exported as + INSHELLAH_MAX_COMPLETIONS. + ''; + }; + + completeTimeoutMs = lib.mkOption { + type = lib.types.nullOr lib.types.int; + default = null; + example = 400; + description = '' + per-subprocess timeout in milliseconds for the on-the-fly --help + resolution the completer performs for uncached commands. 
distinct + from `timeoutMs` (build-time indexing) and `dynamicTimeoutMs` (the + nushell shim's live providers). null uses the binary's compiled + default (currently 200ms). exported as INSHELLAH_TIMEOUT_MS. + ''; + }; + workers = lib.mkOption { type = lib.types.nullOr lib.types.int; default = null; @@ -155,6 +228,12 @@ in config = lib.mkIf cfg.enable { environment.variables.INSHELLAH_DYNAMIC_TIMEOUT_MS = toString cfg.dynamicTimeoutMs; environment.variables.INSHELLAH_DYNAMIC_LIMIT = toString cfg.dynamicLimit; + environment.variables.INSHELLAH_FLAG_TRIGGERS = cfg.flagTriggers; + environment.variables.INSHELLAH_FLAG_ON_EMPTY = if cfg.flagOnEmpty then "1" else "0"; + environment.variables.INSHELLAH_MAX_COMPLETIONS = toString cfg.maxCompletions; + environment.variables.INSHELLAH_TIMEOUT_MS = lib.mkIf ( + cfg.completeTimeoutMs != null + ) (toString cfg.completeTimeoutMs); environment.systemPackages = let @@ -191,13 +270,18 @@ in helpOnlyFlag = lib.optionalString (cfg.helpOnlyCommands != [ ]) " --help-only ${helpOnlyFile}"; timeoutFlag = lib.optionalString (cfg.timeoutMs != null) " --timeout-ms ${toString cfg.timeoutMs}"; workersFlag = lib.optionalString (cfg.workers != null) " --workers ${toString cfg.workers}"; + # roll the explicit extra packages up into a single colon-separated + # --prefix so they're scraped alongside the system profile. 
+ prefixFlag = lib.optionalString (cfg.extraScrapePackages != [ ]) ( + " --prefix " + lib.concatStringsSep ":" (map toString cfg.extraScrapePackages) + ); snippetFile = pkgs.writeText "inshellah-completer.nu" cfg.snippet; in '' mkdir -p ${destDir} if [ -d "$out/bin" ] && [ -d "$out/share/man" ]; then - ${inshellah} index "$out" --dir ${destDir}${ignoreFlag}${helpOnlyFlag}${timeoutFlag}${workersFlag} \ + ${inshellah} index "$out" --dir ${destDir}${ignoreFlag}${helpOnlyFlag}${prefixFlag}${timeoutFlag}${workersFlag} \ 2>/dev/null || true fi diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..8d4268d --- /dev/null +++ b/src/config.rs @@ -0,0 +1,233 @@ +//! runtime configuration for the `complete` path. +//! +//! the completer reads a handful of behavioural knobs from the +//! environment. this matches the mechanism already used for the dynamic +//! nushell shim (`INSHELLAH_DYNAMIC_*`): the nixos module exports the +//! variables via `environment.variables`, and users sourcing the snippet +//! by hand can export them directly. every field has a compiled-in +//! default that reproduces the historical behaviour, so an unconfigured +//! install behaves exactly as before. + +/// per-subprocess timeout default for the dynamic `--help` resolve path +/// when neither `--timeout-ms` nor `INSHELLAH_TIMEOUT_MS` is set. +pub const DEFAULT_TIMEOUT_MS: u64 = 200; + +/// the historical (and default) flag-trigger set: a partial token starting +/// with `-` asks for flag completions. +pub const DEFAULT_FLAG_TRIGGERS: &str = "-"; + +/// behavioural configuration resolved once at startup. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Config { + /// characters that, when a partial token begins with one of them, + /// cause flag completions to be emitted. defaults to `['-']` — the + /// only trigger in the original behaviour. + pub flag_triggers: Vec, + /// also emit flags when the partial token is empty, i.e. 
right after a + /// space/tab with nothing typed yet. defaults to `false`. + pub flag_on_empty: bool, + /// upper bound on the number of completion candidates returned by the + /// static completer. `0` means no inshellah-imposed cap (nushell's own + /// `max_results` still applies). + pub max_completions: usize, + /// per-subprocess timeout (ms) for the dynamic `--help` resolve path. + pub timeout_ms: u64, +} + +impl Default for Config { + fn default() -> Self { + Config { + flag_triggers: DEFAULT_FLAG_TRIGGERS.chars().collect(), + flag_on_empty: false, + max_completions: 0, + timeout_ms: DEFAULT_TIMEOUT_MS, + } + } +} + +impl Config { + /// resolve configuration from the process environment, falling back to + /// the compiled-in defaults for anything unset or unparseable. + pub fn from_env() -> Self { + Self::from_lookup(|key| std::env::var(key).ok()) + } + + /// inner resolver, parameterised over the variable source so tests can + /// drive it without mutating the real (process-global) environment. + pub fn from_lookup(mut get: impl FnMut(&str) -> Option) -> Self { + let mut cfg = Config::default(); + if let Some(raw) = get("INSHELLAH_FLAG_TRIGGERS") { + // tokens are split on whitespace before they reach us, so a + // whitespace character can never be the first byte of a partial + // token — drop any from the trigger set rather than letting it + // silently never match. an explicitly empty value disables + // prefix-triggered flags entirely (leaving only flag_on_empty). + cfg.flag_triggers = raw.chars().filter(|c| !c.is_whitespace()).collect(); + } + if let Some(raw) = get("INSHELLAH_FLAG_ON_EMPTY") { + cfg.flag_on_empty = parse_bool(&raw); + } + if let Some(raw) = get("INSHELLAH_MAX_COMPLETIONS") + && let Ok(n) = raw.trim().parse::() + { + cfg.max_completions = n; + } + if let Some(raw) = get("INSHELLAH_TIMEOUT_MS") + && let Ok(n) = raw.trim().parse::() + { + cfg.timeout_ms = n; + } + cfg + } + + /// whether a partial token should surface flag completions. 
an empty + /// token is governed by [`Config::flag_on_empty`]; otherwise the first + /// character is matched against the trigger set. + pub fn triggers_flags(&self, token: &str) -> bool { + match token.chars().next() { + None => self.flag_on_empty, + Some(c) => self.flag_triggers.contains(&c), + } + } + + /// derive the needle used to score flag candidates for a triggering + /// token, plus whether that needle should match the *bare* flag name + /// (dashes stripped) rather than the canonical dashed form. + /// + /// the `-` trigger keeps the dashed form so long-vs-short ranking is + /// preserved exactly (`--ver` prefers `--verbose`, `-v` prefers `-v`). + /// any other trigger character has no dash semantics, so we strip the + /// single leading trigger char and match the remainder against the bare + /// name — letting `+ver` match `--verbose`. an empty token yields an + /// empty bare needle, which matches every flag. + pub fn flag_needle<'a>(&self, token: &'a str) -> FlagNeedle<'a> { + match token.chars().next() { + None => FlagNeedle { + needle: token, + bare: true, + }, + Some('-') => FlagNeedle { + needle: token, + bare: false, + }, + Some(c) => FlagNeedle { + needle: &token[c.len_utf8()..], + bare: true, + }, + } + } +} + +/// the scoring needle for flag candidates: [`FlagNeedle::needle`] is matched +/// against the bare flag name when [`FlagNeedle::bare`] is set, else against +/// the dashed form. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct FlagNeedle<'a> { + pub needle: &'a str, + pub bare: bool, +} + +/// permissive truthy parse for boolean env vars. 
+fn parse_bool(s: &str) -> bool { + matches!( + s.trim().to_ascii_lowercase().as_str(), + "1" | "true" | "yes" | "on" + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + fn cfg_from(pairs: &[(&str, &str)]) -> Config { + let map: HashMap = pairs + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + Config::from_lookup(|k| map.get(k).cloned()) + } + + #[test] + fn defaults_match_historical_behaviour() { + let cfg = Config::default(); + assert_eq!(cfg.flag_triggers, vec!['-']); + assert!(!cfg.flag_on_empty); + assert_eq!(cfg.max_completions, 0); + assert_eq!(cfg.timeout_ms, DEFAULT_TIMEOUT_MS); + + // only "-" prefixes trigger; empty does not. + assert!(cfg.triggers_flags("-")); + assert!(cfg.triggers_flags("--verbose")); + assert!(!cfg.triggers_flags("")); + assert!(!cfg.triggers_flags("build")); + } + + #[test] + fn flag_on_empty_opens_flags_after_a_space() { + let cfg = cfg_from(&[("INSHELLAH_FLAG_ON_EMPTY", "true")]); + assert!(cfg.flag_on_empty); + assert!(cfg.triggers_flags("")); + // a bare word still does not trigger flags. 
+ assert!(!cfg.triggers_flags("sub")); + } + + #[test] + fn custom_trigger_chars_replace_the_dash() { + let cfg = cfg_from(&[("INSHELLAH_FLAG_TRIGGERS", "-+")]); + assert_eq!(cfg.flag_triggers, vec!['-', '+']); + assert!(cfg.triggers_flags("+ver")); + assert!(cfg.triggers_flags("-v")); + assert!(!cfg.triggers_flags("/x")); + } + + #[test] + fn whitespace_in_triggers_is_dropped() { + let cfg = cfg_from(&[("INSHELLAH_FLAG_TRIGGERS", "- ")]); + assert_eq!(cfg.flag_triggers, vec!['-']); + } + + #[test] + fn dash_needle_keeps_dashes_other_triggers_go_bare() { + let cfg = cfg_from(&[("INSHELLAH_FLAG_TRIGGERS", "-+")]); + assert_eq!( + cfg.flag_needle("--ver"), + FlagNeedle { + needle: "--ver", + bare: false + } + ); + assert_eq!( + cfg.flag_needle("+ver"), + FlagNeedle { + needle: "ver", + bare: true + } + ); + assert_eq!( + cfg.flag_needle(""), + FlagNeedle { + needle: "", + bare: true + } + ); + } + + #[test] + fn numeric_knobs_parse_and_fall_back() { + let cfg = cfg_from(&[ + ("INSHELLAH_MAX_COMPLETIONS", "50"), + ("INSHELLAH_TIMEOUT_MS", "1000"), + ]); + assert_eq!(cfg.max_completions, 50); + assert_eq!(cfg.timeout_ms, 1000); + + // garbage leaves the default intact. 
+ let bad = cfg_from(&[ + ("INSHELLAH_MAX_COMPLETIONS", "lots"), + ("INSHELLAH_TIMEOUT_MS", "soon"), + ]); + assert_eq!(bad.max_completions, 0); + assert_eq!(bad.timeout_ms, DEFAULT_TIMEOUT_MS); + } +} diff --git a/src/lib.rs b/src/lib.rs index 2256bee..c2a20fe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +pub mod config; pub mod parsers; pub mod pool; pub mod store; diff --git a/src/main.rs b/src/main.rs index c04514f..99322c0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -21,6 +21,7 @@ use std::time::{Duration, Instant}; use parking_lot::Mutex; +use inshellah::config::{Config, DEFAULT_TIMEOUT_MS}; use inshellah::parsers::help::help_parser; use inshellah::parsers::manpage::{ ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch, @@ -35,26 +36,20 @@ use inshellah::store::{ const COMMAND_SECTIONS: &[u8] = &[1, 8]; -/// per-subprocess timeout default when --timeout-ms isn't passed. -/// empirically tuned so that a slow-to-print binary doesn't block the -/// pool, while fast-responding ones (the vast majority) print their -/// --help well inside the window. with `n` parallel workers a 200ms -/// ceiling means the worst-case waste from an unresponsive binary is -/// `200ms / n_workers` of wall time. -const DEFAULT_TIMEOUT_MS: u64 = 200; - fn usage() { eprintln!( "inshellah - nushell completions engine Usage: inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE] - [--timeout-ms N] [--workers N] + [--prefix PATH[:PATH...]] [--timeout-ms N] [--workers N] Index completions into a directory of JSON/nu files. PREFIX is a directory containing bin/ and share/man/. 
Default dir: $XDG_CACHE_HOME/inshellah --ignore FILE skip listed commands entirely --help-only FILE skip manpages for listed commands, use --help instead + --prefix PATHS extra scrape prefixes, colon-separated (in addition + to the positional PREFIX args) --timeout-ms N per-subprocess timeout in milliseconds (default 200) --workers N parallel scrape workers (default: cpu count) inshellah complete CMD [ARGS...] [--dir PATH[:PATH...]] [--timeout-ms N] @@ -69,6 +64,12 @@ Usage: inshellah manpage FILE Parse a manpage and emit nushell extern inshellah manpage-dir DIR Batch-process manpages under DIR inshellah completions Generate nushell completions for inshellah + +Configuration (environment, read by `complete`): + INSHELLAH_FLAG_TRIGGERS chars that surface flags (default \"-\"; e.g. \"-+\") + INSHELLAH_FLAG_ON_EMPTY 1 to also surface flags on an empty token + INSHELLAH_MAX_COMPLETIONS cap on candidates returned (0 = no cap) + INSHELLAH_TIMEOUT_MS default --help resolve timeout (--timeout-ms wins) " ); } @@ -262,12 +263,41 @@ fn skip_name(name: &str) -> bool { || name.contains('/') } -// --- ELF scanning --- +// --- executable image scanning --- -/// scan an ELF binary (or any file) for string needles. returns the set of -/// needles that appeared. on read failure all needles are reported found -/// (conservative — we'd rather try --help than skip). -fn elf_scan(path: &Path, needles: &[&str]) -> HashSet { +/// is `magic` the leading 4 bytes of an executable image we know how to +/// string-scan on *this* platform? the scan itself is byte-oriented and +/// format-agnostic; this gate just keeps us from slurping data files that +/// happen to carry the executable bit. +/// +/// recognition is strictly per-platform: a macOS build honours only Mach-O +/// (thin 32/64-bit either endianness, plus fat/universal), every other +/// (ELF) target honours only ELF. 
keeping them mutually exclusive means a +/// Linux build never treats `CA FE BA BE` as an image — that's FAT_MAGIC to +/// Mach-O but also a Java class file, which a Linux box can plausibly carry. +fn is_scannable_magic(magic: &[u8; 4]) -> bool { + #[cfg(target_os = "macos")] + { + matches!( + magic, + [0xce, 0xfa, 0xed, 0xfe] // MH_MAGIC (thin 32-bit, little-endian) + | [0xcf, 0xfa, 0xed, 0xfe] // MH_MAGIC_64 (thin 64-bit, little-endian) + | [0xfe, 0xed, 0xfa, 0xce] // MH_MAGIC (thin 32-bit, big-endian) + | [0xfe, 0xed, 0xfa, 0xcf] // MH_MAGIC_64 (thin 64-bit, big-endian) + | [0xca, 0xfe, 0xba, 0xbe] // FAT_MAGIC (universal) + | [0xca, 0xfe, 0xba, 0xbf] // FAT_MAGIC_64 + ) + } + #[cfg(not(target_os = "macos"))] + { + magic == b"\x7fELF" + } +} + +/// scan an executable image (ELF on Linux, Mach-O on macOS) for string needles. +/// returns the set of needles that appeared. on read failure all needles are +/// reported found (conservative — we'd rather try --help than skip). +fn image_scan(path: &Path, needles: &[&str]) -> HashSet { let mut found: HashSet = HashSet::new(); let real = match fs::canonicalize(path) { Ok(p) => p, @@ -288,8 +318,8 @@ fn elf_scan(path: &Path, needles: &[&str]) -> HashSet { if f.read_exact(&mut magic).is_err() { return found; } - if magic != [0x7f, b'E', b'L', b'F'] { - // not ELF — return empty so caller decides + if !is_scannable_magic(&magic) { + // not a recognised executable image — return empty so caller decides return found; } let max_needle = needles.iter().map(|s| s.len()).max().unwrap_or(0); @@ -410,9 +440,9 @@ enum Classify { Skip, } -/// classify an ELF binary by scanning for help/completion needles. -fn classify_elf(path: &Path) -> Classify { - let found = elf_scan(path, &["-h", "--help", "complet"]); +/// classify an executable image by scanning for help/completion needles. 
+fn classify_image(path: &Path) -> Classify { + let found = image_scan(path, &["-h", "--help", "complet"]); if found.contains("complet") { Classify::HasNativeCompletions } else if found.contains("-h") || found.contains("--help") { @@ -422,18 +452,19 @@ fn classify_elf(path: &Path) -> Classify { } } -/// classify a binary by its actual nature: script, ELF, or nix wrapper. +/// classify a binary by its actual nature: script, native image, or nix +/// wrapper. native images are ELF on Linux and Mach-O on macOS. fn classify_binary(_bindir: &Path, full: &Path) -> Classify { if is_script(full) { return Classify::TryHelp; } if let Some(target) = nix_wrapper_target(full) { - return classify_elf(&target); + return classify_image(&target); } if let Some(target) = nix_script_wrapper_target(full) { - return classify_elf(&target); + return classify_image(&target); } - classify_elf(full) + classify_image(full) } // --- help text extraction --- @@ -836,6 +867,71 @@ mod main_tests { r#"{"value":"a\"b","description":"line\nnext"}"# ); } + + #[test] + fn completion_dir_mandir_resolves_to_prefix_share_man() { + // /share/inshellah -> /share/man, no doubled "share". + assert_eq!( + mandir_for_completion_dir(Path::new("/run/current-system/sw/share/inshellah")), + Some(PathBuf::from("/run/current-system/sw/share/man")) + ); + assert_eq!( + mandir_for_completion_dir(Path::new("/etc/profiles/per-user/alice/share/inshellah")), + Some(PathBuf::from("/etc/profiles/per-user/alice/share/man")) + ); + } + + #[test] + fn index_prefix_flag_appends_colon_separated_prefixes() { + let args = [ + "/sys".to_string(), + "--prefix".to_string(), + "/a:/b/c".to_string(), + "--prefix".to_string(), + "/d".to_string(), + ]; + let parsed = parse_index_args(&args); + // positional first, then each --prefix segment, in order. 
+ assert_eq!( + parsed.prefixes, + vec![ + PathBuf::from("/sys"), + PathBuf::from("/a"), + PathBuf::from("/b/c"), + PathBuf::from("/d"), + ] + ); + } + + #[test] + fn non_executable_magic_is_never_scannable() { + // a PNG header, a shebang, plain text — none are images on any platform. + assert!(!is_scannable_magic(&[0x89, b'P', b'N', b'G'])); + assert!(!is_scannable_magic(b"#!/b")); + assert!(!is_scannable_magic(b"text")); + } + + // recognition is strictly per-platform: each build honours only its + // native container and rejects the other. + #[cfg(target_os = "macos")] + #[test] + fn macos_scans_mach_o_only() { + // thin 64-bit little-endian — the common arm64/x86_64 layout. + assert!(is_scannable_magic(&[0xcf, 0xfa, 0xed, 0xfe])); + // fat/universal. + assert!(is_scannable_magic(&[0xca, 0xfe, 0xba, 0xbe])); + // ELF is not a native macOS image. + assert!(!is_scannable_magic(b"\x7fELF")); + } + + #[cfg(not(target_os = "macos"))] + #[test] + fn elf_targets_scan_elf_only() { + assert!(is_scannable_magic(b"\x7fELF")); + // Mach-O magics are rejected; FAT_MAGIC also collides with java class. + assert!(!is_scannable_magic(&[0xca, 0xfe, 0xba, 0xbe])); + assert!(!is_scannable_magic(&[0xcf, 0xfa, 0xed, 0xfe])); + } } /// shared state passed to every pool worker. nothing inside mutates @@ -1806,6 +1902,7 @@ fn cmd_complete( system_dirs: &[PathBuf], mandirs: &[PathBuf], timeout_ms: u64, + cfg: &Config, ) { let mut dirs: Vec = system_dirs.to_vec(); dirs.push(user_dir.to_path_buf()); @@ -1951,7 +2048,10 @@ fn cmd_complete( } } - let typing_flag = last_token.starts_with('-') && !last_token.is_empty(); + // flag completions are gated on a configurable trigger: by default a + // leading "-", but the user may add other characters or opt into + // surfacing flags on an empty token (right after a space). 
+ let typing_flag = cfg.triggers_flags(&last_token); let fallback_subcommands = match &found { Some((matched_name, r, _)) if r.subcommands.is_empty() => { subcommands_of(&dirs, matched_name) @@ -2000,25 +2100,38 @@ fn cmd_complete( } } } - // flag candidates + // flag candidates. the needle — and whether it scores against + // the bare flag name or the dashed form — depends on which + // trigger the user typed (see Config::flag_needle). the default + // "-" trigger keeps the dashed form, so ranking is unchanged. if typing_flag { + let fneedle = cfg.flag_needle(&last_token); + let score_against = |dashed: &str, bare_name: &str| -> i32 { + if fneedle.bare { + fuzzy_score(fneedle.needle, bare_name) + } else { + fuzzy_score(fneedle.needle, dashed) + } + }; for e in &r.entries { let (flag, aka, score) = match &e.switch { OwnedSwitch::Long(l) => { let flag = format!("--{l}"); - let score = fuzzy_score(&last_token, &flag); + let score = score_against(&flag, l); (flag, None, score) } OwnedSwitch::Short(c) => { let flag = format!("-{c}"); - let score = fuzzy_score(&last_token, &flag); + let short_bare = c.to_string(); + let score = score_against(&flag, &short_bare); (flag, None, score) } OwnedSwitch::Both(c, l) => { let long_flag = format!("--{l}"); let short_flag = format!("-{c}"); - let ls = fuzzy_score(&last_token, &long_flag); - let ss = fuzzy_score(&last_token, &short_flag); + let short_bare = c.to_string(); + let ls = score_against(&long_flag, l); + let ss = score_against(&short_flag, &short_bare); if ss > ls { (short_flag, Some(long_flag), ss) } else { @@ -2040,6 +2153,9 @@ fn cmd_complete( } } scored.sort_by(|a, b| b.0.cmp(&a.0)); + if cfg.max_completions > 0 { + scored.truncate(cfg.max_completions); + } scored.into_iter().map(|(_, json)| json).collect() } }; @@ -2128,6 +2244,17 @@ fn parse_index_args(args: &[String]) -> IndexArgs { out.help_only = Some(PathBuf::from(&args[i])); } } + // additional scrape prefixes beyond the positional ones, as a + // 
colon-separated list. lets callers (notably the nix module's + // extraScrapePackages) roll up extra packages without relying on + // positional ordering. + "--prefix" => { + i += 1; + if i < args.len() { + out.prefixes + .extend(args[i].split(':').filter(|s| !s.is_empty()).map(PathBuf::from)); + } + } "--timeout-ms" => { i += 1; if i < args.len() @@ -2164,13 +2291,24 @@ fn man_dir_of_prefix(prefix: &Path) -> PathBuf { prefix.join("share/man") } +/// derive the manpage dir colocated with a read-only system completion dir. +/// the completer is pointed at `<prefix>/share/inshellah`, so the install +/// prefix is two levels up and its manpages live at `<prefix>/share/man` — +/// the same bin↔share/man colocation `index` and the binary-prefix walk +/// assume. portable across Linux and macOS prefixes (nix profile, Homebrew, +/// /usr, CommandLineTools). +fn mandir_for_completion_dir(dir: &Path) -> Option { + dir.parent().and_then(Path::parent).map(man_dir_of_prefix) +} + /// parse --dir PATH[:PATH...], optional --timeout-ms N, plus any /// positional args. when --dir isn't supplied, returns the default cache -/// dir as the single entry. -fn parse_dir_args(args: &[String]) -> (Vec, Vec, u64) { +/// dir as the single entry. the timeout is `None` when `--timeout-ms` +/// isn't passed, so the caller can fall back to the configured default. 
+fn parse_dir_args(args: &[String]) -> (Vec, Vec, Option) { let mut positional = Vec::new(); let mut dirs: Option> = None; - let mut timeout_ms = DEFAULT_TIMEOUT_MS; + let mut timeout_ms: Option = None; let mut i = 0; while i < args.len() { match args[i].as_str() { @@ -2185,7 +2323,7 @@ fn parse_dir_args(args: &[String]) -> (Vec, Vec, u64) { if i < args.len() && let Ok(n) = args[i].parse::() { - timeout_ms = n; + timeout_ms = Some(n); } } _ => { @@ -2262,19 +2400,31 @@ fn main() { } } "complete" => { - let (positional, dirs, timeout_ms) = parse_dir_args(&args[2..]); + let cfg = Config::from_env(); + let (positional, dirs, timeout_override) = parse_dir_args(&args[2..]); + // explicit --timeout-ms wins; otherwise fall back to the + // configured default (INSHELLAH_TIMEOUT_MS or the compiled one). + let timeout_ms = timeout_override.unwrap_or(cfg.timeout_ms); // first dir is the writable user cache; rest are read-only system dirs let (user_dir, system_dirs): (PathBuf, Vec) = match dirs.split_first() { Some((first, rest)) => (first.clone(), rest.to_vec()), None => (default_store_path(), Vec::new()), }; - // mandirs default to share/man siblings of each system dir + // mandirs default to the share/man colocated with each system + // completion dir's install prefix (<prefix>/share/inshellah). 
let mandirs: Vec = system_dirs .iter() - .filter_map(|d| d.parent().map(|p| p.join("share/man"))) + .filter_map(|d| mandir_for_completion_dir(d)) .filter(|p| p.is_dir()) .collect(); - cmd_complete(&positional, &user_dir, &system_dirs, &mandirs, timeout_ms); + cmd_complete( + &positional, + &user_dir, + &system_dirs, + &mandirs, + timeout_ms, + &cfg, + ); } "query" => { let (positional, dirs, _timeout_ms) = parse_dir_args(&args[2..]); diff --git a/tests/runtime_complete.rs b/tests/runtime_complete.rs index 50ebd5d..47b08eb 100644 --- a/tests/runtime_complete.rs +++ b/tests/runtime_complete.rs @@ -641,3 +641,126 @@ exit 2 let _ = fs::remove_dir_all(root); } + +/// write a single-command cache directory exposing the given long flags, +/// returning the cache dir. callers drive `inshellah complete demo ...`. +fn flag_demo_cache(name: &str, flags: &[&str]) -> std::path::PathBuf { + let root = unique_temp_dir(name); + let cache_dir = root.join("cache"); + fs::create_dir_all(&cache_dir).expect("cache dir"); + let result = ManpageResult { + entries: flags + .iter() + .map(|f| ManpageEntry { + switch: OwnedSwitch::Long((*f).to_string()), + param: None, + desc: format!("{f} flag"), + }) + .collect(), + subcommands: Vec::new(), + positionals: Vec::new(), + description: String::new(), + }; + write_result(&cache_dir, "demo", "help", &result).expect("cache"); + cache_dir +} + +#[test] +fn complete_flag_on_empty_env_surfaces_flags_after_space() { + let cache_dir = flag_demo_cache("inshellah-flag-on-empty", &["verbose"]); + + // baseline: empty token without the env knob yields no flags. + let baseline = Command::new(env!("CARGO_BIN_EXE_inshellah")) + .args(["complete", "--dir"]) + .arg(&cache_dir) + .args(["demo", ""]) + .output() + .expect("run inshellah complete"); + assert_eq!( + String::from_utf8_lossy(&baseline.stdout).trim(), + "null", + "empty token should not surface flags by default" + ); + + // with INSHELLAH_FLAG_ON_EMPTY, the empty token surfaces flags. 
+ let opted_in = Command::new(env!("CARGO_BIN_EXE_inshellah")) + .env("INSHELLAH_FLAG_ON_EMPTY", "1") + .args(["complete", "--dir"]) + .arg(&cache_dir) + .args(["demo", ""]) + .output() + .expect("run inshellah complete"); + let stdout = String::from_utf8_lossy(&opted_in.stdout); + assert!( + stdout.contains(r#""value":"--verbose""#), + "stdout = {stdout}" + ); + + let _ = fs::remove_dir_all(cache_dir.parent().unwrap()); +} + +#[test] +fn complete_custom_trigger_char_surfaces_flags() { + let cache_dir = flag_demo_cache("inshellah-custom-trigger", &["verbose"]); + + // "+" is not a trigger by default — treated as an argument prefix. + let baseline = Command::new(env!("CARGO_BIN_EXE_inshellah")) + .args(["complete", "--dir"]) + .arg(&cache_dir) + .args(["demo", "+v"]) + .output() + .expect("run inshellah complete"); + assert_eq!( + String::from_utf8_lossy(&baseline.stdout).trim(), + "null", + "'+' should not trigger flags by default" + ); + + // configured as a trigger, "+v" fuzzy-matches the bare flag name. 
+ let opted_in = Command::new(env!("CARGO_BIN_EXE_inshellah")) + .env("INSHELLAH_FLAG_TRIGGERS", "-+") + .args(["complete", "--dir"]) + .arg(&cache_dir) + .args(["demo", "+v"]) + .output() + .expect("run inshellah complete"); + let stdout = String::from_utf8_lossy(&opted_in.stdout); + assert!( + stdout.contains(r#""value":"--verbose""#), + "stdout = {stdout}" + ); + + let _ = fs::remove_dir_all(cache_dir.parent().unwrap()); +} + +#[test] +fn complete_max_completions_caps_results() { + let cache_dir = flag_demo_cache( + "inshellah-max-completions", + &["verbose", "version", "verify", "verbatim"], + ); + + let capped = Command::new(env!("CARGO_BIN_EXE_inshellah")) + .env("INSHELLAH_MAX_COMPLETIONS", "2") + .args(["complete", "--dir"]) + .arg(&cache_dir) + .args(["demo", "--ver"]) + .output() + .expect("run inshellah complete"); + let stdout = String::from_utf8_lossy(&capped.stdout); + let count = stdout.matches(r#""value":"#).count(); + assert_eq!(count, 2, "expected 2 capped candidates, stdout = {stdout}"); + + // without the cap, all four matching flags come back. + let uncapped = Command::new(env!("CARGO_BIN_EXE_inshellah")) + .args(["complete", "--dir"]) + .arg(&cache_dir) + .args(["demo", "--ver"]) + .output() + .expect("run inshellah complete"); + let stdout = String::from_utf8_lossy(&uncapped.stdout); + let count = stdout.matches(r#""value":"#).count(); + assert_eq!(count, 4, "expected 4 candidates, stdout = {stdout}"); + + let _ = fs::remove_dir_all(cache_dir.parent().unwrap()); +}